file.name) + with open(temp_file_path, "wb") as temp_file: + temp_file.write(file.getvalue()) + return temp_file_path + + +@st.cache_resource(show_spinner=False) +def load_model(model_size: str): + print(f"model size : {MODEL_SIZES[model_size]}") + return whisper.load_model( + MODEL_SIZES[model_size], device="cpu", download_root="models" + ) + + +def get_sentence_data(filename: str, timestamp_dict: dict): + sentence_df = pd.DataFrame( + columns=["Audio file", "Sentence", "Start", "End", "Duration"] + ) + for sentence_i in timestamp_dict["segments"]: + sentence_i = pd.DataFrame( + { + "Audio file": [filename], + "Sentence": [str(sentence_i["text"])], + "Start": [sentence_i["start"]], + "End": [sentence_i["end"]], + "Duration": [sentence_i["end"] - sentence_i["start"]], + } + ) + sentence_df = pd.concat([sentence_df, sentence_i], ignore_index=True) + return sentence_df + + +def get_word_data(filename: str, timestamp_dict: dict): + pass + + +def get_word_data(): + pass + + +st.title("⏱️🧾 Timestamp generator") + +# Audio load +audio_file = st.file_uploader( + "Load audio file to transcribe", type=["wav", "mp3"], accept_multiple_files=True +) + +stamp_type, lang, size = st.columns(3) + +with stamp_type: + timestamp_type = st.selectbox("Timestamp type", options=list(STAMP_TYPES.keys())) + +with lang: + language = st.selectbox("Language", options=list(LANGUAGES.keys())) + +with size: + model_size = st.selectbox("Model size", options=list(MODEL_SIZES.keys())) + +# Botón para generar el timestamp +if st.button("Generate Timestamp", use_container_width=True): + with st.spinner("Loading model..."): + model = load_model(model_size) + sentences_df = pd.DataFrame() + for audio_i in audio_file: + with st.spinner(f"Processing audio: {audio_i.name}"): + tmp_audio = save_temp_file(audio_i) + tmp_audio_file = whisper.load_audio(tmp_audio) + timestamp_result = whisper.transcribe( + model, tmp_audio_file, language=LANGUAGES[language] + ) + audio_i_df = get_sentence_data(audio_i.name, 
timestamp_result) + sentences_df = pd.concat([sentences_df, audio_i_df], ignore_index=True) + + st.dataframe(sentences_df) + + st.download_button( + "Save timestamps", + sentences_df.to_csv(index=False), + file_name="timestamps.csv", + mime="text/csv", + use_container_width=True, + ) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000000000000000000000000000000000000..4be688fc4e1586f63c565d045dabbcd06835d916 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,21 @@ +[tool.poetry] +name = "whisper-timestamped" +version = "0.1.0" +description = "" +authors = ["nahue-passano "] +readme = "README.md" +packages = [{include = "whisper_timestamped"}] + +[tool.poetry.dependencies] +python = ">=3.8,<3.9.7 || >3.9.7,<4.0" +Cython = "*" +dtw-python = "*" +openai-whisper = "*" +torch = "1.13" +matplotlib = "^3.7.1" +streamlit = "^1.24.0" + + +[build-system] +requires = ["poetry-core"] +build-backend = "poetry.core.masonry.api" diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000000000000000000000000000000000..90d070f7d4a1f647ef32566bfbe73ee02ab203b3 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,5 @@ +Cython +dtw-python +openai-whisper +torch==1.13 +streamlit==1.24 \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..96758e3dbaa94b8a3629a113df365679d229da05 --- /dev/null +++ b/setup.py @@ -0,0 +1,43 @@ +import os + +from setuptools import setup, find_packages + +install_requires = open(os.path.join(os.path.dirname(__file__), "requirements.txt")).readlines() + +version = None +license = None +with open(os.path.join(os.path.dirname(__file__), "whisper_timestamped", "transcribe.py")) as f: + for line in f: + if line.strip().startswith("__version__"): + version = line.split("=")[1].strip().strip("\"'") + if version and license: + break + if line.strip().startswith("__license__"): + license = line.split("=")[1].strip().strip("\"'") + if version and 
license: + break +assert version and license + +setup( + name="whisper-timestamped", + py_modules=["whisper_timestamped"], + version=version, + description="Add to OpenAI Whisper the capability to give word timestamps", + python_requires=">=3.7", + author="Jeronymous", + url="https://github.com/linto-ai/whisper-timestamped", + license=license, + packages=find_packages(exclude=["tests*"]), + install_requires=install_requires, + entry_points = { + 'console_scripts': [ + 'whisper_timestamped=whisper_timestamped.transcribe:cli', + 'whisper_timestamped_make_subtitles=whisper_timestamped.make_subtitles:cli' + ], + }, + include_package_data=True, + extras_require={ + 'dev': ['matplotlib', 'jsonschema', 'transformers'], + 'vad': ['onnxruntime', 'torchaudio'], + }, +) diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..8b137891791fe96927ad78e64b0aad7bded08bdc --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1 @@ + diff --git "end": 62.08, + "confidence": 0.785 + } + ] + }, + { + "id": 37, + "seek": 5300, + "start": 62.6, + "end": 65.32, + "text": " Okay, fine. 
We weren't sure that this was a suggestion.", + "tokens": [ + 1033, + 11, + 2489, + 13, + 492, + 4999, + 380, + 988, + 300, + 341, + 390, + 257, + 16541, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.7040317565437377, + "compression_ratio": 1.7253218884120172, + "no_speech_prob": 0.4773952066898346, + "confidence": 0.722, + "words": [ + { + "text": "Okay,", + "start": 62.6, + "end": 62.92, + "confidence": 0.513 + }, + { + "text": "fine.", + "start": 62.92, + "end": 63.2, + "confidence": 0.861 + }, + { + "text": "We", + "start": 63.2, + "end": 63.36, + "confidence": 0.702 + }, + { + "text": "weren't", + "start": 63.36, + "end": 63.68, + "confidence": 0.909 + }, + { + "text": "sure", + "start": 63.68, + "end": 63.84, + "confidence": 0.829 + }, + { + "text": "that", + "start": 63.84, + "end": 64.24, + "confidence": 0.552 + }, + { + "text": "this", + "start": 64.24, + "end": 64.64, + "confidence": 0.741 + }, + { + "text": "was", + "start": 64.64, + "end": 64.8, + "confidence": 0.783 + }, + { + "text": "a", + "start": 64.8, + "end": 64.88, + "confidence": 0.819 + }, + { + "text": "suggestion.", + "start": 64.88, + "end": 65.32, + "confidence": 0.498 + } + ] + }, + { + "id": 38, + "seek": 5300, + "start": 66.76, + "end": 68.21, + "text": " We thought you could check it out.", + "tokens": [ + 492, + 1194, + 291, + 727, + 1520, + 309, + 484, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.7040317565437377, + "compression_ratio": 1.7253218884120172, + "no_speech_prob": 0.4773952066898346, + "confidence": 0.721, + "words": [ + { + "text": "We", + "start": 66.76, + "end": 66.8, + "confidence": 0.891 + }, + { + "text": "thought", + "start": 66.8, + "end": 66.84, + "confidence": 0.813 + }, + { + "text": "you", + "start": 66.84, + "end": 67.06, + "confidence": 0.325 + }, + { + "text": "could", + "start": 67.06, + "end": 67.24, + "confidence": 0.784 + }, + { + "text": "check", + "start": 67.24, + "end": 67.5, + "confidence": 0.751 + }, + { + "text": "it", + "start": 
67.5, + "end": 67.7, + "confidence": 0.899 + }, + { + "text": "out.", + "start": 67.7, + "end": 68.21, + "confidence": 0.812 + } + ] + }, + { + "id": 39, + "seek": 5300, + "start": 68.21, + "end": 69.48, + "text": " I'm not sure we've already done that.", + "tokens": [ + 286, + 478, + 406, + 988, + 321, + 600, + 1217, + 1096, + 300, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.7040317565437377, + "compression_ratio": 1.7253218884120172, + "no_speech_prob": 0.4773952066898346, + "confidence": 0.455, + "words": [ + { + "text": "I'm", + "start": 68.21, + "end": 68.5, + "confidence": 0.179 + }, + { + "text": "not", + "start": 68.5, + "end": 68.6, + "confidence": 0.258 + }, + { + "text": "sure", + "start": 68.6, + "end": 68.76, + "confidence": 0.761 + }, + { + "text": "we've", + "start": 68.76, + "end": 68.9, + "confidence": 0.495 + }, + { + "text": "already", + "start": 68.9, + "end": 68.94, + "confidence": 0.765 + }, + { + "text": "done", + "start": 68.94, + "end": 69.18, + "confidence": 0.799 + }, + { + "text": "that.", + "start": 69.18, + "end": 69.48, + "confidence": 0.878 + } + ] + }, + { + "id": 40, + "seek": 5300, + "start": 70.38, + "end": 72.62, + "text": " I guess, whatever you come up with, just let us know.", + "tokens": [ + 286, + 2041, + 11, + 2035, + 291, + 808, + 493, + 365, + 11, + 445, + 718, + 505, + 458, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.7040317565437377, + "compression_ratio": 1.7253218884120172, + "no_speech_prob": 0.4773952066898346, + "confidence": 0.738, + "words": [ + { + "text": "I", + "start": 70.38, + "end": 70.52, + "confidence": 0.55 + }, + { + "text": "guess,", + "start": 70.52, + "end": 70.96, + "confidence": 0.903 + }, + { + "text": "whatever", + "start": 70.96, + "end": 71.18, + "confidence": 0.404 + }, + { + "text": "you", + "start": 71.18, + "end": 71.22, + "confidence": 0.89 + }, + { + "text": "come", + "start": 71.22, + "end": 71.54, + "confidence": 0.544 + }, + { + "text": "up", + "start": 71.54, + "end": 
71.72, + "confidence": 0.892 + }, + { + "text": "with,", + "start": 71.72, + "end": 71.98, + "confidence": 0.816 + }, + { + "text": "just", + "start": 71.98, + "end": 72.16, + "confidence": 0.77 + }, + { + "text": "let", + "start": 72.16, + "end": 72.2, + "confidence": 0.898 + }, + { + "text": "us", + "start": 72.2, + "end": 72.34, + "confidence": 0.905 + }, + { + "text": "know.", + "start": 72.34, + "end": 72.62, + "confidence": 0.798 + } + ] + }, + { + "id": 41, + "seek": 5300, + "start": 74.18, + "end": 75.48, + "text": " Okay, no problem.", + "tokens": [ + 1033, + 11, + 572, + 1154, + 13 + ], + "temperature": 0.4, + "avg_logprob": -0.7040317565437377, + "compression_ratio": 1.7253218884120172, + "no_speech_prob": 0.4773952066898346, + "confidence": 0.699, + "words": [ + { + "text": "Okay,", + "start": 74.18, + "end": 74.66, + "confidence": 0.789 + }, + { + "text": "no", + "start": 74.66, + "end": 74.88, + "confidence": 0.548 + }, + { + "text": "problem.", + "start": 74.88, + "end": 75.48, + "confidence": 0.79 + } + ] + }, + { + "id": 42, + "seek": 7544, + "start": 75.48, + "end": 76.86, + "text": " Okay, no problem.", + "tokens": [ + 50364, + 1033, + 11, + 572, + 1154, + 13, + 50464 + ], + "temperature": 0.4, + "avg_logprob": -0.7493470907211304, + "compression_ratio": 0.68, + "no_speech_prob": 0.06937140226364136, + "confidence": 0.521, + "words": [ + { + "text": "Okay,", + "start": 75.48, + "end": 76.28, + "confidence": 0.386 + }, + { + "text": "no", + "start": 76.28, + "end": 76.32, + "confidence": 0.472 + }, + { + "text": "problem.", + "start": 76.32, + "end": 76.86, + "confidence": 0.775 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases.cpu/nocond.random_music.mp4.words.json b/tests/expected/corner_cases.cpu/nocond.random_music.mp4.words.json new file mode 100644 index 0000000000000000000000000000000000000000..cffe4045f2a305e00267c28fb344c5011039e8b1 --- /dev/null +++ 
b/tests/expected/corner_cases.cpu/nocond.random_music.mp4.words.json @@ -0,0 +1,3209 @@ +{ + "text": " I Oh, the misery Everybody wants to be my enemy Spare the sympathy Everybody wants to be my enemy Look out for yourself My enemy Look out for yourself But I'm ready Your words up on the wall You don't need to say I'm ready your words up on the wall as you're praying for my phone and the laughter in the holes and the names that I've I stack it in my mind and I'm waiting for the time When I show you what it's like to be worse than a mind Tell you you're the greatest But once you turn they hate us Oh, the misery Everybody wants to be my enemy I smell the sympathy Everybody wants to be my enemy Look out for yourself My enemy Look out for yourself Look, okay I'm hoping that somebody pray for me I'm praying that somebody vote for me I'm staying where nobody's supposed to be I propose to be in a rink of emotions Ready to go whenever you let me know The road is long so put the pedal into the flow The enemy on my trail, my energy unavailable I'm a tell a monster the way go Way when the plot on my track to the top I been out of shape thinking that I'm a box I'm an astronaut Blast it off the planet rock the cause catastrophe And it matters more because ahead and not ahead I thought about wreaking havoc on an opposition Kinda shockin' they want it static with precision I'm automatic quarterback I ain't talkin' second Pack it pack it up I don't panic Better batter up who the baddest It don't matter cause we it's your Everybody wants to be my enemy Spare the sympathy Everybody wants to be my enemy Oh the misery Everybody wants to be my enemy Spare the sympathy Everybody wants to be my enemy I swear, I swear I'll never be a saint I swear, my enemy I swear, I swear I'll never be a saint You got to be yourself!", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.02, + "end": 0.4, + "text": " I", + "tokens": [ + 50364, + 286, + 50464 + ], + "temperature": 0.4, + 
"avg_logprob": -0.9367842674255371, + "compression_ratio": 0.1111111111111111, + "no_speech_prob": 0.7794302701950073, + "confidence": 0.032, + "words": [ + { + "text": "I", + "start": 0.02, + "end": 0.4, + "confidence": 0.032 + } + ] + }, + { + "id": 1, + "seek": 6000, + "start": 60.02, + "end": 68.54, + "text": " Oh, the misery Everybody wants to be my enemy", + "tokens": [ + 876, + 11, + 264, + 32309, + 7646, + 2738, + 281, + 312, + 452, + 5945 + ], + "temperature": 0.4, + "avg_logprob": -0.7892291628081223, + "compression_ratio": 1.627906976744186, + "no_speech_prob": 0.8689420819282532, + "confidence": 0.669, + "words": [ + { + "text": "Oh,", + "start": 60.02, + "end": 65.44, + "confidence": 0.084 + }, + { + "text": "the", + "start": 65.44, + "end": 65.48, + "confidence": 0.796 + }, + { + "text": "misery", + "start": 65.48, + "end": 66.08, + "confidence": 0.993 + }, + { + "text": "Everybody", + "start": 66.08, + "end": 67.62, + "confidence": 0.431 + }, + { + "text": "wants", + "start": 67.62, + "end": 68.0, + "confidence": 0.983 + }, + { + "text": "to", + "start": 68.0, + "end": 68.18, + "confidence": 0.993 + }, + { + "text": "be", + "start": 68.18, + "end": 68.38, + "confidence": 0.996 + }, + { + "text": "my", + "start": 68.38, + "end": 68.5, + "confidence": 0.984 + }, + { + "text": "enemy", + "start": 68.5, + "end": 68.54, + "confidence": 0.984 + } + ] + }, + { + "id": 2, + "seek": 6000, + "start": 70.9, + "end": 75.92, + "text": " Spare the sympathy Everybody wants to be my enemy", + "tokens": [ + 1738, + 543, + 264, + 33240, + 7646, + 2738, + 281, + 312, + 452, + 5945 + ], + "temperature": 0.4, + "avg_logprob": -0.7892291628081223, + "compression_ratio": 1.627906976744186, + "no_speech_prob": 0.8689420819282532, + "confidence": 0.932, + "words": [ + { + "text": "Spare", + "start": 70.9, + "end": 71.48, + "confidence": 0.789 + }, + { + "text": "the", + "start": 71.48, + "end": 71.7, + "confidence": 0.97 + }, + { + "text": "sympathy", + "start": 71.7, + 
"end": 72.56, + "confidence": 0.993 + }, + { + "text": "Everybody", + "start": 72.56, + "end": 73.86, + "confidence": 0.959 + }, + { + "text": "wants", + "start": 73.86, + "end": 74.26, + "confidence": 0.994 + }, + { + "text": "to", + "start": 74.26, + "end": 74.42, + "confidence": 0.998 + }, + { + "text": "be", + "start": 74.42, + "end": 75.16, + "confidence": 0.999 + }, + { + "text": "my", + "start": 75.16, + "end": 75.64, + "confidence": 0.869 + }, + { + "text": "enemy", + "start": 75.64, + "end": 75.92, + "confidence": 0.997 + } + ] + }, + { + "id": 3, + "seek": 6000, + "start": 79.98, + "end": 80.5, + "text": " Look out for yourself My enemy", + "tokens": [ + 2053, + 484, + 337, + 1803, + 1222, + 5945 + ], + "temperature": 0.4, + "avg_logprob": -0.7892291628081223, + "compression_ratio": 1.627906976744186, + "no_speech_prob": 0.8689420819282532, + "confidence": 0.778, + "words": [ + { + "text": "Look", + "start": 79.98, + "end": 80.12, + "confidence": 0.446 + }, + { + "text": "out", + "start": 80.12, + "end": 80.34, + "confidence": 0.99 + }, + { + "text": "for", + "start": 80.34, + "end": 80.38, + "confidence": 0.991 + }, + { + "text": "yourself", + "start": 80.38, + "end": 80.42, + "confidence": 0.971 + }, + { + "text": "My", + "start": 80.42, + "end": 80.46, + "confidence": 0.533 + }, + { + "text": "enemy", + "start": 80.46, + "end": 80.5, + "confidence": 0.979 + } + ] + }, + { + "id": 4, + "seek": 6000, + "start": 80.5, + "end": 81.28, + "text": " Look out for yourself But I'm ready", + "tokens": [ + 2053, + 484, + 337, + 1803, + 583, + 286, + 478, + 1919 + ], + "temperature": 0.4, + "avg_logprob": -0.7892291628081223, + "compression_ratio": 1.627906976744186, + "no_speech_prob": 0.8689420819282532, + "confidence": 0.943, + "words": [ + { + "text": "Look", + "start": 80.5, + "end": 80.54, + "confidence": 0.844 + }, + { + "text": "out", + "start": 80.54, + "end": 80.58, + "confidence": 0.994 + }, + { + "text": "for", + "start": 80.58, + "end": 80.62, + 
"confidence": 0.999 + }, + { + "text": "yourself", + "start": 80.62, + "end": 80.66, + "confidence": 0.999 + }, + { + "text": "But", + "start": 80.66, + "end": 80.7, + "confidence": 0.767 + }, + { + "text": "I'm", + "start": 80.7, + "end": 81.04, + "confidence": 0.99 + }, + { + "text": "ready", + "start": 81.04, + "end": 81.28, + "confidence": 0.996 + } + ] + }, + { + "id": 5, + "seek": 6000, + "start": 86.14, + "end": 88.28, + "text": " Your words up on the wall You don't need to say", + "tokens": [ + 2260, + 2283, + 493, + 322, + 264, + 2929, + 509, + 500, + 380, + 643, + 281, + 584 + ], + "temperature": 0.4, + "avg_logprob": -0.7892291628081223, + "compression_ratio": 1.627906976744186, + "no_speech_prob": 0.8689420819282532, + "confidence": 0.361, + "words": [ + { + "text": "Your", + "start": 86.14, + "end": 86.4, + "confidence": 0.618 + }, + { + "text": "words", + "start": 86.4, + "end": 86.44, + "confidence": 0.786 + }, + { + "text": "up", + "start": 86.44, + "end": 86.7, + "confidence": 0.891 + }, + { + "text": "on", + "start": 86.7, + "end": 86.98, + "confidence": 0.996 + }, + { + "text": "the", + "start": 86.98, + "end": 87.1, + "confidence": 0.985 + }, + { + "text": "wall", + "start": 87.1, + "end": 87.62, + "confidence": 0.711 + }, + { + "text": "You", + "start": 87.62, + "end": 87.92, + "confidence": 0.084 + }, + { + "text": "don't", + "start": 87.92, + "end": 88.16, + "confidence": 0.172 + }, + { + "text": "need", + "start": 88.16, + "end": 88.2, + "confidence": 0.149 + }, + { + "text": "to", + "start": 88.2, + "end": 88.24, + "confidence": 0.466 + }, + { + "text": "say", + "start": 88.24, + "end": 88.28, + "confidence": 0.094 + } + ] + }, + { + "id": 6, + "seek": 8774, + "start": 88.28, + "end": 94.25, + "text": " I'm ready your words up on the wall as you're praying for my phone and the laughter in the holes and the names that I've", + "tokens": [ + 286, + 478, + 1919, + 428, + 2283, + 493, + 322, + 264, + 2929, + 382, + 291, + 434, + 15611, + 337, + 
452, + 2593, + 293, + 264, + 13092, + 294, + 264, + 8118, + 293, + 264, + 5288, + 300, + 286, + 600 + ], + "temperature": 0.4, + "avg_logprob": -0.26142611746060646, + "compression_ratio": 1.486842105263158, + "no_speech_prob": 0.8554685115814209, + "confidence": 0.773, + "words": [ + { + "text": "I'm", + "start": 88.28, + "end": 88.32, + "confidence": 0.459 + }, + { + "text": "ready", + "start": 88.32, + "end": 88.52, + "confidence": 0.976 + }, + { + "text": "your", + "start": 88.52, + "end": 88.9, + "confidence": 0.592 + }, + { + "text": "words", + "start": 88.9, + "end": 89.14, + "confidence": 0.987 + }, + { + "text": "up", + "start": 89.14, + "end": 89.46, + "confidence": 0.974 + }, + { + "text": "on", + "start": 89.46, + "end": 89.6, + "confidence": 0.979 + }, + { + "text": "the", + "start": 89.6, + "end": 89.66, + "confidence": 0.985 + }, + { + "text": "wall", + "start": 89.66, + "end": 90.12, + "confidence": 0.45 + }, + { + "text": "as", + "start": 90.12, + "end": 90.32, + "confidence": 0.489 + }, + { + "text": "you're", + "start": 90.32, + "end": 90.64, + "confidence": 0.817 + }, + { + "text": "praying", + "start": 90.64, + "end": 90.8, + "confidence": 0.962 + }, + { + "text": "for", + "start": 90.8, + "end": 91.06, + "confidence": 0.956 + }, + { + "text": "my", + "start": 91.06, + "end": 91.26, + "confidence": 0.965 + }, + { + "text": "phone", + "start": 91.26, + "end": 91.72, + "confidence": 0.63 + }, + { + "text": "and", + "start": 91.72, + "end": 91.9, + "confidence": 0.687 + }, + { + "text": "the", + "start": 91.9, + "end": 92.0, + "confidence": 0.948 + }, + { + "text": "laughter", + "start": 92.0, + "end": 92.36, + "confidence": 0.87 + }, + { + "text": "in", + "start": 92.36, + "end": 92.72, + "confidence": 0.905 + }, + { + "text": "the", + "start": 92.72, + "end": 92.88, + "confidence": 0.982 + }, + { + "text": "holes", + "start": 92.88, + "end": 93.24, + "confidence": 0.593 + }, + { + "text": "and", + "start": 93.24, + "end": 93.44, + "confidence": 
0.6 + }, + { + "text": "the", + "start": 93.44, + "end": 93.52, + "confidence": 0.983 + }, + { + "text": "names", + "start": 93.52, + "end": 93.82, + "confidence": 0.98 + }, + { + "text": "that", + "start": 93.82, + "end": 94.08, + "confidence": 0.827 + }, + { + "text": "I've", + "start": 94.08, + "end": 94.25, + "confidence": 0.75 + } + ] + }, + { + "id": 7, + "seek": 9418, + "start": 94.25, + "end": 97.84, + "text": " I stack it in my mind and I'm waiting for the time", + "tokens": [ + 286, + 8630, + 309, + 294, + 452, + 1575, + 293, + 286, + 478, + 3806, + 337, + 264, + 565 + ], + "temperature": 0.4, + "avg_logprob": -0.4302098981795772, + "compression_ratio": 1.388157894736842, + "no_speech_prob": 0.8590295314788818, + "confidence": 0.764, + "words": [ + { + "text": "I", + "start": 94.25, + "end": 95.04, + "confidence": 0.29 + }, + { + "text": "stack", + "start": 95.04, + "end": 95.38, + "confidence": 0.341 + }, + { + "text": "it", + "start": 95.38, + "end": 95.7, + "confidence": 0.988 + }, + { + "text": "in", + "start": 95.7, + "end": 95.82, + "confidence": 0.987 + }, + { + "text": "my", + "start": 95.82, + "end": 96.02, + "confidence": 0.99 + }, + { + "text": "mind", + "start": 96.02, + "end": 96.42, + "confidence": 0.99 + }, + { + "text": "and", + "start": 96.42, + "end": 96.62, + "confidence": 0.382 + }, + { + "text": "I'm", + "start": 96.62, + "end": 96.72, + "confidence": 0.964 + }, + { + "text": "waiting", + "start": 96.72, + "end": 97.02, + "confidence": 0.952 + }, + { + "text": "for", + "start": 97.02, + "end": 97.34, + "confidence": 0.972 + }, + { + "text": "the", + "start": 97.34, + "end": 97.5, + "confidence": 0.986 + }, + { + "text": "time", + "start": 97.5, + "end": 97.84, + "confidence": 0.985 + } + ] + }, + { + "id": 8, + "seek": 9418, + "start": 98.0, + "end": 101.24, + "text": " When I show you what it's like to be worse than a mind", + "tokens": [ + 1133, + 286, + 855, + 291, + 437, + 309, + 311, + 411, + 281, + 312, + 5324, + 813, + 257, + 
1575 + ], + "temperature": 0.4, + "avg_logprob": -0.4302098981795772, + "compression_ratio": 1.388157894736842, + "no_speech_prob": 0.8590295314788818, + "confidence": 0.655, + "words": [ + { + "text": "When", + "start": 98.0, + "end": 98.18, + "confidence": 0.768 + }, + { + "text": "I", + "start": 98.18, + "end": 98.22, + "confidence": 0.989 + }, + { + "text": "show", + "start": 98.22, + "end": 98.48, + "confidence": 0.931 + }, + { + "text": "you", + "start": 98.48, + "end": 98.76, + "confidence": 0.988 + }, + { + "text": "what", + "start": 98.76, + "end": 98.94, + "confidence": 0.991 + }, + { + "text": "it's", + "start": 98.94, + "end": 99.42, + "confidence": 0.981 + }, + { + "text": "like", + "start": 99.42, + "end": 99.46, + "confidence": 0.997 + }, + { + "text": "to", + "start": 99.46, + "end": 99.72, + "confidence": 0.681 + }, + { + "text": "be", + "start": 99.72, + "end": 99.78, + "confidence": 0.995 + }, + { + "text": "worse", + "start": 99.78, + "end": 100.04, + "confidence": 0.659 + }, + { + "text": "than", + "start": 100.04, + "end": 100.36, + "confidence": 0.173 + }, + { + "text": "a", + "start": 100.36, + "end": 100.6, + "confidence": 0.112 + }, + { + "text": "mind", + "start": 100.6, + "end": 101.24, + "confidence": 0.463 + } + ] + }, + { + "id": 9, + "seek": 9418, + "start": 102.06, + "end": 106.28, + "text": " Tell you you're the greatest", + "tokens": [ + 5115, + 291, + 291, + 434, + 264, + 6636 + ], + "temperature": 0.4, + "avg_logprob": -0.4302098981795772, + "compression_ratio": 1.388157894736842, + "no_speech_prob": 0.8590295314788818, + "confidence": 0.919, + "words": [ + { + "text": "Tell", + "start": 102.06, + "end": 102.1, + "confidence": 0.861 + }, + { + "text": "you", + "start": 102.1, + "end": 102.72, + "confidence": 0.991 + }, + { + "text": "you're", + "start": 102.72, + "end": 103.66, + "confidence": 0.855 + }, + { + "text": "the", + "start": 103.66, + "end": 104.1, + "confidence": 0.985 + }, + { + "text": "greatest", + "start": 104.1, 
+ "end": 106.28, + "confidence": 0.978 + } + ] + }, + { + "id": 10, + "seek": 9418, + "start": 107.14, + "end": 112.98, + "text": " But once you turn they hate us", + "tokens": [ + 583, + 1564, + 291, + 1261, + 436, + 4700, + 505 + ], + "temperature": 0.4, + "avg_logprob": -0.4302098981795772, + "compression_ratio": 1.388157894736842, + "no_speech_prob": 0.8590295314788818, + "confidence": 0.893, + "words": [ + { + "text": "But", + "start": 107.14, + "end": 107.54, + "confidence": 0.976 + }, + { + "text": "once", + "start": 107.54, + "end": 108.38, + "confidence": 0.799 + }, + { + "text": "you", + "start": 108.38, + "end": 108.92, + "confidence": 0.998 + }, + { + "text": "turn", + "start": 108.92, + "end": 109.84, + "confidence": 0.889 + }, + { + "text": "they", + "start": 109.84, + "end": 110.4, + "confidence": 0.662 + }, + { + "text": "hate", + "start": 110.4, + "end": 111.88, + "confidence": 0.99 + }, + { + "text": "us", + "start": 111.88, + "end": 112.98, + "confidence": 0.999 + } + ] + }, + { + "id": 11, + "seek": 9418, + "start": 114.54, + "end": 119.18, + "text": " Oh, the misery Everybody wants to be my enemy", + "tokens": [ + 876, + 11, + 264, + 32309, + 7646, + 2738, + 281, + 312, + 452, + 5945 + ], + "temperature": 0.4, + "avg_logprob": -0.4302098981795772, + "compression_ratio": 1.388157894736842, + "no_speech_prob": 0.8590295314788818, + "confidence": 0.874, + "words": [ + { + "text": "Oh,", + "start": 114.54, + "end": 115.32, + "confidence": 0.74 + }, + { + "text": "the", + "start": 115.32, + "end": 115.36, + "confidence": 0.97 + }, + { + "text": "misery", + "start": 115.36, + "end": 116.36, + "confidence": 0.999 + }, + { + "text": "Everybody", + "start": 116.36, + "end": 117.52, + "confidence": 0.432 + }, + { + "text": "wants", + "start": 117.52, + "end": 117.9, + "confidence": 0.989 + }, + { + "text": "to", + "start": 117.9, + "end": 118.24, + "confidence": 0.998 + }, + { + "text": "be", + "start": 118.24, + "end": 118.28, + "confidence": 0.998 + }, 
+ { + "text": "my", + "start": 118.28, + "end": 118.54, + "confidence": 0.997 + }, + { + "text": "enemy", + "start": 118.54, + "end": 119.18, + "confidence": 0.984 + } + ] + }, + { + "id": 12, + "seek": 12018, + "start": 120.2, + "end": 122.16, + "text": " I smell the sympathy", + "tokens": [ + 286, + 4316, + 264, + 33240 + ], + "temperature": 0.4, + "avg_logprob": -0.3580510299817651, + "compression_ratio": 1.7621145374449338, + "no_speech_prob": 0.8322692513465881, + "confidence": 0.755, + "words": [ + { + "text": "I", + "start": 120.2, + "end": 120.54, + "confidence": 0.42 + }, + { + "text": "smell", + "start": 120.54, + "end": 121.36, + "confidence": 0.889 + }, + { + "text": "the", + "start": 121.36, + "end": 121.76, + "confidence": 0.892 + }, + { + "text": "sympathy", + "start": 121.76, + "end": 122.16, + "confidence": 0.979 + } + ] + }, + { + "id": 13, + "seek": 12018, + "start": 122.16, + "end": 126.08, + "text": " Everybody wants to be my enemy", + "tokens": [ + 7646, + 2738, + 281, + 312, + 452, + 5945 + ], + "temperature": 0.4, + "avg_logprob": -0.3580510299817651, + "compression_ratio": 1.7621145374449338, + "no_speech_prob": 0.8322692513465881, + "confidence": 0.912, + "words": [ + { + "text": "Everybody", + "start": 122.16, + "end": 123.72, + "confidence": 0.923 + }, + { + "text": "wants", + "start": 123.72, + "end": 124.1, + "confidence": 0.984 + }, + { + "text": "to", + "start": 124.1, + "end": 124.68, + "confidence": 0.995 + }, + { + "text": "be", + "start": 124.68, + "end": 125.24, + "confidence": 0.996 + }, + { + "text": "my", + "start": 125.24, + "end": 125.5, + "confidence": 0.659 + }, + { + "text": "enemy", + "start": 125.5, + "end": 126.08, + "confidence": 0.97 + } + ] + }, + { + "id": 14, + "seek": 12018, + "start": 129.9, + "end": 130.86, + "text": " Look out for yourself", + "tokens": [ + 2053, + 484, + 337, + 1803 + ], + "temperature": 0.4, + "avg_logprob": -0.3580510299817651, + "compression_ratio": 1.7621145374449338, + "no_speech_prob": 
0.8322692513465881, + "confidence": 0.731, + "words": [ + { + "text": "Look", + "start": 129.9, + "end": 130.04, + "confidence": 0.313 + }, + { + "text": "out", + "start": 130.04, + "end": 130.3, + "confidence": 0.932 + }, + { + "text": "for", + "start": 130.3, + "end": 130.44, + "confidence": 0.991 + }, + { + "text": "yourself", + "start": 130.44, + "end": 130.86, + "confidence": 0.986 + } + ] + }, + { + "id": 15, + "seek": 12018, + "start": 131.52, + "end": 132.54, + "text": " My enemy", + "tokens": [ + 1222, + 5945 + ], + "temperature": 0.4, + "avg_logprob": -0.3580510299817651, + "compression_ratio": 1.7621145374449338, + "no_speech_prob": 0.8322692513465881, + "confidence": 0.937, + "words": [ + { + "text": "My", + "start": 131.52, + "end": 131.74, + "confidence": 0.891 + }, + { + "text": "enemy", + "start": 131.74, + "end": 132.54, + "confidence": 0.985 + } + ] + }, + { + "id": 16, + "seek": 12018, + "start": 136.06, + "end": 137.14, + "text": " Look out for yourself", + "tokens": [ + 2053, + 484, + 337, + 1803 + ], + "temperature": 0.4, + "avg_logprob": -0.3580510299817651, + "compression_ratio": 1.7621145374449338, + "no_speech_prob": 0.8322692513465881, + "confidence": 0.991, + "words": [ + { + "text": "Look", + "start": 136.06, + "end": 136.18, + "confidence": 0.974 + }, + { + "text": "out", + "start": 136.18, + "end": 136.54, + "confidence": 0.994 + }, + { + "text": "for", + "start": 136.54, + "end": 136.74, + "confidence": 0.999 + }, + { + "text": "yourself", + "start": 136.74, + "end": 137.14, + "confidence": 0.999 + } + ] + }, + { + "id": 17, + "seek": 12018, + "start": 137.66, + "end": 140.18, + "text": " Look, okay I'm hoping that somebody pray for me", + "tokens": [ + 2053, + 11, + 1392, + 286, + 478, + 7159, + 300, + 2618, + 3690, + 337, + 385 + ], + "temperature": 0.4, + "avg_logprob": -0.3580510299817651, + "compression_ratio": 1.7621145374449338, + "no_speech_prob": 0.8322692513465881, + "confidence": 0.792, + "words": [ + { + "text": "Look,", 
+ "start": 137.66, + "end": 138.46, + "confidence": 0.849 + }, + { + "text": "okay", + "start": 138.46, + "end": 138.6, + "confidence": 0.646 + }, + { + "text": "I'm", + "start": 138.6, + "end": 138.86, + "confidence": 0.658 + }, + { + "text": "hoping", + "start": 138.86, + "end": 139.12, + "confidence": 0.576 + }, + { + "text": "that", + "start": 139.12, + "end": 139.32, + "confidence": 0.958 + }, + { + "text": "somebody", + "start": 139.32, + "end": 139.52, + "confidence": 0.975 + }, + { + "text": "pray", + "start": 139.52, + "end": 139.8, + "confidence": 0.85 + }, + { + "text": "for", + "start": 139.8, + "end": 140.02, + "confidence": 0.899 + }, + { + "text": "me", + "start": 140.02, + "end": 140.18, + "confidence": 0.999 + } + ] + }, + { + "id": 18, + "seek": 12018, + "start": 140.38, + "end": 141.64, + "text": " I'm praying that somebody vote for me", + "tokens": [ + 286, + 478, + 15611, + 300, + 2618, + 4740, + 337, + 385 + ], + "temperature": 0.4, + "avg_logprob": -0.3580510299817651, + "compression_ratio": 1.7621145374449338, + "no_speech_prob": 0.8322692513465881, + "confidence": 0.821, + "words": [ + { + "text": "I'm", + "start": 140.38, + "end": 140.58, + "confidence": 0.756 + }, + { + "text": "praying", + "start": 140.58, + "end": 140.62, + "confidence": 0.956 + }, + { + "text": "that", + "start": 140.62, + "end": 140.82, + "confidence": 0.959 + }, + { + "text": "somebody", + "start": 140.82, + "end": 141.06, + "confidence": 0.995 + }, + { + "text": "vote", + "start": 141.06, + "end": 141.32, + "confidence": 0.396 + }, + { + "text": "for", + "start": 141.32, + "end": 141.52, + "confidence": 0.999 + }, + { + "text": "me", + "start": 141.52, + "end": 141.64, + "confidence": 1.0 + } + ] + }, + { + "id": 19, + "seek": 12018, + "start": 141.72, + "end": 143.2, + "text": " I'm staying where nobody's supposed to be", + "tokens": [ + 286, + 478, + 7939, + 689, + 5079, + 311, + 3442, + 281, + 312 + ], + "temperature": 0.4, + "avg_logprob": -0.3580510299817651, + 
"compression_ratio": 1.7621145374449338, + "no_speech_prob": 0.8322692513465881, + "confidence": 0.893, + "words": [ + { + "text": "I'm", + "start": 141.72, + "end": 141.9, + "confidence": 0.937 + }, + { + "text": "staying", + "start": 141.9, + "end": 142.12, + "confidence": 0.82 + }, + { + "text": "where", + "start": 142.12, + "end": 142.3, + "confidence": 0.976 + }, + { + "text": "nobody's", + "start": 142.3, + "end": 142.88, + "confidence": 0.735 + }, + { + "text": "supposed", + "start": 142.88, + "end": 142.92, + "confidence": 0.957 + }, + { + "text": "to", + "start": 142.92, + "end": 143.12, + "confidence": 0.998 + }, + { + "text": "be", + "start": 143.12, + "end": 143.2, + "confidence": 0.999 + } + ] + }, + { + "id": 20, + "seek": 12018, + "start": 143.2, + "end": 144.86, + "text": " I propose to be in a rink of emotions", + "tokens": [ + 286, + 17421, + 281, + 312, + 294, + 257, + 367, + 475, + 295, + 8462 + ], + "temperature": 0.4, + "avg_logprob": -0.3580510299817651, + "compression_ratio": 1.7621145374449338, + "no_speech_prob": 0.8322692513465881, + "confidence": 0.554, + "words": [ + { + "text": "I", + "start": 143.2, + "end": 143.36, + "confidence": 0.354 + }, + { + "text": "propose", + "start": 143.36, + "end": 143.64, + "confidence": 0.323 + }, + { + "text": "to", + "start": 143.64, + "end": 143.86, + "confidence": 0.856 + }, + { + "text": "be", + "start": 143.86, + "end": 144.04, + "confidence": 0.579 + }, + { + "text": "in", + "start": 144.04, + "end": 144.18, + "confidence": 0.837 + }, + { + "text": "a", + "start": 144.18, + "end": 144.24, + "confidence": 0.631 + }, + { + "text": "rink", + "start": 144.24, + "end": 144.42, + "confidence": 0.309 + }, + { + "text": "of", + "start": 144.42, + "end": 144.54, + "confidence": 0.996 + }, + { + "text": "emotions", + "start": 144.54, + "end": 144.86, + "confidence": 0.952 + } + ] + }, + { + "id": 21, + "seek": 12018, + "start": 145.06, + "end": 146.28, + "text": " Ready to go whenever you let me know", + 
"tokens": [ + 9944, + 281, + 352, + 5699, + 291, + 718, + 385, + 458 + ], + "temperature": 0.4, + "avg_logprob": -0.3580510299817651, + "compression_ratio": 1.7621145374449338, + "no_speech_prob": 0.8322692513465881, + "confidence": 0.902, + "words": [ + { + "text": "Ready", + "start": 145.06, + "end": 145.26, + "confidence": 0.938 + }, + { + "text": "to", + "start": 145.26, + "end": 145.5, + "confidence": 0.997 + }, + { + "text": "go", + "start": 145.5, + "end": 145.58, + "confidence": 0.996 + }, + { + "text": "whenever", + "start": 145.58, + "end": 145.74, + "confidence": 0.85 + }, + { + "text": "you", + "start": 145.74, + "end": 145.92, + "confidence": 0.575 + }, + { + "text": "let", + "start": 145.92, + "end": 146.04, + "confidence": 0.97 + }, + { + "text": "me", + "start": 146.04, + "end": 146.16, + "confidence": 0.998 + }, + { + "text": "know", + "start": 146.16, + "end": 146.28, + "confidence": 0.992 + } + ] + }, + { + "id": 22, + "seek": 12018, + "start": 146.28, + "end": 147.84, + "text": " The road is long so put the pedal into the flow", + "tokens": [ + 440, + 3060, + 307, + 938, + 370, + 829, + 264, + 19122, + 666, + 264, + 3095 + ], + "temperature": 0.4, + "avg_logprob": -0.3580510299817651, + "compression_ratio": 1.7621145374449338, + "no_speech_prob": 0.8322692513465881, + "confidence": 0.944, + "words": [ + { + "text": "The", + "start": 146.28, + "end": 146.5, + "confidence": 0.977 + }, + { + "text": "road", + "start": 146.5, + "end": 146.64, + "confidence": 0.989 + }, + { + "text": "is", + "start": 146.64, + "end": 146.74, + "confidence": 0.977 + }, + { + "text": "long", + "start": 146.74, + "end": 146.9, + "confidence": 0.936 + }, + { + "text": "so", + "start": 146.9, + "end": 147.04, + "confidence": 0.811 + }, + { + "text": "put", + "start": 147.04, + "end": 147.16, + "confidence": 0.94 + }, + { + "text": "the", + "start": 147.16, + "end": 147.3, + "confidence": 0.969 + }, + { + "text": "pedal", + "start": 147.3, + "end": 147.4, + "confidence": 
0.947 + }, + { + "text": "into", + "start": 147.4, + "end": 147.58, + "confidence": 0.955 + }, + { + "text": "the", + "start": 147.58, + "end": 147.76, + "confidence": 0.993 + }, + { + "text": "flow", + "start": 147.76, + "end": 147.84, + "confidence": 0.902 + } + ] + }, + { + "id": 23, + "seek": 12018, + "start": 147.96, + "end": 149.78, + "text": " The enemy on my trail, my energy unavailable", + "tokens": [ + 440, + 5945, + 322, + 452, + 9924, + 11, + 452, + 2281, + 36541, + 32699 + ], + "temperature": 0.4, + "avg_logprob": -0.3580510299817651, + "compression_ratio": 1.7621145374449338, + "no_speech_prob": 0.8322692513465881, + "confidence": 0.866, + "words": [ + { + "text": "The", + "start": 147.96, + "end": 148.08, + "confidence": 0.714 + }, + { + "text": "enemy", + "start": 148.08, + "end": 148.24, + "confidence": 0.473 + }, + { + "text": "on", + "start": 148.24, + "end": 148.44, + "confidence": 0.969 + }, + { + "text": "my", + "start": 148.44, + "end": 148.54, + "confidence": 0.977 + }, + { + "text": "trail,", + "start": 148.54, + "end": 148.76, + "confidence": 0.993 + }, + { + "text": "my", + "start": 148.76, + "end": 148.8, + "confidence": 0.988 + }, + { + "text": "energy", + "start": 148.8, + "end": 149.06, + "confidence": 0.993 + }, + { + "text": "unavailable", + "start": 149.06, + "end": 149.78, + "confidence": 0.935 + } + ] + }, + { + "id": 24, + "seek": 14968, + "start": 150.0, + "end": 151.16, + "text": " I'm a tell a monster the way go", + "tokens": [ + 286, + 478, + 257, + 980, + 257, + 10090, + 264, + 636, + 352 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.443, + "words": [ + { + "text": "I'm", + "start": 150.0, + "end": 150.18, + "confidence": 0.73 + }, + { + "text": "a", + "start": 150.18, + "end": 150.28, + "confidence": 0.203 + }, + { + "text": "tell", + "start": 150.28, + "end": 150.34, + "confidence": 0.507 + }, + { + 
"text": "a", + "start": 150.34, + "end": 150.46, + "confidence": 0.137 + }, + { + "text": "monster", + "start": 150.46, + "end": 150.66, + "confidence": 0.238 + }, + { + "text": "the", + "start": 150.66, + "end": 150.9, + "confidence": 0.585 + }, + { + "text": "way", + "start": 150.9, + "end": 150.98, + "confidence": 0.953 + }, + { + "text": "go", + "start": 150.98, + "end": 151.16, + "confidence": 0.664 + } + ] + }, + { + "id": 25, + "seek": 14968, + "start": 151.2, + "end": 152.54, + "text": " Way when the plot on my track to the top", + "tokens": [ + 9558, + 562, + 264, + 7542, + 322, + 452, + 2837, + 281, + 264, + 1192 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.517, + "words": [ + { + "text": "Way", + "start": 151.2, + "end": 151.38, + "confidence": 0.102 + }, + { + "text": "when", + "start": 151.38, + "end": 151.54, + "confidence": 0.434 + }, + { + "text": "the", + "start": 151.54, + "end": 151.66, + "confidence": 0.263 + }, + { + "text": "plot", + "start": 151.66, + "end": 151.78, + "confidence": 0.627 + }, + { + "text": "on", + "start": 151.78, + "end": 151.92, + "confidence": 0.605 + }, + { + "text": "my", + "start": 151.92, + "end": 152.0, + "confidence": 0.975 + }, + { + "text": "track", + "start": 152.0, + "end": 152.14, + "confidence": 0.338 + }, + { + "text": "to", + "start": 152.14, + "end": 152.32, + "confidence": 0.946 + }, + { + "text": "the", + "start": 152.32, + "end": 152.4, + "confidence": 0.991 + }, + { + "text": "top", + "start": 152.4, + "end": 152.54, + "confidence": 0.993 + } + ] + }, + { + "id": 26, + "seek": 14968, + "start": 152.54, + "end": 154.32, + "text": " I been out of shape thinking that I'm a box I'm an astronaut", + "tokens": [ + 286, + 668, + 484, + 295, + 3909, + 1953, + 300, + 286, + 478, + 257, + 2424, + 286, + 478, + 364, + 18516 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + 
"compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.667, + "words": [ + { + "text": "I", + "start": 152.54, + "end": 152.7, + "confidence": 0.972 + }, + { + "text": "been", + "start": 152.7, + "end": 152.76, + "confidence": 0.691 + }, + { + "text": "out", + "start": 152.76, + "end": 152.9, + "confidence": 0.72 + }, + { + "text": "of", + "start": 152.9, + "end": 153.0, + "confidence": 0.959 + }, + { + "text": "shape", + "start": 153.0, + "end": 153.1, + "confidence": 0.998 + }, + { + "text": "thinking", + "start": 153.1, + "end": 153.3, + "confidence": 0.385 + }, + { + "text": "that", + "start": 153.3, + "end": 153.5, + "confidence": 0.353 + }, + { + "text": "I'm", + "start": 153.5, + "end": 153.66, + "confidence": 0.572 + }, + { + "text": "a", + "start": 153.66, + "end": 153.7, + "confidence": 0.703 + }, + { + "text": "box", + "start": 153.7, + "end": 153.74, + "confidence": 0.874 + }, + { + "text": "I'm", + "start": 153.74, + "end": 153.92, + "confidence": 0.451 + }, + { + "text": "an", + "start": 153.92, + "end": 153.96, + "confidence": 0.976 + }, + { + "text": "astronaut", + "start": 153.96, + "end": 154.32, + "confidence": 0.926 + } + ] + }, + { + "id": 27, + "seek": 14968, + "start": 154.52, + "end": 156.34, + "text": " Blast it off the planet rock the cause catastrophe", + "tokens": [ + 2177, + 525, + 309, + 766, + 264, + 5054, + 3727, + 264, + 3082, + 36043 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.583, + "words": [ + { + "text": "Blast", + "start": 154.52, + "end": 154.76, + "confidence": 0.476 + }, + { + "text": "it", + "start": 154.76, + "end": 154.9, + "confidence": 0.954 + }, + { + "text": "off", + "start": 154.9, + "end": 155.02, + "confidence": 0.984 + }, + { + "text": "the", + "start": 155.02, + "end": 155.2, + "confidence": 0.956 + }, + { + "text": "planet", + "start": 155.2, + 
"end": 155.34, + "confidence": 0.99 + }, + { + "text": "rock", + "start": 155.34, + "end": 155.52, + "confidence": 0.24 + }, + { + "text": "the", + "start": 155.52, + "end": 155.68, + "confidence": 0.301 + }, + { + "text": "cause", + "start": 155.68, + "end": 155.88, + "confidence": 0.701 + }, + { + "text": "catastrophe", + "start": 155.88, + "end": 156.34, + "confidence": 0.444 + } + ] + }, + { + "id": 28, + "seek": 14968, + "start": 156.44, + "end": 158.08, + "text": " And it matters more because ahead and not ahead", + "tokens": [ + 400, + 309, + 7001, + 544, + 570, + 2286, + 293, + 406, + 2286 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.721, + "words": [ + { + "text": "And", + "start": 156.44, + "end": 156.64, + "confidence": 0.828 + }, + { + "text": "it", + "start": 156.64, + "end": 156.78, + "confidence": 0.715 + }, + { + "text": "matters", + "start": 156.78, + "end": 156.94, + "confidence": 0.994 + }, + { + "text": "more", + "start": 156.94, + "end": 157.16, + "confidence": 0.981 + }, + { + "text": "because", + "start": 157.16, + "end": 157.38, + "confidence": 0.909 + }, + { + "text": "ahead", + "start": 157.38, + "end": 157.64, + "confidence": 0.432 + }, + { + "text": "and", + "start": 157.64, + "end": 157.82, + "confidence": 0.616 + }, + { + "text": "not", + "start": 157.82, + "end": 157.94, + "confidence": 0.716 + }, + { + "text": "ahead", + "start": 157.94, + "end": 158.08, + "confidence": 0.524 + } + ] + }, + { + "id": 29, + "seek": 14968, + "start": 158.1, + "end": 159.74, + "text": " I thought about wreaking havoc on an opposition", + "tokens": [ + 286, + 1194, + 466, + 46674, + 2456, + 47367, + 322, + 364, + 13504 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.963, + "words": [ + { + "text": "I", + "start": 158.1, 
+ "end": 158.24, + "confidence": 0.986 + }, + { + "text": "thought", + "start": 158.24, + "end": 158.36, + "confidence": 0.977 + }, + { + "text": "about", + "start": 158.36, + "end": 158.58, + "confidence": 0.975 + }, + { + "text": "wreaking", + "start": 158.58, + "end": 158.84, + "confidence": 0.944 + }, + { + "text": "havoc", + "start": 158.84, + "end": 159.08, + "confidence": 1.0 + }, + { + "text": "on", + "start": 159.08, + "end": 159.3, + "confidence": 0.944 + }, + { + "text": "an", + "start": 159.3, + "end": 159.44, + "confidence": 0.913 + }, + { + "text": "opposition", + "start": 159.44, + "end": 159.74, + "confidence": 0.987 + } + ] + }, + { + "id": 30, + "seek": 14968, + "start": 159.88, + "end": 161.52, + "text": " Kinda shockin' they want it static with precision", + "tokens": [ + 35553, + 5588, + 259, + 6, + 436, + 528, + 309, + 13437, + 365, + 18356 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.537, + "words": [ + { + "text": "Kinda", + "start": 159.88, + "end": 160.1, + "confidence": 0.574 + }, + { + "text": "shockin'", + "start": 160.1, + "end": 160.5, + "confidence": 0.408 + }, + { + "text": "they", + "start": 160.5, + "end": 160.58, + "confidence": 0.371 + }, + { + "text": "want", + "start": 160.58, + "end": 160.74, + "confidence": 0.454 + }, + { + "text": "it", + "start": 160.74, + "end": 160.84, + "confidence": 0.577 + }, + { + "text": "static", + "start": 160.84, + "end": 161.02, + "confidence": 0.993 + }, + { + "text": "with", + "start": 161.02, + "end": 161.22, + "confidence": 0.609 + }, + { + "text": "precision", + "start": 161.22, + "end": 161.52, + "confidence": 0.877 + } + ] + }, + { + "id": 31, + "seek": 14968, + "start": 161.62, + "end": 163.36, + "text": " I'm automatic quarterback I ain't talkin' second", + "tokens": [ + 286, + 478, + 12509, + 31952, + 286, + 7862, + 380, + 39243, + 6, + 1150 + ], + "temperature": 
0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.735, + "words": [ + { + "text": "I'm", + "start": 161.62, + "end": 161.78, + "confidence": 0.935 + }, + { + "text": "automatic", + "start": 161.78, + "end": 162.18, + "confidence": 0.975 + }, + { + "text": "quarterback", + "start": 162.18, + "end": 162.58, + "confidence": 0.419 + }, + { + "text": "I", + "start": 162.58, + "end": 162.8, + "confidence": 0.405 + }, + { + "text": "ain't", + "start": 162.8, + "end": 162.88, + "confidence": 0.968 + }, + { + "text": "talkin'", + "start": 162.88, + "end": 163.18, + "confidence": 0.783 + }, + { + "text": "second", + "start": 163.18, + "end": 163.36, + "confidence": 0.556 + } + ] + }, + { + "id": 32, + "seek": 14968, + "start": 163.4, + "end": 164.58, + "text": " Pack it pack it up I don't panic", + "tokens": [ + 18466, + 309, + 2844, + 309, + 493, + 286, + 500, + 380, + 14783 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.786, + "words": [ + { + "text": "Pack", + "start": 163.4, + "end": 163.6, + "confidence": 0.931 + }, + { + "text": "it", + "start": 163.6, + "end": 163.66, + "confidence": 0.88 + }, + { + "text": "pack", + "start": 163.66, + "end": 163.82, + "confidence": 0.298 + }, + { + "text": "it", + "start": 163.82, + "end": 164.04, + "confidence": 0.994 + }, + { + "text": "up", + "start": 164.04, + "end": 164.1, + "confidence": 0.977 + }, + { + "text": "I", + "start": 164.1, + "end": 164.2, + "confidence": 0.637 + }, + { + "text": "don't", + "start": 164.2, + "end": 164.54, + "confidence": 0.87 + }, + { + "text": "panic", + "start": 164.54, + "end": 164.58, + "confidence": 0.997 + } + ] + }, + { + "id": 33, + "seek": 14968, + "start": 164.58, + "end": 165.7, + "text": " Better batter up who the baddest", + "tokens": [ + 15753, + 4220, + 493, + 567, + 264, + 
1578, + 23748 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.703, + "words": [ + { + "text": "Better", + "start": 164.58, + "end": 164.82, + "confidence": 0.627 + }, + { + "text": "batter", + "start": 164.82, + "end": 165.1, + "confidence": 0.265 + }, + { + "text": "up", + "start": 165.1, + "end": 165.24, + "confidence": 0.963 + }, + { + "text": "who", + "start": 165.24, + "end": 165.4, + "confidence": 0.861 + }, + { + "text": "the", + "start": 165.4, + "end": 165.52, + "confidence": 0.973 + }, + { + "text": "baddest", + "start": 165.52, + "end": 165.7, + "confidence": 0.797 + } + ] + }, + { + "id": 34, + "seek": 14968, + "start": 165.7, + "end": 166.74, + "text": " It don't matter cause we it's your", + "tokens": [ + 467, + 500, + 380, + 1871, + 3082, + 321, + 309, + 311, + 428 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.727, + "words": [ + { + "text": "It", + "start": 165.7, + "end": 165.9, + "confidence": 0.979 + }, + { + "text": "don't", + "start": 165.9, + "end": 166.14, + "confidence": 0.995 + }, + { + "text": "matter", + "start": 166.14, + "end": 166.18, + "confidence": 0.998 + }, + { + "text": "cause", + "start": 166.18, + "end": 166.4, + "confidence": 0.588 + }, + { + "text": "we", + "start": 166.4, + "end": 166.52, + "confidence": 0.973 + }, + { + "text": "it's", + "start": 166.52, + "end": 166.7, + "confidence": 0.404 + }, + { + "text": "your", + "start": 166.7, + "end": 166.74, + "confidence": 0.632 + } + ] + }, + { + "id": 35, + "seek": 14968, + "start": 166.8, + "end": 169.52, + "text": " Everybody wants to be my enemy", + "tokens": [ + 7646, + 2738, + 281, + 312, + 452, + 5945 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 
0.8352975249290466, + "confidence": 0.966, + "words": [ + { + "text": "Everybody", + "start": 166.8, + "end": 167.34, + "confidence": 0.896 + }, + { + "text": "wants", + "start": 167.34, + "end": 167.74, + "confidence": 0.97 + }, + { + "text": "to", + "start": 167.74, + "end": 167.94, + "confidence": 0.996 + }, + { + "text": "be", + "start": 167.94, + "end": 168.14, + "confidence": 0.996 + }, + { + "text": "my", + "start": 168.14, + "end": 168.46, + "confidence": 0.991 + }, + { + "text": "enemy", + "start": 168.46, + "end": 169.52, + "confidence": 0.95 + } + ] + }, + { + "id": 36, + "seek": 14968, + "start": 170.66, + "end": 172.2, + "text": " Spare the sympathy", + "tokens": [ + 1738, + 543, + 264, + 33240 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.687, + "words": [ + { + "text": "Spare", + "start": 170.66, + "end": 171.22, + "confidence": 0.494 + }, + { + "text": "the", + "start": 171.22, + "end": 171.46, + "confidence": 0.976 + }, + { + "text": "sympathy", + "start": 171.46, + "end": 172.2, + "confidence": 0.937 + } + ] + }, + { + "id": 37, + "seek": 14968, + "start": 172.96, + "end": 176.18, + "text": " Everybody wants to be my enemy", + "tokens": [ + 7646, + 2738, + 281, + 312, + 452, + 5945 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.983, + "words": [ + { + "text": "Everybody", + "start": 172.96, + "end": 173.62, + "confidence": 0.994 + }, + { + "text": "wants", + "start": 173.62, + "end": 174.02, + "confidence": 0.99 + }, + { + "text": "to", + "start": 174.02, + "end": 174.36, + "confidence": 0.999 + }, + { + "text": "be", + "start": 174.36, + "end": 175.0, + "confidence": 0.999 + }, + { + "text": "my", + "start": 175.0, + "end": 175.4, + "confidence": 0.924 + }, + { + "text": "enemy", + "start": 175.4, + "end": 
176.18, + "confidence": 0.996 + } + ] + }, + { + "id": 38, + "seek": 14968, + "start": 176.84, + "end": 178.44, + "text": " Oh the misery", + "tokens": [ + 876, + 264, + 32309 + ], + "temperature": 0.4, + "avg_logprob": -0.5499832056745698, + "compression_ratio": 1.8025078369905956, + "no_speech_prob": 0.8352975249290466, + "confidence": 0.813, + "words": [ + { + "text": "Oh", + "start": 176.84, + "end": 177.48, + "confidence": 0.677 + }, + { + "text": "the", + "start": 177.48, + "end": 177.66, + "confidence": 0.794 + }, + { + "text": "misery", + "start": 177.66, + "end": 178.44, + "confidence": 0.998 + } + ] + }, + { + "id": 39, + "seek": 17906, + "start": 179.28, + "end": 181.44, + "text": " Everybody wants to be my enemy", + "tokens": [ + 7646, + 2738, + 281, + 312, + 452, + 5945 + ], + "temperature": 0.4, + "avg_logprob": -0.6545042613195995, + "compression_ratio": 1.97, + "no_speech_prob": 0.5597606301307678, + "confidence": 0.955, + "words": [ + { + "text": "Everybody", + "start": 179.28, + "end": 179.84, + "confidence": 0.802 + }, + { + "text": "wants", + "start": 179.84, + "end": 180.22, + "confidence": 0.984 + }, + { + "text": "to", + "start": 180.22, + "end": 180.44, + "confidence": 0.997 + }, + { + "text": "be", + "start": 180.44, + "end": 180.58, + "confidence": 0.998 + }, + { + "text": "my", + "start": 180.58, + "end": 180.86, + "confidence": 0.992 + }, + { + "text": "enemy", + "start": 180.86, + "end": 181.44, + "confidence": 0.976 + } + ] + }, + { + "id": 40, + "seek": 17906, + "start": 183.12, + "end": 184.52, + "text": " Spare the sympathy", + "tokens": [ + 1738, + 543, + 264, + 33240 + ], + "temperature": 0.4, + "avg_logprob": -0.6545042613195995, + "compression_ratio": 1.97, + "no_speech_prob": 0.5597606301307678, + "confidence": 0.678, + "words": [ + { + "text": "Spare", + "start": 183.12, + "end": 183.7, + "confidence": 0.479 + }, + { + "text": "the", + "start": 183.7, + "end": 183.92, + "confidence": 0.934 + }, + { + "text": "sympathy", + 
"start": 183.92, + "end": 184.52, + "confidence": 0.986 + } + ] + }, + { + "id": 41, + "seek": 17906, + "start": 185.56, + "end": 188.46, + "text": " Everybody wants to be my enemy", + "tokens": [ + 7646, + 2738, + 281, + 312, + 452, + 5945 + ], + "temperature": 0.4, + "avg_logprob": -0.6545042613195995, + "compression_ratio": 1.97, + "no_speech_prob": 0.5597606301307678, + "confidence": 0.959, + "words": [ + { + "text": "Everybody", + "start": 185.56, + "end": 186.08, + "confidence": 0.993 + }, + { + "text": "wants", + "start": 186.08, + "end": 186.46, + "confidence": 0.99 + }, + { + "text": "to", + "start": 186.46, + "end": 186.72, + "confidence": 0.999 + }, + { + "text": "be", + "start": 186.72, + "end": 187.22, + "confidence": 0.999 + }, + { + "text": "my", + "start": 187.22, + "end": 187.86, + "confidence": 0.798 + }, + { + "text": "enemy", + "start": 187.86, + "end": 188.46, + "confidence": 0.991 + } + ] + }, + { + "id": 42, + "seek": 17906, + "start": 190.1, + "end": 191.96, + "text": " I swear, I swear I'll never be a saint", + "tokens": [ + 286, + 11902, + 11, + 286, + 11902, + 286, + 603, + 1128, + 312, + 257, + 28374 + ], + "temperature": 0.4, + "avg_logprob": -0.6545042613195995, + "compression_ratio": 1.97, + "no_speech_prob": 0.5597606301307678, + "confidence": 0.662, + "words": [ + { + "text": "I", + "start": 190.1, + "end": 190.14, + "confidence": 0.107 + }, + { + "text": "swear,", + "start": 190.14, + "end": 190.44, + "confidence": 0.908 + }, + { + "text": "I", + "start": 190.44, + "end": 190.48, + "confidence": 0.962 + }, + { + "text": "swear", + "start": 190.48, + "end": 190.52, + "confidence": 0.867 + }, + { + "text": "I'll", + "start": 190.52, + "end": 191.24, + "confidence": 0.628 + }, + { + "text": "never", + "start": 191.24, + "end": 191.28, + "confidence": 0.987 + }, + { + "text": "be", + "start": 191.28, + "end": 191.56, + "confidence": 0.993 + }, + { + "text": "a", + "start": 191.56, + "end": 191.76, + "confidence": 0.602 + }, + { + 
"text": "saint", + "start": 191.76, + "end": 191.96, + "confidence": 0.858 + } + ] + }, + { + "id": 43, + "seek": 17906, + "start": 192.12, + "end": 194.66, + "text": " I swear, my enemy", + "tokens": [ + 286, + 11902, + 11, + 452, + 5945 + ], + "temperature": 0.4, + "avg_logprob": -0.6545042613195995, + "compression_ratio": 1.97, + "no_speech_prob": 0.5597606301307678, + "confidence": 0.529, + "words": [ + { + "text": "I", + "start": 192.12, + "end": 192.62, + "confidence": 0.35 + }, + { + "text": "swear,", + "start": 192.62, + "end": 193.88, + "confidence": 0.83 + }, + { + "text": "my", + "start": 193.88, + "end": 194.02, + "confidence": 0.279 + }, + { + "text": "enemy", + "start": 194.02, + "end": 194.66, + "confidence": 0.964 + } + ] + }, + { + "id": 44, + "seek": 17906, + "start": 196.34, + "end": 198.36, + "text": " I swear, I swear I'll never be a saint", + "tokens": [ + 286, + 11902, + 11, + 286, + 11902, + 286, + 603, + 1128, + 312, + 257, + 28374 + ], + "temperature": 0.4, + "avg_logprob": -0.6545042613195995, + "compression_ratio": 1.97, + "no_speech_prob": 0.5597606301307678, + "confidence": 0.976, + "words": [ + { + "text": "I", + "start": 196.34, + "end": 196.38, + "confidence": 0.945 + }, + { + "text": "swear,", + "start": 196.38, + "end": 196.56, + "confidence": 0.999 + }, + { + "text": "I", + "start": 196.56, + "end": 196.6, + "confidence": 0.875 + }, + { + "text": "swear", + "start": 196.6, + "end": 196.84, + "confidence": 0.995 + }, + { + "text": "I'll", + "start": 196.84, + "end": 197.36, + "confidence": 0.982 + }, + { + "text": "never", + "start": 197.36, + "end": 197.5, + "confidence": 0.999 + }, + { + "text": "be", + "start": 197.5, + "end": 197.8, + "confidence": 0.999 + }, + { + "text": "a", + "start": 197.8, + "end": 198.04, + "confidence": 0.997 + }, + { + "text": "saint", + "start": 198.04, + "end": 198.36, + "confidence": 0.994 + } + ] + }, + { + "id": 45, + "seek": 19822, + "start": 198.44, + "end": 200.22, + "text": " You got to be 
yourself!", + "tokens": [ + 50364, + 509, + 658, + 281, + 312, + 1803, + 0, + 50464 + ], + "temperature": 0.4, + "avg_logprob": -0.8683164384629991, + "compression_ratio": 0.7419354838709677, + "no_speech_prob": 0.6954998970031738, + "confidence": 0.365, + "words": [ + { + "text": "You", + "start": 198.44, + "end": 198.62, + "confidence": 0.054 + }, + { + "text": "got", + "start": 198.62, + "end": 198.8, + "confidence": 0.163 + }, + { + "text": "to", + "start": 198.8, + "end": 198.98, + "confidence": 0.918 + }, + { + "text": "be", + "start": 198.98, + "end": 199.06, + "confidence": 0.87 + }, + { + "text": "yourself!", + "start": 199.06, + "end": 200.22, + "confidence": 0.913 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases.cpu/nocond_music.mp4.words.json b/tests/expected/corner_cases.cpu/nocond_music.mp4.words.json new file mode 100644 index 0000000000000000000000000000000000000000..ddd3ddeb2eb18ea98bb99dffe65159d92a046d85 --- /dev/null +++ b/tests/expected/corner_cases.cpu/nocond_music.mp4.words.json @@ -0,0 +1,2687 @@ +{ + "text": " I Oh, the misery Everybody wants to be my enemy Spare the sympathy Everybody wants to be my enemy Look out for yourself My enemy Look out for yourself But I'm ready Your words up on the wall as you're praying for my phone And the laughter in the holes and the names that I've been called I stack it in my mind and I'm waiting for the time When I show you what it's like to be worse but in the mind Tell you you're the greatest But once you turn they hate us Oh, the misery Everybody wants to be my enemy Spare the sympathy Everybody wants to be my enemy Look out for yourself My enemy Look out for yourself Look, okay I'm hoping that somebody pray for me I'm praying that somebody hold for me. I'm staying where nobody's supposed to be. I propose to be in a wreck of emotions. Ready to go whenever you let me know. The road is long, so put the pedal into the flow. 
The enemy on my trail, my energy unavailable. I'ma tell them I said away, go away. When I'm plotting, I'ma drive to the top. I've been out of shape, thinking out of the box. I'm an astronaut, blasted off the planet. Rock the cars, catastrophic, and it matters more because I had it now. Had I thought about wreaking havoc on an opposition. Kind of shocking, they want it static. With precision, I'm automatic. Quarterback, I ain't talking second. Pack it, pack it up. I don't panic, better, better. Up who the baddest. it don't matter cause we is your enemy. I swear I'll never be insane You got to be yourself", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.02, + "end": 0.4, + "text": " I", + "tokens": [ + 50364, + 286, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.9367842674255371, + "compression_ratio": 0.1111111111111111, + "no_speech_prob": 0.7794302701950073, + "confidence": 0.032, + "words": [ + { + "text": "I", + "start": 0.02, + "end": 0.4, + "confidence": 0.032 + } + ] + }, + { + "id": 1, + "seek": 6000, + "start": 60.02, + "end": 69.26, + "text": " Oh, the misery Everybody wants to be my enemy", + "tokens": [ + 876, + 11, + 264, + 32309, + 7646, + 2738, + 281, + 312, + 452, + 5945 + ], + "temperature": 0.0, + "avg_logprob": -0.45698386972600763, + "compression_ratio": 1.62, + "no_speech_prob": 0.8689420819282532, + "confidence": 0.669, + "words": [ + { + "text": "Oh,", + "start": 60.02, + "end": 65.44, + "confidence": 0.084 + }, + { + "text": "the", + "start": 65.44, + "end": 65.48, + "confidence": 0.796 + }, + { + "text": "misery", + "start": 65.48, + "end": 66.08, + "confidence": 0.993 + }, + { + "text": "Everybody", + "start": 66.08, + "end": 67.62, + "confidence": 0.431 + }, + { + "text": "wants", + "start": 67.62, + "end": 68.0, + "confidence": 0.983 + }, + { + "text": "to", + "start": 68.0, + "end": 68.2, + "confidence": 0.993 + }, + { + "text": "be", + "start": 68.2, + "end": 68.4, + "confidence": 0.996 + }, + { + "text": "my", + 
"start": 68.4, + "end": 68.74, + "confidence": 0.984 + }, + { + "text": "enemy", + "start": 68.74, + "end": 69.26, + "confidence": 0.984 + } + ] + }, + { + "id": 2, + "seek": 6000, + "start": 70.9, + "end": 76.38, + "text": " Spare the sympathy Everybody wants to be my enemy", + "tokens": [ + 1738, + 543, + 264, + 33240, + 7646, + 2738, + 281, + 312, + 452, + 5945 + ], + "temperature": 0.0, + "avg_logprob": -0.45698386972600763, + "compression_ratio": 1.62, + "no_speech_prob": 0.8689420819282532, + "confidence": 0.934, + "words": [ + { + "text": "Spare", + "start": 70.9, + "end": 71.4, + "confidence": 0.812 + }, + { + "text": "the", + "start": 71.4, + "end": 71.74, + "confidence": 0.968 + }, + { + "text": "sympathy", + "start": 71.74, + "end": 72.58, + "confidence": 0.993 + }, + { + "text": "Everybody", + "start": 72.58, + "end": 73.88, + "confidence": 0.962 + }, + { + "text": "wants", + "start": 73.88, + "end": 74.24, + "confidence": 0.994 + }, + { + "text": "to", + "start": 74.24, + "end": 74.42, + "confidence": 0.999 + }, + { + "text": "be", + "start": 74.42, + "end": 75.36, + "confidence": 0.999 + }, + { + "text": "my", + "start": 75.36, + "end": 75.68, + "confidence": 0.837 + }, + { + "text": "enemy", + "start": 75.68, + "end": 76.38, + "confidence": 0.997 + } + ] + }, + { + "id": 3, + "seek": 6000, + "start": 79.94, + "end": 82.58, + "text": " Look out for yourself My enemy", + "tokens": [ + 2053, + 484, + 337, + 1803, + 1222, + 5945 + ], + "temperature": 0.0, + "avg_logprob": -0.45698386972600763, + "compression_ratio": 1.62, + "no_speech_prob": 0.8689420819282532, + "confidence": 0.774, + "words": [ + { + "text": "Look", + "start": 79.94, + "end": 80.14, + "confidence": 0.43 + }, + { + "text": "out", + "start": 80.14, + "end": 80.46, + "confidence": 0.991 + }, + { + "text": "for", + "start": 80.46, + "end": 80.6, + "confidence": 0.989 + }, + { + "text": "yourself", + "start": 80.6, + "end": 81.24, + "confidence": 0.97 + }, + { + "text": "My", + "start": 
81.24, + "end": 81.88, + "confidence": 0.536 + }, + { + "text": "enemy", + "start": 81.88, + "end": 82.58, + "confidence": 0.98 + } + ] + }, + { + "id": 4, + "seek": 6000, + "start": 86.18, + "end": 88.46, + "text": " Look out for yourself But I'm ready", + "tokens": [ + 2053, + 484, + 337, + 1803, + 583, + 286, + 478, + 1919 + ], + "temperature": 0.0, + "avg_logprob": -0.45698386972600763, + "compression_ratio": 1.62, + "no_speech_prob": 0.8689420819282532, + "confidence": 0.927, + "words": [ + { + "text": "Look", + "start": 86.18, + "end": 86.4, + "confidence": 0.747 + }, + { + "text": "out", + "start": 86.4, + "end": 86.64, + "confidence": 0.988 + }, + { + "text": "for", + "start": 86.64, + "end": 86.82, + "confidence": 0.998 + }, + { + "text": "yourself", + "start": 86.82, + "end": 87.62, + "confidence": 0.999 + }, + { + "text": "But", + "start": 87.62, + "end": 87.88, + "confidence": 0.764 + }, + { + "text": "I'm", + "start": 87.88, + "end": 88.12, + "confidence": 0.989 + }, + { + "text": "ready", + "start": 88.12, + "end": 88.46, + "confidence": 0.995 + } + ] + }, + { + "id": 5, + "seek": 8856, + "start": 88.58, + "end": 91.58, + "text": " Your words up on the wall as you're praying for my phone", + "tokens": [ + 2260, + 2283, + 493, + 322, + 264, + 2929, + 382, + 291, + 434, + 15611, + 337, + 452, + 2593 + ], + "temperature": 0.0, + "avg_logprob": -0.2876515737394007, + "compression_ratio": 1.5869565217391304, + "no_speech_prob": 0.7907973527908325, + "confidence": 0.82, + "words": [ + { + "text": "Your", + "start": 88.58, + "end": 88.86, + "confidence": 0.549 + }, + { + "text": "words", + "start": 88.86, + "end": 89.12, + "confidence": 0.921 + }, + { + "text": "up", + "start": 89.12, + "end": 89.46, + "confidence": 0.965 + }, + { + "text": "on", + "start": 89.46, + "end": 89.6, + "confidence": 0.995 + }, + { + "text": "the", + "start": 89.6, + "end": 89.66, + "confidence": 0.991 + }, + { + "text": "wall", + "start": 89.66, + "end": 90.12, + "confidence": 
0.88 + }, + { + "text": "as", + "start": 90.12, + "end": 90.34, + "confidence": 0.403 + }, + { + "text": "you're", + "start": 90.34, + "end": 90.8, + "confidence": 0.828 + }, + { + "text": "praying", + "start": 90.8, + "end": 90.84, + "confidence": 0.967 + }, + { + "text": "for", + "start": 90.84, + "end": 91.06, + "confidence": 0.97 + }, + { + "text": "my", + "start": 91.06, + "end": 91.24, + "confidence": 0.989 + }, + { + "text": "phone", + "start": 91.24, + "end": 91.58, + "confidence": 0.702 + } + ] + }, + { + "id": 6, + "seek": 8856, + "start": 91.78, + "end": 94.68, + "text": " And the laughter in the holes and the names that I've been called", + "tokens": [ + 400, + 264, + 13092, + 294, + 264, + 8118, + 293, + 264, + 5288, + 300, + 286, + 600, + 668, + 1219 + ], + "temperature": 0.0, + "avg_logprob": -0.2876515737394007, + "compression_ratio": 1.5869565217391304, + "no_speech_prob": 0.7907973527908325, + "confidence": 0.885, + "words": [ + { + "text": "And", + "start": 91.78, + "end": 91.92, + "confidence": 0.844 + }, + { + "text": "the", + "start": 91.92, + "end": 92.0, + "confidence": 0.987 + }, + { + "text": "laughter", + "start": 92.0, + "end": 92.4, + "confidence": 0.994 + }, + { + "text": "in", + "start": 92.4, + "end": 92.72, + "confidence": 0.849 + }, + { + "text": "the", + "start": 92.72, + "end": 92.92, + "confidence": 0.997 + }, + { + "text": "holes", + "start": 92.92, + "end": 93.2, + "confidence": 0.609 + }, + { + "text": "and", + "start": 93.2, + "end": 93.44, + "confidence": 0.596 + }, + { + "text": "the", + "start": 93.44, + "end": 93.54, + "confidence": 0.991 + }, + { + "text": "names", + "start": 93.54, + "end": 93.82, + "confidence": 0.991 + }, + { + "text": "that", + "start": 93.82, + "end": 94.12, + "confidence": 0.957 + }, + { + "text": "I've", + "start": 94.12, + "end": 94.28, + "confidence": 0.915 + }, + { + "text": "been", + "start": 94.28, + "end": 94.42, + "confidence": 0.986 + }, + { + "text": "called", + "start": 94.42, + "end": 
94.68, + "confidence": 0.912 + } + ] + }, + { + "id": 7, + "seek": 8856, + "start": 95.0, + "end": 97.82, + "text": " I stack it in my mind and I'm waiting for the time", + "tokens": [ + 286, + 8630, + 309, + 294, + 452, + 1575, + 293, + 286, + 478, + 3806, + 337, + 264, + 565 + ], + "temperature": 0.0, + "avg_logprob": -0.2876515737394007, + "compression_ratio": 1.5869565217391304, + "no_speech_prob": 0.7907973527908325, + "confidence": 0.96, + "words": [ + { + "text": "I", + "start": 95.0, + "end": 95.12, + "confidence": 0.987 + }, + { + "text": "stack", + "start": 95.12, + "end": 95.42, + "confidence": 0.825 + }, + { + "text": "it", + "start": 95.42, + "end": 95.7, + "confidence": 0.995 + }, + { + "text": "in", + "start": 95.7, + "end": 95.82, + "confidence": 0.996 + }, + { + "text": "my", + "start": 95.82, + "end": 96.02, + "confidence": 0.994 + }, + { + "text": "mind", + "start": 96.02, + "end": 96.4, + "confidence": 0.999 + }, + { + "text": "and", + "start": 96.4, + "end": 96.62, + "confidence": 0.762 + }, + { + "text": "I'm", + "start": 96.62, + "end": 97.02, + "confidence": 0.991 + }, + { + "text": "waiting", + "start": 97.02, + "end": 97.06, + "confidence": 0.988 + }, + { + "text": "for", + "start": 97.06, + "end": 97.32, + "confidence": 0.996 + }, + { + "text": "the", + "start": 97.32, + "end": 97.5, + "confidence": 0.996 + }, + { + "text": "time", + "start": 97.5, + "end": 97.82, + "confidence": 0.996 + } + ] + }, + { + "id": 8, + "seek": 8856, + "start": 97.98, + "end": 101.24, + "text": " When I show you what it's like to be worse but in the mind", + "tokens": [ + 1133, + 286, + 855, + 291, + 437, + 309, + 311, + 411, + 281, + 312, + 5324, + 457, + 294, + 264, + 1575 + ], + "temperature": 0.0, + "avg_logprob": -0.2876515737394007, + "compression_ratio": 1.5869565217391304, + "no_speech_prob": 0.7907973527908325, + "confidence": 0.755, + "words": [ + { + "text": "When", + "start": 97.98, + "end": 98.16, + "confidence": 0.923 + }, + { + "text": "I", + 
"start": 98.16, + "end": 98.22, + "confidence": 0.993 + }, + { + "text": "show", + "start": 98.22, + "end": 98.48, + "confidence": 0.959 + }, + { + "text": "you", + "start": 98.48, + "end": 98.74, + "confidence": 0.994 + }, + { + "text": "what", + "start": 98.74, + "end": 98.96, + "confidence": 0.996 + }, + { + "text": "it's", + "start": 98.96, + "end": 99.38, + "confidence": 0.988 + }, + { + "text": "like", + "start": 99.38, + "end": 99.42, + "confidence": 0.998 + }, + { + "text": "to", + "start": 99.42, + "end": 99.68, + "confidence": 0.877 + }, + { + "text": "be", + "start": 99.68, + "end": 99.78, + "confidence": 0.997 + }, + { + "text": "worse", + "start": 99.78, + "end": 100.04, + "confidence": 0.444 + }, + { + "text": "but", + "start": 100.04, + "end": 100.36, + "confidence": 0.196 + }, + { + "text": "in", + "start": 100.36, + "end": 100.52, + "confidence": 0.862 + }, + { + "text": "the", + "start": 100.52, + "end": 100.8, + "confidence": 0.426 + }, + { + "text": "mind", + "start": 100.8, + "end": 101.24, + "confidence": 0.621 + } + ] + }, + { + "id": 9, + "seek": 8856, + "start": 101.32, + "end": 106.28, + "text": " Tell you you're the greatest", + "tokens": [ + 5115, + 291, + 291, + 434, + 264, + 6636 + ], + "temperature": 0.0, + "avg_logprob": -0.2876515737394007, + "compression_ratio": 1.5869565217391304, + "no_speech_prob": 0.7907973527908325, + "confidence": 0.94, + "words": [ + { + "text": "Tell", + "start": 101.32, + "end": 102.08, + "confidence": 0.937 + }, + { + "text": "you", + "start": 102.08, + "end": 102.68, + "confidence": 0.993 + }, + { + "text": "you're", + "start": 102.68, + "end": 103.66, + "confidence": 0.873 + }, + { + "text": "the", + "start": 103.66, + "end": 104.14, + "confidence": 0.985 + }, + { + "text": "greatest", + "start": 104.14, + "end": 106.28, + "confidence": 0.986 + } + ] + }, + { + "id": 10, + "seek": 8856, + "start": 107.18, + "end": 113.0, + "text": " But once you turn they hate us", + "tokens": [ + 583, + 1564, + 291, + 
1261, + 436, + 4700, + 505 + ], + "temperature": 0.0, + "avg_logprob": -0.2876515737394007, + "compression_ratio": 1.5869565217391304, + "no_speech_prob": 0.7907973527908325, + "confidence": 0.909, + "words": [ + { + "text": "But", + "start": 107.18, + "end": 107.52, + "confidence": 0.963 + }, + { + "text": "once", + "start": 107.52, + "end": 108.34, + "confidence": 0.79 + }, + { + "text": "you", + "start": 108.34, + "end": 108.9, + "confidence": 0.998 + }, + { + "text": "turn", + "start": 108.9, + "end": 109.84, + "confidence": 0.913 + }, + { + "text": "they", + "start": 109.84, + "end": 110.34, + "confidence": 0.754 + }, + { + "text": "hate", + "start": 110.34, + "end": 111.9, + "confidence": 0.985 + }, + { + "text": "us", + "start": 111.9, + "end": 113.0, + "confidence": 0.999 + } + ] + }, + { + "id": 11, + "seek": 11312, + "start": 113.14, + "end": 119.18, + "text": " Oh, the misery Everybody wants to be my enemy", + "tokens": [ + 876, + 11, + 264, + 32309, + 7646, + 2738, + 281, + 312, + 452, + 5945 + ], + "temperature": 0.0, + "avg_logprob": -0.2736021077857827, + "compression_ratio": 1.6610169491525424, + "no_speech_prob": 0.8637643456459045, + "confidence": 0.804, + "words": [ + { + "text": "Oh,", + "start": 113.14, + "end": 115.14, + "confidence": 0.317 + }, + { + "text": "the", + "start": 115.14, + "end": 115.34, + "confidence": 0.964 + }, + { + "text": "misery", + "start": 115.34, + "end": 116.08, + "confidence": 0.995 + }, + { + "text": "Everybody", + "start": 116.08, + "end": 117.48, + "confidence": 0.482 + }, + { + "text": "wants", + "start": 117.48, + "end": 117.86, + "confidence": 0.986 + }, + { + "text": "to", + "start": 117.86, + "end": 118.08, + "confidence": 0.995 + }, + { + "text": "be", + "start": 118.08, + "end": 118.26, + "confidence": 0.998 + }, + { + "text": "my", + "start": 118.26, + "end": 118.58, + "confidence": 0.993 + }, + { + "text": "enemy", + "start": 118.58, + "end": 119.18, + "confidence": 0.983 + } + ] + }, + { + "id": 12, + 
"seek": 11312, + "start": 119.94, + "end": 126.08, + "text": " Spare the sympathy Everybody wants to be my enemy", + "tokens": [ + 1738, + 543, + 264, + 33240, + 7646, + 2738, + 281, + 312, + 452, + 5945 + ], + "temperature": 0.0, + "avg_logprob": -0.2736021077857827, + "compression_ratio": 1.6610169491525424, + "no_speech_prob": 0.8637643456459045, + "confidence": 0.874, + "words": [ + { + "text": "Spare", + "start": 119.94, + "end": 121.26, + "confidence": 0.576 + }, + { + "text": "the", + "start": 121.26, + "end": 121.62, + "confidence": 0.985 + }, + { + "text": "sympathy", + "start": 121.62, + "end": 122.32, + "confidence": 0.994 + }, + { + "text": "Everybody", + "start": 122.32, + "end": 123.76, + "confidence": 0.969 + }, + { + "text": "wants", + "start": 123.76, + "end": 124.12, + "confidence": 0.994 + }, + { + "text": "to", + "start": 124.12, + "end": 124.6, + "confidence": 0.999 + }, + { + "text": "be", + "start": 124.6, + "end": 125.34, + "confidence": 0.999 + }, + { + "text": "my", + "start": 125.34, + "end": 125.58, + "confidence": 0.835 + }, + { + "text": "enemy", + "start": 125.58, + "end": 126.08, + "confidence": 0.998 + } + ] + }, + { + "id": 13, + "seek": 11312, + "start": 129.86, + "end": 132.32, + "text": " Look out for yourself My enemy", + "tokens": [ + 2053, + 484, + 337, + 1803, + 1222, + 5945 + ], + "temperature": 0.0, + "avg_logprob": -0.2736021077857827, + "compression_ratio": 1.6610169491525424, + "no_speech_prob": 0.8637643456459045, + "confidence": 0.794, + "words": [ + { + "text": "Look", + "start": 129.86, + "end": 130.08, + "confidence": 0.592 + }, + { + "text": "out", + "start": 130.08, + "end": 130.3, + "confidence": 0.892 + }, + { + "text": "for", + "start": 130.3, + "end": 130.44, + "confidence": 0.978 + }, + { + "text": "yourself", + "start": 130.44, + "end": 130.92, + "confidence": 0.988 + }, + { + "text": "My", + "start": 130.92, + "end": 131.74, + "confidence": 0.496 + }, + { + "text": "enemy", + "start": 131.74, + "end": 
132.32, + "confidence": 0.989 + } + ] + }, + { + "id": 14, + "seek": 11312, + "start": 133.7, + "end": 137.14, + "text": " Look out for yourself", + "tokens": [ + 2053, + 484, + 337, + 1803 + ], + "temperature": 0.0, + "avg_logprob": -0.2736021077857827, + "compression_ratio": 1.6610169491525424, + "no_speech_prob": 0.8637643456459045, + "confidence": 0.97, + "words": [ + { + "text": "Look", + "start": 133.7, + "end": 134.62, + "confidence": 0.907 + }, + { + "text": "out", + "start": 134.62, + "end": 136.52, + "confidence": 0.978 + }, + { + "text": "for", + "start": 136.52, + "end": 136.66, + "confidence": 0.999 + }, + { + "text": "yourself", + "start": 136.66, + "end": 137.14, + "confidence": 0.999 + } + ] + }, + { + "id": 15, + "seek": 11312, + "start": 137.64, + "end": 140.16, + "text": " Look, okay I'm hoping that somebody pray for me", + "tokens": [ + 2053, + 11, + 1392, + 286, + 478, + 7159, + 300, + 2618, + 3690, + 337, + 385 + ], + "temperature": 0.0, + "avg_logprob": -0.2736021077857827, + "compression_ratio": 1.6610169491525424, + "no_speech_prob": 0.8637643456459045, + "confidence": 0.786, + "words": [ + { + "text": "Look,", + "start": 137.64, + "end": 138.46, + "confidence": 0.727 + }, + { + "text": "okay", + "start": 138.46, + "end": 138.62, + "confidence": 0.727 + }, + { + "text": "I'm", + "start": 138.62, + "end": 138.88, + "confidence": 0.698 + }, + { + "text": "hoping", + "start": 138.88, + "end": 139.14, + "confidence": 0.501 + }, + { + "text": "that", + "start": 139.14, + "end": 139.34, + "confidence": 0.958 + }, + { + "text": "somebody", + "start": 139.34, + "end": 139.52, + "confidence": 0.981 + }, + { + "text": "pray", + "start": 139.52, + "end": 139.8, + "confidence": 0.821 + }, + { + "text": "for", + "start": 139.8, + "end": 140.04, + "confidence": 0.9 + }, + { + "text": "me", + "start": 140.04, + "end": 140.16, + "confidence": 0.999 + } + ] + }, + { + "id": 16, + "seek": 14012, + "start": 140.18, + "end": 141.7, + "text": " I'm praying that 
somebody hold for me.", + "tokens": [ + 286, + 478, + 15611, + 300, + 2618, + 1797, + 337, + 385, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.795, + "words": [ + { + "text": "I'm", + "start": 140.18, + "end": 140.38, + "confidence": 0.819 + }, + { + "text": "praying", + "start": 140.38, + "end": 140.62, + "confidence": 0.694 + }, + { + "text": "that", + "start": 140.62, + "end": 140.82, + "confidence": 0.786 + }, + { + "text": "somebody", + "start": 140.82, + "end": 141.08, + "confidence": 0.985 + }, + { + "text": "hold", + "start": 141.08, + "end": 141.32, + "confidence": 0.447 + }, + { + "text": "for", + "start": 141.32, + "end": 141.52, + "confidence": 0.993 + }, + { + "text": "me.", + "start": 141.52, + "end": 141.7, + "confidence": 0.999 + } + ] + }, + { + "id": 17, + "seek": 14012, + "start": 141.72, + "end": 143.43, + "text": " I'm staying where nobody's supposed to be.", + "tokens": [ + 286, + 478, + 7939, + 689, + 5079, + 311, + 3442, + 281, + 312, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.853, + "words": [ + { + "text": "I'm", + "start": 141.72, + "end": 141.9, + "confidence": 0.976 + }, + { + "text": "staying", + "start": 141.9, + "end": 142.12, + "confidence": 0.708 + }, + { + "text": "where", + "start": 142.12, + "end": 142.3, + "confidence": 0.906 + }, + { + "text": "nobody's", + "start": 142.3, + "end": 142.88, + "confidence": 0.654 + }, + { + "text": "supposed", + "start": 142.88, + "end": 142.92, + "confidence": 0.924 + }, + { + "text": "to", + "start": 142.92, + "end": 143.12, + "confidence": 0.992 + }, + { + "text": "be.", + "start": 143.12, + "end": 143.43, + "confidence": 0.997 + } + ] + }, + { + "id": 18, + "seek": 14012, + "start": 143.43, + "end": 145.29, + "text": " I propose to be in a 
wreck of emotions.", + "tokens": [ + 286, + 17421, + 281, + 312, + 294, + 257, + 21478, + 295, + 8462, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.645, + "words": [ + { + "text": "I", + "start": 143.43, + "end": 143.47, + "confidence": 0.492 + }, + { + "text": "propose", + "start": 143.47, + "end": 143.66, + "confidence": 0.332 + }, + { + "text": "to", + "start": 143.66, + "end": 143.84, + "confidence": 0.78 + }, + { + "text": "be", + "start": 143.84, + "end": 144.04, + "confidence": 0.442 + }, + { + "text": "in", + "start": 144.04, + "end": 144.16, + "confidence": 0.81 + }, + { + "text": "a", + "start": 144.16, + "end": 144.24, + "confidence": 0.555 + }, + { + "text": "wreck", + "start": 144.24, + "end": 144.36, + "confidence": 0.81 + }, + { + "text": "of", + "start": 144.36, + "end": 144.54, + "confidence": 0.99 + }, + { + "text": "emotions.", + "start": 144.54, + "end": 145.29, + "confidence": 0.953 + } + ] + }, + { + "id": 19, + "seek": 14012, + "start": 145.29, + "end": 146.56, + "text": " Ready to go whenever you let me know.", + "tokens": [ + 9944, + 281, + 352, + 5699, + 291, + 718, + 385, + 458, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.834, + "words": [ + { + "text": "Ready", + "start": 145.29, + "end": 145.33, + "confidence": 0.951 + }, + { + "text": "to", + "start": 145.33, + "end": 145.5, + "confidence": 0.997 + }, + { + "text": "go", + "start": 145.5, + "end": 145.56, + "confidence": 0.997 + }, + { + "text": "whenever", + "start": 145.56, + "end": 145.76, + "confidence": 0.659 + }, + { + "text": "you", + "start": 145.76, + "end": 145.92, + "confidence": 0.403 + }, + { + "text": "let", + "start": 145.92, + "end": 146.04, + "confidence": 0.952 + }, + { + "text": "me", + "start": 146.04, + "end": 
146.16, + "confidence": 0.998 + }, + { + "text": "know.", + "start": 146.16, + "end": 146.56, + "confidence": 0.977 + } + ] + }, + { + "id": 20, + "seek": 14012, + "start": 146.56, + "end": 147.86, + "text": " The road is long, so put the pedal into the flow.", + "tokens": [ + 440, + 3060, + 307, + 938, + 11, + 370, + 829, + 264, + 19122, + 666, + 264, + 3095, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.959, + "words": [ + { + "text": "The", + "start": 146.56, + "end": 146.6, + "confidence": 0.99 + }, + { + "text": "road", + "start": 146.6, + "end": 146.66, + "confidence": 0.99 + }, + { + "text": "is", + "start": 146.66, + "end": 146.74, + "confidence": 0.987 + }, + { + "text": "long,", + "start": 146.74, + "end": 146.96, + "confidence": 0.961 + }, + { + "text": "so", + "start": 146.96, + "end": 147.02, + "confidence": 0.968 + }, + { + "text": "put", + "start": 147.02, + "end": 147.14, + "confidence": 0.954 + }, + { + "text": "the", + "start": 147.14, + "end": 147.28, + "confidence": 0.972 + }, + { + "text": "pedal", + "start": 147.28, + "end": 147.38, + "confidence": 0.993 + }, + { + "text": "into", + "start": 147.38, + "end": 147.58, + "confidence": 0.898 + }, + { + "text": "the", + "start": 147.58, + "end": 147.74, + "confidence": 0.992 + }, + { + "text": "flow.", + "start": 147.74, + "end": 147.86, + "confidence": 0.857 + } + ] + }, + { + "id": 21, + "seek": 14012, + "start": 147.94, + "end": 150.22, + "text": " The enemy on my trail, my energy unavailable.", + "tokens": [ + 440, + 5945, + 322, + 452, + 9924, + 11, + 452, + 2281, + 36541, + 32699, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.949, + "words": [ + { + "text": "The", + "start": 147.94, + "end": 148.06, + "confidence": 0.968 + }, + { + "text": 
"enemy", + "start": 148.06, + "end": 148.24, + "confidence": 0.727 + }, + { + "text": "on", + "start": 148.24, + "end": 148.44, + "confidence": 0.974 + }, + { + "text": "my", + "start": 148.44, + "end": 148.5, + "confidence": 0.974 + }, + { + "text": "trail,", + "start": 148.5, + "end": 148.72, + "confidence": 0.986 + }, + { + "text": "my", + "start": 148.72, + "end": 148.82, + "confidence": 0.99 + }, + { + "text": "energy", + "start": 148.82, + "end": 149.06, + "confidence": 0.996 + }, + { + "text": "unavailable.", + "start": 149.06, + "end": 150.22, + "confidence": 0.978 + } + ] + }, + { + "id": 22, + "seek": 14012, + "start": 150.22, + "end": 151.35, + "text": " I'ma tell them I said away, go away.", + "tokens": [ + 286, + 478, + 64, + 980, + 552, + 286, + 848, + 1314, + 11, + 352, + 1314, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.571, + "words": [ + { + "text": "I'ma", + "start": 150.22, + "end": 150.26, + "confidence": 0.767 + }, + { + "text": "tell", + "start": 150.26, + "end": 150.36, + "confidence": 0.989 + }, + { + "text": "them", + "start": 150.36, + "end": 150.5, + "confidence": 0.336 + }, + { + "text": "I", + "start": 150.5, + "end": 150.58, + "confidence": 0.358 + }, + { + "text": "said", + "start": 150.58, + "end": 150.74, + "confidence": 0.3 + }, + { + "text": "away,", + "start": 150.74, + "end": 151.14, + "confidence": 0.323 + }, + { + "text": "go", + "start": 151.14, + "end": 151.18, + "confidence": 0.911 + }, + { + "text": "away.", + "start": 151.18, + "end": 151.35, + "confidence": 0.781 + } + ] + }, + { + "id": 23, + "seek": 14012, + "start": 151.35, + "end": 152.78, + "text": " When I'm plotting, I'ma drive to the top.", + "tokens": [ + 1133, + 286, + 478, + 41178, + 11, + 286, + 478, + 64, + 3332, + 281, + 264, + 1192, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 
1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.645, + "words": [ + { + "text": "When", + "start": 151.35, + "end": 151.54, + "confidence": 0.884 + }, + { + "text": "I'm", + "start": 151.54, + "end": 151.72, + "confidence": 0.594 + }, + { + "text": "plotting,", + "start": 151.72, + "end": 151.92, + "confidence": 0.367 + }, + { + "text": "I'ma", + "start": 151.92, + "end": 152.04, + "confidence": 0.488 + }, + { + "text": "drive", + "start": 152.04, + "end": 152.14, + "confidence": 0.621 + }, + { + "text": "to", + "start": 152.14, + "end": 152.32, + "confidence": 0.984 + }, + { + "text": "the", + "start": 152.32, + "end": 152.38, + "confidence": 0.997 + }, + { + "text": "top.", + "start": 152.38, + "end": 152.78, + "confidence": 0.999 + } + ] + }, + { + "id": 24, + "seek": 14012, + "start": 152.78, + "end": 153.76, + "text": " I've been out of shape, thinking out of the box.", + "tokens": [ + 286, + 600, + 668, + 484, + 295, + 3909, + 11, + 1953, + 484, + 295, + 264, + 2424, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.769, + "words": [ + { + "text": "I've", + "start": 152.78, + "end": 152.82, + "confidence": 0.721 + }, + { + "text": "been", + "start": 152.82, + "end": 152.86, + "confidence": 0.998 + }, + { + "text": "out", + "start": 152.86, + "end": 152.9, + "confidence": 0.905 + }, + { + "text": "of", + "start": 152.9, + "end": 152.96, + "confidence": 0.986 + }, + { + "text": "shape,", + "start": 152.96, + "end": 153.12, + "confidence": 0.998 + }, + { + "text": "thinking", + "start": 153.12, + "end": 153.3, + "confidence": 0.842 + }, + { + "text": "out", + "start": 153.3, + "end": 153.48, + "confidence": 0.347 + }, + { + "text": "of", + "start": 153.48, + "end": 153.54, + "confidence": 0.507 + }, + { + "text": "the", + "start": 153.54, + "end": 153.58, + "confidence": 0.819 + }, + { + "text": "box.", + "start": 
153.58, + "end": 153.76, + "confidence": 0.998 + } + ] + }, + { + "id": 25, + "seek": 14012, + "start": 153.76, + "end": 155.34, + "text": " I'm an astronaut, blasted off the planet.", + "tokens": [ + 286, + 478, + 364, + 18516, + 11, + 12035, + 292, + 766, + 264, + 5054, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.967, + "words": [ + { + "text": "I'm", + "start": 153.76, + "end": 153.9, + "confidence": 0.996 + }, + { + "text": "an", + "start": 153.9, + "end": 153.98, + "confidence": 0.996 + }, + { + "text": "astronaut,", + "start": 153.98, + "end": 154.68, + "confidence": 0.991 + }, + { + "text": "blasted", + "start": 154.68, + "end": 154.86, + "confidence": 0.895 + }, + { + "text": "off", + "start": 154.86, + "end": 155.0, + "confidence": 0.987 + }, + { + "text": "the", + "start": 155.0, + "end": 155.16, + "confidence": 0.959 + }, + { + "text": "planet.", + "start": 155.16, + "end": 155.34, + "confidence": 0.997 + } + ] + }, + { + "id": 26, + "seek": 14012, + "start": 155.36, + "end": 157.11, + "text": " Rock the cars, catastrophic, and it matters more", + "tokens": [ + 6922, + 264, + 5163, + 11, + 34915, + 11, + 293, + 309, + 7001, + 544 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.703, + "words": [ + { + "text": "Rock", + "start": 155.36, + "end": 155.56, + "confidence": 0.563 + }, + { + "text": "the", + "start": 155.56, + "end": 155.7, + "confidence": 0.504 + }, + { + "text": "cars,", + "start": 155.7, + "end": 156.36, + "confidence": 0.472 + }, + { + "text": "catastrophic,", + "start": 156.36, + "end": 156.4, + "confidence": 0.77 + }, + { + "text": "and", + "start": 156.4, + "end": 156.62, + "confidence": 0.925 + }, + { + "text": "it", + "start": 156.62, + "end": 156.7, + "confidence": 0.73 + }, + { + "text": 
"matters", + "start": 156.7, + "end": 156.94, + "confidence": 0.874 + }, + { + "text": "more", + "start": 156.94, + "end": 157.11, + "confidence": 0.973 + } + ] + }, + { + "id": 27, + "seek": 14012, + "start": 157.11, + "end": 157.93, + "text": " because I had it now.", + "tokens": [ + 570, + 286, + 632, + 309, + 586, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.648, + "words": [ + { + "text": "because", + "start": 157.11, + "end": 157.38, + "confidence": 0.99 + }, + { + "text": "I", + "start": 157.38, + "end": 157.56, + "confidence": 0.528 + }, + { + "text": "had", + "start": 157.56, + "end": 157.68, + "confidence": 0.935 + }, + { + "text": "it", + "start": 157.68, + "end": 157.76, + "confidence": 0.984 + }, + { + "text": "now.", + "start": 157.76, + "end": 157.93, + "confidence": 0.237 + } + ] + }, + { + "id": 28, + "seek": 14012, + "start": 157.93, + "end": 160.1, + "text": " Had I thought about wreaking havoc on an opposition.", + "tokens": [ + 12298, + 286, + 1194, + 466, + 46674, + 2456, + 47367, + 322, + 364, + 13504, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.955, + "words": [ + { + "text": "Had", + "start": 157.93, + "end": 158.12, + "confidence": 0.852 + }, + { + "text": "I", + "start": 158.12, + "end": 158.2, + "confidence": 0.971 + }, + { + "text": "thought", + "start": 158.2, + "end": 158.4, + "confidence": 0.991 + }, + { + "text": "about", + "start": 158.4, + "end": 158.58, + "confidence": 0.995 + }, + { + "text": "wreaking", + "start": 158.58, + "end": 158.84, + "confidence": 0.996 + }, + { + "text": "havoc", + "start": 158.84, + "end": 159.08, + "confidence": 1.0 + }, + { + "text": "on", + "start": 159.08, + "end": 159.28, + "confidence": 0.861 + }, + { + "text": "an", + "start": 159.28, + "end": 
159.44, + "confidence": 0.913 + }, + { + "text": "opposition.", + "start": 159.44, + "end": 160.1, + "confidence": 0.991 + } + ] + }, + { + "id": 29, + "seek": 14012, + "start": 160.1, + "end": 161.04, + "text": " Kind of shocking, they want it static.", + "tokens": [ + 9242, + 295, + 18776, + 11, + 436, + 528, + 309, + 13437, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.666, + "words": [ + { + "text": "Kind", + "start": 160.1, + "end": 160.14, + "confidence": 0.663 + }, + { + "text": "of", + "start": 160.14, + "end": 160.22, + "confidence": 0.995 + }, + { + "text": "shocking,", + "start": 160.22, + "end": 160.5, + "confidence": 0.656 + }, + { + "text": "they", + "start": 160.5, + "end": 160.58, + "confidence": 0.366 + }, + { + "text": "want", + "start": 160.58, + "end": 160.74, + "confidence": 0.573 + }, + { + "text": "it", + "start": 160.74, + "end": 160.84, + "confidence": 0.648 + }, + { + "text": "static.", + "start": 160.84, + "end": 161.04, + "confidence": 0.993 + } + ] + }, + { + "id": 30, + "seek": 14012, + "start": 161.06, + "end": 162.14, + "text": " With precision, I'm automatic.", + "tokens": [ + 2022, + 18356, + 11, + 286, + 478, + 12509, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.875, + "words": [ + { + "text": "With", + "start": 161.06, + "end": 161.24, + "confidence": 0.59 + }, + { + "text": "precision,", + "start": 161.24, + "end": 161.72, + "confidence": 0.904 + }, + { + "text": "I'm", + "start": 161.72, + "end": 161.78, + "confidence": 0.987 + }, + { + "text": "automatic.", + "start": 161.78, + "end": 162.14, + "confidence": 0.987 + } + ] + }, + { + "id": 31, + "seek": 14012, + "start": 162.24, + "end": 163.36, + "text": " Quarterback, I ain't talking second.", + "tokens": [ + 43794, + 3207, + 11, 
+ 286, + 7862, + 380, + 1417, + 1150, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.688, + "words": [ + { + "text": "Quarterback,", + "start": 162.24, + "end": 162.66, + "confidence": 0.513 + }, + { + "text": "I", + "start": 162.66, + "end": 162.78, + "confidence": 0.965 + }, + { + "text": "ain't", + "start": 162.78, + "end": 162.86, + "confidence": 0.99 + }, + { + "text": "talking", + "start": 162.86, + "end": 163.1, + "confidence": 0.892 + }, + { + "text": "second.", + "start": 163.1, + "end": 163.36, + "confidence": 0.328 + } + ] + }, + { + "id": 32, + "seek": 14012, + "start": 163.4, + "end": 164.17, + "text": " Pack it, pack it up.", + "tokens": [ + 18466, + 309, + 11, + 2844, + 309, + 493, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.973, + "words": [ + { + "text": "Pack", + "start": 163.4, + "end": 163.6, + "confidence": 0.968 + }, + { + "text": "it,", + "start": 163.6, + "end": 163.68, + "confidence": 0.937 + }, + { + "text": "pack", + "start": 163.68, + "end": 163.84, + "confidence": 0.986 + }, + { + "text": "it", + "start": 163.84, + "end": 163.98, + "confidence": 0.999 + }, + { + "text": "up.", + "start": 163.98, + "end": 164.17, + "confidence": 0.979 + } + ] + }, + { + "id": 33, + "seek": 14012, + "start": 164.17, + "end": 165.1, + "text": " I don't panic, better, better.", + "tokens": [ + 286, + 500, + 380, + 14783, + 11, + 1101, + 11, + 1101, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.76, + "words": [ + { + "text": "I", + "start": 164.17, + "end": 164.21, + "confidence": 0.984 + }, + { + "text": "don't", + "start": 164.21, + "end": 164.34, + "confidence": 0.913 + }, + { + "text": 
"panic,", + "start": 164.34, + "end": 164.8, + "confidence": 0.999 + }, + { + "text": "better,", + "start": 164.8, + "end": 164.84, + "confidence": 0.423 + }, + { + "text": "better.", + "start": 164.84, + "end": 165.1, + "confidence": 0.556 + } + ] + }, + { + "id": 34, + "seek": 14012, + "start": 165.16, + "end": 165.76, + "text": " Up who the baddest.", + "tokens": [ + 5858, + 567, + 264, + 1578, + 23748, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3642334662543403, + "compression_ratio": 1.7333333333333334, + "no_speech_prob": 0.7768110036849976, + "confidence": 0.752, + "words": [ + { + "text": "Up", + "start": 165.16, + "end": 165.24, + "confidence": 0.518 + }, + { + "text": "who", + "start": 165.24, + "end": 165.38, + "confidence": 0.751 + }, + { + "text": "the", + "start": 165.38, + "end": 165.52, + "confidence": 0.926 + }, + { + "text": "baddest.", + "start": 165.52, + "end": 165.76, + "confidence": 0.817 + } + ] + }, + { + "id": 35, + "seek": 16572, + "start": 165.76, + "end": 166.86, + "text": " it don't matter cause we is your enemy.", + "tokens": [ + 50364, + 309, + 500, + 380, + 1871, + 3082, + 321, + 307, + 428, + 5945, + 13, + 50414 + ], + "temperature": 0.0, + "avg_logprob": -0.910880969120906, + "compression_ratio": 0.8297872340425532, + "no_speech_prob": 0.6945543885231018, + "confidence": 0.448, + "words": [ + { + "text": "it", + "start": 165.76, + "end": 165.86, + "confidence": 0.124 + }, + { + "text": "don't", + "start": 165.86, + "end": 166.18, + "confidence": 0.825 + }, + { + "text": "matter", + "start": 166.18, + "end": 166.22, + "confidence": 0.996 + }, + { + "text": "cause", + "start": 166.22, + "end": 166.4, + "confidence": 0.287 + }, + { + "text": "we", + "start": 166.4, + "end": 166.5, + "confidence": 0.889 + }, + { + "text": "is", + "start": 166.5, + "end": 166.68, + "confidence": 0.231 + }, + { + "text": "your", + "start": 166.68, + "end": 166.74, + "confidence": 0.562 + }, + { + "text": "enemy.", + "start": 166.74, + "end": 
166.86, + "confidence": 0.258 + } + ] + }, + { + "id": 36, + "seek": 19572, + "start": 196.18, + "end": 198.2, + "text": " I swear I'll never be insane", + "tokens": [ + 286, + 11902, + 286, + 603, + 1128, + 312, + 10838 + ], + "temperature": 0.0, + "avg_logprob": -0.5786522030830383, + "compression_ratio": 1.0, + "no_speech_prob": 0.756009578704834, + "confidence": 0.75, + "words": [ + { + "text": "I", + "start": 196.18, + "end": 196.4, + "confidence": 0.623 + }, + { + "text": "swear", + "start": 196.4, + "end": 196.66, + "confidence": 0.959 + }, + { + "text": "I'll", + "start": 196.66, + "end": 197.2, + "confidence": 0.643 + }, + { + "text": "never", + "start": 197.2, + "end": 197.5, + "confidence": 0.997 + }, + { + "text": "be", + "start": 197.5, + "end": 197.84, + "confidence": 0.993 + }, + { + "text": "insane", + "start": 197.84, + "end": 198.2, + "confidence": 0.546 + } + ] + }, + { + "id": 37, + "seek": 19772, + "start": 198.38, + "end": 199.72, + "text": " You got to be yourself", + "tokens": [ + 50364, + 509, + 658, + 281, + 312, + 1803, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.7925397753715515, + "compression_ratio": 0.7333333333333333, + "no_speech_prob": 0.7818466424942017, + "confidence": 0.406, + "words": [ + { + "text": "You", + "start": 198.38, + "end": 198.54, + "confidence": 0.093 + }, + { + "text": "got", + "start": 198.54, + "end": 198.8, + "confidence": 0.199 + }, + { + "text": "to", + "start": 198.8, + "end": 198.98, + "confidence": 0.803 + }, + { + "text": "be", + "start": 198.98, + "end": 199.08, + "confidence": 0.826 + }, + { + "text": "yourself", + "start": 199.08, + "end": 199.72, + "confidence": 0.903 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases.cpu/random.nocond_apollo11.mp3.words.json b/tests/expected/corner_cases.cpu/random.nocond_apollo11.mp3.words.json new file mode 100644 index 
0000000000000000000000000000000000000000..411722411d7434ca27a340d8cf4629331e376df9 --- /dev/null +++ b/tests/expected/corner_cases.cpu/random.nocond_apollo11.mp3.words.json @@ -0,0 +1,1192 @@ +{ + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA. All right. Okay, we like to say that they make it one or two on the helmet. We're going to have a B1 and you can put the other one on the mic helmet with those GVA blizzard frames. We were going to hack me on the ground with a cover. I tried it already. Okay, fine. We weren't sure of that. Just a suggestion. We thought we'd get you to check it out. I'm not sure if you've already turned that. So I guess we're going to come up with this. Let us know. Okay, no problem. Okay, no problem. No problem. No one at the end of the line.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.64, + "end": 6.68, + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA.", + "tokens": [ + 25187, + 2975, + 11, + 18717, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 24758, + 3334, + 12, + 20914, + 460, + 2634, + 15454, + 460, + 20914, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.709779328937772, + "compression_ratio": 1.3631578947368421, + "no_speech_prob": 0.45045843720436096, + "confidence": 0.542, + "words": [ + { + "text": "Apollo", + "start": 0.64, + "end": 0.98, + "confidence": 0.155 + }, + { + "text": "11,", + "start": 0.98, + "end": 1.54, + "confidence": 0.977 + }, + { + "text": "Houston", + "start": 1.54, + "end": 1.8, + "confidence": 0.986 + }, + { + "text": "we", + "start": 1.8, + "end": 1.98, + "confidence": 0.52 + }, + { + "text": "got", + "start": 1.98, + "end": 2.16, + "confidence": 0.824 + }, + { + "text": "a", + "start": 2.16, + "end": 2.38, + "confidence": 0.989 + }, + { + "text": "recommendation", + "start": 2.38, + "end": 3.1, + "confidence": 0.968 + }, + { + "text": "for", + "start": 3.1, + "end": 3.52, + "confidence": 0.947 
+ }, + { + "text": "you", + "start": 3.52, + "end": 3.9, + "confidence": 0.984 + }, + { + "text": "on", + "start": 3.9, + "end": 4.28, + "confidence": 0.909 + }, + { + "text": "your", + "start": 4.28, + "end": 4.32, + "confidence": 0.971 + }, + { + "text": "Soyuz-VA", + "start": 4.32, + "end": 5.38, + "confidence": 0.26 + }, + { + "text": "GLEME", + "start": 5.38, + "end": 6.04, + "confidence": 0.478 + }, + { + "text": "GVA.", + "start": 6.04, + "end": 6.68, + "confidence": 0.436 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 7.5, + "end": 11.0, + "text": " All right.", + "tokens": [ + 1057, + 558, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.709779328937772, + "compression_ratio": 1.3631578947368421, + "no_speech_prob": 0.45045843720436096, + "confidence": 0.456, + "words": [ + { + "text": "All", + "start": 7.5, + "end": 7.7, + "confidence": 0.212 + }, + { + "text": "right.", + "start": 7.7, + "end": 11.0, + "confidence": 0.982 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 11.5, + "end": 17.19, + "text": " Okay, we like to say that they make it one or two on the helmet.", + "tokens": [ + 1033, + 11, + 321, + 411, + 281, + 584, + 300, + 436, + 652, + 309, + 472, + 420, + 732, + 322, + 264, + 15922, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.709779328937772, + "compression_ratio": 1.3631578947368421, + "no_speech_prob": 0.45045843720436096, + "confidence": 0.428, + "words": [ + { + "text": "Okay,", + "start": 11.5, + "end": 12.82, + "confidence": 0.577 + }, + { + "text": "we", + "start": 12.82, + "end": 13.12, + "confidence": 0.541 + }, + { + "text": "like", + "start": 13.12, + "end": 13.44, + "confidence": 0.493 + }, + { + "text": "to", + "start": 13.44, + "end": 13.68, + "confidence": 0.307 + }, + { + "text": "say", + "start": 13.68, + "end": 14.96, + "confidence": 0.172 + }, + { + "text": "that", + "start": 14.96, + "end": 15.56, + "confidence": 0.203 + }, + { + "text": "they", + "start": 15.56, + "end": 15.6, + "confidence": 0.472 + }, 
+ { + "text": "make", + "start": 15.6, + "end": 15.76, + "confidence": 0.378 + }, + { + "text": "it", + "start": 15.76, + "end": 15.96, + "confidence": 0.259 + }, + { + "text": "one", + "start": 15.96, + "end": 16.1, + "confidence": 0.481 + }, + { + "text": "or", + "start": 16.1, + "end": 16.34, + "confidence": 0.221 + }, + { + "text": "two", + "start": 16.34, + "end": 16.46, + "confidence": 0.938 + }, + { + "text": "on", + "start": 16.46, + "end": 16.7, + "confidence": 0.638 + }, + { + "text": "the", + "start": 16.7, + "end": 16.96, + "confidence": 0.885 + }, + { + "text": "helmet.", + "start": 16.96, + "end": 17.19, + "confidence": 0.695 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 17.19, + "end": 24.74, + "text": " We're going to have a B1 and you can put the other one on the mic helmet with those GVA blizzard frames.", + "tokens": [ + 492, + 434, + 516, + 281, + 362, + 257, + 363, + 16, + 293, + 291, + 393, + 829, + 264, + 661, + 472, + 322, + 264, + 3123, + 15922, + 365, + 729, + 460, + 20914, + 888, + 31062, + 12083, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.709779328937772, + "compression_ratio": 1.3631578947368421, + "no_speech_prob": 0.45045843720436096, + "confidence": 0.515, + "words": [ + { + "text": "We're", + "start": 17.19, + "end": 17.86, + "confidence": 0.63 + }, + { + "text": "going", + "start": 17.86, + "end": 18.06, + "confidence": 0.62 + }, + { + "text": "to", + "start": 18.06, + "end": 18.24, + "confidence": 0.993 + }, + { + "text": "have", + "start": 18.24, + "end": 18.3, + "confidence": 0.959 + }, + { + "text": "a", + "start": 18.3, + "end": 18.42, + "confidence": 0.32 + }, + { + "text": "B1", + "start": 18.42, + "end": 19.3, + "confidence": 0.799 + }, + { + "text": "and", + "start": 19.3, + "end": 20.18, + "confidence": 0.498 + }, + { + "text": "you", + "start": 20.18, + "end": 20.36, + "confidence": 0.87 + }, + { + "text": "can", + "start": 20.36, + "end": 20.54, + "confidence": 0.672 + }, + { + "text": "put", + "start": 
20.54, + "end": 20.72, + "confidence": 0.967 + }, + { + "text": "the", + "start": 20.72, + "end": 20.88, + "confidence": 0.974 + }, + { + "text": "other", + "start": 20.88, + "end": 21.06, + "confidence": 0.995 + }, + { + "text": "one", + "start": 21.06, + "end": 21.26, + "confidence": 0.973 + }, + { + "text": "on", + "start": 21.26, + "end": 21.68, + "confidence": 0.973 + }, + { + "text": "the", + "start": 21.68, + "end": 21.96, + "confidence": 0.585 + }, + { + "text": "mic", + "start": 21.96, + "end": 22.6, + "confidence": 0.48 + }, + { + "text": "helmet", + "start": 22.6, + "end": 22.94, + "confidence": 0.949 + }, + { + "text": "with", + "start": 22.94, + "end": 23.2, + "confidence": 0.444 + }, + { + "text": "those", + "start": 23.2, + "end": 23.5, + "confidence": 0.351 + }, + { + "text": "GVA", + "start": 23.5, + "end": 23.86, + "confidence": 0.169 + }, + { + "text": "blizzard", + "start": 23.86, + "end": 24.32, + "confidence": 0.094 + }, + { + "text": "frames.", + "start": 24.32, + "end": 24.74, + "confidence": 0.245 + } + ] + }, + { + "id": 4, + "seek": 5500, + "start": 56.0, + "end": 61.1, + "text": " We were going to hack me on the ground with a cover.", + "tokens": [ + 492, + 645, + 516, + 281, + 10339, + 385, + 322, + 264, + 2727, + 365, + 257, + 2060, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.46802620968576203, + "compression_ratio": 1.6418604651162791, + "no_speech_prob": 0.7498895525932312, + "confidence": 0.342, + "words": [ + { + "text": "We", + "start": 56.0, + "end": 56.68, + "confidence": 0.071 + }, + { + "text": "were", + "start": 56.68, + "end": 56.98, + "confidence": 0.177 + }, + { + "text": "going", + "start": 56.98, + "end": 57.2, + "confidence": 0.148 + }, + { + "text": "to", + "start": 57.2, + "end": 57.46, + "confidence": 0.932 + }, + { + "text": "hack", + "start": 57.46, + "end": 57.5, + "confidence": 0.261 + }, + { + "text": "me", + "start": 57.5, + "end": 57.64, + "confidence": 0.544 + }, + { + "text": "on", + "start": 57.64, 
+ "end": 59.2, + "confidence": 0.433 + }, + { + "text": "the", + "start": 59.2, + "end": 59.5, + "confidence": 0.309 + }, + { + "text": "ground", + "start": 59.5, + "end": 59.78, + "confidence": 0.356 + }, + { + "text": "with", + "start": 59.78, + "end": 60.62, + "confidence": 0.471 + }, + { + "text": "a", + "start": 60.62, + "end": 60.78, + "confidence": 0.538 + }, + { + "text": "cover.", + "start": 60.78, + "end": 61.1, + "confidence": 0.855 + } + ] + }, + { + "id": 5, + "seek": 5500, + "start": 61.12, + "end": 61.88, + "text": " I tried it already.", + "tokens": [ + 286, + 3031, + 309, + 1217, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.46802620968576203, + "compression_ratio": 1.6418604651162791, + "no_speech_prob": 0.7498895525932312, + "confidence": 0.682, + "words": [ + { + "text": "I", + "start": 61.12, + "end": 61.26, + "confidence": 0.407 + }, + { + "text": "tried", + "start": 61.26, + "end": 61.52, + "confidence": 0.889 + }, + { + "text": "it", + "start": 61.52, + "end": 61.7, + "confidence": 0.746 + }, + { + "text": "already.", + "start": 61.7, + "end": 61.88, + "confidence": 0.803 + } + ] + }, + { + "id": 6, + "seek": 5500, + "start": 62.5, + "end": 63.12, + "text": " Okay, fine.", + "tokens": [ + 1033, + 11, + 2489, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.46802620968576203, + "compression_ratio": 1.6418604651162791, + "no_speech_prob": 0.7498895525932312, + "confidence": 0.683, + "words": [ + { + "text": "Okay,", + "start": 62.5, + "end": 63.04, + "confidence": 0.521 + }, + { + "text": "fine.", + "start": 63.04, + "end": 63.12, + "confidence": 0.895 + } + ] + }, + { + "id": 7, + "seek": 5500, + "start": 63.12, + "end": 64.12, + "text": " We weren't sure of that.", + "tokens": [ + 492, + 4999, + 380, + 988, + 295, + 300, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.46802620968576203, + "compression_ratio": 1.6418604651162791, + "no_speech_prob": 0.7498895525932312, + "confidence": 0.895, + "words": [ + { + "text": "We", + 
"start": 63.12, + "end": 63.34, + "confidence": 0.959 + }, + { + "text": "weren't", + "start": 63.34, + "end": 63.62, + "confidence": 0.988 + }, + { + "text": "sure", + "start": 63.62, + "end": 63.82, + "confidence": 0.903 + }, + { + "text": "of", + "start": 63.82, + "end": 64.08, + "confidence": 0.613 + }, + { + "text": "that.", + "start": 64.08, + "end": 64.12, + "confidence": 0.989 + } + ] + }, + { + "id": 8, + "seek": 5500, + "start": 64.46, + "end": 65.2, + "text": " Just a suggestion.", + "tokens": [ + 1449, + 257, + 16541, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.46802620968576203, + "compression_ratio": 1.6418604651162791, + "no_speech_prob": 0.7498895525932312, + "confidence": 0.755, + "words": [ + { + "text": "Just", + "start": 64.46, + "end": 64.66, + "confidence": 0.545 + }, + { + "text": "a", + "start": 64.66, + "end": 64.76, + "confidence": 0.791 + }, + { + "text": "suggestion.", + "start": 64.76, + "end": 65.2, + "confidence": 0.997 + } + ] + }, + { + "id": 9, + "seek": 5500, + "start": 65.34, + "end": 67.92, + "text": " We thought we'd get you to check it out.", + "tokens": [ + 492, + 1194, + 321, + 1116, + 483, + 291, + 281, + 1520, + 309, + 484, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.46802620968576203, + "compression_ratio": 1.6418604651162791, + "no_speech_prob": 0.7498895525932312, + "confidence": 0.692, + "words": [ + { + "text": "We", + "start": 65.34, + "end": 65.44, + "confidence": 0.873 + }, + { + "text": "thought", + "start": 65.44, + "end": 65.62, + "confidence": 0.968 + }, + { + "text": "we'd", + "start": 65.62, + "end": 66.76, + "confidence": 0.686 + }, + { + "text": "get", + "start": 66.76, + "end": 67.06, + "confidence": 0.093 + }, + { + "text": "you", + "start": 67.06, + "end": 67.14, + "confidence": 0.823 + }, + { + "text": "to", + "start": 67.14, + "end": 67.32, + "confidence": 0.987 + }, + { + "text": "check", + "start": 67.32, + "end": 67.46, + "confidence": 0.845 + }, + { + "text": "it", + "start": 67.46, 
+ "end": 67.68, + "confidence": 0.991 + }, + { + "text": "out.", + "start": 67.68, + "end": 67.92, + "confidence": 0.997 + } + ] + }, + { + "id": 10, + "seek": 5500, + "start": 68.28, + "end": 69.34, + "text": " I'm not sure if you've already turned that.", + "tokens": [ + 286, + 478, + 406, + 988, + 498, + 291, + 600, + 1217, + 3574, + 300, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.46802620968576203, + "compression_ratio": 1.6418604651162791, + "no_speech_prob": 0.7498895525932312, + "confidence": 0.444, + "words": [ + { + "text": "I'm", + "start": 68.28, + "end": 68.5, + "confidence": 0.314 + }, + { + "text": "not", + "start": 68.5, + "end": 68.56, + "confidence": 0.319 + }, + { + "text": "sure", + "start": 68.56, + "end": 68.74, + "confidence": 0.82 + }, + { + "text": "if", + "start": 68.74, + "end": 68.78, + "confidence": 0.334 + }, + { + "text": "you've", + "start": 68.78, + "end": 68.84, + "confidence": 0.389 + }, + { + "text": "already", + "start": 68.84, + "end": 68.94, + "confidence": 0.588 + }, + { + "text": "turned", + "start": 68.94, + "end": 69.12, + "confidence": 0.738 + }, + { + "text": "that.", + "start": 69.12, + "end": 69.34, + "confidence": 0.524 + } + ] + }, + { + "id": 11, + "seek": 5500, + "start": 70.42, + "end": 71.98, + "text": " So I guess we're going to come up with this.", + "tokens": [ + 407, + 286, + 2041, + 321, + 434, + 516, + 281, + 808, + 493, + 365, + 341, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.46802620968576203, + "compression_ratio": 1.6418604651162791, + "no_speech_prob": 0.7498895525932312, + "confidence": 0.709, + "words": [ + { + "text": "So", + "start": 70.42, + "end": 70.5, + "confidence": 0.533 + }, + { + "text": "I", + "start": 70.5, + "end": 70.54, + "confidence": 0.454 + }, + { + "text": "guess", + "start": 70.54, + "end": 70.82, + "confidence": 0.991 + }, + { + "text": "we're", + "start": 70.82, + "end": 71.12, + "confidence": 0.654 + }, + { + "text": "going", + "start": 71.12, + "end": 71.28, + 
"confidence": 0.815 + }, + { + "text": "to", + "start": 71.28, + "end": 71.36, + "confidence": 0.992 + }, + { + "text": "come", + "start": 71.36, + "end": 71.48, + "confidence": 0.97 + }, + { + "text": "up", + "start": 71.48, + "end": 71.68, + "confidence": 0.958 + }, + { + "text": "with", + "start": 71.68, + "end": 71.92, + "confidence": 0.915 + }, + { + "text": "this.", + "start": 71.92, + "end": 71.98, + "confidence": 0.323 + } + ] + }, + { + "id": 12, + "seek": 5500, + "start": 73.5, + "end": 73.62, + "text": " Let us know.", + "tokens": [ + 961, + 505, + 458, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.46802620968576203, + "compression_ratio": 1.6418604651162791, + "no_speech_prob": 0.7498895525932312, + "confidence": 0.847, + "words": [ + { + "text": "Let", + "start": 73.5, + "end": 73.54, + "confidence": 0.615 + }, + { + "text": "us", + "start": 73.54, + "end": 73.58, + "confidence": 0.99 + }, + { + "text": "know.", + "start": 73.58, + "end": 73.62, + "confidence": 0.998 + } + ] + }, + { + "id": 13, + "seek": 5500, + "start": 74.22, + "end": 75.22, + "text": " Okay, no problem.", + "tokens": [ + 1033, + 11, + 572, + 1154, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.46802620968576203, + "compression_ratio": 1.6418604651162791, + "no_speech_prob": 0.7498895525932312, + "confidence": 0.851, + "words": [ + { + "text": "Okay,", + "start": 74.22, + "end": 74.7, + "confidence": 0.687 + }, + { + "text": "no", + "start": 74.7, + "end": 74.86, + "confidence": 0.917 + }, + { + "text": "problem.", + "start": 74.86, + "end": 75.22, + "confidence": 0.976 + } + ] + }, + { + "id": 14, + "seek": 5500, + "start": 75.28, + "end": 76.46, + "text": " Okay, no problem.", + "tokens": [ + 1033, + 11, + 572, + 1154, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.46802620968576203, + "compression_ratio": 1.6418604651162791, + "no_speech_prob": 0.7498895525932312, + "confidence": 0.623, + "words": [ + { + "text": "Okay,", + "start": 75.28, + "end": 75.98, + 
"confidence": 0.258 + }, + { + "text": "no", + "start": 75.98, + "end": 76.2, + "confidence": 0.948 + }, + { + "text": "problem.", + "start": 76.2, + "end": 76.46, + "confidence": 0.993 + } + ] + }, + { + "id": 15, + "seek": 7600, + "start": 76.46, + "end": 76.62, + "text": " No problem.", + "tokens": [ + 883, + 1154, + 13 + ], + "temperature": 0.1, + "avg_logprob": -0.7044810771942138, + "compression_ratio": 0.9344262295081968, + "no_speech_prob": 0.35927486419677734, + "confidence": 0.653, + "words": [ + { + "text": "No", + "start": 76.46, + "end": 76.5, + "confidence": 0.463 + }, + { + "text": "problem.", + "start": 76.5, + "end": 76.62, + "confidence": 0.922 + } + ] + }, + { + "id": 16, + "seek": 7700, + "start": 77.02, + "end": 78.32, + "text": " No one at the end of the line.", + "tokens": [ + 50364, + 883, + 472, + 412, + 264, + 917, + 295, + 264, + 1622, + 13, + 50464 + ], + "temperature": 0.1, + "avg_logprob": -0.676593542098999, + "compression_ratio": 0.8823529411764706, + "no_speech_prob": 0.2589211165904999, + "confidence": 0.558, + "words": [ + { + "text": "No", + "start": 77.02, + "end": 77.22, + "confidence": 0.139 + }, + { + "text": "one", + "start": 77.22, + "end": 77.36, + "confidence": 0.335 + }, + { + "text": "at", + "start": 77.36, + "end": 77.44, + "confidence": 0.483 + }, + { + "text": "the", + "start": 77.44, + "end": 77.48, + "confidence": 0.996 + }, + { + "text": "end", + "start": 77.48, + "end": 77.92, + "confidence": 0.995 + }, + { + "text": "of", + "start": 77.92, + "end": 78.1, + "confidence": 0.968 + }, + { + "text": "the", + "start": 78.1, + "end": 78.2, + "confidence": 0.536 + }, + { + "text": "line.", + "start": 78.2, + "end": 78.32, + "confidence": 0.816 + } + ] + } + ], + "language": "English" +} \ No newline at end of file diff --git a/tests/expected/corner_cases.cpu/random_apollo11.mp3.words.json b/tests/expected/corner_cases.cpu/random_apollo11.mp3.words.json new file mode 100644 index 
0000000000000000000000000000000000000000..23ccd701318a10a1d5462a5b3b9a55f8ce872217 --- /dev/null +++ b/tests/expected/corner_cases.cpu/random_apollo11.mp3.words.json @@ -0,0 +1,3815 @@ +{ + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-EA GLEME GVA. Alright, okay, we like to say that they make the one that's on the helmet we're going to have in B1. And you can put the other one on the mic helmet with those GVA blizzard frames. Alright, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them got them, got them, got them, got them, got them, 
got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.64, + "end": 6.94, + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-EA GLEME GVA.", + "tokens": [ + 25187, + 2975, + 11, + 18717, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 24758, + 3334, + 12, + 36, + 32, + 460, + 2634, + 15454, + 460, + 20914, + 13 + ], + "temperature": 0.2, + "avg_logprob": -0.6937426777629109, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.45045843720436096, + "confidence": 0.549, + "words": [ + { + "text": "Apollo", + "start": 0.64, + "end": 0.98, + "confidence": 0.155 + }, + { + "text": "11,", + "start": 0.98, + "end": 1.54, + "confidence": 0.977 + }, + { + "text": "Houston", + "start": 1.54, + "end": 1.8, + "confidence": 0.986 + }, + { + "text": "we", + "start": 1.8, + "end": 1.98, + "confidence": 0.52 + }, + { + "text": "got", + "start": 1.98, + "end": 2.16, + "confidence": 0.824 + }, + { + "text": "a", + "start": 2.16, + "end": 2.38, + "confidence": 0.989 + }, + { + "text": "recommendation", + "start": 2.38, + "end": 3.1, + "confidence": 0.968 + }, + { + "text": "for", + "start": 3.1, + "end": 3.52, + "confidence": 0.947 + }, + { + "text": "you", + "start": 3.52, + "end": 3.9, + "confidence": 0.984 + }, + { + "text": "on", + "start": 3.9, + 
"end": 4.28, + "confidence": 0.909 + }, + { + "text": "your", + "start": 4.28, + "end": 4.32, + "confidence": 0.971 + }, + { + "text": "Soyuz-EA", + "start": 4.32, + "end": 5.42, + "confidence": 0.321 + }, + { + "text": "GLEME", + "start": 5.42, + "end": 6.04, + "confidence": 0.561 + }, + { + "text": "GVA.", + "start": 6.04, + "end": 6.94, + "confidence": 0.336 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 10.82, + "end": 19.86, + "text": " Alright, okay, we like to say that they make the one that's on the helmet we're going to have in B1.", + "tokens": [ + 2798, + 11, + 1392, + 11, + 321, + 411, + 281, + 584, + 300, + 436, + 652, + 264, + 472, + 300, + 311, + 322, + 264, + 15922, + 321, + 434, + 516, + 281, + 362, + 294, + 363, + 16, + 13 + ], + "temperature": 0.2, + "avg_logprob": -0.6937426777629109, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.45045843720436096, + "confidence": 0.469, + "words": [ + { + "text": "Alright,", + "start": 10.82, + "end": 12.5, + "confidence": 0.295 + }, + { + "text": "okay,", + "start": 12.5, + "end": 12.94, + "confidence": 0.513 + }, + { + "text": "we", + "start": 12.94, + "end": 13.12, + "confidence": 0.614 + }, + { + "text": "like", + "start": 13.12, + "end": 13.44, + "confidence": 0.496 + }, + { + "text": "to", + "start": 13.44, + "end": 13.68, + "confidence": 0.26 + }, + { + "text": "say", + "start": 13.68, + "end": 14.98, + "confidence": 0.135 + }, + { + "text": "that", + "start": 14.98, + "end": 15.56, + "confidence": 0.203 + }, + { + "text": "they", + "start": 15.56, + "end": 15.6, + "confidence": 0.425 + }, + { + "text": "make", + "start": 15.6, + "end": 15.76, + "confidence": 0.408 + }, + { + "text": "the", + "start": 15.76, + "end": 15.92, + "confidence": 0.255 + }, + { + "text": "one", + "start": 15.92, + "end": 16.1, + "confidence": 0.609 + }, + { + "text": "that's", + "start": 16.1, + "end": 16.34, + "confidence": 0.447 + }, + { + "text": "on", + "start": 16.34, + "end": 16.62, + "confidence": 
0.598 + }, + { + "text": "the", + "start": 16.62, + "end": 16.96, + "confidence": 0.837 + }, + { + "text": "helmet", + "start": 16.96, + "end": 17.36, + "confidence": 0.836 + }, + { + "text": "we're", + "start": 17.36, + "end": 17.86, + "confidence": 0.301 + }, + { + "text": "going", + "start": 17.86, + "end": 18.06, + "confidence": 0.606 + }, + { + "text": "to", + "start": 18.06, + "end": 18.22, + "confidence": 0.818 + }, + { + "text": "have", + "start": 18.22, + "end": 18.26, + "confidence": 0.846 + }, + { + "text": "in", + "start": 18.26, + "end": 18.42, + "confidence": 0.718 + }, + { + "text": "B1.", + "start": 18.42, + "end": 19.86, + "confidence": 0.798 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 20.1, + "end": 24.76, + "text": " And you can put the other one on the mic helmet with those GVA blizzard frames.", + "tokens": [ + 400, + 291, + 393, + 829, + 264, + 661, + 472, + 322, + 264, + 3123, + 15922, + 365, + 729, + 460, + 20914, + 888, + 31062, + 12083, + 13 + ], + "temperature": 0.2, + "avg_logprob": -0.6937426777629109, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.45045843720436096, + "confidence": 0.52, + "words": [ + { + "text": "And", + "start": 20.1, + "end": 20.26, + "confidence": 0.62 + }, + { + "text": "you", + "start": 20.26, + "end": 20.38, + "confidence": 0.948 + }, + { + "text": "can", + "start": 20.38, + "end": 20.54, + "confidence": 0.731 + }, + { + "text": "put", + "start": 20.54, + "end": 20.72, + "confidence": 0.98 + }, + { + "text": "the", + "start": 20.72, + "end": 20.88, + "confidence": 0.989 + }, + { + "text": "other", + "start": 20.88, + "end": 21.06, + "confidence": 0.991 + }, + { + "text": "one", + "start": 21.06, + "end": 21.26, + "confidence": 0.977 + }, + { + "text": "on", + "start": 21.26, + "end": 21.68, + "confidence": 0.989 + }, + { + "text": "the", + "start": 21.68, + "end": 21.96, + "confidence": 0.554 + }, + { + "text": "mic", + "start": 21.96, + "end": 22.58, + "confidence": 0.442 + }, + { + 
"text": "helmet", + "start": 22.58, + "end": 22.94, + "confidence": 0.842 + }, + { + "text": "with", + "start": 22.94, + "end": 23.2, + "confidence": 0.433 + }, + { + "text": "those", + "start": 23.2, + "end": 23.5, + "confidence": 0.471 + }, + { + "text": "GVA", + "start": 23.5, + "end": 23.88, + "confidence": 0.414 + }, + { + "text": "blizzard", + "start": 23.88, + "end": 24.32, + "confidence": 0.104 + }, + { + "text": "frames.", + "start": 24.32, + "end": 24.76, + "confidence": 0.25 + } + ] + }, + { + "id": 3, + "seek": 2500, + "start": 25.02, + "end": 55.0, + "text": " Alright, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 2798, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 
552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.2, + "avg_logprob": -0.10773486667209202, + "compression_ratio": 24.096774193548388, + "no_speech_prob": 0.0012825782177969813, + "confidence": 0.936, + "words": [ + { + "text": "Alright,", + "start": 25.02, + "end": 31.98, + "confidence": 0.245 + }, + { + "text": "got", + "start": 31.98, + "end": 32.02, + "confidence": 0.356 + }, + { + "text": "them,", + "start": 32.02, + "end": 32.58, + "confidence": 0.323 + }, + { + "text": "got", + "start": 32.58, + "end": 33.08, + "confidence": 0.519 + }, + { + "text": "them,", + "start": 33.08, + "end": 33.78, + "confidence": 0.941 + }, + { + "text": "got", + "start": 33.78, + "end": 33.82, + "confidence": 0.507 + }, + { + "text": "them,", + "start": 33.82, + "end": 34.12, + "confidence": 0.926 + }, + { + "text": "got", + "start": 34.12, + "end": 34.16, + "confidence": 0.475 + }, + { + "text": "them,", + "start": 34.16, + "end": 34.92, + "confidence": 0.897 + }, + { + "text": "got", + "start": 34.92, + "end": 34.96, + "confidence": 0.597 + }, + { + "text": "them,", + "start": 34.96, + "end": 35.88, + "confidence": 0.914 + }, + { + "text": "got", + "start": 35.88, + "end": 
35.92, + "confidence": 0.662 + }, + { + "text": "them,", + "start": 35.92, + "end": 35.96, + "confidence": 0.942 + }, + { + "text": "got", + "start": 35.96, + "end": 36.0, + "confidence": 0.738 + }, + { + "text": "them,", + "start": 36.0, + "end": 36.04, + "confidence": 0.963 + }, + { + "text": "got", + "start": 36.04, + "end": 36.08, + "confidence": 0.819 + }, + { + "text": "them,", + "start": 36.08, + "end": 36.12, + "confidence": 0.974 + }, + { + "text": "got", + "start": 36.12, + "end": 36.16, + "confidence": 0.843 + }, + { + "text": "them,", + "start": 36.16, + "end": 36.2, + "confidence": 0.981 + }, + { + "text": "got", + "start": 36.2, + "end": 36.24, + "confidence": 0.894 + }, + { + "text": "them,", + "start": 36.24, + "end": 36.28, + "confidence": 0.985 + }, + { + "text": "got", + "start": 36.28, + "end": 36.32, + "confidence": 0.918 + }, + { + "text": "them,", + "start": 36.32, + "end": 36.36, + "confidence": 0.987 + }, + { + "text": "got", + "start": 36.36, + "end": 36.4, + "confidence": 0.936 + }, + { + "text": "them,", + "start": 36.4, + "end": 36.44, + "confidence": 0.988 + }, + { + "text": "got", + "start": 36.44, + "end": 36.48, + "confidence": 0.941 + }, + { + "text": "them,", + "start": 36.48, + "end": 36.52, + "confidence": 0.987 + }, + { + "text": "got", + "start": 36.52, + "end": 36.56, + "confidence": 0.943 + }, + { + "text": "them,", + "start": 36.56, + "end": 36.6, + "confidence": 0.99 + }, + { + "text": "got", + "start": 36.6, + "end": 36.64, + "confidence": 0.946 + }, + { + "text": "them,", + "start": 36.64, + "end": 36.68, + "confidence": 0.991 + }, + { + "text": "got", + "start": 36.68, + "end": 36.72, + "confidence": 0.944 + }, + { + "text": "them,", + "start": 36.72, + "end": 36.76, + "confidence": 0.991 + }, + { + "text": "got", + "start": 36.76, + "end": 36.8, + "confidence": 0.944 + }, + { + "text": "them,", + "start": 36.8, + "end": 36.84, + "confidence": 0.991 + }, + { + "text": "got", + "start": 36.84, + "end": 36.88, + 
"confidence": 0.946 + }, + { + "text": "them,", + "start": 36.88, + "end": 36.92, + "confidence": 0.991 + }, + { + "text": "got", + "start": 36.92, + "end": 36.96, + "confidence": 0.948 + }, + { + "text": "them,", + "start": 36.96, + "end": 37.0, + "confidence": 0.992 + }, + { + "text": "got", + "start": 37.0, + "end": 37.04, + "confidence": 0.953 + }, + { + "text": "them,", + "start": 37.04, + "end": 37.08, + "confidence": 0.992 + }, + { + "text": "got", + "start": 37.08, + "end": 37.12, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 37.12, + "end": 37.16, + "confidence": 0.992 + }, + { + "text": "got", + "start": 37.16, + "end": 37.2, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 37.2, + "end": 37.24, + "confidence": 0.992 + }, + { + "text": "got", + "start": 37.24, + "end": 37.28, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 37.28, + "end": 37.32, + "confidence": 0.992 + }, + { + "text": "got", + "start": 37.32, + "end": 37.36, + "confidence": 0.958 + }, + { + "text": "them,", + "start": 37.36, + "end": 37.4, + "confidence": 0.992 + }, + { + "text": "got", + "start": 37.4, + "end": 37.44, + "confidence": 0.96 + }, + { + "text": "them,", + "start": 37.44, + "end": 37.48, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.48, + "end": 37.52, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 37.52, + "end": 37.56, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.56, + "end": 37.6, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 37.6, + "end": 37.64, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.64, + "end": 37.68, + "confidence": 0.962 + }, + { + "text": "them,", + "start": 37.68, + "end": 37.72, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.72, + "end": 37.76, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 37.76, + "end": 37.8, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.8, + "end": 37.84, + "confidence": 
0.965 + }, + { + "text": "them,", + "start": 37.84, + "end": 37.88, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.88, + "end": 37.92, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 37.92, + "end": 37.96, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.96, + "end": 38.0, + "confidence": 0.968 + }, + { + "text": "them,", + "start": 38.0, + "end": 38.04, + "confidence": 0.993 + }, + { + "text": "got", + "start": 38.04, + "end": 38.08, + "confidence": 0.968 + }, + { + "text": "them,", + "start": 38.08, + "end": 38.12, + "confidence": 0.993 + }, + { + "text": "got", + "start": 38.12, + "end": 38.16, + "confidence": 0.969 + }, + { + "text": "them,", + "start": 38.16, + "end": 38.2, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.2, + "end": 38.24, + "confidence": 0.971 + }, + { + "text": "them,", + "start": 38.24, + "end": 38.28, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.28, + "end": 38.32, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 38.32, + "end": 38.36, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.36, + "end": 38.4, + "confidence": 0.972 + }, + { + "text": "them,", + "start": 38.4, + "end": 38.44, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.44, + "end": 38.48, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 38.48, + "end": 38.52, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.52, + "end": 38.56, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 38.56, + "end": 38.6, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.6, + "end": 38.64, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 38.64, + "end": 38.68, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.68, + "end": 38.72, + "confidence": 0.979 + }, + { + "text": "them,", + "start": 38.72, + "end": 38.76, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.76, + "end": 38.8, + "confidence": 0.98 + }, + { + 
"text": "them,", + "start": 38.8, + "end": 38.84, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.84, + "end": 38.88, + "confidence": 0.983 + }, + { + "text": "them,", + "start": 38.88, + "end": 38.92, + "confidence": 0.995 + }, + { + "text": "got", + "start": 38.92, + "end": 38.96, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 38.96, + "end": 39.0, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.0, + "end": 39.04, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 39.04, + "end": 39.08, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.08, + "end": 39.12, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 39.12, + "end": 39.16, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.16, + "end": 39.2, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 39.2, + "end": 39.24, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.24, + "end": 39.28, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 39.28, + "end": 39.32, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.32, + "end": 39.36, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 39.36, + "end": 39.4, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.4, + "end": 39.44, + "confidence": 0.988 + }, + { + "text": "them,", + "start": 39.44, + "end": 39.48, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.48, + "end": 39.52, + "confidence": 0.988 + }, + { + "text": "them,", + "start": 39.52, + "end": 39.56, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.56, + "end": 39.6, + "confidence": 0.988 + }, + { + "text": "them,", + "start": 39.6, + "end": 39.64, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.64, + "end": 39.68, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 39.68, + "end": 39.72, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.72, + "end": 39.76, + "confidence": 0.989 + }, + { + "text": "them,", 
+ "start": 39.76, + "end": 39.8, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.8, + "end": 39.84, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 39.84, + "end": 39.88, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.88, + "end": 39.92, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 39.92, + "end": 39.96, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.96, + "end": 40.0, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 40.0, + "end": 40.04, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.04, + "end": 40.08, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 40.08, + "end": 40.12, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.12, + "end": 40.16, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.16, + "end": 40.2, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.2, + "end": 40.24, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.24, + "end": 40.28, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.28, + "end": 40.32, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.32, + "end": 40.36, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.36, + "end": 40.4, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.4, + "end": 40.44, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.44, + "end": 40.48, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.48, + "end": 40.52, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.52, + "end": 40.56, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 40.56, + "end": 40.6, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.6, + "end": 40.64, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 40.64, + "end": 40.68, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.68, + "end": 40.72, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.72, + 
"end": 40.76, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.76, + "end": 40.8, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.8, + "end": 40.84, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.84, + "end": 40.88, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.88, + "end": 40.92, + "confidence": 0.996 + }, + { + "text": "got", + "start": 40.92, + "end": 40.96, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.96, + "end": 41.0, + "confidence": 0.996 + }, + { + "text": "got", + "start": 41.0, + "end": 41.04, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 41.04, + "end": 41.08, + "confidence": 0.996 + }, + { + "text": "got", + "start": 41.08, + "end": 41.12, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 41.12, + "end": 41.16, + "confidence": 0.996 + }, + { + "text": "got", + "start": 41.16, + "end": 41.2, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 41.2, + "end": 42.94, + "confidence": 0.996 + }, + { + "text": "got", + "start": 42.94, + "end": 46.82, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 46.82, + "end": 47.8, + "confidence": 0.996 + }, + { + "text": "got", + "start": 47.8, + "end": 48.58, + "confidence": 0.995 + }, + { + "text": "them", + "start": 48.58, + "end": 55.0, + "confidence": 0.997 + } + ] + }, + { + "id": 4, + "seek": 5500, + "start": 72.06, + "end": 85.0, + "text": " got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got 
them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.2, + "avg_logprob": -0.053046889369263245, + "compression_ratio": 29.52, + "no_speech_prob": 0.24410122632980347, + "confidence": 0.948, + "words": [ + { + "text": "got", + "start": 72.06, + "end": 72.1, + "confidence": 0.214 + }, + { + "text": "them,", + "start": 72.1, + "end": 72.14, + 
"confidence": 0.95 + }, + { + "text": "got", + "start": 72.14, + "end": 72.18, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 72.18, + "end": 72.22, + "confidence": 0.997 + }, + { + "text": "got", + "start": 72.22, + "end": 72.26, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 72.26, + "end": 72.3, + "confidence": 0.997 + }, + { + "text": "got", + "start": 72.3, + "end": 72.34, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 72.34, + "end": 72.38, + "confidence": 0.997 + }, + { + "text": "got", + "start": 72.38, + "end": 72.42, + "confidence": 0.938 + }, + { + "text": "them,", + "start": 72.42, + "end": 72.46, + "confidence": 0.993 + }, + { + "text": "got", + "start": 72.46, + "end": 72.5, + "confidence": 0.912 + }, + { + "text": "them,", + "start": 72.5, + "end": 72.54, + "confidence": 0.988 + }, + { + "text": "got", + "start": 72.54, + "end": 72.58, + "confidence": 0.873 + }, + { + "text": "them,", + "start": 72.58, + "end": 72.62, + "confidence": 0.982 + }, + { + "text": "got", + "start": 72.62, + "end": 72.66, + "confidence": 0.863 + }, + { + "text": "them,", + "start": 72.66, + "end": 72.7, + "confidence": 0.984 + }, + { + "text": "got", + "start": 72.7, + "end": 72.74, + "confidence": 0.889 + }, + { + "text": "them,", + "start": 72.74, + "end": 72.78, + "confidence": 0.984 + }, + { + "text": "got", + "start": 72.78, + "end": 72.82, + "confidence": 0.852 + }, + { + "text": "them,", + "start": 72.82, + "end": 72.86, + "confidence": 0.925 + }, + { + "text": "got", + "start": 72.86, + "end": 72.9, + "confidence": 0.56 + }, + { + "text": "them,", + "start": 72.9, + "end": 72.94, + "confidence": 0.958 + }, + { + "text": "got", + "start": 72.94, + "end": 72.98, + "confidence": 0.81 + }, + { + "text": "them,", + "start": 72.98, + "end": 73.02, + "confidence": 0.976 + }, + { + "text": "got", + "start": 73.02, + "end": 73.06, + "confidence": 0.843 + }, + { + "text": "them,", + "start": 73.06, + "end": 73.1, + "confidence": 0.977 
+ }, + { + "text": "got", + "start": 73.1, + "end": 73.14, + "confidence": 0.835 + }, + { + "text": "them,", + "start": 73.14, + "end": 73.18, + "confidence": 0.977 + }, + { + "text": "got", + "start": 73.18, + "end": 73.22, + "confidence": 0.836 + }, + { + "text": "them,", + "start": 73.22, + "end": 73.26, + "confidence": 0.979 + }, + { + "text": "got", + "start": 73.26, + "end": 73.3, + "confidence": 0.853 + }, + { + "text": "them,", + "start": 73.3, + "end": 73.34, + "confidence": 0.983 + }, + { + "text": "got", + "start": 73.34, + "end": 73.38, + "confidence": 0.874 + }, + { + "text": "them,", + "start": 73.38, + "end": 73.42, + "confidence": 0.986 + }, + { + "text": "got", + "start": 73.42, + "end": 73.46, + "confidence": 0.892 + }, + { + "text": "them,", + "start": 73.46, + "end": 73.5, + "confidence": 0.985 + }, + { + "text": "got", + "start": 73.5, + "end": 73.54, + "confidence": 0.885 + }, + { + "text": "them,", + "start": 73.54, + "end": 73.58, + "confidence": 0.986 + }, + { + "text": "got", + "start": 73.58, + "end": 73.62, + "confidence": 0.877 + }, + { + "text": "them,", + "start": 73.62, + "end": 73.66, + "confidence": 0.986 + }, + { + "text": "got", + "start": 73.66, + "end": 73.7, + "confidence": 0.873 + }, + { + "text": "them,", + "start": 73.7, + "end": 73.74, + "confidence": 0.987 + }, + { + "text": "got", + "start": 73.74, + "end": 73.78, + "confidence": 0.871 + }, + { + "text": "them,", + "start": 73.78, + "end": 73.82, + "confidence": 0.987 + }, + { + "text": "got", + "start": 73.82, + "end": 73.86, + "confidence": 0.88 + }, + { + "text": "them,", + "start": 73.86, + "end": 73.9, + "confidence": 0.988 + }, + { + "text": "got", + "start": 73.9, + "end": 73.94, + "confidence": 0.883 + }, + { + "text": "them,", + "start": 73.94, + "end": 73.98, + "confidence": 0.989 + }, + { + "text": "got", + "start": 73.98, + "end": 74.02, + "confidence": 0.889 + }, + { + "text": "them,", + "start": 74.02, + "end": 74.06, + "confidence": 0.989 + }, + { + 
"text": "got", + "start": 74.06, + "end": 74.1, + "confidence": 0.895 + }, + { + "text": "them,", + "start": 74.1, + "end": 74.14, + "confidence": 0.99 + }, + { + "text": "got", + "start": 74.14, + "end": 74.18, + "confidence": 0.902 + }, + { + "text": "them,", + "start": 74.18, + "end": 74.22, + "confidence": 0.99 + }, + { + "text": "got", + "start": 74.22, + "end": 74.26, + "confidence": 0.905 + }, + { + "text": "them,", + "start": 74.26, + "end": 74.3, + "confidence": 0.991 + }, + { + "text": "got", + "start": 74.3, + "end": 74.34, + "confidence": 0.911 + }, + { + "text": "them,", + "start": 74.34, + "end": 74.38, + "confidence": 0.991 + }, + { + "text": "got", + "start": 74.38, + "end": 74.42, + "confidence": 0.916 + }, + { + "text": "them,", + "start": 74.42, + "end": 74.46, + "confidence": 0.991 + }, + { + "text": "got", + "start": 74.46, + "end": 74.5, + "confidence": 0.919 + }, + { + "text": "them,", + "start": 74.5, + "end": 74.54, + "confidence": 0.991 + }, + { + "text": "got", + "start": 74.54, + "end": 74.58, + "confidence": 0.922 + }, + { + "text": "them,", + "start": 74.58, + "end": 74.62, + "confidence": 0.992 + }, + { + "text": "got", + "start": 74.62, + "end": 74.66, + "confidence": 0.93 + }, + { + "text": "them,", + "start": 74.66, + "end": 74.7, + "confidence": 0.992 + }, + { + "text": "got", + "start": 74.7, + "end": 74.74, + "confidence": 0.932 + }, + { + "text": "them,", + "start": 74.74, + "end": 74.78, + "confidence": 0.992 + }, + { + "text": "got", + "start": 74.78, + "end": 74.82, + "confidence": 0.937 + }, + { + "text": "them,", + "start": 74.82, + "end": 74.86, + "confidence": 0.992 + }, + { + "text": "got", + "start": 74.86, + "end": 74.9, + "confidence": 0.943 + }, + { + "text": "them,", + "start": 74.9, + "end": 74.94, + "confidence": 0.992 + }, + { + "text": "got", + "start": 74.94, + "end": 74.98, + "confidence": 0.944 + }, + { + "text": "them,", + "start": 74.98, + "end": 75.02, + "confidence": 0.992 + }, + { + "text": "got", + 
"start": 75.02, + "end": 75.06, + "confidence": 0.95 + }, + { + "text": "them,", + "start": 75.06, + "end": 75.1, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.1, + "end": 75.14, + "confidence": 0.953 + }, + { + "text": "them,", + "start": 75.14, + "end": 75.18, + "confidence": 0.993 + }, + { + "text": "got", + "start": 75.18, + "end": 75.22, + "confidence": 0.955 + }, + { + "text": "them,", + "start": 75.22, + "end": 75.26, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.26, + "end": 75.3, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 75.3, + "end": 75.34, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.34, + "end": 75.38, + "confidence": 0.96 + }, + { + "text": "them,", + "start": 75.38, + "end": 75.42, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.42, + "end": 75.46, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 75.46, + "end": 75.5, + "confidence": 0.993 + }, + { + "text": "got", + "start": 75.5, + "end": 75.54, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 75.54, + "end": 75.58, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.58, + "end": 75.62, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 75.62, + "end": 75.66, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.66, + "end": 75.7, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 75.7, + "end": 75.74, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.74, + "end": 75.78, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 75.78, + "end": 75.82, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.82, + "end": 75.86, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 75.86, + "end": 75.9, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.9, + "end": 75.94, + "confidence": 0.969 + }, + { + "text": "them,", + "start": 75.94, + "end": 75.98, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.98, + 
"end": 76.02, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 76.02, + "end": 76.06, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.06, + "end": 76.1, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 76.1, + "end": 76.14, + "confidence": 0.993 + }, + { + "text": "got", + "start": 76.14, + "end": 76.18, + "confidence": 0.972 + }, + { + "text": "them,", + "start": 76.18, + "end": 76.22, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.22, + "end": 76.26, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 76.26, + "end": 76.3, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.3, + "end": 76.34, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 76.34, + "end": 76.38, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.38, + "end": 76.42, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 76.42, + "end": 76.46, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.46, + "end": 76.5, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 76.5, + "end": 76.54, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.54, + "end": 76.58, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 76.58, + "end": 76.62, + "confidence": 0.993 + }, + { + "text": "got", + "start": 76.62, + "end": 76.66, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 76.66, + "end": 76.7, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.7, + "end": 76.74, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 76.74, + "end": 76.78, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.78, + "end": 76.82, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 76.82, + "end": 76.86, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.86, + "end": 76.9, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 76.9, + "end": 76.94, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.94, + "end": 76.98, + 
"confidence": 0.976 + }, + { + "text": "them,", + "start": 76.98, + "end": 77.02, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.02, + "end": 77.06, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 77.06, + "end": 77.1, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.1, + "end": 77.14, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 77.14, + "end": 77.18, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.18, + "end": 77.22, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 77.22, + "end": 77.26, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.26, + "end": 77.3, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 77.3, + "end": 77.34, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.34, + "end": 77.38, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 77.38, + "end": 77.42, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.42, + "end": 77.46, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 77.46, + "end": 77.5, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.5, + "end": 77.54, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 77.54, + "end": 77.58, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.58, + "end": 77.62, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 77.62, + "end": 77.66, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.66, + "end": 77.7, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 77.7, + "end": 77.74, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.74, + "end": 77.78, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 77.78, + "end": 77.82, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.82, + "end": 77.86, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 77.86, + "end": 79.56, + "confidence": 0.994 + }, + { + "text": "got", + "start": 79.56, + "end": 79.6, + "confidence": 0.981 
+ }, + { + "text": "them", + "start": 79.6, + "end": 85.0, + "confidence": 0.994 + } + ] + }, + { + "id": 5, + "seek": 8500, + "start": 85.02, + "end": 115.0, + "text": " got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 
658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.2, + "avg_logprob": -0.04965524716227578, + "compression_ratio": 29.52, + "no_speech_prob": 0.6971923112869263, + "confidence": 0.948, + "words": [ + { + "text": "got", + "start": 85.02, + "end": 85.46, + "confidence": 0.438 + }, + { + "text": "them,", + "start": 85.46, + "end": 86.26, + "confidence": 0.943 + }, + { + "text": "got", + "start": 86.26, + "end": 86.78, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 86.78, + "end": 87.18, + "confidence": 0.992 + }, + { + "text": "got", + "start": 87.18, + "end": 87.8, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 87.8, + "end": 87.84, + "confidence": 0.993 + }, + { + "text": "got", + "start": 87.84, + "end": 88.6, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 88.6, + "end": 88.64, + "confidence": 0.991 + }, + { + "text": "got", + "start": 88.64, + "end": 88.9, + "confidence": 0.934 + }, + { + "text": "them,", + "start": 88.9, + "end": 88.94, + "confidence": 0.988 + }, + { + "text": "got", + "start": 88.94, + "end": 88.98, + "confidence": 0.915 + }, + { + "text": "them,", + "start": 88.98, + "end": 89.02, + "confidence": 0.987 + }, + { + "text": "got", + "start": 89.02, + "end": 89.06, + "confidence": 0.9 + }, + { + "text": "them,", + "start": 89.06, + "end": 89.1, + "confidence": 0.985 + }, + { + "text": "got", + "start": 89.1, + "end": 89.14, + "confidence": 0.899 + }, + { + "text": "them,", + "start": 89.14, + "end": 89.18, + "confidence": 0.985 + }, + { + "text": "got", + "start": 89.18, + "end": 89.22, + "confidence": 0.907 + }, + { + "text": 
"them,", + "start": 89.22, + "end": 89.26, + "confidence": 0.982 + }, + { + "text": "got", + "start": 89.26, + "end": 89.3, + "confidence": 0.905 + }, + { + "text": "them,", + "start": 89.3, + "end": 89.34, + "confidence": 0.971 + }, + { + "text": "got", + "start": 89.34, + "end": 89.38, + "confidence": 0.866 + }, + { + "text": "them,", + "start": 89.38, + "end": 89.42, + "confidence": 0.959 + }, + { + "text": "got", + "start": 89.42, + "end": 89.46, + "confidence": 0.845 + }, + { + "text": "them,", + "start": 89.46, + "end": 89.5, + "confidence": 0.96 + }, + { + "text": "got", + "start": 89.5, + "end": 89.54, + "confidence": 0.861 + }, + { + "text": "them,", + "start": 89.54, + "end": 89.58, + "confidence": 0.963 + }, + { + "text": "got", + "start": 89.58, + "end": 89.62, + "confidence": 0.88 + }, + { + "text": "them,", + "start": 89.62, + "end": 89.66, + "confidence": 0.965 + }, + { + "text": "got", + "start": 89.66, + "end": 89.7, + "confidence": 0.888 + }, + { + "text": "them,", + "start": 89.7, + "end": 89.74, + "confidence": 0.966 + }, + { + "text": "got", + "start": 89.74, + "end": 89.78, + "confidence": 0.887 + }, + { + "text": "them,", + "start": 89.78, + "end": 89.82, + "confidence": 0.966 + }, + { + "text": "got", + "start": 89.82, + "end": 89.86, + "confidence": 0.875 + }, + { + "text": "them,", + "start": 89.86, + "end": 89.9, + "confidence": 0.968 + }, + { + "text": "got", + "start": 89.9, + "end": 89.94, + "confidence": 0.871 + }, + { + "text": "them,", + "start": 89.94, + "end": 89.98, + "confidence": 0.968 + }, + { + "text": "got", + "start": 89.98, + "end": 90.02, + "confidence": 0.868 + }, + { + "text": "them,", + "start": 90.02, + "end": 90.06, + "confidence": 0.966 + }, + { + "text": "got", + "start": 90.06, + "end": 90.1, + "confidence": 0.865 + }, + { + "text": "them,", + "start": 90.1, + "end": 90.14, + "confidence": 0.964 + }, + { + "text": "got", + "start": 90.14, + "end": 90.18, + "confidence": 0.865 + }, + { + "text": "them,", + "start": 
90.18, + "end": 90.22, + "confidence": 0.965 + }, + { + "text": "got", + "start": 90.22, + "end": 90.26, + "confidence": 0.86 + }, + { + "text": "them,", + "start": 90.26, + "end": 90.3, + "confidence": 0.965 + }, + { + "text": "got", + "start": 90.3, + "end": 90.34, + "confidence": 0.867 + }, + { + "text": "them,", + "start": 90.34, + "end": 90.38, + "confidence": 0.966 + }, + { + "text": "got", + "start": 90.38, + "end": 90.42, + "confidence": 0.866 + }, + { + "text": "them,", + "start": 90.42, + "end": 90.46, + "confidence": 0.967 + }, + { + "text": "got", + "start": 90.46, + "end": 90.5, + "confidence": 0.866 + }, + { + "text": "them,", + "start": 90.5, + "end": 90.54, + "confidence": 0.968 + }, + { + "text": "got", + "start": 90.54, + "end": 90.58, + "confidence": 0.87 + }, + { + "text": "them,", + "start": 90.58, + "end": 90.62, + "confidence": 0.969 + }, + { + "text": "got", + "start": 90.62, + "end": 90.66, + "confidence": 0.874 + }, + { + "text": "them,", + "start": 90.66, + "end": 90.7, + "confidence": 0.971 + }, + { + "text": "got", + "start": 90.7, + "end": 90.74, + "confidence": 0.874 + }, + { + "text": "them,", + "start": 90.74, + "end": 90.78, + "confidence": 0.971 + }, + { + "text": "got", + "start": 90.78, + "end": 90.82, + "confidence": 0.88 + }, + { + "text": "them,", + "start": 90.82, + "end": 90.86, + "confidence": 0.971 + }, + { + "text": "got", + "start": 90.86, + "end": 90.9, + "confidence": 0.884 + }, + { + "text": "them,", + "start": 90.9, + "end": 90.94, + "confidence": 0.973 + }, + { + "text": "got", + "start": 90.94, + "end": 90.98, + "confidence": 0.883 + }, + { + "text": "them,", + "start": 90.98, + "end": 91.02, + "confidence": 0.973 + }, + { + "text": "got", + "start": 91.02, + "end": 91.06, + "confidence": 0.884 + }, + { + "text": "them,", + "start": 91.06, + "end": 91.1, + "confidence": 0.973 + }, + { + "text": "got", + "start": 91.1, + "end": 91.14, + "confidence": 0.893 + }, + { + "text": "them,", + "start": 91.14, + "end": 
91.18, + "confidence": 0.975 + }, + { + "text": "got", + "start": 91.18, + "end": 91.22, + "confidence": 0.894 + }, + { + "text": "them,", + "start": 91.22, + "end": 91.26, + "confidence": 0.975 + }, + { + "text": "got", + "start": 91.26, + "end": 91.3, + "confidence": 0.9 + }, + { + "text": "them,", + "start": 91.3, + "end": 91.34, + "confidence": 0.976 + }, + { + "text": "got", + "start": 91.34, + "end": 91.38, + "confidence": 0.906 + }, + { + "text": "them,", + "start": 91.38, + "end": 91.42, + "confidence": 0.977 + }, + { + "text": "got", + "start": 91.42, + "end": 91.46, + "confidence": 0.905 + }, + { + "text": "them,", + "start": 91.46, + "end": 91.5, + "confidence": 0.978 + }, + { + "text": "got", + "start": 91.5, + "end": 91.54, + "confidence": 0.915 + }, + { + "text": "them,", + "start": 91.54, + "end": 91.58, + "confidence": 0.979 + }, + { + "text": "got", + "start": 91.58, + "end": 91.62, + "confidence": 0.92 + }, + { + "text": "them,", + "start": 91.62, + "end": 91.66, + "confidence": 0.98 + }, + { + "text": "got", + "start": 91.66, + "end": 91.7, + "confidence": 0.922 + }, + { + "text": "them,", + "start": 91.7, + "end": 91.74, + "confidence": 0.981 + }, + { + "text": "got", + "start": 91.74, + "end": 91.78, + "confidence": 0.925 + }, + { + "text": "them,", + "start": 91.78, + "end": 91.82, + "confidence": 0.982 + }, + { + "text": "got", + "start": 91.82, + "end": 91.86, + "confidence": 0.932 + }, + { + "text": "them,", + "start": 91.86, + "end": 91.9, + "confidence": 0.983 + }, + { + "text": "got", + "start": 91.9, + "end": 91.94, + "confidence": 0.936 + }, + { + "text": "them,", + "start": 91.94, + "end": 91.98, + "confidence": 0.984 + }, + { + "text": "got", + "start": 91.98, + "end": 92.02, + "confidence": 0.935 + }, + { + "text": "them,", + "start": 92.02, + "end": 92.06, + "confidence": 0.985 + }, + { + "text": "got", + "start": 92.06, + "end": 92.1, + "confidence": 0.941 + }, + { + "text": "them,", + "start": 92.1, + "end": 92.14, + 
"confidence": 0.986 + }, + { + "text": "got", + "start": 92.14, + "end": 92.18, + "confidence": 0.946 + }, + { + "text": "them,", + "start": 92.18, + "end": 92.22, + "confidence": 0.986 + }, + { + "text": "got", + "start": 92.22, + "end": 92.26, + "confidence": 0.95 + }, + { + "text": "them,", + "start": 92.26, + "end": 92.3, + "confidence": 0.987 + }, + { + "text": "got", + "start": 92.3, + "end": 92.34, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 92.34, + "end": 92.38, + "confidence": 0.987 + }, + { + "text": "got", + "start": 92.38, + "end": 92.42, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 92.42, + "end": 92.46, + "confidence": 0.989 + }, + { + "text": "got", + "start": 92.46, + "end": 92.5, + "confidence": 0.959 + }, + { + "text": "them,", + "start": 92.5, + "end": 92.54, + "confidence": 0.989 + }, + { + "text": "got", + "start": 92.54, + "end": 92.58, + "confidence": 0.959 + }, + { + "text": "them,", + "start": 92.58, + "end": 92.62, + "confidence": 0.99 + }, + { + "text": "got", + "start": 92.62, + "end": 92.66, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 92.66, + "end": 92.7, + "confidence": 0.991 + }, + { + "text": "got", + "start": 92.7, + "end": 92.74, + "confidence": 0.966 + }, + { + "text": "them,", + "start": 92.74, + "end": 92.78, + "confidence": 0.992 + }, + { + "text": "got", + "start": 92.78, + "end": 92.82, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 92.82, + "end": 92.86, + "confidence": 0.992 + }, + { + "text": "got", + "start": 92.86, + "end": 92.9, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 92.9, + "end": 92.94, + "confidence": 0.992 + }, + { + "text": "got", + "start": 92.94, + "end": 92.98, + "confidence": 0.971 + }, + { + "text": "them,", + "start": 92.98, + "end": 93.02, + "confidence": 0.993 + }, + { + "text": "got", + "start": 93.02, + "end": 93.06, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 93.06, + "end": 93.1, + "confidence": 0.993 
+ }, + { + "text": "got", + "start": 93.1, + "end": 93.14, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 93.14, + "end": 93.18, + "confidence": 0.993 + }, + { + "text": "got", + "start": 93.18, + "end": 93.22, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 93.22, + "end": 93.26, + "confidence": 0.994 + }, + { + "text": "got", + "start": 93.26, + "end": 93.3, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 93.3, + "end": 93.34, + "confidence": 0.994 + }, + { + "text": "got", + "start": 93.34, + "end": 93.38, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 93.38, + "end": 93.42, + "confidence": 0.994 + }, + { + "text": "got", + "start": 93.42, + "end": 93.46, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 93.46, + "end": 93.5, + "confidence": 0.995 + }, + { + "text": "got", + "start": 93.5, + "end": 93.54, + "confidence": 0.979 + }, + { + "text": "them,", + "start": 93.54, + "end": 93.58, + "confidence": 0.995 + }, + { + "text": "got", + "start": 93.58, + "end": 93.62, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 93.62, + "end": 93.66, + "confidence": 0.995 + }, + { + "text": "got", + "start": 93.66, + "end": 93.7, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 93.7, + "end": 93.74, + "confidence": 0.995 + }, + { + "text": "got", + "start": 93.74, + "end": 93.78, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 93.78, + "end": 93.82, + "confidence": 0.995 + }, + { + "text": "got", + "start": 93.82, + "end": 93.86, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 93.86, + "end": 93.9, + "confidence": 0.996 + }, + { + "text": "got", + "start": 93.9, + "end": 93.94, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 93.94, + "end": 93.98, + "confidence": 0.996 + }, + { + "text": "got", + "start": 93.98, + "end": 94.02, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 94.02, + "end": 94.06, + "confidence": 0.996 + }, + { + "text": 
"got", + "start": 94.06, + "end": 94.1, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 94.1, + "end": 94.14, + "confidence": 0.996 + }, + { + "text": "got", + "start": 94.14, + "end": 94.18, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 94.18, + "end": 94.22, + "confidence": 0.996 + }, + { + "text": "got", + "start": 94.22, + "end": 94.26, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 94.26, + "end": 94.3, + "confidence": 0.996 + }, + { + "text": "got", + "start": 94.3, + "end": 94.34, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 94.34, + "end": 94.38, + "confidence": 0.996 + }, + { + "text": "got", + "start": 94.38, + "end": 94.42, + "confidence": 0.987 + }, + { + "text": "them", + "start": 94.42, + "end": 115.0, + "confidence": 0.996 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases.cpu/stucked_lm_apollo11.mp3.words.json b/tests/expected/corner_cases.cpu/stucked_lm_apollo11.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..527def1f7142f05644333ecb9e358b7e067d0468 --- /dev/null +++ b/tests/expected/corner_cases.cpu/stucked_lm_apollo11.mp3.words.json @@ -0,0 +1,3814 @@ +{ + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA. Alright, okay, we like to say that they make the one that's on the helmet we're going to have in B1. And you can put the other one on the mic helmet with those GVA blizzard frames. 
Alright, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, 
got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.64, + "end": 6.94, + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA.", + "tokens": [ + 25187, + 2975, + 11, + 18717, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 24758, + 3334, + 12, + 20914, + 460, + 2634, + 15454, + 460, + 20914, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.7224321867290296, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.45045843720436096, + "confidence": 0.542, + "words": [ + { + "text": "Apollo", + "start": 0.64, + "end": 0.98, + "confidence": 0.155 + }, + { + "text": "11,", + "start": 0.98, + "end": 1.54, + "confidence": 0.977 + }, + { + "text": "Houston", + "start": 1.54, + "end": 1.8, + "confidence": 0.986 + }, + { + "text": "we", + "start": 1.8, + "end": 1.98, + "confidence": 0.52 + }, + { + "text": "got", + "start": 1.98, + "end": 2.16, + "confidence": 0.824 + }, + { + "text": "a", + "start": 2.16, + "end": 2.38, + "confidence": 0.989 + }, + { + "text": "recommendation", + "start": 2.38, + "end": 3.1, + "confidence": 0.968 + }, + { + "text": "for", + "start": 3.1, + "end": 3.52, + "confidence": 0.947 + }, + { + "text": "you", + "start": 3.52, + "end": 3.9, + "confidence": 0.984 + }, + { + "text": "on", + "start": 3.9, + "end": 4.28, + "confidence": 0.909 + }, + { + "text": "your", + "start": 4.28, + "end": 4.32, + "confidence": 0.971 + }, + { + "text": "Soyuz-VA", + "start": 4.32, + "end": 5.38, + "confidence": 0.26 + }, + { + "text": "GLEME", + "start": 5.38, + "end": 6.04, + "confidence": 0.478 + }, + { + "text": "GVA.", + "start": 6.04, + "end": 6.94, + "confidence": 0.436 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 10.82, + "end": 19.24, + "text": " Alright, okay, we 
like to say that they make the one that's on the helmet we're going to have in B1.", + "tokens": [ + 2798, + 11, + 1392, + 11, + 321, + 411, + 281, + 584, + 300, + 436, + 652, + 264, + 472, + 300, + 311, + 322, + 264, + 15922, + 321, + 434, + 516, + 281, + 362, + 294, + 363, + 16, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.7224321867290296, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.45045843720436096, + "confidence": 0.468, + "words": [ + { + "text": "Alright,", + "start": 10.82, + "end": 12.5, + "confidence": 0.309 + }, + { + "text": "okay,", + "start": 12.5, + "end": 12.94, + "confidence": 0.507 + }, + { + "text": "we", + "start": 12.94, + "end": 13.12, + "confidence": 0.61 + }, + { + "text": "like", + "start": 13.12, + "end": 13.48, + "confidence": 0.503 + }, + { + "text": "to", + "start": 13.48, + "end": 13.68, + "confidence": 0.264 + }, + { + "text": "say", + "start": 13.68, + "end": 14.98, + "confidence": 0.138 + }, + { + "text": "that", + "start": 14.98, + "end": 15.56, + "confidence": 0.2 + }, + { + "text": "they", + "start": 15.56, + "end": 15.6, + "confidence": 0.418 + }, + { + "text": "make", + "start": 15.6, + "end": 15.76, + "confidence": 0.398 + }, + { + "text": "the", + "start": 15.76, + "end": 15.92, + "confidence": 0.253 + }, + { + "text": "one", + "start": 15.92, + "end": 16.1, + "confidence": 0.608 + }, + { + "text": "that's", + "start": 16.1, + "end": 16.34, + "confidence": 0.443 + }, + { + "text": "on", + "start": 16.34, + "end": 16.62, + "confidence": 0.595 + }, + { + "text": "the", + "start": 16.62, + "end": 16.96, + "confidence": 0.871 + }, + { + "text": "helmet", + "start": 16.96, + "end": 17.36, + "confidence": 0.853 + }, + { + "text": "we're", + "start": 17.36, + "end": 17.86, + "confidence": 0.299 + }, + { + "text": "going", + "start": 17.86, + "end": 18.06, + "confidence": 0.6 + }, + { + "text": "to", + "start": 18.06, + "end": 18.22, + "confidence": 0.818 + }, + { + "text": "have", + "start": 18.22, + 
"end": 18.26, + "confidence": 0.84 + }, + { + "text": "in", + "start": 18.26, + "end": 18.42, + "confidence": 0.717 + }, + { + "text": "B1.", + "start": 18.42, + "end": 19.24, + "confidence": 0.764 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 20.1, + "end": 24.76, + "text": " And you can put the other one on the mic helmet with those GVA blizzard frames.", + "tokens": [ + 400, + 291, + 393, + 829, + 264, + 661, + 472, + 322, + 264, + 3123, + 15922, + 365, + 729, + 460, + 20914, + 888, + 31062, + 12083, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.7224321867290296, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.45045843720436096, + "confidence": 0.485, + "words": [ + { + "text": "And", + "start": 20.1, + "end": 20.24, + "confidence": 0.67 + }, + { + "text": "you", + "start": 20.24, + "end": 20.38, + "confidence": 0.947 + }, + { + "text": "can", + "start": 20.38, + "end": 20.54, + "confidence": 0.729 + }, + { + "text": "put", + "start": 20.54, + "end": 20.72, + "confidence": 0.98 + }, + { + "text": "the", + "start": 20.72, + "end": 20.88, + "confidence": 0.989 + }, + { + "text": "other", + "start": 20.88, + "end": 21.08, + "confidence": 0.991 + }, + { + "text": "one", + "start": 21.08, + "end": 21.26, + "confidence": 0.978 + }, + { + "text": "on", + "start": 21.26, + "end": 21.68, + "confidence": 0.989 + }, + { + "text": "the", + "start": 21.68, + "end": 21.96, + "confidence": 0.52 + }, + { + "text": "mic", + "start": 21.96, + "end": 22.58, + "confidence": 0.413 + }, + { + "text": "helmet", + "start": 22.58, + "end": 22.94, + "confidence": 0.882 + }, + { + "text": "with", + "start": 22.94, + "end": 23.2, + "confidence": 0.425 + }, + { + "text": "those", + "start": 23.2, + "end": 23.5, + "confidence": 0.465 + }, + { + "text": "GVA", + "start": 23.5, + "end": 23.88, + "confidence": 0.216 + }, + { + "text": "blizzard", + "start": 23.88, + "end": 24.32, + "confidence": 0.107 + }, + { + "text": "frames.", + "start": 24.32, + "end": 24.76, + 
"confidence": 0.254 + } + ] + }, + { + "id": 3, + "seek": 2500, + "start": 25.02, + "end": 55.0, + "text": " Alright, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 2798, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 
658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.1079042222764757, + "compression_ratio": 24.096774193548388, + "no_speech_prob": 0.00111382023897022, + "confidence": 0.936, + "words": [ + { + "text": "Alright,", + "start": 25.02, + "end": 31.98, + "confidence": 0.213 + }, + { + "text": "got", + "start": 31.98, + "end": 32.02, + "confidence": 0.333 + }, + { + "text": "them,", + "start": 32.02, + "end": 32.58, + "confidence": 0.334 + }, + { + "text": "got", + "start": 32.58, + "end": 33.08, + "confidence": 0.526 + }, + { + "text": "them,", + "start": 33.08, + "end": 33.78, + "confidence": 0.942 + }, + { + "text": "got", + "start": 33.78, + "end": 33.82, + "confidence": 0.539 + }, + { + "text": "them,", + "start": 33.82, + "end": 34.18, + "confidence": 0.924 + }, + { + "text": "got", + "start": 34.18, + "end": 34.22, + "confidence": 0.48 + }, + { + "text": "them,", + "start": 34.22, + "end": 35.14, + "confidence": 0.907 + }, + { + "text": "got", + "start": 35.14, + "end": 35.18, + "confidence": 0.604 + }, + { + "text": "them,", + "start": 35.18, + "end": 35.22, + "confidence": 0.922 + }, + { + "text": "got", + "start": 35.22, + "end": 35.5, + "confidence": 0.661 + }, + { + "text": "them,", + "start": 35.5, + "end": 35.9, + "confidence": 0.948 + }, + { + "text": "got", + "start": 35.9, + "end": 35.94, + "confidence": 0.743 + }, + { + "text": "them,", + "start": 35.94, + "end": 35.98, + "confidence": 0.968 + }, + { + "text": "got", + "start": 35.98, + "end": 36.02, + "confidence": 0.825 + }, + { + "text": "them,", + "start": 36.02, + "end": 36.06, + "confidence": 0.975 + }, + { + "text": "got", + "start": 
36.06, + "end": 36.1, + "confidence": 0.839 + }, + { + "text": "them,", + "start": 36.1, + "end": 36.14, + "confidence": 0.981 + }, + { + "text": "got", + "start": 36.14, + "end": 36.18, + "confidence": 0.891 + }, + { + "text": "them,", + "start": 36.18, + "end": 36.22, + "confidence": 0.985 + }, + { + "text": "got", + "start": 36.22, + "end": 36.26, + "confidence": 0.919 + }, + { + "text": "them,", + "start": 36.26, + "end": 36.3, + "confidence": 0.987 + }, + { + "text": "got", + "start": 36.3, + "end": 36.34, + "confidence": 0.937 + }, + { + "text": "them,", + "start": 36.34, + "end": 36.38, + "confidence": 0.988 + }, + { + "text": "got", + "start": 36.38, + "end": 36.42, + "confidence": 0.941 + }, + { + "text": "them,", + "start": 36.42, + "end": 36.46, + "confidence": 0.987 + }, + { + "text": "got", + "start": 36.46, + "end": 36.5, + "confidence": 0.944 + }, + { + "text": "them,", + "start": 36.5, + "end": 36.54, + "confidence": 0.989 + }, + { + "text": "got", + "start": 36.54, + "end": 36.58, + "confidence": 0.945 + }, + { + "text": "them,", + "start": 36.58, + "end": 36.62, + "confidence": 0.99 + }, + { + "text": "got", + "start": 36.62, + "end": 36.66, + "confidence": 0.943 + }, + { + "text": "them,", + "start": 36.66, + "end": 36.7, + "confidence": 0.99 + }, + { + "text": "got", + "start": 36.7, + "end": 36.74, + "confidence": 0.944 + }, + { + "text": "them,", + "start": 36.74, + "end": 36.78, + "confidence": 0.991 + }, + { + "text": "got", + "start": 36.78, + "end": 36.82, + "confidence": 0.945 + }, + { + "text": "them,", + "start": 36.82, + "end": 36.86, + "confidence": 0.991 + }, + { + "text": "got", + "start": 36.86, + "end": 36.9, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 36.9, + "end": 36.94, + "confidence": 0.991 + }, + { + "text": "got", + "start": 36.94, + "end": 36.98, + "confidence": 0.952 + }, + { + "text": "them,", + "start": 36.98, + "end": 37.02, + "confidence": 0.992 + }, + { + "text": "got", + "start": 37.02, + "end": 
37.06, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 37.06, + "end": 37.1, + "confidence": 0.992 + }, + { + "text": "got", + "start": 37.1, + "end": 37.14, + "confidence": 0.958 + }, + { + "text": "them,", + "start": 37.14, + "end": 37.18, + "confidence": 0.992 + }, + { + "text": "got", + "start": 37.18, + "end": 37.22, + "confidence": 0.958 + }, + { + "text": "them,", + "start": 37.22, + "end": 37.26, + "confidence": 0.992 + }, + { + "text": "got", + "start": 37.26, + "end": 37.3, + "confidence": 0.959 + }, + { + "text": "them,", + "start": 37.3, + "end": 37.34, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.34, + "end": 37.38, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 37.38, + "end": 37.42, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.42, + "end": 37.46, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 37.46, + "end": 37.5, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.5, + "end": 37.54, + "confidence": 0.962 + }, + { + "text": "them,", + "start": 37.54, + "end": 37.58, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.58, + "end": 37.62, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 37.62, + "end": 37.66, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.66, + "end": 37.7, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 37.7, + "end": 37.74, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.74, + "end": 37.78, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 37.78, + "end": 37.82, + "confidence": 0.993 + }, + { + "text": "got", + "start": 37.82, + "end": 37.86, + "confidence": 0.966 + }, + { + "text": "them,", + "start": 37.86, + "end": 37.9, + "confidence": 0.994 + }, + { + "text": "got", + "start": 37.9, + "end": 37.94, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 37.94, + "end": 37.98, + "confidence": 0.994 + }, + { + "text": "got", + "start": 37.98, + "end": 38.02, + 
"confidence": 0.968 + }, + { + "text": "them,", + "start": 38.02, + "end": 38.06, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.06, + "end": 38.1, + "confidence": 0.969 + }, + { + "text": "them,", + "start": 38.1, + "end": 38.14, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.14, + "end": 38.18, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 38.18, + "end": 38.22, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.22, + "end": 38.26, + "confidence": 0.971 + }, + { + "text": "them,", + "start": 38.26, + "end": 38.3, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.3, + "end": 38.34, + "confidence": 0.972 + }, + { + "text": "them,", + "start": 38.34, + "end": 38.38, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.38, + "end": 38.42, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 38.42, + "end": 38.46, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.46, + "end": 38.5, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 38.5, + "end": 38.54, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.54, + "end": 38.58, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 38.58, + "end": 38.62, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.62, + "end": 38.66, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 38.66, + "end": 38.7, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.7, + "end": 38.74, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 38.74, + "end": 38.78, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.78, + "end": 38.82, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 38.82, + "end": 38.86, + "confidence": 0.994 + }, + { + "text": "got", + "start": 38.86, + "end": 38.9, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 38.9, + "end": 38.94, + "confidence": 0.995 + }, + { + "text": "got", + "start": 38.94, + "end": 38.98, + "confidence": 0.984 
+ }, + { + "text": "them,", + "start": 38.98, + "end": 39.02, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.02, + "end": 39.06, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 39.06, + "end": 39.1, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.1, + "end": 39.14, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 39.14, + "end": 39.18, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.18, + "end": 39.22, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 39.22, + "end": 39.26, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.26, + "end": 39.3, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 39.3, + "end": 39.34, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.34, + "end": 39.38, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 39.38, + "end": 39.42, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.42, + "end": 39.46, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 39.46, + "end": 39.5, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.5, + "end": 39.54, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 39.54, + "end": 39.58, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.58, + "end": 39.62, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 39.62, + "end": 39.66, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.66, + "end": 39.7, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 39.7, + "end": 39.74, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.74, + "end": 39.78, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 39.78, + "end": 39.82, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.82, + "end": 39.86, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 39.86, + "end": 39.9, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.9, + "end": 39.94, + "confidence": 0.992 + }, + { + "text": 
"them,", + "start": 39.94, + "end": 39.98, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.98, + "end": 40.02, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.02, + "end": 40.06, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.06, + "end": 40.1, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.1, + "end": 40.14, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.14, + "end": 40.18, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.18, + "end": 40.22, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.22, + "end": 40.26, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 40.26, + "end": 40.3, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.3, + "end": 40.34, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 40.34, + "end": 40.38, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.38, + "end": 40.42, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 40.42, + "end": 40.46, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.46, + "end": 40.5, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.5, + "end": 40.54, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.54, + "end": 40.58, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.58, + "end": 40.62, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.62, + "end": 40.66, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.66, + "end": 40.7, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.7, + "end": 40.74, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 40.74, + "end": 40.78, + "confidence": 0.996 + }, + { + "text": "got", + "start": 40.78, + "end": 40.82, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 40.82, + "end": 40.86, + "confidence": 0.996 + }, + { + "text": "got", + "start": 40.86, + "end": 40.9, + "confidence": 0.995 + }, + { + "text": "them,", + 
"start": 40.9, + "end": 40.94, + "confidence": 0.996 + }, + { + "text": "got", + "start": 40.94, + "end": 40.98, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 40.98, + "end": 41.02, + "confidence": 0.996 + }, + { + "text": "got", + "start": 41.02, + "end": 41.06, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 41.06, + "end": 42.94, + "confidence": 0.996 + }, + { + "text": "got", + "start": 42.94, + "end": 46.82, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 46.82, + "end": 47.8, + "confidence": 0.996 + }, + { + "text": "got", + "start": 47.8, + "end": 48.58, + "confidence": 0.996 + }, + { + "text": "them,", + "start": 48.58, + "end": 50.82, + "confidence": 0.996 + }, + { + "text": "got", + "start": 50.82, + "end": 51.64, + "confidence": 0.996 + }, + { + "text": "them", + "start": 51.64, + "end": 55.0, + "confidence": 0.997 + } + ] + }, + { + "id": 4, + "seek": 5500, + "start": 72.06, + "end": 85.0, + "text": " got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, 
+ 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.053046889369263245, + "compression_ratio": 29.52, + "no_speech_prob": 0.24410122632980347, + "confidence": 0.948, + "words": [ + { + "text": "got", + "start": 72.06, + "end": 72.1, + "confidence": 0.214 + }, + { + "text": "them,", + "start": 72.1, + "end": 72.14, + "confidence": 0.95 + }, + { + "text": "got", + "start": 72.14, + "end": 72.18, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 72.18, + "end": 72.22, + "confidence": 0.997 + }, + { + "text": "got", + "start": 72.22, + "end": 72.26, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 72.26, + "end": 72.3, + "confidence": 0.997 + }, + { + "text": "got", + "start": 72.3, + "end": 72.34, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 72.34, + 
"end": 72.38, + "confidence": 0.997 + }, + { + "text": "got", + "start": 72.38, + "end": 72.42, + "confidence": 0.938 + }, + { + "text": "them,", + "start": 72.42, + "end": 72.46, + "confidence": 0.993 + }, + { + "text": "got", + "start": 72.46, + "end": 72.5, + "confidence": 0.912 + }, + { + "text": "them,", + "start": 72.5, + "end": 72.54, + "confidence": 0.988 + }, + { + "text": "got", + "start": 72.54, + "end": 72.58, + "confidence": 0.873 + }, + { + "text": "them,", + "start": 72.58, + "end": 72.62, + "confidence": 0.982 + }, + { + "text": "got", + "start": 72.62, + "end": 72.66, + "confidence": 0.863 + }, + { + "text": "them,", + "start": 72.66, + "end": 72.7, + "confidence": 0.984 + }, + { + "text": "got", + "start": 72.7, + "end": 72.74, + "confidence": 0.889 + }, + { + "text": "them,", + "start": 72.74, + "end": 72.78, + "confidence": 0.984 + }, + { + "text": "got", + "start": 72.78, + "end": 72.82, + "confidence": 0.852 + }, + { + "text": "them,", + "start": 72.82, + "end": 72.86, + "confidence": 0.925 + }, + { + "text": "got", + "start": 72.86, + "end": 72.9, + "confidence": 0.56 + }, + { + "text": "them,", + "start": 72.9, + "end": 72.94, + "confidence": 0.958 + }, + { + "text": "got", + "start": 72.94, + "end": 72.98, + "confidence": 0.81 + }, + { + "text": "them,", + "start": 72.98, + "end": 73.02, + "confidence": 0.976 + }, + { + "text": "got", + "start": 73.02, + "end": 73.06, + "confidence": 0.843 + }, + { + "text": "them,", + "start": 73.06, + "end": 73.1, + "confidence": 0.977 + }, + { + "text": "got", + "start": 73.1, + "end": 73.14, + "confidence": 0.835 + }, + { + "text": "them,", + "start": 73.14, + "end": 73.18, + "confidence": 0.977 + }, + { + "text": "got", + "start": 73.18, + "end": 73.22, + "confidence": 0.836 + }, + { + "text": "them,", + "start": 73.22, + "end": 73.26, + "confidence": 0.979 + }, + { + "text": "got", + "start": 73.26, + "end": 73.3, + "confidence": 0.853 + }, + { + "text": "them,", + "start": 73.3, + "end": 73.34, + 
"confidence": 0.983 + }, + { + "text": "got", + "start": 73.34, + "end": 73.38, + "confidence": 0.874 + }, + { + "text": "them,", + "start": 73.38, + "end": 73.42, + "confidence": 0.986 + }, + { + "text": "got", + "start": 73.42, + "end": 73.46, + "confidence": 0.892 + }, + { + "text": "them,", + "start": 73.46, + "end": 73.5, + "confidence": 0.985 + }, + { + "text": "got", + "start": 73.5, + "end": 73.54, + "confidence": 0.885 + }, + { + "text": "them,", + "start": 73.54, + "end": 73.58, + "confidence": 0.986 + }, + { + "text": "got", + "start": 73.58, + "end": 73.62, + "confidence": 0.877 + }, + { + "text": "them,", + "start": 73.62, + "end": 73.66, + "confidence": 0.986 + }, + { + "text": "got", + "start": 73.66, + "end": 73.7, + "confidence": 0.873 + }, + { + "text": "them,", + "start": 73.7, + "end": 73.74, + "confidence": 0.987 + }, + { + "text": "got", + "start": 73.74, + "end": 73.78, + "confidence": 0.871 + }, + { + "text": "them,", + "start": 73.78, + "end": 73.82, + "confidence": 0.987 + }, + { + "text": "got", + "start": 73.82, + "end": 73.86, + "confidence": 0.88 + }, + { + "text": "them,", + "start": 73.86, + "end": 73.9, + "confidence": 0.988 + }, + { + "text": "got", + "start": 73.9, + "end": 73.94, + "confidence": 0.883 + }, + { + "text": "them,", + "start": 73.94, + "end": 73.98, + "confidence": 0.989 + }, + { + "text": "got", + "start": 73.98, + "end": 74.02, + "confidence": 0.889 + }, + { + "text": "them,", + "start": 74.02, + "end": 74.06, + "confidence": 0.989 + }, + { + "text": "got", + "start": 74.06, + "end": 74.1, + "confidence": 0.895 + }, + { + "text": "them,", + "start": 74.1, + "end": 74.14, + "confidence": 0.99 + }, + { + "text": "got", + "start": 74.14, + "end": 74.18, + "confidence": 0.902 + }, + { + "text": "them,", + "start": 74.18, + "end": 74.22, + "confidence": 0.99 + }, + { + "text": "got", + "start": 74.22, + "end": 74.26, + "confidence": 0.905 + }, + { + "text": "them,", + "start": 74.26, + "end": 74.3, + "confidence": 0.991 
+ }, + { + "text": "got", + "start": 74.3, + "end": 74.34, + "confidence": 0.911 + }, + { + "text": "them,", + "start": 74.34, + "end": 74.38, + "confidence": 0.991 + }, + { + "text": "got", + "start": 74.38, + "end": 74.42, + "confidence": 0.916 + }, + { + "text": "them,", + "start": 74.42, + "end": 74.46, + "confidence": 0.991 + }, + { + "text": "got", + "start": 74.46, + "end": 74.5, + "confidence": 0.919 + }, + { + "text": "them,", + "start": 74.5, + "end": 74.54, + "confidence": 0.991 + }, + { + "text": "got", + "start": 74.54, + "end": 74.58, + "confidence": 0.922 + }, + { + "text": "them,", + "start": 74.58, + "end": 74.62, + "confidence": 0.992 + }, + { + "text": "got", + "start": 74.62, + "end": 74.66, + "confidence": 0.93 + }, + { + "text": "them,", + "start": 74.66, + "end": 74.7, + "confidence": 0.992 + }, + { + "text": "got", + "start": 74.7, + "end": 74.74, + "confidence": 0.932 + }, + { + "text": "them,", + "start": 74.74, + "end": 74.78, + "confidence": 0.992 + }, + { + "text": "got", + "start": 74.78, + "end": 74.82, + "confidence": 0.937 + }, + { + "text": "them,", + "start": 74.82, + "end": 74.86, + "confidence": 0.992 + }, + { + "text": "got", + "start": 74.86, + "end": 74.9, + "confidence": 0.943 + }, + { + "text": "them,", + "start": 74.9, + "end": 74.94, + "confidence": 0.992 + }, + { + "text": "got", + "start": 74.94, + "end": 74.98, + "confidence": 0.944 + }, + { + "text": "them,", + "start": 74.98, + "end": 75.02, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.02, + "end": 75.06, + "confidence": 0.95 + }, + { + "text": "them,", + "start": 75.06, + "end": 75.1, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.1, + "end": 75.14, + "confidence": 0.953 + }, + { + "text": "them,", + "start": 75.14, + "end": 75.18, + "confidence": 0.993 + }, + { + "text": "got", + "start": 75.18, + "end": 75.22, + "confidence": 0.955 + }, + { + "text": "them,", + "start": 75.22, + "end": 75.26, + "confidence": 0.992 + }, + { + "text": 
"got", + "start": 75.26, + "end": 75.3, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 75.3, + "end": 75.34, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.34, + "end": 75.38, + "confidence": 0.96 + }, + { + "text": "them,", + "start": 75.38, + "end": 75.42, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.42, + "end": 75.46, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 75.46, + "end": 75.5, + "confidence": 0.993 + }, + { + "text": "got", + "start": 75.5, + "end": 75.54, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 75.54, + "end": 75.58, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.58, + "end": 75.62, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 75.62, + "end": 75.66, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.66, + "end": 75.7, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 75.7, + "end": 75.74, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.74, + "end": 75.78, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 75.78, + "end": 75.82, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.82, + "end": 75.86, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 75.86, + "end": 75.9, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.9, + "end": 75.94, + "confidence": 0.969 + }, + { + "text": "them,", + "start": 75.94, + "end": 75.98, + "confidence": 0.992 + }, + { + "text": "got", + "start": 75.98, + "end": 76.02, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 76.02, + "end": 76.06, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.06, + "end": 76.1, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 76.1, + "end": 76.14, + "confidence": 0.993 + }, + { + "text": "got", + "start": 76.14, + "end": 76.18, + "confidence": 0.972 + }, + { + "text": "them,", + "start": 76.18, + "end": 76.22, + "confidence": 0.992 + }, + { + "text": "got", + "start": 
76.22, + "end": 76.26, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 76.26, + "end": 76.3, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.3, + "end": 76.34, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 76.34, + "end": 76.38, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.38, + "end": 76.42, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 76.42, + "end": 76.46, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.46, + "end": 76.5, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 76.5, + "end": 76.54, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.54, + "end": 76.58, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 76.58, + "end": 76.62, + "confidence": 0.993 + }, + { + "text": "got", + "start": 76.62, + "end": 76.66, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 76.66, + "end": 76.7, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.7, + "end": 76.74, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 76.74, + "end": 76.78, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.78, + "end": 76.82, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 76.82, + "end": 76.86, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.86, + "end": 76.9, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 76.9, + "end": 76.94, + "confidence": 0.992 + }, + { + "text": "got", + "start": 76.94, + "end": 76.98, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 76.98, + "end": 77.02, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.02, + "end": 77.06, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 77.06, + "end": 77.1, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.1, + "end": 77.14, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 77.14, + "end": 77.18, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.18, + "end": 
77.22, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 77.22, + "end": 77.26, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.26, + "end": 77.3, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 77.3, + "end": 77.34, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.34, + "end": 77.38, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 77.38, + "end": 77.42, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.42, + "end": 77.46, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 77.46, + "end": 77.5, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.5, + "end": 77.54, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 77.54, + "end": 77.58, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.58, + "end": 77.62, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 77.62, + "end": 77.66, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.66, + "end": 77.7, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 77.7, + "end": 77.74, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.74, + "end": 77.78, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 77.78, + "end": 77.82, + "confidence": 0.993 + }, + { + "text": "got", + "start": 77.82, + "end": 77.86, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 77.86, + "end": 79.56, + "confidence": 0.994 + }, + { + "text": "got", + "start": 79.56, + "end": 79.6, + "confidence": 0.981 + }, + { + "text": "them", + "start": 79.6, + "end": 85.0, + "confidence": 0.994 + } + ] + }, + { + "id": 5, + "seek": 8500, + "start": 85.02, + "end": 115.0, + "text": " got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got 
them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.04965524716227578, + 
"compression_ratio": 29.52, + "no_speech_prob": 0.6971923112869263, + "confidence": 0.948, + "words": [ + { + "text": "got", + "start": 85.02, + "end": 85.46, + "confidence": 0.438 + }, + { + "text": "them,", + "start": 85.46, + "end": 86.26, + "confidence": 0.943 + }, + { + "text": "got", + "start": 86.26, + "end": 86.78, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 86.78, + "end": 87.18, + "confidence": 0.992 + }, + { + "text": "got", + "start": 87.18, + "end": 87.8, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 87.8, + "end": 87.84, + "confidence": 0.993 + }, + { + "text": "got", + "start": 87.84, + "end": 88.6, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 88.6, + "end": 88.64, + "confidence": 0.991 + }, + { + "text": "got", + "start": 88.64, + "end": 88.9, + "confidence": 0.934 + }, + { + "text": "them,", + "start": 88.9, + "end": 88.94, + "confidence": 0.988 + }, + { + "text": "got", + "start": 88.94, + "end": 88.98, + "confidence": 0.915 + }, + { + "text": "them,", + "start": 88.98, + "end": 89.02, + "confidence": 0.987 + }, + { + "text": "got", + "start": 89.02, + "end": 89.06, + "confidence": 0.9 + }, + { + "text": "them,", + "start": 89.06, + "end": 89.1, + "confidence": 0.985 + }, + { + "text": "got", + "start": 89.1, + "end": 89.14, + "confidence": 0.899 + }, + { + "text": "them,", + "start": 89.14, + "end": 89.18, + "confidence": 0.985 + }, + { + "text": "got", + "start": 89.18, + "end": 89.22, + "confidence": 0.907 + }, + { + "text": "them,", + "start": 89.22, + "end": 89.26, + "confidence": 0.982 + }, + { + "text": "got", + "start": 89.26, + "end": 89.3, + "confidence": 0.905 + }, + { + "text": "them,", + "start": 89.3, + "end": 89.34, + "confidence": 0.971 + }, + { + "text": "got", + "start": 89.34, + "end": 89.38, + "confidence": 0.866 + }, + { + "text": "them,", + "start": 89.38, + "end": 89.42, + "confidence": 0.959 + }, + { + "text": "got", + "start": 89.42, + "end": 89.46, + "confidence": 0.845 + }, 
+ { + "text": "them,", + "start": 89.46, + "end": 89.5, + "confidence": 0.96 + }, + { + "text": "got", + "start": 89.5, + "end": 89.54, + "confidence": 0.861 + }, + { + "text": "them,", + "start": 89.54, + "end": 89.58, + "confidence": 0.963 + }, + { + "text": "got", + "start": 89.58, + "end": 89.62, + "confidence": 0.88 + }, + { + "text": "them,", + "start": 89.62, + "end": 89.66, + "confidence": 0.965 + }, + { + "text": "got", + "start": 89.66, + "end": 89.7, + "confidence": 0.888 + }, + { + "text": "them,", + "start": 89.7, + "end": 89.74, + "confidence": 0.966 + }, + { + "text": "got", + "start": 89.74, + "end": 89.78, + "confidence": 0.887 + }, + { + "text": "them,", + "start": 89.78, + "end": 89.82, + "confidence": 0.966 + }, + { + "text": "got", + "start": 89.82, + "end": 89.86, + "confidence": 0.875 + }, + { + "text": "them,", + "start": 89.86, + "end": 89.9, + "confidence": 0.968 + }, + { + "text": "got", + "start": 89.9, + "end": 89.94, + "confidence": 0.871 + }, + { + "text": "them,", + "start": 89.94, + "end": 89.98, + "confidence": 0.968 + }, + { + "text": "got", + "start": 89.98, + "end": 90.02, + "confidence": 0.868 + }, + { + "text": "them,", + "start": 90.02, + "end": 90.06, + "confidence": 0.966 + }, + { + "text": "got", + "start": 90.06, + "end": 90.1, + "confidence": 0.865 + }, + { + "text": "them,", + "start": 90.1, + "end": 90.14, + "confidence": 0.964 + }, + { + "text": "got", + "start": 90.14, + "end": 90.18, + "confidence": 0.865 + }, + { + "text": "them,", + "start": 90.18, + "end": 90.22, + "confidence": 0.965 + }, + { + "text": "got", + "start": 90.22, + "end": 90.26, + "confidence": 0.86 + }, + { + "text": "them,", + "start": 90.26, + "end": 90.3, + "confidence": 0.965 + }, + { + "text": "got", + "start": 90.3, + "end": 90.34, + "confidence": 0.867 + }, + { + "text": "them,", + "start": 90.34, + "end": 90.38, + "confidence": 0.966 + }, + { + "text": "got", + "start": 90.38, + "end": 90.42, + "confidence": 0.866 + }, + { + "text": 
"them,", + "start": 90.42, + "end": 90.46, + "confidence": 0.967 + }, + { + "text": "got", + "start": 90.46, + "end": 90.5, + "confidence": 0.866 + }, + { + "text": "them,", + "start": 90.5, + "end": 90.54, + "confidence": 0.968 + }, + { + "text": "got", + "start": 90.54, + "end": 90.58, + "confidence": 0.87 + }, + { + "text": "them,", + "start": 90.58, + "end": 90.62, + "confidence": 0.969 + }, + { + "text": "got", + "start": 90.62, + "end": 90.66, + "confidence": 0.874 + }, + { + "text": "them,", + "start": 90.66, + "end": 90.7, + "confidence": 0.971 + }, + { + "text": "got", + "start": 90.7, + "end": 90.74, + "confidence": 0.874 + }, + { + "text": "them,", + "start": 90.74, + "end": 90.78, + "confidence": 0.971 + }, + { + "text": "got", + "start": 90.78, + "end": 90.82, + "confidence": 0.88 + }, + { + "text": "them,", + "start": 90.82, + "end": 90.86, + "confidence": 0.971 + }, + { + "text": "got", + "start": 90.86, + "end": 90.9, + "confidence": 0.884 + }, + { + "text": "them,", + "start": 90.9, + "end": 90.94, + "confidence": 0.973 + }, + { + "text": "got", + "start": 90.94, + "end": 90.98, + "confidence": 0.883 + }, + { + "text": "them,", + "start": 90.98, + "end": 91.02, + "confidence": 0.973 + }, + { + "text": "got", + "start": 91.02, + "end": 91.06, + "confidence": 0.884 + }, + { + "text": "them,", + "start": 91.06, + "end": 91.1, + "confidence": 0.973 + }, + { + "text": "got", + "start": 91.1, + "end": 91.14, + "confidence": 0.893 + }, + { + "text": "them,", + "start": 91.14, + "end": 91.18, + "confidence": 0.975 + }, + { + "text": "got", + "start": 91.18, + "end": 91.22, + "confidence": 0.894 + }, + { + "text": "them,", + "start": 91.22, + "end": 91.26, + "confidence": 0.975 + }, + { + "text": "got", + "start": 91.26, + "end": 91.3, + "confidence": 0.9 + }, + { + "text": "them,", + "start": 91.3, + "end": 91.34, + "confidence": 0.976 + }, + { + "text": "got", + "start": 91.34, + "end": 91.38, + "confidence": 0.906 + }, + { + "text": "them,", + "start": 
91.38, + "end": 91.42, + "confidence": 0.977 + }, + { + "text": "got", + "start": 91.42, + "end": 91.46, + "confidence": 0.905 + }, + { + "text": "them,", + "start": 91.46, + "end": 91.5, + "confidence": 0.978 + }, + { + "text": "got", + "start": 91.5, + "end": 91.54, + "confidence": 0.915 + }, + { + "text": "them,", + "start": 91.54, + "end": 91.58, + "confidence": 0.979 + }, + { + "text": "got", + "start": 91.58, + "end": 91.62, + "confidence": 0.92 + }, + { + "text": "them,", + "start": 91.62, + "end": 91.66, + "confidence": 0.98 + }, + { + "text": "got", + "start": 91.66, + "end": 91.7, + "confidence": 0.922 + }, + { + "text": "them,", + "start": 91.7, + "end": 91.74, + "confidence": 0.981 + }, + { + "text": "got", + "start": 91.74, + "end": 91.78, + "confidence": 0.925 + }, + { + "text": "them,", + "start": 91.78, + "end": 91.82, + "confidence": 0.982 + }, + { + "text": "got", + "start": 91.82, + "end": 91.86, + "confidence": 0.932 + }, + { + "text": "them,", + "start": 91.86, + "end": 91.9, + "confidence": 0.983 + }, + { + "text": "got", + "start": 91.9, + "end": 91.94, + "confidence": 0.936 + }, + { + "text": "them,", + "start": 91.94, + "end": 91.98, + "confidence": 0.984 + }, + { + "text": "got", + "start": 91.98, + "end": 92.02, + "confidence": 0.935 + }, + { + "text": "them,", + "start": 92.02, + "end": 92.06, + "confidence": 0.985 + }, + { + "text": "got", + "start": 92.06, + "end": 92.1, + "confidence": 0.941 + }, + { + "text": "them,", + "start": 92.1, + "end": 92.14, + "confidence": 0.986 + }, + { + "text": "got", + "start": 92.14, + "end": 92.18, + "confidence": 0.946 + }, + { + "text": "them,", + "start": 92.18, + "end": 92.22, + "confidence": 0.986 + }, + { + "text": "got", + "start": 92.22, + "end": 92.26, + "confidence": 0.95 + }, + { + "text": "them,", + "start": 92.26, + "end": 92.3, + "confidence": 0.987 + }, + { + "text": "got", + "start": 92.3, + "end": 92.34, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 92.34, + "end": 
92.38, + "confidence": 0.987 + }, + { + "text": "got", + "start": 92.38, + "end": 92.42, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 92.42, + "end": 92.46, + "confidence": 0.989 + }, + { + "text": "got", + "start": 92.46, + "end": 92.5, + "confidence": 0.959 + }, + { + "text": "them,", + "start": 92.5, + "end": 92.54, + "confidence": 0.989 + }, + { + "text": "got", + "start": 92.54, + "end": 92.58, + "confidence": 0.959 + }, + { + "text": "them,", + "start": 92.58, + "end": 92.62, + "confidence": 0.99 + }, + { + "text": "got", + "start": 92.62, + "end": 92.66, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 92.66, + "end": 92.7, + "confidence": 0.991 + }, + { + "text": "got", + "start": 92.7, + "end": 92.74, + "confidence": 0.966 + }, + { + "text": "them,", + "start": 92.74, + "end": 92.78, + "confidence": 0.992 + }, + { + "text": "got", + "start": 92.78, + "end": 92.82, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 92.82, + "end": 92.86, + "confidence": 0.992 + }, + { + "text": "got", + "start": 92.86, + "end": 92.9, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 92.9, + "end": 92.94, + "confidence": 0.992 + }, + { + "text": "got", + "start": 92.94, + "end": 92.98, + "confidence": 0.971 + }, + { + "text": "them,", + "start": 92.98, + "end": 93.02, + "confidence": 0.993 + }, + { + "text": "got", + "start": 93.02, + "end": 93.06, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 93.06, + "end": 93.1, + "confidence": 0.993 + }, + { + "text": "got", + "start": 93.1, + "end": 93.14, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 93.14, + "end": 93.18, + "confidence": 0.993 + }, + { + "text": "got", + "start": 93.18, + "end": 93.22, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 93.22, + "end": 93.26, + "confidence": 0.994 + }, + { + "text": "got", + "start": 93.26, + "end": 93.3, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 93.3, + "end": 93.34, + 
"confidence": 0.994 + }, + { + "text": "got", + "start": 93.34, + "end": 93.38, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 93.38, + "end": 93.42, + "confidence": 0.994 + }, + { + "text": "got", + "start": 93.42, + "end": 93.46, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 93.46, + "end": 93.5, + "confidence": 0.995 + }, + { + "text": "got", + "start": 93.5, + "end": 93.54, + "confidence": 0.979 + }, + { + "text": "them,", + "start": 93.54, + "end": 93.58, + "confidence": 0.995 + }, + { + "text": "got", + "start": 93.58, + "end": 93.62, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 93.62, + "end": 93.66, + "confidence": 0.995 + }, + { + "text": "got", + "start": 93.66, + "end": 93.7, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 93.7, + "end": 93.74, + "confidence": 0.995 + }, + { + "text": "got", + "start": 93.74, + "end": 93.78, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 93.78, + "end": 93.82, + "confidence": 0.995 + }, + { + "text": "got", + "start": 93.82, + "end": 93.86, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 93.86, + "end": 93.9, + "confidence": 0.996 + }, + { + "text": "got", + "start": 93.9, + "end": 93.94, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 93.94, + "end": 93.98, + "confidence": 0.996 + }, + { + "text": "got", + "start": 93.98, + "end": 94.02, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 94.02, + "end": 94.06, + "confidence": 0.996 + }, + { + "text": "got", + "start": 94.06, + "end": 94.1, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 94.1, + "end": 94.14, + "confidence": 0.996 + }, + { + "text": "got", + "start": 94.14, + "end": 94.18, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 94.18, + "end": 94.22, + "confidence": 0.996 + }, + { + "text": "got", + "start": 94.22, + "end": 94.26, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 94.26, + "end": 94.3, + "confidence": 
0.996 + }, + { + "text": "got", + "start": 94.3, + "end": 94.34, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 94.34, + "end": 94.38, + "confidence": 0.996 + }, + { + "text": "got", + "start": 94.38, + "end": 94.42, + "confidence": 0.987 + }, + { + "text": "them", + "start": 94.42, + "end": 115.0, + "confidence": 0.996 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases/accurate.tiny_apollo11.mp3.words.json b/tests/expected/corner_cases/accurate.tiny_apollo11.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..1a4024138be0d0fb463dcbae290b0db086462686 --- /dev/null +++ b/tests/expected/corner_cases/accurate.tiny_apollo11.mp3.words.json @@ -0,0 +1,1160 @@ +{ + "text": " I'm all around my business and we got a recommendation for you on your door to the VA We have 18A's Okay, okay, I think I'll just get him or if they like just want to go on the Yeah, now what you want is on having a B1 And you just get the other one on my Now when we're here, we're going to go on the Yeah Okay, we want to hear that This is it, we know we just You can hit that That's how much you've ordered in that So I guess I'm working on my position but it's not Oh my god Okay, no problem No idea No idea Okay Okay", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.6, + "end": 5.36, + "text": " I'm all around my business and we got a recommendation for you on your door to the VA", + "tokens": [ + 50364, + 286, + 478, + 439, + 926, + 452, + 1606, + 293, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 2853, + 281, + 264, + 18527, + 50628 + ], + "temperature": 0.0, + "avg_logprob": -1.1828196152396824, + "compression_ratio": 1.52020202020202, + "no_speech_prob": 0.35403239727020264, + "confidence": 0.324, + "words": [ + { + "text": "I'm", + "start": 0.6, + "end": 0.82, + "confidence": 0.168 + }, + { + "text": "all", + "start": 0.82, + "end": 1.06, + "confidence": 0.082 + }, + { + 
"text": "around", + "start": 1.06, + "end": 1.28, + "confidence": 0.104 + }, + { + "text": "my", + "start": 1.28, + "end": 1.46, + "confidence": 0.128 + }, + { + "text": "business", + "start": 1.46, + "end": 1.72, + "confidence": 0.342 + }, + { + "text": "and", + "start": 1.72, + "end": 1.92, + "confidence": 0.613 + }, + { + "text": "we", + "start": 1.92, + "end": 1.98, + "confidence": 0.937 + }, + { + "text": "got", + "start": 1.98, + "end": 2.18, + "confidence": 0.4 + }, + { + "text": "a", + "start": 2.18, + "end": 2.4, + "confidence": 0.639 + }, + { + "text": "recommendation", + "start": 2.4, + "end": 3.06, + "confidence": 0.599 + }, + { + "text": "for", + "start": 3.06, + "end": 3.5, + "confidence": 0.371 + }, + { + "text": "you", + "start": 3.5, + "end": 3.68, + "confidence": 0.974 + }, + { + "text": "on", + "start": 3.68, + "end": 3.88, + "confidence": 0.804 + }, + { + "text": "your", + "start": 3.88, + "end": 4.24, + "confidence": 0.562 + }, + { + "text": "door", + "start": 4.24, + "end": 4.68, + "confidence": 0.145 + }, + { + "text": "to", + "start": 4.68, + "end": 4.88, + "confidence": 0.182 + }, + { + "text": "the", + "start": 4.88, + "end": 5.08, + "confidence": 0.501 + }, + { + "text": "VA", + "start": 5.08, + "end": 5.36, + "confidence": 0.249 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 5.36, + "end": 6.94, + "text": " We have 18A's", + "tokens": [ + 50628, + 492, + 362, + 2443, + 32, + 311, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -1.1828196152396824, + "compression_ratio": 1.52020202020202, + "no_speech_prob": 0.35403239727020264, + "confidence": 0.175, + "words": [ + { + "text": "We", + "start": 5.36, + "end": 5.74, + "confidence": 0.042 + }, + { + "text": "have", + "start": 5.74, + "end": 5.76, + "confidence": 0.652 + }, + { + "text": "18A's", + "start": 5.76, + "end": 6.94, + "confidence": 0.182 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 12.0, + "end": 16.79, + "text": " Okay, okay, I think I'll just get him or if they 
like just want to go on the", + "tokens": [ + 50914, + 1033, + 11, + 1392, + 11, + 286, + 519, + 286, + 603, + 445, + 483, + 796, + 420, + 498, + 436, + 411, + 445, + 528, + 281, + 352, + 322, + 264, + 51197 + ], + "temperature": 0.0, + "avg_logprob": -1.1828196152396824, + "compression_ratio": 1.52020202020202, + "no_speech_prob": 0.35403239727020264, + "confidence": 0.327, + "words": [ + { + "text": "Okay,", + "start": 12.0, + "end": 12.38, + "confidence": 0.662 + }, + { + "text": "okay,", + "start": 12.88, + "end": 13.12, + "confidence": 0.291 + }, + { + "text": "I", + "start": 13.12, + "end": 13.3, + "confidence": 0.209 + }, + { + "text": "think", + "start": 13.3, + "end": 13.58, + "confidence": 0.263 + }, + { + "text": "I'll", + "start": 13.58, + "end": 13.9, + "confidence": 0.2 + }, + { + "text": "just", + "start": 13.9, + "end": 14.1, + "confidence": 0.17 + }, + { + "text": "get", + "start": 14.1, + "end": 14.28, + "confidence": 0.248 + }, + { + "text": "him", + "start": 14.28, + "end": 14.52, + "confidence": 0.153 + }, + { + "text": "or", + "start": 14.52, + "end": 14.74, + "confidence": 0.199 + }, + { + "text": "if", + "start": 14.74, + "end": 14.94, + "confidence": 0.088 + }, + { + "text": "they", + "start": 14.94, + "end": 15.44, + "confidence": 0.607 + }, + { + "text": "like", + "start": 15.44, + "end": 15.74, + "confidence": 0.2 + }, + { + "text": "just", + "start": 15.74, + "end": 15.96, + "confidence": 0.462 + }, + { + "text": "want", + "start": 15.96, + "end": 16.18, + "confidence": 0.765 + }, + { + "text": "to", + "start": 16.18, + "end": 16.36, + "confidence": 0.91 + }, + { + "text": "go", + "start": 16.36, + "end": 16.56, + "confidence": 0.948 + }, + { + "text": "on", + "start": 16.56, + "end": 16.7, + "confidence": 0.566 + }, + { + "text": "the", + "start": 16.7, + "end": 16.79, + "confidence": 0.585 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 16.79, + "end": 19.0, + "text": " Yeah, now what you want is on having a B1", + "tokens": [ + 
51197, + 865, + 11, + 586, + 437, + 291, + 528, + 307, + 322, + 1419, + 257, + 363, + 16, + 51322 + ], + "temperature": 0.0, + "avg_logprob": -1.1828196152396824, + "compression_ratio": 1.52020202020202, + "no_speech_prob": 0.35403239727020264, + "confidence": 0.471, + "words": [ + { + "text": "Yeah,", + "start": 16.79, + "end": 17.1, + "confidence": 0.28 + }, + { + "text": "now", + "start": 17.18, + "end": 17.3, + "confidence": 0.365 + }, + { + "text": "what", + "start": 17.3, + "end": 17.48, + "confidence": 0.743 + }, + { + "text": "you", + "start": 17.48, + "end": 17.62, + "confidence": 0.932 + }, + { + "text": "want", + "start": 17.62, + "end": 17.84, + "confidence": 0.743 + }, + { + "text": "is", + "start": 17.84, + "end": 17.96, + "confidence": 0.674 + }, + { + "text": "on", + "start": 17.96, + "end": 18.16, + "confidence": 0.327 + }, + { + "text": "having", + "start": 18.16, + "end": 18.44, + "confidence": 0.563 + }, + { + "text": "a", + "start": 18.44, + "end": 18.76, + "confidence": 0.287 + }, + { + "text": "B1", + "start": 18.76, + "end": 19.0, + "confidence": 0.37 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 19.0, + "end": 21.88, + "text": " And you just get the other one on my", + "tokens": [ + 51322, + 400, + 291, + 445, + 483, + 264, + 661, + 472, + 322, + 452, + 51462 + ], + "temperature": 0.0, + "avg_logprob": -1.1828196152396824, + "compression_ratio": 1.52020202020202, + "no_speech_prob": 0.35403239727020264, + "confidence": 0.516, + "words": [ + { + "text": "And", + "start": 19.0, + "end": 20.26, + "confidence": 0.272 + }, + { + "text": "you", + "start": 20.26, + "end": 20.4, + "confidence": 0.846 + }, + { + "text": "just", + "start": 20.4, + "end": 20.64, + "confidence": 0.694 + }, + { + "text": "get", + "start": 20.64, + "end": 20.8, + "confidence": 0.134 + }, + { + "text": "the", + "start": 20.8, + "end": 20.9, + "confidence": 0.65 + }, + { + "text": "other", + "start": 20.9, + "end": 21.08, + "confidence": 0.937 + }, + { + "text": 
"one", + "start": 21.08, + "end": 21.26, + "confidence": 0.976 + }, + { + "text": "on", + "start": 21.26, + "end": 21.66, + "confidence": 0.872 + }, + { + "text": "my", + "start": 21.66, + "end": 21.88, + "confidence": 0.232 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 22.12, + "end": 25.36, + "text": " Now when we're here, we're going to go on the", + "tokens": [ + 51462, + 823, + 562, + 321, + 434, + 510, + 11, + 321, + 434, + 516, + 281, + 352, + 322, + 264, + 51616 + ], + "temperature": 0.0, + "avg_logprob": -1.1828196152396824, + "compression_ratio": 1.52020202020202, + "no_speech_prob": 0.35403239727020264, + "confidence": 0.336, + "words": [ + { + "text": "Now", + "start": 22.12, + "end": 22.8, + "confidence": 0.168 + }, + { + "text": "when", + "start": 22.8, + "end": 23.04, + "confidence": 0.35 + }, + { + "text": "we're", + "start": 23.04, + "end": 23.24, + "confidence": 0.745 + }, + { + "text": "here,", + "start": 23.24, + "end": 23.44, + "confidence": 0.492 + }, + { + "text": "we're", + "start": 23.66, + "end": 23.88, + "confidence": 0.293 + }, + { + "text": "going", + "start": 23.88, + "end": 23.94, + "confidence": 0.291 + }, + { + "text": "to", + "start": 23.94, + "end": 24.38, + "confidence": 0.88 + }, + { + "text": "go", + "start": 24.38, + "end": 24.72, + "confidence": 0.119 + }, + { + "text": "on", + "start": 24.72, + "end": 25.34, + "confidence": 0.133 + }, + { + "text": "the", + "start": 25.34, + "end": 25.36, + "confidence": 0.364 + } + ] + }, + { + "id": 6, + "seek": 3000, + "start": 31.36, + "end": 31.56, + "text": " Yeah", + "tokens": [ + 50364, + 865, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -1.6120662689208984, + "compression_ratio": 0.3333333333333333, + "no_speech_prob": 0.29405874013900757, + "confidence": 0.057, + "words": [ + { + "text": "Yeah", + "start": 31.36, + "end": 31.56, + "confidence": 0.057 + } + ] + }, + { + "id": 7, + "seek": 6000, + "start": 62.7, + "end": 64.14, + "text": " Okay, we want to hear that", + 
"tokens": [ + 50364, + 1033, + 11, + 321, + 528, + 281, + 1568, + 300, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.9429719602906859, + "compression_ratio": 1.4172185430463575, + "no_speech_prob": 0.10345567017793655, + "confidence": 0.355, + "words": [ + { + "text": "Okay,", + "start": 62.7, + "end": 62.72, + "confidence": 0.125 + }, + { + "text": "we", + "start": 63.1, + "end": 63.32, + "confidence": 0.237 + }, + { + "text": "want", + "start": 63.32, + "end": 63.62, + "confidence": 0.529 + }, + { + "text": "to", + "start": 63.62, + "end": 63.72, + "confidence": 0.571 + }, + { + "text": "hear", + "start": 63.72, + "end": 63.84, + "confidence": 0.245 + }, + { + "text": "that", + "start": 63.84, + "end": 64.14, + "confidence": 0.92 + } + ] + }, + { + "id": 8, + "seek": 6000, + "start": 64.46, + "end": 66.82, + "text": " This is it, we know we just", + "tokens": [ + 50564, + 639, + 307, + 309, + 11, + 321, + 458, + 321, + 445, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -0.9429719602906859, + "compression_ratio": 1.4172185430463575, + "no_speech_prob": 0.10345567017793655, + "confidence": 0.459, + "words": [ + { + "text": "This", + "start": 64.46, + "end": 64.72, + "confidence": 0.243 + }, + { + "text": "is", + "start": 64.72, + "end": 64.94, + "confidence": 0.663 + }, + { + "text": "it,", + "start": 64.94, + "end": 65.12, + "confidence": 0.243 + }, + { + "text": "we", + "start": 65.22, + "end": 65.42, + "confidence": 0.475 + }, + { + "text": "know", + "start": 65.42, + "end": 65.62, + "confidence": 0.696 + }, + { + "text": "we", + "start": 65.62, + "end": 65.88, + "confidence": 0.823 + }, + { + "text": "just", + "start": 65.88, + "end": 66.82, + "confidence": 0.405 + } + ] + }, + { + "id": 9, + "seek": 6000, + "start": 66.86, + "end": 68.3, + "text": " You can hit that", + "tokens": [ + 50714, + 509, + 393, + 2045, + 300, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.9429719602906859, + "compression_ratio": 1.4172185430463575, + 
"no_speech_prob": 0.10345567017793655, + "confidence": 0.329, + "words": [ + { + "text": "You", + "start": 66.86, + "end": 67.14, + "confidence": 0.246 + }, + { + "text": "can", + "start": 67.14, + "end": 67.32, + "confidence": 0.576 + }, + { + "text": "hit", + "start": 67.32, + "end": 67.52, + "confidence": 0.126 + }, + { + "text": "that", + "start": 67.52, + "end": 68.3, + "confidence": 0.656 + } + ] + }, + { + "id": 10, + "seek": 6000, + "start": 68.32, + "end": 69.46, + "text": " That's how much you've ordered in that", + "tokens": [ + 50764, + 663, + 311, + 577, + 709, + 291, + 600, + 8866, + 294, + 300, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.9429719602906859, + "compression_ratio": 1.4172185430463575, + "no_speech_prob": 0.10345567017793655, + "confidence": 0.3, + "words": [ + { + "text": "That's", + "start": 68.32, + "end": 68.58, + "confidence": 0.262 + }, + { + "text": "how", + "start": 68.58, + "end": 68.6, + "confidence": 0.237 + }, + { + "text": "much", + "start": 68.6, + "end": 68.68, + "confidence": 0.974 + }, + { + "text": "you've", + "start": 68.68, + "end": 68.84, + "confidence": 0.416 + }, + { + "text": "ordered", + "start": 68.84, + "end": 69.02, + "confidence": 0.145 + }, + { + "text": "in", + "start": 69.02, + "end": 69.22, + "confidence": 0.204 + }, + { + "text": "that", + "start": 69.22, + "end": 69.46, + "confidence": 0.246 + } + ] + }, + { + "id": 11, + "seek": 6000, + "start": 70.4, + "end": 72.52, + "text": " So I guess I'm working on my position but it's not", + "tokens": [ + 50864, + 407, + 286, + 2041, + 286, + 478, + 1364, + 322, + 452, + 2535, + 457, + 309, + 311, + 406, + 51014 + ], + "temperature": 0.0, + "avg_logprob": -0.9429719602906859, + "compression_ratio": 1.4172185430463575, + "no_speech_prob": 0.10345567017793655, + "confidence": 0.458, + "words": [ + { + "text": "So", + "start": 70.4, + "end": 70.42, + "confidence": 0.914 + }, + { + "text": "I", + "start": 70.42, + "end": 70.54, + "confidence": 0.717 + }, 
+ { + "text": "guess", + "start": 70.54, + "end": 70.8, + "confidence": 0.767 + }, + { + "text": "I'm", + "start": 70.8, + "end": 71.3, + "confidence": 0.363 + }, + { + "text": "working", + "start": 71.3, + "end": 71.32, + "confidence": 0.653 + }, + { + "text": "on", + "start": 71.32, + "end": 71.58, + "confidence": 0.396 + }, + { + "text": "my", + "start": 71.58, + "end": 71.74, + "confidence": 0.261 + }, + { + "text": "position", + "start": 71.74, + "end": 71.98, + "confidence": 0.081 + }, + { + "text": "but", + "start": 71.98, + "end": 72.18, + "confidence": 0.291 + }, + { + "text": "it's", + "start": 72.18, + "end": 72.38, + "confidence": 0.792 + }, + { + "text": "not", + "start": 72.38, + "end": 72.52, + "confidence": 0.585 + } + ] + }, + { + "id": 12, + "seek": 6000, + "start": 74.72, + "end": 75.22, + "text": " Oh my god", + "tokens": [ + 51014, + 876, + 452, + 3044, + 51114 + ], + "temperature": 0.0, + "avg_logprob": -0.9429719602906859, + "compression_ratio": 1.4172185430463575, + "no_speech_prob": 0.10345567017793655, + "confidence": 0.198, + "words": [ + { + "text": "Oh", + "start": 74.72, + "end": 74.74, + "confidence": 0.145 + }, + { + "text": "my", + "start": 74.74, + "end": 75.08, + "confidence": 0.272 + }, + { + "text": "god", + "start": 75.08, + "end": 75.22, + "confidence": 0.198 + } + ] + }, + { + "id": 13, + "seek": 6000, + "start": 75.22, + "end": 76.64, + "text": " Okay, no problem", + "tokens": [ + 51114, + 1033, + 11, + 572, + 1154, + 51214 + ], + "temperature": 0.0, + "avg_logprob": -0.9429719602906859, + "compression_ratio": 1.4172185430463575, + "no_speech_prob": 0.10345567017793655, + "confidence": 0.623, + "words": [ + { + "text": "Okay,", + "start": 75.22, + "end": 75.9, + "confidence": 0.682 + }, + { + "text": "no", + "start": 76.26, + "end": 76.32, + "confidence": 0.364 + }, + { + "text": "problem", + "start": 76.32, + "end": 76.64, + "confidence": 0.97 + } + ] + }, + { + "id": 14, + "seek": 6000, + "start": 76.64, + "end": 77.5, + 
"text": " No idea", + "tokens": [ + 51214, + 883, + 1558, + 51264 + ], + "temperature": 0.0, + "avg_logprob": -0.9429719602906859, + "compression_ratio": 1.4172185430463575, + "no_speech_prob": 0.10345567017793655, + "confidence": 0.223, + "words": [ + { + "text": "No", + "start": 76.64, + "end": 77.28, + "confidence": 0.223 + }, + { + "text": "idea", + "start": 77.28, + "end": 77.5, + "confidence": 0.223 + } + ] + }, + { + "id": 15, + "seek": 6000, + "start": 78.06, + "end": 78.6, + "text": " No idea", + "tokens": [ + 51264, + 883, + 1558, + 51314 + ], + "temperature": 0.0, + "avg_logprob": -0.9429719602906859, + "compression_ratio": 1.4172185430463575, + "no_speech_prob": 0.10345567017793655, + "confidence": 0.282, + "words": [ + { + "text": "No", + "start": 78.06, + "end": 78.38, + "confidence": 0.105 + }, + { + "text": "idea", + "start": 78.38, + "end": 78.6, + "confidence": 0.758 + } + ] + }, + { + "id": 16, + "seek": 6000, + "start": 85.28, + "end": 85.58, + "text": " Okay", + "tokens": [ + 51614, + 1033, + 51664 + ], + "temperature": 0.0, + "avg_logprob": -0.9429719602906859, + "compression_ratio": 1.4172185430463575, + "no_speech_prob": 0.10345567017793655, + "confidence": 0.101, + "words": [ + { + "text": "Okay", + "start": 85.28, + "end": 85.58, + "confidence": 0.101 + } + ] + }, + { + "id": 17, + "seek": 6000, + "start": 86.6, + "end": 86.86, + "text": " Okay", + "tokens": [ + 51664, + 1033, + 51714 + ], + "temperature": 0.0, + "avg_logprob": -0.9429719602906859, + "compression_ratio": 1.4172185430463575, + "no_speech_prob": 0.10345567017793655, + "confidence": 0.159, + "words": [ + { + "text": "Okay", + "start": 86.6, + "end": 86.86, + "confidence": 0.159 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases/arabic.mp3.words.json b/tests/expected/corner_cases/arabic.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..29acb6c94c7ea4dfabe19feb12b6efe2530e2f8b --- 
/dev/null +++ b/tests/expected/corner_cases/arabic.mp3.words.json @@ -0,0 +1,393 @@ +{ + "text": " حبّي أنصار الهدى حبّي ركب الفدا وطلّبي سأر الشاهد وطلّبي سأر الشاهد لمتى سنضل رقودا نغرق في النوم ونشخر وعن الآذان نسمّه ونغمّض كي لا نمسر", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 1.8, + "end": 7.48, + "text": " حبّي أنصار الهدى", + "tokens": [ + 50364, + 11331, + 3555, + 11703, + 1829, + 14739, + 9381, + 9640, + 2423, + 3224, + 3215, + 7578, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -0.5114290775396885, + "compression_ratio": 1.1842105263157894, + "no_speech_prob": 0.7993651032447815, + "confidence": 0.651, + "words": [ + { + "text": "حبّي", + "start": 1.8, + "end": 4.26, + "confidence": 0.436 + }, + { + "text": "أنصار", + "start": 4.26, + "end": 5.7, + "confidence": 0.795 + }, + { + "text": "الهدى", + "start": 5.7, + "end": 7.48, + "confidence": 0.837 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 9.24, + "end": 15.2, + "text": " حبّي ركب الفدا", + "tokens": [ + 50714, + 11331, + 3555, + 11703, + 1829, + 12602, + 4117, + 3555, + 27188, + 28259, + 51114 + ], + "temperature": 0.0, + "avg_logprob": -0.5114290775396885, + "compression_ratio": 1.1842105263157894, + "no_speech_prob": 0.7993651032447815, + "confidence": 0.829, + "words": [ + { + "text": "حبّي", + "start": 9.24, + "end": 12.58, + "confidence": 0.875 + }, + { + "text": "ركب", + "start": 12.58, + "end": 13.76, + "confidence": 0.853 + }, + { + "text": "الفدا", + "start": 13.76, + "end": 15.2, + "confidence": 0.711 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 18.86, + "end": 23.42, + "text": " وطلّبي سأر الشاهد", + "tokens": [ + 51114, + 4032, + 9566, + 1211, + 11703, + 21292, + 8608, + 10721, + 2288, + 25124, + 40294, + 3215, + 51514 + ], + "temperature": 0.0, + "avg_logprob": -0.5114290775396885, + "compression_ratio": 1.1842105263157894, + "no_speech_prob": 0.7993651032447815, + "confidence": 0.587, + "words": [ + { + "text": "وطلّبي", + "start": 18.86, + "end": 20.68, + 
"confidence": 0.54 + }, + { + "text": "سأر", + "start": 20.68, + "end": 21.78, + "confidence": 0.94 + }, + { + "text": "الشاهد", + "start": 21.78, + "end": 23.42, + "confidence": 0.421 + } + ] + }, + { + "id": 3, + "seek": 2300, + "start": 24.2, + "end": 28.96, + "text": " وطلّبي سأر الشاهد", + "tokens": [ + 50414, + 4032, + 9566, + 1211, + 11703, + 21292, + 8608, + 10721, + 2288, + 25124, + 40294, + 3215, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -0.24064027017621853, + "compression_ratio": 1.3675213675213675, + "no_speech_prob": 0.008175775408744812, + "confidence": 0.875, + "words": [ + { + "text": "وطلّبي", + "start": 24.2, + "end": 25.98, + "confidence": 0.799 + }, + { + "text": "سأر", + "start": 25.98, + "end": 27.12, + "confidence": 0.95 + }, + { + "text": "الشاهد", + "start": 27.12, + "end": 28.96, + "confidence": 0.936 + } + ] + }, + { + "id": 4, + "seek": 2300, + "start": 30.88, + "end": 33.34, + "text": " لمتى سنضل رقودا", + "tokens": [ + 50714, + 32767, + 49975, + 8608, + 1863, + 11242, + 1211, + 12602, + 4587, + 23328, + 995, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.24064027017621853, + "compression_ratio": 1.3675213675213675, + "no_speech_prob": 0.008175775408744812, + "confidence": 0.714, + "words": [ + { + "text": "لمتى", + "start": 30.88, + "end": 31.48, + "confidence": 0.692 + }, + { + "text": "سنضل", + "start": 31.48, + "end": 32.34, + "confidence": 0.754 + }, + { + "text": "رقودا", + "start": 32.34, + "end": 33.34, + "confidence": 0.686 + } + ] + }, + { + "id": 5, + "seek": 2300, + "start": 33.36, + "end": 35.88, + "text": " نغرق في النوم ونشخر", + "tokens": [ + 50864, + 8717, + 17082, + 2288, + 4587, + 8978, + 28239, + 20498, + 4032, + 1863, + 8592, + 34740, + 51014 + ], + "temperature": 0.0, + "avg_logprob": -0.24064027017621853, + "compression_ratio": 1.3675213675213675, + "no_speech_prob": 0.008175775408744812, + "confidence": 0.926, + "words": [ + { + "text": "نغرق", + "start": 33.36, + "end": 34.22, + "confidence": 
0.901 + }, + { + "text": "في", + "start": 34.22, + "end": 34.44, + "confidence": 0.873 + }, + { + "text": "النوم", + "start": 34.44, + "end": 35.04, + "confidence": 0.974 + }, + { + "text": "ونشخر", + "start": 35.04, + "end": 35.88, + "confidence": 0.942 + } + ] + }, + { + "id": 6, + "seek": 2300, + "start": 36.24, + "end": 38.72, + "text": " وعن الآذان نسمّه", + "tokens": [ + 51014, + 4032, + 3615, + 1863, + 6024, + 95, + 8848, + 7649, + 8717, + 38251, + 11703, + 3224, + 51164 + ], + "temperature": 0.0, + "avg_logprob": -0.24064027017621853, + "compression_ratio": 1.3675213675213675, + "no_speech_prob": 0.008175775408744812, + "confidence": 0.765, + "words": [ + { + "text": "وعن", + "start": 36.24, + "end": 36.76, + "confidence": 0.923 + }, + { + "text": "الآذان", + "start": 36.76, + "end": 37.72, + "confidence": 0.82 + }, + { + "text": "نسمّه", + "start": 37.72, + "end": 38.72, + "confidence": 0.619 + } + ] + }, + { + "id": 7, + "seek": 2300, + "start": 38.88, + "end": 41.3, + "text": " ونغمّض كي لا نمسر", + "tokens": [ + 51164, + 4032, + 1863, + 17082, + 2304, + 11703, + 11242, + 9122, + 1829, + 20193, + 8717, + 2304, + 3794, + 2288, + 51264 + ], + "temperature": 0.0, + "avg_logprob": -0.24064027017621853, + "compression_ratio": 1.3675213675213675, + "no_speech_prob": 0.008175775408744812, + "confidence": 0.79, + "words": [ + { + "text": "ونغمّض", + "start": 38.88, + "end": 39.9, + "confidence": 0.91 + }, + { + "text": "كي", + "start": 39.9, + "end": 40.28, + "confidence": 0.833 + }, + { + "text": "لا", + "start": 40.28, + "end": 40.56, + "confidence": 0.804 + }, + { + "text": "نمسر", + "start": 40.56, + "end": 41.3, + "confidence": 0.619 + } + ] + } + ], + "language": "Arabic" +} \ No newline at end of file diff --git a/tests/expected/corner_cases/issue24_empty.wav.words.json b/tests/expected/corner_cases/issue24_empty.wav.words.json new file mode 100644 index 0000000000000000000000000000000000000000..b0b537f1b7823415bf5f37956f5821918c868d52 --- /dev/null +++ 
b/tests/expected/corner_cases/issue24_empty.wav.words.json @@ -0,0 +1,5 @@ +{ + "text": "", + "segments": [], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases/large-v2.accurate_gloria.mp3.words.json b/tests/expected/corner_cases/large-v2.accurate_gloria.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..64ebff47689db6854b64f37e10e9e6c61ec7789f --- /dev/null +++ b/tests/expected/corner_cases/large-v2.accurate_gloria.mp3.words.json @@ -0,0 +1,540 @@ +{ + "text": " Ella, my glorious love, how are you? Oh, I'm okay. I will be. I said she could stay with us tomorrow just until she feels better. Of course she can. No, this won't be for long. Well, if you can stay as long as you want, my love. I've really missed you. Pops.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.0, + "end": 6.28, + "text": " Ella, my glorious love, how are you?", + "tokens": [ + 50364, + 29261, + 11, + 452, + 24026, + 959, + 11, + 577, + 366, + 291, + 30, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.3580404988835367, + "compression_ratio": 1.425414364640884, + "no_speech_prob": 0.24429768323898315, + "confidence": 0.452, + "words": [ + { + "text": "Ella,", + "start": 0.0, + "end": 1.68, + "confidence": 0.126 + }, + { + "text": "my", + "start": 1.94, + "end": 2.7, + "confidence": 0.368 + }, + { + "text": "glorious", + "start": 2.7, + "end": 3.5, + "confidence": 0.729 + }, + { + "text": "love,", + "start": 3.5, + "end": 4.18, + "confidence": 0.432 + }, + { + "text": "how", + "start": 4.94, + "end": 5.66, + "confidence": 0.693 + }, + { + "text": "are", + "start": 5.66, + "end": 6.02, + "confidence": 0.902 + }, + { + "text": "you?", + "start": 6.02, + "end": 6.28, + "confidence": 0.845 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 6.28, + "end": 9.18, + "text": " Oh, I'm okay. 
I will be.", + "tokens": [ + 50664, + 876, + 11, + 286, + 478, + 1392, + 13, + 286, + 486, + 312, + 13, + 50814 + ], + "temperature": 0.0, + "avg_logprob": -0.3580404988835367, + "compression_ratio": 1.425414364640884, + "no_speech_prob": 0.24429768323898315, + "confidence": 0.68, + "words": [ + { + "text": "Oh,", + "start": 6.28, + "end": 6.6, + "confidence": 0.427 + }, + { + "text": "I'm", + "start": 6.68, + "end": 7.02, + "confidence": 0.958 + }, + { + "text": "okay.", + "start": 7.02, + "end": 7.64, + "confidence": 0.605 + }, + { + "text": "I", + "start": 8.3, + "end": 8.58, + "confidence": 0.562 + }, + { + "text": "will", + "start": 8.58, + "end": 8.86, + "confidence": 0.813 + }, + { + "text": "be.", + "start": 8.86, + "end": 9.18, + "confidence": 0.871 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 9.18, + "end": 11.48, + "text": " I said she could stay with us tomorrow just until she feels better.", + "tokens": [ + 50814, + 286, + 848, + 750, + 727, + 1754, + 365, + 505, + 4153, + 445, + 1826, + 750, + 3417, + 1101, + 13, + 50964 + ], + "temperature": 0.0, + "avg_logprob": -0.3580404988835367, + "compression_ratio": 1.425414364640884, + "no_speech_prob": 0.24429768323898315, + "confidence": 0.571, + "words": [ + { + "text": "I", + "start": 9.18, + "end": 9.44, + "confidence": 0.66 + }, + { + "text": "said", + "start": 9.44, + "end": 9.62, + "confidence": 0.869 + }, + { + "text": "she", + "start": 9.62, + "end": 9.76, + "confidence": 0.748 + }, + { + "text": "could", + "start": 9.76, + "end": 9.88, + "confidence": 0.813 + }, + { + "text": "stay", + "start": 9.88, + "end": 10.08, + "confidence": 0.925 + }, + { + "text": "with", + "start": 10.08, + "end": 10.22, + "confidence": 0.808 + }, + { + "text": "us", + "start": 10.22, + "end": 10.34, + "confidence": 0.628 + }, + { + "text": "tomorrow", + "start": 10.34, + "end": 10.56, + "confidence": 0.219 + }, + { + "text": "just", + "start": 10.56, + "end": 10.74, + "confidence": 0.067 + }, + { + "text": 
"until", + "start": 10.74, + "end": 10.86, + "confidence": 0.4 + }, + { + "text": "she", + "start": 10.86, + "end": 10.98, + "confidence": 0.803 + }, + { + "text": "feels", + "start": 10.98, + "end": 11.18, + "confidence": 0.788 + }, + { + "text": "better.", + "start": 11.18, + "end": 11.48, + "confidence": 0.807 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 11.5, + "end": 13.34, + "text": " Of course she can.", + "tokens": [ + 50964, + 2720, + 1164, + 750, + 393, + 13, + 51014 + ], + "temperature": 0.0, + "avg_logprob": -0.3580404988835367, + "compression_ratio": 1.425414364640884, + "no_speech_prob": 0.24429768323898315, + "confidence": 0.333, + "words": [ + { + "text": "Of", + "start": 11.5, + "end": 12.1, + "confidence": 0.123 + }, + { + "text": "course", + "start": 12.1, + "end": 12.6, + "confidence": 0.86 + }, + { + "text": "she", + "start": 12.6, + "end": 12.88, + "confidence": 0.085 + }, + { + "text": "can.", + "start": 12.88, + "end": 13.34, + "confidence": 0.674 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 13.34, + "end": 15.2, + "text": " No, this won't be for long.", + "tokens": [ + 51014, + 883, + 11, + 341, + 1582, + 380, + 312, + 337, + 938, + 13, + 51114 + ], + "temperature": 0.0, + "avg_logprob": -0.3580404988835367, + "compression_ratio": 1.425414364640884, + "no_speech_prob": 0.24429768323898315, + "confidence": 0.649, + "words": [ + { + "text": "No,", + "start": 13.34, + "end": 13.78, + "confidence": 0.328 + }, + { + "text": "this", + "start": 13.9, + "end": 14.24, + "confidence": 0.419 + }, + { + "text": "won't", + "start": 14.24, + "end": 14.54, + "confidence": 0.917 + }, + { + "text": "be", + "start": 14.54, + "end": 14.68, + "confidence": 0.906 + }, + { + "text": "for", + "start": 14.68, + "end": 14.88, + "confidence": 0.884 + }, + { + "text": "long.", + "start": 14.88, + "end": 15.2, + "confidence": 0.823 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 15.2, + "end": 17.44, + "text": " Well, if you can stay as long as you 
want, my love.", + "tokens": [ + 51114, + 1042, + 11, + 498, + 291, + 393, + 1754, + 382, + 938, + 382, + 291, + 528, + 11, + 452, + 959, + 13, + 51214 + ], + "temperature": 0.0, + "avg_logprob": -0.3580404988835367, + "compression_ratio": 1.425414364640884, + "no_speech_prob": 0.24429768323898315, + "confidence": 0.69, + "words": [ + { + "text": "Well,", + "start": 15.2, + "end": 15.46, + "confidence": 0.553 + }, + { + "text": "if", + "start": 15.52, + "end": 15.6, + "confidence": 0.084 + }, + { + "text": "you", + "start": 15.6, + "end": 15.68, + "confidence": 0.898 + }, + { + "text": "can", + "start": 15.68, + "end": 15.8, + "confidence": 0.827 + }, + { + "text": "stay", + "start": 15.8, + "end": 16.04, + "confidence": 0.922 + }, + { + "text": "as", + "start": 16.04, + "end": 16.18, + "confidence": 0.896 + }, + { + "text": "long", + "start": 16.18, + "end": 16.32, + "confidence": 0.893 + }, + { + "text": "as", + "start": 16.32, + "end": 16.46, + "confidence": 0.902 + }, + { + "text": "you", + "start": 16.46, + "end": 16.58, + "confidence": 0.905 + }, + { + "text": "want,", + "start": 16.58, + "end": 16.76, + "confidence": 0.731 + }, + { + "text": "my", + "start": 16.88, + "end": 17.02, + "confidence": 0.901 + }, + { + "text": "love.", + "start": 17.02, + "end": 17.44, + "confidence": 0.831 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 17.44, + "end": 18.96, + "text": " I've really missed you.", + "tokens": [ + 51214, + 286, + 600, + 534, + 6721, + 291, + 13, + 51314 + ], + "temperature": 0.0, + "avg_logprob": -0.3580404988835367, + "compression_ratio": 1.425414364640884, + "no_speech_prob": 0.24429768323898315, + "confidence": 0.529, + "words": [ + { + "text": "I've", + "start": 17.44, + "end": 17.58, + "confidence": 0.309 + }, + { + "text": "really", + "start": 17.58, + "end": 18.18, + "confidence": 0.763 + }, + { + "text": "missed", + "start": 18.18, + "end": 18.68, + "confidence": 0.778 + }, + { + "text": "you.", + "start": 18.68, + "end": 18.96, + 
"confidence": 0.622 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 19.58, + "end": 20.12, + "text": " Pops.", + "tokens": [ + 51314, + 430, + 3370, + 13, + 51364 + ], + "temperature": 0.0, + "avg_logprob": -0.3580404988835367, + "compression_ratio": 1.425414364640884, + "no_speech_prob": 0.24429768323898315, + "confidence": 0.02, + "words": [ + { + "text": "Pops.", + "start": 19.58, + "end": 20.12, + "confidence": 0.02 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases/large-v2.efficient_gloria.mp3.words.json b/tests/expected/corner_cases/large-v2.efficient_gloria.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..efb0f918df60aa1eeeeb2fa4aff52f1efb556ad3 --- /dev/null +++ b/tests/expected/corner_cases/large-v2.efficient_gloria.mp3.words.json @@ -0,0 +1,564 @@ +{ + "text": " Ella, my glorious love. How are you? Oh, I'm okay. I will be. I said she could stay with us tomorrow, just until she feels better. Of course she can. No, this won't be for long. Well, if you can stay as long as you want, my love. I really missed you. 
I agree.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.0, + "end": 4.02, + "text": " Ella, my glorious love.", + "tokens": [ + 50364, + 29261, + 11, + 452, + 24026, + 959, + 13, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.36097323239504636, + "compression_ratio": 1.446927374301676, + "no_speech_prob": 0.244294673204422, + "confidence": 0.477, + "words": [ + { + "text": "Ella,", + "start": 0.0, + "end": 1.68, + "confidence": 0.128 + }, + { + "text": "my", + "start": 1.92, + "end": 2.1, + "confidence": 0.733 + }, + { + "text": "glorious", + "start": 2.1, + "end": 3.36, + "confidence": 0.625 + }, + { + "text": "love.", + "start": 3.36, + "end": 4.02, + "confidence": 0.883 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 5.46, + "end": 6.24, + "text": " How are you?", + "tokens": [ + 50564, + 1012, + 366, + 291, + 30, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.36097323239504636, + "compression_ratio": 1.446927374301676, + "no_speech_prob": 0.244294673204422, + "confidence": 0.682, + "words": [ + { + "text": "How", + "start": 5.46, + "end": 5.62, + "confidence": 0.454 + }, + { + "text": "are", + "start": 5.62, + "end": 6.02, + "confidence": 0.893 + }, + { + "text": "you?", + "start": 6.02, + "end": 6.24, + "confidence": 0.783 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 6.32, + "end": 9.12, + "text": " Oh, I'm okay. 
I will be.", + "tokens": [ + 50664, + 876, + 11, + 286, + 478, + 1392, + 13, + 286, + 486, + 312, + 13, + 50814 + ], + "temperature": 0.0, + "avg_logprob": -0.36097323239504636, + "compression_ratio": 1.446927374301676, + "no_speech_prob": 0.244294673204422, + "confidence": 0.73, + "words": [ + { + "text": "Oh,", + "start": 6.32, + "end": 6.66, + "confidence": 0.479 + }, + { + "text": "I'm", + "start": 6.74, + "end": 7.02, + "confidence": 0.971 + }, + { + "text": "okay.", + "start": 7.02, + "end": 7.7, + "confidence": 0.476 + }, + { + "text": "I", + "start": 8.14, + "end": 8.56, + "confidence": 0.713 + }, + { + "text": "will", + "start": 8.56, + "end": 8.82, + "confidence": 0.795 + }, + { + "text": "be.", + "start": 8.82, + "end": 9.12, + "confidence": 0.906 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 9.32, + "end": 11.42, + "text": " I said she could stay with us tomorrow, just until she feels better.", + "tokens": [ + 50814, + 286, + 848, + 750, + 727, + 1754, + 365, + 505, + 4153, + 11, + 445, + 1826, + 750, + 3417, + 1101, + 13, + 50914 + ], + "temperature": 0.0, + "avg_logprob": -0.36097323239504636, + "compression_ratio": 1.446927374301676, + "no_speech_prob": 0.244294673204422, + "confidence": 0.723, + "words": [ + { + "text": "I", + "start": 9.32, + "end": 9.48, + "confidence": 0.878 + }, + { + "text": "said", + "start": 9.48, + "end": 9.62, + "confidence": 0.862 + }, + { + "text": "she", + "start": 9.62, + "end": 9.76, + "confidence": 0.762 + }, + { + "text": "could", + "start": 9.76, + "end": 9.88, + "confidence": 0.801 + }, + { + "text": "stay", + "start": 9.88, + "end": 10.08, + "confidence": 0.921 + }, + { + "text": "with", + "start": 10.08, + "end": 10.22, + "confidence": 0.812 + }, + { + "text": "us", + "start": 10.22, + "end": 10.36, + "confidence": 0.848 + }, + { + "text": "tomorrow,", + "start": 10.36, + "end": 10.56, + "confidence": 0.515 + }, + { + "text": "just", + "start": 10.74, + "end": 10.76, + "confidence": 0.433 + }, + { + "text": 
"until", + "start": 10.76, + "end": 10.86, + "confidence": 0.439 + }, + { + "text": "she", + "start": 10.86, + "end": 10.98, + "confidence": 0.809 + }, + { + "text": "feels", + "start": 10.98, + "end": 11.16, + "confidence": 0.777 + }, + { + "text": "better.", + "start": 11.16, + "end": 11.42, + "confidence": 0.822 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 11.9, + "end": 13.06, + "text": " Of course she can.", + "tokens": [ + 50914, + 2720, + 1164, + 750, + 393, + 13, + 51014 + ], + "temperature": 0.0, + "avg_logprob": -0.36097323239504636, + "compression_ratio": 1.446927374301676, + "no_speech_prob": 0.244294673204422, + "confidence": 0.761, + "words": [ + { + "text": "Of", + "start": 11.9, + "end": 12.16, + "confidence": 0.562 + }, + { + "text": "course", + "start": 12.16, + "end": 12.54, + "confidence": 0.875 + }, + { + "text": "she", + "start": 12.54, + "end": 12.8, + "confidence": 0.758 + }, + { + "text": "can.", + "start": 12.8, + "end": 13.06, + "confidence": 0.899 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 13.7, + "end": 15.2, + "text": " No, this won't be for long.", + "tokens": [ + 51014, + 883, + 11, + 341, + 1582, + 380, + 312, + 337, + 938, + 13, + 51114 + ], + "temperature": 0.0, + "avg_logprob": -0.36097323239504636, + "compression_ratio": 1.446927374301676, + "no_speech_prob": 0.244294673204422, + "confidence": 0.774, + "words": [ + { + "text": "No,", + "start": 13.7, + "end": 13.72, + "confidence": 0.381 + }, + { + "text": "this", + "start": 13.92, + "end": 14.26, + "confidence": 0.702 + }, + { + "text": "won't", + "start": 14.26, + "end": 14.54, + "confidence": 0.943 + }, + { + "text": "be", + "start": 14.54, + "end": 14.68, + "confidence": 0.898 + }, + { + "text": "for", + "start": 14.68, + "end": 14.86, + "confidence": 0.891 + }, + { + "text": "long.", + "start": 14.86, + "end": 15.2, + "confidence": 0.877 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 15.3, + "end": 17.48, + "text": " Well, if you can stay as long as you 
want, my love.", + "tokens": [ + 51114, + 1042, + 11, + 498, + 291, + 393, + 1754, + 382, + 938, + 382, + 291, + 528, + 11, + 452, + 959, + 13, + 51214 + ], + "temperature": 0.0, + "avg_logprob": -0.36097323239504636, + "compression_ratio": 1.446927374301676, + "no_speech_prob": 0.244294673204422, + "confidence": 0.836, + "words": [ + { + "text": "Well,", + "start": 15.3, + "end": 15.46, + "confidence": 0.647 + }, + { + "text": "if", + "start": 15.52, + "end": 15.6, + "confidence": 0.615 + }, + { + "text": "you", + "start": 15.6, + "end": 15.68, + "confidence": 0.896 + }, + { + "text": "can", + "start": 15.68, + "end": 15.82, + "confidence": 0.851 + }, + { + "text": "stay", + "start": 15.82, + "end": 16.04, + "confidence": 0.924 + }, + { + "text": "as", + "start": 16.04, + "end": 16.2, + "confidence": 0.885 + }, + { + "text": "long", + "start": 16.2, + "end": 16.32, + "confidence": 0.883 + }, + { + "text": "as", + "start": 16.32, + "end": 16.48, + "confidence": 0.907 + }, + { + "text": "you", + "start": 16.48, + "end": 16.58, + "confidence": 0.899 + }, + { + "text": "want,", + "start": 16.58, + "end": 16.76, + "confidence": 0.81 + }, + { + "text": "my", + "start": 16.9, + "end": 17.02, + "confidence": 0.901 + }, + { + "text": "love.", + "start": 17.02, + "end": 17.48, + "confidence": 0.896 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 17.66, + "end": 18.96, + "text": " I really missed you.", + "tokens": [ + 51214, + 286, + 534, + 6721, + 291, + 13, + 51314 + ], + "temperature": 0.0, + "avg_logprob": -0.36097323239504636, + "compression_ratio": 1.446927374301676, + "no_speech_prob": 0.244294673204422, + "confidence": 0.479, + "words": [ + { + "text": "I", + "start": 17.66, + "end": 17.86, + "confidence": 0.252 + }, + { + "text": "really", + "start": 17.86, + "end": 18.22, + "confidence": 0.447 + }, + { + "text": "missed", + "start": 18.22, + "end": 18.7, + "confidence": 0.653 + }, + { + "text": "you.", + "start": 18.7, + "end": 18.96, + "confidence": 0.717 + } 
+ ] + }, + { + "id": 8, + "seek": 0, + "start": 19.58, + "end": 20.6, + "text": " I agree.", + "tokens": [ + 51314, + 286, + 3986, + 13, + 51414 + ], + "temperature": 0.0, + "avg_logprob": -0.36097323239504636, + "compression_ratio": 1.446927374301676, + "no_speech_prob": 0.244294673204422, + "confidence": 0.421, + "words": [ + { + "text": "I", + "start": 19.58, + "end": 20.4, + "confidence": 0.403 + }, + { + "text": "agree.", + "start": 20.4, + "end": 20.6, + "confidence": 0.439 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases/large_apollo11.mp3.words.json b/tests/expected/corner_cases/large_apollo11.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..422f5ca3cb4cf37d77e463c35928d42c49ae8b73 --- /dev/null +++ b/tests/expected/corner_cases/large_apollo11.mp3.words.json @@ -0,0 +1,1861 @@ +{ + "text": " Apollo 11, Houston. We got a recommendation for you on your DOJ's E-A limb, E-G-E-A's, over. Go ahead. Okay, we'd like to have, say, a selected one or two on the helmet. We're going to have an B1. And you could put the other one on Mike's helmet, we're still seeing the bleepers. Over. I got a selected one on the helmet. I got a selected one on the helmet and B1. The other one might go under bleep restraint. We've got them in their helmet bags. And I guess, excuse me, yeah, in the helmet bag. In the leavers, in the leavers bag. Roger. Roger. I'm taking them to the auto. Yeah, we're taking them to the auto. You're good. You want to hang me on this, Charlie. With the cover. I tried it already. Okay, fine. We weren't sure that this was a suggestion. We thought you could check it out. How much we've already done that. So I guess whatever you come up with, just let us know. Okay, no problem. No problem. We'll let you know by the end of the session. Okay.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.36, + "end": 6.96, + "text": " Apollo 11, Houston. 
We got a recommendation for you on your DOJ's E-A limb, E-G-E-A's, over.", + "tokens": [ + 50364, + 25187, + 2975, + 11, + 18717, + 13, + 492, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 10699, + 41, + 311, + 462, + 12, + 32, + 30390, + 11, + 462, + 12, + 38, + 12, + 36, + 12, + 32, + 311, + 11, + 670, + 13, + 50714 + ], + "temperature": 0.4, + "avg_logprob": -0.6034294282547151, + "compression_ratio": 1.425, + "no_speech_prob": 0.3341304659843445, + "confidence": 0.595, + "words": [ + { + "text": "Apollo", + "start": 0.36, + "end": 0.92, + "confidence": 0.79 + }, + { + "text": "11,", + "start": 0.92, + "end": 1.32, + "confidence": 0.877 + }, + { + "text": "Houston.", + "start": 1.5, + "end": 1.74, + "confidence": 0.827 + }, + { + "text": "We", + "start": 1.9, + "end": 1.94, + "confidence": 0.836 + }, + { + "text": "got", + "start": 1.94, + "end": 2.1, + "confidence": 0.639 + }, + { + "text": "a", + "start": 2.1, + "end": 2.3, + "confidence": 0.995 + }, + { + "text": "recommendation", + "start": 2.3, + "end": 3.08, + "confidence": 0.709 + }, + { + "text": "for", + "start": 3.08, + "end": 3.46, + "confidence": 0.856 + }, + { + "text": "you", + "start": 3.46, + "end": 3.62, + "confidence": 0.859 + }, + { + "text": "on", + "start": 3.62, + "end": 3.76, + "confidence": 0.87 + }, + { + "text": "your", + "start": 3.76, + "end": 4.06, + "confidence": 0.767 + }, + { + "text": "DOJ's", + "start": 4.06, + "end": 4.88, + "confidence": 0.163 + }, + { + "text": "E-A", + "start": 4.88, + "end": 5.3, + "confidence": 0.408 + }, + { + "text": "limb,", + "start": 5.3, + "end": 5.62, + "confidence": 0.264 + }, + { + "text": "E-G-E-A's,", + "start": 5.76, + "end": 6.8, + "confidence": 0.768 + }, + { + "text": "over.", + "start": 6.82, + "end": 6.96, + "confidence": 0.817 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 10.74, + "end": 11.14, + "text": " Go ahead.", + "tokens": [ + 50864, + 1037, + 2286, + 13, + 50914 + ], + "temperature": 0.4, + "avg_logprob": 
-0.6034294282547151, + "compression_ratio": 1.425, + "no_speech_prob": 0.3341304659843445, + "confidence": 0.85, + "words": [ + { + "text": "Go", + "start": 10.74, + "end": 10.98, + "confidence": 0.842 + }, + { + "text": "ahead.", + "start": 10.98, + "end": 11.14, + "confidence": 0.858 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 11.88, + "end": 17.3, + "text": " Okay, we'd like to have, say, a selected one or two on the helmet.", + "tokens": [ + 50914, + 1033, + 11, + 321, + 1116, + 411, + 281, + 362, + 11, + 584, + 11, + 257, + 8209, + 472, + 420, + 732, + 322, + 264, + 15922, + 13, + 51214 + ], + "temperature": 0.4, + "avg_logprob": -0.6034294282547151, + "compression_ratio": 1.425, + "no_speech_prob": 0.3341304659843445, + "confidence": 0.428, + "words": [ + { + "text": "Okay,", + "start": 11.88, + "end": 12.28, + "confidence": 0.507 + }, + { + "text": "we'd", + "start": 12.5, + "end": 13.16, + "confidence": 0.623 + }, + { + "text": "like", + "start": 13.16, + "end": 13.32, + "confidence": 0.876 + }, + { + "text": "to", + "start": 13.32, + "end": 13.56, + "confidence": 0.737 + }, + { + "text": "have,", + "start": 13.56, + "end": 14.24, + "confidence": 0.124 + }, + { + "text": "say,", + "start": 14.74, + "end": 15.0, + "confidence": 0.153 + }, + { + "text": "a", + "start": 15.1, + "end": 15.16, + "confidence": 0.061 + }, + { + "text": "selected", + "start": 15.16, + "end": 15.76, + "confidence": 0.143 + }, + { + "text": "one", + "start": 15.76, + "end": 16.08, + "confidence": 0.754 + }, + { + "text": "or", + "start": 16.08, + "end": 16.22, + "confidence": 0.67 + }, + { + "text": "two", + "start": 16.22, + "end": 16.36, + "confidence": 0.6 + }, + { + "text": "on", + "start": 16.36, + "end": 16.6, + "confidence": 0.875 + }, + { + "text": "the", + "start": 16.6, + "end": 16.82, + "confidence": 0.809 + }, + { + "text": "helmet.", + "start": 16.82, + "end": 17.3, + "confidence": 0.651 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 17.32, + "end": 19.12, + 
"text": " We're going to have an B1.", + "tokens": [ + 51214, + 492, + 434, + 516, + 281, + 362, + 364, + 363, + 16, + 13, + 51314 + ], + "temperature": 0.4, + "avg_logprob": -0.6034294282547151, + "compression_ratio": 1.425, + "no_speech_prob": 0.3341304659843445, + "confidence": 0.545, + "words": [ + { + "text": "We're", + "start": 17.32, + "end": 17.78, + "confidence": 0.7 + }, + { + "text": "going", + "start": 17.78, + "end": 17.94, + "confidence": 0.505 + }, + { + "text": "to", + "start": 17.94, + "end": 18.06, + "confidence": 0.899 + }, + { + "text": "have", + "start": 18.06, + "end": 18.22, + "confidence": 0.848 + }, + { + "text": "an", + "start": 18.22, + "end": 18.4, + "confidence": 0.264 + }, + { + "text": "B1.", + "start": 18.4, + "end": 19.12, + "confidence": 0.395 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 20.08, + "end": 24.32, + "text": " And you could put the other one on Mike's helmet, we're still seeing the bleepers.", + "tokens": [ + 51314, + 400, + 291, + 727, + 829, + 264, + 661, + 472, + 322, + 6602, + 311, + 15922, + 11, + 321, + 434, + 920, + 2577, + 264, + 5408, + 595, + 433, + 13, + 51564 + ], + "temperature": 0.4, + "avg_logprob": -0.6034294282547151, + "compression_ratio": 1.425, + "no_speech_prob": 0.3341304659843445, + "confidence": 0.528, + "words": [ + { + "text": "And", + "start": 20.08, + "end": 20.22, + "confidence": 0.796 + }, + { + "text": "you", + "start": 20.22, + "end": 20.34, + "confidence": 0.885 + }, + { + "text": "could", + "start": 20.34, + "end": 20.48, + "confidence": 0.415 + }, + { + "text": "put", + "start": 20.48, + "end": 20.66, + "confidence": 0.894 + }, + { + "text": "the", + "start": 20.66, + "end": 20.84, + "confidence": 0.813 + }, + { + "text": "other", + "start": 20.84, + "end": 21.02, + "confidence": 0.774 + }, + { + "text": "one", + "start": 21.02, + "end": 21.22, + "confidence": 0.779 + }, + { + "text": "on", + "start": 21.22, + "end": 21.46, + "confidence": 0.906 + }, + { + "text": "Mike's", + 
"start": 21.46, + "end": 22.72, + "confidence": 0.831 + }, + { + "text": "helmet,", + "start": 22.72, + "end": 22.86, + "confidence": 0.791 + }, + { + "text": "we're", + "start": 23.06, + "end": 23.14, + "confidence": 0.354 + }, + { + "text": "still", + "start": 23.14, + "end": 23.34, + "confidence": 0.792 + }, + { + "text": "seeing", + "start": 23.34, + "end": 23.64, + "confidence": 0.728 + }, + { + "text": "the", + "start": 23.64, + "end": 23.88, + "confidence": 0.099 + }, + { + "text": "bleepers.", + "start": 23.88, + "end": 24.32, + "confidence": 0.228 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 24.86, + "end": 25.1, + "text": " Over.", + "tokens": [ + 51564, + 4886, + 13, + 51614 + ], + "temperature": 0.4, + "avg_logprob": -0.6034294282547151, + "compression_ratio": 1.425, + "no_speech_prob": 0.3341304659843445, + "confidence": 0.616, + "words": [ + { + "text": "Over.", + "start": 24.86, + "end": 25.1, + "confidence": 0.616 + } + ] + }, + { + "id": 6, + "seek": 3000, + "start": 31.32, + "end": 32.72, + "text": " I got a selected one on the helmet.", + "tokens": [ + 50414, + 286, + 658, + 257, + 8209, + 472, + 322, + 264, + 15922, + 13, + 50514 + ], + "temperature": 0.4, + "avg_logprob": -0.6114618367162244, + "compression_ratio": 1.8423913043478262, + "no_speech_prob": 0.1046256273984909, + "confidence": 0.456, + "words": [ + { + "text": "I", + "start": 31.32, + "end": 31.62, + "confidence": 0.533 + }, + { + "text": "got", + "start": 31.62, + "end": 31.9, + "confidence": 0.474 + }, + { + "text": "a", + "start": 31.9, + "end": 32.16, + "confidence": 0.31 + }, + { + "text": "selected", + "start": 32.16, + "end": 32.28, + "confidence": 0.203 + }, + { + "text": "one", + "start": 32.28, + "end": 32.54, + "confidence": 0.558 + }, + { + "text": "on", + "start": 32.54, + "end": 32.68, + "confidence": 0.559 + }, + { + "text": "the", + "start": 32.68, + "end": 32.7, + "confidence": 0.469 + }, + { + "text": "helmet.", + "start": 32.7, + "end": 32.72, + 
"confidence": 0.805 + } + ] + }, + { + "id": 7, + "seek": 3000, + "start": 32.72, + "end": 35.04, + "text": " I got a selected one on the helmet and B1.", + "tokens": [ + 50514, + 286, + 658, + 257, + 8209, + 472, + 322, + 264, + 15922, + 293, + 363, + 16, + 13, + 50614 + ], + "temperature": 0.4, + "avg_logprob": -0.6114618367162244, + "compression_ratio": 1.8423913043478262, + "no_speech_prob": 0.1046256273984909, + "confidence": 0.718, + "words": [ + { + "text": "I", + "start": 32.72, + "end": 32.9, + "confidence": 0.86 + }, + { + "text": "got", + "start": 32.9, + "end": 33.2, + "confidence": 0.85 + }, + { + "text": "a", + "start": 33.2, + "end": 33.38, + "confidence": 0.866 + }, + { + "text": "selected", + "start": 33.38, + "end": 33.68, + "confidence": 0.814 + }, + { + "text": "one", + "start": 33.68, + "end": 33.9, + "confidence": 0.76 + }, + { + "text": "on", + "start": 33.9, + "end": 34.0, + "confidence": 0.866 + }, + { + "text": "the", + "start": 34.0, + "end": 34.02, + "confidence": 0.628 + }, + { + "text": "helmet", + "start": 34.02, + "end": 34.22, + "confidence": 0.812 + }, + { + "text": "and", + "start": 34.22, + "end": 34.46, + "confidence": 0.221 + }, + { + "text": "B1.", + "start": 34.46, + "end": 35.04, + "confidence": 0.827 + } + ] + }, + { + "id": 8, + "seek": 3000, + "start": 35.06, + "end": 39.24, + "text": " The other one might go under bleep restraint.", + "tokens": [ + 50614, + 440, + 661, + 472, + 1062, + 352, + 833, + 5408, + 595, + 49281, + 13, + 50864 + ], + "temperature": 0.4, + "avg_logprob": -0.6114618367162244, + "compression_ratio": 1.8423913043478262, + "no_speech_prob": 0.1046256273984909, + "confidence": 0.634, + "words": [ + { + "text": "The", + "start": 35.06, + "end": 35.44, + "confidence": 0.513 + }, + { + "text": "other", + "start": 35.44, + "end": 35.6, + "confidence": 0.79 + }, + { + "text": "one", + "start": 35.6, + "end": 35.84, + "confidence": 0.785 + }, + { + "text": "might", + "start": 35.84, + "end": 36.1, + 
"confidence": 0.649 + }, + { + "text": "go", + "start": 36.1, + "end": 38.08, + "confidence": 0.681 + }, + { + "text": "under", + "start": 38.08, + "end": 38.34, + "confidence": 0.671 + }, + { + "text": "bleep", + "start": 38.34, + "end": 38.82, + "confidence": 0.547 + }, + { + "text": "restraint.", + "start": 38.82, + "end": 39.24, + "confidence": 0.584 + } + ] + }, + { + "id": 9, + "seek": 3000, + "start": 39.84, + "end": 42.06, + "text": " We've got them in their helmet bags.", + "tokens": [ + 50864, + 492, + 600, + 658, + 552, + 294, + 641, + 15922, + 10405, + 13, + 51014 + ], + "temperature": 0.4, + "avg_logprob": -0.6114618367162244, + "compression_ratio": 1.8423913043478262, + "no_speech_prob": 0.1046256273984909, + "confidence": 0.667, + "words": [ + { + "text": "We've", + "start": 39.84, + "end": 40.38, + "confidence": 0.625 + }, + { + "text": "got", + "start": 40.38, + "end": 40.52, + "confidence": 0.849 + }, + { + "text": "them", + "start": 40.52, + "end": 40.64, + "confidence": 0.639 + }, + { + "text": "in", + "start": 40.64, + "end": 40.8, + "confidence": 0.857 + }, + { + "text": "their", + "start": 40.8, + "end": 41.08, + "confidence": 0.702 + }, + { + "text": "helmet", + "start": 41.08, + "end": 41.58, + "confidence": 0.721 + }, + { + "text": "bags.", + "start": 41.58, + "end": 42.06, + "confidence": 0.424 + } + ] + }, + { + "id": 10, + "seek": 3000, + "start": 42.5, + "end": 47.42, + "text": " And I guess, excuse me, yeah, in the helmet bag.", + "tokens": [ + 51014, + 400, + 286, + 2041, + 11, + 8960, + 385, + 11, + 1338, + 11, + 294, + 264, + 15922, + 3411, + 13, + 51264 + ], + "temperature": 0.4, + "avg_logprob": -0.6114618367162244, + "compression_ratio": 1.8423913043478262, + "no_speech_prob": 0.1046256273984909, + "confidence": 0.395, + "words": [ + { + "text": "And", + "start": 42.5, + "end": 43.5, + "confidence": 0.794 + }, + { + "text": "I", + "start": 43.5, + "end": 44.06, + "confidence": 0.415 + }, + { + "text": "guess,", + "start": 44.06, 
+ "end": 44.24, + "confidence": 0.229 + }, + { + "text": "excuse", + "start": 44.44, + "end": 45.06, + "confidence": 0.577 + }, + { + "text": "me,", + "start": 45.06, + "end": 45.3, + "confidence": 0.849 + }, + { + "text": "yeah,", + "start": 45.54, + "end": 45.56, + "confidence": 0.074 + }, + { + "text": "in", + "start": 46.18, + "end": 46.74, + "confidence": 0.2 + }, + { + "text": "the", + "start": 46.74, + "end": 46.88, + "confidence": 0.706 + }, + { + "text": "helmet", + "start": 46.88, + "end": 47.08, + "confidence": 0.809 + }, + { + "text": "bag.", + "start": 47.08, + "end": 47.42, + "confidence": 0.296 + } + ] + }, + { + "id": 11, + "seek": 3000, + "start": 48.18, + "end": 49.58, + "text": " In the leavers, in the leavers bag.", + "tokens": [ + 51264, + 682, + 264, + 476, + 64, + 840, + 11, + 294, + 264, + 476, + 64, + 840, + 3411, + 13, + 51364 + ], + "temperature": 0.4, + "avg_logprob": -0.6114618367162244, + "compression_ratio": 1.8423913043478262, + "no_speech_prob": 0.1046256273984909, + "confidence": 0.45, + "words": [ + { + "text": "In", + "start": 48.18, + "end": 48.4, + "confidence": 0.121 + }, + { + "text": "the", + "start": 48.4, + "end": 48.48, + "confidence": 0.774 + }, + { + "text": "leavers,", + "start": 48.48, + "end": 48.78, + "confidence": 0.241 + }, + { + "text": "in", + "start": 48.86, + "end": 48.98, + "confidence": 0.777 + }, + { + "text": "the", + "start": 48.98, + "end": 49.1, + "confidence": 0.817 + }, + { + "text": "leavers", + "start": 49.1, + "end": 49.32, + "confidence": 0.661 + }, + { + "text": "bag.", + "start": 49.32, + "end": 49.58, + "confidence": 0.632 + } + ] + }, + { + "id": 12, + "seek": 3000, + "start": 50.06, + "end": 50.5, + "text": " Roger.", + "tokens": [ + 51364, + 17666, + 13, + 51414 + ], + "temperature": 0.4, + "avg_logprob": -0.6114618367162244, + "compression_ratio": 1.8423913043478262, + "no_speech_prob": 0.1046256273984909, + "confidence": 0.564, + "words": [ + { + "text": "Roger.", + "start": 50.06, + 
"end": 50.5, + "confidence": 0.564 + } + ] + }, + { + "id": 13, + "seek": 3000, + "start": 51.4, + "end": 52.92, + "text": " Roger. I'm taking them to the auto.", + "tokens": [ + 51414, + 17666, + 13, + 286, + 478, + 1940, + 552, + 281, + 264, + 257, + 8262, + 13, + 51514 + ], + "temperature": 0.4, + "avg_logprob": -0.6114618367162244, + "compression_ratio": 1.8423913043478262, + "no_speech_prob": 0.1046256273984909, + "confidence": 0.328, + "words": [ + { + "text": "Roger.", + "start": 51.4, + "end": 51.84, + "confidence": 0.698 + }, + { + "text": "I'm", + "start": 52.02, + "end": 52.1, + "confidence": 0.656 + }, + { + "text": "taking", + "start": 52.1, + "end": 52.28, + "confidence": 0.538 + }, + { + "text": "them", + "start": 52.28, + "end": 52.5, + "confidence": 0.071 + }, + { + "text": "to", + "start": 52.5, + "end": 52.62, + "confidence": 0.129 + }, + { + "text": "the", + "start": 52.62, + "end": 52.7, + "confidence": 0.577 + }, + { + "text": "auto.", + "start": 52.7, + "end": 52.92, + "confidence": 0.226 + } + ] + }, + { + "id": 14, + "seek": 3000, + "start": 53.1, + "end": 54.26, + "text": " Yeah, we're taking them to the auto.", + "tokens": [ + 51514, + 865, + 11, + 321, + 434, + 1940, + 552, + 281, + 264, + 8399, + 13, + 51614 + ], + "temperature": 0.4, + "avg_logprob": -0.6114618367162244, + "compression_ratio": 1.8423913043478262, + "no_speech_prob": 0.1046256273984909, + "confidence": 0.738, + "words": [ + { + "text": "Yeah,", + "start": 53.1, + "end": 53.26, + "confidence": 0.831 + }, + { + "text": "we're", + "start": 53.36, + "end": 53.46, + "confidence": 0.697 + }, + { + "text": "taking", + "start": 53.46, + "end": 53.64, + "confidence": 0.792 + }, + { + "text": "them", + "start": 53.64, + "end": 53.82, + "confidence": 0.733 + }, + { + "text": "to", + "start": 53.82, + "end": 53.98, + "confidence": 0.867 + }, + { + "text": "the", + "start": 53.98, + "end": 54.1, + "confidence": 0.815 + }, + { + "text": "auto.", + "start": 54.1, + "end": 54.26, + 
"confidence": 0.53 + } + ] + }, + { + "id": 15, + "seek": 3000, + "start": 54.5, + "end": 55.3, + "text": " You're good.", + "tokens": [ + 51614, + 509, + 434, + 665, + 13, + 51664 + ], + "temperature": 0.4, + "avg_logprob": -0.6114618367162244, + "compression_ratio": 1.8423913043478262, + "no_speech_prob": 0.1046256273984909, + "confidence": 0.092, + "words": [ + { + "text": "You're", + "start": 54.5, + "end": 55.16, + "confidence": 0.073 + }, + { + "text": "good.", + "start": 55.16, + "end": 55.3, + "confidence": 0.147 + } + ] + }, + { + "id": 16, + "seek": 5600, + "start": 56.4, + "end": 59.9, + "text": " You want to hang me on this, Charlie.", + "tokens": [ + 50364, + 509, + 528, + 281, + 3967, + 385, + 322, + 341, + 11, + 13754, + 13, + 50564 + ], + "temperature": 0.4, + "avg_logprob": -0.42095250990784283, + "compression_ratio": 1.5207373271889402, + "no_speech_prob": 0.4443783462047577, + "confidence": 0.472, + "words": [ + { + "text": "You", + "start": 56.4, + "end": 56.78, + "confidence": 0.12 + }, + { + "text": "want", + "start": 56.78, + "end": 57.16, + "confidence": 0.432 + }, + { + "text": "to", + "start": 57.16, + "end": 57.24, + "confidence": 0.809 + }, + { + "text": "hang", + "start": 57.24, + "end": 57.42, + "confidence": 0.544 + }, + { + "text": "me", + "start": 57.42, + "end": 57.6, + "confidence": 0.866 + }, + { + "text": "on", + "start": 57.6, + "end": 59.12, + "confidence": 0.849 + }, + { + "text": "this,", + "start": 59.12, + "end": 59.52, + "confidence": 0.236 + }, + { + "text": "Charlie.", + "start": 59.72, + "end": 59.9, + "confidence": 0.622 + } + ] + }, + { + "id": 17, + "seek": 5600, + "start": 59.92, + "end": 61.86, + "text": " With the cover. 
I tried it already.", + "tokens": [ + 50564, + 2022, + 264, + 2060, + 13, + 286, + 3031, + 309, + 1217, + 13, + 50664 + ], + "temperature": 0.4, + "avg_logprob": -0.42095250990784283, + "compression_ratio": 1.5207373271889402, + "no_speech_prob": 0.4443783462047577, + "confidence": 0.695, + "words": [ + { + "text": "With", + "start": 59.92, + "end": 60.64, + "confidence": 0.743 + }, + { + "text": "the", + "start": 60.64, + "end": 60.78, + "confidence": 0.316 + }, + { + "text": "cover.", + "start": 60.78, + "end": 61.06, + "confidence": 0.854 + }, + { + "text": "I", + "start": 61.16, + "end": 61.24, + "confidence": 0.939 + }, + { + "text": "tried", + "start": 61.24, + "end": 61.5, + "confidence": 0.603 + }, + { + "text": "it", + "start": 61.5, + "end": 61.64, + "confidence": 0.87 + }, + { + "text": "already.", + "start": 61.64, + "end": 61.86, + "confidence": 0.796 + } + ] + }, + { + "id": 18, + "seek": 5600, + "start": 62.4, + "end": 65.16, + "text": " Okay, fine. We weren't sure that this was a suggestion.", + "tokens": [ + 50664, + 1033, + 11, + 2489, + 13, + 492, + 4999, + 380, + 988, + 300, + 341, + 390, + 257, + 16541, + 13, + 50814 + ], + "temperature": 0.4, + "avg_logprob": -0.42095250990784283, + "compression_ratio": 1.5207373271889402, + "no_speech_prob": 0.4443783462047577, + "confidence": 0.693, + "words": [ + { + "text": "Okay,", + "start": 62.4, + "end": 62.82, + "confidence": 0.789 + }, + { + "text": "fine.", + "start": 62.96, + "end": 63.1, + "confidence": 0.866 + }, + { + "text": "We", + "start": 63.14, + "end": 63.3, + "confidence": 0.724 + }, + { + "text": "weren't", + "start": 63.3, + "end": 63.56, + "confidence": 0.91 + }, + { + "text": "sure", + "start": 63.56, + "end": 63.74, + "confidence": 0.831 + }, + { + "text": "that", + "start": 63.74, + "end": 64.1, + "confidence": 0.253 + }, + { + "text": "this", + "start": 64.1, + "end": 64.58, + "confidence": 0.704 + }, + { + "text": "was", + "start": 64.58, + "end": 64.76, + "confidence": 0.799 + }, 
+ { + "text": "a", + "start": 64.76, + "end": 64.88, + "confidence": 0.791 + }, + { + "text": "suggestion.", + "start": 64.88, + "end": 65.16, + "confidence": 0.464 + } + ] + }, + { + "id": 19, + "seek": 5600, + "start": 65.18, + "end": 67.82, + "text": " We thought you could check it out.", + "tokens": [ + 50814, + 492, + 1194, + 291, + 727, + 1520, + 309, + 484, + 13, + 50964 + ], + "temperature": 0.4, + "avg_logprob": -0.42095250990784283, + "compression_ratio": 1.5207373271889402, + "no_speech_prob": 0.4443783462047577, + "confidence": 0.761, + "words": [ + { + "text": "We", + "start": 65.18, + "end": 65.4, + "confidence": 0.881 + }, + { + "text": "thought", + "start": 65.4, + "end": 65.56, + "confidence": 0.819 + }, + { + "text": "you", + "start": 65.56, + "end": 67.02, + "confidence": 0.465 + }, + { + "text": "could", + "start": 67.02, + "end": 67.22, + "confidence": 0.802 + }, + { + "text": "check", + "start": 67.22, + "end": 67.44, + "confidence": 0.754 + }, + { + "text": "it", + "start": 67.44, + "end": 67.58, + "confidence": 0.898 + }, + { + "text": "out.", + "start": 67.58, + "end": 67.82, + "confidence": 0.815 + } + ] + }, + { + "id": 20, + "seek": 5600, + "start": 68.18, + "end": 69.32, + "text": " How much we've already done that.", + "tokens": [ + 50964, + 1012, + 709, + 321, + 600, + 1217, + 1096, + 300, + 13, + 51064 + ], + "temperature": 0.4, + "avg_logprob": -0.42095250990784283, + "compression_ratio": 1.5207373271889402, + "no_speech_prob": 0.4443783462047577, + "confidence": 0.484, + "words": [ + { + "text": "How", + "start": 68.18, + "end": 68.44, + "confidence": 0.2 + }, + { + "text": "much", + "start": 68.44, + "end": 68.6, + "confidence": 0.783 + }, + { + "text": "we've", + "start": 68.6, + "end": 68.76, + "confidence": 0.271 + }, + { + "text": "already", + "start": 68.76, + "end": 68.9, + "confidence": 0.804 + }, + { + "text": "done", + "start": 68.9, + "end": 69.1, + "confidence": 0.777 + }, + { + "text": "that.", + "start": 69.1, + 
"end": 69.32, + "confidence": 0.863 + } + ] + }, + { + "id": 21, + "seek": 5600, + "start": 69.5, + "end": 72.44, + "text": " So I guess whatever you come up with, just let us know.", + "tokens": [ + 51064, + 407, + 286, + 2041, + 2035, + 291, + 808, + 493, + 365, + 11, + 445, + 718, + 505, + 458, + 13, + 51314 + ], + "temperature": 0.4, + "avg_logprob": -0.42095250990784283, + "compression_ratio": 1.5207373271889402, + "no_speech_prob": 0.4443783462047577, + "confidence": 0.78, + "words": [ + { + "text": "So", + "start": 69.5, + "end": 69.7, + "confidence": 0.694 + }, + { + "text": "I", + "start": 69.7, + "end": 70.5, + "confidence": 0.697 + }, + { + "text": "guess", + "start": 70.5, + "end": 70.7, + "confidence": 0.913 + }, + { + "text": "whatever", + "start": 70.7, + "end": 71.12, + "confidence": 0.501 + }, + { + "text": "you", + "start": 71.12, + "end": 71.26, + "confidence": 0.888 + }, + { + "text": "come", + "start": 71.26, + "end": 71.42, + "confidence": 0.643 + }, + { + "text": "up", + "start": 71.42, + "end": 71.62, + "confidence": 0.895 + }, + { + "text": "with,", + "start": 71.62, + "end": 71.8, + "confidence": 0.814 + }, + { + "text": "just", + "start": 71.86, + "end": 71.98, + "confidence": 0.839 + }, + { + "text": "let", + "start": 71.98, + "end": 72.12, + "confidence": 0.903 + }, + { + "text": "us", + "start": 72.12, + "end": 72.26, + "confidence": 0.9 + }, + { + "text": "know.", + "start": 72.26, + "end": 72.44, + "confidence": 0.802 + } + ] + }, + { + "id": 22, + "seek": 5600, + "start": 74.5, + "end": 75.22, + "text": " Okay, no problem.", + "tokens": [ + 51314, + 1033, + 11, + 572, + 1154, + 13, + 51364 + ], + "temperature": 0.4, + "avg_logprob": -0.42095250990784283, + "compression_ratio": 1.5207373271889402, + "no_speech_prob": 0.4443783462047577, + "confidence": 0.681, + "words": [ + { + "text": "Okay,", + "start": 74.5, + "end": 74.52, + "confidence": 0.667 + }, + { + "text": "no", + "start": 74.6, + "end": 74.82, + "confidence": 0.605 + }, + 
{ + "text": "problem.", + "start": 74.82, + "end": 75.22, + "confidence": 0.783 + } + ] + }, + { + "id": 23, + "seek": 5600, + "start": 75.58, + "end": 76.52, + "text": " No problem.", + "tokens": [ + 51364, + 883, + 1154, + 13, + 51414 + ], + "temperature": 0.4, + "avg_logprob": -0.42095250990784283, + "compression_ratio": 1.5207373271889402, + "no_speech_prob": 0.4443783462047577, + "confidence": 0.417, + "words": [ + { + "text": "No", + "start": 75.58, + "end": 76.16, + "confidence": 0.233 + }, + { + "text": "problem.", + "start": 76.16, + "end": 76.52, + "confidence": 0.745 + } + ] + }, + { + "id": 24, + "seek": 5600, + "start": 76.52, + "end": 78.34, + "text": " We'll let you know by the end of the session.", + "tokens": [ + 51414, + 492, + 603, + 718, + 291, + 458, + 538, + 264, + 917, + 295, + 264, + 5481, + 13, + 51814 + ], + "temperature": 0.4, + "avg_logprob": -0.42095250990784283, + "compression_ratio": 1.5207373271889402, + "no_speech_prob": 0.4443783462047577, + "confidence": 0.565, + "words": [ + { + "text": "We'll", + "start": 76.52, + "end": 76.8, + "confidence": 0.74 + }, + { + "text": "let", + "start": 76.8, + "end": 76.96, + "confidence": 0.879 + }, + { + "text": "you", + "start": 76.96, + "end": 77.04, + "confidence": 0.814 + }, + { + "text": "know", + "start": 77.04, + "end": 77.18, + "confidence": 0.821 + }, + { + "text": "by", + "start": 77.18, + "end": 77.32, + "confidence": 0.091 + }, + { + "text": "the", + "start": 77.32, + "end": 77.4, + "confidence": 0.81 + }, + { + "text": "end", + "start": 77.4, + "end": 77.54, + "confidence": 0.818 + }, + { + "text": "of", + "start": 77.54, + "end": 77.68, + "confidence": 0.863 + }, + { + "text": "the", + "start": 77.68, + "end": 77.7, + "confidence": 0.508 + }, + { + "text": "session.", + "start": 77.7, + "end": 78.34, + "confidence": 0.219 + } + ] + }, + { + "id": 25, + "seek": 8500, + "start": 85.18, + "end": 85.58, + "text": " Okay.", + "tokens": [ + 50364, + 1033, + 13, + 50564 + ], + 
"temperature": 0.4, + "avg_logprob": -0.5724664211273194, + "compression_ratio": 0.38461538461538464, + "no_speech_prob": 0.9290614128112793, + "confidence": 0.316, + "words": [ + { + "text": "Okay.", + "start": 85.18, + "end": 85.58, + "confidence": 0.316 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases/medium.accurate_gloria.mp3.words.json b/tests/expected/corner_cases/medium.accurate_gloria.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..f0126a2a88cdf7bb4d8e71e5726337469a41e3ca --- /dev/null +++ b/tests/expected/corner_cases/medium.accurate_gloria.mp3.words.json @@ -0,0 +1,544 @@ +{ + "text": " Hello. How are you? How are you? Oh, I'm okay. I will be. I said she could stay with us tomorrow until she feels better. Of course she can. This won't be for long. Well, you can stay as long as you want, my love. I really miss you.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 1.32, + "end": 1.82, + "text": " Hello.", + "tokens": [ + 50364, + 2425, + 13, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.21400990205652573, + "compression_ratio": 1.4713375796178343, + "no_speech_prob": 0.050164904445409775, + "confidence": 0.189, + "words": [ + { + "text": "Hello.", + "start": 1.32, + "end": 1.82, + "confidence": 0.189 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 1.94, + "end": 3.54, + "text": " How are you?", + "tokens": [ + 50464, + 1012, + 366, + 291, + 30, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.21400990205652573, + "compression_ratio": 1.4713375796178343, + "no_speech_prob": 0.050164904445409775, + "confidence": 0.157, + "words": [ + { + "text": "How", + "start": 1.94, + "end": 2.6, + "confidence": 0.004 + }, + { + "text": "are", + "start": 2.6, + "end": 3.1, + "confidence": 0.238 + }, + { + "text": "you?", + "start": 3.1, + "end": 3.54, + "confidence": 0.809 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 3.7, + "end": 6.38, + 
"text": " How are you?", + "tokens": [ + 50564, + 1012, + 366, + 291, + 30, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.21400990205652573, + "compression_ratio": 1.4713375796178343, + "no_speech_prob": 0.050164904445409775, + "confidence": 0.353, + "words": [ + { + "text": "How", + "start": 3.7, + "end": 4.3, + "confidence": 0.021 + }, + { + "text": "are", + "start": 4.3, + "end": 6.12, + "confidence": 0.93 + }, + { + "text": "you?", + "start": 6.12, + "end": 6.38, + "confidence": 0.897 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 6.38, + "end": 7.74, + "text": " Oh, I'm okay.", + "tokens": [ + 50664, + 876, + 11, + 286, + 478, + 1392, + 13, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.21400990205652573, + "compression_ratio": 1.4713375796178343, + "no_speech_prob": 0.050164904445409775, + "confidence": 0.63, + "words": [ + { + "text": "Oh,", + "start": 6.38, + "end": 6.66, + "confidence": 0.37 + }, + { + "text": "I'm", + "start": 6.76, + "end": 7.18, + "confidence": 0.979 + }, + { + "text": "okay.", + "start": 7.18, + "end": 7.74, + "confidence": 0.692 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 7.74, + "end": 9.2, + "text": " I will be.", + "tokens": [ + 50764, + 286, + 486, + 312, + 13, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.21400990205652573, + "compression_ratio": 1.4713375796178343, + "no_speech_prob": 0.050164904445409775, + "confidence": 0.543, + "words": [ + { + "text": "I", + "start": 7.74, + "end": 8.64, + "confidence": 0.467 + }, + { + "text": "will", + "start": 8.64, + "end": 8.88, + "confidence": 0.582 + }, + { + "text": "be.", + "start": 8.88, + "end": 9.2, + "confidence": 0.565 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 9.5, + "end": 11.56, + "text": " I said she could stay with us tomorrow until she feels better.", + "tokens": [ + 50864, + 286, + 848, + 750, + 727, + 1754, + 365, + 505, + 4153, + 1826, + 750, + 3417, + 1101, + 13, + 50964 + ], + "temperature": 0.0, + "avg_logprob": 
-0.21400990205652573, + "compression_ratio": 1.4713375796178343, + "no_speech_prob": 0.050164904445409775, + "confidence": 0.494, + "words": [ + { + "text": "I", + "start": 9.5, + "end": 9.52, + "confidence": 0.283 + }, + { + "text": "said", + "start": 9.52, + "end": 9.64, + "confidence": 0.561 + }, + { + "text": "she", + "start": 9.64, + "end": 9.78, + "confidence": 0.919 + }, + { + "text": "could", + "start": 9.78, + "end": 9.98, + "confidence": 0.898 + }, + { + "text": "stay", + "start": 9.98, + "end": 10.16, + "confidence": 0.993 + }, + { + "text": "with", + "start": 10.16, + "end": 10.28, + "confidence": 0.991 + }, + { + "text": "us", + "start": 10.28, + "end": 10.46, + "confidence": 0.966 + }, + { + "text": "tomorrow", + "start": 10.46, + "end": 10.68, + "confidence": 0.004 + }, + { + "text": "until", + "start": 10.68, + "end": 10.84, + "confidence": 0.312 + }, + { + "text": "she", + "start": 10.84, + "end": 10.98, + "confidence": 0.997 + }, + { + "text": "feels", + "start": 10.98, + "end": 11.2, + "confidence": 0.988 + }, + { + "text": "better.", + "start": 11.2, + "end": 11.56, + "confidence": 0.825 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 11.56, + "end": 13.42, + "text": " Of course she can.", + "tokens": [ + 50964, + 2720, + 1164, + 750, + 393, + 13, + 51064 + ], + "temperature": 0.0, + "avg_logprob": -0.21400990205652573, + "compression_ratio": 1.4713375796178343, + "no_speech_prob": 0.050164904445409775, + "confidence": 0.31, + "words": [ + { + "text": "Of", + "start": 11.56, + "end": 12.54, + "confidence": 0.09 + }, + { + "text": "course", + "start": 12.54, + "end": 12.72, + "confidence": 0.985 + }, + { + "text": "she", + "start": 12.72, + "end": 13.04, + "confidence": 0.208 + }, + { + "text": "can.", + "start": 13.04, + "end": 13.42, + "confidence": 0.392 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 13.54, + "end": 15.24, + "text": " This won't be for long.", + "tokens": [ + 51064, + 639, + 1582, + 380, + 312, + 337, + 938, + 13, + 
51164 + ], + "temperature": 0.0, + "avg_logprob": -0.21400990205652573, + "compression_ratio": 1.4713375796178343, + "no_speech_prob": 0.050164904445409775, + "confidence": 0.444, + "words": [ + { + "text": "This", + "start": 13.54, + "end": 14.16, + "confidence": 0.007 + }, + { + "text": "won't", + "start": 14.16, + "end": 14.58, + "confidence": 0.926 + }, + { + "text": "be", + "start": 14.58, + "end": 14.8, + "confidence": 0.99 + }, + { + "text": "for", + "start": 14.8, + "end": 14.98, + "confidence": 0.978 + }, + { + "text": "long.", + "start": 14.98, + "end": 15.24, + "confidence": 0.764 + } + ] + }, + { + "id": 8, + "seek": 0, + "start": 15.5, + "end": 17.62, + "text": " Well, you can stay as long as you want, my love.", + "tokens": [ + 51164, + 1042, + 11, + 291, + 393, + 1754, + 382, + 938, + 382, + 291, + 528, + 11, + 452, + 959, + 13, + 51264 + ], + "temperature": 0.0, + "avg_logprob": -0.21400990205652573, + "compression_ratio": 1.4713375796178343, + "no_speech_prob": 0.050164904445409775, + "confidence": 0.587, + "words": [ + { + "text": "Well,", + "start": 15.5, + "end": 15.52, + "confidence": 0.073 + }, + { + "text": "you", + "start": 15.52, + "end": 15.66, + "confidence": 0.491 + }, + { + "text": "can", + "start": 15.66, + "end": 15.88, + "confidence": 0.876 + }, + { + "text": "stay", + "start": 15.88, + "end": 16.12, + "confidence": 0.932 + }, + { + "text": "as", + "start": 16.12, + "end": 16.28, + "confidence": 0.979 + }, + { + "text": "long", + "start": 16.28, + "end": 16.4, + "confidence": 0.995 + }, + { + "text": "as", + "start": 16.4, + "end": 16.54, + "confidence": 0.995 + }, + { + "text": "you", + "start": 16.54, + "end": 16.62, + "confidence": 0.995 + }, + { + "text": "want,", + "start": 16.62, + "end": 16.8, + "confidence": 0.643 + }, + { + "text": "my", + "start": 16.98, + "end": 17.2, + "confidence": 0.958 + }, + { + "text": "love.", + "start": 17.2, + "end": 17.62, + "confidence": 0.835 + } + ] + }, + { + "id": 9, + "seek": 0, + "start": 
17.62, + "end": 19.52, + "text": " I really miss you.", + "tokens": [ + 51264, + 286, + 534, + 1713, + 291, + 13, + 51364 + ], + "temperature": 0.0, + "avg_logprob": -0.21400990205652573, + "compression_ratio": 1.4713375796178343, + "no_speech_prob": 0.050164904445409775, + "confidence": 0.477, + "words": [ + { + "text": "I", + "start": 17.62, + "end": 17.94, + "confidence": 0.495 + }, + { + "text": "really", + "start": 17.94, + "end": 18.3, + "confidence": 0.89 + }, + { + "text": "miss", + "start": 18.3, + "end": 18.78, + "confidence": 0.338 + }, + { + "text": "you.", + "start": 18.78, + "end": 19.52, + "confidence": 0.408 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases/medium.efficient_gloria.mp3.words.json b/tests/expected/corner_cases/medium.efficient_gloria.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..bf8edd2aeb5c62e31d63133e4b035fb81e3ac8af --- /dev/null +++ b/tests/expected/corner_cases/medium.efficient_gloria.mp3.words.json @@ -0,0 +1,538 @@ +{ + "text": " Hello. How are you? Love. How are you? I'm okay. I will be. I said she could stay with us tomorrow, she feels better. Of course she can. This won't be for long. Well, you can stay as long as you want, my love. 
I really miss you.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 1.4, + "end": 1.74, + "text": " Hello.", + "tokens": [ + 50364, + 2425, + 13, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.239252830954159, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05016394704580307, + "confidence": 0.285, + "words": [ + { + "text": "Hello.", + "start": 1.4, + "end": 1.74, + "confidence": 0.285 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 2.32, + "end": 3.5, + "text": " How are you?", + "tokens": [ + 50464, + 1012, + 366, + 291, + 30, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.239252830954159, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05016394704580307, + "confidence": 0.786, + "words": [ + { + "text": "How", + "start": 2.32, + "end": 2.82, + "confidence": 0.551 + }, + { + "text": "are", + "start": 2.82, + "end": 3.24, + "confidence": 0.989 + }, + { + "text": "you?", + "start": 3.24, + "end": 3.5, + "confidence": 0.893 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 3.5, + "end": 4.26, + "text": " Love.", + "tokens": [ + 50564, + 5956, + 13, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.239252830954159, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05016394704580307, + "confidence": 0.244, + "words": [ + { + "text": "Love.", + "start": 3.5, + "end": 4.26, + "confidence": 0.244 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 5.5, + "end": 6.38, + "text": " How are you?", + "tokens": [ + 50664, + 1012, + 366, + 291, + 30, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.239252830954159, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05016394704580307, + "confidence": 0.926, + "words": [ + { + "text": "How", + "start": 5.5, + "end": 5.68, + "confidence": 0.851 + }, + { + "text": "are", + "start": 5.68, + "end": 6.08, + "confidence": 0.998 + }, + { + "text": "you?", + "start": 6.08, + "end": 6.38, + "confidence": 0.935 + } + ] + }, + { + "id": 
4, + "seek": 0, + "start": 7.5, + "end": 9.18, + "text": " I'm okay. I will be.", + "tokens": [ + 50764, + 286, + 478, + 1392, + 13, + 286, + 486, + 312, + 13, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.239252830954159, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05016394704580307, + "confidence": 0.769, + "words": [ + { + "text": "I'm", + "start": 7.5, + "end": 7.52, + "confidence": 0.767 + }, + { + "text": "okay.", + "start": 7.52, + "end": 8.22, + "confidence": 0.488 + }, + { + "text": "I", + "start": 8.42, + "end": 8.64, + "confidence": 0.765 + }, + { + "text": "will", + "start": 8.64, + "end": 8.94, + "confidence": 0.949 + }, + { + "text": "be.", + "start": 8.94, + "end": 9.18, + "confidence": 0.997 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 9.5, + "end": 11.52, + "text": " I said she could stay with us tomorrow, she feels better.", + "tokens": [ + 50864, + 286, + 848, + 750, + 727, + 1754, + 365, + 505, + 4153, + 11, + 750, + 3417, + 1101, + 13, + 50964 + ], + "temperature": 0.0, + "avg_logprob": -0.239252830954159, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05016394704580307, + "confidence": 0.786, + "words": [ + { + "text": "I", + "start": 9.5, + "end": 9.52, + "confidence": 0.883 + }, + { + "text": "said", + "start": 9.52, + "end": 9.64, + "confidence": 0.856 + }, + { + "text": "she", + "start": 9.64, + "end": 9.78, + "confidence": 0.928 + }, + { + "text": "could", + "start": 9.78, + "end": 9.96, + "confidence": 0.909 + }, + { + "text": "stay", + "start": 9.96, + "end": 10.16, + "confidence": 0.994 + }, + { + "text": "with", + "start": 10.16, + "end": 10.28, + "confidence": 0.977 + }, + { + "text": "us", + "start": 10.28, + "end": 10.46, + "confidence": 0.993 + }, + { + "text": "tomorrow,", + "start": 10.46, + "end": 10.66, + "confidence": 0.514 + }, + { + "text": "she", + "start": 10.72, + "end": 10.86, + "confidence": 0.279 + }, + { + "text": "feels", + "start": 10.86, + "end": 11.16, + 
"confidence": 0.809 + }, + { + "text": "better.", + "start": 11.16, + "end": 11.52, + "confidence": 0.992 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 12.06, + "end": 13.36, + "text": " Of course she can.", + "tokens": [ + 50964, + 2720, + 1164, + 750, + 393, + 13, + 51064 + ], + "temperature": 0.0, + "avg_logprob": -0.239252830954159, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05016394704580307, + "confidence": 0.928, + "words": [ + { + "text": "Of", + "start": 12.06, + "end": 12.34, + "confidence": 0.788 + }, + { + "text": "course", + "start": 12.34, + "end": 12.56, + "confidence": 0.989 + }, + { + "text": "she", + "start": 12.56, + "end": 12.88, + "confidence": 0.958 + }, + { + "text": "can.", + "start": 12.88, + "end": 13.36, + "confidence": 0.993 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 13.5, + "end": 15.26, + "text": " This won't be for long.", + "tokens": [ + 51064, + 639, + 1582, + 380, + 312, + 337, + 938, + 13, + 51164 + ], + "temperature": 0.0, + "avg_logprob": -0.239252830954159, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05016394704580307, + "confidence": 0.892, + "words": [ + { + "text": "This", + "start": 13.5, + "end": 14.28, + "confidence": 0.543 + }, + { + "text": "won't", + "start": 14.28, + "end": 14.6, + "confidence": 0.975 + }, + { + "text": "be", + "start": 14.6, + "end": 14.8, + "confidence": 0.996 + }, + { + "text": "for", + "start": 14.8, + "end": 14.96, + "confidence": 0.984 + }, + { + "text": "long.", + "start": 14.96, + "end": 15.26, + "confidence": 0.996 + } + ] + }, + { + "id": 8, + "seek": 0, + "start": 15.5, + "end": 17.62, + "text": " Well, you can stay as long as you want, my love.", + "tokens": [ + 51164, + 1042, + 11, + 291, + 393, + 1754, + 382, + 938, + 382, + 291, + 528, + 11, + 452, + 959, + 13, + 51264 + ], + "temperature": 0.0, + "avg_logprob": -0.239252830954159, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05016394704580307, + "confidence": 
0.927, + "words": [ + { + "text": "Well,", + "start": 15.5, + "end": 15.56, + "confidence": 0.675 + }, + { + "text": "you", + "start": 15.68, + "end": 15.7, + "confidence": 0.744 + }, + { + "text": "can", + "start": 15.7, + "end": 15.94, + "confidence": 0.982 + }, + { + "text": "stay", + "start": 15.94, + "end": 16.14, + "confidence": 0.985 + }, + { + "text": "as", + "start": 16.14, + "end": 16.28, + "confidence": 0.972 + }, + { + "text": "long", + "start": 16.28, + "end": 16.4, + "confidence": 0.994 + }, + { + "text": "as", + "start": 16.4, + "end": 16.48, + "confidence": 0.996 + }, + { + "text": "you", + "start": 16.48, + "end": 16.62, + "confidence": 0.995 + }, + { + "text": "want,", + "start": 16.62, + "end": 16.8, + "confidence": 0.995 + }, + { + "text": "my", + "start": 16.88, + "end": 17.22, + "confidence": 0.949 + }, + { + "text": "love.", + "start": 17.22, + "end": 17.62, + "confidence": 0.993 + } + ] + }, + { + "id": 9, + "seek": 0, + "start": 17.76, + "end": 19.26, + "text": " I really miss you.", + "tokens": [ + 51264, + 286, + 534, + 1713, + 291, + 13, + 51364 + ], + "temperature": 0.0, + "avg_logprob": -0.239252830954159, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05016394704580307, + "confidence": 0.792, + "words": [ + { + "text": "I", + "start": 17.76, + "end": 17.98, + "confidence": 0.587 + }, + { + "text": "really", + "start": 17.98, + "end": 18.36, + "confidence": 0.944 + }, + { + "text": "miss", + "start": 18.36, + "end": 18.78, + "confidence": 0.905 + }, + { + "text": "you.", + "start": 18.78, + "end": 19.26, + "confidence": 0.787 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases/nocond.random_music.mp4.words.json b/tests/expected/corner_cases/nocond.random_music.mp4.words.json new file mode 100644 index 0000000000000000000000000000000000000000..738b6ecf6da12d005334c8d09e06c91793417d3c --- /dev/null +++ 
b/tests/expected/corner_cases/nocond.random_music.mp4.words.json @@ -0,0 +1,3274 @@ +{ + "text": " I I Wake up to the sounds of the silence that allows for my mind to run around with my ear up to the ground I'm searching to behold the stories that are told when my back is to the world smiling when I turn Hey Oh the misery everybody wants to be my enemy Spell the sympathy everybody wants to be my enemy My enemy But i'm ready your words up on the wall Is you're praying for my phone and the laughter in the holes and the names that i've been called I stack it in my mind and i'm waiting for the time when I show you what it's like to be worse Oh the misery everybody wants to be my enemy Smell the sympathy everybody wants to be my enemy Look out for yourself My enemy Look out for yourself Look, okay, I'm hoping that somebody pray for me I'm praying that somebody vote for me I'm staying where nobody's supposed to be I propose to be in the wreck of emotions Ready to go whenever you let me know The road is long so put the pedal into the flow The enemy on my trail, my energy unavailable Imma tell the master the way go When the plot on my track to the top I been out of shape, thinking that I'm a box of an astronaut Blasted off the planet, rocked the cars, catastrophic And it matters more because I had it, now I had it I thought about wreaking havoc on an opposition Kinda shockin', they want it static, with precision I'm automatic, quarterback, I ain't talkin' second Pack it, pack it up, I don't panic, better, batter up Who the baddest? 
It don't matter, cuz we it's your Everybody wants to be my enemy Smell the sympathy everybody wants to be my enemy Oh, the misery Everybody wants to be my enemy Smell the sympathy everybody wants to be my enemy I swear I'll never be a saint No way, my enemy I swear I'll never be a saint You gotta be a saint", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 2.24, + "end": 2.36, + "text": " I", + "tokens": [ + 50364, + 286, + 50464 + ], + "temperature": 0.4, + "avg_logprob": -0.9360545873641968, + "compression_ratio": 0.1111111111111111, + "no_speech_prob": 0.7811808586120605, + "confidence": 0.032, + "words": [ + { + "text": "I", + "start": 2.24, + "end": 2.36, + "confidence": 0.032 + } + ] + }, + { + "id": 1, + "seek": 3000, + "start": 30.2, + "end": 31.42, + "text": " I", + "tokens": [ + 50372, + 286, + 50472 + ], + "temperature": 0.4, + "avg_logprob": -0.43623164983896107, + "compression_ratio": 1.5338345864661653, + "no_speech_prob": 0.10452272742986679, + "confidence": 0.252, + "words": [ + { + "text": "I", + "start": 30.2, + "end": 31.42, + "confidence": 0.252 + } + ] + }, + { + "id": 2, + "seek": 3000, + "start": 38.98, + "end": 44.76, + "text": " Wake up to the sounds of the silence that allows for my mind to run around with my ear up to the ground", + "tokens": [ + 50800, + 21062, + 493, + 281, + 264, + 3263, + 295, + 264, + 12239, + 300, + 4045, + 337, + 452, + 1575, + 281, + 1190, + 926, + 365, + 452, + 1273, + 493, + 281, + 264, + 2727, + 51084 + ], + "temperature": 0.4, + "avg_logprob": -0.43623164983896107, + "compression_ratio": 1.5338345864661653, + "no_speech_prob": 0.10452272742986679, + "confidence": 0.907, + "words": [ + { + "text": "Wake", + "start": 38.98, + "end": 39.28, + "confidence": 0.89 + }, + { + "text": "up", + "start": 39.28, + "end": 39.56, + "confidence": 0.996 + }, + { + "text": "to", + "start": 39.56, + "end": 39.74, + "confidence": 0.988 + }, + { + "text": "the", + "start": 39.74, + "end": 39.96, + "confidence": 0.863 + }, 
+ { + "text": "sounds", + "start": 39.96, + "end": 40.32, + "confidence": 0.554 + }, + { + "text": "of", + "start": 40.32, + "end": 40.5, + "confidence": 0.942 + }, + { + "text": "the", + "start": 40.5, + "end": 40.62, + "confidence": 0.997 + }, + { + "text": "silence", + "start": 40.62, + "end": 41.1, + "confidence": 0.952 + }, + { + "text": "that", + "start": 41.1, + "end": 41.38, + "confidence": 0.969 + }, + { + "text": "allows", + "start": 41.38, + "end": 41.84, + "confidence": 0.895 + }, + { + "text": "for", + "start": 41.84, + "end": 42.06, + "confidence": 0.811 + }, + { + "text": "my", + "start": 42.06, + "end": 42.22, + "confidence": 0.946 + }, + { + "text": "mind", + "start": 42.22, + "end": 42.46, + "confidence": 0.976 + }, + { + "text": "to", + "start": 42.46, + "end": 42.68, + "confidence": 0.856 + }, + { + "text": "run", + "start": 42.68, + "end": 42.86, + "confidence": 0.994 + }, + { + "text": "around", + "start": 42.86, + "end": 43.3, + "confidence": 0.966 + }, + { + "text": "with", + "start": 43.3, + "end": 43.56, + "confidence": 0.612 + }, + { + "text": "my", + "start": 43.56, + "end": 43.68, + "confidence": 0.992 + }, + { + "text": "ear", + "start": 43.68, + "end": 43.96, + "confidence": 0.896 + }, + { + "text": "up", + "start": 43.96, + "end": 44.22, + "confidence": 0.988 + }, + { + "text": "to", + "start": 44.22, + "end": 44.34, + "confidence": 0.993 + }, + { + "text": "the", + "start": 44.34, + "end": 44.54, + "confidence": 0.996 + }, + { + "text": "ground", + "start": 44.54, + "end": 44.76, + "confidence": 0.995 + } + ] + }, + { + "id": 3, + "seek": 3000, + "start": 45.08, + "end": 51.0, + "text": " I'm searching to behold the stories that are told when my back is to the world smiling when I turn", + "tokens": [ + 51094, + 286, + 478, + 10808, + 281, + 27234, + 264, + 3676, + 300, + 366, + 1907, + 562, + 452, + 646, + 307, + 281, + 264, + 1002, + 16005, + 562, + 286, + 1261, + 51405 + ], + "temperature": 0.4, + "avg_logprob": 
-0.43623164983896107, + "compression_ratio": 1.5338345864661653, + "no_speech_prob": 0.10452272742986679, + "confidence": 0.776, + "words": [ + { + "text": "I'm", + "start": 45.08, + "end": 45.24, + "confidence": 0.997 + }, + { + "text": "searching", + "start": 45.24, + "end": 45.68, + "confidence": 0.984 + }, + { + "text": "to", + "start": 45.68, + "end": 45.98, + "confidence": 0.991 + }, + { + "text": "behold", + "start": 45.98, + "end": 46.46, + "confidence": 0.426 + }, + { + "text": "the", + "start": 46.46, + "end": 46.84, + "confidence": 0.969 + }, + { + "text": "stories", + "start": 46.84, + "end": 47.3, + "confidence": 0.992 + }, + { + "text": "that", + "start": 47.3, + "end": 47.56, + "confidence": 0.96 + }, + { + "text": "are", + "start": 47.56, + "end": 47.76, + "confidence": 0.459 + }, + { + "text": "told", + "start": 47.76, + "end": 48.06, + "confidence": 0.598 + }, + { + "text": "when", + "start": 48.06, + "end": 48.3, + "confidence": 0.341 + }, + { + "text": "my", + "start": 48.3, + "end": 48.52, + "confidence": 0.493 + }, + { + "text": "back", + "start": 48.52, + "end": 48.7, + "confidence": 0.978 + }, + { + "text": "is", + "start": 48.7, + "end": 48.9, + "confidence": 0.981 + }, + { + "text": "to", + "start": 48.9, + "end": 49.06, + "confidence": 0.993 + }, + { + "text": "the", + "start": 49.06, + "end": 49.28, + "confidence": 0.998 + }, + { + "text": "world", + "start": 49.28, + "end": 49.64, + "confidence": 0.995 + }, + { + "text": "smiling", + "start": 49.64, + "end": 50.32, + "confidence": 0.339 + }, + { + "text": "when", + "start": 50.32, + "end": 50.62, + "confidence": 0.954 + }, + { + "text": "I", + "start": 50.62, + "end": 50.84, + "confidence": 0.962 + }, + { + "text": "turn", + "start": 50.84, + "end": 51.0, + "confidence": 0.944 + } + ] + }, + { + "id": 4, + "seek": 6000, + "start": 60.0, + "end": 61.72, + "text": " Hey", + "tokens": [ + 50368, + 1911, + 50468 + ], + "temperature": 0.4, + "avg_logprob": -0.5247946894446085, + 
"compression_ratio": 1.47, + "no_speech_prob": 0.10004138946533203, + "confidence": 0.095, + "words": [ + { + "text": "Hey", + "start": 60.0, + "end": 61.72, + "confidence": 0.095 + } + ] + }, + { + "id": 5, + "seek": 6000, + "start": 64.62, + "end": 69.16, + "text": " Oh the misery everybody wants to be my enemy", + "tokens": [ + 50596, + 876, + 264, + 32309, + 2201, + 2738, + 281, + 312, + 452, + 5945, + 50816 + ], + "temperature": 0.4, + "avg_logprob": -0.5247946894446085, + "compression_ratio": 1.47, + "no_speech_prob": 0.10004138946533203, + "confidence": 0.946, + "words": [ + { + "text": "Oh", + "start": 64.62, + "end": 65.38, + "confidence": 0.942 + }, + { + "text": "the", + "start": 65.38, + "end": 65.6, + "confidence": 0.776 + }, + { + "text": "misery", + "start": 65.6, + "end": 66.38, + "confidence": 0.986 + }, + { + "text": "everybody", + "start": 66.38, + "end": 67.7, + "confidence": 0.878 + }, + { + "text": "wants", + "start": 67.7, + "end": 68.08, + "confidence": 0.983 + }, + { + "text": "to", + "start": 68.08, + "end": 68.26, + "confidence": 0.998 + }, + { + "text": "be", + "start": 68.26, + "end": 68.4, + "confidence": 0.998 + }, + { + "text": "my", + "start": 68.4, + "end": 68.72, + "confidence": 0.992 + }, + { + "text": "enemy", + "start": 68.72, + "end": 69.16, + "confidence": 0.989 + } + ] + }, + { + "id": 6, + "seek": 6000, + "start": 70.88, + "end": 76.18, + "text": " Spell the sympathy everybody wants to be my enemy", + "tokens": [ + 50900, + 3550, + 285, + 264, + 33240, + 2201, + 2738, + 281, + 312, + 452, + 5945, + 51169 + ], + "temperature": 0.4, + "avg_logprob": -0.5247946894446085, + "compression_ratio": 1.47, + "no_speech_prob": 0.10004138946533203, + "confidence": 0.848, + "words": [ + { + "text": "Spell", + "start": 70.88, + "end": 71.62, + "confidence": 0.548 + }, + { + "text": "the", + "start": 71.62, + "end": 71.88, + "confidence": 0.91 + }, + { + "text": "sympathy", + "start": 71.88, + "end": 72.64, + "confidence": 0.986 + }, + { 
+ "text": "everybody", + "start": 72.64, + "end": 73.94, + "confidence": 0.817 + }, + { + "text": "wants", + "start": 73.94, + "end": 74.32, + "confidence": 0.988 + }, + { + "text": "to", + "start": 74.32, + "end": 74.62, + "confidence": 0.998 + }, + { + "text": "be", + "start": 74.62, + "end": 75.16, + "confidence": 0.998 + }, + { + "text": "my", + "start": 75.16, + "end": 75.72, + "confidence": 0.893 + }, + { + "text": "enemy", + "start": 75.72, + "end": 76.18, + "confidence": 0.991 + } + ] + }, + { + "id": 7, + "seek": 6000, + "start": 81.82, + "end": 82.76, + "text": " My enemy", + "tokens": [ + 51440, + 1222, + 5945, + 51540 + ], + "temperature": 0.4, + "avg_logprob": -0.5247946894446085, + "compression_ratio": 1.47, + "no_speech_prob": 0.10004138946533203, + "confidence": 0.963, + "words": [ + { + "text": "My", + "start": 81.82, + "end": 82.06, + "confidence": 0.967 + }, + { + "text": "enemy", + "start": 82.06, + "end": 82.76, + "confidence": 0.959 + } + ] + }, + { + "id": 8, + "seek": 6000, + "start": 87.66, + "end": 89.94, + "text": " But i'm ready your words up on the wall", + "tokens": [ + 51744, + 583, + 741, + 478, + 1919, + 428, + 2283, + 493, + 322, + 264, + 2929, + 51860 + ], + "temperature": 0.4, + "avg_logprob": -0.5247946894446085, + "compression_ratio": 1.47, + "no_speech_prob": 0.10004138946533203, + "confidence": 0.738, + "words": [ + { + "text": "But", + "start": 87.66, + "end": 88.0, + "confidence": 0.178 + }, + { + "text": "i'm", + "start": 88.0, + "end": 88.16, + "confidence": 0.893 + }, + { + "text": "ready", + "start": 88.16, + "end": 88.6, + "confidence": 0.962 + }, + { + "text": "your", + "start": 88.6, + "end": 88.88, + "confidence": 0.796 + }, + { + "text": "words", + "start": 88.88, + "end": 89.16, + "confidence": 0.973 + }, + { + "text": "up", + "start": 89.16, + "end": 89.48, + "confidence": 0.963 + }, + { + "text": "on", + "start": 89.48, + "end": 89.6, + "confidence": 0.992 + }, + { + "text": "the", + "start": 89.6, + "end": 
89.82, + "confidence": 0.996 + }, + { + "text": "wall", + "start": 89.82, + "end": 89.94, + "confidence": 0.475 + } + ] + }, + { + "id": 9, + "seek": 9000, + "start": 90.16, + "end": 94.74, + "text": " Is you're praying for my phone and the laughter in the holes and the names that i've been called", + "tokens": [ + 50364, + 1119, + 291, + 434, + 15611, + 337, + 452, + 2593, + 293, + 264, + 13092, + 294, + 264, + 8118, + 293, + 264, + 5288, + 300, + 741, + 600, + 668, + 1219, + 50592 + ], + "temperature": 0.4, + "avg_logprob": -0.20849926745305297, + "compression_ratio": 1.5161290322580645, + "no_speech_prob": 0.0077798133715987206, + "confidence": 0.874, + "words": [ + { + "text": "Is", + "start": 90.16, + "end": 90.34, + "confidence": 0.55 + }, + { + "text": "you're", + "start": 90.34, + "end": 90.48, + "confidence": 0.621 + }, + { + "text": "praying", + "start": 90.48, + "end": 90.86, + "confidence": 0.998 + }, + { + "text": "for", + "start": 90.86, + "end": 91.1, + "confidence": 0.997 + }, + { + "text": "my", + "start": 91.1, + "end": 91.22, + "confidence": 0.997 + }, + { + "text": "phone", + "start": 91.22, + "end": 91.72, + "confidence": 0.568 + }, + { + "text": "and", + "start": 91.72, + "end": 91.9, + "confidence": 0.951 + }, + { + "text": "the", + "start": 91.9, + "end": 92.12, + "confidence": 0.998 + }, + { + "text": "laughter", + "start": 92.12, + "end": 92.5, + "confidence": 0.994 + }, + { + "text": "in", + "start": 92.5, + "end": 92.72, + "confidence": 0.953 + }, + { + "text": "the", + "start": 92.72, + "end": 92.92, + "confidence": 1.0 + }, + { + "text": "holes", + "start": 92.92, + "end": 93.26, + "confidence": 0.797 + }, + { + "text": "and", + "start": 93.26, + "end": 93.46, + "confidence": 0.958 + }, + { + "text": "the", + "start": 93.46, + "end": 93.6, + "confidence": 0.999 + }, + { + "text": "names", + "start": 93.6, + "end": 93.84, + "confidence": 0.997 + }, + { + "text": "that", + "start": 93.84, + "end": 94.12, + "confidence": 0.997 + }, + { + 
"text": "i've", + "start": 94.12, + "end": 94.3, + "confidence": 0.952 + }, + { + "text": "been", + "start": 94.3, + "end": 94.52, + "confidence": 0.994 + }, + { + "text": "called", + "start": 94.52, + "end": 94.74, + "confidence": 0.808 + } + ] + }, + { + "id": 10, + "seek": 9000, + "start": 94.94, + "end": 100.06, + "text": " I stack it in my mind and i'm waiting for the time when I show you what it's like to be worse", + "tokens": [ + 50604, + 286, + 8630, + 309, + 294, + 452, + 1575, + 293, + 741, + 478, + 3806, + 337, + 264, + 565, + 562, + 286, + 855, + 291, + 437, + 309, + 311, + 411, + 281, + 312, + 5324, + 50860 + ], + "temperature": 0.4, + "avg_logprob": -0.20849926745305297, + "compression_ratio": 1.5161290322580645, + "no_speech_prob": 0.0077798133715987206, + "confidence": 0.95, + "words": [ + { + "text": "I", + "start": 94.94, + "end": 95.12, + "confidence": 0.979 + }, + { + "text": "stack", + "start": 95.12, + "end": 95.4, + "confidence": 0.908 + }, + { + "text": "it", + "start": 95.4, + "end": 95.7, + "confidence": 0.999 + }, + { + "text": "in", + "start": 95.7, + "end": 95.82, + "confidence": 0.999 + }, + { + "text": "my", + "start": 95.82, + "end": 96.04, + "confidence": 0.999 + }, + { + "text": "mind", + "start": 96.04, + "end": 96.42, + "confidence": 0.997 + }, + { + "text": "and", + "start": 96.42, + "end": 96.62, + "confidence": 0.931 + }, + { + "text": "i'm", + "start": 96.62, + "end": 96.74, + "confidence": 0.997 + }, + { + "text": "waiting", + "start": 96.74, + "end": 97.14, + "confidence": 0.998 + }, + { + "text": "for", + "start": 97.14, + "end": 97.34, + "confidence": 0.99 + }, + { + "text": "the", + "start": 97.34, + "end": 97.54, + "confidence": 0.998 + }, + { + "text": "time", + "start": 97.54, + "end": 97.86, + "confidence": 0.997 + }, + { + "text": "when", + "start": 97.86, + "end": 98.14, + "confidence": 0.963 + }, + { + "text": "I", + "start": 98.14, + "end": 98.3, + "confidence": 0.889 + }, + { + "text": "show", + "start": 98.3, 
+ "end": 98.48, + "confidence": 0.995 + }, + { + "text": "you", + "start": 98.48, + "end": 98.74, + "confidence": 0.993 + }, + { + "text": "what", + "start": 98.74, + "end": 98.92, + "confidence": 0.986 + }, + { + "text": "it's", + "start": 98.92, + "end": 99.22, + "confidence": 0.994 + }, + { + "text": "like", + "start": 99.22, + "end": 99.48, + "confidence": 0.993 + }, + { + "text": "to", + "start": 99.48, + "end": 99.7, + "confidence": 0.961 + }, + { + "text": "be", + "start": 99.7, + "end": 99.84, + "confidence": 0.996 + }, + { + "text": "worse", + "start": 99.84, + "end": 100.06, + "confidence": 0.463 + } + ] + }, + { + "id": 11, + "seek": 9000, + "start": 113.98, + "end": 119.04, + "text": " Oh the misery everybody wants to be my enemy", + "tokens": [ + 51588, + 876, + 264, + 32309, + 2201, + 2738, + 281, + 312, + 452, + 5945, + 51813 + ], + "temperature": 0.4, + "avg_logprob": -0.20849926745305297, + "compression_ratio": 1.5161290322580645, + "no_speech_prob": 0.0077798133715987206, + "confidence": 0.947, + "words": [ + { + "text": "Oh", + "start": 113.98, + "end": 115.22, + "confidence": 0.962 + }, + { + "text": "the", + "start": 115.22, + "end": 115.46, + "confidence": 0.786 + }, + { + "text": "misery", + "start": 115.46, + "end": 116.42, + "confidence": 0.998 + }, + { + "text": "everybody", + "start": 116.42, + "end": 117.58, + "confidence": 0.836 + }, + { + "text": "wants", + "start": 117.58, + "end": 117.96, + "confidence": 0.988 + }, + { + "text": "to", + "start": 117.96, + "end": 118.14, + "confidence": 1.0 + }, + { + "text": "be", + "start": 118.14, + "end": 118.26, + "confidence": 1.0 + }, + { + "text": "my", + "start": 118.26, + "end": 118.58, + "confidence": 0.996 + }, + { + "text": "enemy", + "start": 118.58, + "end": 119.04, + "confidence": 0.991 + } + ] + }, + { + "id": 12, + "seek": 12000, + "start": 120.72, + "end": 126.26, + "text": " Smell the sympathy everybody wants to be my enemy", + "tokens": [ + 50400, + 3915, + 898, + 264, + 33240, + 
2201, + 2738, + 281, + 312, + 452, + 5945, + 50669 + ], + "temperature": 0.4, + "avg_logprob": -0.5485876959723395, + "compression_ratio": 1.7787610619469028, + "no_speech_prob": 0.02783079631626606, + "confidence": 0.897, + "words": [ + { + "text": "Smell", + "start": 120.72, + "end": 121.48, + "confidence": 0.764 + }, + { + "text": "the", + "start": 121.48, + "end": 121.7, + "confidence": 0.936 + }, + { + "text": "sympathy", + "start": 121.7, + "end": 122.48, + "confidence": 0.986 + }, + { + "text": "everybody", + "start": 122.48, + "end": 123.8, + "confidence": 0.712 + }, + { + "text": "wants", + "start": 123.8, + "end": 124.18, + "confidence": 0.987 + }, + { + "text": "to", + "start": 124.18, + "end": 124.46, + "confidence": 0.998 + }, + { + "text": "be", + "start": 124.46, + "end": 124.9, + "confidence": 0.999 + }, + { + "text": "my", + "start": 124.9, + "end": 125.56, + "confidence": 0.901 + }, + { + "text": "enemy", + "start": 125.56, + "end": 126.26, + "confidence": 0.988 + } + ] + }, + { + "id": 13, + "seek": 12000, + "start": 129.84, + "end": 130.98, + "text": " Look out for yourself", + "tokens": [ + 50856, + 2053, + 484, + 337, + 1803, + 50929 + ], + "temperature": 0.4, + "avg_logprob": -0.5485876959723395, + "compression_ratio": 1.7787610619469028, + "no_speech_prob": 0.02783079631626606, + "confidence": 0.968, + "words": [ + { + "text": "Look", + "start": 129.84, + "end": 130.16, + "confidence": 0.936 + }, + { + "text": "out", + "start": 130.16, + "end": 130.3, + "confidence": 0.954 + }, + { + "text": "for", + "start": 130.3, + "end": 130.5, + "confidence": 0.994 + }, + { + "text": "yourself", + "start": 130.5, + "end": 130.98, + "confidence": 0.989 + } + ] + }, + { + "id": 14, + "seek": 12000, + "start": 131.52, + "end": 132.5, + "text": " My enemy", + "tokens": [ + 50936, + 1222, + 5945, + 51005 + ], + "temperature": 0.4, + "avg_logprob": -0.5485876959723395, + "compression_ratio": 1.7787610619469028, + "no_speech_prob": 0.02783079631626606, + 
"confidence": 0.919, + "words": [ + { + "text": "My", + "start": 131.52, + "end": 131.9, + "confidence": 0.888 + }, + { + "text": "enemy", + "start": 131.9, + "end": 132.5, + "confidence": 0.95 + } + ] + }, + { + "id": 15, + "seek": 12000, + "start": 136.02, + "end": 137.2, + "text": " Look out for yourself", + "tokens": [ + 51160, + 2053, + 484, + 337, + 1803, + 51221 + ], + "temperature": 0.4, + "avg_logprob": -0.5485876959723395, + "compression_ratio": 1.7787610619469028, + "no_speech_prob": 0.02783079631626606, + "confidence": 0.995, + "words": [ + { + "text": "Look", + "start": 136.02, + "end": 136.36, + "confidence": 0.987 + }, + { + "text": "out", + "start": 136.36, + "end": 136.58, + "confidence": 0.995 + }, + { + "text": "for", + "start": 136.58, + "end": 136.8, + "confidence": 0.999 + }, + { + "text": "yourself", + "start": 136.8, + "end": 137.2, + "confidence": 0.998 + } + ] + }, + { + "id": 16, + "seek": 12000, + "start": 137.64, + "end": 140.2, + "text": " Look, okay, I'm hoping that somebody pray for me", + "tokens": [ + 51236, + 2053, + 11, + 1392, + 11, + 286, + 478, + 7159, + 300, + 2618, + 3690, + 337, + 385, + 51367 + ], + "temperature": 0.4, + "avg_logprob": -0.5485876959723395, + "compression_ratio": 1.7787610619469028, + "no_speech_prob": 0.02783079631626606, + "confidence": 0.894, + "words": [ + { + "text": "Look,", + "start": 137.64, + "end": 137.9, + "confidence": 0.901 + }, + { + "text": "okay,", + "start": 138.42, + "end": 138.58, + "confidence": 0.804 + }, + { + "text": "I'm", + "start": 138.76, + "end": 138.94, + "confidence": 0.822 + }, + { + "text": "hoping", + "start": 138.94, + "end": 139.16, + "confidence": 0.849 + }, + { + "text": "that", + "start": 139.16, + "end": 139.34, + "confidence": 0.98 + }, + { + "text": "somebody", + "start": 139.34, + "end": 139.58, + "confidence": 0.982 + }, + { + "text": "pray", + "start": 139.58, + "end": 139.86, + "confidence": 0.88 + }, + { + "text": "for", + "start": 139.86, + "end": 140.02, + 
"confidence": 0.923 + }, + { + "text": "me", + "start": 140.02, + "end": 140.2, + "confidence": 0.999 + } + ] + }, + { + "id": 17, + "seek": 12000, + "start": 140.32, + "end": 141.68, + "text": " I'm praying that somebody vote for me", + "tokens": [ + 51367, + 286, + 478, + 15611, + 300, + 2618, + 4740, + 337, + 385, + 51440 + ], + "temperature": 0.4, + "avg_logprob": -0.5485876959723395, + "compression_ratio": 1.7787610619469028, + "no_speech_prob": 0.02783079631626606, + "confidence": 0.87, + "words": [ + { + "text": "I'm", + "start": 140.32, + "end": 140.44, + "confidence": 0.924 + }, + { + "text": "praying", + "start": 140.44, + "end": 140.64, + "confidence": 0.991 + }, + { + "text": "that", + "start": 140.64, + "end": 140.86, + "confidence": 0.898 + }, + { + "text": "somebody", + "start": 140.86, + "end": 141.1, + "confidence": 0.991 + }, + { + "text": "vote", + "start": 141.1, + "end": 141.34, + "confidence": 0.439 + }, + { + "text": "for", + "start": 141.34, + "end": 141.5, + "confidence": 0.999 + }, + { + "text": "me", + "start": 141.5, + "end": 141.68, + "confidence": 1.0 + } + ] + }, + { + "id": 18, + "seek": 12000, + "start": 141.7, + "end": 143.21, + "text": " I'm staying where nobody's supposed to be", + "tokens": [ + 51440, + 286, + 478, + 7939, + 689, + 5079, + 311, + 3442, + 281, + 312, + 51520 + ], + "temperature": 0.4, + "avg_logprob": -0.5485876959723395, + "compression_ratio": 1.7787610619469028, + "no_speech_prob": 0.02783079631626606, + "confidence": 0.913, + "words": [ + { + "text": "I'm", + "start": 141.7, + "end": 141.96, + "confidence": 0.984 + }, + { + "text": "staying", + "start": 141.96, + "end": 142.16, + "confidence": 0.807 + }, + { + "text": "where", + "start": 142.16, + "end": 142.34, + "confidence": 0.948 + }, + { + "text": "nobody's", + "start": 142.34, + "end": 142.66, + "confidence": 0.787 + }, + { + "text": "supposed", + "start": 142.66, + "end": 142.96, + "confidence": 0.962 + }, + { + "text": "to", + "start": 142.96, + "end": 
143.1, + "confidence": 0.998 + }, + { + "text": "be", + "start": 143.1, + "end": 143.21, + "confidence": 0.998 + } + ] + }, + { + "id": 19, + "seek": 12000, + "start": 143.21, + "end": 144.98, + "text": " I propose to be in the wreck of emotions", + "tokens": [ + 51520, + 286, + 17421, + 281, + 312, + 294, + 264, + 21478, + 295, + 8462, + 51615 + ], + "temperature": 0.4, + "avg_logprob": -0.5485876959723395, + "compression_ratio": 1.7787610619469028, + "no_speech_prob": 0.02783079631626606, + "confidence": 0.51, + "words": [ + { + "text": "I", + "start": 143.21, + "end": 143.42, + "confidence": 0.342 + }, + { + "text": "propose", + "start": 143.42, + "end": 143.72, + "confidence": 0.33 + }, + { + "text": "to", + "start": 143.72, + "end": 143.9, + "confidence": 0.767 + }, + { + "text": "be", + "start": 143.9, + "end": 144.04, + "confidence": 0.529 + }, + { + "text": "in", + "start": 144.04, + "end": 144.2, + "confidence": 0.82 + }, + { + "text": "the", + "start": 144.2, + "end": 144.3, + "confidence": 0.299 + }, + { + "text": "wreck", + "start": 144.3, + "end": 144.4, + "confidence": 0.216 + }, + { + "text": "of", + "start": 144.4, + "end": 144.54, + "confidence": 0.996 + }, + { + "text": "emotions", + "start": 144.54, + "end": 144.98, + "confidence": 0.963 + } + ] + }, + { + "id": 20, + "seek": 12000, + "start": 145.06, + "end": 146.32, + "text": " Ready to go whenever you let me know", + "tokens": [ + 51615, + 9944, + 281, + 352, + 5699, + 291, + 718, + 385, + 458, + 51680 + ], + "temperature": 0.4, + "avg_logprob": -0.5485876959723395, + "compression_ratio": 1.7787610619469028, + "no_speech_prob": 0.02783079631626606, + "confidence": 0.936, + "words": [ + { + "text": "Ready", + "start": 145.06, + "end": 145.3, + "confidence": 0.958 + }, + { + "text": "to", + "start": 145.3, + "end": 145.46, + "confidence": 0.998 + }, + { + "text": "go", + "start": 145.46, + "end": 145.6, + "confidence": 0.997 + }, + { + "text": "whenever", + "start": 145.6, + "end": 145.8, + 
"confidence": 0.885 + }, + { + "text": "you", + "start": 145.8, + "end": 145.96, + "confidence": 0.724 + }, + { + "text": "let", + "start": 145.96, + "end": 146.08, + "confidence": 0.974 + }, + { + "text": "me", + "start": 146.08, + "end": 146.18, + "confidence": 0.999 + }, + { + "text": "know", + "start": 146.18, + "end": 146.32, + "confidence": 0.988 + } + ] + }, + { + "id": 21, + "seek": 12000, + "start": 146.34, + "end": 147.86, + "text": " The road is long so put the pedal into the flow", + "tokens": [ + 51680, + 440, + 3060, + 307, + 938, + 370, + 829, + 264, + 19122, + 666, + 264, + 3095, + 51754 + ], + "temperature": 0.4, + "avg_logprob": -0.5485876959723395, + "compression_ratio": 1.7787610619469028, + "no_speech_prob": 0.02783079631626606, + "confidence": 0.932, + "words": [ + { + "text": "The", + "start": 146.34, + "end": 146.52, + "confidence": 0.974 + }, + { + "text": "road", + "start": 146.52, + "end": 146.68, + "confidence": 0.987 + }, + { + "text": "is", + "start": 146.68, + "end": 146.78, + "confidence": 0.977 + }, + { + "text": "long", + "start": 146.78, + "end": 146.92, + "confidence": 0.891 + }, + { + "text": "so", + "start": 146.92, + "end": 147.04, + "confidence": 0.827 + }, + { + "text": "put", + "start": 147.04, + "end": 147.16, + "confidence": 0.971 + }, + { + "text": "the", + "start": 147.16, + "end": 147.28, + "confidence": 0.974 + }, + { + "text": "pedal", + "start": 147.28, + "end": 147.42, + "confidence": 0.925 + }, + { + "text": "into", + "start": 147.42, + "end": 147.6, + "confidence": 0.944 + }, + { + "text": "the", + "start": 147.6, + "end": 147.74, + "confidence": 0.994 + }, + { + "text": "flow", + "start": 147.74, + "end": 147.86, + "confidence": 0.816 + } + ] + }, + { + "id": 22, + "seek": 12000, + "start": 147.9, + "end": 149.8, + "text": " The enemy on my trail, my energy unavailable", + "tokens": [ + 51754, + 440, + 5945, + 322, + 452, + 9924, + 11, + 452, + 2281, + 36541, + 32699, + 51854 + ], + "temperature": 0.4, + 
"avg_logprob": -0.5485876959723395, + "compression_ratio": 1.7787610619469028, + "no_speech_prob": 0.02783079631626606, + "confidence": 0.957, + "words": [ + { + "text": "The", + "start": 147.9, + "end": 148.1, + "confidence": 0.913 + }, + { + "text": "enemy", + "start": 148.1, + "end": 148.26, + "confidence": 0.891 + }, + { + "text": "on", + "start": 148.26, + "end": 148.38, + "confidence": 0.965 + }, + { + "text": "my", + "start": 148.38, + "end": 148.52, + "confidence": 0.982 + }, + { + "text": "trail,", + "start": 148.52, + "end": 148.66, + "confidence": 0.99 + }, + { + "text": "my", + "start": 148.72, + "end": 148.86, + "confidence": 0.991 + }, + { + "text": "energy", + "start": 148.86, + "end": 149.1, + "confidence": 0.992 + }, + { + "text": "unavailable", + "start": 149.1, + "end": 149.8, + "confidence": 0.948 + } + ] + }, + { + "id": 23, + "seek": 14980, + "start": 150.04, + "end": 151.18, + "text": " Imma tell the master the way go", + "tokens": [ + 50370, + 50089, + 980, + 264, + 4505, + 264, + 636, + 352, + 50430 + ], + "temperature": 0.4, + "avg_logprob": -0.49008950787390065, + "compression_ratio": 1.738872403560831, + "no_speech_prob": 0.005245593376457691, + "confidence": 0.574, + "words": [ + { + "text": "Imma", + "start": 150.04, + "end": 150.22, + "confidence": 0.255 + }, + { + "text": "tell", + "start": 150.22, + "end": 150.36, + "confidence": 0.945 + }, + { + "text": "the", + "start": 150.36, + "end": 150.46, + "confidence": 0.402 + }, + { + "text": "master", + "start": 150.46, + "end": 150.68, + "confidence": 0.399 + }, + { + "text": "the", + "start": 150.68, + "end": 150.9, + "confidence": 0.823 + }, + { + "text": "way", + "start": 150.9, + "end": 151.0, + "confidence": 0.994 + }, + { + "text": "go", + "start": 151.0, + "end": 151.18, + "confidence": 0.646 + } + ] + }, + { + "id": 24, + "seek": 14980, + "start": 151.18, + "end": 152.56, + "text": " When the plot on my track to the top", + "tokens": [ + 50430, + 1133, + 264, + 7542, + 322, + 
452, + 2837, + 281, + 264, + 1192, + 50497 + ], + "temperature": 0.4, + "avg_logprob": -0.49008950787390065, + "compression_ratio": 1.738872403560831, + "no_speech_prob": 0.005245593376457691, + "confidence": 0.591, + "words": [ + { + "text": "When", + "start": 151.18, + "end": 151.44, + "confidence": 0.081 + }, + { + "text": "the", + "start": 151.44, + "end": 151.62, + "confidence": 0.508 + }, + { + "text": "plot", + "start": 151.62, + "end": 151.78, + "confidence": 0.419 + }, + { + "text": "on", + "start": 151.78, + "end": 151.9, + "confidence": 0.847 + }, + { + "text": "my", + "start": 151.9, + "end": 152.02, + "confidence": 0.991 + }, + { + "text": "track", + "start": 152.02, + "end": 152.16, + "confidence": 0.642 + }, + { + "text": "to", + "start": 152.16, + "end": 152.3, + "confidence": 0.951 + }, + { + "text": "the", + "start": 152.3, + "end": 152.44, + "confidence": 0.996 + }, + { + "text": "top", + "start": 152.44, + "end": 152.56, + "confidence": 0.998 + } + ] + }, + { + "id": 25, + "seek": 14980, + "start": 152.56, + "end": 154.36, + "text": " I been out of shape, thinking that I'm a box of an astronaut", + "tokens": [ + 50497, + 286, + 668, + 484, + 295, + 3909, + 11, + 1953, + 300, + 286, + 478, + 257, + 2424, + 295, + 364, + 18516, + 50597 + ], + "temperature": 0.4, + "avg_logprob": -0.49008950787390065, + "compression_ratio": 1.738872403560831, + "no_speech_prob": 0.005245593376457691, + "confidence": 0.725, + "words": [ + { + "text": "I", + "start": 152.56, + "end": 152.7, + "confidence": 0.98 + }, + { + "text": "been", + "start": 152.7, + "end": 152.76, + "confidence": 0.621 + }, + { + "text": "out", + "start": 152.76, + "end": 152.9, + "confidence": 0.729 + }, + { + "text": "of", + "start": 152.9, + "end": 153.02, + "confidence": 0.966 + }, + { + "text": "shape,", + "start": 153.02, + "end": 153.12, + "confidence": 0.999 + }, + { + "text": "thinking", + "start": 153.3, + "end": 153.32, + "confidence": 0.75 + }, + { + "text": "that", + "start": 
153.32, + "end": 153.46, + "confidence": 0.363 + }, + { + "text": "I'm", + "start": 153.46, + "end": 153.66, + "confidence": 0.597 + }, + { + "text": "a", + "start": 153.66, + "end": 153.74, + "confidence": 0.678 + }, + { + "text": "box", + "start": 153.74, + "end": 153.76, + "confidence": 0.878 + }, + { + "text": "of", + "start": 153.76, + "end": 153.9, + "confidence": 0.465 + }, + { + "text": "an", + "start": 153.9, + "end": 154.06, + "confidence": 0.981 + }, + { + "text": "astronaut", + "start": 154.06, + "end": 154.36, + "confidence": 0.991 + } + ] + }, + { + "id": 26, + "seek": 14980, + "start": 154.5, + "end": 156.42, + "text": " Blasted off the planet, rocked the cars, catastrophic", + "tokens": [ + 50597, + 2177, + 34440, + 766, + 264, + 5054, + 11, + 3727, + 292, + 264, + 5163, + 11, + 34915, + 50697 + ], + "temperature": 0.4, + "avg_logprob": -0.49008950787390065, + "compression_ratio": 1.738872403560831, + "no_speech_prob": 0.005245593376457691, + "confidence": 0.726, + "words": [ + { + "text": "Blasted", + "start": 154.5, + "end": 154.82, + "confidence": 0.789 + }, + { + "text": "off", + "start": 154.82, + "end": 155.02, + "confidence": 0.974 + }, + { + "text": "the", + "start": 155.02, + "end": 155.2, + "confidence": 0.954 + }, + { + "text": "planet,", + "start": 155.2, + "end": 155.36, + "confidence": 0.995 + }, + { + "text": "rocked", + "start": 155.44, + "end": 155.66, + "confidence": 0.687 + }, + { + "text": "the", + "start": 155.66, + "end": 155.74, + "confidence": 0.83 + }, + { + "text": "cars,", + "start": 155.74, + "end": 155.9, + "confidence": 0.246 + }, + { + "text": "catastrophic", + "start": 155.9, + "end": 156.42, + "confidence": 0.733 + } + ] + }, + { + "id": 27, + "seek": 14980, + "start": 156.44, + "end": 158.18, + "text": " And it matters more because I had it, now I had it", + "tokens": [ + 50697, + 400, + 309, + 7001, + 544, + 570, + 286, + 632, + 309, + 11, + 586, + 286, + 632, + 309, + 50780 + ], + "temperature": 0.4, + 
"avg_logprob": -0.49008950787390065, + "compression_ratio": 1.738872403560831, + "no_speech_prob": 0.005245593376457691, + "confidence": 0.811, + "words": [ + { + "text": "And", + "start": 156.44, + "end": 156.64, + "confidence": 0.821 + }, + { + "text": "it", + "start": 156.64, + "end": 156.78, + "confidence": 0.723 + }, + { + "text": "matters", + "start": 156.78, + "end": 156.96, + "confidence": 0.995 + }, + { + "text": "more", + "start": 156.96, + "end": 157.18, + "confidence": 0.984 + }, + { + "text": "because", + "start": 157.18, + "end": 157.42, + "confidence": 0.861 + }, + { + "text": "I", + "start": 157.42, + "end": 157.58, + "confidence": 0.774 + }, + { + "text": "had", + "start": 157.58, + "end": 157.66, + "confidence": 0.939 + }, + { + "text": "it,", + "start": 157.66, + "end": 157.8, + "confidence": 0.988 + }, + { + "text": "now", + "start": 157.8, + "end": 157.94, + "confidence": 0.532 + }, + { + "text": "I", + "start": 157.94, + "end": 158.08, + "confidence": 0.655 + }, + { + "text": "had", + "start": 158.08, + "end": 158.12, + "confidence": 0.947 + }, + { + "text": "it", + "start": 158.12, + "end": 158.18, + "confidence": 0.68 + } + ] + }, + { + "id": 28, + "seek": 14980, + "start": 158.18, + "end": 159.84, + "text": " I thought about wreaking havoc on an opposition", + "tokens": [ + 50780, + 286, + 1194, + 466, + 46674, + 2456, + 47367, + 322, + 364, + 13504, + 50864 + ], + "temperature": 0.4, + "avg_logprob": -0.49008950787390065, + "compression_ratio": 1.738872403560831, + "no_speech_prob": 0.005245593376457691, + "confidence": 0.96, + "words": [ + { + "text": "I", + "start": 158.18, + "end": 158.24, + "confidence": 0.9 + }, + { + "text": "thought", + "start": 158.24, + "end": 158.38, + "confidence": 0.983 + }, + { + "text": "about", + "start": 158.38, + "end": 158.58, + "confidence": 0.98 + }, + { + "text": "wreaking", + "start": 158.58, + "end": 158.86, + "confidence": 0.989 + }, + { + "text": "havoc", + "start": 158.86, + "end": 159.08, + 
"confidence": 1.0 + }, + { + "text": "on", + "start": 159.08, + "end": 159.3, + "confidence": 0.913 + }, + { + "text": "an", + "start": 159.3, + "end": 159.46, + "confidence": 0.912 + }, + { + "text": "opposition", + "start": 159.46, + "end": 159.84, + "confidence": 0.984 + } + ] + }, + { + "id": 29, + "seek": 14980, + "start": 159.86, + "end": 161.58, + "text": " Kinda shockin', they want it static, with precision", + "tokens": [ + 50864, + 35553, + 5588, + 259, + 6098, + 436, + 528, + 309, + 13437, + 11, + 365, + 18356, + 50952 + ], + "temperature": 0.4, + "avg_logprob": -0.49008950787390065, + "compression_ratio": 1.738872403560831, + "no_speech_prob": 0.005245593376457691, + "confidence": 0.557, + "words": [ + { + "text": "Kinda", + "start": 159.86, + "end": 160.12, + "confidence": 0.739 + }, + { + "text": "shockin',", + "start": 160.12, + "end": 160.48, + "confidence": 0.389 + }, + { + "text": "they", + "start": 160.48, + "end": 160.6, + "confidence": 0.304 + }, + { + "text": "want", + "start": 160.6, + "end": 160.74, + "confidence": 0.501 + }, + { + "text": "it", + "start": 160.74, + "end": 160.88, + "confidence": 0.651 + }, + { + "text": "static,", + "start": 160.88, + "end": 161.02, + "confidence": 0.984 + }, + { + "text": "with", + "start": 161.16, + "end": 161.3, + "confidence": 0.642 + }, + { + "text": "precision", + "start": 161.3, + "end": 161.58, + "confidence": 0.737 + } + ] + }, + { + "id": 30, + "seek": 14980, + "start": 161.64, + "end": 163.38, + "text": " I'm automatic, quarterback, I ain't talkin' second", + "tokens": [ + 50952, + 286, + 478, + 12509, + 11, + 31952, + 11, + 286, + 7862, + 380, + 39243, + 6, + 1150, + 51042 + ], + "temperature": 0.4, + "avg_logprob": -0.49008950787390065, + "compression_ratio": 1.738872403560831, + "no_speech_prob": 0.005245593376457691, + "confidence": 0.839, + "words": [ + { + "text": "I'm", + "start": 161.64, + "end": 161.88, + "confidence": 0.968 + }, + { + "text": "automatic,", + "start": 161.88, + "end": 
162.2, + "confidence": 0.968 + }, + { + "text": "quarterback,", + "start": 162.3, + "end": 162.58, + "confidence": 0.777 + }, + { + "text": "I", + "start": 162.66, + "end": 162.78, + "confidence": 0.787 + }, + { + "text": "ain't", + "start": 162.78, + "end": 162.9, + "confidence": 0.982 + }, + { + "text": "talkin'", + "start": 162.9, + "end": 163.24, + "confidence": 0.749 + }, + { + "text": "second", + "start": 163.24, + "end": 163.38, + "confidence": 0.574 + } + ] + }, + { + "id": 31, + "seek": 14980, + "start": 163.38, + "end": 165.2, + "text": " Pack it, pack it up, I don't panic, better, batter up", + "tokens": [ + 51042, + 18466, + 309, + 11, + 2844, + 309, + 493, + 11, + 286, + 500, + 380, + 14783, + 11, + 1101, + 11, + 4220, + 493, + 51130 + ], + "temperature": 0.4, + "avg_logprob": -0.49008950787390065, + "compression_ratio": 1.738872403560831, + "no_speech_prob": 0.005245593376457691, + "confidence": 0.868, + "words": [ + { + "text": "Pack", + "start": 163.38, + "end": 163.6, + "confidence": 0.953 + }, + { + "text": "it,", + "start": 163.6, + "end": 163.68, + "confidence": 0.925 + }, + { + "text": "pack", + "start": 163.68, + "end": 163.88, + "confidence": 0.964 + }, + { + "text": "it", + "start": 163.88, + "end": 164.02, + "confidence": 0.998 + }, + { + "text": "up,", + "start": 164.02, + "end": 164.12, + "confidence": 0.979 + }, + { + "text": "I", + "start": 164.12, + "end": 164.22, + "confidence": 0.963 + }, + { + "text": "don't", + "start": 164.22, + "end": 164.34, + "confidence": 0.856 + }, + { + "text": "panic,", + "start": 164.34, + "end": 164.56, + "confidence": 0.999 + }, + { + "text": "better,", + "start": 164.62, + "end": 164.86, + "confidence": 0.64 + }, + { + "text": "batter", + "start": 164.88, + "end": 165.04, + "confidence": 0.509 + }, + { + "text": "up", + "start": 165.04, + "end": 165.2, + "confidence": 0.958 + } + ] + }, + { + "id": 32, + "seek": 14980, + "start": 165.2, + "end": 166.76, + "text": " Who the baddest? 
It don't matter, cuz we it's your", + "tokens": [ + 51130, + 2102, + 264, + 1578, + 23748, + 30, + 467, + 500, + 380, + 1871, + 11, + 11910, + 321, + 309, + 311, + 428, + 51209 + ], + "temperature": 0.4, + "avg_logprob": -0.49008950787390065, + "compression_ratio": 1.738872403560831, + "no_speech_prob": 0.005245593376457691, + "confidence": 0.727, + "words": [ + { + "text": "Who", + "start": 165.2, + "end": 165.4, + "confidence": 0.994 + }, + { + "text": "the", + "start": 165.4, + "end": 165.54, + "confidence": 0.941 + }, + { + "text": "baddest?", + "start": 165.54, + "end": 165.78, + "confidence": 0.86 + }, + { + "text": "It", + "start": 165.86, + "end": 165.9, + "confidence": 0.931 + }, + { + "text": "don't", + "start": 165.9, + "end": 166.04, + "confidence": 0.996 + }, + { + "text": "matter,", + "start": 166.04, + "end": 166.18, + "confidence": 0.998 + }, + { + "text": "cuz", + "start": 166.26, + "end": 166.4, + "confidence": 0.312 + }, + { + "text": "we", + "start": 166.4, + "end": 166.56, + "confidence": 0.95 + }, + { + "text": "it's", + "start": 166.56, + "end": 166.66, + "confidence": 0.373 + }, + { + "text": "your", + "start": 166.66, + "end": 166.76, + "confidence": 0.602 + } + ] + }, + { + "id": 33, + "seek": 14980, + "start": 166.76, + "end": 169.68, + "text": " Everybody wants to be my enemy", + "tokens": [ + 51209, + 7646, + 2738, + 281, + 312, + 452, + 5945, + 51365 + ], + "temperature": 0.4, + "avg_logprob": -0.49008950787390065, + "compression_ratio": 1.738872403560831, + "no_speech_prob": 0.005245593376457691, + "confidence": 0.989, + "words": [ + { + "text": "Everybody", + "start": 166.76, + "end": 167.44, + "confidence": 0.961 + }, + { + "text": "wants", + "start": 167.44, + "end": 167.82, + "confidence": 0.984 + }, + { + "text": "to", + "start": 167.82, + "end": 167.98, + "confidence": 0.999 + }, + { + "text": "be", + "start": 167.98, + "end": 168.12, + "confidence": 0.998 + }, + { + "text": "my", + "start": 168.12, + "end": 168.44, + 
"confidence": 0.996 + }, + { + "text": "enemy", + "start": 168.44, + "end": 169.68, + "confidence": 0.996 + } + ] + }, + { + "id": 34, + "seek": 14980, + "start": 170.62, + "end": 176.14, + "text": " Smell the sympathy everybody wants to be my enemy", + "tokens": [ + 51397, + 3915, + 898, + 264, + 33240, + 2201, + 2738, + 281, + 312, + 452, + 5945, + 51682 + ], + "temperature": 0.4, + "avg_logprob": -0.49008950787390065, + "compression_ratio": 1.738872403560831, + "no_speech_prob": 0.005245593376457691, + "confidence": 0.937, + "words": [ + { + "text": "Smell", + "start": 170.62, + "end": 171.3, + "confidence": 0.978 + }, + { + "text": "the", + "start": 171.3, + "end": 171.56, + "confidence": 0.999 + }, + { + "text": "sympathy", + "start": 171.56, + "end": 172.28, + "confidence": 0.998 + }, + { + "text": "everybody", + "start": 172.28, + "end": 173.58, + "confidence": 0.604 + }, + { + "text": "wants", + "start": 173.58, + "end": 174.02, + "confidence": 0.995 + }, + { + "text": "to", + "start": 174.02, + "end": 174.3, + "confidence": 1.0 + }, + { + "text": "be", + "start": 174.3, + "end": 174.86, + "confidence": 1.0 + }, + { + "text": "my", + "start": 174.86, + "end": 175.4, + "confidence": 0.909 + }, + { + "text": "enemy", + "start": 175.4, + "end": 176.14, + "confidence": 0.999 + } + ] + }, + { + "id": 35, + "seek": 14980, + "start": 176.88, + "end": 178.4, + "text": " Oh, the misery", + "tokens": [ + 51714, + 876, + 11, + 264, + 32309, + 51799 + ], + "temperature": 0.4, + "avg_logprob": -0.49008950787390065, + "compression_ratio": 1.738872403560831, + "no_speech_prob": 0.005245593376457691, + "confidence": 0.815, + "words": [ + { + "text": "Oh,", + "start": 176.88, + "end": 177.48, + "confidence": 0.551 + }, + { + "text": "the", + "start": 177.56, + "end": 177.76, + "confidence": 0.985 + }, + { + "text": "misery", + "start": 177.76, + "end": 178.4, + "confidence": 0.999 + } + ] + }, + { + "id": 36, + "seek": 17850, + "start": 179.3, + "end": 181.62, + "text": " 
Everybody wants to be my enemy", + "tokens": [ + 50392, + 7646, + 2738, + 281, + 312, + 452, + 5945, + 50549 + ], + "temperature": 0.4, + "avg_logprob": -0.503788930719549, + "compression_ratio": 1.797979797979798, + "no_speech_prob": 0.02752392552793026, + "confidence": 0.996, + "words": [ + { + "text": "Everybody", + "start": 179.3, + "end": 179.94, + "confidence": 0.991 + }, + { + "text": "wants", + "start": 179.94, + "end": 180.28, + "confidence": 0.991 + }, + { + "text": "to", + "start": 180.28, + "end": 180.46, + "confidence": 0.999 + }, + { + "text": "be", + "start": 180.46, + "end": 180.62, + "confidence": 0.999 + }, + { + "text": "my", + "start": 180.62, + "end": 180.94, + "confidence": 0.999 + }, + { + "text": "enemy", + "start": 180.94, + "end": 181.62, + "confidence": 0.996 + } + ] + }, + { + "id": 37, + "seek": 17850, + "start": 183.14, + "end": 188.5, + "text": " Smell the sympathy everybody wants to be my enemy", + "tokens": [ + 50587, + 3915, + 898, + 264, + 33240, + 2201, + 2738, + 281, + 312, + 452, + 5945, + 50867 + ], + "temperature": 0.4, + "avg_logprob": -0.503788930719549, + "compression_ratio": 1.797979797979798, + "no_speech_prob": 0.02752392552793026, + "confidence": 0.98, + "words": [ + { + "text": "Smell", + "start": 183.14, + "end": 183.8, + "confidence": 0.997 + }, + { + "text": "the", + "start": 183.8, + "end": 184.06, + "confidence": 0.999 + }, + { + "text": "sympathy", + "start": 184.06, + "end": 184.66, + "confidence": 0.997 + }, + { + "text": "everybody", + "start": 184.66, + "end": 186.1, + "confidence": 0.859 + }, + { + "text": "wants", + "start": 186.1, + "end": 186.5, + "confidence": 0.994 + }, + { + "text": "to", + "start": 186.5, + "end": 186.74, + "confidence": 1.0 + }, + { + "text": "be", + "start": 186.74, + "end": 187.2, + "confidence": 1.0 + }, + { + "text": "my", + "start": 187.2, + "end": 187.9, + "confidence": 0.967 + }, + { + "text": "enemy", + "start": 187.9, + "end": 188.5, + "confidence": 0.999 + } + ] + }, + { + 
"id": 38, + "seek": 17850, + "start": 188.88, + "end": 192.07, + "text": " I swear I'll never be a saint", + "tokens": [ + 50867, + 286, + 11902, + 286, + 603, + 1128, + 312, + 257, + 28374, + 51044 + ], + "temperature": 0.4, + "avg_logprob": -0.503788930719549, + "compression_ratio": 1.797979797979798, + "no_speech_prob": 0.02752392552793026, + "confidence": 0.536, + "words": [ + { + "text": "I", + "start": 188.88, + "end": 189.22, + "confidence": 0.099 + }, + { + "text": "swear", + "start": 189.22, + "end": 189.44, + "confidence": 0.83 + }, + { + "text": "I'll", + "start": 189.44, + "end": 191.28, + "confidence": 0.428 + }, + { + "text": "never", + "start": 191.28, + "end": 191.3, + "confidence": 0.966 + }, + { + "text": "be", + "start": 191.3, + "end": 191.68, + "confidence": 0.991 + }, + { + "text": "a", + "start": 191.68, + "end": 191.92, + "confidence": 0.579 + }, + { + "text": "saint", + "start": 191.92, + "end": 192.07, + "confidence": 0.811 + } + ] + }, + { + "id": 39, + "seek": 17850, + "start": 192.07, + "end": 194.76, + "text": " No way, my enemy", + "tokens": [ + 51044, + 883, + 636, + 11, + 452, + 5945, + 51177 + ], + "temperature": 0.4, + "avg_logprob": -0.503788930719549, + "compression_ratio": 1.797979797979798, + "no_speech_prob": 0.02752392552793026, + "confidence": 0.883, + "words": [ + { + "text": "No", + "start": 192.07, + "end": 192.66, + "confidence": 0.682 + }, + { + "text": "way,", + "start": 192.66, + "end": 193.18, + "confidence": 0.974 + }, + { + "text": "my", + "start": 193.88, + "end": 194.18, + "confidence": 0.955 + }, + { + "text": "enemy", + "start": 194.18, + "end": 194.76, + "confidence": 0.956 + } + ] + }, + { + "id": 40, + "seek": 17850, + "start": 194.82, + "end": 198.36, + "text": " I swear I'll never be a saint", + "tokens": [ + 51177, + 286, + 11902, + 286, + 603, + 1128, + 312, + 257, + 28374, + 51354 + ], + "temperature": 0.4, + "avg_logprob": -0.503788930719549, + "compression_ratio": 1.797979797979798, + 
"no_speech_prob": 0.02752392552793026, + "confidence": 0.968, + "words": [ + { + "text": "I", + "start": 194.82, + "end": 195.68, + "confidence": 0.82 + }, + { + "text": "swear", + "start": 195.68, + "end": 196.56, + "confidence": 0.998 + }, + { + "text": "I'll", + "start": 196.56, + "end": 197.28, + "confidence": 0.976 + }, + { + "text": "never", + "start": 197.28, + "end": 197.52, + "confidence": 0.998 + }, + { + "text": "be", + "start": 197.52, + "end": 197.9, + "confidence": 0.999 + }, + { + "text": "a", + "start": 197.9, + "end": 198.12, + "confidence": 0.996 + }, + { + "text": "saint", + "start": 198.12, + "end": 198.36, + "confidence": 0.997 + } + ] + }, + { + "id": 41, + "seek": 17850, + "start": 198.48, + "end": 199.82, + "text": " You gotta be a saint", + "tokens": [ + 51354, + 509, + 3428, + 312, + 257, + 28374, + 51454 + ], + "temperature": 0.4, + "avg_logprob": -0.503788930719549, + "compression_ratio": 1.797979797979798, + "no_speech_prob": 0.02752392552793026, + "confidence": 0.659, + "words": [ + { + "text": "You", + "start": 198.48, + "end": 198.7, + "confidence": 0.479 + }, + { + "text": "gotta", + "start": 198.7, + "end": 198.88, + "confidence": 0.476 + }, + { + "text": "be", + "start": 198.88, + "end": 199.1, + "confidence": 0.964 + }, + { + "text": "a", + "start": 199.1, + "end": 199.34, + "confidence": 0.736 + }, + { + "text": "saint", + "start": 199.34, + "end": 199.82, + "confidence": 0.769 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases/nocond_music.mp4.words.json b/tests/expected/corner_cases/nocond_music.mp4.words.json new file mode 100644 index 0000000000000000000000000000000000000000..16008ee11f6dcad6b80d128ede4d5c88368f3cb3 --- /dev/null +++ b/tests/expected/corner_cases/nocond_music.mp4.words.json @@ -0,0 +1,3224 @@ +{ + "text": " I I Wake up to the sounds of the silence that allows for my mind to run around with my ear up to the ground I'm searching to be home the stories 
that are told when the back is to the world smiling when I turn Oh Everybody wants to be my enemy Sympathy everybody wants to be my enemy My enemy Oh Your words up on the wall as you're praying for my phone and the laughter in the holes and the names that I've been called I stack it in my mind and I'm waiting for the time when I show you what it's like to be worse Oh the misery everybody wants to be my enemy Spare the sympathy everybody wants to be my enemy My enemy Look out for yourself Look, okay I'm hoping that somebody pray for me Praying that somebody vote for me Staying where nobody supposed to be I propose to be in a wreck of emotions Ready to go whenever you let me know The road is long so put the pedal into the flow The enemy on my trail, my energy unavailable I'ma tell them I still a way go Way when I'm plotting, I drive to the top I been out of shape, thinking that I'm a box, I'm an astronaut Blasted off the planet, rocked the cars, catastrophic And it matters more because I had it not had I thought about wreaking havoc on an opposition Kinda shocking, they want it static with precision I'm automatic, quarterback, I ain't talking sack and packet Pack it up, I don't panic, batter, batter up Who the baddest, it don't matter cause we it's your Everybody wants to be my enemy Spare the sympathy Everybody wants to be my enemy Oh, the misery Everybody wants to be my enemy Spare the sympathy Everybody wants to be my enemy My enemy, I swear, I'll never be a saint No way, my enemy My enemy, I swear, I'll never be a saint Look out for yourself", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 2.24, + "end": 2.36, + "text": " I", + "tokens": [ + 50364, + 286, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.9360545873641968, + "compression_ratio": 0.1111111111111111, + "no_speech_prob": 0.7811808586120605, + "confidence": 0.032, + "words": [ + { + "text": "I", + "start": 2.24, + "end": 2.36, + "confidence": 0.032 + } + ] + }, + { + "id": 1, + "seek": 
3000, + "start": 30.0, + "end": 31.7, + "text": " I", + "tokens": [ + 50364, + 286, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.3369734782092976, + "compression_ratio": 1.537313432835821, + "no_speech_prob": 0.10452272742986679, + "confidence": 0.134, + "words": [ + { + "text": "I", + "start": 30.0, + "end": 31.7, + "confidence": 0.134 + } + ] + }, + { + "id": 2, + "seek": 3000, + "start": 38.78, + "end": 44.8, + "text": " Wake up to the sounds of the silence that allows for my mind to run around with my ear up to the ground", + "tokens": [ + 50796, + 21062, + 493, + 281, + 264, + 3263, + 295, + 264, + 12239, + 300, + 4045, + 337, + 452, + 1575, + 281, + 1190, + 926, + 365, + 452, + 1273, + 493, + 281, + 264, + 2727, + 51094 + ], + "temperature": 0.0, + "avg_logprob": -0.3369734782092976, + "compression_ratio": 1.537313432835821, + "no_speech_prob": 0.10452272742986679, + "confidence": 0.902, + "words": [ + { + "text": "Wake", + "start": 38.78, + "end": 39.26, + "confidence": 0.808 + }, + { + "text": "up", + "start": 39.26, + "end": 39.56, + "confidence": 0.996 + }, + { + "text": "to", + "start": 39.56, + "end": 39.74, + "confidence": 0.986 + }, + { + "text": "the", + "start": 39.74, + "end": 39.96, + "confidence": 0.86 + }, + { + "text": "sounds", + "start": 39.96, + "end": 40.32, + "confidence": 0.545 + }, + { + "text": "of", + "start": 40.32, + "end": 40.5, + "confidence": 0.936 + }, + { + "text": "the", + "start": 40.5, + "end": 40.62, + "confidence": 0.997 + }, + { + "text": "silence", + "start": 40.62, + "end": 41.1, + "confidence": 0.951 + }, + { + "text": "that", + "start": 41.1, + "end": 41.38, + "confidence": 0.97 + }, + { + "text": "allows", + "start": 41.38, + "end": 41.84, + "confidence": 0.894 + }, + { + "text": "for", + "start": 41.84, + "end": 42.06, + "confidence": 0.805 + }, + { + "text": "my", + "start": 42.06, + "end": 42.22, + "confidence": 0.948 + }, + { + "text": "mind", + "start": 42.22, + "end": 42.46, + "confidence": 0.977 + }, 
+ { + "text": "to", + "start": 42.46, + "end": 42.68, + "confidence": 0.862 + }, + { + "text": "run", + "start": 42.68, + "end": 42.86, + "confidence": 0.994 + }, + { + "text": "around", + "start": 42.86, + "end": 43.3, + "confidence": 0.967 + }, + { + "text": "with", + "start": 43.3, + "end": 43.56, + "confidence": 0.61 + }, + { + "text": "my", + "start": 43.56, + "end": 43.68, + "confidence": 0.992 + }, + { + "text": "ear", + "start": 43.68, + "end": 43.96, + "confidence": 0.897 + }, + { + "text": "up", + "start": 43.96, + "end": 44.22, + "confidence": 0.988 + }, + { + "text": "to", + "start": 44.22, + "end": 44.34, + "confidence": 0.993 + }, + { + "text": "the", + "start": 44.34, + "end": 44.54, + "confidence": 0.996 + }, + { + "text": "ground", + "start": 44.54, + "end": 44.8, + "confidence": 0.995 + } + ] + }, + { + "id": 3, + "seek": 3000, + "start": 45.12, + "end": 51.0, + "text": " I'm searching to be home the stories that are told when the back is to the world smiling when I turn", + "tokens": [ + 51094, + 286, + 478, + 10808, + 281, + 312, + 1280, + 264, + 3676, + 300, + 366, + 1907, + 562, + 264, + 646, + 307, + 281, + 264, + 1002, + 16005, + 562, + 286, + 1261, + 51405 + ], + "temperature": 0.0, + "avg_logprob": -0.3369734782092976, + "compression_ratio": 1.537313432835821, + "no_speech_prob": 0.10452272742986679, + "confidence": 0.749, + "words": [ + { + "text": "I'm", + "start": 45.12, + "end": 45.24, + "confidence": 0.996 + }, + { + "text": "searching", + "start": 45.24, + "end": 45.68, + "confidence": 0.984 + }, + { + "text": "to", + "start": 45.68, + "end": 45.98, + "confidence": 0.991 + }, + { + "text": "be", + "start": 45.98, + "end": 46.24, + "confidence": 0.568 + }, + { + "text": "home", + "start": 46.24, + "end": 46.54, + "confidence": 0.503 + }, + { + "text": "the", + "start": 46.54, + "end": 46.84, + "confidence": 0.668 + }, + { + "text": "stories", + "start": 46.84, + "end": 47.3, + "confidence": 0.962 + }, + { + "text": "that", + "start": 
47.3, + "end": 47.56, + "confidence": 0.949 + }, + { + "text": "are", + "start": 47.56, + "end": 47.76, + "confidence": 0.462 + }, + { + "text": "told", + "start": 47.76, + "end": 48.06, + "confidence": 0.532 + }, + { + "text": "when", + "start": 48.06, + "end": 48.3, + "confidence": 0.344 + }, + { + "text": "the", + "start": 48.3, + "end": 48.52, + "confidence": 0.508 + }, + { + "text": "back", + "start": 48.52, + "end": 48.68, + "confidence": 0.907 + }, + { + "text": "is", + "start": 48.68, + "end": 48.9, + "confidence": 0.921 + }, + { + "text": "to", + "start": 48.9, + "end": 49.06, + "confidence": 0.989 + }, + { + "text": "the", + "start": 49.06, + "end": 49.28, + "confidence": 0.998 + }, + { + "text": "world", + "start": 49.28, + "end": 49.64, + "confidence": 0.995 + }, + { + "text": "smiling", + "start": 49.64, + "end": 50.32, + "confidence": 0.343 + }, + { + "text": "when", + "start": 50.32, + "end": 50.62, + "confidence": 0.952 + }, + { + "text": "I", + "start": 50.62, + "end": 50.84, + "confidence": 0.949 + }, + { + "text": "turn", + "start": 50.84, + "end": 51.0, + "confidence": 0.936 + } + ] + }, + { + "id": 4, + "seek": 6000, + "start": 60.0, + "end": 61.74, + "text": " Oh", + "tokens": [ + 50364, + 876, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.6962367466517857, + "compression_ratio": 1.5185185185185186, + "no_speech_prob": 0.10870116949081421, + "confidence": 0.603, + "words": [ + { + "text": "Oh", + "start": 60.0, + "end": 61.74, + "confidence": 0.603 + } + ] + }, + { + "id": 5, + "seek": 6000, + "start": 67.14, + "end": 69.1, + "text": " Everybody wants to be my enemy", + "tokens": [ + 50714, + 7646, + 2738, + 281, + 312, + 452, + 5945, + 50814 + ], + "temperature": 0.0, + "avg_logprob": -0.6962367466517857, + "compression_ratio": 1.5185185185185186, + "no_speech_prob": 0.10870116949081421, + "confidence": 0.86, + "words": [ + { + "text": "Everybody", + "start": 67.14, + "end": 67.74, + "confidence": 0.555 + }, + { + "text": "wants", + 
"start": 67.74, + "end": 68.12, + "confidence": 0.8 + }, + { + "text": "to", + "start": 68.12, + "end": 68.28, + "confidence": 0.987 + }, + { + "text": "be", + "start": 68.28, + "end": 68.4, + "confidence": 0.989 + }, + { + "text": "my", + "start": 68.4, + "end": 68.64, + "confidence": 0.985 + }, + { + "text": "enemy", + "start": 68.64, + "end": 69.1, + "confidence": 0.951 + } + ] + }, + { + "id": 6, + "seek": 6000, + "start": 71.7, + "end": 76.14, + "text": " Sympathy everybody wants to be my enemy", + "tokens": [ + 50950, + 3902, + 2455, + 9527, + 2201, + 2738, + 281, + 312, + 452, + 5945, + 51168 + ], + "temperature": 0.0, + "avg_logprob": -0.6962367466517857, + "compression_ratio": 1.5185185185185186, + "no_speech_prob": 0.10870116949081421, + "confidence": 0.859, + "words": [ + { + "text": "Sympathy", + "start": 71.7, + "end": 73.14, + "confidence": 0.912 + }, + { + "text": "everybody", + "start": 73.14, + "end": 74.0, + "confidence": 0.492 + }, + { + "text": "wants", + "start": 74.0, + "end": 74.36, + "confidence": 0.806 + }, + { + "text": "to", + "start": 74.36, + "end": 74.62, + "confidence": 0.991 + }, + { + "text": "be", + "start": 74.62, + "end": 75.3, + "confidence": 0.966 + }, + { + "text": "my", + "start": 75.3, + "end": 75.7, + "confidence": 0.927 + }, + { + "text": "enemy", + "start": 75.7, + "end": 76.14, + "confidence": 0.952 + } + ] + }, + { + "id": 7, + "seek": 6000, + "start": 81.62, + "end": 82.58, + "text": " My enemy", + "tokens": [ + 51422, + 1222, + 5945, + 51522 + ], + "temperature": 0.0, + "avg_logprob": -0.6962367466517857, + "compression_ratio": 1.5185185185185186, + "no_speech_prob": 0.10870116949081421, + "confidence": 0.911, + "words": [ + { + "text": "My", + "start": 81.62, + "end": 82.08, + "confidence": 0.865 + }, + { + "text": "enemy", + "start": 82.08, + "end": 82.58, + "confidence": 0.959 + } + ] + }, + { + "id": 8, + "seek": 8316, + "start": 83.38, + "end": 84.32, + "text": " Oh", + "tokens": [ + 50364, + 876, + 50464 + ], + 
"temperature": 0.0, + "avg_logprob": -0.2714941057108216, + "compression_ratio": 1.46, + "no_speech_prob": 0.029241792857646942, + "confidence": 0.474, + "words": [ + { + "text": "Oh", + "start": 83.38, + "end": 84.32, + "confidence": 0.474 + } + ] + }, + { + "id": 9, + "seek": 8316, + "start": 88.64, + "end": 94.74, + "text": " Your words up on the wall as you're praying for my phone and the laughter in the holes and the names that I've been called", + "tokens": [ + 50618, + 2260, + 2283, + 493, + 322, + 264, + 2929, + 382, + 291, + 434, + 15611, + 337, + 452, + 2593, + 293, + 264, + 13092, + 294, + 264, + 8118, + 293, + 264, + 5288, + 300, + 286, + 600, + 668, + 1219, + 50932 + ], + "temperature": 0.0, + "avg_logprob": -0.2714941057108216, + "compression_ratio": 1.46, + "no_speech_prob": 0.029241792857646942, + "confidence": 0.831, + "words": [ + { + "text": "Your", + "start": 88.64, + "end": 88.86, + "confidence": 0.884 + }, + { + "text": "words", + "start": 88.86, + "end": 89.18, + "confidence": 0.978 + }, + { + "text": "up", + "start": 89.18, + "end": 89.48, + "confidence": 0.911 + }, + { + "text": "on", + "start": 89.48, + "end": 89.6, + "confidence": 0.986 + }, + { + "text": "the", + "start": 89.6, + "end": 89.76, + "confidence": 0.99 + }, + { + "text": "wall", + "start": 89.76, + "end": 90.12, + "confidence": 0.362 + }, + { + "text": "as", + "start": 90.12, + "end": 90.36, + "confidence": 0.539 + }, + { + "text": "you're", + "start": 90.36, + "end": 90.44, + "confidence": 0.754 + }, + { + "text": "praying", + "start": 90.44, + "end": 90.86, + "confidence": 0.983 + }, + { + "text": "for", + "start": 90.86, + "end": 91.1, + "confidence": 0.965 + }, + { + "text": "my", + "start": 91.1, + "end": 91.22, + "confidence": 0.975 + }, + { + "text": "phone", + "start": 91.22, + "end": 91.72, + "confidence": 0.599 + }, + { + "text": "and", + "start": 91.72, + "end": 91.92, + "confidence": 0.796 + }, + { + "text": "the", + "start": 91.92, + "end": 92.1, + "confidence": 
0.957 + }, + { + "text": "laughter", + "start": 92.1, + "end": 92.46, + "confidence": 0.986 + }, + { + "text": "in", + "start": 92.46, + "end": 92.72, + "confidence": 0.942 + }, + { + "text": "the", + "start": 92.72, + "end": 92.9, + "confidence": 0.996 + }, + { + "text": "holes", + "start": 92.9, + "end": 93.28, + "confidence": 0.728 + }, + { + "text": "and", + "start": 93.28, + "end": 93.48, + "confidence": 0.745 + }, + { + "text": "the", + "start": 93.48, + "end": 93.62, + "confidence": 0.982 + }, + { + "text": "names", + "start": 93.62, + "end": 93.82, + "confidence": 0.984 + }, + { + "text": "that", + "start": 93.82, + "end": 94.14, + "confidence": 0.867 + }, + { + "text": "I've", + "start": 94.14, + "end": 94.32, + "confidence": 0.869 + }, + { + "text": "been", + "start": 94.32, + "end": 94.52, + "confidence": 0.927 + }, + { + "text": "called", + "start": 94.52, + "end": 94.74, + "confidence": 0.636 + } + ] + }, + { + "id": 10, + "seek": 8316, + "start": 94.96, + "end": 100.1, + "text": " I stack it in my mind and I'm waiting for the time when I show you what it's like to be worse", + "tokens": [ + 50932, + 286, + 8630, + 309, + 294, + 452, + 1575, + 293, + 286, + 478, + 3806, + 337, + 264, + 565, + 562, + 286, + 855, + 291, + 437, + 309, + 311, + 411, + 281, + 312, + 5324, + 51200 + ], + "temperature": 0.0, + "avg_logprob": -0.2714941057108216, + "compression_ratio": 1.46, + "no_speech_prob": 0.029241792857646942, + "confidence": 0.888, + "words": [ + { + "text": "I", + "start": 94.96, + "end": 95.12, + "confidence": 0.837 + }, + { + "text": "stack", + "start": 95.12, + "end": 95.42, + "confidence": 0.234 + }, + { + "text": "it", + "start": 95.42, + "end": 95.7, + "confidence": 0.987 + }, + { + "text": "in", + "start": 95.7, + "end": 95.84, + "confidence": 0.996 + }, + { + "text": "my", + "start": 95.84, + "end": 96.18, + "confidence": 0.993 + }, + { + "text": "mind", + "start": 96.18, + "end": 96.46, + "confidence": 0.951 + }, + { + "text": "and", + 
"start": 96.46, + "end": 96.62, + "confidence": 0.756 + }, + { + "text": "I'm", + "start": 96.62, + "end": 96.88, + "confidence": 0.987 + }, + { + "text": "waiting", + "start": 96.88, + "end": 97.14, + "confidence": 0.998 + }, + { + "text": "for", + "start": 97.14, + "end": 97.36, + "confidence": 0.986 + }, + { + "text": "the", + "start": 97.36, + "end": 97.54, + "confidence": 0.997 + }, + { + "text": "time", + "start": 97.54, + "end": 97.9, + "confidence": 0.979 + }, + { + "text": "when", + "start": 97.9, + "end": 98.16, + "confidence": 0.943 + }, + { + "text": "I", + "start": 98.16, + "end": 98.26, + "confidence": 0.999 + }, + { + "text": "show", + "start": 98.26, + "end": 98.48, + "confidence": 0.981 + }, + { + "text": "you", + "start": 98.48, + "end": 98.76, + "confidence": 0.999 + }, + { + "text": "what", + "start": 98.76, + "end": 98.92, + "confidence": 0.98 + }, + { + "text": "it's", + "start": 98.92, + "end": 99.24, + "confidence": 0.991 + }, + { + "text": "like", + "start": 99.24, + "end": 99.46, + "confidence": 0.985 + }, + { + "text": "to", + "start": 99.46, + "end": 99.72, + "confidence": 0.813 + }, + { + "text": "be", + "start": 99.72, + "end": 99.84, + "confidence": 0.966 + }, + { + "text": "worse", + "start": 99.84, + "end": 100.1, + "confidence": 0.65 + } + ] + }, + { + "id": 11, + "seek": 11316, + "start": 114.54, + "end": 119.04, + "text": " Oh the misery everybody wants to be my enemy", + "tokens": [ + 50414, + 876, + 264, + 32309, + 2201, + 2738, + 281, + 312, + 452, + 5945, + 50652 + ], + "temperature": 0.0, + "avg_logprob": -0.6831691347319504, + "compression_ratio": 1.537313432835821, + "no_speech_prob": 0.18881404399871826, + "confidence": 0.825, + "words": [ + { + "text": "Oh", + "start": 114.54, + "end": 115.28, + "confidence": 0.845 + }, + { + "text": "the", + "start": 115.28, + "end": 115.5, + "confidence": 0.506 + }, + { + "text": "misery", + "start": 115.5, + "end": 116.26, + "confidence": 0.983 + }, + { + "text": "everybody", + 
"start": 116.26, + "end": 117.6, + "confidence": 0.473 + }, + { + "text": "wants", + "start": 117.6, + "end": 117.96, + "confidence": 0.945 + }, + { + "text": "to", + "start": 117.96, + "end": 118.14, + "confidence": 0.988 + }, + { + "text": "be", + "start": 118.14, + "end": 118.26, + "confidence": 0.996 + }, + { + "text": "my", + "start": 118.26, + "end": 118.58, + "confidence": 0.992 + }, + { + "text": "enemy", + "start": 118.58, + "end": 119.04, + "confidence": 0.964 + } + ] + }, + { + "id": 12, + "seek": 11316, + "start": 120.52, + "end": 126.22, + "text": " Spare the sympathy everybody wants to be my enemy", + "tokens": [ + 50740, + 1738, + 543, + 264, + 33240, + 2201, + 2738, + 281, + 312, + 452, + 5945, + 51004 + ], + "temperature": 0.0, + "avg_logprob": -0.6831691347319504, + "compression_ratio": 1.537313432835821, + "no_speech_prob": 0.18881404399871826, + "confidence": 0.778, + "words": [ + { + "text": "Spare", + "start": 120.52, + "end": 121.44, + "confidence": 0.459 + }, + { + "text": "the", + "start": 121.44, + "end": 121.7, + "confidence": 0.961 + }, + { + "text": "sympathy", + "start": 121.7, + "end": 122.48, + "confidence": 0.974 + }, + { + "text": "everybody", + "start": 122.48, + "end": 123.8, + "confidence": 0.495 + }, + { + "text": "wants", + "start": 123.8, + "end": 124.18, + "confidence": 0.959 + }, + { + "text": "to", + "start": 124.18, + "end": 124.42, + "confidence": 0.996 + }, + { + "text": "be", + "start": 124.42, + "end": 124.9, + "confidence": 0.998 + }, + { + "text": "my", + "start": 124.9, + "end": 125.6, + "confidence": 0.904 + }, + { + "text": "enemy", + "start": 125.6, + "end": 126.22, + "confidence": 0.972 + } + ] + }, + { + "id": 13, + "seek": 11316, + "start": 131.54, + "end": 132.4, + "text": " My enemy", + "tokens": [ + 51260, + 1222, + 5945, + 51360 + ], + "temperature": 0.0, + "avg_logprob": -0.6831691347319504, + "compression_ratio": 1.537313432835821, + "no_speech_prob": 0.18881404399871826, + "confidence": 0.929, + 
"words": [ + { + "text": "My", + "start": 131.54, + "end": 131.92, + "confidence": 0.917 + }, + { + "text": "enemy", + "start": 131.92, + "end": 132.4, + "confidence": 0.941 + } + ] + }, + { + "id": 14, + "seek": 13308, + "start": 133.08, + "end": 137.12, + "text": " Look out for yourself", + "tokens": [ + 50364, + 2053, + 484, + 337, + 1803, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.30687705002262095, + "compression_ratio": 1.7378378378378379, + "no_speech_prob": 0.20625492930412292, + "confidence": 0.635, + "words": [ + { + "text": "Look", + "start": 133.08, + "end": 136.22, + "confidence": 0.223 + }, + { + "text": "out", + "start": 136.22, + "end": 136.56, + "confidence": 0.884 + }, + { + "text": "for", + "start": 136.56, + "end": 136.74, + "confidence": 0.98 + }, + { + "text": "yourself", + "start": 136.74, + "end": 137.12, + "confidence": 0.842 + } + ] + }, + { + "id": 15, + "seek": 13308, + "start": 137.6, + "end": 140.24, + "text": " Look, okay I'm hoping that somebody pray for me", + "tokens": [ + 50564, + 2053, + 11, + 1392, + 286, + 478, + 7159, + 300, + 2618, + 3690, + 337, + 385, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -0.30687705002262095, + "compression_ratio": 1.7378378378378379, + "no_speech_prob": 0.20625492930412292, + "confidence": 0.835, + "words": [ + { + "text": "Look,", + "start": 137.6, + "end": 137.88, + "confidence": 0.837 + }, + { + "text": "okay", + "start": 138.22, + "end": 138.58, + "confidence": 0.711 + }, + { + "text": "I'm", + "start": 138.58, + "end": 138.92, + "confidence": 0.7 + }, + { + "text": "hoping", + "start": 138.92, + "end": 139.16, + "confidence": 0.773 + }, + { + "text": "that", + "start": 139.16, + "end": 139.34, + "confidence": 0.964 + }, + { + "text": "somebody", + "start": 139.34, + "end": 139.58, + "confidence": 0.973 + }, + { + "text": "pray", + "start": 139.58, + "end": 139.82, + "confidence": 0.799 + }, + { + "text": "for", + "start": 139.82, + "end": 140.0, + "confidence": 0.975 + }, + 
{ + "text": "me", + "start": 140.0, + "end": 140.24, + "confidence": 0.998 + } + ] + }, + { + "id": 16, + "seek": 13308, + "start": 140.38, + "end": 141.56, + "text": " Praying that somebody vote for me", + "tokens": [ + 50714, + 2114, + 32600, + 300, + 2618, + 4740, + 337, + 385, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.30687705002262095, + "compression_ratio": 1.7378378378378379, + "no_speech_prob": 0.20625492930412292, + "confidence": 0.914, + "words": [ + { + "text": "Praying", + "start": 140.38, + "end": 140.68, + "confidence": 0.856 + }, + { + "text": "that", + "start": 140.68, + "end": 140.84, + "confidence": 0.978 + }, + { + "text": "somebody", + "start": 140.84, + "end": 141.1, + "confidence": 0.994 + }, + { + "text": "vote", + "start": 141.1, + "end": 141.34, + "confidence": 0.751 + }, + { + "text": "for", + "start": 141.34, + "end": 141.46, + "confidence": 0.999 + }, + { + "text": "me", + "start": 141.46, + "end": 141.56, + "confidence": 0.999 + } + ] + }, + { + "id": 17, + "seek": 13308, + "start": 141.9, + "end": 143.22, + "text": " Staying where nobody supposed to be", + "tokens": [ + 50764, + 8691, + 278, + 689, + 5079, + 3442, + 281, + 312, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.30687705002262095, + "compression_ratio": 1.7378378378378379, + "no_speech_prob": 0.20625492930412292, + "confidence": 0.818, + "words": [ + { + "text": "Staying", + "start": 141.9, + "end": 142.24, + "confidence": 0.748 + }, + { + "text": "where", + "start": 142.24, + "end": 142.34, + "confidence": 0.909 + }, + { + "text": "nobody", + "start": 142.34, + "end": 142.62, + "confidence": 0.955 + }, + { + "text": "supposed", + "start": 142.62, + "end": 142.94, + "confidence": 0.506 + }, + { + "text": "to", + "start": 142.94, + "end": 143.1, + "confidence": 0.996 + }, + { + "text": "be", + "start": 143.1, + "end": 143.22, + "confidence": 0.998 + } + ] + }, + { + "id": 18, + "seek": 13308, + "start": 143.24, + "end": 144.98, + "text": " I propose to 
be in a wreck of emotions", + "tokens": [ + 50864, + 286, + 17421, + 281, + 312, + 294, + 257, + 21478, + 295, + 8462, + 50964 + ], + "temperature": 0.0, + "avg_logprob": -0.30687705002262095, + "compression_ratio": 1.7378378378378379, + "no_speech_prob": 0.20625492930412292, + "confidence": 0.611, + "words": [ + { + "text": "I", + "start": 143.24, + "end": 143.42, + "confidence": 0.209 + }, + { + "text": "propose", + "start": 143.42, + "end": 143.72, + "confidence": 0.43 + }, + { + "text": "to", + "start": 143.72, + "end": 143.88, + "confidence": 0.836 + }, + { + "text": "be", + "start": 143.88, + "end": 144.02, + "confidence": 0.517 + }, + { + "text": "in", + "start": 144.02, + "end": 144.2, + "confidence": 0.864 + }, + { + "text": "a", + "start": 144.2, + "end": 144.28, + "confidence": 0.55 + }, + { + "text": "wreck", + "start": 144.28, + "end": 144.4, + "confidence": 0.69 + }, + { + "text": "of", + "start": 144.4, + "end": 144.54, + "confidence": 0.978 + }, + { + "text": "emotions", + "start": 144.54, + "end": 144.98, + "confidence": 0.947 + } + ] + }, + { + "id": 19, + "seek": 13308, + "start": 145.06, + "end": 146.32, + "text": " Ready to go whenever you let me know", + "tokens": [ + 50964, + 9944, + 281, + 352, + 5699, + 291, + 718, + 385, + 458, + 51014 + ], + "temperature": 0.0, + "avg_logprob": -0.30687705002262095, + "compression_ratio": 1.7378378378378379, + "no_speech_prob": 0.20625492930412292, + "confidence": 0.868, + "words": [ + { + "text": "Ready", + "start": 145.06, + "end": 145.3, + "confidence": 0.975 + }, + { + "text": "to", + "start": 145.3, + "end": 145.46, + "confidence": 0.996 + }, + { + "text": "go", + "start": 145.46, + "end": 145.6, + "confidence": 0.995 + }, + { + "text": "whenever", + "start": 145.6, + "end": 145.8, + "confidence": 0.84 + }, + { + "text": "you", + "start": 145.8, + "end": 145.94, + "confidence": 0.431 + }, + { + "text": "let", + "start": 145.94, + "end": 146.08, + "confidence": 0.945 + }, + { + "text": "me", + 
"start": 146.08, + "end": 146.2, + "confidence": 0.997 + }, + { + "text": "know", + "start": 146.2, + "end": 146.32, + "confidence": 0.98 + } + ] + }, + { + "id": 20, + "seek": 13308, + "start": 146.36, + "end": 147.86, + "text": " The road is long so put the pedal into the flow", + "tokens": [ + 51014, + 440, + 3060, + 307, + 938, + 370, + 829, + 264, + 19122, + 666, + 264, + 3095, + 51114 + ], + "temperature": 0.0, + "avg_logprob": -0.30687705002262095, + "compression_ratio": 1.7378378378378379, + "no_speech_prob": 0.20625492930412292, + "confidence": 0.944, + "words": [ + { + "text": "The", + "start": 146.36, + "end": 146.52, + "confidence": 0.962 + }, + { + "text": "road", + "start": 146.52, + "end": 146.68, + "confidence": 0.99 + }, + { + "text": "is", + "start": 146.68, + "end": 146.76, + "confidence": 0.987 + }, + { + "text": "long", + "start": 146.76, + "end": 146.9, + "confidence": 0.971 + }, + { + "text": "so", + "start": 146.9, + "end": 147.04, + "confidence": 0.853 + }, + { + "text": "put", + "start": 147.04, + "end": 147.16, + "confidence": 0.948 + }, + { + "text": "the", + "start": 147.16, + "end": 147.28, + "confidence": 0.947 + }, + { + "text": "pedal", + "start": 147.28, + "end": 147.4, + "confidence": 0.993 + }, + { + "text": "into", + "start": 147.4, + "end": 147.6, + "confidence": 0.907 + }, + { + "text": "the", + "start": 147.6, + "end": 147.74, + "confidence": 0.988 + }, + { + "text": "flow", + "start": 147.74, + "end": 147.86, + "confidence": 0.851 + } + ] + }, + { + "id": 21, + "seek": 13308, + "start": 147.92, + "end": 149.8, + "text": " The enemy on my trail, my energy unavailable", + "tokens": [ + 51114, + 440, + 5945, + 322, + 452, + 9924, + 11, + 452, + 2281, + 36541, + 32699, + 51214 + ], + "temperature": 0.0, + "avg_logprob": -0.30687705002262095, + "compression_ratio": 1.7378378378378379, + "no_speech_prob": 0.20625492930412292, + "confidence": 0.968, + "words": [ + { + "text": "The", + "start": 147.92, + "end": 148.1, + 
"confidence": 0.931 + }, + { + "text": "enemy", + "start": 148.1, + "end": 148.28, + "confidence": 0.942 + }, + { + "text": "on", + "start": 148.28, + "end": 148.4, + "confidence": 0.964 + }, + { + "text": "my", + "start": 148.4, + "end": 148.5, + "confidence": 0.966 + }, + { + "text": "trail,", + "start": 148.5, + "end": 148.64, + "confidence": 0.983 + }, + { + "text": "my", + "start": 148.7, + "end": 148.88, + "confidence": 0.985 + }, + { + "text": "energy", + "start": 148.88, + "end": 149.1, + "confidence": 0.993 + }, + { + "text": "unavailable", + "start": 149.1, + "end": 149.8, + "confidence": 0.976 + } + ] + }, + { + "id": 22, + "seek": 13308, + "start": 150.02, + "end": 151.18, + "text": " I'ma tell them I still a way go", + "tokens": [ + 51214, + 286, + 478, + 64, + 980, + 552, + 286, + 920, + 257, + 636, + 352, + 51264 + ], + "temperature": 0.0, + "avg_logprob": -0.30687705002262095, + "compression_ratio": 1.7378378378378379, + "no_speech_prob": 0.20625492930412292, + "confidence": 0.523, + "words": [ + { + "text": "I'ma", + "start": 150.02, + "end": 150.26, + "confidence": 0.73 + }, + { + "text": "tell", + "start": 150.26, + "end": 150.36, + "confidence": 0.987 + }, + { + "text": "them", + "start": 150.36, + "end": 150.46, + "confidence": 0.366 + }, + { + "text": "I", + "start": 150.46, + "end": 150.62, + "confidence": 0.431 + }, + { + "text": "still", + "start": 150.62, + "end": 150.78, + "confidence": 0.282 + }, + { + "text": "a", + "start": 150.78, + "end": 150.94, + "confidence": 0.19 + }, + { + "text": "way", + "start": 150.94, + "end": 151.04, + "confidence": 0.775 + }, + { + "text": "go", + "start": 151.04, + "end": 151.18, + "confidence": 0.61 + } + ] + }, + { + "id": 23, + "seek": 13308, + "start": 151.18, + "end": 152.56, + "text": " Way when I'm plotting, I drive to the top", + "tokens": [ + 51264, + 9558, + 562, + 286, + 478, + 41178, + 11, + 286, + 3332, + 281, + 264, + 1192, + 51314 + ], + "temperature": 0.0, + "avg_logprob": 
-0.30687705002262095, + "compression_ratio": 1.7378378378378379, + "no_speech_prob": 0.20625492930412292, + "confidence": 0.561, + "words": [ + { + "text": "Way", + "start": 151.18, + "end": 151.38, + "confidence": 0.241 + }, + { + "text": "when", + "start": 151.38, + "end": 151.56, + "confidence": 0.471 + }, + { + "text": "I'm", + "start": 151.56, + "end": 151.7, + "confidence": 0.581 + }, + { + "text": "plotting,", + "start": 151.7, + "end": 151.82, + "confidence": 0.363 + }, + { + "text": "I", + "start": 151.88, + "end": 151.98, + "confidence": 0.563 + }, + { + "text": "drive", + "start": 151.98, + "end": 152.16, + "confidence": 0.404 + }, + { + "text": "to", + "start": 152.16, + "end": 152.32, + "confidence": 0.982 + }, + { + "text": "the", + "start": 152.32, + "end": 152.42, + "confidence": 0.994 + }, + { + "text": "top", + "start": 152.42, + "end": 152.56, + "confidence": 0.999 + } + ] + }, + { + "id": 24, + "seek": 13308, + "start": 152.56, + "end": 154.3, + "text": " I been out of shape, thinking that I'm a box, I'm an astronaut", + "tokens": [ + 51314, + 286, + 668, + 484, + 295, + 3909, + 11, + 1953, + 300, + 286, + 478, + 257, + 2424, + 11, + 286, + 478, + 364, + 18516, + 51414 + ], + "temperature": 0.0, + "avg_logprob": -0.30687705002262095, + "compression_ratio": 1.7378378378378379, + "no_speech_prob": 0.20625492930412292, + "confidence": 0.812, + "words": [ + { + "text": "I", + "start": 152.56, + "end": 152.7, + "confidence": 0.983 + }, + { + "text": "been", + "start": 152.7, + "end": 152.76, + "confidence": 0.526 + }, + { + "text": "out", + "start": 152.76, + "end": 152.9, + "confidence": 0.848 + }, + { + "text": "of", + "start": 152.9, + "end": 153.0, + "confidence": 0.972 + }, + { + "text": "shape,", + "start": 153.0, + "end": 153.12, + "confidence": 0.996 + }, + { + "text": "thinking", + "start": 153.18, + "end": 153.32, + "confidence": 0.725 + }, + { + "text": "that", + "start": 153.32, + "end": 153.48, + "confidence": 0.434 + }, + { + "text": 
"I'm", + "start": 153.48, + "end": 153.62, + "confidence": 0.796 + }, + { + "text": "a", + "start": 153.62, + "end": 153.68, + "confidence": 0.725 + }, + { + "text": "box,", + "start": 153.68, + "end": 153.76, + "confidence": 0.798 + }, + { + "text": "I'm", + "start": 153.82, + "end": 153.92, + "confidence": 0.966 + }, + { + "text": "an", + "start": 153.92, + "end": 154.08, + "confidence": 0.984 + }, + { + "text": "astronaut", + "start": 154.08, + "end": 154.3, + "confidence": 0.972 + } + ] + }, + { + "id": 25, + "seek": 13308, + "start": 154.5, + "end": 156.38, + "text": " Blasted off the planet, rocked the cars, catastrophic", + "tokens": [ + 51414, + 2177, + 34440, + 766, + 264, + 5054, + 11, + 3727, + 292, + 264, + 5163, + 11, + 34915, + 51514 + ], + "temperature": 0.0, + "avg_logprob": -0.30687705002262095, + "compression_ratio": 1.7378378378378379, + "no_speech_prob": 0.20625492930412292, + "confidence": 0.771, + "words": [ + { + "text": "Blasted", + "start": 154.5, + "end": 154.82, + "confidence": 0.741 + }, + { + "text": "off", + "start": 154.82, + "end": 155.02, + "confidence": 0.967 + }, + { + "text": "the", + "start": 155.02, + "end": 155.18, + "confidence": 0.949 + }, + { + "text": "planet,", + "start": 155.18, + "end": 155.36, + "confidence": 0.992 + }, + { + "text": "rocked", + "start": 155.44, + "end": 155.68, + "confidence": 0.726 + }, + { + "text": "the", + "start": 155.68, + "end": 155.74, + "confidence": 0.855 + }, + { + "text": "cars,", + "start": 155.74, + "end": 155.88, + "confidence": 0.438 + }, + { + "text": "catastrophic", + "start": 155.9, + "end": 156.38, + "confidence": 0.747 + } + ] + }, + { + "id": 26, + "seek": 13308, + "start": 156.42, + "end": 158.11, + "text": " And it matters more because I had it not had", + "tokens": [ + 51514, + 400, + 309, + 7001, + 544, + 570, + 286, + 632, + 309, + 406, + 632, + 51614 + ], + "temperature": 0.0, + "avg_logprob": -0.30687705002262095, + "compression_ratio": 1.7378378378378379, + 
"no_speech_prob": 0.20625492930412292, + "confidence": 0.758, + "words": [ + { + "text": "And", + "start": 156.42, + "end": 156.62, + "confidence": 0.816 + }, + { + "text": "it", + "start": 156.62, + "end": 156.76, + "confidence": 0.519 + }, + { + "text": "matters", + "start": 156.76, + "end": 156.94, + "confidence": 0.994 + }, + { + "text": "more", + "start": 156.94, + "end": 157.2, + "confidence": 0.993 + }, + { + "text": "because", + "start": 157.2, + "end": 157.42, + "confidence": 0.831 + }, + { + "text": "I", + "start": 157.42, + "end": 157.56, + "confidence": 0.939 + }, + { + "text": "had", + "start": 157.56, + "end": 157.68, + "confidence": 0.965 + }, + { + "text": "it", + "start": 157.68, + "end": 157.82, + "confidence": 0.984 + }, + { + "text": "not", + "start": 157.82, + "end": 157.92, + "confidence": 0.306 + }, + { + "text": "had", + "start": 157.92, + "end": 158.11, + "confidence": 0.659 + } + ] + }, + { + "id": 27, + "seek": 13308, + "start": 158.11, + "end": 159.84, + "text": " I thought about wreaking havoc on an opposition", + "tokens": [ + 51614, + 286, + 1194, + 466, + 46674, + 2456, + 47367, + 322, + 364, + 13504, + 51714 + ], + "temperature": 0.0, + "avg_logprob": -0.30687705002262095, + "compression_ratio": 1.7378378378378379, + "no_speech_prob": 0.20625492930412292, + "confidence": 0.963, + "words": [ + { + "text": "I", + "start": 158.11, + "end": 158.2, + "confidence": 0.98 + }, + { + "text": "thought", + "start": 158.2, + "end": 158.4, + "confidence": 0.963 + }, + { + "text": "about", + "start": 158.4, + "end": 158.58, + "confidence": 0.97 + }, + { + "text": "wreaking", + "start": 158.58, + "end": 158.86, + "confidence": 0.992 + }, + { + "text": "havoc", + "start": 158.86, + "end": 159.1, + "confidence": 1.0 + }, + { + "text": "on", + "start": 159.1, + "end": 159.32, + "confidence": 0.914 + }, + { + "text": "an", + "start": 159.32, + "end": 159.46, + "confidence": 0.876 + }, + { + "text": "opposition", + "start": 159.46, + "end": 159.84, + 
"confidence": 0.989 + } + ] + }, + { + "id": 28, + "seek": 13308, + "start": 159.84, + "end": 161.56, + "text": " Kinda shocking, they want it static with precision", + "tokens": [ + 51714, + 35553, + 18776, + 11, + 436, + 528, + 309, + 13437, + 365, + 18356, + 51764 + ], + "temperature": 0.0, + "avg_logprob": -0.30687705002262095, + "compression_ratio": 1.7378378378378379, + "no_speech_prob": 0.20625492930412292, + "confidence": 0.605, + "words": [ + { + "text": "Kinda", + "start": 159.84, + "end": 160.1, + "confidence": 0.521 + }, + { + "text": "shocking,", + "start": 160.1, + "end": 160.4, + "confidence": 0.604 + }, + { + "text": "they", + "start": 160.52, + "end": 160.6, + "confidence": 0.47 + }, + { + "text": "want", + "start": 160.6, + "end": 160.74, + "confidence": 0.652 + }, + { + "text": "it", + "start": 160.74, + "end": 160.88, + "confidence": 0.451 + }, + { + "text": "static", + "start": 160.88, + "end": 161.04, + "confidence": 0.991 + }, + { + "text": "with", + "start": 161.04, + "end": 161.28, + "confidence": 0.482 + }, + { + "text": "precision", + "start": 161.28, + "end": 161.56, + "confidence": 0.857 + } + ] + }, + { + "id": 29, + "seek": 16108, + "start": 161.66, + "end": 163.46, + "text": " I'm automatic, quarterback, I ain't talking sack and packet", + "tokens": [ + 50364, + 286, + 478, + 12509, + 11, + 31952, + 11, + 286, + 7862, + 380, + 1417, + 33209, + 293, + 20300, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.18466945570342394, + "compression_ratio": 2.0683229813664594, + "no_speech_prob": 0.3935411274433136, + "confidence": 0.732, + "words": [ + { + "text": "I'm", + "start": 161.66, + "end": 161.86, + "confidence": 0.949 + }, + { + "text": "automatic,", + "start": 161.86, + "end": 162.22, + "confidence": 0.979 + }, + { + "text": "quarterback,", + "start": 162.56, + "end": 162.58, + "confidence": 0.68 + }, + { + "text": "I", + "start": 162.68, + "end": 162.78, + "confidence": 0.751 + }, + { + "text": "ain't", + "start": 162.78, + 
"end": 162.9, + "confidence": 0.962 + }, + { + "text": "talking", + "start": 162.9, + "end": 163.1, + "confidence": 0.722 + }, + { + "text": "sack", + "start": 163.1, + "end": 163.34, + "confidence": 0.381 + }, + { + "text": "and", + "start": 163.34, + "end": 163.44, + "confidence": 0.534 + }, + { + "text": "packet", + "start": 163.44, + "end": 163.46, + "confidence": 0.525 + } + ] + }, + { + "id": 30, + "seek": 16108, + "start": 163.62, + "end": 165.21, + "text": " Pack it up, I don't panic, batter, batter up", + "tokens": [ + 50464, + 18466, + 309, + 493, + 11, + 286, + 500, + 380, + 14783, + 11, + 4220, + 11, + 4220, + 493, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.18466945570342394, + "compression_ratio": 2.0683229813664594, + "no_speech_prob": 0.3935411274433136, + "confidence": 0.906, + "words": [ + { + "text": "Pack", + "start": 163.62, + "end": 163.86, + "confidence": 0.984 + }, + { + "text": "it", + "start": 163.86, + "end": 164.0, + "confidence": 0.918 + }, + { + "text": "up,", + "start": 164.0, + "end": 164.12, + "confidence": 0.978 + }, + { + "text": "I", + "start": 164.12, + "end": 164.2, + "confidence": 0.944 + }, + { + "text": "don't", + "start": 164.2, + "end": 164.36, + "confidence": 0.881 + }, + { + "text": "panic,", + "start": 164.36, + "end": 164.56, + "confidence": 0.997 + }, + { + "text": "batter,", + "start": 164.76, + "end": 164.88, + "confidence": 0.77 + }, + { + "text": "batter", + "start": 164.92, + "end": 165.06, + "confidence": 0.963 + }, + { + "text": "up", + "start": 165.06, + "end": 165.21, + "confidence": 0.783 + } + ] + }, + { + "id": 31, + "seek": 16108, + "start": 165.21, + "end": 166.76, + "text": " Who the baddest, it don't matter cause we it's your", + "tokens": [ + 50564, + 2102, + 264, + 1578, + 23748, + 11, + 309, + 500, + 380, + 1871, + 3082, + 321, + 309, + 311, + 428, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.18466945570342394, + "compression_ratio": 2.0683229813664594, + "no_speech_prob": 
0.3935411274433136, + "confidence": 0.754, + "words": [ + { + "text": "Who", + "start": 165.21, + "end": 165.4, + "confidence": 0.993 + }, + { + "text": "the", + "start": 165.4, + "end": 165.54, + "confidence": 0.942 + }, + { + "text": "baddest,", + "start": 165.54, + "end": 165.76, + "confidence": 0.912 + }, + { + "text": "it", + "start": 165.8, + "end": 165.9, + "confidence": 0.992 + }, + { + "text": "don't", + "start": 165.9, + "end": 166.04, + "confidence": 0.996 + }, + { + "text": "matter", + "start": 166.04, + "end": 166.18, + "confidence": 0.998 + }, + { + "text": "cause", + "start": 166.18, + "end": 166.38, + "confidence": 0.372 + }, + { + "text": "we", + "start": 166.38, + "end": 166.56, + "confidence": 0.952 + }, + { + "text": "it's", + "start": 166.56, + "end": 166.66, + "confidence": 0.366 + }, + { + "text": "your", + "start": 166.66, + "end": 166.76, + "confidence": 0.702 + } + ] + }, + { + "id": 32, + "seek": 16108, + "start": 166.88, + "end": 168.94, + "text": " Everybody wants to be my enemy", + "tokens": [ + 50664, + 7646, + 2738, + 281, + 312, + 452, + 5945, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.18466945570342394, + "compression_ratio": 2.0683229813664594, + "no_speech_prob": 0.3935411274433136, + "confidence": 0.971, + "words": [ + { + "text": "Everybody", + "start": 166.88, + "end": 167.44, + "confidence": 0.905 + }, + { + "text": "wants", + "start": 167.44, + "end": 167.82, + "confidence": 0.969 + }, + { + "text": "to", + "start": 167.82, + "end": 167.98, + "confidence": 0.996 + }, + { + "text": "be", + "start": 167.98, + "end": 168.16, + "confidence": 0.997 + }, + { + "text": "my", + "start": 168.16, + "end": 168.5, + "confidence": 0.994 + }, + { + "text": "enemy", + "start": 168.5, + "end": 168.94, + "confidence": 0.967 + } + ] + }, + { + "id": 33, + "seek": 16108, + "start": 170.58, + "end": 172.2, + "text": " Spare the sympathy", + "tokens": [ + 50764, + 1738, + 543, + 264, + 33240, + 50914 + ], + "temperature": 0.0, + 
"avg_logprob": -0.18466945570342394, + "compression_ratio": 2.0683229813664594, + "no_speech_prob": 0.3935411274433136, + "confidence": 0.85, + "words": [ + { + "text": "Spare", + "start": 170.58, + "end": 171.28, + "confidence": 0.75 + }, + { + "text": "the", + "start": 171.28, + "end": 171.54, + "confidence": 0.964 + }, + { + "text": "sympathy", + "start": 171.54, + "end": 172.2, + "confidence": 0.965 + } + ] + }, + { + "id": 34, + "seek": 16108, + "start": 173.08, + "end": 176.16, + "text": " Everybody wants to be my enemy", + "tokens": [ + 50914, + 7646, + 2738, + 281, + 312, + 452, + 5945, + 51114 + ], + "temperature": 0.0, + "avg_logprob": -0.18466945570342394, + "compression_ratio": 2.0683229813664594, + "no_speech_prob": 0.3935411274433136, + "confidence": 0.968, + "words": [ + { + "text": "Everybody", + "start": 173.08, + "end": 173.7, + "confidence": 0.996 + }, + { + "text": "wants", + "start": 173.7, + "end": 174.06, + "confidence": 0.991 + }, + { + "text": "to", + "start": 174.06, + "end": 174.32, + "confidence": 0.999 + }, + { + "text": "be", + "start": 174.32, + "end": 174.64, + "confidence": 0.999 + }, + { + "text": "my", + "start": 174.64, + "end": 175.46, + "confidence": 0.839 + }, + { + "text": "enemy", + "start": 175.46, + "end": 176.16, + "confidence": 0.996 + } + ] + }, + { + "id": 35, + "seek": 16108, + "start": 176.9, + "end": 178.28, + "text": " Oh, the misery", + "tokens": [ + 51114, + 876, + 11, + 264, + 32309, + 51214 + ], + "temperature": 0.0, + "avg_logprob": -0.18466945570342394, + "compression_ratio": 2.0683229813664594, + "no_speech_prob": 0.3935411274433136, + "confidence": 0.911, + "words": [ + { + "text": "Oh,", + "start": 176.9, + "end": 177.48, + "confidence": 0.782 + }, + { + "text": "the", + "start": 177.54, + "end": 177.76, + "confidence": 0.969 + }, + { + "text": "misery", + "start": 177.76, + "end": 178.28, + "confidence": 0.999 + } + ] + }, + { + "id": 36, + "seek": 16108, + "start": 179.28, + "end": 181.6, + "text": " 
Everybody wants to be my enemy", + "tokens": [ + 51214, + 7646, + 2738, + 281, + 312, + 452, + 5945, + 51414 + ], + "temperature": 0.0, + "avg_logprob": -0.18466945570342394, + "compression_ratio": 2.0683229813664594, + "no_speech_prob": 0.3935411274433136, + "confidence": 0.998, + "words": [ + { + "text": "Everybody", + "start": 179.28, + "end": 179.92, + "confidence": 0.997 + }, + { + "text": "wants", + "start": 179.92, + "end": 180.28, + "confidence": 0.994 + }, + { + "text": "to", + "start": 180.28, + "end": 180.44, + "confidence": 1.0 + }, + { + "text": "be", + "start": 180.44, + "end": 180.62, + "confidence": 0.999 + }, + { + "text": "my", + "start": 180.62, + "end": 180.98, + "confidence": 0.999 + }, + { + "text": "enemy", + "start": 180.98, + "end": 181.6, + "confidence": 0.999 + } + ] + }, + { + "id": 37, + "seek": 16108, + "start": 183.06, + "end": 184.48, + "text": " Spare the sympathy", + "tokens": [ + 51414, + 1738, + 543, + 264, + 33240, + 51514 + ], + "temperature": 0.0, + "avg_logprob": -0.18466945570342394, + "compression_ratio": 2.0683229813664594, + "no_speech_prob": 0.3935411274433136, + "confidence": 0.999, + "words": [ + { + "text": "Spare", + "start": 183.06, + "end": 183.76, + "confidence": 1.0 + }, + { + "text": "the", + "start": 183.76, + "end": 184.08, + "confidence": 0.999 + }, + { + "text": "sympathy", + "start": 184.08, + "end": 184.48, + "confidence": 0.999 + } + ] + }, + { + "id": 38, + "seek": 16108, + "start": 185.48, + "end": 188.38, + "text": " Everybody wants to be my enemy", + "tokens": [ + 51514, + 7646, + 2738, + 281, + 312, + 452, + 5945, + 51714 + ], + "temperature": 0.0, + "avg_logprob": -0.18466945570342394, + "compression_ratio": 2.0683229813664594, + "no_speech_prob": 0.3935411274433136, + "confidence": 0.993, + "words": [ + { + "text": "Everybody", + "start": 185.48, + "end": 186.16, + "confidence": 0.993 + }, + { + "text": "wants", + "start": 186.16, + "end": 186.5, + "confidence": 0.994 + }, + { + "text": "to", + 
"start": 186.5, + "end": 186.78, + "confidence": 1.0 + }, + { + "text": "be", + "start": 186.78, + "end": 187.38, + "confidence": 1.0 + }, + { + "text": "my", + "start": 187.38, + "end": 187.94, + "confidence": 0.972 + }, + { + "text": "enemy", + "start": 187.94, + "end": 188.38, + "confidence": 0.999 + } + ] + }, + { + "id": 39, + "seek": 18808, + "start": 188.78, + "end": 192.06, + "text": " My enemy, I swear, I'll never be a saint", + "tokens": [ + 50364, + 1222, + 5945, + 11, + 286, + 11902, + 11, + 286, + 603, + 1128, + 312, + 257, + 28374, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.33241142545427593, + "compression_ratio": 1.4634146341463414, + "no_speech_prob": 0.3475542664527893, + "confidence": 0.553, + "words": [ + { + "text": "My", + "start": 188.78, + "end": 189.2, + "confidence": 0.117 + }, + { + "text": "enemy,", + "start": 189.2, + "end": 189.74, + "confidence": 0.88 + }, + { + "text": "I", + "start": 189.74, + "end": 189.76, + "confidence": 0.396 + }, + { + "text": "swear,", + "start": 189.76, + "end": 190.3, + "confidence": 0.952 + }, + { + "text": "I'll", + "start": 190.82, + "end": 191.32, + "confidence": 0.699 + }, + { + "text": "never", + "start": 191.32, + "end": 191.34, + "confidence": 0.984 + }, + { + "text": "be", + "start": 191.34, + "end": 191.7, + "confidence": 0.98 + }, + { + "text": "a", + "start": 191.7, + "end": 191.9, + "confidence": 0.261 + }, + { + "text": "saint", + "start": 191.9, + "end": 192.06, + "confidence": 0.565 + } + ] + }, + { + "id": 40, + "seek": 18808, + "start": 192.14, + "end": 194.76, + "text": " No way, my enemy", + "tokens": [ + 50564, + 883, + 636, + 11, + 452, + 5945, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -0.33241142545427593, + "compression_ratio": 1.4634146341463414, + "no_speech_prob": 0.3475542664527893, + "confidence": 0.739, + "words": [ + { + "text": "No", + "start": 192.14, + "end": 192.68, + "confidence": 0.336 + }, + { + "text": "way,", + "start": 192.68, + "end": 193.12, + 
"confidence": 0.94 + }, + { + "text": "my", + "start": 193.88, + "end": 194.16, + "confidence": 0.96 + }, + { + "text": "enemy", + "start": 194.16, + "end": 194.76, + "confidence": 0.984 + } + ] + }, + { + "id": 41, + "seek": 18808, + "start": 194.84, + "end": 198.34, + "text": " My enemy, I swear, I'll never be a saint", + "tokens": [ + 50714, + 1222, + 5945, + 11, + 286, + 11902, + 11, + 286, + 603, + 1128, + 312, + 257, + 28374, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.33241142545427593, + "compression_ratio": 1.4634146341463414, + "no_speech_prob": 0.3475542664527893, + "confidence": 0.987, + "words": [ + { + "text": "My", + "start": 194.84, + "end": 195.48, + "confidence": 0.916 + }, + { + "text": "enemy,", + "start": 195.48, + "end": 195.88, + "confidence": 0.993 + }, + { + "text": "I", + "start": 196.32, + "end": 196.36, + "confidence": 0.988 + }, + { + "text": "swear,", + "start": 196.36, + "end": 196.82, + "confidence": 0.995 + }, + { + "text": "I'll", + "start": 197.12, + "end": 197.3, + "confidence": 0.995 + }, + { + "text": "never", + "start": 197.3, + "end": 197.56, + "confidence": 0.998 + }, + { + "text": "be", + "start": 197.56, + "end": 197.86, + "confidence": 0.999 + }, + { + "text": "a", + "start": 197.86, + "end": 198.1, + "confidence": 0.995 + }, + { + "text": "saint", + "start": 198.1, + "end": 198.34, + "confidence": 0.996 + } + ] + }, + { + "id": 42, + "seek": 18808, + "start": 198.54, + "end": 199.42, + "text": " Look out for yourself", + "tokens": [ + 50864, + 2053, + 484, + 337, + 1803, + 50964 + ], + "temperature": 0.0, + "avg_logprob": -0.33241142545427593, + "compression_ratio": 1.4634146341463414, + "no_speech_prob": 0.3475542664527893, + "confidence": 0.633, + "words": [ + { + "text": "Look", + "start": 198.54, + "end": 198.74, + "confidence": 0.234 + }, + { + "text": "out", + "start": 198.74, + "end": 198.94, + "confidence": 0.945 + }, + { + "text": "for", + "start": 198.94, + "end": 199.06, + "confidence": 0.923 + }, + 
{ + "text": "yourself", + "start": 199.06, + "end": 199.42, + "confidence": 0.788 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases/random.nocond_apollo11.mp3.words.json b/tests/expected/corner_cases/random.nocond_apollo11.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..d5cba98964fcb9713cfc544fb5ed932a510d0dd6 --- /dev/null +++ b/tests/expected/corner_cases/random.nocond_apollo11.mp3.words.json @@ -0,0 +1,2037 @@ +{ + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA. Alright, okay, we like the... Yeah, I'll put that there, my friend. They make the one that's on the helmet we were going to have in B1. And you can put the other one on the mic helmet with those GVA flipper strings. I got them. I got them. I got them. They're the better helmet that B1 has. They got the one mic down. They go in there and we fix it. We got them in there helmet bags. And we get them in the helmet bags. At least this one, at least the bag. Right here. Right here. We're thinking they could be on it. Yeah, we're thinking they could be on it. There you go. We were going to hang with the cover. I tried it already. Okay, fine. We weren't sure that. Just a suggestion. We thought we'd say you could check it out. I'm like you were already done that. So I guess we're going to come up with this. Let us know. Okay, no problem. Okay, no problem. We'll let you know when the end of the session. 
Bye.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.52, + "end": 6.54, + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA.", + "tokens": [ + 50364, + 25187, + 2975, + 11, + 18717, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 24758, + 3334, + 12, + 20914, + 460, + 2634, + 15454, + 460, + 20914, + 13, + 50714 + ], + "temperature": 0.1, + "avg_logprob": -0.7578097025553385, + "compression_ratio": 1.4202898550724639, + "no_speech_prob": 0.44998496770858765, + "confidence": 0.541, + "words": [ + { + "text": "Apollo", + "start": 0.52, + "end": 0.88, + "confidence": 0.155 + }, + { + "text": "11,", + "start": 0.88, + "end": 1.26, + "confidence": 0.977 + }, + { + "text": "Houston", + "start": 1.52, + "end": 1.72, + "confidence": 0.986 + }, + { + "text": "we", + "start": 1.72, + "end": 1.94, + "confidence": 0.518 + }, + { + "text": "got", + "start": 1.94, + "end": 2.1, + "confidence": 0.823 + }, + { + "text": "a", + "start": 2.1, + "end": 2.26, + "confidence": 0.989 + }, + { + "text": "recommendation", + "start": 2.26, + "end": 2.86, + "confidence": 0.968 + }, + { + "text": "for", + "start": 2.86, + "end": 3.44, + "confidence": 0.946 + }, + { + "text": "you", + "start": 3.44, + "end": 3.6, + "confidence": 0.984 + }, + { + "text": "on", + "start": 3.6, + "end": 3.72, + "confidence": 0.909 + }, + { + "text": "your", + "start": 3.72, + "end": 3.92, + "confidence": 0.971 + }, + { + "text": "Soyuz-VA", + "start": 3.92, + "end": 5.16, + "confidence": 0.26 + }, + { + "text": "GLEME", + "start": 5.16, + "end": 5.74, + "confidence": 0.475 + }, + { + "text": "GVA.", + "start": 5.74, + "end": 6.54, + "confidence": 0.435 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 10.8, + "end": 13.48, + "text": " Alright, okay, we like the...", + "tokens": [ + 50714, + 2798, + 11, + 1392, + 11, + 321, + 411, + 264, + 485, + 51014 + ], + "temperature": 0.1, + "avg_logprob": -0.7578097025553385, + "compression_ratio": 
1.4202898550724639, + "no_speech_prob": 0.44998496770858765, + "confidence": 0.412, + "words": [ + { + "text": "Alright,", + "start": 10.8, + "end": 11.04, + "confidence": 0.31 + }, + { + "text": "okay,", + "start": 11.68, + "end": 12.22, + "confidence": 0.504 + }, + { + "text": "we", + "start": 12.5, + "end": 12.98, + "confidence": 0.609 + }, + { + "text": "like", + "start": 12.98, + "end": 13.26, + "confidence": 0.505 + }, + { + "text": "the...", + "start": 13.26, + "end": 13.48, + "confidence": 0.247 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 13.48, + "end": 14.6, + "text": " Yeah, I'll put that there, my friend.", + "tokens": [ + 51014, + 865, + 11, + 286, + 603, + 829, + 300, + 456, + 11, + 452, + 1277, + 13, + 51114 + ], + "temperature": 0.1, + "avg_logprob": -0.7578097025553385, + "compression_ratio": 1.4202898550724639, + "no_speech_prob": 0.44998496770858765, + "confidence": 0.218, + "words": [ + { + "text": "Yeah,", + "start": 13.48, + "end": 13.5, + "confidence": 0.281 + }, + { + "text": "I'll", + "start": 13.5, + "end": 13.78, + "confidence": 0.195 + }, + { + "text": "put", + "start": 13.78, + "end": 13.96, + "confidence": 0.206 + }, + { + "text": "that", + "start": 13.96, + "end": 14.12, + "confidence": 0.86 + }, + { + "text": "there,", + "start": 14.12, + "end": 14.38, + "confidence": 0.325 + }, + { + "text": "my", + "start": 14.4, + "end": 14.54, + "confidence": 0.095 + }, + { + "text": "friend.", + "start": 14.54, + "end": 14.6, + "confidence": 0.088 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 14.6, + "end": 19.08, + "text": " They make the one that's on the helmet we were going to have in B1.", + "tokens": [ + 51114, + 814, + 652, + 264, + 472, + 300, + 311, + 322, + 264, + 15922, + 321, + 645, + 516, + 281, + 362, + 294, + 363, + 16, + 13, + 51314 + ], + "temperature": 0.1, + "avg_logprob": -0.7578097025553385, + "compression_ratio": 1.4202898550724639, + "no_speech_prob": 0.44998496770858765, + "confidence": 0.6, + "words": [ + { 
+ "text": "They", + "start": 14.6, + "end": 15.46, + "confidence": 0.366 + }, + { + "text": "make", + "start": 15.46, + "end": 15.68, + "confidence": 0.458 + }, + { + "text": "the", + "start": 15.68, + "end": 15.84, + "confidence": 0.354 + }, + { + "text": "one", + "start": 15.84, + "end": 16.06, + "confidence": 0.71 + }, + { + "text": "that's", + "start": 16.06, + "end": 16.28, + "confidence": 0.572 + }, + { + "text": "on", + "start": 16.28, + "end": 16.48, + "confidence": 0.636 + }, + { + "text": "the", + "start": 16.48, + "end": 16.78, + "confidence": 0.858 + }, + { + "text": "helmet", + "start": 16.78, + "end": 17.26, + "confidence": 0.892 + }, + { + "text": "we", + "start": 17.26, + "end": 17.52, + "confidence": 0.185 + }, + { + "text": "were", + "start": 17.52, + "end": 17.78, + "confidence": 0.529 + }, + { + "text": "going", + "start": 17.78, + "end": 17.92, + "confidence": 0.55 + }, + { + "text": "to", + "start": 17.92, + "end": 17.98, + "confidence": 0.984 + }, + { + "text": "have", + "start": 17.98, + "end": 18.2, + "confidence": 0.958 + }, + { + "text": "in", + "start": 18.2, + "end": 18.38, + "confidence": 0.844 + }, + { + "text": "B1.", + "start": 18.38, + "end": 19.08, + "confidence": 0.765 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 20.12, + "end": 24.52, + "text": " And you can put the other one on the mic helmet with those GVA flipper strings.", + "tokens": [ + 51314, + 400, + 291, + 393, + 829, + 264, + 661, + 472, + 322, + 264, + 3123, + 15922, + 365, + 729, + 460, + 20914, + 932, + 15124, + 13985, + 13, + 51614 + ], + "temperature": 0.1, + "avg_logprob": -0.7578097025553385, + "compression_ratio": 1.4202898550724639, + "no_speech_prob": 0.44998496770858765, + "confidence": 0.468, + "words": [ + { + "text": "And", + "start": 20.12, + "end": 20.16, + "confidence": 0.534 + }, + { + "text": "you", + "start": 20.16, + "end": 20.32, + "confidence": 0.942 + }, + { + "text": "can", + "start": 20.32, + "end": 20.48, + "confidence": 0.74 + }, + { 
+ "text": "put", + "start": 20.48, + "end": 20.64, + "confidence": 0.992 + }, + { + "text": "the", + "start": 20.64, + "end": 20.84, + "confidence": 0.99 + }, + { + "text": "other", + "start": 20.84, + "end": 21.0, + "confidence": 0.993 + }, + { + "text": "one", + "start": 21.0, + "end": 21.18, + "confidence": 0.982 + }, + { + "text": "on", + "start": 21.18, + "end": 21.42, + "confidence": 0.989 + }, + { + "text": "the", + "start": 21.42, + "end": 21.94, + "confidence": 0.461 + }, + { + "text": "mic", + "start": 21.94, + "end": 22.48, + "confidence": 0.384 + }, + { + "text": "helmet", + "start": 22.48, + "end": 22.8, + "confidence": 0.89 + }, + { + "text": "with", + "start": 22.8, + "end": 23.06, + "confidence": 0.436 + }, + { + "text": "those", + "start": 23.06, + "end": 23.3, + "confidence": 0.45 + }, + { + "text": "GVA", + "start": 23.3, + "end": 23.74, + "confidence": 0.222 + }, + { + "text": "flipper", + "start": 23.74, + "end": 24.18, + "confidence": 0.103 + }, + { + "text": "strings.", + "start": 24.18, + "end": 24.52, + "confidence": 0.207 + } + ] + }, + { + "id": 5, + "seek": 2500, + "start": 25.0, + "end": 32.12, + "text": " I got them.", + "tokens": [ + 50364, + 286, + 658, + 552, + 13, + 50714 + ], + "temperature": 0.1, + "avg_logprob": -0.6201622441129865, + "compression_ratio": 1.8987341772151898, + "no_speech_prob": 0.25520434975624084, + "confidence": 0.311, + "words": [ + { + "text": "I", + "start": 25.0, + "end": 27.3, + "confidence": 0.083 + }, + { + "text": "got", + "start": 27.3, + "end": 31.82, + "confidence": 0.79 + }, + { + "text": "them.", + "start": 31.82, + "end": 32.12, + "confidence": 0.458 + } + ] + }, + { + "id": 6, + "seek": 2500, + "start": 32.62, + "end": 33.36, + "text": " I got them.", + "tokens": [ + 50714, + 286, + 658, + 552, + 13, + 50764 + ], + "temperature": 0.1, + "avg_logprob": -0.6201622441129865, + "compression_ratio": 1.8987341772151898, + "no_speech_prob": 0.25520434975624084, + "confidence": 0.907, + "words": [ + { + 
"text": "I", + "start": 32.62, + "end": 32.88, + "confidence": 0.841 + }, + { + "text": "got", + "start": 32.88, + "end": 33.18, + "confidence": 0.971 + }, + { + "text": "them.", + "start": 33.18, + "end": 33.36, + "confidence": 0.914 + } + ] + }, + { + "id": 7, + "seek": 2500, + "start": 33.36, + "end": 33.46, + "text": " I got them.", + "tokens": [ + 50764, + 286, + 658, + 552, + 13, + 50814 + ], + "temperature": 0.1, + "avg_logprob": -0.6201622441129865, + "compression_ratio": 1.8987341772151898, + "no_speech_prob": 0.25520434975624084, + "confidence": 0.704, + "words": [ + { + "text": "I", + "start": 33.36, + "end": 33.38, + "confidence": 0.405 + }, + { + "text": "got", + "start": 33.38, + "end": 33.4, + "confidence": 0.923 + }, + { + "text": "them.", + "start": 33.4, + "end": 33.46, + "confidence": 0.932 + } + ] + }, + { + "id": 8, + "seek": 2500, + "start": 33.5, + "end": 35.3, + "text": " They're the better helmet that B1 has.", + "tokens": [ + 50814, + 814, + 434, + 264, + 1101, + 15922, + 300, + 363, + 16, + 575, + 13, + 50914 + ], + "temperature": 0.1, + "avg_logprob": -0.6201622441129865, + "compression_ratio": 1.8987341772151898, + "no_speech_prob": 0.25520434975624084, + "confidence": 0.585, + "words": [ + { + "text": "They're", + "start": 33.5, + "end": 33.52, + "confidence": 0.545 + }, + { + "text": "the", + "start": 33.52, + "end": 33.58, + "confidence": 0.576 + }, + { + "text": "better", + "start": 33.58, + "end": 33.76, + "confidence": 0.671 + }, + { + "text": "helmet", + "start": 33.76, + "end": 34.2, + "confidence": 0.769 + }, + { + "text": "that", + "start": 34.2, + "end": 34.42, + "confidence": 0.428 + }, + { + "text": "B1", + "start": 34.42, + "end": 34.92, + "confidence": 0.492 + }, + { + "text": "has.", + "start": 34.92, + "end": 35.3, + "confidence": 0.875 + } + ] + }, + { + "id": 9, + "seek": 2500, + "start": 35.5, + "end": 36.38, + "text": " They got the one mic down.", + "tokens": [ + 50914, + 814, + 658, + 264, + 472, + 3123, + 760, + 
13, + 51014 + ], + "temperature": 0.1, + "avg_logprob": -0.6201622441129865, + "compression_ratio": 1.8987341772151898, + "no_speech_prob": 0.25520434975624084, + "confidence": 0.485, + "words": [ + { + "text": "They", + "start": 35.5, + "end": 35.52, + "confidence": 0.461 + }, + { + "text": "got", + "start": 35.52, + "end": 35.58, + "confidence": 0.46 + }, + { + "text": "the", + "start": 35.58, + "end": 35.68, + "confidence": 0.593 + }, + { + "text": "one", + "start": 35.68, + "end": 35.78, + "confidence": 0.885 + }, + { + "text": "mic", + "start": 35.78, + "end": 36.06, + "confidence": 0.296 + }, + { + "text": "down.", + "start": 36.06, + "end": 36.38, + "confidence": 0.393 + } + ] + }, + { + "id": 10, + "seek": 2500, + "start": 37.62, + "end": 39.26, + "text": " They go in there and we fix it.", + "tokens": [ + 51014, + 814, + 352, + 294, + 456, + 293, + 321, + 3191, + 309, + 13, + 51114 + ], + "temperature": 0.1, + "avg_logprob": -0.6201622441129865, + "compression_ratio": 1.8987341772151898, + "no_speech_prob": 0.25520434975624084, + "confidence": 0.292, + "words": [ + { + "text": "They", + "start": 37.62, + "end": 37.82, + "confidence": 0.342 + }, + { + "text": "go", + "start": 37.82, + "end": 38.06, + "confidence": 0.345 + }, + { + "text": "in", + "start": 38.06, + "end": 38.24, + "confidence": 0.356 + }, + { + "text": "there", + "start": 38.24, + "end": 38.36, + "confidence": 0.574 + }, + { + "text": "and", + "start": 38.36, + "end": 38.48, + "confidence": 0.228 + }, + { + "text": "we", + "start": 38.48, + "end": 38.74, + "confidence": 0.179 + }, + { + "text": "fix", + "start": 38.74, + "end": 38.94, + "confidence": 0.099 + }, + { + "text": "it.", + "start": 38.94, + "end": 39.26, + "confidence": 0.533 + } + ] + }, + { + "id": 11, + "seek": 2500, + "start": 39.88, + "end": 41.96, + "text": " We got them in there helmet bags.", + "tokens": [ + 51114, + 492, + 658, + 552, + 294, + 456, + 15922, + 10405, + 13, + 51264 + ], + "temperature": 0.1, + 
"avg_logprob": -0.6201622441129865, + "compression_ratio": 1.8987341772151898, + "no_speech_prob": 0.25520434975624084, + "confidence": 0.485, + "words": [ + { + "text": "We", + "start": 39.88, + "end": 40.2, + "confidence": 0.627 + }, + { + "text": "got", + "start": 40.2, + "end": 40.48, + "confidence": 0.389 + }, + { + "text": "them", + "start": 40.48, + "end": 40.64, + "confidence": 0.922 + }, + { + "text": "in", + "start": 40.64, + "end": 40.74, + "confidence": 0.947 + }, + { + "text": "there", + "start": 40.74, + "end": 40.96, + "confidence": 0.566 + }, + { + "text": "helmet", + "start": 40.96, + "end": 41.52, + "confidence": 0.23 + }, + { + "text": "bags.", + "start": 41.52, + "end": 41.96, + "confidence": 0.227 + } + ] + }, + { + "id": 12, + "seek": 2500, + "start": 43.06, + "end": 47.4, + "text": " And we get them in the helmet bags.", + "tokens": [ + 51264, + 400, + 321, + 483, + 552, + 294, + 264, + 15922, + 10405, + 13, + 51514 + ], + "temperature": 0.1, + "avg_logprob": -0.6201622441129865, + "compression_ratio": 1.8987341772151898, + "no_speech_prob": 0.25520434975624084, + "confidence": 0.328, + "words": [ + { + "text": "And", + "start": 43.06, + "end": 43.38, + "confidence": 0.788 + }, + { + "text": "we", + "start": 43.38, + "end": 44.0, + "confidence": 0.348 + }, + { + "text": "get", + "start": 44.0, + "end": 44.16, + "confidence": 0.36 + }, + { + "text": "them", + "start": 44.16, + "end": 44.5, + "confidence": 0.235 + }, + { + "text": "in", + "start": 44.5, + "end": 44.56, + "confidence": 0.682 + }, + { + "text": "the", + "start": 44.56, + "end": 45.02, + "confidence": 0.171 + }, + { + "text": "helmet", + "start": 45.02, + "end": 46.46, + "confidence": 0.094 + }, + { + "text": "bags.", + "start": 46.46, + "end": 47.4, + "confidence": 0.526 + } + ] + }, + { + "id": 13, + "seek": 2500, + "start": 48.22, + "end": 49.58, + "text": " At least this one, at least the bag.", + "tokens": [ + 51514, + 1711, + 1935, + 341, + 472, + 11, + 412, + 1935, + 264, + 
3411, + 13, + 51614 + ], + "temperature": 0.1, + "avg_logprob": -0.6201622441129865, + "compression_ratio": 1.8987341772151898, + "no_speech_prob": 0.25520434975624084, + "confidence": 0.545, + "words": [ + { + "text": "At", + "start": 48.22, + "end": 48.36, + "confidence": 0.137 + }, + { + "text": "least", + "start": 48.36, + "end": 48.6, + "confidence": 0.924 + }, + { + "text": "this", + "start": 48.6, + "end": 48.82, + "confidence": 0.322 + }, + { + "text": "one,", + "start": 48.82, + "end": 48.98, + "confidence": 0.454 + }, + { + "text": "at", + "start": 49.06, + "end": 49.14, + "confidence": 0.855 + }, + { + "text": "least", + "start": 49.14, + "end": 49.22, + "confidence": 0.999 + }, + { + "text": "the", + "start": 49.22, + "end": 49.38, + "confidence": 0.73 + }, + { + "text": "bag.", + "start": 49.38, + "end": 49.58, + "confidence": 0.675 + } + ] + }, + { + "id": 14, + "seek": 2500, + "start": 50.1, + "end": 50.6, + "text": " Right here.", + "tokens": [ + 51614, + 1779, + 510, + 13, + 51664 + ], + "temperature": 0.1, + "avg_logprob": -0.6201622441129865, + "compression_ratio": 1.8987341772151898, + "no_speech_prob": 0.25520434975624084, + "confidence": 0.331, + "words": [ + { + "text": "Right", + "start": 50.1, + "end": 50.4, + "confidence": 0.223 + }, + { + "text": "here.", + "start": 50.4, + "end": 50.6, + "confidence": 0.49 + } + ] + }, + { + "id": 15, + "seek": 2500, + "start": 51.46, + "end": 51.91, + "text": " Right here.", + "tokens": [ + 51664, + 1779, + 510, + 13, + 51714 + ], + "temperature": 0.1, + "avg_logprob": -0.6201622441129865, + "compression_ratio": 1.8987341772151898, + "no_speech_prob": 0.25520434975624084, + "confidence": 0.838, + "words": [ + { + "text": "Right", + "start": 51.46, + "end": 51.74, + "confidence": 0.761 + }, + { + "text": "here.", + "start": 51.74, + "end": 51.91, + "confidence": 0.923 + } + ] + }, + { + "id": 16, + "seek": 2500, + "start": 51.91, + "end": 52.94, + "text": " We're thinking they could be on it.", + 
"tokens": [ + 51714, + 492, + 434, + 1953, + 436, + 727, + 312, + 322, + 309, + 13, + 51764 + ], + "temperature": 0.1, + "avg_logprob": -0.6201622441129865, + "compression_ratio": 1.8987341772151898, + "no_speech_prob": 0.25520434975624084, + "confidence": 0.454, + "words": [ + { + "text": "We're", + "start": 51.91, + "end": 52.28, + "confidence": 0.462 + }, + { + "text": "thinking", + "start": 52.28, + "end": 52.3, + "confidence": 0.561 + }, + { + "text": "they", + "start": 52.3, + "end": 52.5, + "confidence": 0.13 + }, + { + "text": "could", + "start": 52.5, + "end": 52.62, + "confidence": 0.317 + }, + { + "text": "be", + "start": 52.62, + "end": 52.74, + "confidence": 0.864 + }, + { + "text": "on", + "start": 52.74, + "end": 52.88, + "confidence": 0.658 + }, + { + "text": "it.", + "start": 52.88, + "end": 52.94, + "confidence": 0.643 + } + ] + }, + { + "id": 17, + "seek": 5300, + "start": 53.04, + "end": 54.36, + "text": " Yeah, we're thinking they could be on it.", + "tokens": [ + 50364, + 865, + 11, + 321, + 434, + 1953, + 436, + 727, + 312, + 322, + 309, + 13, + 50464 + ], + "temperature": 0.1, + "avg_logprob": -0.36524712948398735, + "compression_ratio": 1.691304347826087, + "no_speech_prob": 0.05761953815817833, + "confidence": 0.848, + "words": [ + { + "text": "Yeah,", + "start": 53.04, + "end": 53.24, + "confidence": 0.596 + }, + { + "text": "we're", + "start": 53.34, + "end": 53.44, + "confidence": 0.8 + }, + { + "text": "thinking", + "start": 53.44, + "end": 53.64, + "confidence": 0.989 + }, + { + "text": "they", + "start": 53.64, + "end": 53.84, + "confidence": 0.884 + }, + { + "text": "could", + "start": 53.84, + "end": 53.96, + "confidence": 0.865 + }, + { + "text": "be", + "start": 53.96, + "end": 54.08, + "confidence": 0.97 + }, + { + "text": "on", + "start": 54.08, + "end": 54.24, + "confidence": 0.899 + }, + { + "text": "it.", + "start": 54.24, + "end": 54.36, + "confidence": 0.902 + } + ] + }, + { + "id": 18, + "seek": 5300, + "start": 54.5, + 
"end": 54.84, + "text": " There you go.", + "tokens": [ + 50464, + 821, + 291, + 352, + 13, + 50514 + ], + "temperature": 0.1, + "avg_logprob": -0.36524712948398735, + "compression_ratio": 1.691304347826087, + "no_speech_prob": 0.05761953815817833, + "confidence": 0.414, + "words": [ + { + "text": "There", + "start": 54.5, + "end": 54.56, + "confidence": 0.104 + }, + { + "text": "you", + "start": 54.56, + "end": 54.68, + "confidence": 0.948 + }, + { + "text": "go.", + "start": 54.68, + "end": 54.84, + "confidence": 0.722 + } + ] + }, + { + "id": 19, + "seek": 5300, + "start": 56.4, + "end": 61.0, + "text": " We were going to hang with the cover.", + "tokens": [ + 50514, + 492, + 645, + 516, + 281, + 3967, + 365, + 264, + 2060, + 13, + 50764 + ], + "temperature": 0.1, + "avg_logprob": -0.36524712948398735, + "compression_ratio": 1.691304347826087, + "no_speech_prob": 0.05761953815817833, + "confidence": 0.418, + "words": [ + { + "text": "We", + "start": 56.4, + "end": 56.66, + "confidence": 0.31 + }, + { + "text": "were", + "start": 56.66, + "end": 56.88, + "confidence": 0.495 + }, + { + "text": "going", + "start": 56.88, + "end": 57.1, + "confidence": 0.377 + }, + { + "text": "to", + "start": 57.1, + "end": 57.22, + "confidence": 0.955 + }, + { + "text": "hang", + "start": 57.22, + "end": 57.38, + "confidence": 0.308 + }, + { + "text": "with", + "start": 57.38, + "end": 59.24, + "confidence": 0.297 + }, + { + "text": "the", + "start": 59.24, + "end": 59.48, + "confidence": 0.237 + }, + { + "text": "cover.", + "start": 59.48, + "end": 61.0, + "confidence": 0.78 + } + ] + }, + { + "id": 20, + "seek": 5300, + "start": 61.1, + "end": 61.8, + "text": " I tried it already.", + "tokens": [ + 50764, + 286, + 3031, + 309, + 1217, + 13, + 50814 + ], + "temperature": 0.1, + "avg_logprob": -0.36524712948398735, + "compression_ratio": 1.691304347826087, + "no_speech_prob": 0.05761953815817833, + "confidence": 0.746, + "words": [ + { + "text": "I", + "start": 61.1, + "end": 
61.24, + "confidence": 0.57 + }, + { + "text": "tried", + "start": 61.24, + "end": 61.44, + "confidence": 0.815 + }, + { + "text": "it", + "start": 61.44, + "end": 61.62, + "confidence": 0.789 + }, + { + "text": "already.", + "start": 61.62, + "end": 61.8, + "confidence": 0.847 + } + ] + }, + { + "id": 21, + "seek": 5300, + "start": 62.5, + "end": 63.02, + "text": " Okay, fine.", + "tokens": [ + 50814, + 1033, + 11, + 2489, + 13, + 50864 + ], + "temperature": 0.1, + "avg_logprob": -0.36524712948398735, + "compression_ratio": 1.691304347826087, + "no_speech_prob": 0.05761953815817833, + "confidence": 0.803, + "words": [ + { + "text": "Okay,", + "start": 62.5, + "end": 62.78, + "confidence": 0.693 + }, + { + "text": "fine.", + "start": 62.9, + "end": 63.02, + "confidence": 0.93 + } + ] + }, + { + "id": 22, + "seek": 5300, + "start": 63.02, + "end": 63.96, + "text": " We weren't sure that.", + "tokens": [ + 50864, + 492, + 4999, + 380, + 988, + 300, + 13, + 50914 + ], + "temperature": 0.1, + "avg_logprob": -0.36524712948398735, + "compression_ratio": 1.691304347826087, + "no_speech_prob": 0.05761953815817833, + "confidence": 0.856, + "words": [ + { + "text": "We", + "start": 63.02, + "end": 63.3, + "confidence": 0.942 + }, + { + "text": "weren't", + "start": 63.3, + "end": 63.56, + "confidence": 0.995 + }, + { + "text": "sure", + "start": 63.56, + "end": 63.72, + "confidence": 0.983 + }, + { + "text": "that.", + "start": 63.72, + "end": 63.96, + "confidence": 0.501 + } + ] + }, + { + "id": 23, + "seek": 5300, + "start": 64.16, + "end": 65.12, + "text": " Just a suggestion.", + "tokens": [ + 50914, + 1449, + 257, + 16541, + 13, + 50964 + ], + "temperature": 0.1, + "avg_logprob": -0.36524712948398735, + "compression_ratio": 1.691304347826087, + "no_speech_prob": 0.05761953815817833, + "confidence": 0.771, + "words": [ + { + "text": "Just", + "start": 64.16, + "end": 64.56, + "confidence": 0.57 + }, + { + "text": "a", + "start": 64.56, + "end": 64.74, + "confidence": 
0.808 + }, + { + "text": "suggestion.", + "start": 64.74, + "end": 65.12, + "confidence": 0.998 + } + ] + }, + { + "id": 24, + "seek": 5300, + "start": 65.24, + "end": 67.7, + "text": " We thought we'd say you could check it out.", + "tokens": [ + 50964, + 492, + 1194, + 321, + 1116, + 584, + 291, + 727, + 1520, + 309, + 484, + 13, + 51114 + ], + "temperature": 0.1, + "avg_logprob": -0.36524712948398735, + "compression_ratio": 1.691304347826087, + "no_speech_prob": 0.05761953815817833, + "confidence": 0.717, + "words": [ + { + "text": "We", + "start": 65.24, + "end": 65.42, + "confidence": 0.871 + }, + { + "text": "thought", + "start": 65.42, + "end": 65.54, + "confidence": 0.982 + }, + { + "text": "we'd", + "start": 65.54, + "end": 65.92, + "confidence": 0.68 + }, + { + "text": "say", + "start": 65.92, + "end": 66.06, + "confidence": 0.18 + }, + { + "text": "you", + "start": 66.06, + "end": 67.08, + "confidence": 0.914 + }, + { + "text": "could", + "start": 67.08, + "end": 67.22, + "confidence": 0.754 + }, + { + "text": "check", + "start": 67.22, + "end": 67.38, + "confidence": 0.74 + }, + { + "text": "it", + "start": 67.38, + "end": 67.54, + "confidence": 0.995 + }, + { + "text": "out.", + "start": 67.54, + "end": 67.7, + "confidence": 0.997 + } + ] + }, + { + "id": 25, + "seek": 5300, + "start": 68.2, + "end": 69.26, + "text": " I'm like you were already done that.", + "tokens": [ + 51114, + 286, + 478, + 411, + 291, + 645, + 1217, + 1096, + 300, + 13, + 51164 + ], + "temperature": 0.1, + "avg_logprob": -0.36524712948398735, + "compression_ratio": 1.691304347826087, + "no_speech_prob": 0.05761953815817833, + "confidence": 0.406, + "words": [ + { + "text": "I'm", + "start": 68.2, + "end": 68.46, + "confidence": 0.352 + }, + { + "text": "like", + "start": 68.46, + "end": 68.54, + "confidence": 0.222 + }, + { + "text": "you", + "start": 68.54, + "end": 68.66, + "confidence": 0.531 + }, + { + "text": "were", + "start": 68.66, + "end": 68.76, + "confidence": 0.295 + 
}, + { + "text": "already", + "start": 68.76, + "end": 68.88, + "confidence": 0.793 + }, + { + "text": "done", + "start": 68.88, + "end": 69.08, + "confidence": 0.55 + }, + { + "text": "that.", + "start": 69.08, + "end": 69.26, + "confidence": 0.397 + } + ] + }, + { + "id": 26, + "seek": 5300, + "start": 69.42, + "end": 71.91, + "text": " So I guess we're going to come up with this.", + "tokens": [ + 51164, + 407, + 286, + 2041, + 321, + 434, + 516, + 281, + 808, + 493, + 365, + 341, + 13, + 51314 + ], + "temperature": 0.1, + "avg_logprob": -0.36524712948398735, + "compression_ratio": 1.691304347826087, + "no_speech_prob": 0.05761953815817833, + "confidence": 0.723, + "words": [ + { + "text": "So", + "start": 69.42, + "end": 69.58, + "confidence": 0.843 + }, + { + "text": "I", + "start": 69.58, + "end": 69.84, + "confidence": 0.483 + }, + { + "text": "guess", + "start": 69.84, + "end": 70.62, + "confidence": 0.987 + }, + { + "text": "we're", + "start": 70.62, + "end": 71.08, + "confidence": 0.756 + }, + { + "text": "going", + "start": 71.08, + "end": 71.22, + "confidence": 0.668 + }, + { + "text": "to", + "start": 71.22, + "end": 71.28, + "confidence": 0.987 + }, + { + "text": "come", + "start": 71.28, + "end": 71.42, + "confidence": 0.955 + }, + { + "text": "up", + "start": 71.42, + "end": 71.6, + "confidence": 0.641 + }, + { + "text": "with", + "start": 71.6, + "end": 71.76, + "confidence": 0.958 + }, + { + "text": "this.", + "start": 71.76, + "end": 71.91, + "confidence": 0.319 + } + ] + }, + { + "id": 27, + "seek": 5300, + "start": 71.91, + "end": 72.44, + "text": " Let us know.", + "tokens": [ + 51314, + 961, + 505, + 458, + 13, + 51364 + ], + "temperature": 0.1, + "avg_logprob": -0.36524712948398735, + "compression_ratio": 1.691304347826087, + "no_speech_prob": 0.05761953815817833, + "confidence": 0.944, + "words": [ + { + "text": "Let", + "start": 71.91, + "end": 72.12, + "confidence": 0.849 + }, + { + "text": "us", + "start": 72.12, + "end": 72.24, + 
"confidence": 0.993 + }, + { + "text": "know.", + "start": 72.24, + "end": 72.44, + "confidence": 0.998 + } + ] + }, + { + "id": 28, + "seek": 5300, + "start": 74.16, + "end": 75.16, + "text": " Okay, no problem.", + "tokens": [ + 51364, + 1033, + 11, + 572, + 1154, + 13, + 51464 + ], + "temperature": 0.1, + "avg_logprob": -0.36524712948398735, + "compression_ratio": 1.691304347826087, + "no_speech_prob": 0.05761953815817833, + "confidence": 0.869, + "words": [ + { + "text": "Okay,", + "start": 74.16, + "end": 74.44, + "confidence": 0.785 + }, + { + "text": "no", + "start": 74.58, + "end": 74.82, + "confidence": 0.858 + }, + { + "text": "problem.", + "start": 74.82, + "end": 75.16, + "confidence": 0.974 + } + ] + }, + { + "id": 29, + "seek": 5300, + "start": 75.22, + "end": 76.48, + "text": " Okay, no problem.", + "tokens": [ + 51464, + 1033, + 11, + 572, + 1154, + 13, + 51514 + ], + "temperature": 0.1, + "avg_logprob": -0.36524712948398735, + "compression_ratio": 1.691304347826087, + "no_speech_prob": 0.05761953815817833, + "confidence": 0.763, + "words": [ + { + "text": "Okay,", + "start": 75.22, + "end": 75.8, + "confidence": 0.492 + }, + { + "text": "no", + "start": 75.94, + "end": 76.14, + "confidence": 0.907 + }, + { + "text": "problem.", + "start": 76.14, + "end": 76.48, + "confidence": 0.996 + } + ] + }, + { + "id": 30, + "seek": 5300, + "start": 76.52, + "end": 78.24, + "text": " We'll let you know when the end of the session.", + "tokens": [ + 51514, + 492, + 603, + 718, + 291, + 458, + 562, + 264, + 917, + 295, + 264, + 5481, + 13, + 51614 + ], + "temperature": 0.1, + "avg_logprob": -0.36524712948398735, + "compression_ratio": 1.691304347826087, + "no_speech_prob": 0.05761953815817833, + "confidence": 0.603, + "words": [ + { + "text": "We'll", + "start": 76.52, + "end": 76.8, + "confidence": 0.704 + }, + { + "text": "let", + "start": 76.8, + "end": 76.92, + "confidence": 0.965 + }, + { + "text": "you", + "start": 76.92, + "end": 77.0, + "confidence": 
0.453 + }, + { + "text": "know", + "start": 77.0, + "end": 77.16, + "confidence": 0.998 + }, + { + "text": "when", + "start": 77.16, + "end": 77.32, + "confidence": 0.401 + }, + { + "text": "the", + "start": 77.32, + "end": 77.4, + "confidence": 0.784 + }, + { + "text": "end", + "start": 77.4, + "end": 77.52, + "confidence": 0.989 + }, + { + "text": "of", + "start": 77.52, + "end": 77.64, + "confidence": 0.981 + }, + { + "text": "the", + "start": 77.64, + "end": 77.7, + "confidence": 0.419 + }, + { + "text": "session.", + "start": 77.7, + "end": 78.24, + "confidence": 0.138 + } + ] + }, + { + "id": 31, + "seek": 7800, + "start": 88.94, + "end": 88.96, + "text": " Bye.", + "tokens": [ + 50364, + 4621, + 13, + 50914 + ], + "temperature": 0.1, + "avg_logprob": -0.8541962623596191, + "compression_ratio": 0.3333333333333333, + "no_speech_prob": 0.881976306438446, + "confidence": 0.099, + "words": [ + { + "text": "Bye.", + "start": 88.94, + "end": 88.96, + "confidence": 0.099 + } + ] + } + ], + "language": "English" +} \ No newline at end of file diff --git a/tests/expected/corner_cases/random_apollo11.mp3.words.json b/tests/expected/corner_cases/random_apollo11.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..32b5409d96b3a953fa0e798789f1bbd6a3b31c48 --- /dev/null +++ b/tests/expected/corner_cases/random_apollo11.mp3.words.json @@ -0,0 +1,1812 @@ +{ + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-EA GLEME GVA. Alright, okay. Yeah, let's take that camera. They make it want to go on the helmet. We're going to have a B1 and you can put the other one on the mic helmet with those GVA. Over. Alright, got them. Alright, got them. They're the better helmet that C1 has. They got the one mic that makes going through the leak reshapes. We got them in there helmet bags. And uh, we got the helmet in there helmet bags. The leak system, the leak of the bag. Right, yeah. 
Yeah, we're taking the leak to the auto, see you later. Hey, we were going to hang with the cover on the right side already. Okay, fine. We weren't sure of that, just a suggestion. We thought we'd, uh, you could check it out. It's not much of a hard to turn on, so uh, I guess we're going to come up with just let us know. Okay, no problem. Okay, no problem. We'll let you know when the end of the sun. Hello.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.52, + "end": 6.54, + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-EA GLEME GVA.", + "tokens": [ + 50364, + 25187, + 2975, + 11, + 18717, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 24758, + 3334, + 12, + 36, + 32, + 460, + 2634, + 15454, + 460, + 20914, + 13, + 50714 + ], + "temperature": 0.2, + "avg_logprob": -0.6702656972975958, + "compression_ratio": 1.3597883597883598, + "no_speech_prob": 0.44998496770858765, + "confidence": 0.548, + "words": [ + { + "text": "Apollo", + "start": 0.52, + "end": 0.88, + "confidence": 0.155 + }, + { + "text": "11,", + "start": 0.88, + "end": 1.26, + "confidence": 0.977 + }, + { + "text": "Houston", + "start": 1.52, + "end": 1.72, + "confidence": 0.986 + }, + { + "text": "we", + "start": 1.72, + "end": 1.94, + "confidence": 0.518 + }, + { + "text": "got", + "start": 1.94, + "end": 2.1, + "confidence": 0.823 + }, + { + "text": "a", + "start": 2.1, + "end": 2.26, + "confidence": 0.989 + }, + { + "text": "recommendation", + "start": 2.26, + "end": 2.86, + "confidence": 0.968 + }, + { + "text": "for", + "start": 2.86, + "end": 3.44, + "confidence": 0.946 + }, + { + "text": "you", + "start": 3.44, + "end": 3.6, + "confidence": 0.984 + }, + { + "text": "on", + "start": 3.6, + "end": 3.72, + "confidence": 0.909 + }, + { + "text": "your", + "start": 3.72, + "end": 3.92, + "confidence": 0.971 + }, + { + "text": "Soyuz-EA", + "start": 3.92, + "end": 5.26, + "confidence": 0.321 + }, + { + "text": "GLEME", + "start": 5.26, + "end": 5.74, 
+ "confidence": 0.558 + }, + { + "text": "GVA.", + "start": 5.74, + "end": 6.54, + "confidence": 0.336 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 10.8, + "end": 12.2, + "text": " Alright, okay.", + "tokens": [ + 50914, + 2798, + 11, + 1392, + 13, + 50964 + ], + "temperature": 0.2, + "avg_logprob": -0.6702656972975958, + "compression_ratio": 1.3597883597883598, + "no_speech_prob": 0.44998496770858765, + "confidence": 0.366, + "words": [ + { + "text": "Alright,", + "start": 10.8, + "end": 11.08, + "confidence": 0.282 + }, + { + "text": "okay.", + "start": 11.68, + "end": 12.2, + "confidence": 0.476 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 12.82, + "end": 14.4, + "text": " Yeah, let's take that camera.", + "tokens": [ + 51014, + 865, + 11, + 718, + 311, + 747, + 300, + 2799, + 13, + 51064 + ], + "temperature": 0.2, + "avg_logprob": -0.6702656972975958, + "compression_ratio": 1.3597883597883598, + "no_speech_prob": 0.44998496770858765, + "confidence": 0.357, + "words": [ + { + "text": "Yeah,", + "start": 12.82, + "end": 13.0, + "confidence": 0.167 + }, + { + "text": "let's", + "start": 13.04, + "end": 13.38, + "confidence": 0.277 + }, + { + "text": "take", + "start": 13.38, + "end": 14.02, + "confidence": 0.398 + }, + { + "text": "that", + "start": 14.02, + "end": 14.26, + "confidence": 0.478 + }, + { + "text": "camera.", + "start": 14.26, + "end": 14.4, + "confidence": 0.849 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 14.84, + "end": 17.26, + "text": " They make it want to go on the helmet.", + "tokens": [ + 51114, + 814, + 652, + 309, + 528, + 281, + 352, + 322, + 264, + 15922, + 13, + 51214 + ], + "temperature": 0.2, + "avg_logprob": -0.6702656972975958, + "compression_ratio": 1.3597883597883598, + "no_speech_prob": 0.44998496770858765, + "confidence": 0.551, + "words": [ + { + "text": "They", + "start": 14.84, + "end": 15.48, + "confidence": 0.397 + }, + { + "text": "make", + "start": 15.48, + "end": 15.68, + "confidence": 0.428 + }, + { 
+ "text": "it", + "start": 15.68, + "end": 15.86, + "confidence": 0.21 + }, + { + "text": "want", + "start": 15.86, + "end": 16.06, + "confidence": 0.24 + }, + { + "text": "to", + "start": 16.06, + "end": 16.2, + "confidence": 0.965 + }, + { + "text": "go", + "start": 16.2, + "end": 16.38, + "confidence": 0.904 + }, + { + "text": "on", + "start": 16.38, + "end": 16.6, + "confidence": 0.952 + }, + { + "text": "the", + "start": 16.6, + "end": 16.8, + "confidence": 0.889 + }, + { + "text": "helmet.", + "start": 16.8, + "end": 17.26, + "confidence": 0.745 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 17.62, + "end": 23.74, + "text": " We're going to have a B1 and you can put the other one on the mic helmet with those GVA.", + "tokens": [ + 51264, + 492, + 434, + 516, + 281, + 362, + 257, + 363, + 16, + 293, + 291, + 393, + 829, + 264, + 661, + 472, + 322, + 264, + 3123, + 15922, + 365, + 729, + 460, + 20914, + 13, + 51564 + ], + "temperature": 0.2, + "avg_logprob": -0.6702656972975958, + "compression_ratio": 1.3597883597883598, + "no_speech_prob": 0.44998496770858765, + "confidence": 0.671, + "words": [ + { + "text": "We're", + "start": 17.62, + "end": 17.8, + "confidence": 0.779 + }, + { + "text": "going", + "start": 17.8, + "end": 17.92, + "confidence": 0.633 + }, + { + "text": "to", + "start": 17.92, + "end": 17.98, + "confidence": 0.992 + }, + { + "text": "have", + "start": 17.98, + "end": 18.2, + "confidence": 0.953 + }, + { + "text": "a", + "start": 18.2, + "end": 18.38, + "confidence": 0.36 + }, + { + "text": "B1", + "start": 18.38, + "end": 19.08, + "confidence": 0.836 + }, + { + "text": "and", + "start": 19.08, + "end": 20.16, + "confidence": 0.42 + }, + { + "text": "you", + "start": 20.16, + "end": 20.32, + "confidence": 0.865 + }, + { + "text": "can", + "start": 20.32, + "end": 20.48, + "confidence": 0.699 + }, + { + "text": "put", + "start": 20.48, + "end": 20.64, + "confidence": 0.971 + }, + { + "text": "the", + "start": 20.64, + "end": 20.84, + 
"confidence": 0.976 + }, + { + "text": "other", + "start": 20.84, + "end": 21.0, + "confidence": 0.995 + }, + { + "text": "one", + "start": 21.0, + "end": 21.18, + "confidence": 0.97 + }, + { + "text": "on", + "start": 21.18, + "end": 21.42, + "confidence": 0.988 + }, + { + "text": "the", + "start": 21.42, + "end": 21.94, + "confidence": 0.627 + }, + { + "text": "mic", + "start": 21.94, + "end": 22.48, + "confidence": 0.391 + }, + { + "text": "helmet", + "start": 22.48, + "end": 22.8, + "confidence": 0.935 + }, + { + "text": "with", + "start": 22.8, + "end": 23.06, + "confidence": 0.466 + }, + { + "text": "those", + "start": 23.06, + "end": 23.3, + "confidence": 0.358 + }, + { + "text": "GVA.", + "start": 23.3, + "end": 23.74, + "confidence": 0.363 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 24.88, + "end": 25.12, + "text": " Over.", + "tokens": [ + 51614, + 4886, + 13, + 51664 + ], + "temperature": 0.2, + "avg_logprob": -0.6702656972975958, + "compression_ratio": 1.3597883597883598, + "no_speech_prob": 0.44998496770858765, + "confidence": 0.29, + "words": [ + { + "text": "Over.", + "start": 24.88, + "end": 25.12, + "confidence": 0.29 + } + ] + }, + { + "id": 6, + "seek": 3000, + "start": 31.28, + "end": 32.12, + "text": " Alright, got them.", + "tokens": [ + 50414, + 2798, + 11, + 658, + 552, + 13, + 50464 + ], + "temperature": 0.2, + "avg_logprob": -0.714374287923177, + "compression_ratio": 1.8131868131868132, + "no_speech_prob": 0.07163436710834503, + "confidence": 0.531, + "words": [ + { + "text": "Alright,", + "start": 31.28, + "end": 31.56, + "confidence": 0.866 + }, + { + "text": "got", + "start": 31.82, + "end": 31.86, + "confidence": 0.43 + }, + { + "text": "them.", + "start": 31.86, + "end": 32.12, + "confidence": 0.402 + } + ] + }, + { + "id": 7, + "seek": 3000, + "start": 32.74, + "end": 33.44, + "text": " Alright, got them.", + "tokens": [ + 50514, + 2798, + 11, + 658, + 552, + 13, + 50564 + ], + "temperature": 0.2, + "avg_logprob": 
-0.714374287923177, + "compression_ratio": 1.8131868131868132, + "no_speech_prob": 0.07163436710834503, + "confidence": 0.879, + "words": [ + { + "text": "Alright,", + "start": 32.74, + "end": 32.92, + "confidence": 0.74 + }, + { + "text": "got", + "start": 33.12, + "end": 33.18, + "confidence": 0.939 + }, + { + "text": "them.", + "start": 33.18, + "end": 33.44, + "confidence": 0.976 + } + ] + }, + { + "id": 8, + "seek": 3000, + "start": 34.5, + "end": 35.3, + "text": " They're the better helmet that C1 has.", + "tokens": [ + 50614, + 814, + 434, + 264, + 1101, + 15922, + 300, + 383, + 16, + 575, + 13, + 50664 + ], + "temperature": 0.2, + "avg_logprob": -0.714374287923177, + "compression_ratio": 1.8131868131868132, + "no_speech_prob": 0.07163436710834503, + "confidence": 0.373, + "words": [ + { + "text": "They're", + "start": 34.5, + "end": 34.52, + "confidence": 0.222 + }, + { + "text": "the", + "start": 34.52, + "end": 34.54, + "confidence": 0.185 + }, + { + "text": "better", + "start": 34.54, + "end": 34.56, + "confidence": 0.512 + }, + { + "text": "helmet", + "start": 34.56, + "end": 34.58, + "confidence": 0.699 + }, + { + "text": "that", + "start": 34.58, + "end": 34.6, + "confidence": 0.247 + }, + { + "text": "C1", + "start": 34.6, + "end": 34.92, + "confidence": 0.464 + }, + { + "text": "has.", + "start": 34.92, + "end": 35.3, + "confidence": 0.798 + } + ] + }, + { + "id": 9, + "seek": 3000, + "start": 37.58, + "end": 39.34, + "text": " They got the one mic that makes going through the leak reshapes.", + "tokens": [ + 50714, + 814, + 658, + 264, + 472, + 3123, + 300, + 1669, + 516, + 807, + 264, + 17143, + 725, + 71, + 569, + 279, + 13, + 50814 + ], + "temperature": 0.2, + "avg_logprob": -0.714374287923177, + "compression_ratio": 1.8131868131868132, + "no_speech_prob": 0.07163436710834503, + "confidence": 0.362, + "words": [ + { + "text": "They", + "start": 37.58, + "end": 37.8, + "confidence": 0.355 + }, + { + "text": "got", + "start": 37.8, + "end": 37.9, 
+ "confidence": 0.372 + }, + { + "text": "the", + "start": 37.9, + "end": 37.92, + "confidence": 0.702 + }, + { + "text": "one", + "start": 37.92, + "end": 37.94, + "confidence": 0.721 + }, + { + "text": "mic", + "start": 37.94, + "end": 37.96, + "confidence": 0.632 + }, + { + "text": "that", + "start": 37.96, + "end": 37.98, + "confidence": 0.117 + }, + { + "text": "makes", + "start": 37.98, + "end": 38.0, + "confidence": 0.191 + }, + { + "text": "going", + "start": 38.0, + "end": 38.12, + "confidence": 0.749 + }, + { + "text": "through", + "start": 38.12, + "end": 38.34, + "confidence": 0.305 + }, + { + "text": "the", + "start": 38.34, + "end": 38.5, + "confidence": 0.397 + }, + { + "text": "leak", + "start": 38.5, + "end": 38.76, + "confidence": 0.41 + }, + { + "text": "reshapes.", + "start": 38.76, + "end": 39.34, + "confidence": 0.288 + } + ] + }, + { + "id": 10, + "seek": 3000, + "start": 39.9, + "end": 41.94, + "text": " We got them in there helmet bags.", + "tokens": [ + 50864, + 492, + 658, + 552, + 294, + 456, + 15922, + 10405, + 13, + 50964 + ], + "temperature": 0.2, + "avg_logprob": -0.714374287923177, + "compression_ratio": 1.8131868131868132, + "no_speech_prob": 0.07163436710834503, + "confidence": 0.576, + "words": [ + { + "text": "We", + "start": 39.9, + "end": 40.2, + "confidence": 0.487 + }, + { + "text": "got", + "start": 40.2, + "end": 40.48, + "confidence": 0.582 + }, + { + "text": "them", + "start": 40.48, + "end": 40.64, + "confidence": 0.881 + }, + { + "text": "in", + "start": 40.64, + "end": 40.74, + "confidence": 0.917 + }, + { + "text": "there", + "start": 40.74, + "end": 40.96, + "confidence": 0.418 + }, + { + "text": "helmet", + "start": 40.96, + "end": 41.52, + "confidence": 0.654 + }, + { + "text": "bags.", + "start": 41.52, + "end": 41.94, + "confidence": 0.336 + } + ] + }, + { + "id": 11, + "seek": 3000, + "start": 43.02, + "end": 47.4, + "text": " And uh, we got the helmet in there helmet bags.", + "tokens": [ + 51014, + 400, + 
2232, + 11, + 321, + 658, + 264, + 15922, + 294, + 456, + 15922, + 10405, + 13, + 51264 + ], + "temperature": 0.2, + "avg_logprob": -0.714374287923177, + "compression_ratio": 1.8131868131868132, + "no_speech_prob": 0.07163436710834503, + "confidence": 0.431, + "words": [ + { + "text": "And", + "start": 43.02, + "end": 43.34, + "confidence": 0.575 + }, + { + "text": "uh,", + "start": 43.34, + "end": 43.74, + "confidence": 0.208 + }, + { + "text": "we", + "start": 43.86, + "end": 44.04, + "confidence": 0.423 + }, + { + "text": "got", + "start": 44.04, + "end": 44.2, + "confidence": 0.549 + }, + { + "text": "the", + "start": 44.2, + "end": 44.5, + "confidence": 0.209 + }, + { + "text": "helmet", + "start": 44.5, + "end": 46.46, + "confidence": 0.845 + }, + { + "text": "in", + "start": 46.46, + "end": 46.72, + "confidence": 0.188 + }, + { + "text": "there", + "start": 46.72, + "end": 47.06, + "confidence": 0.391 + }, + { + "text": "helmet", + "start": 47.06, + "end": 47.08, + "confidence": 0.69 + }, + { + "text": "bags.", + "start": 47.08, + "end": 47.4, + "confidence": 0.889 + } + ] + }, + { + "id": 12, + "seek": 3000, + "start": 48.5, + "end": 49.58, + "text": " The leak system, the leak of the bag.", + "tokens": [ + 51314, + 440, + 17143, + 1185, + 11, + 264, + 17143, + 295, + 264, + 3411, + 13, + 51364 + ], + "temperature": 0.2, + "avg_logprob": -0.714374287923177, + "compression_ratio": 1.8131868131868132, + "no_speech_prob": 0.07163436710834503, + "confidence": 0.499, + "words": [ + { + "text": "The", + "start": 48.5, + "end": 48.52, + "confidence": 0.266 + }, + { + "text": "leak", + "start": 48.52, + "end": 48.6, + "confidence": 0.299 + }, + { + "text": "system,", + "start": 48.6, + "end": 48.86, + "confidence": 0.54 + }, + { + "text": "the", + "start": 49.06, + "end": 49.14, + "confidence": 0.589 + }, + { + "text": "leak", + "start": 49.14, + "end": 49.22, + "confidence": 0.962 + }, + { + "text": "of", + "start": 49.22, + "end": 49.32, + "confidence": 0.244 + 
}, + { + "text": "the", + "start": 49.32, + "end": 49.4, + "confidence": 0.941 + }, + { + "text": "bag.", + "start": 49.4, + "end": 49.58, + "confidence": 0.689 + } + ] + }, + { + "id": 13, + "seek": 3000, + "start": 51.34, + "end": 51.94, + "text": " Right, yeah.", + "tokens": [ + 51414, + 1779, + 11, + 1338, + 13, + 51464 + ], + "temperature": 0.2, + "avg_logprob": -0.714374287923177, + "compression_ratio": 1.8131868131868132, + "no_speech_prob": 0.07163436710834503, + "confidence": 0.369, + "words": [ + { + "text": "Right,", + "start": 51.34, + "end": 51.72, + "confidence": 0.187 + }, + { + "text": "yeah.", + "start": 51.82, + "end": 51.94, + "confidence": 0.73 + } + ] + }, + { + "id": 14, + "seek": 3000, + "start": 53.06, + "end": 55.22, + "text": " Yeah, we're taking the leak to the auto, see you later.", + "tokens": [ + 51514, + 865, + 11, + 321, + 434, + 1940, + 264, + 17143, + 281, + 264, + 8399, + 11, + 536, + 291, + 1780, + 13, + 51614 + ], + "temperature": 0.2, + "avg_logprob": -0.714374287923177, + "compression_ratio": 1.8131868131868132, + "no_speech_prob": 0.07163436710834503, + "confidence": 0.425, + "words": [ + { + "text": "Yeah,", + "start": 53.06, + "end": 53.26, + "confidence": 0.561 + }, + { + "text": "we're", + "start": 53.34, + "end": 53.42, + "confidence": 0.725 + }, + { + "text": "taking", + "start": 53.42, + "end": 53.62, + "confidence": 0.683 + }, + { + "text": "the", + "start": 53.62, + "end": 53.8, + "confidence": 0.661 + }, + { + "text": "leak", + "start": 53.8, + "end": 53.88, + "confidence": 0.396 + }, + { + "text": "to", + "start": 53.88, + "end": 54.02, + "confidence": 0.08 + }, + { + "text": "the", + "start": 54.02, + "end": 54.08, + "confidence": 0.935 + }, + { + "text": "auto,", + "start": 54.08, + "end": 54.26, + "confidence": 0.228 + }, + { + "text": "see", + "start": 54.46, + "end": 54.58, + "confidence": 0.191 + }, + { + "text": "you", + "start": 54.58, + "end": 55.14, + "confidence": 0.401 + }, + { + "text": "later.", + 
"start": 55.14, + "end": 55.22, + "confidence": 0.503 + } + ] + }, + { + "id": 15, + "seek": 5500, + "start": 56.4, + "end": 61.48, + "text": " Hey, we were going to hang with the cover on the right side already.", + "tokens": [ + 50414, + 1911, + 11, + 321, + 645, + 516, + 281, + 3967, + 365, + 264, + 2060, + 322, + 264, + 558, + 1252, + 1217, + 13, + 50664 + ], + "temperature": 0.2, + "avg_logprob": -0.45444447285420186, + "compression_ratio": 1.6168224299065421, + "no_speech_prob": 0.3397921025753021, + "confidence": 0.371, + "words": [ + { + "text": "Hey,", + "start": 56.4, + "end": 56.6, + "confidence": 0.195 + }, + { + "text": "we", + "start": 56.62, + "end": 56.74, + "confidence": 0.949 + }, + { + "text": "were", + "start": 56.74, + "end": 56.9, + "confidence": 0.526 + }, + { + "text": "going", + "start": 56.9, + "end": 57.1, + "confidence": 0.184 + }, + { + "text": "to", + "start": 57.1, + "end": 57.22, + "confidence": 0.938 + }, + { + "text": "hang", + "start": 57.22, + "end": 57.38, + "confidence": 0.255 + }, + { + "text": "with", + "start": 57.38, + "end": 59.24, + "confidence": 0.252 + }, + { + "text": "the", + "start": 59.24, + "end": 60.24, + "confidence": 0.529 + }, + { + "text": "cover", + "start": 60.24, + "end": 61.02, + "confidence": 0.737 + }, + { + "text": "on", + "start": 61.02, + "end": 61.22, + "confidence": 0.324 + }, + { + "text": "the", + "start": 61.22, + "end": 61.28, + "confidence": 0.44 + }, + { + "text": "right", + "start": 61.28, + "end": 61.3, + "confidence": 0.088 + }, + { + "text": "side", + "start": 61.3, + "end": 61.4, + "confidence": 0.4 + }, + { + "text": "already.", + "start": 61.4, + "end": 61.48, + "confidence": 0.447 + } + ] + }, + { + "id": 16, + "seek": 5500, + "start": 62.52, + "end": 63.02, + "text": " Okay, fine.", + "tokens": [ + 50714, + 1033, + 11, + 2489, + 13, + 50764 + ], + "temperature": 0.2, + "avg_logprob": -0.45444447285420186, + "compression_ratio": 1.6168224299065421, + "no_speech_prob": 
0.3397921025753021, + "confidence": 0.91, + "words": [ + { + "text": "Okay,", + "start": 62.52, + "end": 62.78, + "confidence": 0.914 + }, + { + "text": "fine.", + "start": 62.88, + "end": 63.02, + "confidence": 0.905 + } + ] + }, + { + "id": 17, + "seek": 5500, + "start": 63.02, + "end": 65.1, + "text": " We weren't sure of that, just a suggestion.", + "tokens": [ + 50764, + 492, + 4999, + 380, + 988, + 295, + 300, + 11, + 445, + 257, + 16541, + 13, + 50864 + ], + "temperature": 0.2, + "avg_logprob": -0.45444447285420186, + "compression_ratio": 1.6168224299065421, + "no_speech_prob": 0.3397921025753021, + "confidence": 0.842, + "words": [ + { + "text": "We", + "start": 63.02, + "end": 63.3, + "confidence": 0.937 + }, + { + "text": "weren't", + "start": 63.3, + "end": 63.56, + "confidence": 0.976 + }, + { + "text": "sure", + "start": 63.56, + "end": 63.72, + "confidence": 0.929 + }, + { + "text": "of", + "start": 63.72, + "end": 63.84, + "confidence": 0.502 + }, + { + "text": "that,", + "start": 63.84, + "end": 63.98, + "confidence": 0.986 + }, + { + "text": "just", + "start": 64.12, + "end": 64.56, + "confidence": 0.772 + }, + { + "text": "a", + "start": 64.56, + "end": 64.72, + "confidence": 0.675 + }, + { + "text": "suggestion.", + "start": 64.72, + "end": 65.1, + "confidence": 0.994 + } + ] + }, + { + "id": 18, + "seek": 5500, + "start": 65.2, + "end": 67.5, + "text": " We thought we'd, uh, you could check it out.", + "tokens": [ + 50864, + 492, + 1194, + 321, + 1116, + 11, + 2232, + 11, + 291, + 727, + 1520, + 309, + 484, + 13, + 50964 + ], + "temperature": 0.2, + "avg_logprob": -0.45444447285420186, + "compression_ratio": 1.6168224299065421, + "no_speech_prob": 0.3397921025753021, + "confidence": 0.754, + "words": [ + { + "text": "We", + "start": 65.2, + "end": 65.4, + "confidence": 0.915 + }, + { + "text": "thought", + "start": 65.4, + "end": 65.54, + "confidence": 0.972 + }, + { + "text": "we'd,", + "start": 65.54, + "end": 65.9, + "confidence": 0.679 + }, 
+ { + "text": "uh,", + "start": 65.96, + "end": 66.02, + "confidence": 0.597 + }, + { + "text": "you", + "start": 66.78, + "end": 67.04, + "confidence": 0.408 + }, + { + "text": "could", + "start": 67.04, + "end": 67.2, + "confidence": 0.808 + }, + { + "text": "check", + "start": 67.2, + "end": 67.36, + "confidence": 0.751 + }, + { + "text": "it", + "start": 67.36, + "end": 67.48, + "confidence": 0.984 + }, + { + "text": "out.", + "start": 67.48, + "end": 67.5, + "confidence": 0.998 + } + ] + }, + { + "id": 19, + "seek": 5500, + "start": 68.2, + "end": 72.4, + "text": " It's not much of a hard to turn on, so uh, I guess we're going to come up with just let us know.", + "tokens": [ + 51014, + 467, + 311, + 406, + 709, + 295, + 257, + 1152, + 281, + 1261, + 322, + 11, + 370, + 2232, + 11, + 286, + 2041, + 321, + 434, + 516, + 281, + 808, + 493, + 365, + 445, + 718, + 505, + 458, + 13, + 51214 + ], + "temperature": 0.2, + "avg_logprob": -0.45444447285420186, + "compression_ratio": 1.6168224299065421, + "no_speech_prob": 0.3397921025753021, + "confidence": 0.608, + "words": [ + { + "text": "It's", + "start": 68.2, + "end": 68.3, + "confidence": 0.411 + }, + { + "text": "not", + "start": 68.3, + "end": 68.4, + "confidence": 0.946 + }, + { + "text": "much", + "start": 68.4, + "end": 68.56, + "confidence": 0.902 + }, + { + "text": "of", + "start": 68.56, + "end": 68.7, + "confidence": 0.574 + }, + { + "text": "a", + "start": 68.7, + "end": 68.72, + "confidence": 0.719 + }, + { + "text": "hard", + "start": 68.72, + "end": 68.84, + "confidence": 0.144 + }, + { + "text": "to", + "start": 68.84, + "end": 68.98, + "confidence": 0.405 + }, + { + "text": "turn", + "start": 68.98, + "end": 69.1, + "confidence": 0.673 + }, + { + "text": "on,", + "start": 69.1, + "end": 69.22, + "confidence": 0.157 + }, + { + "text": "so", + "start": 69.26, + "end": 69.56, + "confidence": 0.844 + }, + { + "text": "uh,", + "start": 69.56, + "end": 69.84, + "confidence": 0.31 + }, + { + "text": "I", 
+ "start": 70.34, + "end": 70.5, + "confidence": 0.786 + }, + { + "text": "guess", + "start": 70.5, + "end": 70.64, + "confidence": 0.993 + }, + { + "text": "we're", + "start": 70.64, + "end": 71.08, + "confidence": 0.562 + }, + { + "text": "going", + "start": 71.08, + "end": 71.2, + "confidence": 0.781 + }, + { + "text": "to", + "start": 71.2, + "end": 71.28, + "confidence": 0.991 + }, + { + "text": "come", + "start": 71.28, + "end": 71.42, + "confidence": 0.964 + }, + { + "text": "up", + "start": 71.42, + "end": 71.6, + "confidence": 0.952 + }, + { + "text": "with", + "start": 71.6, + "end": 71.76, + "confidence": 0.954 + }, + { + "text": "just", + "start": 71.76, + "end": 71.94, + "confidence": 0.316 + }, + { + "text": "let", + "start": 71.94, + "end": 72.1, + "confidence": 0.811 + }, + { + "text": "us", + "start": 72.1, + "end": 72.24, + "confidence": 0.977 + }, + { + "text": "know.", + "start": 72.24, + "end": 72.4, + "confidence": 0.997 + } + ] + }, + { + "id": 20, + "seek": 5500, + "start": 74.22, + "end": 75.03, + "text": " Okay, no problem.", + "tokens": [ + 51314, + 1033, + 11, + 572, + 1154, + 13, + 51364 + ], + "temperature": 0.2, + "avg_logprob": -0.45444447285420186, + "compression_ratio": 1.6168224299065421, + "no_speech_prob": 0.3397921025753021, + "confidence": 0.912, + "words": [ + { + "text": "Okay,", + "start": 74.22, + "end": 74.48, + "confidence": 0.833 + }, + { + "text": "no", + "start": 74.62, + "end": 74.84, + "confidence": 0.935 + }, + { + "text": "problem.", + "start": 74.84, + "end": 75.03, + "confidence": 0.974 + } + ] + }, + { + "id": 21, + "seek": 5500, + "start": 75.03, + "end": 76.48, + "text": " Okay, no problem.", + "tokens": [ + 51364, + 1033, + 11, + 572, + 1154, + 13, + 51414 + ], + "temperature": 0.2, + "avg_logprob": -0.45444447285420186, + "compression_ratio": 1.6168224299065421, + "no_speech_prob": 0.3397921025753021, + "confidence": 0.839, + "words": [ + { + "text": "Okay,", + "start": 75.03, + "end": 75.78, + 
"confidence": 0.629 + }, + { + "text": "no", + "start": 75.94, + "end": 76.14, + "confidence": 0.946 + }, + { + "text": "problem.", + "start": 76.14, + "end": 76.48, + "confidence": 0.991 + } + ] + }, + { + "id": 22, + "seek": 5500, + "start": 76.48, + "end": 78.22, + "text": " We'll let you know when the end of the sun.", + "tokens": [ + 51414, + 492, + 603, + 718, + 291, + 458, + 562, + 264, + 917, + 295, + 264, + 3295, + 13, + 51514 + ], + "temperature": 0.2, + "avg_logprob": -0.45444447285420186, + "compression_ratio": 1.6168224299065421, + "no_speech_prob": 0.3397921025753021, + "confidence": 0.607, + "words": [ + { + "text": "We'll", + "start": 76.48, + "end": 76.8, + "confidence": 0.667 + }, + { + "text": "let", + "start": 76.8, + "end": 76.92, + "confidence": 0.949 + }, + { + "text": "you", + "start": 76.92, + "end": 77.08, + "confidence": 0.544 + }, + { + "text": "know", + "start": 77.08, + "end": 77.18, + "confidence": 0.998 + }, + { + "text": "when", + "start": 77.18, + "end": 77.3, + "confidence": 0.533 + }, + { + "text": "the", + "start": 77.3, + "end": 77.4, + "confidence": 0.894 + }, + { + "text": "end", + "start": 77.4, + "end": 77.52, + "confidence": 0.97 + }, + { + "text": "of", + "start": 77.52, + "end": 77.64, + "confidence": 0.976 + }, + { + "text": "the", + "start": 77.64, + "end": 78.02, + "confidence": 0.527 + }, + { + "text": "sun.", + "start": 78.02, + "end": 78.22, + "confidence": 0.075 + } + ] + }, + { + "id": 23, + "seek": 8500, + "start": 85.22, + "end": 85.4, + "text": " Hello.", + "tokens": [ + 50364, + 2425, + 13, + 50414 + ], + "temperature": 0.2, + "avg_logprob": -0.6911918640136718, + "compression_ratio": 0.42857142857142855, + "no_speech_prob": 0.9557693600654602, + "confidence": 0.216, + "words": [ + { + "text": "Hello.", + "start": 85.22, + "end": 85.4, + "confidence": 0.216 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/corner_cases/stucked_lm_apollo11.mp3.words.json 
b/tests/expected/corner_cases/stucked_lm_apollo11.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..99f2a2e64135a3ccd31e7300ef50ca7d6ce4f90c --- /dev/null +++ b/tests/expected/corner_cases/stucked_lm_apollo11.mp3.words.json @@ -0,0 +1,3820 @@ +{ + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA. Alright, okay, we like to say that they make the one that's on the helmet we're going to have in B1. And you can put the other one on the mic helmet with those GVA blizzard frames. Alright, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, 
got them, got them, got them, got them got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.52, + "end": 6.54, + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA.", + "tokens": [ + 50364, + 25187, + 2975, + 11, + 18717, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 24758, + 3334, + 12, + 20914, + 460, + 2634, + 15454, + 460, + 20914, + 13, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -0.7225993307013261, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.44998496770858765, + "confidence": 0.541, + "words": [ + { + "text": "Apollo", + "start": 0.52, + "end": 0.88, + "confidence": 0.155 + }, + { + "text": "11,", + "start": 0.88, + "end": 1.26, + "confidence": 0.977 + }, + { + "text": "Houston", + "start": 1.52, + "end": 1.72, + "confidence": 0.986 + }, + { + "text": "we", + "start": 1.72, + "end": 1.94, + "confidence": 0.518 + }, + { + "text": "got", + "start": 1.94, + "end": 2.1, + "confidence": 0.823 + }, + { + "text": "a", + "start": 2.1, + "end": 2.26, + "confidence": 0.989 + }, + { + "text": "recommendation", + "start": 2.26, + "end": 2.86, + "confidence": 0.968 + }, + { + "text": "for", + "start": 2.86, + "end": 3.44, + "confidence": 0.946 + }, + { + "text": 
"you", + "start": 3.44, + "end": 3.6, + "confidence": 0.984 + }, + { + "text": "on", + "start": 3.6, + "end": 3.72, + "confidence": 0.909 + }, + { + "text": "your", + "start": 3.72, + "end": 3.92, + "confidence": 0.971 + }, + { + "text": "Soyuz-VA", + "start": 3.92, + "end": 5.16, + "confidence": 0.26 + }, + { + "text": "GLEME", + "start": 5.16, + "end": 5.74, + "confidence": 0.475 + }, + { + "text": "GVA.", + "start": 5.74, + "end": 6.54, + "confidence": 0.435 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 10.8, + "end": 19.06, + "text": " Alright, okay, we like to say that they make the one that's on the helmet we're going to have in B1.", + "tokens": [ + 50714, + 2798, + 11, + 1392, + 11, + 321, + 411, + 281, + 584, + 300, + 436, + 652, + 264, + 472, + 300, + 311, + 322, + 264, + 15922, + 321, + 434, + 516, + 281, + 362, + 294, + 363, + 16, + 13, + 51314 + ], + "temperature": 0.0, + "avg_logprob": -0.7225993307013261, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.44998496770858765, + "confidence": 0.468, + "words": [ + { + "text": "Alright,", + "start": 10.8, + "end": 11.04, + "confidence": 0.31 + }, + { + "text": "okay,", + "start": 11.68, + "end": 12.22, + "confidence": 0.504 + }, + { + "text": "we", + "start": 12.52, + "end": 12.96, + "confidence": 0.609 + }, + { + "text": "like", + "start": 12.96, + "end": 13.28, + "confidence": 0.505 + }, + { + "text": "to", + "start": 13.28, + "end": 13.54, + "confidence": 0.263 + }, + { + "text": "say", + "start": 13.54, + "end": 14.9, + "confidence": 0.138 + }, + { + "text": "that", + "start": 14.9, + "end": 15.38, + "confidence": 0.199 + }, + { + "text": "they", + "start": 15.38, + "end": 15.44, + "confidence": 0.417 + }, + { + "text": "make", + "start": 15.44, + "end": 15.68, + "confidence": 0.402 + }, + { + "text": "the", + "start": 15.68, + "end": 15.84, + "confidence": 0.253 + }, + { + "text": "one", + "start": 15.84, + "end": 16.06, + "confidence": 0.609 + }, + { + "text": "that's", + "start": 
16.06, + "end": 16.28, + "confidence": 0.442 + }, + { + "text": "on", + "start": 16.28, + "end": 16.48, + "confidence": 0.595 + }, + { + "text": "the", + "start": 16.48, + "end": 16.78, + "confidence": 0.872 + }, + { + "text": "helmet", + "start": 16.78, + "end": 17.26, + "confidence": 0.856 + }, + { + "text": "we're", + "start": 17.26, + "end": 17.76, + "confidence": 0.299 + }, + { + "text": "going", + "start": 17.76, + "end": 17.92, + "confidence": 0.598 + }, + { + "text": "to", + "start": 17.92, + "end": 18.06, + "confidence": 0.822 + }, + { + "text": "have", + "start": 18.06, + "end": 18.2, + "confidence": 0.835 + }, + { + "text": "in", + "start": 18.2, + "end": 18.36, + "confidence": 0.717 + }, + { + "text": "B1.", + "start": 18.36, + "end": 19.06, + "confidence": 0.764 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 19.36, + "end": 24.52, + "text": " And you can put the other one on the mic helmet with those GVA blizzard frames.", + "tokens": [ + 51314, + 400, + 291, + 393, + 829, + 264, + 661, + 472, + 322, + 264, + 3123, + 15922, + 365, + 729, + 460, + 20914, + 888, + 31062, + 12083, + 13, + 51614 + ], + "temperature": 0.0, + "avg_logprob": -0.7225993307013261, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.44998496770858765, + "confidence": 0.485, + "words": [ + { + "text": "And", + "start": 19.36, + "end": 20.2, + "confidence": 0.668 + }, + { + "text": "you", + "start": 20.2, + "end": 20.32, + "confidence": 0.948 + }, + { + "text": "can", + "start": 20.32, + "end": 20.48, + "confidence": 0.725 + }, + { + "text": "put", + "start": 20.48, + "end": 20.64, + "confidence": 0.98 + }, + { + "text": "the", + "start": 20.64, + "end": 20.84, + "confidence": 0.989 + }, + { + "text": "other", + "start": 20.84, + "end": 21.0, + "confidence": 0.991 + }, + { + "text": "one", + "start": 21.0, + "end": 21.18, + "confidence": 0.978 + }, + { + "text": "on", + "start": 21.18, + "end": 21.4, + "confidence": 0.989 + }, + { + "text": "the", + "start": 21.4, 
+ "end": 21.94, + "confidence": 0.522 + }, + { + "text": "mic", + "start": 21.94, + "end": 22.48, + "confidence": 0.414 + }, + { + "text": "helmet", + "start": 22.48, + "end": 22.8, + "confidence": 0.882 + }, + { + "text": "with", + "start": 22.8, + "end": 23.06, + "confidence": 0.423 + }, + { + "text": "those", + "start": 23.06, + "end": 23.3, + "confidence": 0.466 + }, + { + "text": "GVA", + "start": 23.3, + "end": 23.74, + "confidence": 0.214 + }, + { + "text": "blizzard", + "start": 23.74, + "end": 24.18, + "confidence": 0.108 + }, + { + "text": "frames.", + "start": 24.18, + "end": 24.52, + "confidence": 0.255 + } + ] + }, + { + "id": 3, + "seek": 2500, + "start": 31.34, + "end": 54.98, + "text": " Alright, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 2798, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, 
+ 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.11149242824978299, + "compression_ratio": 24.096774193548388, + "no_speech_prob": 0.4302051365375519, + "confidence": 0.93, + "words": [ + { + "text": "Alright,", + "start": 31.34, + "end": 31.52, + "confidence": 0.066 + }, + { + "text": "got", + "start": 31.82, + "end": 31.84, + "confidence": 0.335 + }, + { + "text": "them,", + "start": 31.84, + "end": 32.12, + "confidence": 0.274 + }, + { + "text": "got", + "start": 32.38, + "end": 32.9, + "confidence": 0.568 + }, + { + "text": "them,", + "start": 32.9, + "end": 33.46, + "confidence": 0.941 + }, + { + "text": "got", + "start": 33.74, + "end": 33.76, + "confidence": 0.581 + }, + { + "text": "them,", + "start": 33.76, + "end": 33.78, + "confidence": 0.925 + }, + { + "text": "got", + "start": 33.78, + "end": 33.8, + "confidence": 0.523 + }, + { + "text": "them,", + "start": 33.8, + "end": 33.9, + "confidence": 0.906 + }, + { + "text": "got", + "start": 34.12, + "end": 34.14, + "confidence": 0.635 + }, + { + "text": 
"them,", + "start": 34.14, + "end": 34.5, + "confidence": 0.92 + }, + { + "text": "got", + "start": 34.5, + "end": 34.6, + "confidence": 0.688 + }, + { + "text": "them,", + "start": 34.6, + "end": 34.62, + "confidence": 0.946 + }, + { + "text": "got", + "start": 34.62, + "end": 34.64, + "confidence": 0.771 + }, + { + "text": "them,", + "start": 34.64, + "end": 34.66, + "confidence": 0.967 + }, + { + "text": "got", + "start": 34.66, + "end": 34.68, + "confidence": 0.856 + }, + { + "text": "them,", + "start": 34.68, + "end": 34.7, + "confidence": 0.975 + }, + { + "text": "got", + "start": 34.82, + "end": 34.92, + "confidence": 0.868 + }, + { + "text": "them,", + "start": 34.92, + "end": 34.94, + "confidence": 0.981 + }, + { + "text": "got", + "start": 34.94, + "end": 34.96, + "confidence": 0.909 + }, + { + "text": "them,", + "start": 34.96, + "end": 35.1, + "confidence": 0.985 + }, + { + "text": "got", + "start": 35.1, + "end": 35.28, + "confidence": 0.931 + }, + { + "text": "them,", + "start": 35.28, + "end": 35.62, + "confidence": 0.988 + }, + { + "text": "got", + "start": 35.74, + "end": 35.76, + "confidence": 0.945 + }, + { + "text": "them,", + "start": 35.76, + "end": 35.78, + "confidence": 0.988 + }, + { + "text": "got", + "start": 36.0, + "end": 36.02, + "confidence": 0.948 + }, + { + "text": "them,", + "start": 36.02, + "end": 36.04, + "confidence": 0.988 + }, + { + "text": "got", + "start": 36.04, + "end": 36.06, + "confidence": 0.947 + }, + { + "text": "them,", + "start": 36.06, + "end": 36.08, + "confidence": 0.99 + }, + { + "text": "got", + "start": 36.08, + "end": 36.1, + "confidence": 0.948 + }, + { + "text": "them,", + "start": 36.1, + "end": 36.12, + "confidence": 0.991 + }, + { + "text": "got", + "start": 36.12, + "end": 36.14, + "confidence": 0.949 + }, + { + "text": "them,", + "start": 36.14, + "end": 36.16, + "confidence": 0.992 + }, + { + "text": "got", + "start": 36.16, + "end": 36.34, + "confidence": 0.95 + }, + { + "text": "them,", + "start": 
36.34, + "end": 36.74, + "confidence": 0.992 + }, + { + "text": "got", + "start": 36.74, + "end": 37.46, + "confidence": 0.952 + }, + { + "text": "them,", + "start": 37.46, + "end": 37.82, + "confidence": 0.991 + }, + { + "text": "got", + "start": 37.82, + "end": 37.84, + "confidence": 0.953 + }, + { + "text": "them,", + "start": 37.84, + "end": 38.12, + "confidence": 0.992 + }, + { + "text": "got", + "start": 38.12, + "end": 38.14, + "confidence": 0.955 + }, + { + "text": "them,", + "start": 38.14, + "end": 38.5, + "confidence": 0.992 + }, + { + "text": "got", + "start": 38.5, + "end": 38.52, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 38.52, + "end": 38.54, + "confidence": 0.992 + }, + { + "text": "got", + "start": 38.54, + "end": 38.56, + "confidence": 0.958 + }, + { + "text": "them,", + "start": 38.56, + "end": 38.58, + "confidence": 0.992 + }, + { + "text": "got", + "start": 38.58, + "end": 38.6, + "confidence": 0.96 + }, + { + "text": "them,", + "start": 38.6, + "end": 38.62, + "confidence": 0.993 + }, + { + "text": "got", + "start": 38.62, + "end": 38.64, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 38.64, + "end": 38.66, + "confidence": 0.993 + }, + { + "text": "got", + "start": 38.66, + "end": 38.68, + "confidence": 0.962 + }, + { + "text": "them,", + "start": 38.68, + "end": 38.7, + "confidence": 0.993 + }, + { + "text": "got", + "start": 38.7, + "end": 38.72, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 38.72, + "end": 38.74, + "confidence": 0.993 + }, + { + "text": "got", + "start": 38.74, + "end": 38.76, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 38.76, + "end": 38.78, + "confidence": 0.993 + }, + { + "text": "got", + "start": 38.78, + "end": 38.8, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 38.8, + "end": 38.82, + "confidence": 0.993 + }, + { + "text": "got", + "start": 38.82, + "end": 38.84, + "confidence": 0.964 + }, + { + "text": "them,", + "start": 38.84, + "end": 
38.86, + "confidence": 0.993 + }, + { + "text": "got", + "start": 38.86, + "end": 38.88, + "confidence": 0.964 + }, + { + "text": "them,", + "start": 38.88, + "end": 38.9, + "confidence": 0.993 + }, + { + "text": "got", + "start": 38.9, + "end": 38.92, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 38.92, + "end": 38.94, + "confidence": 0.993 + }, + { + "text": "got", + "start": 38.94, + "end": 38.96, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 38.96, + "end": 38.98, + "confidence": 0.993 + }, + { + "text": "got", + "start": 38.98, + "end": 39.0, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 39.0, + "end": 39.02, + "confidence": 0.993 + }, + { + "text": "got", + "start": 39.02, + "end": 39.04, + "confidence": 0.968 + }, + { + "text": "them,", + "start": 39.04, + "end": 39.06, + "confidence": 0.993 + }, + { + "text": "got", + "start": 39.06, + "end": 39.08, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 39.08, + "end": 39.1, + "confidence": 0.994 + }, + { + "text": "got", + "start": 39.1, + "end": 39.12, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 39.12, + "end": 39.14, + "confidence": 0.994 + }, + { + "text": "got", + "start": 39.14, + "end": 39.16, + "confidence": 0.972 + }, + { + "text": "them,", + "start": 39.16, + "end": 39.18, + "confidence": 0.994 + }, + { + "text": "got", + "start": 39.18, + "end": 39.2, + "confidence": 0.972 + }, + { + "text": "them,", + "start": 39.2, + "end": 39.22, + "confidence": 0.994 + }, + { + "text": "got", + "start": 39.22, + "end": 39.24, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 39.24, + "end": 39.26, + "confidence": 0.994 + }, + { + "text": "got", + "start": 39.26, + "end": 39.28, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 39.28, + "end": 39.3, + "confidence": 0.994 + }, + { + "text": "got", + "start": 39.3, + "end": 39.32, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 39.32, + "end": 39.34, + 
"confidence": 0.994 + }, + { + "text": "got", + "start": 39.34, + "end": 39.36, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 39.36, + "end": 39.38, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.38, + "end": 39.4, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 39.4, + "end": 39.42, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.42, + "end": 39.44, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 39.44, + "end": 39.46, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.46, + "end": 39.48, + "confidence": 0.983 + }, + { + "text": "them,", + "start": 39.48, + "end": 39.5, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.5, + "end": 39.52, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 39.52, + "end": 39.54, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.54, + "end": 39.56, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 39.56, + "end": 39.58, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.58, + "end": 39.6, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 39.6, + "end": 39.62, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.62, + "end": 39.64, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 39.64, + "end": 39.66, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.66, + "end": 39.68, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 39.68, + "end": 39.7, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.7, + "end": 39.72, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 39.72, + "end": 39.74, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.74, + "end": 39.76, + "confidence": 0.988 + }, + { + "text": "them,", + "start": 39.76, + "end": 39.78, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.78, + "end": 39.8, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 39.8, + "end": 39.82, + "confidence": 
0.995 + }, + { + "text": "got", + "start": 39.82, + "end": 39.84, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 39.84, + "end": 39.86, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.86, + "end": 39.88, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 39.88, + "end": 39.9, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.9, + "end": 39.92, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 39.92, + "end": 39.94, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.94, + "end": 39.96, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 39.96, + "end": 39.98, + "confidence": 0.995 + }, + { + "text": "got", + "start": 39.98, + "end": 40.0, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 40.0, + "end": 40.02, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.02, + "end": 40.04, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.04, + "end": 40.06, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.06, + "end": 40.08, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.08, + "end": 40.1, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.1, + "end": 40.12, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.12, + "end": 40.14, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.14, + "end": 40.16, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.16, + "end": 40.18, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.18, + "end": 40.2, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 40.2, + "end": 40.22, + "confidence": 0.995 + }, + { + "text": "got", + "start": 40.22, + "end": 40.46, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 40.46, + "end": 40.76, + "confidence": 0.996 + }, + { + "text": "got", + "start": 41.02, + "end": 41.04, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 41.04, + "end": 41.72, + "confidence": 0.996 + }, + { + 
"text": "got", + "start": 41.9, + "end": 41.92, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 41.92, + "end": 43.0, + "confidence": 0.996 + }, + { + "text": "got", + "start": 43.0, + "end": 44.06, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 44.06, + "end": 44.88, + "confidence": 0.996 + }, + { + "text": "got", + "start": 44.94, + "end": 45.46, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 45.46, + "end": 45.76, + "confidence": 0.996 + }, + { + "text": "got", + "start": 45.76, + "end": 47.06, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 47.06, + "end": 47.76, + "confidence": 0.996 + }, + { + "text": "got", + "start": 47.82, + "end": 48.5, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 48.5, + "end": 48.9, + "confidence": 0.996 + }, + { + "text": "got", + "start": 48.9, + "end": 49.2, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 49.2, + "end": 50.8, + "confidence": 0.996 + }, + { + "text": "got", + "start": 51.18, + "end": 51.74, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 51.74, + "end": 52.38, + "confidence": 0.997 + }, + { + "text": "got", + "start": 52.44, + "end": 53.26, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 53.26, + "end": 53.74, + "confidence": 0.997 + }, + { + "text": "got", + "start": 53.76, + "end": 54.2, + "confidence": 0.995 + }, + { + "text": "them", + "start": 54.2, + "end": 54.98, + "confidence": 0.997 + } + ] + }, + { + "id": 4, + "seek": 5500, + "start": 55.0, + "end": 85.0, + "text": " got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, 
got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.05316366529250893, + "compression_ratio": 29.52, + "no_speech_prob": 0.2454655021429062, + "confidence": 0.948, + "words": [ + { + "text": "got", + 
"start": 55.0, + "end": 55.1, + "confidence": 0.213 + }, + { + "text": "them,", + "start": 55.1, + "end": 55.12, + "confidence": 0.95 + }, + { + "text": "got", + "start": 55.16, + "end": 55.18, + "confidence": 0.955 + }, + { + "text": "them,", + "start": 55.18, + "end": 55.5, + "confidence": 0.997 + }, + { + "text": "got", + "start": 55.56, + "end": 57.36, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 57.36, + "end": 57.52, + "confidence": 0.997 + }, + { + "text": "got", + "start": 57.58, + "end": 57.68, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 57.68, + "end": 57.7, + "confidence": 0.997 + }, + { + "text": "got", + "start": 57.7, + "end": 57.72, + "confidence": 0.938 + }, + { + "text": "them,", + "start": 57.72, + "end": 57.74, + "confidence": 0.993 + }, + { + "text": "got", + "start": 57.74, + "end": 57.76, + "confidence": 0.912 + }, + { + "text": "them,", + "start": 57.76, + "end": 57.78, + "confidence": 0.988 + }, + { + "text": "got", + "start": 57.78, + "end": 57.8, + "confidence": 0.872 + }, + { + "text": "them,", + "start": 57.8, + "end": 57.82, + "confidence": 0.982 + }, + { + "text": "got", + "start": 57.82, + "end": 57.84, + "confidence": 0.862 + }, + { + "text": "them,", + "start": 57.84, + "end": 57.86, + "confidence": 0.984 + }, + { + "text": "got", + "start": 57.86, + "end": 57.88, + "confidence": 0.888 + }, + { + "text": "them,", + "start": 57.88, + "end": 57.9, + "confidence": 0.983 + }, + { + "text": "got", + "start": 57.9, + "end": 58.98, + "confidence": 0.85 + }, + { + "text": "them,", + "start": 58.98, + "end": 59.22, + "confidence": 0.924 + }, + { + "text": "got", + "start": 60.5, + "end": 61.26, + "confidence": 0.56 + }, + { + "text": "them,", + "start": 61.26, + "end": 61.94, + "confidence": 0.958 + }, + { + "text": "got", + "start": 61.94, + "end": 61.96, + "confidence": 0.81 + }, + { + "text": "them,", + "start": 61.96, + "end": 61.98, + "confidence": 0.976 + }, + { + "text": "got", + "start": 61.98, + 
"end": 62.0, + "confidence": 0.842 + }, + { + "text": "them,", + "start": 62.0, + "end": 62.02, + "confidence": 0.977 + }, + { + "text": "got", + "start": 62.02, + "end": 62.04, + "confidence": 0.835 + }, + { + "text": "them,", + "start": 62.04, + "end": 62.06, + "confidence": 0.977 + }, + { + "text": "got", + "start": 62.06, + "end": 62.08, + "confidence": 0.835 + }, + { + "text": "them,", + "start": 62.08, + "end": 62.1, + "confidence": 0.979 + }, + { + "text": "got", + "start": 62.1, + "end": 62.12, + "confidence": 0.853 + }, + { + "text": "them,", + "start": 62.12, + "end": 62.14, + "confidence": 0.983 + }, + { + "text": "got", + "start": 62.14, + "end": 62.16, + "confidence": 0.875 + }, + { + "text": "them,", + "start": 62.16, + "end": 62.18, + "confidence": 0.986 + }, + { + "text": "got", + "start": 62.18, + "end": 62.2, + "confidence": 0.892 + }, + { + "text": "them,", + "start": 62.2, + "end": 62.22, + "confidence": 0.985 + }, + { + "text": "got", + "start": 62.22, + "end": 62.24, + "confidence": 0.884 + }, + { + "text": "them,", + "start": 62.24, + "end": 62.26, + "confidence": 0.985 + }, + { + "text": "got", + "start": 62.26, + "end": 62.28, + "confidence": 0.877 + }, + { + "text": "them,", + "start": 62.28, + "end": 62.3, + "confidence": 0.986 + }, + { + "text": "got", + "start": 62.3, + "end": 62.32, + "confidence": 0.874 + }, + { + "text": "them,", + "start": 62.32, + "end": 62.34, + "confidence": 0.987 + }, + { + "text": "got", + "start": 62.34, + "end": 62.36, + "confidence": 0.871 + }, + { + "text": "them,", + "start": 62.36, + "end": 62.38, + "confidence": 0.987 + }, + { + "text": "got", + "start": 62.38, + "end": 62.4, + "confidence": 0.88 + }, + { + "text": "them,", + "start": 62.4, + "end": 62.42, + "confidence": 0.988 + }, + { + "text": "got", + "start": 62.44, + "end": 62.46, + "confidence": 0.883 + }, + { + "text": "them,", + "start": 62.46, + "end": 62.48, + "confidence": 0.989 + }, + { + "text": "got", + "start": 62.54, + "end": 62.78, + 
"confidence": 0.889 + }, + { + "text": "them,", + "start": 62.78, + "end": 62.8, + "confidence": 0.989 + }, + { + "text": "got", + "start": 62.8, + "end": 62.82, + "confidence": 0.894 + }, + { + "text": "them,", + "start": 62.82, + "end": 62.84, + "confidence": 0.99 + }, + { + "text": "got", + "start": 62.84, + "end": 62.86, + "confidence": 0.901 + }, + { + "text": "them,", + "start": 62.86, + "end": 62.88, + "confidence": 0.99 + }, + { + "text": "got", + "start": 62.88, + "end": 62.9, + "confidence": 0.904 + }, + { + "text": "them,", + "start": 62.9, + "end": 62.92, + "confidence": 0.99 + }, + { + "text": "got", + "start": 62.92, + "end": 62.94, + "confidence": 0.91 + }, + { + "text": "them,", + "start": 62.94, + "end": 62.96, + "confidence": 0.991 + }, + { + "text": "got", + "start": 62.96, + "end": 62.98, + "confidence": 0.916 + }, + { + "text": "them,", + "start": 62.98, + "end": 63.0, + "confidence": 0.991 + }, + { + "text": "got", + "start": 63.0, + "end": 63.02, + "confidence": 0.918 + }, + { + "text": "them,", + "start": 63.02, + "end": 63.04, + "confidence": 0.992 + }, + { + "text": "got", + "start": 63.04, + "end": 63.06, + "confidence": 0.923 + }, + { + "text": "them,", + "start": 63.06, + "end": 63.08, + "confidence": 0.992 + }, + { + "text": "got", + "start": 63.08, + "end": 63.1, + "confidence": 0.93 + }, + { + "text": "them,", + "start": 63.1, + "end": 63.12, + "confidence": 0.992 + }, + { + "text": "got", + "start": 63.12, + "end": 63.14, + "confidence": 0.932 + }, + { + "text": "them,", + "start": 63.14, + "end": 63.16, + "confidence": 0.992 + }, + { + "text": "got", + "start": 63.16, + "end": 63.18, + "confidence": 0.938 + }, + { + "text": "them,", + "start": 63.18, + "end": 63.2, + "confidence": 0.992 + }, + { + "text": "got", + "start": 63.2, + "end": 63.22, + "confidence": 0.943 + }, + { + "text": "them,", + "start": 63.22, + "end": 63.24, + "confidence": 0.992 + }, + { + "text": "got", + "start": 63.24, + "end": 63.26, + "confidence": 0.945 + 
}, + { + "text": "them,", + "start": 63.26, + "end": 63.28, + "confidence": 0.993 + }, + { + "text": "got", + "start": 63.28, + "end": 63.3, + "confidence": 0.949 + }, + { + "text": "them,", + "start": 63.3, + "end": 63.32, + "confidence": 0.992 + }, + { + "text": "got", + "start": 63.32, + "end": 63.34, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 63.34, + "end": 63.36, + "confidence": 0.993 + }, + { + "text": "got", + "start": 63.36, + "end": 63.38, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 63.38, + "end": 63.4, + "confidence": 0.992 + }, + { + "text": "got", + "start": 63.4, + "end": 63.46, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 63.46, + "end": 63.64, + "confidence": 0.992 + }, + { + "text": "got", + "start": 63.64, + "end": 63.68, + "confidence": 0.96 + }, + { + "text": "them,", + "start": 63.68, + "end": 63.7, + "confidence": 0.993 + }, + { + "text": "got", + "start": 63.7, + "end": 63.72, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 63.72, + "end": 63.74, + "confidence": 0.992 + }, + { + "text": "got", + "start": 63.74, + "end": 63.76, + "confidence": 0.96 + }, + { + "text": "them,", + "start": 63.76, + "end": 63.78, + "confidence": 0.992 + }, + { + "text": "got", + "start": 63.78, + "end": 63.8, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 63.8, + "end": 63.82, + "confidence": 0.992 + }, + { + "text": "got", + "start": 63.82, + "end": 63.84, + "confidence": 0.964 + }, + { + "text": "them,", + "start": 63.84, + "end": 64.42, + "confidence": 0.992 + }, + { + "text": "got", + "start": 64.46, + "end": 64.58, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 64.58, + "end": 64.64, + "confidence": 0.993 + }, + { + "text": "got", + "start": 64.72, + "end": 65.06, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 65.06, + "end": 65.22, + "confidence": 0.992 + }, + { + "text": "got", + "start": 65.22, + "end": 66.0, + "confidence": 0.968 + }, + { + "text": 
"them,", + "start": 66.0, + "end": 66.2, + "confidence": 0.992 + }, + { + "text": "got", + "start": 66.38, + "end": 67.34, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 67.34, + "end": 67.44, + "confidence": 0.992 + }, + { + "text": "got", + "start": 67.48, + "end": 67.5, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 67.5, + "end": 67.52, + "confidence": 0.993 + }, + { + "text": "got", + "start": 67.52, + "end": 67.54, + "confidence": 0.972 + }, + { + "text": "them,", + "start": 67.54, + "end": 67.56, + "confidence": 0.992 + }, + { + "text": "got", + "start": 67.56, + "end": 67.58, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 67.58, + "end": 67.6, + "confidence": 0.992 + }, + { + "text": "got", + "start": 67.6, + "end": 67.62, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 67.62, + "end": 67.64, + "confidence": 0.992 + }, + { + "text": "got", + "start": 67.68, + "end": 67.7, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 67.7, + "end": 67.72, + "confidence": 0.992 + }, + { + "text": "got", + "start": 67.72, + "end": 67.74, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 67.74, + "end": 67.86, + "confidence": 0.992 + }, + { + "text": "got", + "start": 68.12, + "end": 68.36, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 68.36, + "end": 68.38, + "confidence": 0.992 + }, + { + "text": "got", + "start": 68.56, + "end": 68.58, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 68.58, + "end": 68.6, + "confidence": 0.992 + }, + { + "text": "got", + "start": 68.6, + "end": 68.8, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 68.8, + "end": 69.12, + "confidence": 0.992 + }, + { + "text": "got", + "start": 69.26, + "end": 69.28, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 69.28, + "end": 69.44, + "confidence": 0.992 + }, + { + "text": "got", + "start": 69.84, + "end": 69.86, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 
69.86, + "end": 70.02, + "confidence": 0.992 + }, + { + "text": "got", + "start": 70.2, + "end": 70.5, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 70.5, + "end": 71.42, + "confidence": 0.993 + }, + { + "text": "got", + "start": 71.56, + "end": 71.58, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 71.58, + "end": 71.94, + "confidence": 0.993 + }, + { + "text": "got", + "start": 71.94, + "end": 72.1, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 72.1, + "end": 72.82, + "confidence": 0.993 + }, + { + "text": "got", + "start": 72.82, + "end": 74.48, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 74.48, + "end": 74.66, + "confidence": 0.993 + }, + { + "text": "got", + "start": 74.74, + "end": 74.88, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 74.88, + "end": 74.9, + "confidence": 0.993 + }, + { + "text": "got", + "start": 74.92, + "end": 74.94, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 74.94, + "end": 74.96, + "confidence": 0.993 + }, + { + "text": "got", + "start": 75.12, + "end": 75.14, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 75.14, + "end": 75.24, + "confidence": 0.993 + }, + { + "text": "got", + "start": 75.24, + "end": 75.78, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 75.78, + "end": 76.02, + "confidence": 0.993 + }, + { + "text": "got", + "start": 76.02, + "end": 76.4, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 76.4, + "end": 76.6, + "confidence": 0.993 + }, + { + "text": "got", + "start": 76.6, + "end": 76.82, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 76.82, + "end": 77.76, + "confidence": 0.993 + }, + { + "text": "got", + "start": 78.22, + "end": 78.24, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 78.24, + "end": 78.42, + "confidence": 0.993 + }, + { + "text": "got", + "start": 80.0, + "end": 80.02, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 80.02, + "end": 82.6, 
+ "confidence": 0.993 + }, + { + "text": "got", + "start": 82.6, + "end": 84.98, + "confidence": 0.98 + }, + { + "text": "them", + "start": 84.98, + "end": 85.0, + "confidence": 0.994 + } + ] + }, + { + "id": 5, + "seek": 8500, + "start": 85.0, + "end": 115.0, + "text": " got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 
658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.04273154596576776, + "compression_ratio": 29.52, + "no_speech_prob": 0.6358686685562134, + "confidence": 0.953, + "words": [ + { + "text": "got", + "start": 85.0, + "end": 85.4, + "confidence": 0.455 + }, + { + "text": "them,", + "start": 85.4, + "end": 85.58, + "confidence": 0.951 + }, + { + "text": "got", + "start": 86.12, + "end": 86.72, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 86.72, + "end": 87.12, + "confidence": 0.992 + }, + { + "text": "got", + "start": 87.24, + "end": 87.56, + "confidence": 0.964 + }, + { + "text": "them,", + "start": 87.56, + "end": 87.94, + "confidence": 0.994 + }, + { + "text": "got", + "start": 88.08, + "end": 88.48, + "confidence": 0.955 + }, + { + "text": "them,", + "start": 88.48, + "end": 88.5, + "confidence": 0.992 + }, + { + "text": "got", + "start": 88.5, + "end": 88.9, + "confidence": 0.94 + }, + { + "text": "them,", + "start": 88.9, + "end": 89.02, + "confidence": 0.99 + }, + { + "text": "got", + "start": 89.06, + "end": 89.08, + "confidence": 0.92 + }, + { + "text": "them,", + "start": 89.08, + "end": 89.1, + "confidence": 0.989 + }, + { + "text": "got", + "start": 89.1, + "end": 89.12, + "confidence": 0.908 + }, + { + "text": "them,", + "start": 89.12, + "end": 89.14, + "confidence": 0.987 + }, + { + "text": "got", + "start": 89.58, + "end": 89.6, + "confidence": 0.907 + }, + { + "text": "them,", + "start": 89.6, + "end": 89.62, + "confidence": 0.986 + }, + 
{ + "text": "got", + "start": 90.06, + "end": 90.08, + "confidence": 0.915 + }, + { + "text": "them,", + "start": 90.08, + "end": 90.1, + "confidence": 0.982 + }, + { + "text": "got", + "start": 90.68, + "end": 90.7, + "confidence": 0.91 + }, + { + "text": "them,", + "start": 90.7, + "end": 90.72, + "confidence": 0.972 + }, + { + "text": "got", + "start": 91.88, + "end": 91.9, + "confidence": 0.856 + }, + { + "text": "them,", + "start": 91.9, + "end": 91.92, + "confidence": 0.965 + }, + { + "text": "got", + "start": 91.92, + "end": 91.94, + "confidence": 0.831 + }, + { + "text": "them,", + "start": 91.94, + "end": 91.96, + "confidence": 0.968 + }, + { + "text": "got", + "start": 91.96, + "end": 91.98, + "confidence": 0.855 + }, + { + "text": "them,", + "start": 91.98, + "end": 92.0, + "confidence": 0.97 + }, + { + "text": "got", + "start": 92.0, + "end": 92.02, + "confidence": 0.879 + }, + { + "text": "them,", + "start": 92.02, + "end": 92.04, + "confidence": 0.97 + }, + { + "text": "got", + "start": 92.04, + "end": 92.06, + "confidence": 0.889 + }, + { + "text": "them,", + "start": 92.06, + "end": 92.08, + "confidence": 0.97 + }, + { + "text": "got", + "start": 92.08, + "end": 92.1, + "confidence": 0.892 + }, + { + "text": "them,", + "start": 92.1, + "end": 92.12, + "confidence": 0.97 + }, + { + "text": "got", + "start": 92.12, + "end": 92.14, + "confidence": 0.884 + }, + { + "text": "them,", + "start": 92.14, + "end": 92.16, + "confidence": 0.972 + }, + { + "text": "got", + "start": 92.16, + "end": 92.18, + "confidence": 0.882 + }, + { + "text": "them,", + "start": 92.18, + "end": 92.2, + "confidence": 0.972 + }, + { + "text": "got", + "start": 92.2, + "end": 92.22, + "confidence": 0.879 + }, + { + "text": "them,", + "start": 92.22, + "end": 92.24, + "confidence": 0.97 + }, + { + "text": "got", + "start": 92.24, + "end": 92.26, + "confidence": 0.879 + }, + { + "text": "them,", + "start": 92.26, + "end": 92.28, + "confidence": 0.97 + }, + { + "text": "got", + 
"start": 92.28, + "end": 92.3, + "confidence": 0.878 + }, + { + "text": "them,", + "start": 92.3, + "end": 92.32, + "confidence": 0.971 + }, + { + "text": "got", + "start": 92.32, + "end": 92.34, + "confidence": 0.874 + }, + { + "text": "them,", + "start": 92.34, + "end": 92.36, + "confidence": 0.971 + }, + { + "text": "got", + "start": 92.36, + "end": 92.38, + "confidence": 0.882 + }, + { + "text": "them,", + "start": 92.38, + "end": 92.4, + "confidence": 0.973 + }, + { + "text": "got", + "start": 92.4, + "end": 92.42, + "confidence": 0.881 + }, + { + "text": "them,", + "start": 92.42, + "end": 92.44, + "confidence": 0.973 + }, + { + "text": "got", + "start": 92.44, + "end": 92.46, + "confidence": 0.881 + }, + { + "text": "them,", + "start": 92.46, + "end": 92.48, + "confidence": 0.975 + }, + { + "text": "got", + "start": 92.48, + "end": 92.5, + "confidence": 0.885 + }, + { + "text": "them,", + "start": 92.5, + "end": 92.52, + "confidence": 0.976 + }, + { + "text": "got", + "start": 92.52, + "end": 92.54, + "confidence": 0.888 + }, + { + "text": "them,", + "start": 92.54, + "end": 92.56, + "confidence": 0.977 + }, + { + "text": "got", + "start": 92.56, + "end": 92.58, + "confidence": 0.888 + }, + { + "text": "them,", + "start": 92.58, + "end": 92.6, + "confidence": 0.978 + }, + { + "text": "got", + "start": 92.6, + "end": 92.62, + "confidence": 0.892 + }, + { + "text": "them,", + "start": 92.62, + "end": 92.64, + "confidence": 0.978 + }, + { + "text": "got", + "start": 92.64, + "end": 92.66, + "confidence": 0.896 + }, + { + "text": "them,", + "start": 92.66, + "end": 92.68, + "confidence": 0.979 + }, + { + "text": "got", + "start": 92.68, + "end": 92.7, + "confidence": 0.895 + }, + { + "text": "them,", + "start": 92.7, + "end": 92.72, + "confidence": 0.98 + }, + { + "text": "got", + "start": 92.72, + "end": 92.74, + "confidence": 0.897 + }, + { + "text": "them,", + "start": 92.74, + "end": 92.76, + "confidence": 0.98 + }, + { + "text": "got", + "start": 92.76, + 
"end": 92.78, + "confidence": 0.906 + }, + { + "text": "them,", + "start": 92.78, + "end": 92.8, + "confidence": 0.982 + }, + { + "text": "got", + "start": 92.8, + "end": 92.82, + "confidence": 0.906 + }, + { + "text": "them,", + "start": 92.82, + "end": 92.84, + "confidence": 0.983 + }, + { + "text": "got", + "start": 92.84, + "end": 92.86, + "confidence": 0.911 + }, + { + "text": "them,", + "start": 92.86, + "end": 92.88, + "confidence": 0.983 + }, + { + "text": "got", + "start": 92.88, + "end": 92.9, + "confidence": 0.916 + }, + { + "text": "them,", + "start": 92.9, + "end": 92.92, + "confidence": 0.984 + }, + { + "text": "got", + "start": 92.92, + "end": 92.94, + "confidence": 0.916 + }, + { + "text": "them,", + "start": 92.94, + "end": 92.96, + "confidence": 0.985 + }, + { + "text": "got", + "start": 92.96, + "end": 92.98, + "confidence": 0.924 + }, + { + "text": "them,", + "start": 92.98, + "end": 93.0, + "confidence": 0.986 + }, + { + "text": "got", + "start": 93.26, + "end": 93.28, + "confidence": 0.929 + }, + { + "text": "them,", + "start": 93.28, + "end": 93.3, + "confidence": 0.987 + }, + { + "text": "got", + "start": 93.3, + "end": 93.32, + "confidence": 0.931 + }, + { + "text": "them,", + "start": 93.32, + "end": 93.34, + "confidence": 0.987 + }, + { + "text": "got", + "start": 93.34, + "end": 93.36, + "confidence": 0.934 + }, + { + "text": "them,", + "start": 93.36, + "end": 93.38, + "confidence": 0.988 + }, + { + "text": "got", + "start": 93.38, + "end": 93.4, + "confidence": 0.94 + }, + { + "text": "them,", + "start": 93.4, + "end": 93.42, + "confidence": 0.989 + }, + { + "text": "got", + "start": 94.02, + "end": 96.72, + "confidence": 0.943 + }, + { + "text": "them,", + "start": 96.72, + "end": 96.74, + "confidence": 0.99 + }, + { + "text": "got", + "start": 96.74, + "end": 96.76, + "confidence": 0.942 + }, + { + "text": "them,", + "start": 96.76, + "end": 96.78, + "confidence": 0.99 + }, + { + "text": "got", + "start": 96.78, + "end": 96.8, + 
"confidence": 0.947 + }, + { + "text": "them,", + "start": 96.8, + "end": 96.82, + "confidence": 0.991 + }, + { + "text": "got", + "start": 96.82, + "end": 96.84, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 96.84, + "end": 96.86, + "confidence": 0.991 + }, + { + "text": "got", + "start": 96.86, + "end": 96.88, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 96.88, + "end": 96.9, + "confidence": 0.992 + }, + { + "text": "got", + "start": 96.9, + "end": 99.68, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 99.68, + "end": 99.7, + "confidence": 0.992 + }, + { + "text": "got", + "start": 99.78, + "end": 99.8, + "confidence": 0.959 + }, + { + "text": "them,", + "start": 99.8, + "end": 100.0, + "confidence": 0.993 + }, + { + "text": "got", + "start": 100.66, + "end": 100.68, + "confidence": 0.962 + }, + { + "text": "them,", + "start": 100.68, + "end": 101.44, + "confidence": 0.993 + }, + { + "text": "got", + "start": 101.48, + "end": 101.5, + "confidence": 0.962 + }, + { + "text": "them,", + "start": 101.5, + "end": 101.52, + "confidence": 0.994 + }, + { + "text": "got", + "start": 101.52, + "end": 101.54, + "confidence": 0.966 + }, + { + "text": "them,", + "start": 101.54, + "end": 101.56, + "confidence": 0.994 + }, + { + "text": "got", + "start": 101.56, + "end": 101.58, + "confidence": 0.968 + }, + { + "text": "them,", + "start": 101.58, + "end": 101.6, + "confidence": 0.994 + }, + { + "text": "got", + "start": 101.6, + "end": 101.62, + "confidence": 0.969 + }, + { + "text": "them,", + "start": 101.62, + "end": 101.64, + "confidence": 0.994 + }, + { + "text": "got", + "start": 101.64, + "end": 101.66, + "confidence": 0.971 + }, + { + "text": "them,", + "start": 101.66, + "end": 101.68, + "confidence": 0.995 + }, + { + "text": "got", + "start": 101.68, + "end": 101.7, + "confidence": 0.972 + }, + { + "text": "them,", + "start": 101.7, + "end": 101.72, + "confidence": 0.995 + }, + { + "text": "got", + "start": 101.72, + "end": 
101.74, + "confidence": 0.971 + }, + { + "text": "them,", + "start": 101.74, + "end": 101.88, + "confidence": 0.995 + }, + { + "text": "got", + "start": 101.88, + "end": 101.9, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 101.9, + "end": 101.92, + "confidence": 0.995 + }, + { + "text": "got", + "start": 101.92, + "end": 101.94, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 101.94, + "end": 101.96, + "confidence": 0.996 + }, + { + "text": "got", + "start": 101.96, + "end": 101.98, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 101.98, + "end": 102.0, + "confidence": 0.996 + }, + { + "text": "got", + "start": 102.0, + "end": 102.02, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 102.02, + "end": 102.04, + "confidence": 0.996 + }, + { + "text": "got", + "start": 102.04, + "end": 102.06, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 102.06, + "end": 102.08, + "confidence": 0.996 + }, + { + "text": "got", + "start": 102.08, + "end": 102.1, + "confidence": 0.979 + }, + { + "text": "them,", + "start": 102.1, + "end": 102.12, + "confidence": 0.996 + }, + { + "text": "got", + "start": 102.12, + "end": 102.14, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 102.14, + "end": 102.18, + "confidence": 0.996 + }, + { + "text": "got", + "start": 102.18, + "end": 102.2, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 102.2, + "end": 102.22, + "confidence": 0.996 + }, + { + "text": "got", + "start": 102.22, + "end": 102.24, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 102.24, + "end": 102.26, + "confidence": 0.996 + }, + { + "text": "got", + "start": 102.26, + "end": 102.28, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 102.28, + "end": 102.3, + "confidence": 0.996 + }, + { + "text": "got", + "start": 102.3, + "end": 102.32, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 102.32, + "end": 102.34, + "confidence": 0.996 + }, + { + "text": "got", 
+ "start": 102.34, + "end": 102.36, + "confidence": 0.983 + }, + { + "text": "them,", + "start": 102.36, + "end": 103.8, + "confidence": 0.997 + }, + { + "text": "got", + "start": 103.8, + "end": 103.82, + "confidence": 0.983 + }, + { + "text": "them,", + "start": 103.82, + "end": 105.08, + "confidence": 0.997 + }, + { + "text": "got", + "start": 105.08, + "end": 105.1, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 105.1, + "end": 110.38, + "confidence": 0.997 + }, + { + "text": "got", + "start": 110.38, + "end": 110.4, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 110.4, + "end": 111.78, + "confidence": 0.997 + }, + { + "text": "got", + "start": 111.78, + "end": 111.8, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 111.8, + "end": 114.74, + "confidence": 0.997 + }, + { + "text": "got", + "start": 114.74, + "end": 114.98, + "confidence": 0.985 + }, + { + "text": "them", + "start": 114.98, + "end": 115.0, + "confidence": 0.997 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/medium_auto.cpu/empty.mp3.words.json b/tests/expected/medium_auto.cpu/empty.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..4343ff36131a9ff2cd0f90ee1eb0dcace7c2c1e4 --- /dev/null +++ b/tests/expected/medium_auto.cpu/empty.mp3.words.json @@ -0,0 +1,46 @@ +{ + "text": " Thanks for watching!", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.02, + "end": 3.06, + "text": " Thanks for watching!", + "tokens": [ + 50364, + 2561, + 337, + 1976, + 0, + 50514 + ], + "temperature": 0.0, + "avg_logprob": -0.8994035720825195, + "compression_ratio": 0.7142857142857143, + "no_speech_prob": 0.6661779880523682, + "confidence": 0.373, + "words": [ + { + "text": "Thanks", + "start": 0.02, + "end": 0.44, + "confidence": 0.06 + }, + { + "text": "for", + "start": 0.44, + "end": 1.02, + "confidence": 0.916 + }, + { + "text": "watching!", + "start": 1.02, + "end": 3.06, + "confidence": 
0.936 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/medium_auto.cpu/radio_short.mp3.words.json b/tests/expected/medium_auto.cpu/radio_short.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..b5ad9acab69d8c24acd10bd904b99b423c6c97ec --- /dev/null +++ b/tests/expected/medium_auto.cpu/radio_short.mp3.words.json @@ -0,0 +1,1616 @@ +{ + "text": "3212122222222211111111111111111111111111111111111111111111111111111111", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.04, + "end": 0.64, + "text": "3", + "tokens": [ + 18 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.029, + "words": [ + { + "text": "3", + "start": 0.04, + "end": 0.64, + "confidence": 0.029 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 1.52, + "end": 3.0, + "text": "2", + "tokens": [ + 17 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.688, + "words": [ + { + "text": "2", + "start": 1.52, + "end": 3.0, + "confidence": 0.688 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 3.68, + "end": 5.32, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.95, + "words": [ + { + "text": "1", + "start": 3.68, + "end": 5.32, + "confidence": 0.95 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 6.5, + "end": 7.02, + "text": "2", + "tokens": [ + 17 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.278, + "words": [ + { + "text": "2", + "start": 6.5, + "end": 7.02, + "confidence": 0.278 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 8.08, + "end": 9.68, + "text": "1", + "tokens": [ + 
16 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.702, + "words": [ + { + "text": "1", + "start": 8.08, + "end": 9.68, + "confidence": 0.702 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 10.38, + "end": 11.04, + "text": "2", + "tokens": [ + 17 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.919, + "words": [ + { + "text": "2", + "start": 10.38, + "end": 11.04, + "confidence": 0.919 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 11.54, + "end": 13.04, + "text": "2", + "tokens": [ + 17 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.657, + "words": [ + { + "text": "2", + "start": 11.54, + "end": 13.04, + "confidence": 0.657 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 13.52, + "end": 15.57, + "text": "2", + "tokens": [ + 17 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.772, + "words": [ + { + "text": "2", + "start": 13.52, + "end": 15.57, + "confidence": 0.772 + } + ] + }, + { + "id": 8, + "seek": 0, + "start": 15.57, + "end": 16.98, + "text": "2", + "tokens": [ + 17 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.905, + "words": [ + { + "text": "2", + "start": 15.57, + "end": 16.98, + "confidence": 0.905 + } + ] + }, + { + "id": 9, + "seek": 0, + "start": 17.52, + "end": 19.22, + "text": "2", + "tokens": [ + 17 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.808, + "words": [ + { + "text": "2", + "start": 17.52, + "end": 19.22, + 
"confidence": 0.808 + } + ] + }, + { + "id": 10, + "seek": 0, + "start": 19.62, + "end": 20.68, + "text": "2", + "tokens": [ + 17 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.626, + "words": [ + { + "text": "2", + "start": 19.62, + "end": 20.68, + "confidence": 0.626 + } + ] + }, + { + "id": 11, + "seek": 0, + "start": 22.22, + "end": 22.86, + "text": "2", + "tokens": [ + 17 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.835, + "words": [ + { + "text": "2", + "start": 22.22, + "end": 22.86, + "confidence": 0.835 + } + ] + }, + { + "id": 12, + "seek": 0, + "start": 23.52, + "end": 24.68, + "text": "2", + "tokens": [ + 17 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.974, + "words": [ + { + "text": "2", + "start": 23.52, + "end": 24.68, + "confidence": 0.974 + } + ] + }, + { + "id": 13, + "seek": 0, + "start": 25.7, + "end": 27.1, + "text": "2", + "tokens": [ + 17 + ], + "temperature": 0.0, + "avg_logprob": -0.28690400990572845, + "compression_ratio": 1.0, + "no_speech_prob": 0.8406417965888977, + "confidence": 0.97, + "words": [ + { + "text": "2", + "start": 25.7, + "end": 27.1, + "confidence": 0.97 + } + ] + }, + { + "id": 14, + "seek": 2800, + "start": 28.02, + "end": 28.86, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.271, + "words": [ + { + "text": "1", + "start": 28.02, + "end": 28.86, + "confidence": 0.271 + } + ] + }, + { + "id": 15, + "seek": 2800, + "start": 30.5, + "end": 31.12, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + 
"compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.504, + "words": [ + { + "text": "1", + "start": 30.5, + "end": 31.12, + "confidence": 0.504 + } + ] + }, + { + "id": 16, + "seek": 2800, + "start": 31.52, + "end": 33.55, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.911, + "words": [ + { + "text": "1", + "start": 31.52, + "end": 33.55, + "confidence": 0.911 + } + ] + }, + { + "id": 17, + "seek": 2800, + "start": 33.55, + "end": 35.02, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.894, + "words": [ + { + "text": "1", + "start": 33.55, + "end": 35.02, + "confidence": 0.894 + } + ] + }, + { + "id": 18, + "seek": 2800, + "start": 36.24, + "end": 37.52, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.957, + "words": [ + { + "text": "1", + "start": 36.24, + "end": 37.52, + "confidence": 0.957 + } + ] + }, + { + "id": 19, + "seek": 2800, + "start": 37.68, + "end": 39.8, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.958, + "words": [ + { + "text": "1", + "start": 37.68, + "end": 39.8, + "confidence": 0.958 + } + ] + }, + { + "id": 20, + "seek": 2800, + "start": 39.8, + "end": 41.2, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.967, + "words": [ + { + 
"text": "1", + "start": 39.8, + "end": 41.2, + "confidence": 0.967 + } + ] + }, + { + "id": 21, + "seek": 2800, + "start": 41.52, + "end": 44.02, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.973, + "words": [ + { + "text": "1", + "start": 41.52, + "end": 44.02, + "confidence": 0.973 + } + ] + }, + { + "id": 22, + "seek": 2800, + "start": 44.02, + "end": 45.04, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.944, + "words": [ + { + "text": "1", + "start": 44.02, + "end": 45.04, + "confidence": 0.944 + } + ] + }, + { + "id": 23, + "seek": 2800, + "start": 45.52, + "end": 47.53, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.959, + "words": [ + { + "text": "1", + "start": 45.52, + "end": 47.53, + "confidence": 0.959 + } + ] + }, + { + "id": 24, + "seek": 2800, + "start": 47.53, + "end": 48.94, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.966, + "words": [ + { + "text": "1", + "start": 47.53, + "end": 48.94, + "confidence": 0.966 + } + ] + }, + { + "id": 25, + "seek": 2800, + "start": 50.2, + "end": 52.06, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.968, + "words": [ + { + "text": "1", + "start": 50.2, + "end": 52.06, + "confidence": 0.968 + } + ] + }, + { + "id": 26, + "seek": 2800, + "start": 
52.32, + "end": 52.84, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.964, + "words": [ + { + "text": "1", + "start": 52.32, + "end": 52.84, + "confidence": 0.964 + } + ] + }, + { + "id": 27, + "seek": 2800, + "start": 53.64, + "end": 54.9, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12449877912347967, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.022917350754141808, + "confidence": 0.944, + "words": [ + { + "text": "1", + "start": 53.64, + "end": 54.9, + "confidence": 0.944 + } + ] + }, + { + "id": 28, + "seek": 5600, + "start": 56.02, + "end": 57.06, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.377, + "words": [ + { + "text": "1", + "start": 56.02, + "end": 57.06, + "confidence": 0.377 + } + ] + }, + { + "id": 29, + "seek": 5600, + "start": 57.52, + "end": 58.8, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.763, + "words": [ + { + "text": "1", + "start": 57.52, + "end": 58.8, + "confidence": 0.763 + } + ] + }, + { + "id": 30, + "seek": 5600, + "start": 59.8, + "end": 61.08, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.952, + "words": [ + { + "text": "1", + "start": 59.8, + "end": 61.08, + "confidence": 0.952 + } + ] + }, + { + "id": 31, + "seek": 5600, + "start": 61.52, + "end": 63.08, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + 
"compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.949, + "words": [ + { + "text": "1", + "start": 61.52, + "end": 63.08, + "confidence": 0.949 + } + ] + }, + { + "id": 32, + "seek": 5600, + "start": 64.04, + "end": 65.18, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.954, + "words": [ + { + "text": "1", + "start": 64.04, + "end": 65.18, + "confidence": 0.954 + } + ] + }, + { + "id": 33, + "seek": 5600, + "start": 65.52, + "end": 66.7, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.952, + "words": [ + { + "text": "1", + "start": 65.52, + "end": 66.7, + "confidence": 0.952 + } + ] + }, + { + "id": 34, + "seek": 5600, + "start": 67.52, + "end": 69.08, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.966, + "words": [ + { + "text": "1", + "start": 67.52, + "end": 69.08, + "confidence": 0.966 + } + ] + }, + { + "id": 35, + "seek": 5600, + "start": 69.58, + "end": 71.65, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.958, + "words": [ + { + "text": "1", + "start": 69.58, + "end": 71.65, + "confidence": 0.958 + } + ] + }, + { + "id": 36, + "seek": 5600, + "start": 71.65, + "end": 73.62, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.957, + "words": [ + { + 
"text": "1", + "start": 71.65, + "end": 73.62, + "confidence": 0.957 + } + ] + }, + { + "id": 37, + "seek": 5600, + "start": 73.62, + "end": 75.85, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.953, + "words": [ + { + "text": "1", + "start": 73.62, + "end": 75.85, + "confidence": 0.953 + } + ] + }, + { + "id": 38, + "seek": 5600, + "start": 75.85, + "end": 77.12, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.942, + "words": [ + { + "text": "1", + "start": 75.85, + "end": 77.12, + "confidence": 0.942 + } + ] + }, + { + "id": 39, + "seek": 5600, + "start": 78.22, + "end": 78.78, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.936, + "words": [ + { + "text": "1", + "start": 78.22, + "end": 78.78, + "confidence": 0.936 + } + ] + }, + { + "id": 40, + "seek": 5600, + "start": 80.08, + "end": 80.86, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.929, + "words": [ + { + "text": "1", + "start": 80.08, + "end": 80.86, + "confidence": 0.929 + } + ] + }, + { + "id": 41, + "seek": 5600, + "start": 81.52, + "end": 82.7, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.12232361056587913, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.015843844041228294, + "confidence": 0.903, + "words": [ + { + "text": "1", + "start": 81.52, + "end": 82.7, + "confidence": 0.903 + } + ] + }, + { + "id": 42, + "seek": 8400, + "start": 
84.88, + "end": 85.08, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.82, + "words": [ + { + "text": "1", + "start": 84.88, + "end": 85.08, + "confidence": 0.82 + } + ] + }, + { + "id": 43, + "seek": 8400, + "start": 85.58, + "end": 86.86, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.884, + "words": [ + { + "text": "1", + "start": 85.58, + "end": 86.86, + "confidence": 0.884 + } + ] + }, + { + "id": 44, + "seek": 8400, + "start": 88.02, + "end": 89.2, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.957, + "words": [ + { + "text": "1", + "start": 88.02, + "end": 89.2, + "confidence": 0.957 + } + ] + }, + { + "id": 45, + "seek": 8400, + "start": 89.52, + "end": 90.98, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.952, + "words": [ + { + "text": "1", + "start": 89.52, + "end": 90.98, + "confidence": 0.952 + } + ] + }, + { + "id": 46, + "seek": 8400, + "start": 92.5, + "end": 93.04, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.917, + "words": [ + { + "text": "1", + "start": 92.5, + "end": 93.04, + "confidence": 0.917 + } + ] + }, + { + "id": 47, + "seek": 8400, + "start": 93.58, + "end": 95.56, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + 
"compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.947, + "words": [ + { + "text": "1", + "start": 93.58, + "end": 95.56, + "confidence": 0.947 + } + ] + }, + { + "id": 48, + "seek": 8400, + "start": 95.6, + "end": 97.95, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.97, + "words": [ + { + "text": "1", + "start": 95.6, + "end": 97.95, + "confidence": 0.97 + } + ] + }, + { + "id": 49, + "seek": 8400, + "start": 97.95, + "end": 99.14, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.976, + "words": [ + { + "text": "1", + "start": 97.95, + "end": 99.14, + "confidence": 0.976 + } + ] + }, + { + "id": 50, + "seek": 8400, + "start": 99.52, + "end": 101.08, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.984, + "words": [ + { + "text": "1", + "start": 99.52, + "end": 101.08, + "confidence": 0.984 + } + ] + }, + { + "id": 51, + "seek": 8400, + "start": 101.52, + "end": 103.62, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.985, + "words": [ + { + "text": "1", + "start": 101.52, + "end": 103.62, + "confidence": 0.985 + } + ] + }, + { + "id": 52, + "seek": 8400, + "start": 103.62, + "end": 105.7, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.988, + "words": [ + { + 
"text": "1", + "start": 103.62, + "end": 105.7, + "confidence": 0.988 + } + ] + }, + { + "id": 53, + "seek": 8400, + "start": 106.5, + "end": 107.12, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.957, + "words": [ + { + "text": "1", + "start": 106.5, + "end": 107.12, + "confidence": 0.957 + } + ] + }, + { + "id": 54, + "seek": 8400, + "start": 108.5, + "end": 109.22, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.946, + "words": [ + { + "text": "1", + "start": 108.5, + "end": 109.22, + "confidence": 0.946 + } + ] + }, + { + "id": 55, + "seek": 8400, + "start": 109.52, + "end": 111.36, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10775410045276988, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.02908501960337162, + "confidence": 0.89, + "words": [ + { + "text": "1", + "start": 109.52, + "end": 111.36, + "confidence": 0.89 + } + ] + }, + { + "id": 56, + "seek": 11200, + "start": 112.98, + "end": 113.34, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.895, + "words": [ + { + "text": "1", + "start": 112.98, + "end": 113.34, + "confidence": 0.895 + } + ] + }, + { + "id": 57, + "seek": 11200, + "start": 113.88, + "end": 114.52, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.871, + "words": [ + { + "text": "1", + "start": 113.88, + "end": 114.52, + "confidence": 0.871 + } + ] + }, + { + "id": 58, + "seek": 
11200, + "start": 115.94, + "end": 117.2, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.977, + "words": [ + { + "text": "1", + "start": 115.94, + "end": 117.2, + "confidence": 0.977 + } + ] + }, + { + "id": 59, + "seek": 11200, + "start": 117.52, + "end": 118.64, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.975, + "words": [ + { + "text": "1", + "start": 117.52, + "end": 118.64, + "confidence": 0.975 + } + ] + }, + { + "id": 60, + "seek": 11200, + "start": 120.5, + "end": 120.8, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.957, + "words": [ + { + "text": "1", + "start": 120.5, + "end": 120.8, + "confidence": 0.957 + } + ] + }, + { + "id": 61, + "seek": 11200, + "start": 121.58, + "end": 123.74, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.959, + "words": [ + { + "text": "1", + "start": 121.58, + "end": 123.74, + "confidence": 0.959 + } + ] + }, + { + "id": 62, + "seek": 11200, + "start": 123.9, + "end": 125.56, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.967, + "words": [ + { + "text": "1", + "start": 123.9, + "end": 125.56, + "confidence": 0.967 + } + ] + }, + { + "id": 63, + "seek": 11200, + "start": 125.56, + "end": 127.34, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + 
"avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.968, + "words": [ + { + "text": "1", + "start": 125.56, + "end": 127.34, + "confidence": 0.968 + } + ] + }, + { + "id": 64, + "seek": 11200, + "start": 127.52, + "end": 129.2, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.96, + "words": [ + { + "text": "1", + "start": 127.52, + "end": 129.2, + "confidence": 0.96 + } + ] + }, + { + "id": 65, + "seek": 11200, + "start": 129.52, + "end": 131.06, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.965, + "words": [ + { + "text": "1", + "start": 129.52, + "end": 131.06, + "confidence": 0.965 + } + ] + }, + { + "id": 66, + "seek": 11200, + "start": 131.52, + "end": 133.3, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.969, + "words": [ + { + "text": "1", + "start": 131.52, + "end": 133.3, + "confidence": 0.969 + } + ] + }, + { + "id": 67, + "seek": 11200, + "start": 133.52, + "end": 135.73, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.966, + "words": [ + { + "text": "1", + "start": 133.52, + "end": 135.73, + "confidence": 0.966 + } + ] + }, + { + "id": 68, + "seek": 11200, + "start": 135.73, + "end": 137.34, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 
0.04961525276303291, + "confidence": 0.971, + "words": [ + { + "text": "1", + "start": 135.73, + "end": 137.34, + "confidence": 0.971 + } + ] + }, + { + "id": 69, + "seek": 11200, + "start": 137.52, + "end": 139.8, + "text": "1", + "tokens": [ + 16 + ], + "temperature": 0.0, + "avg_logprob": -0.10804855823516846, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.04961525276303291, + "confidence": 0.925, + "words": [ + { + "text": "1", + "start": 137.52, + "end": 139.8, + "confidence": 0.925 + } + ] + } + ], + "language": "zh" +} \ No newline at end of file diff --git a/tests/expected/medium_auto.cpu/smartphone.mp3.words.json b/tests/expected/medium_auto.cpu/smartphone.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..9faa961d5cf4a38b6ac04b76010aeaddb28cee89 --- /dev/null +++ b/tests/expected/medium_auto.cpu/smartphone.mp3.words.json @@ -0,0 +1,4802 @@ +{ + "text": " C'est évident ce que dit Nicolas, mais je ne me l'étais jamais formulé comme ça. Ce qui fait la force du smartphone, c'est pas seulement l'accumulation des fonctions, mais la manière dont elles interagissent entre elles. Ce qui dit d'ailleurs sur la photo, c'est hyper convaincant. Alors évidemment, il faudrait ajouter les interfaces. L'écran tactile a été beaucoup très souvent mentionné. Mais bon, il faut dire qu'il profite aussi de 20 ans pendant lesquels les ordinateurs nous ont appris à cliquer sur des icônes. Sauf que le smartphone ajoute le toucher, ce qui rend le contact plus direct, plus sensible. Et puis évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté touffu de la navigation web pour aller directement au but. Bref, tout ça, ce sont les conditions qui permettent de créer cet objet dont Nicolas dit qu'il est vraisemblablement inédit dans l'histoire de l'humanité. Mais ça, ça soulève une autre interrogation. 
Est-ce que le fait que cet objet soit inédit induit que notre rapport à lui est aussi un rapport inédit? Je veux dire, est-ce que le rapport qu'on a au smartphone est comparable à celui qu'on entretenait à d'autres objets techniques comme la voiture ou le téléphone? Il n'y a pas d'équivalent en fait. Et donc cette espèce de nouveauté dans la relation à l'objet, c'est fascinant et terrifiant. Parce qu'on a l'impression, comme le disent les utilisateurs et les services, d'être dépendants de cet objet, d'induire en fait une espèce de relation, de médiation avec le monde qui rend de l'ampleur et qui amène aussi à des formes de rejet. Donc, à objet inédit, rapport inédit. Et ce rapport, si j'en crois Nicolas, serait caractérisé par un mélange de dépendance et de rejet. Bon, en vrai, il faudrait remonter très très finement toute l'histoire des objets techniques et de leur insertion dans nos vies pour déterminer si ce rapport est totalement inédit. Mais j'ai l'impression comme ça que Nicolas ne se trompe pas vraiment. Pour autant que je sache, il y a eu plein de discussions autour de la voiture ou même du téléphone. Mais la dépendance n'était pas du même ordre. Donc le rejet non plus n'était pas du même ordre. On peut adorer sa bagnole, en avoir besoin pour plein de choses. Et bien, le soir, quand on va se coucher, on la laisse. On ne l'a pas dans la main quand on est au lit, on ne l'emmène pas au chiottes. On pouvait être énervé par son môme qui occupait la ligne de téléphone pendant une heure chaque soir pour discuter avec un copain. Mais ça ne ressemblait pas à ce qu'on peut ressentir à voir ce même môme aujourd'hui, continuellement avec son smartphone dans la main, comme si c'était une sorte de pacemaker externe, comme si le lâcher allait entraîner sa mort immédiate. Bon, je dis ça pour le môme, mais c'est évidemment valable pour nous aussi. Donc, rapport inédit. D'accord. Mais pourquoi a-t-on l'impression qu'on n'en sortira jamais? 
Est-ce qu'il faut en remettre la faute sur les gens qui ont créé cet outil merveilleux et diabolique, et diabolique parce que merveilleux? Les économistes parlent de dépendance du sentier. C'est l'idée qu'on est sur un sentier qui a été établi, soit volontairement en marchant dessus, soit en définissant des bornes, en définissant une signalétique.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.38, + "end": 3.62, + "text": " C'est évident ce que dit Nicolas, mais je ne me l'étais jamais formulé comme ça.", + "tokens": [ + 383, + 6, + 377, + 20090, + 1078, + 1769, + 631, + 6176, + 38268, + 11, + 2420, + 1506, + 408, + 385, + 287, + 6, + 22824, + 14540, + 49990, + 526, + 5173, + 2788, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.935, + "words": [ + { + "text": "C'est", + "start": 0.38, + "end": 0.58, + "confidence": 0.961 + }, + { + "text": "évident", + "start": 0.58, + "end": 0.88, + "confidence": 0.984 + }, + { + "text": "ce", + "start": 0.88, + "end": 1.02, + "confidence": 0.663 + }, + { + "text": "que", + "start": 1.02, + "end": 1.08, + "confidence": 0.989 + }, + { + "text": "dit", + "start": 1.08, + "end": 1.2, + "confidence": 0.994 + }, + { + "text": "Nicolas,", + "start": 1.2, + "end": 1.78, + "confidence": 0.91 + }, + { + "text": "mais", + "start": 1.78, + "end": 1.9, + "confidence": 0.979 + }, + { + "text": "je", + "start": 1.9, + "end": 2.24, + "confidence": 0.982 + }, + { + "text": "ne", + "start": 2.24, + "end": 2.34, + "confidence": 0.835 + }, + { + "text": "me", + "start": 2.34, + "end": 2.38, + "confidence": 0.82 + }, + { + "text": "l'étais", + "start": 2.38, + "end": 2.58, + "confidence": 0.971 + }, + { + "text": "jamais", + "start": 2.58, + "end": 2.84, + "confidence": 0.989 + }, + { + "text": "formulé", + "start": 2.84, + "end": 3.26, + "confidence": 0.909 + }, + { + "text": "comme", + "start": 3.26, + "end": 
3.42, + "confidence": 0.993 + }, + { + "text": "ça.", + "start": 3.42, + "end": 3.62, + "confidence": 0.975 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 4.08, + "end": 7.92, + "text": " Ce qui fait la force du smartphone, c'est pas seulement l'accumulation des fonctions,", + "tokens": [ + 8257, + 1956, + 3887, + 635, + 3464, + 1581, + 13307, + 11, + 269, + 6, + 377, + 1736, + 27772, + 287, + 6, + 8476, + 449, + 2776, + 730, + 17290, + 3916, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.93, + "words": [ + { + "text": "Ce", + "start": 4.08, + "end": 4.26, + "confidence": 0.952 + }, + { + "text": "qui", + "start": 4.26, + "end": 4.34, + "confidence": 0.958 + }, + { + "text": "fait", + "start": 4.34, + "end": 4.48, + "confidence": 0.565 + }, + { + "text": "la", + "start": 4.48, + "end": 4.66, + "confidence": 0.971 + }, + { + "text": "force", + "start": 4.66, + "end": 5.0, + "confidence": 0.999 + }, + { + "text": "du", + "start": 5.0, + "end": 5.2, + "confidence": 0.996 + }, + { + "text": "smartphone,", + "start": 5.2, + "end": 5.88, + "confidence": 0.911 + }, + { + "text": "c'est", + "start": 5.88, + "end": 6.12, + "confidence": 0.88 + }, + { + "text": "pas", + "start": 6.12, + "end": 6.26, + "confidence": 0.992 + }, + { + "text": "seulement", + "start": 6.26, + "end": 6.52, + "confidence": 0.999 + }, + { + "text": "l'accumulation", + "start": 6.52, + "end": 7.38, + "confidence": 0.958 + }, + { + "text": "des", + "start": 7.38, + "end": 7.56, + "confidence": 0.983 + }, + { + "text": "fonctions,", + "start": 7.56, + "end": 7.92, + "confidence": 0.987 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 8.32, + "end": 10.88, + "text": " mais la manière dont elles interagissent entre elles.", + "tokens": [ + 2420, + 635, + 22267, + 9400, + 23576, + 728, + 559, + 25450, + 3962, + 23576, + 13 + ], + "temperature": 0.0, + "avg_logprob": 
-0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.977, + "words": [ + { + "text": "mais", + "start": 8.32, + "end": 8.44, + "confidence": 0.992 + }, + { + "text": "la", + "start": 8.44, + "end": 8.6, + "confidence": 0.995 + }, + { + "text": "manière", + "start": 8.6, + "end": 8.9, + "confidence": 0.999 + }, + { + "text": "dont", + "start": 8.9, + "end": 9.1, + "confidence": 0.978 + }, + { + "text": "elles", + "start": 9.1, + "end": 9.48, + "confidence": 0.967 + }, + { + "text": "interagissent", + "start": 9.48, + "end": 10.32, + "confidence": 0.964 + }, + { + "text": "entre", + "start": 10.32, + "end": 10.58, + "confidence": 0.956 + }, + { + "text": "elles.", + "start": 10.58, + "end": 10.88, + "confidence": 0.99 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 10.96, + "end": 13.0, + "text": " Ce qui dit d'ailleurs sur la photo, c'est hyper convaincant.", + "tokens": [ + 8257, + 1956, + 6176, + 274, + 6, + 19400, + 1022, + 635, + 5052, + 11, + 269, + 6, + 377, + 9848, + 3754, + 491, + 66, + 394, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.906, + "words": [ + { + "text": "Ce", + "start": 10.96, + "end": 11.16, + "confidence": 0.607 + }, + { + "text": "qui", + "start": 11.16, + "end": 11.22, + "confidence": 0.765 + }, + { + "text": "dit", + "start": 11.22, + "end": 11.4, + "confidence": 0.983 + }, + { + "text": "d'ailleurs", + "start": 11.4, + "end": 11.56, + "confidence": 0.985 + }, + { + "text": "sur", + "start": 11.56, + "end": 11.72, + "confidence": 0.477 + }, + { + "text": "la", + "start": 11.72, + "end": 11.78, + "confidence": 0.984 + }, + { + "text": "photo,", + "start": 11.78, + "end": 12.12, + "confidence": 0.994 + }, + { + "text": "c'est", + "start": 12.12, + "end": 12.2, + "confidence": 0.997 + }, + { + "text": "hyper", + "start": 12.2, + "end": 12.42, + 
"confidence": 0.993 + }, + { + "text": "convaincant.", + "start": 12.42, + "end": 13.0, + "confidence": 0.982 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 13.34, + "end": 16.02, + "text": " Alors évidemment, il faudrait ajouter les interfaces.", + "tokens": [ + 9946, + 24724, + 11, + 1930, + 38694, + 8645, + 17680, + 23985, + 1512, + 28416, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.913, + "words": [ + { + "text": "Alors", + "start": 13.34, + "end": 13.62, + "confidence": 0.585 + }, + { + "text": "évidemment,", + "start": 13.62, + "end": 14.34, + "confidence": 0.832 + }, + { + "text": "il", + "start": 14.34, + "end": 14.38, + "confidence": 0.952 + }, + { + "text": "faudrait", + "start": 14.38, + "end": 14.74, + "confidence": 0.996 + }, + { + "text": "ajouter", + "start": 14.74, + "end": 15.16, + "confidence": 0.992 + }, + { + "text": "les", + "start": 15.16, + "end": 15.52, + "confidence": 0.985 + }, + { + "text": "interfaces.", + "start": 15.52, + "end": 16.02, + "confidence": 0.984 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 16.22, + "end": 19.36, + "text": " L'écran tactile a été beaucoup très souvent mentionné.", + "tokens": [ + 441, + 6, + 9062, + 4257, + 47319, + 257, + 8862, + 8796, + 5732, + 20847, + 2152, + 15055, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.923, + "words": [ + { + "text": "L'écran", + "start": 16.22, + "end": 16.7, + "confidence": 0.996 + }, + { + "text": "tactile", + "start": 16.7, + "end": 17.06, + "confidence": 0.986 + }, + { + "text": "a", + "start": 17.06, + "end": 17.26, + "confidence": 0.98 + }, + { + "text": "été", + "start": 17.26, + "end": 17.88, + "confidence": 0.975 + }, + { + "text": "beaucoup", + "start": 17.88, + "end": 18.28, + "confidence": 0.976 + }, + { 
+ "text": "très", + "start": 18.28, + "end": 18.62, + "confidence": 0.447 + }, + { + "text": "souvent", + "start": 18.62, + "end": 18.9, + "confidence": 0.996 + }, + { + "text": "mentionné.", + "start": 18.9, + "end": 19.36, + "confidence": 0.978 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 19.84, + "end": 25.26, + "text": " Mais bon, il faut dire qu'il profite aussi de 20 ans pendant lesquels les ordinateurs nous ont appris à cliquer sur des icônes.", + "tokens": [ + 6313, + 4428, + 11, + 1930, + 8487, + 1264, + 421, + 6, + 388, + 1740, + 642, + 6212, + 368, + 945, + 1567, + 17338, + 1512, + 358, + 1625, + 1512, + 4792, + 13923, + 2156, + 4666, + 6592, + 724, + 5714, + 1531, + 596, + 23909, + 1022, + 730, + 4376, + 2851, + 4081, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.92, + "words": [ + { + "text": "Mais", + "start": 19.84, + "end": 20.22, + "confidence": 0.944 + }, + { + "text": "bon,", + "start": 20.22, + "end": 20.52, + "confidence": 0.667 + }, + { + "text": "il", + "start": 20.52, + "end": 20.6, + "confidence": 0.99 + }, + { + "text": "faut", + "start": 20.6, + "end": 20.7, + "confidence": 0.99 + }, + { + "text": "dire", + "start": 20.7, + "end": 20.84, + "confidence": 0.995 + }, + { + "text": "qu'il", + "start": 20.84, + "end": 20.96, + "confidence": 0.88 + }, + { + "text": "profite", + "start": 20.96, + "end": 21.26, + "confidence": 0.995 + }, + { + "text": "aussi", + "start": 21.26, + "end": 21.68, + "confidence": 0.972 + }, + { + "text": "de", + "start": 21.68, + "end": 21.9, + "confidence": 0.97 + }, + { + "text": "20", + "start": 21.9, + "end": 22.1, + "confidence": 0.812 + }, + { + "text": "ans", + "start": 22.1, + "end": 22.32, + "confidence": 0.997 + }, + { + "text": "pendant", + "start": 22.32, + "end": 22.48, + "confidence": 0.72 + }, + { + "text": "lesquels", + "start": 22.48, + "end": 22.92, + "confidence": 
0.98 + }, + { + "text": "les", + "start": 22.92, + "end": 23.04, + "confidence": 0.709 + }, + { + "text": "ordinateurs", + "start": 23.04, + "end": 23.54, + "confidence": 0.965 + }, + { + "text": "nous", + "start": 23.54, + "end": 23.72, + "confidence": 0.602 + }, + { + "text": "ont", + "start": 23.72, + "end": 23.82, + "confidence": 0.974 + }, + { + "text": "appris", + "start": 23.82, + "end": 24.1, + "confidence": 0.991 + }, + { + "text": "à", + "start": 24.1, + "end": 24.24, + "confidence": 0.829 + }, + { + "text": "cliquer", + "start": 24.24, + "end": 24.5, + "confidence": 0.989 + }, + { + "text": "sur", + "start": 24.5, + "end": 24.66, + "confidence": 0.984 + }, + { + "text": "des", + "start": 24.66, + "end": 24.94, + "confidence": 0.971 + }, + { + "text": "icônes.", + "start": 24.94, + "end": 25.26, + "confidence": 0.992 + } + ] + }, + { + "id": 7, + "seek": 2534, + "start": 25.42, + "end": 30.64, + "text": " Sauf que le smartphone ajoute le toucher, ce qui rend le contact plus direct, plus sensible.", + "tokens": [ + 318, + 9507, + 631, + 476, + 13307, + 17680, + 14040, + 476, + 2557, + 260, + 11, + 1769, + 1956, + 6125, + 476, + 3385, + 1804, + 2047, + 11, + 1804, + 25380, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07915337880452473, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 3.457161710684886e-06, + "confidence": 0.969, + "words": [ + { + "text": "Sauf", + "start": 25.42, + "end": 25.76, + "confidence": 0.99 + }, + { + "text": "que", + "start": 25.76, + "end": 26.26, + "confidence": 0.996 + }, + { + "text": "le", + "start": 26.26, + "end": 26.66, + "confidence": 0.631 + }, + { + "text": "smartphone", + "start": 26.66, + "end": 27.06, + "confidence": 0.996 + }, + { + "text": "ajoute", + "start": 27.06, + "end": 27.44, + "confidence": 0.991 + }, + { + "text": "le", + "start": 27.44, + "end": 27.62, + "confidence": 0.992 + }, + { + "text": "toucher,", + "start": 27.62, + "end": 28.18, + "confidence": 0.988 + }, + { + "text": "ce", 
+ "start": 28.18, + "end": 28.22, + "confidence": 0.99 + }, + { + "text": "qui", + "start": 28.22, + "end": 28.28, + "confidence": 1.0 + }, + { + "text": "rend", + "start": 28.28, + "end": 28.48, + "confidence": 0.994 + }, + { + "text": "le", + "start": 28.48, + "end": 28.68, + "confidence": 0.993 + }, + { + "text": "contact", + "start": 28.68, + "end": 29.1, + "confidence": 0.999 + }, + { + "text": "plus", + "start": 29.1, + "end": 29.46, + "confidence": 0.985 + }, + { + "text": "direct,", + "start": 29.46, + "end": 30.22, + "confidence": 0.995 + }, + { + "text": "plus", + "start": 30.22, + "end": 30.26, + "confidence": 0.994 + }, + { + "text": "sensible.", + "start": 30.26, + "end": 30.64, + "confidence": 0.997 + } + ] + }, + { + "id": 8, + "seek": 2534, + "start": 31.04, + "end": 37.82, + "text": " Et puis évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté touffu de la navigation web pour aller directement au but.", + "tokens": [ + 3790, + 9093, + 24724, + 11, + 1930, + 38694, + 8645, + 16421, + 6212, + 730, + 5821, + 1956, + 21540, + 317, + 368, + 21234, + 1193, + 476, + 18437, + 10095, + 602, + 84, + 368, + 635, + 17346, + 3670, + 2016, + 8722, + 37297, + 1609, + 457, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07915337880452473, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 3.457161710684886e-06, + "confidence": 0.892, + "words": [ + { + "text": "Et", + "start": 31.04, + "end": 31.22, + "confidence": 0.97 + }, + { + "text": "puis", + "start": 31.22, + "end": 31.36, + "confidence": 0.971 + }, + { + "text": "évidemment,", + "start": 31.36, + "end": 31.7, + "confidence": 0.875 + }, + { + "text": "il", + "start": 31.7, + "end": 31.74, + "confidence": 0.993 + }, + { + "text": "faudrait", + "start": 31.74, + "end": 31.94, + "confidence": 0.995 + }, + { + "text": "parler", + "start": 31.94, + "end": 32.12, + "confidence": 0.84 + }, + { + "text": "aussi", + "start": 32.12, + "end": 32.34, + 
"confidence": 0.977 + }, + { + "text": "des", + "start": 32.34, + "end": 32.48, + "confidence": 0.994 + }, + { + "text": "applications", + "start": 32.48, + "end": 32.9, + "confidence": 0.993 + }, + { + "text": "qui", + "start": 32.9, + "end": 33.18, + "confidence": 0.481 + }, + { + "text": "permettent", + "start": 33.18, + "end": 33.74, + "confidence": 0.992 + }, + { + "text": "de", + "start": 33.74, + "end": 33.96, + "confidence": 0.885 + }, + { + "text": "contourner", + "start": 33.96, + "end": 34.42, + "confidence": 0.958 + }, + { + "text": "le", + "start": 34.42, + "end": 34.52, + "confidence": 0.775 + }, + { + "text": "côté", + "start": 34.52, + "end": 34.8, + "confidence": 0.984 + }, + { + "text": "touffu", + "start": 34.8, + "end": 35.32, + "confidence": 0.741 + }, + { + "text": "de", + "start": 35.32, + "end": 35.72, + "confidence": 0.882 + }, + { + "text": "la", + "start": 35.72, + "end": 35.78, + "confidence": 0.992 + }, + { + "text": "navigation", + "start": 35.78, + "end": 36.24, + "confidence": 0.994 + }, + { + "text": "web", + "start": 36.24, + "end": 36.6, + "confidence": 0.854 + }, + { + "text": "pour", + "start": 36.6, + "end": 36.78, + "confidence": 0.585 + }, + { + "text": "aller", + "start": 36.78, + "end": 36.98, + "confidence": 0.987 + }, + { + "text": "directement", + "start": 36.98, + "end": 37.52, + "confidence": 0.997 + }, + { + "text": "au", + "start": 37.52, + "end": 37.68, + "confidence": 0.967 + }, + { + "text": "but.", + "start": 37.68, + "end": 37.82, + "confidence": 0.995 + } + ] + }, + { + "id": 9, + "seek": 2534, + "start": 37.82, + "end": 46.54, + "text": " Bref, tout ça, ce sont les conditions qui permettent de créer cet objet dont Nicolas dit qu'il est vraisemblablement inédit dans l'histoire de l'humanité.", + "tokens": [ + 49957, + 11, + 3486, + 2788, + 11, + 1769, + 4900, + 1512, + 4487, + 1956, + 21540, + 317, + 368, + 32062, + 8603, + 14964, + 9400, + 38268, + 6176, + 421, + 6, + 388, + 871, + 6070, + 271, + 443, + 5199, 
+ 712, + 518, + 294, + 7811, + 270, + 2680, + 287, + 6, + 29093, + 368, + 287, + 6, + 18796, + 5066, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07915337880452473, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 3.457161710684886e-06, + "confidence": 0.974, + "words": [ + { + "text": "Bref,", + "start": 37.82, + "end": 38.76, + "confidence": 0.987 + }, + { + "text": "tout", + "start": 38.76, + "end": 38.98, + "confidence": 0.711 + }, + { + "text": "ça,", + "start": 38.98, + "end": 39.42, + "confidence": 0.995 + }, + { + "text": "ce", + "start": 39.42, + "end": 39.7, + "confidence": 0.993 + }, + { + "text": "sont", + "start": 39.7, + "end": 39.88, + "confidence": 0.999 + }, + { + "text": "les", + "start": 39.88, + "end": 40.16, + "confidence": 0.991 + }, + { + "text": "conditions", + "start": 40.16, + "end": 40.68, + "confidence": 0.995 + }, + { + "text": "qui", + "start": 40.68, + "end": 40.96, + "confidence": 0.997 + }, + { + "text": "permettent", + "start": 40.96, + "end": 41.46, + "confidence": 0.997 + }, + { + "text": "de", + "start": 41.46, + "end": 41.6, + "confidence": 0.998 + }, + { + "text": "créer", + "start": 41.6, + "end": 42.06, + "confidence": 0.998 + }, + { + "text": "cet", + "start": 42.06, + "end": 42.38, + "confidence": 0.998 + }, + { + "text": "objet", + "start": 42.38, + "end": 42.6, + "confidence": 0.994 + }, + { + "text": "dont", + "start": 42.6, + "end": 42.8, + "confidence": 0.779 + }, + { + "text": "Nicolas", + "start": 42.8, + "end": 43.26, + "confidence": 0.989 + }, + { + "text": "dit", + "start": 43.26, + "end": 43.5, + "confidence": 0.986 + }, + { + "text": "qu'il", + "start": 43.5, + "end": 43.7, + "confidence": 0.983 + }, + { + "text": "est", + "start": 43.7, + "end": 43.88, + "confidence": 0.991 + }, + { + "text": "vraisemblablement", + "start": 43.88, + "end": 44.98, + "confidence": 0.991 + }, + { + "text": "inédit", + "start": 44.98, + "end": 45.38, + "confidence": 0.98 + }, + { + "text": "dans", + "start": 
45.38, + "end": 45.7, + "confidence": 0.969 + }, + { + "text": "l'histoire", + "start": 45.7, + "end": 45.98, + "confidence": 0.957 + }, + { + "text": "de", + "start": 45.98, + "end": 46.18, + "confidence": 0.999 + }, + { + "text": "l'humanité.", + "start": 46.18, + "end": 46.54, + "confidence": 0.992 + } + ] + }, + { + "id": 10, + "seek": 2534, + "start": 46.54, + "end": 48.82, + "text": " Mais ça, ça soulève une autre interrogation.", + "tokens": [ + 6313, + 2788, + 11, + 2788, + 5133, + 31397, + 2251, + 15081, + 24871, + 399, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07915337880452473, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 3.457161710684886e-06, + "confidence": 0.969, + "words": [ + { + "text": "Mais", + "start": 46.54, + "end": 47.24, + "confidence": 0.846 + }, + { + "text": "ça,", + "start": 47.24, + "end": 47.72, + "confidence": 0.935 + }, + { + "text": "ça", + "start": 47.72, + "end": 47.76, + "confidence": 0.977 + }, + { + "text": "soulève", + "start": 47.76, + "end": 47.84, + "confidence": 0.993 + }, + { + "text": "une", + "start": 47.84, + "end": 48.02, + "confidence": 0.998 + }, + { + "text": "autre", + "start": 48.02, + "end": 48.26, + "confidence": 0.999 + }, + { + "text": "interrogation.", + "start": 48.26, + "end": 48.82, + "confidence": 0.997 + } + ] + }, + { + "id": 11, + "seek": 4884, + "start": 49.22, + "end": 55.46, + "text": " Est-ce que le fait que cet objet soit inédit induit que notre rapport à lui est aussi un rapport inédit?", + "tokens": [ + 4410, + 12, + 384, + 631, + 476, + 3887, + 631, + 8603, + 14964, + 12703, + 294, + 7811, + 270, + 13716, + 270, + 631, + 10349, + 18018, + 1531, + 8783, + 871, + 6212, + 517, + 18018, + 294, + 7811, + 270, + 2506 + ], + "temperature": 0.0, + "avg_logprob": -0.10746372298689078, + "compression_ratio": 1.662379421221865, + "no_speech_prob": 1.3105129255563952e-05, + "confidence": 0.988, + "words": [ + { + "text": "Est-ce", + "start": 49.22, + "end": 49.62, + 
"confidence": 0.982 + }, + { + "text": "que", + "start": 49.62, + "end": 49.72, + "confidence": 0.991 + }, + { + "text": "le", + "start": 49.72, + "end": 49.82, + "confidence": 0.993 + }, + { + "text": "fait", + "start": 49.82, + "end": 49.98, + "confidence": 0.999 + }, + { + "text": "que", + "start": 49.98, + "end": 50.14, + "confidence": 0.991 + }, + { + "text": "cet", + "start": 50.14, + "end": 50.32, + "confidence": 0.991 + }, + { + "text": "objet", + "start": 50.32, + "end": 50.66, + "confidence": 0.997 + }, + { + "text": "soit", + "start": 50.66, + "end": 51.12, + "confidence": 0.995 + }, + { + "text": "inédit", + "start": 51.12, + "end": 51.8, + "confidence": 0.996 + }, + { + "text": "induit", + "start": 51.8, + "end": 52.32, + "confidence": 0.977 + }, + { + "text": "que", + "start": 52.32, + "end": 52.42, + "confidence": 0.983 + }, + { + "text": "notre", + "start": 52.42, + "end": 52.72, + "confidence": 0.996 + }, + { + "text": "rapport", + "start": 52.72, + "end": 53.28, + "confidence": 0.997 + }, + { + "text": "à", + "start": 53.28, + "end": 53.44, + "confidence": 0.978 + }, + { + "text": "lui", + "start": 53.44, + "end": 53.66, + "confidence": 0.999 + }, + { + "text": "est", + "start": 53.66, + "end": 54.02, + "confidence": 0.916 + }, + { + "text": "aussi", + "start": 54.02, + "end": 54.54, + "confidence": 0.995 + }, + { + "text": "un", + "start": 54.54, + "end": 54.7, + "confidence": 0.995 + }, + { + "text": "rapport", + "start": 54.7, + "end": 55.0, + "confidence": 0.996 + }, + { + "text": "inédit?", + "start": 55.0, + "end": 55.46, + "confidence": 0.996 + } + ] + }, + { + "id": 12, + "seek": 4884, + "start": 55.46, + "end": 63.12, + "text": " Je veux dire, est-ce que le rapport qu'on a au smartphone est comparable à celui qu'on entretenait à d'autres objets techniques comme la voiture ou le téléphone?", + "tokens": [ + 2588, + 16389, + 1264, + 11, + 871, + 12, + 384, + 631, + 476, + 18018, + 421, + 6, + 266, + 257, + 1609, + 13307, + 871, + 25323, + 
1531, + 22829, + 421, + 6, + 266, + 3962, + 1147, + 1001, + 1531, + 274, + 6, + 16752, + 1111, + 25349, + 7512, + 5173, + 635, + 38859, + 2820, + 476, + 47159, + 2506 + ], + "temperature": 0.0, + "avg_logprob": -0.10746372298689078, + "compression_ratio": 1.662379421221865, + "no_speech_prob": 1.3105129255563952e-05, + "confidence": 0.965, + "words": [ + { + "text": "Je", + "start": 55.46, + "end": 55.9, + "confidence": 0.88 + }, + { + "text": "veux", + "start": 55.9, + "end": 56.0, + "confidence": 0.988 + }, + { + "text": "dire,", + "start": 56.0, + "end": 56.24, + "confidence": 0.997 + }, + { + "text": "est-ce", + "start": 56.24, + "end": 56.36, + "confidence": 0.99 + }, + { + "text": "que", + "start": 56.36, + "end": 56.42, + "confidence": 0.99 + }, + { + "text": "le", + "start": 56.42, + "end": 56.58, + "confidence": 0.995 + }, + { + "text": "rapport", + "start": 56.58, + "end": 56.88, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 56.88, + "end": 57.04, + "confidence": 0.986 + }, + { + "text": "a", + "start": 57.04, + "end": 57.18, + "confidence": 0.989 + }, + { + "text": "au", + "start": 57.18, + "end": 57.28, + "confidence": 0.968 + }, + { + "text": "smartphone", + "start": 57.28, + "end": 57.6, + "confidence": 0.994 + }, + { + "text": "est", + "start": 57.6, + "end": 57.92, + "confidence": 0.95 + }, + { + "text": "comparable", + "start": 57.92, + "end": 58.24, + "confidence": 0.997 + }, + { + "text": "à", + "start": 58.24, + "end": 58.48, + "confidence": 0.949 + }, + { + "text": "celui", + "start": 58.48, + "end": 58.66, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 58.66, + "end": 58.9, + "confidence": 0.989 + }, + { + "text": "entretenait", + "start": 58.9, + "end": 59.32, + "confidence": 0.929 + }, + { + "text": "à", + "start": 59.32, + "end": 59.46, + "confidence": 0.958 + }, + { + "text": "d'autres", + "start": 59.46, + "end": 59.7, + "confidence": 0.997 + }, + { + "text": "objets", + "start": 59.7, + "end": 59.96, + 
"confidence": 0.991 + }, + { + "text": "techniques", + "start": 59.96, + "end": 60.46, + "confidence": 0.983 + }, + { + "text": "comme", + "start": 60.46, + "end": 60.88, + "confidence": 0.587 + }, + { + "text": "la", + "start": 60.88, + "end": 61.5, + "confidence": 0.987 + }, + { + "text": "voiture", + "start": 61.5, + "end": 62.06, + "confidence": 1.0 + }, + { + "text": "ou", + "start": 62.06, + "end": 62.36, + "confidence": 0.945 + }, + { + "text": "le", + "start": 62.36, + "end": 62.68, + "confidence": 0.998 + }, + { + "text": "téléphone?", + "start": 62.68, + "end": 63.12, + "confidence": 0.999 + } + ] + }, + { + "id": 13, + "seek": 4884, + "start": 63.36, + "end": 66.66, + "text": " Il n'y a pas d'équivalent en fait.", + "tokens": [ + 4416, + 297, + 6, + 88, + 257, + 1736, + 274, + 6, + 20183, + 3576, + 317, + 465, + 3887, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.10746372298689078, + "compression_ratio": 1.662379421221865, + "no_speech_prob": 1.3105129255563952e-05, + "confidence": 0.936, + "words": [ + { + "text": "Il", + "start": 63.36, + "end": 65.42, + "confidence": 0.779 + }, + { + "text": "n'y", + "start": 65.42, + "end": 65.48, + "confidence": 0.978 + }, + { + "text": "a", + "start": 65.48, + "end": 65.54, + "confidence": 0.992 + }, + { + "text": "pas", + "start": 65.54, + "end": 65.66, + "confidence": 0.999 + }, + { + "text": "d'équivalent", + "start": 65.66, + "end": 66.22, + "confidence": 0.995 + }, + { + "text": "en", + "start": 66.22, + "end": 66.42, + "confidence": 0.601 + }, + { + "text": "fait.", + "start": 66.42, + "end": 66.66, + "confidence": 0.996 + } + ] + }, + { + "id": 14, + "seek": 4884, + "start": 66.88, + "end": 71.52, + "text": " Et donc cette espèce de nouveauté dans la relation à l'objet, c'est fascinant et terrifiant.", + "tokens": [ + 3790, + 5926, + 5550, + 7089, + 30236, + 368, + 11456, + 1375, + 526, + 2680, + 635, + 9721, + 1531, + 287, + 6, + 996, + 7108, + 11, + 269, + 6, + 377, + 7184, + 259, + 394, + 1030, + 
7245, + 351, + 5798, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.10746372298689078, + "compression_ratio": 1.662379421221865, + "no_speech_prob": 1.3105129255563952e-05, + "confidence": 0.953, + "words": [ + { + "text": "Et", + "start": 66.88, + "end": 66.98, + "confidence": 0.599 + }, + { + "text": "donc", + "start": 66.98, + "end": 67.08, + "confidence": 0.901 + }, + { + "text": "cette", + "start": 67.08, + "end": 67.28, + "confidence": 0.71 + }, + { + "text": "espèce", + "start": 67.28, + "end": 67.54, + "confidence": 0.995 + }, + { + "text": "de", + "start": 67.54, + "end": 67.68, + "confidence": 0.999 + }, + { + "text": "nouveauté", + "start": 67.68, + "end": 68.48, + "confidence": 0.979 + }, + { + "text": "dans", + "start": 68.48, + "end": 68.66, + "confidence": 0.98 + }, + { + "text": "la", + "start": 68.66, + "end": 68.94, + "confidence": 0.995 + }, + { + "text": "relation", + "start": 68.94, + "end": 69.22, + "confidence": 0.998 + }, + { + "text": "à", + "start": 69.22, + "end": 69.38, + "confidence": 0.997 + }, + { + "text": "l'objet,", + "start": 69.38, + "end": 70.24, + "confidence": 0.997 + }, + { + "text": "c'est", + "start": 70.24, + "end": 70.38, + "confidence": 0.98 + }, + { + "text": "fascinant", + "start": 70.38, + "end": 70.64, + "confidence": 0.978 + }, + { + "text": "et", + "start": 70.64, + "end": 70.76, + "confidence": 0.964 + }, + { + "text": "terrifiant.", + "start": 70.76, + "end": 71.52, + "confidence": 0.977 + } + ] + }, + { + "id": 15, + "seek": 4884, + "start": 71.62, + "end": 76.48, + "text": " Parce qu'on a l'impression, comme le disent les utilisateurs et les services, d'être dépendants de cet objet,", + "tokens": [ + 20429, + 421, + 6, + 266, + 257, + 287, + 6, + 36107, + 11, + 5173, + 476, + 37313, + 1512, + 33643, + 25929, + 1030, + 1512, + 3328, + 11, + 274, + 6, + 9498, + 45768, + 1719, + 368, + 8603, + 14964, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.10746372298689078, + "compression_ratio": 
1.662379421221865, + "no_speech_prob": 1.3105129255563952e-05, + "confidence": 0.813, + "words": [ + { + "text": "Parce", + "start": 71.62, + "end": 71.86, + "confidence": 0.512 + }, + { + "text": "qu'on", + "start": 71.86, + "end": 72.12, + "confidence": 0.937 + }, + { + "text": "a", + "start": 72.12, + "end": 72.44, + "confidence": 0.982 + }, + { + "text": "l'impression,", + "start": 72.44, + "end": 73.56, + "confidence": 0.998 + }, + { + "text": "comme", + "start": 73.56, + "end": 73.84, + "confidence": 0.964 + }, + { + "text": "le", + "start": 73.84, + "end": 74.0, + "confidence": 0.984 + }, + { + "text": "disent", + "start": 74.0, + "end": 74.2, + "confidence": 0.998 + }, + { + "text": "les", + "start": 74.2, + "end": 74.4, + "confidence": 0.994 + }, + { + "text": "utilisateurs", + "start": 74.4, + "end": 74.84, + "confidence": 0.997 + }, + { + "text": "et", + "start": 74.84, + "end": 74.96, + "confidence": 0.329 + }, + { + "text": "les", + "start": 74.96, + "end": 75.0, + "confidence": 0.773 + }, + { + "text": "services,", + "start": 75.0, + "end": 75.22, + "confidence": 0.214 + }, + { + "text": "d'être", + "start": 75.22, + "end": 75.42, + "confidence": 0.766 + }, + { + "text": "dépendants", + "start": 75.42, + "end": 75.96, + "confidence": 0.789 + }, + { + "text": "de", + "start": 75.96, + "end": 76.08, + "confidence": 0.986 + }, + { + "text": "cet", + "start": 76.08, + "end": 76.26, + "confidence": 0.996 + }, + { + "text": "objet,", + "start": 76.26, + "end": 76.48, + "confidence": 0.996 + } + ] + }, + { + "id": 16, + "seek": 7684, + "start": 76.86, + "end": 83.26, + "text": " d'induire en fait une espèce de relation, de médiation avec le monde qui rend de l'ampleur et qui amène aussi à des formes de rejet.", + "tokens": [ + 274, + 6, + 471, + 43612, + 465, + 3887, + 2251, + 7089, + 30236, + 368, + 9721, + 11, + 368, + 42436, + 399, + 4163, + 476, + 10431, + 1956, + 6125, + 368, + 287, + 6, + 335, + 781, + 374, + 1030, + 1956, + 669, + 18832, + 6212, + 
1531, + 730, + 1254, + 279, + 368, + 319, + 7108, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06937986261704389, + "compression_ratio": 1.6787003610108304, + "no_speech_prob": 5.948771558905719e-06, + "confidence": 0.917, + "words": [ + { + "text": "d'induire", + "start": 76.86, + "end": 77.08, + "confidence": 0.841 + }, + { + "text": "en", + "start": 77.08, + "end": 77.24, + "confidence": 0.618 + }, + { + "text": "fait", + "start": 77.24, + "end": 77.34, + "confidence": 0.994 + }, + { + "text": "une", + "start": 77.34, + "end": 77.52, + "confidence": 0.983 + }, + { + "text": "espèce", + "start": 77.52, + "end": 77.88, + "confidence": 0.996 + }, + { + "text": "de", + "start": 77.88, + "end": 78.48, + "confidence": 0.997 + }, + { + "text": "relation,", + "start": 78.48, + "end": 78.6, + "confidence": 0.596 + }, + { + "text": "de", + "start": 78.6, + "end": 78.94, + "confidence": 0.987 + }, + { + "text": "médiation", + "start": 78.94, + "end": 79.52, + "confidence": 0.997 + }, + { + "text": "avec", + "start": 79.52, + "end": 79.74, + "confidence": 0.967 + }, + { + "text": "le", + "start": 79.74, + "end": 79.92, + "confidence": 0.998 + }, + { + "text": "monde", + "start": 79.92, + "end": 80.64, + "confidence": 0.992 + }, + { + "text": "qui", + "start": 80.64, + "end": 81.1, + "confidence": 0.696 + }, + { + "text": "rend", + "start": 81.1, + "end": 81.64, + "confidence": 0.91 + }, + { + "text": "de", + "start": 81.64, + "end": 81.78, + "confidence": 0.712 + }, + { + "text": "l'ampleur", + "start": 81.78, + "end": 82.02, + "confidence": 0.987 + }, + { + "text": "et", + "start": 82.02, + "end": 82.12, + "confidence": 0.931 + }, + { + "text": "qui", + "start": 82.12, + "end": 82.24, + "confidence": 0.976 + }, + { + "text": "amène", + "start": 82.24, + "end": 82.36, + "confidence": 0.973 + }, + { + "text": "aussi", + "start": 82.36, + "end": 82.56, + "confidence": 0.938 + }, + { + "text": "à", + "start": 82.56, + "end": 82.62, + "confidence": 0.941 + }, + { + 
"text": "des", + "start": 82.62, + "end": 82.72, + "confidence": 0.992 + }, + { + "text": "formes", + "start": 82.72, + "end": 82.9, + "confidence": 0.993 + }, + { + "text": "de", + "start": 82.9, + "end": 83.02, + "confidence": 0.998 + }, + { + "text": "rejet.", + "start": 83.02, + "end": 83.26, + "confidence": 0.883 + } + ] + }, + { + "id": 17, + "seek": 7684, + "start": 83.94, + "end": 87.8, + "text": " Donc, à objet inédit, rapport inédit.", + "tokens": [ + 7477, + 11, + 1531, + 14964, + 294, + 7811, + 270, + 11, + 18018, + 294, + 7811, + 270, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06937986261704389, + "compression_ratio": 1.6787003610108304, + "no_speech_prob": 5.948771558905719e-06, + "confidence": 0.962, + "words": [ + { + "text": "Donc,", + "start": 83.94, + "end": 84.94, + "confidence": 0.971 + }, + { + "text": "à", + "start": 84.94, + "end": 84.98, + "confidence": 0.88 + }, + { + "text": "objet", + "start": 84.98, + "end": 85.36, + "confidence": 0.828 + }, + { + "text": "inédit,", + "start": 85.36, + "end": 86.56, + "confidence": 0.993 + }, + { + "text": "rapport", + "start": 86.56, + "end": 87.0, + "confidence": 0.981 + }, + { + "text": "inédit.", + "start": 87.0, + "end": 87.8, + "confidence": 0.998 + } + ] + }, + { + "id": 18, + "seek": 7684, + "start": 88.02, + "end": 95.14, + "text": " Et ce rapport, si j'en crois Nicolas, serait caractérisé par un mélange de dépendance et de rejet.", + "tokens": [ + 3790, + 1769, + 18018, + 11, + 1511, + 361, + 6, + 268, + 21724, + 38268, + 11, + 23139, + 1032, + 578, + 4198, + 22118, + 971, + 517, + 41953, + 933, + 368, + 45768, + 719, + 1030, + 368, + 319, + 7108, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06937986261704389, + "compression_ratio": 1.6787003610108304, + "no_speech_prob": 5.948771558905719e-06, + "confidence": 0.972, + "words": [ + { + "text": "Et", + "start": 88.02, + "end": 88.48, + "confidence": 0.992 + }, + { + "text": "ce", + "start": 88.48, + "end": 88.86, + "confidence": 
0.975 + }, + { + "text": "rapport,", + "start": 88.86, + "end": 89.28, + "confidence": 0.998 + }, + { + "text": "si", + "start": 89.28, + "end": 89.56, + "confidence": 0.999 + }, + { + "text": "j'en", + "start": 89.56, + "end": 89.84, + "confidence": 0.996 + }, + { + "text": "crois", + "start": 89.84, + "end": 89.88, + "confidence": 0.984 + }, + { + "text": "Nicolas,", + "start": 89.88, + "end": 90.54, + "confidence": 0.681 + }, + { + "text": "serait", + "start": 90.54, + "end": 90.94, + "confidence": 0.885 + }, + { + "text": "caractérisé", + "start": 90.94, + "end": 91.8, + "confidence": 0.993 + }, + { + "text": "par", + "start": 91.8, + "end": 92.12, + "confidence": 0.997 + }, + { + "text": "un", + "start": 92.12, + "end": 92.44, + "confidence": 0.997 + }, + { + "text": "mélange", + "start": 92.44, + "end": 92.98, + "confidence": 0.999 + }, + { + "text": "de", + "start": 92.98, + "end": 93.4, + "confidence": 0.998 + }, + { + "text": "dépendance", + "start": 93.4, + "end": 94.24, + "confidence": 0.953 + }, + { + "text": "et", + "start": 94.24, + "end": 94.54, + "confidence": 0.998 + }, + { + "text": "de", + "start": 94.54, + "end": 94.68, + "confidence": 0.999 + }, + { + "text": "rejet.", + "start": 94.68, + "end": 95.14, + "confidence": 0.993 + } + ] + }, + { + "id": 19, + "seek": 7684, + "start": 95.78, + "end": 102.86, + "text": " Bon, en vrai, il faudrait remonter très très finement toute l'histoire des objets techniques et de leur insertion dans nos vies", + "tokens": [ + 7368, + 11, + 465, + 17815, + 11, + 1930, + 38694, + 8645, + 890, + 41806, + 5732, + 5732, + 962, + 1712, + 14953, + 287, + 6, + 29093, + 730, + 1111, + 25349, + 7512, + 1030, + 368, + 9580, + 8969, + 313, + 2680, + 3269, + 371, + 530 + ], + "temperature": 0.0, + "avg_logprob": -0.06937986261704389, + "compression_ratio": 1.6787003610108304, + "no_speech_prob": 5.948771558905719e-06, + "confidence": 0.941, + "words": [ + { + "text": "Bon,", + "start": 95.78, + "end": 96.38, + "confidence": 
0.792 + }, + { + "text": "en", + "start": 96.38, + "end": 96.52, + "confidence": 0.998 + }, + { + "text": "vrai,", + "start": 96.52, + "end": 97.14, + "confidence": 0.994 + }, + { + "text": "il", + "start": 97.14, + "end": 97.18, + "confidence": 0.998 + }, + { + "text": "faudrait", + "start": 97.18, + "end": 97.58, + "confidence": 0.997 + }, + { + "text": "remonter", + "start": 97.58, + "end": 98.08, + "confidence": 0.997 + }, + { + "text": "très", + "start": 98.08, + "end": 98.6, + "confidence": 0.997 + }, + { + "text": "très", + "start": 98.6, + "end": 98.7, + "confidence": 0.768 + }, + { + "text": "finement", + "start": 98.7, + "end": 99.32, + "confidence": 0.849 + }, + { + "text": "toute", + "start": 99.32, + "end": 99.7, + "confidence": 0.984 + }, + { + "text": "l'histoire", + "start": 99.7, + "end": 100.06, + "confidence": 0.997 + }, + { + "text": "des", + "start": 100.06, + "end": 100.24, + "confidence": 0.998 + }, + { + "text": "objets", + "start": 100.24, + "end": 100.48, + "confidence": 0.999 + }, + { + "text": "techniques", + "start": 100.48, + "end": 101.02, + "confidence": 0.984 + }, + { + "text": "et", + "start": 101.02, + "end": 101.48, + "confidence": 0.527 + }, + { + "text": "de", + "start": 101.48, + "end": 101.68, + "confidence": 0.994 + }, + { + "text": "leur", + "start": 101.68, + "end": 101.84, + "confidence": 0.833 + }, + { + "text": "insertion", + "start": 101.84, + "end": 102.32, + "confidence": 0.994 + }, + { + "text": "dans", + "start": 102.32, + "end": 102.48, + "confidence": 0.99 + }, + { + "text": "nos", + "start": 102.48, + "end": 102.66, + "confidence": 0.998 + }, + { + "text": "vies", + "start": 102.66, + "end": 102.86, + "confidence": 0.998 + } + ] + }, + { + "id": 20, + "seek": 7684, + "start": 102.9, + "end": 105.74, + "text": " pour déterminer si ce rapport est totalement inédit.", + "tokens": [ + 2016, + 2795, + 29725, + 260, + 1511, + 1769, + 18018, + 871, + 45203, + 294, + 7811, + 270, + 13 + ], + "temperature": 0.0, + 
"avg_logprob": -0.06937986261704389, + "compression_ratio": 1.6787003610108304, + "no_speech_prob": 5.948771558905719e-06, + "confidence": 0.981, + "words": [ + { + "text": "pour", + "start": 102.9, + "end": 103.06, + "confidence": 0.822 + }, + { + "text": "déterminer", + "start": 103.06, + "end": 103.66, + "confidence": 0.997 + }, + { + "text": "si", + "start": 103.66, + "end": 103.76, + "confidence": 0.992 + }, + { + "text": "ce", + "start": 103.76, + "end": 103.94, + "confidence": 0.997 + }, + { + "text": "rapport", + "start": 103.94, + "end": 104.26, + "confidence": 0.997 + }, + { + "text": "est", + "start": 104.26, + "end": 104.74, + "confidence": 0.998 + }, + { + "text": "totalement", + "start": 104.74, + "end": 105.3, + "confidence": 0.999 + }, + { + "text": "inédit.", + "start": 105.3, + "end": 105.74, + "confidence": 0.999 + } + ] + }, + { + "id": 21, + "seek": 10584, + "start": 106.1, + "end": 109.34, + "text": " Mais j'ai l'impression comme ça que Nicolas ne se trompe pas vraiment.", + "tokens": [ + 6313, + 361, + 6, + 1301, + 287, + 6, + 36107, + 5173, + 2788, + 631, + 38268, + 408, + 369, + 504, + 298, + 494, + 1736, + 8322, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07386374800172571, + "compression_ratio": 1.685512367491166, + "no_speech_prob": 5.46344235772267e-05, + "confidence": 0.94, + "words": [ + { + "text": "Mais", + "start": 106.1, + "end": 106.36, + "confidence": 0.947 + }, + { + "text": "j'ai", + "start": 106.36, + "end": 106.92, + "confidence": 0.941 + }, + { + "text": "l'impression", + "start": 106.92, + "end": 107.36, + "confidence": 0.996 + }, + { + "text": "comme", + "start": 107.36, + "end": 107.56, + "confidence": 0.641 + }, + { + "text": "ça", + "start": 107.56, + "end": 107.82, + "confidence": 0.978 + }, + { + "text": "que", + "start": 107.82, + "end": 107.96, + "confidence": 0.976 + }, + { + "text": "Nicolas", + "start": 107.96, + "end": 108.46, + "confidence": 0.985 + }, + { + "text": "ne", + "start": 108.46, + "end": 
108.66, + "confidence": 0.726 + }, + { + "text": "se", + "start": 108.66, + "end": 108.7, + "confidence": 0.991 + }, + { + "text": "trompe", + "start": 108.7, + "end": 108.88, + "confidence": 0.995 + }, + { + "text": "pas", + "start": 108.88, + "end": 109.08, + "confidence": 0.999 + }, + { + "text": "vraiment.", + "start": 109.08, + "end": 109.34, + "confidence": 0.991 + } + ] + }, + { + "id": 22, + "seek": 10584, + "start": 109.88, + "end": 114.98, + "text": " Pour autant que je sache, il y a eu plein de discussions autour de la voiture ou même du téléphone.", + "tokens": [ + 8732, + 34081, + 631, + 1506, + 262, + 6000, + 11, + 1930, + 288, + 257, + 2228, + 21088, + 368, + 11088, + 30249, + 368, + 635, + 38859, + 2820, + 5698, + 1581, + 47159, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07386374800172571, + "compression_ratio": 1.685512367491166, + "no_speech_prob": 5.46344235772267e-05, + "confidence": 0.969, + "words": [ + { + "text": "Pour", + "start": 109.88, + "end": 110.08, + "confidence": 0.997 + }, + { + "text": "autant", + "start": 110.08, + "end": 110.24, + "confidence": 1.0 + }, + { + "text": "que", + "start": 110.24, + "end": 110.42, + "confidence": 0.988 + }, + { + "text": "je", + "start": 110.42, + "end": 110.52, + "confidence": 0.998 + }, + { + "text": "sache,", + "start": 110.52, + "end": 111.14, + "confidence": 0.963 + }, + { + "text": "il", + "start": 111.14, + "end": 111.18, + "confidence": 0.997 + }, + { + "text": "y", + "start": 111.18, + "end": 111.32, + "confidence": 0.992 + }, + { + "text": "a", + "start": 111.32, + "end": 111.36, + "confidence": 0.993 + }, + { + "text": "eu", + "start": 111.36, + "end": 111.68, + "confidence": 0.998 + }, + { + "text": "plein", + "start": 111.68, + "end": 111.88, + "confidence": 0.974 + }, + { + "text": "de", + "start": 111.88, + "end": 112.06, + "confidence": 0.997 + }, + { + "text": "discussions", + "start": 112.06, + "end": 112.6, + "confidence": 0.799 + }, + { + "text": "autour", + "start": 
112.6, + "end": 112.94, + "confidence": 0.995 + }, + { + "text": "de", + "start": 112.94, + "end": 113.46, + "confidence": 0.997 + }, + { + "text": "la", + "start": 113.46, + "end": 113.52, + "confidence": 0.998 + }, + { + "text": "voiture", + "start": 113.52, + "end": 113.86, + "confidence": 1.0 + }, + { + "text": "ou", + "start": 113.86, + "end": 114.06, + "confidence": 0.765 + }, + { + "text": "même", + "start": 114.06, + "end": 114.44, + "confidence": 0.996 + }, + { + "text": "du", + "start": 114.44, + "end": 114.6, + "confidence": 0.995 + }, + { + "text": "téléphone.", + "start": 114.6, + "end": 114.98, + "confidence": 0.999 + } + ] + }, + { + "id": 23, + "seek": 10584, + "start": 115.34, + "end": 119.84, + "text": " Mais la dépendance n'était pas du même ordre. Donc le rejet non plus n'était pas du même ordre.", + "tokens": [ + 6313, + 635, + 45768, + 719, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 4792, + 265, + 13, + 7477, + 476, + 319, + 7108, + 2107, + 1804, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 4792, + 265, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07386374800172571, + "compression_ratio": 1.685512367491166, + "no_speech_prob": 5.46344235772267e-05, + "confidence": 0.972, + "words": [ + { + "text": "Mais", + "start": 115.34, + "end": 115.72, + "confidence": 0.994 + }, + { + "text": "la", + "start": 115.72, + "end": 116.02, + "confidence": 0.937 + }, + { + "text": "dépendance", + "start": 116.02, + "end": 116.4, + "confidence": 0.997 + }, + { + "text": "n'était", + "start": 116.4, + "end": 116.62, + "confidence": 0.994 + }, + { + "text": "pas", + "start": 116.62, + "end": 117.0, + "confidence": 0.998 + }, + { + "text": "du", + "start": 117.0, + "end": 117.16, + "confidence": 0.995 + }, + { + "text": "même", + "start": 117.16, + "end": 117.46, + "confidence": 0.999 + }, + { + "text": "ordre.", + "start": 117.46, + "end": 117.78, + "confidence": 0.999 + }, + { + "text": "Donc", + "start": 117.78, + "end": 117.98, + "confidence": 0.804 + 
}, + { + "text": "le", + "start": 117.98, + "end": 118.34, + "confidence": 0.715 + }, + { + "text": "rejet", + "start": 118.34, + "end": 118.62, + "confidence": 0.999 + }, + { + "text": "non", + "start": 118.62, + "end": 118.78, + "confidence": 0.975 + }, + { + "text": "plus", + "start": 118.78, + "end": 118.94, + "confidence": 0.996 + }, + { + "text": "n'était", + "start": 118.94, + "end": 119.12, + "confidence": 0.987 + }, + { + "text": "pas", + "start": 119.12, + "end": 119.3, + "confidence": 0.998 + }, + { + "text": "du", + "start": 119.3, + "end": 119.38, + "confidence": 0.995 + }, + { + "text": "même", + "start": 119.38, + "end": 119.56, + "confidence": 0.999 + }, + { + "text": "ordre.", + "start": 119.56, + "end": 119.84, + "confidence": 0.999 + } + ] + }, + { + "id": 24, + "seek": 10584, + "start": 119.98, + "end": 123.02, + "text": " On peut adorer sa bagnole, en avoir besoin pour plein de choses.", + "tokens": [ + 1282, + 5977, + 614, + 17618, + 601, + 3411, + 1771, + 306, + 11, + 465, + 10853, + 19207, + 2016, + 21088, + 368, + 14488, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07386374800172571, + "compression_ratio": 1.685512367491166, + "no_speech_prob": 5.46344235772267e-05, + "confidence": 0.991, + "words": [ + { + "text": "On", + "start": 119.98, + "end": 120.18, + "confidence": 0.996 + }, + { + "text": "peut", + "start": 120.18, + "end": 120.38, + "confidence": 0.997 + }, + { + "text": "adorer", + "start": 120.38, + "end": 120.66, + "confidence": 0.99 + }, + { + "text": "sa", + "start": 120.66, + "end": 120.88, + "confidence": 0.985 + }, + { + "text": "bagnole,", + "start": 120.88, + "end": 121.46, + "confidence": 0.984 + }, + { + "text": "en", + "start": 121.46, + "end": 121.56, + "confidence": 0.989 + }, + { + "text": "avoir", + "start": 121.56, + "end": 121.74, + "confidence": 0.998 + }, + { + "text": "besoin", + "start": 121.74, + "end": 122.1, + "confidence": 0.999 + }, + { + "text": "pour", + "start": 122.1, + "end": 122.34, + 
"confidence": 0.987 + }, + { + "text": "plein", + "start": 122.34, + "end": 122.68, + "confidence": 0.989 + }, + { + "text": "de", + "start": 122.68, + "end": 122.8, + "confidence": 0.998 + }, + { + "text": "choses.", + "start": 122.8, + "end": 123.02, + "confidence": 0.989 + } + ] + }, + { + "id": 25, + "seek": 10584, + "start": 123.28, + "end": 126.36, + "text": " Et bien, le soir, quand on va se coucher, on la laisse.", + "tokens": [ + 3790, + 3610, + 11, + 476, + 27105, + 11, + 6932, + 322, + 2773, + 369, + 1384, + 6759, + 11, + 322, + 635, + 30969, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07386374800172571, + "compression_ratio": 1.685512367491166, + "no_speech_prob": 5.46344235772267e-05, + "confidence": 0.895, + "words": [ + { + "text": "Et", + "start": 123.28, + "end": 123.46, + "confidence": 0.667 + }, + { + "text": "bien,", + "start": 123.46, + "end": 123.86, + "confidence": 0.469 + }, + { + "text": "le", + "start": 123.86, + "end": 123.98, + "confidence": 0.996 + }, + { + "text": "soir,", + "start": 123.98, + "end": 124.68, + "confidence": 0.999 + }, + { + "text": "quand", + "start": 124.68, + "end": 124.9, + "confidence": 0.997 + }, + { + "text": "on", + "start": 124.9, + "end": 125.02, + "confidence": 0.998 + }, + { + "text": "va", + "start": 125.02, + "end": 125.14, + "confidence": 0.996 + }, + { + "text": "se", + "start": 125.14, + "end": 125.38, + "confidence": 0.988 + }, + { + "text": "coucher,", + "start": 125.38, + "end": 125.8, + "confidence": 0.987 + }, + { + "text": "on", + "start": 125.8, + "end": 126.02, + "confidence": 0.995 + }, + { + "text": "la", + "start": 126.02, + "end": 126.22, + "confidence": 0.802 + }, + { + "text": "laisse.", + "start": 126.22, + "end": 126.36, + "confidence": 0.999 + } + ] + }, + { + "id": 26, + "seek": 10584, + "start": 126.98, + "end": 130.48, + "text": " On ne l'a pas dans la main quand on est au lit, on ne l'emmène pas au chiottes.", + "tokens": [ + 1282, + 408, + 287, + 6, + 64, + 1736, + 2680, + 
635, + 2135, + 6932, + 322, + 871, + 1609, + 7997, + 11, + 322, + 408, + 287, + 6, + 443, + 76, + 18832, + 1736, + 1609, + 13228, + 1521, + 279, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07386374800172571, + "compression_ratio": 1.685512367491166, + "no_speech_prob": 5.46344235772267e-05, + "confidence": 0.914, + "words": [ + { + "text": "On", + "start": 126.98, + "end": 127.32, + "confidence": 0.954 + }, + { + "text": "ne", + "start": 127.32, + "end": 127.36, + "confidence": 0.803 + }, + { + "text": "l'a", + "start": 127.36, + "end": 127.48, + "confidence": 0.974 + }, + { + "text": "pas", + "start": 127.48, + "end": 127.68, + "confidence": 0.999 + }, + { + "text": "dans", + "start": 127.68, + "end": 127.8, + "confidence": 0.996 + }, + { + "text": "la", + "start": 127.8, + "end": 128.06, + "confidence": 0.994 + }, + { + "text": "main", + "start": 128.06, + "end": 128.26, + "confidence": 0.999 + }, + { + "text": "quand", + "start": 128.26, + "end": 128.44, + "confidence": 0.935 + }, + { + "text": "on", + "start": 128.44, + "end": 128.62, + "confidence": 0.997 + }, + { + "text": "est", + "start": 128.62, + "end": 128.68, + "confidence": 0.993 + }, + { + "text": "au", + "start": 128.68, + "end": 129.04, + "confidence": 0.984 + }, + { + "text": "lit,", + "start": 129.04, + "end": 129.14, + "confidence": 0.999 + }, + { + "text": "on", + "start": 129.14, + "end": 129.26, + "confidence": 0.427 + }, + { + "text": "ne", + "start": 129.26, + "end": 129.3, + "confidence": 0.963 + }, + { + "text": "l'emmène", + "start": 129.3, + "end": 129.5, + "confidence": 0.992 + }, + { + "text": "pas", + "start": 129.5, + "end": 129.68, + "confidence": 0.997 + }, + { + "text": "au", + "start": 129.68, + "end": 129.86, + "confidence": 0.674 + }, + { + "text": "chiottes.", + "start": 129.86, + "end": 130.48, + "confidence": 0.828 + } + ] + }, + { + "id": 27, + "seek": 13084, + "start": 130.86, + "end": 136.9, + "text": " On pouvait être énervé par son môme qui occupait la ligne de 
téléphone pendant une heure chaque soir pour discuter avec un copain.", + "tokens": [ + 1282, + 45913, + 7418, + 45045, + 15797, + 971, + 1872, + 275, + 2851, + 1398, + 1956, + 8073, + 1001, + 635, + 34207, + 368, + 47159, + 17338, + 2251, + 30027, + 18920, + 27105, + 2016, + 2983, + 20314, + 4163, + 517, + 2971, + 491, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06199159333200166, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.6126719856401905e-05, + "confidence": 0.955, + "words": [ + { + "text": "On", + "start": 130.86, + "end": 131.04, + "confidence": 0.983 + }, + { + "text": "pouvait", + "start": 131.04, + "end": 131.28, + "confidence": 0.989 + }, + { + "text": "être", + "start": 131.28, + "end": 131.48, + "confidence": 0.996 + }, + { + "text": "énervé", + "start": 131.48, + "end": 132.22, + "confidence": 0.906 + }, + { + "text": "par", + "start": 132.22, + "end": 132.44, + "confidence": 0.991 + }, + { + "text": "son", + "start": 132.44, + "end": 132.7, + "confidence": 0.998 + }, + { + "text": "môme", + "start": 132.7, + "end": 133.1, + "confidence": 0.832 + }, + { + "text": "qui", + "start": 133.1, + "end": 133.34, + "confidence": 0.919 + }, + { + "text": "occupait", + "start": 133.34, + "end": 133.76, + "confidence": 0.991 + }, + { + "text": "la", + "start": 133.76, + "end": 133.8, + "confidence": 0.992 + }, + { + "text": "ligne", + "start": 133.8, + "end": 134.08, + "confidence": 0.999 + }, + { + "text": "de", + "start": 134.08, + "end": 134.14, + "confidence": 0.997 + }, + { + "text": "téléphone", + "start": 134.14, + "end": 134.6, + "confidence": 0.985 + }, + { + "text": "pendant", + "start": 134.6, + "end": 134.82, + "confidence": 0.981 + }, + { + "text": "une", + "start": 134.82, + "end": 135.2, + "confidence": 0.87 + }, + { + "text": "heure", + "start": 135.2, + "end": 135.36, + "confidence": 0.998 + }, + { + "text": "chaque", + "start": 135.36, + "end": 135.54, + "confidence": 0.991 + }, + { + "text": "soir", + "start": 
135.54, + "end": 135.8, + "confidence": 0.996 + }, + { + "text": "pour", + "start": 135.8, + "end": 135.96, + "confidence": 0.79 + }, + { + "text": "discuter", + "start": 135.96, + "end": 136.28, + "confidence": 0.997 + }, + { + "text": "avec", + "start": 136.28, + "end": 136.48, + "confidence": 0.996 + }, + { + "text": "un", + "start": 136.48, + "end": 136.6, + "confidence": 0.999 + }, + { + "text": "copain.", + "start": 136.6, + "end": 136.9, + "confidence": 0.998 + } + ] + }, + { + "id": 28, + "seek": 13084, + "start": 137.28, + "end": 141.88, + "text": " Mais ça ne ressemblait pas à ce qu'on peut ressentir à voir ce même môme aujourd'hui,", + "tokens": [ + 6313, + 2788, + 408, + 725, + 15750, + 35235, + 1736, + 1531, + 1769, + 421, + 6, + 266, + 5977, + 24689, + 317, + 347, + 1531, + 10695, + 1769, + 5698, + 275, + 2851, + 1398, + 14023, + 6, + 10556, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.06199159333200166, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.6126719856401905e-05, + "confidence": 0.959, + "words": [ + { + "text": "Mais", + "start": 137.28, + "end": 137.46, + "confidence": 0.993 + }, + { + "text": "ça", + "start": 137.46, + "end": 137.68, + "confidence": 0.938 + }, + { + "text": "ne", + "start": 137.68, + "end": 137.94, + "confidence": 0.998 + }, + { + "text": "ressemblait", + "start": 137.94, + "end": 138.4, + "confidence": 0.991 + }, + { + "text": "pas", + "start": 138.4, + "end": 138.76, + "confidence": 0.995 + }, + { + "text": "à", + "start": 138.76, + "end": 138.94, + "confidence": 0.988 + }, + { + "text": "ce", + "start": 138.94, + "end": 138.98, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 138.98, + "end": 139.1, + "confidence": 0.986 + }, + { + "text": "peut", + "start": 139.1, + "end": 139.48, + "confidence": 0.988 + }, + { + "text": "ressentir", + "start": 139.48, + "end": 140.12, + "confidence": 0.997 + }, + { + "text": "à", + "start": 140.12, + "end": 140.32, + "confidence": 0.498 + }, + { + 
"text": "voir", + "start": 140.32, + "end": 140.46, + "confidence": 0.855 + }, + { + "text": "ce", + "start": 140.46, + "end": 140.68, + "confidence": 0.989 + }, + { + "text": "même", + "start": 140.68, + "end": 140.94, + "confidence": 0.984 + }, + { + "text": "môme", + "start": 140.94, + "end": 141.34, + "confidence": 0.998 + }, + { + "text": "aujourd'hui,", + "start": 141.34, + "end": 141.88, + "confidence": 0.988 + } + ] + }, + { + "id": 29, + "seek": 13084, + "start": 142.14, + "end": 146.3, + "text": " continuellement avec son smartphone dans la main, comme si c'était une sorte de pacemaker externe,", + "tokens": [ + 2354, + 285, + 1712, + 4163, + 1872, + 13307, + 2680, + 635, + 2135, + 11, + 5173, + 1511, + 269, + 6, + 9743, + 2251, + 25559, + 368, + 15165, + 49523, + 454, + 391, + 716, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.06199159333200166, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.6126719856401905e-05, + "confidence": 0.963, + "words": [ + { + "text": "continuellement", + "start": 142.14, + "end": 142.94, + "confidence": 0.971 + }, + { + "text": "avec", + "start": 142.94, + "end": 143.18, + "confidence": 0.805 + }, + { + "text": "son", + "start": 143.18, + "end": 143.38, + "confidence": 0.995 + }, + { + "text": "smartphone", + "start": 143.38, + "end": 143.76, + "confidence": 0.978 + }, + { + "text": "dans", + "start": 143.76, + "end": 143.94, + "confidence": 0.979 + }, + { + "text": "la", + "start": 143.94, + "end": 144.0, + "confidence": 0.996 + }, + { + "text": "main,", + "start": 144.0, + "end": 144.36, + "confidence": 0.998 + }, + { + "text": "comme", + "start": 144.36, + "end": 144.52, + "confidence": 0.835 + }, + { + "text": "si", + "start": 144.52, + "end": 144.64, + "confidence": 0.975 + }, + { + "text": "c'était", + "start": 144.64, + "end": 144.82, + "confidence": 0.991 + }, + { + "text": "une", + "start": 144.82, + "end": 145.06, + "confidence": 0.989 + }, + { + "text": "sorte", + "start": 145.06, + "end": 
145.22, + "confidence": 0.997 + }, + { + "text": "de", + "start": 145.22, + "end": 145.3, + "confidence": 0.982 + }, + { + "text": "pacemaker", + "start": 145.3, + "end": 145.82, + "confidence": 0.917 + }, + { + "text": "externe,", + "start": 145.82, + "end": 146.3, + "confidence": 0.992 + } + ] + }, + { + "id": 30, + "seek": 13084, + "start": 146.34, + "end": 148.84, + "text": " comme si le lâcher allait entraîner sa mort immédiate.", + "tokens": [ + 5173, + 1511, + 476, + 48835, + 6759, + 439, + 1001, + 22284, + 7517, + 1193, + 601, + 6599, + 3397, + 526, + 4504, + 473, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06199159333200166, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.6126719856401905e-05, + "confidence": 0.989, + "words": [ + { + "text": "comme", + "start": 146.34, + "end": 146.58, + "confidence": 0.996 + }, + { + "text": "si", + "start": 146.58, + "end": 146.76, + "confidence": 0.994 + }, + { + "text": "le", + "start": 146.76, + "end": 146.84, + "confidence": 0.997 + }, + { + "text": "lâcher", + "start": 146.84, + "end": 147.36, + "confidence": 0.969 + }, + { + "text": "allait", + "start": 147.36, + "end": 147.56, + "confidence": 0.993 + }, + { + "text": "entraîner", + "start": 147.56, + "end": 147.86, + "confidence": 0.978 + }, + { + "text": "sa", + "start": 147.86, + "end": 148.0, + "confidence": 0.999 + }, + { + "text": "mort", + "start": 148.0, + "end": 148.22, + "confidence": 0.998 + }, + { + "text": "immédiate.", + "start": 148.22, + "end": 148.84, + "confidence": 0.997 + } + ] + }, + { + "id": 31, + "seek": 13084, + "start": 149.04, + "end": 151.96, + "text": " Bon, je dis ça pour le môme, mais c'est évidemment valable pour nous aussi.", + "tokens": [ + 7368, + 11, + 1506, + 717, + 2788, + 2016, + 476, + 275, + 2851, + 1398, + 11, + 2420, + 269, + 6, + 377, + 24724, + 1323, + 712, + 2016, + 4666, + 6212, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06199159333200166, + "compression_ratio": 1.6127167630057804, + 
"no_speech_prob": 1.6126719856401905e-05, + "confidence": 0.974, + "words": [ + { + "text": "Bon,", + "start": 149.04, + "end": 149.28, + "confidence": 0.918 + }, + { + "text": "je", + "start": 149.28, + "end": 149.32, + "confidence": 0.934 + }, + { + "text": "dis", + "start": 149.32, + "end": 149.46, + "confidence": 0.988 + }, + { + "text": "ça", + "start": 149.46, + "end": 149.64, + "confidence": 0.994 + }, + { + "text": "pour", + "start": 149.64, + "end": 149.74, + "confidence": 0.997 + }, + { + "text": "le", + "start": 149.74, + "end": 149.88, + "confidence": 0.995 + }, + { + "text": "môme,", + "start": 149.88, + "end": 150.32, + "confidence": 0.998 + }, + { + "text": "mais", + "start": 150.32, + "end": 150.52, + "confidence": 0.791 + }, + { + "text": "c'est", + "start": 150.52, + "end": 150.82, + "confidence": 0.981 + }, + { + "text": "évidemment", + "start": 150.82, + "end": 151.14, + "confidence": 0.98 + }, + { + "text": "valable", + "start": 151.14, + "end": 151.48, + "confidence": 0.998 + }, + { + "text": "pour", + "start": 151.48, + "end": 151.62, + "confidence": 0.997 + }, + { + "text": "nous", + "start": 151.62, + "end": 151.76, + "confidence": 0.999 + }, + { + "text": "aussi.", + "start": 151.76, + "end": 151.96, + "confidence": 0.996 + } + ] + }, + { + "id": 32, + "seek": 13084, + "start": 152.34, + "end": 158.22, + "text": " Donc, rapport inédit. D'accord. 
Mais pourquoi a-t-on l'impression qu'on n'en sortira jamais?", + "tokens": [ + 7477, + 11, + 18018, + 294, + 7811, + 270, + 13, + 413, + 6, + 19947, + 13, + 6313, + 19934, + 257, + 12, + 83, + 12, + 266, + 287, + 6, + 36107, + 421, + 6, + 266, + 297, + 6, + 268, + 1333, + 4271, + 14540, + 2506 + ], + "temperature": 0.0, + "avg_logprob": -0.06199159333200166, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.6126719856401905e-05, + "confidence": 0.948, + "words": [ + { + "text": "Donc,", + "start": 152.34, + "end": 153.46, + "confidence": 0.991 + }, + { + "text": "rapport", + "start": 153.46, + "end": 153.66, + "confidence": 0.976 + }, + { + "text": "inédit.", + "start": 153.66, + "end": 154.24, + "confidence": 0.996 + }, + { + "text": "D'accord.", + "start": 154.24, + "end": 155.48, + "confidence": 0.978 + }, + { + "text": "Mais", + "start": 155.48, + "end": 155.82, + "confidence": 0.557 + }, + { + "text": "pourquoi", + "start": 155.82, + "end": 156.32, + "confidence": 0.994 + }, + { + "text": "a-t-on", + "start": 156.32, + "end": 156.68, + "confidence": 0.94 + }, + { + "text": "l'impression", + "start": 156.68, + "end": 157.06, + "confidence": 0.999 + }, + { + "text": "qu'on", + "start": 157.06, + "end": 157.26, + "confidence": 0.995 + }, + { + "text": "n'en", + "start": 157.26, + "end": 157.44, + "confidence": 0.878 + }, + { + "text": "sortira", + "start": 157.44, + "end": 157.9, + "confidence": 0.989 + }, + { + "text": "jamais?", + "start": 157.9, + "end": 158.22, + "confidence": 0.998 + } + ] + }, + { + "id": 33, + "seek": 15884, + "start": 158.86, + "end": 165.32, + "text": " Est-ce qu'il faut en remettre la faute sur les gens qui ont créé cet outil merveilleux et diabolique, et diabolique parce que merveilleux?", + "tokens": [ + 4410, + 12, + 384, + 421, + 6, + 388, + 8487, + 465, + 890, + 40681, + 635, + 2050, + 1169, + 1022, + 1512, + 10668, + 1956, + 6592, + 15609, + 526, + 8603, + 484, + 388, + 3551, + 303, + 3409, + 2449, + 1030, + 1026, + 
14923, + 1925, + 11, + 1030, + 1026, + 14923, + 1925, + 6992, + 631, + 3551, + 303, + 3409, + 2449, + 2506 + ], + "temperature": 0.0, + "avg_logprob": -0.079788723507443, + "compression_ratio": 1.6517857142857142, + "no_speech_prob": 5.4980162531137466e-05, + "confidence": 0.962, + "words": [ + { + "text": "Est-ce", + "start": 158.86, + "end": 159.34, + "confidence": 0.981 + }, + { + "text": "qu'il", + "start": 159.34, + "end": 159.46, + "confidence": 0.997 + }, + { + "text": "faut", + "start": 159.46, + "end": 159.62, + "confidence": 0.999 + }, + { + "text": "en", + "start": 159.62, + "end": 159.78, + "confidence": 0.97 + }, + { + "text": "remettre", + "start": 159.78, + "end": 160.12, + "confidence": 0.999 + }, + { + "text": "la", + "start": 160.12, + "end": 160.34, + "confidence": 0.998 + }, + { + "text": "faute", + "start": 160.34, + "end": 160.66, + "confidence": 0.986 + }, + { + "text": "sur", + "start": 160.66, + "end": 160.94, + "confidence": 0.995 + }, + { + "text": "les", + "start": 160.94, + "end": 161.28, + "confidence": 0.995 + }, + { + "text": "gens", + "start": 161.28, + "end": 161.44, + "confidence": 1.0 + }, + { + "text": "qui", + "start": 161.44, + "end": 161.58, + "confidence": 0.984 + }, + { + "text": "ont", + "start": 161.58, + "end": 161.72, + "confidence": 0.998 + }, + { + "text": "créé", + "start": 161.72, + "end": 162.3, + "confidence": 0.99 + }, + { + "text": "cet", + "start": 162.3, + "end": 162.46, + "confidence": 0.852 + }, + { + "text": "outil", + "start": 162.46, + "end": 162.78, + "confidence": 0.99 + }, + { + "text": "merveilleux", + "start": 162.78, + "end": 163.34, + "confidence": 0.994 + }, + { + "text": "et", + "start": 163.34, + "end": 163.5, + "confidence": 0.954 + }, + { + "text": "diabolique,", + "start": 163.5, + "end": 163.86, + "confidence": 0.992 + }, + { + "text": "et", + "start": 163.86, + "end": 163.92, + "confidence": 0.54 + }, + { + "text": "diabolique", + "start": 163.92, + "end": 164.4, + "confidence": 0.951 + }, 
+ { + "text": "parce", + "start": 164.4, + "end": 164.66, + "confidence": 0.703 + }, + { + "text": "que", + "start": 164.66, + "end": 164.84, + "confidence": 0.99 + }, + { + "text": "merveilleux?", + "start": 164.84, + "end": 165.32, + "confidence": 0.997 + } + ] + }, + { + "id": 34, + "seek": 15884, + "start": 166.34, + "end": 168.82, + "text": " Les économistes parlent de dépendance du sentier.", + "tokens": [ + 6965, + 31171, + 22368, + 13734, + 317, + 368, + 45768, + 719, + 1581, + 2279, + 811, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.079788723507443, + "compression_ratio": 1.6517857142857142, + "no_speech_prob": 5.4980162531137466e-05, + "confidence": 0.984, + "words": [ + { + "text": "Les", + "start": 166.34, + "end": 167.04, + "confidence": 0.926 + }, + { + "text": "économistes", + "start": 167.04, + "end": 167.48, + "confidence": 0.998 + }, + { + "text": "parlent", + "start": 167.48, + "end": 167.68, + "confidence": 0.995 + }, + { + "text": "de", + "start": 167.68, + "end": 167.82, + "confidence": 0.995 + }, + { + "text": "dépendance", + "start": 167.82, + "end": 168.36, + "confidence": 0.985 + }, + { + "text": "du", + "start": 168.36, + "end": 168.5, + "confidence": 0.997 + }, + { + "text": "sentier.", + "start": 168.5, + "end": 168.82, + "confidence": 0.978 + } + ] + }, + { + "id": 35, + "seek": 16884, + "start": 168.86, + "end": 177.42, + "text": " C'est l'idée qu'on est sur un sentier qui a été établi, soit volontairement en marchant dessus, soit en définissant des bornes, en définissant une signalétique.", + "tokens": [ + 50364, + 383, + 6, + 377, + 287, + 6, + 34281, + 421, + 6, + 266, + 871, + 1022, + 517, + 2279, + 811, + 1956, + 257, + 8862, + 4823, + 455, + 2081, + 11, + 12703, + 40005, + 9020, + 518, + 465, + 8368, + 394, + 30677, + 11, + 12703, + 465, + 40763, + 29492, + 730, + 4232, + 279, + 11, + 465, + 40763, + 29492, + 2251, + 6358, + 42379, + 13, + 51414 + ], + "temperature": 0.0, + "avg_logprob": -0.10415018598238628, + 
"compression_ratio": 1.3916666666666666, + "no_speech_prob": 3.958350498578511e-05, + "confidence": 0.93, + "words": [ + { + "text": "C'est", + "start": 168.86, + "end": 169.1, + "confidence": 0.952 + }, + { + "text": "l'idée", + "start": 169.1, + "end": 169.34, + "confidence": 0.992 + }, + { + "text": "qu'on", + "start": 169.34, + "end": 169.7, + "confidence": 0.825 + }, + { + "text": "est", + "start": 169.7, + "end": 169.88, + "confidence": 0.631 + }, + { + "text": "sur", + "start": 169.88, + "end": 170.0, + "confidence": 0.993 + }, + { + "text": "un", + "start": 170.0, + "end": 170.54, + "confidence": 0.998 + }, + { + "text": "sentier", + "start": 170.54, + "end": 170.78, + "confidence": 0.99 + }, + { + "text": "qui", + "start": 170.78, + "end": 170.84, + "confidence": 0.961 + }, + { + "text": "a", + "start": 170.84, + "end": 170.94, + "confidence": 0.98 + }, + { + "text": "été", + "start": 170.94, + "end": 171.12, + "confidence": 0.996 + }, + { + "text": "établi,", + "start": 171.12, + "end": 171.9, + "confidence": 0.995 + }, + { + "text": "soit", + "start": 171.9, + "end": 172.12, + "confidence": 0.989 + }, + { + "text": "volontairement", + "start": 172.12, + "end": 172.72, + "confidence": 0.965 + }, + { + "text": "en", + "start": 172.72, + "end": 172.8, + "confidence": 0.941 + }, + { + "text": "marchant", + "start": 172.8, + "end": 173.06, + "confidence": 0.997 + }, + { + "text": "dessus,", + "start": 173.06, + "end": 174.24, + "confidence": 0.972 + }, + { + "text": "soit", + "start": 174.24, + "end": 174.92, + "confidence": 0.996 + }, + { + "text": "en", + "start": 174.92, + "end": 175.36, + "confidence": 0.991 + }, + { + "text": "définissant", + "start": 175.36, + "end": 175.5, + "confidence": 0.983 + }, + { + "text": "des", + "start": 175.5, + "end": 175.76, + "confidence": 0.984 + }, + { + "text": "bornes,", + "start": 175.76, + "end": 176.04, + "confidence": 0.975 + }, + { + "text": "en", + "start": 176.04, + "end": 176.08, + "confidence": 0.68 + }, + { 
+ "text": "définissant", + "start": 176.08, + "end": 176.58, + "confidence": 0.997 + }, + { + "text": "une", + "start": 176.58, + "end": 176.82, + "confidence": 0.944 + }, + { + "text": "signalétique.", + "start": 176.82, + "end": 177.42, + "confidence": 0.642 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_auto/bonjour.wav.words.json b/tests/expected/medium_auto/bonjour.wav.words.json new file mode 100644 index 0000000000000000000000000000000000000000..e0e9ea07a37c2520ea8ae3e722c2da4b0422e019 --- /dev/null +++ b/tests/expected/medium_auto/bonjour.wav.words.json @@ -0,0 +1,32 @@ +{ + "text": " Bonjour !", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.14, + "end": 0.94, + "text": " Bonjour !", + "tokens": [ + 50364, + 25431, + 2298, + 50402 + ], + "temperature": 0.0, + "avg_logprob": -0.7049755573272705, + "compression_ratio": 0.5294117647058824, + "no_speech_prob": 0.08610370755195618, + "confidence": 0.964, + "words": [ + { + "text": "Bonjour !", + "start": 0.14, + "end": 0.94, + "confidence": 0.964 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_auto/bonjour_vous_allez_bien.mp3.words.json b/tests/expected/medium_auto/bonjour_vous_allez_bien.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..5f571a743d7dc16d453a1c173965af6e6189638d --- /dev/null +++ b/tests/expected/medium_auto/bonjour_vous_allez_bien.mp3.words.json @@ -0,0 +1,134 @@ +{ + "text": " Bonjour ! Est-ce que vous allez bien ? Bonjour ! Est-ce que vous allez bien ?", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.42, + "end": 3.46, + "text": " Bonjour ! 
Est-ce que vous allez bien ?", + "tokens": [ + 50364, + 25431, + 2298, + 4410, + 12, + 384, + 631, + 2630, + 18146, + 3610, + 2506, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.3616662392249474, + "compression_ratio": 0.8260869565217391, + "no_speech_prob": 0.07169149070978165, + "confidence": 0.936, + "words": [ + { + "text": "Bonjour !", + "start": 0.42, + "end": 1.92, + "confidence": 0.874 + }, + { + "text": "Est-ce", + "start": 1.92, + "end": 2.16, + "confidence": 0.885 + }, + { + "text": "que", + "start": 2.16, + "end": 2.24, + "confidence": 0.988 + }, + { + "text": "vous", + "start": 2.24, + "end": 2.38, + "confidence": 0.996 + }, + { + "text": "allez", + "start": 2.38, + "end": 2.58, + "confidence": 0.99 + }, + { + "text": "bien ?", + "start": 2.58, + "end": 3.46, + "confidence": 0.999 + } + ] + }, + { + "id": 1, + "seek": 3000, + "start": 32.94, + "end": 35.86, + "text": " Bonjour ! Est-ce que vous allez bien ?", + "tokens": [ + 50364, + 25431, + 2298, + 4410, + 12, + 384, + 631, + 2630, + 18146, + 3610, + 2506, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.3065794431246244, + "compression_ratio": 0.8260869565217391, + "no_speech_prob": 0.40451279282569885, + "confidence": 0.933, + "words": [ + { + "text": "Bonjour !", + "start": 32.94, + "end": 34.44, + "confidence": 0.741 + }, + { + "text": "Est-ce", + "start": 34.44, + "end": 34.7, + "confidence": 0.92 + }, + { + "text": "que", + "start": 34.7, + "end": 34.76, + "confidence": 0.996 + }, + { + "text": "vous", + "start": 34.76, + "end": 34.9, + "confidence": 0.998 + }, + { + "text": "allez", + "start": 34.9, + "end": 35.1, + "confidence": 0.997 + }, + { + "text": "bien ?", + "start": 35.1, + "end": 35.86, + "confidence": 0.999 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_auto/empty.mp3.words.json b/tests/expected/medium_auto/empty.mp3.words.json new file mode 100644 index 
0000000000000000000000000000000000000000..2d7c98b0cfb1095180ba8e4f84476aa762ebf4ea --- /dev/null +++ b/tests/expected/medium_auto/empty.mp3.words.json @@ -0,0 +1,46 @@ +{ + "text": " Thanks for watching!", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.0, + "end": 2.52, + "text": " Thanks for watching!", + "tokens": [ + 50364, + 2561, + 337, + 1976, + 0, + 50518 + ], + "temperature": 0.0, + "avg_logprob": -0.8085990633283343, + "compression_ratio": 0.7142857142857143, + "no_speech_prob": 0.43768733739852905, + "confidence": 0.369, + "words": [ + { + "text": "Thanks", + "start": 0.0, + "end": 1.5, + "confidence": 0.056 + }, + { + "text": "for", + "start": 1.5, + "end": 2.46, + "confidence": 0.948 + }, + { + "text": "watching!", + "start": 2.46, + "end": 2.52, + "confidence": 0.948 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/medium_auto/gaenswein15.mp3.words.json b/tests/expected/medium_auto/gaenswein15.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..95877a2b6496c641672bd0060809b02564e7bd4c --- /dev/null +++ b/tests/expected/medium_auto/gaenswein15.mp3.words.json @@ -0,0 +1,316 @@ +{ + "text": " Die Wiederzulassung des Messbuchs von 1962 als Missale für die außerordentliche Form des römischen Rethus ist dann nicht so weitergegangen, wie sich Papst Benediktas gewünscht hatte. 
Das hat er als Emeritor so gemacht.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.84, + "end": 8.56, + "text": " Die Wiederzulassung des Messbuchs von 1962 als Missale für die außerordentliche Form des römischen Rethus", + "tokens": [ + 50364, + 3229, + 45742, + 89, + 425, + 40828, + 730, + 9847, + 65, + 37503, + 2957, + 39498, + 3907, + 5275, + 1220, + 2959, + 978, + 39428, + 765, + 7698, + 68, + 10126, + 730, + 367, + 32374, + 6282, + 497, + 3293, + 301, + 50814 + ], + "temperature": 0.0, + "avg_logprob": -0.2817046825702374, + "compression_ratio": 1.247191011235955, + "no_speech_prob": 0.2541781961917877, + "confidence": 0.869, + "words": [ + { + "text": "Die", + "start": 0.84, + "end": 1.12, + "confidence": 0.872 + }, + { + "text": "Wiederzulassung", + "start": 1.12, + "end": 1.92, + "confidence": 0.976 + }, + { + "text": "des", + "start": 1.92, + "end": 2.16, + "confidence": 0.986 + }, + { + "text": "Messbuchs", + "start": 2.16, + "end": 2.74, + "confidence": 0.947 + }, + { + "text": "von", + "start": 2.74, + "end": 3.28, + "confidence": 0.971 + }, + { + "text": "1962", + "start": 3.28, + "end": 4.9, + "confidence": 0.981 + }, + { + "text": "als", + "start": 4.9, + "end": 5.26, + "confidence": 0.925 + }, + { + "text": "Missale", + "start": 5.26, + "end": 5.76, + "confidence": 0.905 + }, + { + "text": "für", + "start": 5.76, + "end": 5.96, + "confidence": 0.955 + }, + { + "text": "die", + "start": 5.96, + "end": 6.12, + "confidence": 0.99 + }, + { + "text": "außerordentliche", + "start": 6.12, + "end": 7.06, + "confidence": 0.943 + }, + { + "text": "Form", + "start": 7.06, + "end": 7.32, + "confidence": 0.834 + }, + { + "text": "des", + "start": 7.32, + "end": 7.64, + "confidence": 0.863 + }, + { + "text": "römischen", + "start": 7.64, + "end": 8.04, + "confidence": 0.676 + }, + { + "text": "Rethus", + "start": 8.04, + "end": 8.56, + "confidence": 0.629 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 9.44, + "end": 12.74, + "text": " ist dann 
nicht so weitergegangen, wie sich Papst Benediktas gewünscht hatte.", + "tokens": [ + 50814, + 1418, + 3594, + 1979, + 370, + 8988, + 432, + 47152, + 11, + 3355, + 3041, + 15919, + 372, + 39753, + 9874, + 296, + 6906, + 3412, + 82, + 4701, + 13299, + 13, + 51014 + ], + "temperature": 0.0, + "avg_logprob": -0.2817046825702374, + "compression_ratio": 1.247191011235955, + "no_speech_prob": 0.2541781961917877, + "confidence": 0.889, + "words": [ + { + "text": "ist", + "start": 9.44, + "end": 9.66, + "confidence": 0.975 + }, + { + "text": "dann", + "start": 9.66, + "end": 9.82, + "confidence": 0.688 + }, + { + "text": "nicht", + "start": 9.82, + "end": 10.0, + "confidence": 0.996 + }, + { + "text": "so", + "start": 10.0, + "end": 10.16, + "confidence": 0.992 + }, + { + "text": "weitergegangen,", + "start": 10.16, + "end": 10.84, + "confidence": 0.811 + }, + { + "text": "wie", + "start": 10.9, + "end": 11.06, + "confidence": 0.988 + }, + { + "text": "sich", + "start": 11.06, + "end": 11.26, + "confidence": 0.964 + }, + { + "text": "Papst", + "start": 11.26, + "end": 11.56, + "confidence": 0.884 + }, + { + "text": "Benediktas", + "start": 11.56, + "end": 12.08, + "confidence": 0.764 + }, + { + "text": "gewünscht", + "start": 12.08, + "end": 12.56, + "confidence": 0.992 + }, + { + "text": "hatte.", + "start": 12.56, + "end": 12.74, + "confidence": 0.952 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 14.0, + "end": 15.48, + "text": " Das hat er als Emeritor so gemacht.", + "tokens": [ + 51014, + 2846, + 2385, + 1189, + 3907, + 18477, + 3029, + 370, + 12293, + 13, + 51114 + ], + "temperature": 0.0, + "avg_logprob": -0.2817046825702374, + "compression_ratio": 1.247191011235955, + "no_speech_prob": 0.2541781961917877, + "confidence": 0.442, + "words": [ + { + "text": "Das", + "start": 14.0, + "end": 14.2, + "confidence": 0.968 + }, + { + "text": "hat", + "start": 14.2, + "end": 14.36, + "confidence": 0.845 + }, + { + "text": "er", + "start": 14.36, + "end": 14.5, + 
"confidence": 0.99 + }, + { + "text": "als", + "start": 14.5, + "end": 14.7, + "confidence": 0.977 + }, + { + "text": "Emeritor", + "start": 14.7, + "end": 15.24, + "confidence": 0.429 + }, + { + "text": "so", + "start": 15.24, + "end": 15.42, + "confidence": 0.111 + }, + { + "text": "gemacht.", + "start": 15.42, + "end": 15.48, + "confidence": 0.09 + } + ] + } + ], + "language": "de" +} \ No newline at end of file diff --git a/tests/expected/medium_auto/gloria.mp3.words.json b/tests/expected/medium_auto/gloria.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..bf8edd2aeb5c62e31d63133e4b035fb81e3ac8af --- /dev/null +++ b/tests/expected/medium_auto/gloria.mp3.words.json @@ -0,0 +1,538 @@ +{ + "text": " Hello. How are you? Love. How are you? I'm okay. I will be. I said she could stay with us tomorrow, she feels better. Of course she can. This won't be for long. Well, you can stay as long as you want, my love. I really miss you.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 1.4, + "end": 1.74, + "text": " Hello.", + "tokens": [ + 50364, + 2425, + 13, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.239252830954159, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05016394704580307, + "confidence": 0.285, + "words": [ + { + "text": "Hello.", + "start": 1.4, + "end": 1.74, + "confidence": 0.285 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 2.32, + "end": 3.5, + "text": " How are you?", + "tokens": [ + 50464, + 1012, + 366, + 291, + 30, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.239252830954159, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05016394704580307, + "confidence": 0.786, + "words": [ + { + "text": "How", + "start": 2.32, + "end": 2.82, + "confidence": 0.551 + }, + { + "text": "are", + "start": 2.82, + "end": 3.24, + "confidence": 0.989 + }, + { + "text": "you?", + "start": 3.24, + "end": 3.5, + "confidence": 0.893 + } + ] + }, + { + "id": 2, + "seek": 0, + 
"start": 3.5, + "end": 4.26, + "text": " Love.", + "tokens": [ + 50564, + 5956, + 13, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.239252830954159, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05016394704580307, + "confidence": 0.244, + "words": [ + { + "text": "Love.", + "start": 3.5, + "end": 4.26, + "confidence": 0.244 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 5.5, + "end": 6.38, + "text": " How are you?", + "tokens": [ + 50664, + 1012, + 366, + 291, + 30, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.239252830954159, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05016394704580307, + "confidence": 0.926, + "words": [ + { + "text": "How", + "start": 5.5, + "end": 5.68, + "confidence": 0.851 + }, + { + "text": "are", + "start": 5.68, + "end": 6.08, + "confidence": 0.998 + }, + { + "text": "you?", + "start": 6.08, + "end": 6.38, + "confidence": 0.935 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 7.5, + "end": 9.18, + "text": " I'm okay. 
I will be.", + "tokens": [ + 50764, + 286, + 478, + 1392, + 13, + 286, + 486, + 312, + 13, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.239252830954159, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05016394704580307, + "confidence": 0.769, + "words": [ + { + "text": "I'm", + "start": 7.5, + "end": 7.52, + "confidence": 0.767 + }, + { + "text": "okay.", + "start": 7.52, + "end": 8.22, + "confidence": 0.488 + }, + { + "text": "I", + "start": 8.42, + "end": 8.64, + "confidence": 0.765 + }, + { + "text": "will", + "start": 8.64, + "end": 8.94, + "confidence": 0.949 + }, + { + "text": "be.", + "start": 8.94, + "end": 9.18, + "confidence": 0.997 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 9.5, + "end": 11.52, + "text": " I said she could stay with us tomorrow, she feels better.", + "tokens": [ + 50864, + 286, + 848, + 750, + 727, + 1754, + 365, + 505, + 4153, + 11, + 750, + 3417, + 1101, + 13, + 50964 + ], + "temperature": 0.0, + "avg_logprob": -0.239252830954159, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05016394704580307, + "confidence": 0.786, + "words": [ + { + "text": "I", + "start": 9.5, + "end": 9.52, + "confidence": 0.883 + }, + { + "text": "said", + "start": 9.52, + "end": 9.64, + "confidence": 0.856 + }, + { + "text": "she", + "start": 9.64, + "end": 9.78, + "confidence": 0.928 + }, + { + "text": "could", + "start": 9.78, + "end": 9.96, + "confidence": 0.909 + }, + { + "text": "stay", + "start": 9.96, + "end": 10.16, + "confidence": 0.994 + }, + { + "text": "with", + "start": 10.16, + "end": 10.28, + "confidence": 0.977 + }, + { + "text": "us", + "start": 10.28, + "end": 10.46, + "confidence": 0.993 + }, + { + "text": "tomorrow,", + "start": 10.46, + "end": 10.66, + "confidence": 0.514 + }, + { + "text": "she", + "start": 10.72, + "end": 10.86, + "confidence": 0.279 + }, + { + "text": "feels", + "start": 10.86, + "end": 11.16, + "confidence": 0.809 + }, + { + "text": "better.", + "start": 11.16, + 
"end": 11.52, + "confidence": 0.992 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 12.06, + "end": 13.36, + "text": " Of course she can.", + "tokens": [ + 50964, + 2720, + 1164, + 750, + 393, + 13, + 51064 + ], + "temperature": 0.0, + "avg_logprob": -0.239252830954159, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05016394704580307, + "confidence": 0.928, + "words": [ + { + "text": "Of", + "start": 12.06, + "end": 12.34, + "confidence": 0.788 + }, + { + "text": "course", + "start": 12.34, + "end": 12.56, + "confidence": 0.989 + }, + { + "text": "she", + "start": 12.56, + "end": 12.88, + "confidence": 0.958 + }, + { + "text": "can.", + "start": 12.88, + "end": 13.36, + "confidence": 0.993 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 13.5, + "end": 15.26, + "text": " This won't be for long.", + "tokens": [ + 51064, + 639, + 1582, + 380, + 312, + 337, + 938, + 13, + 51164 + ], + "temperature": 0.0, + "avg_logprob": -0.239252830954159, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05016394704580307, + "confidence": 0.892, + "words": [ + { + "text": "This", + "start": 13.5, + "end": 14.28, + "confidence": 0.543 + }, + { + "text": "won't", + "start": 14.28, + "end": 14.6, + "confidence": 0.975 + }, + { + "text": "be", + "start": 14.6, + "end": 14.8, + "confidence": 0.996 + }, + { + "text": "for", + "start": 14.8, + "end": 14.96, + "confidence": 0.984 + }, + { + "text": "long.", + "start": 14.96, + "end": 15.26, + "confidence": 0.996 + } + ] + }, + { + "id": 8, + "seek": 0, + "start": 15.5, + "end": 17.62, + "text": " Well, you can stay as long as you want, my love.", + "tokens": [ + 51164, + 1042, + 11, + 291, + 393, + 1754, + 382, + 938, + 382, + 291, + 528, + 11, + 452, + 959, + 13, + 51264 + ], + "temperature": 0.0, + "avg_logprob": -0.239252830954159, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05016394704580307, + "confidence": 0.927, + "words": [ + { + "text": "Well,", + "start": 15.5, + "end": 
15.56, + "confidence": 0.675 + }, + { + "text": "you", + "start": 15.68, + "end": 15.7, + "confidence": 0.744 + }, + { + "text": "can", + "start": 15.7, + "end": 15.94, + "confidence": 0.982 + }, + { + "text": "stay", + "start": 15.94, + "end": 16.14, + "confidence": 0.985 + }, + { + "text": "as", + "start": 16.14, + "end": 16.28, + "confidence": 0.972 + }, + { + "text": "long", + "start": 16.28, + "end": 16.4, + "confidence": 0.994 + }, + { + "text": "as", + "start": 16.4, + "end": 16.48, + "confidence": 0.996 + }, + { + "text": "you", + "start": 16.48, + "end": 16.62, + "confidence": 0.995 + }, + { + "text": "want,", + "start": 16.62, + "end": 16.8, + "confidence": 0.995 + }, + { + "text": "my", + "start": 16.88, + "end": 17.22, + "confidence": 0.949 + }, + { + "text": "love.", + "start": 17.22, + "end": 17.62, + "confidence": 0.993 + } + ] + }, + { + "id": 9, + "seek": 0, + "start": 17.76, + "end": 19.26, + "text": " I really miss you.", + "tokens": [ + 51264, + 286, + 534, + 1713, + 291, + 13, + 51364 + ], + "temperature": 0.0, + "avg_logprob": -0.239252830954159, + "compression_ratio": 1.4615384615384615, + "no_speech_prob": 0.05016394704580307, + "confidence": 0.792, + "words": [ + { + "text": "I", + "start": 17.76, + "end": 17.98, + "confidence": 0.587 + }, + { + "text": "really", + "start": 17.98, + "end": 18.36, + "confidence": 0.944 + }, + { + "text": "miss", + "start": 18.36, + "end": 18.78, + "confidence": 0.905 + }, + { + "text": "you.", + "start": 18.78, + "end": 19.26, + "confidence": 0.787 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/medium_auto/laugh1.mp3.words.json b/tests/expected/medium_auto/laugh1.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..7064d10309a4a287d76cdab7755484825e5c750e --- /dev/null +++ b/tests/expected/medium_auto/laugh1.mp3.words.json @@ -0,0 +1,32 @@ +{ + "text": " hahahaha", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.32, + 
"end": 1.58, + "text": " hahahaha", + "tokens": [ + 50364, + 17206, + 15380, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -1.267470932006836, + "compression_ratio": 0.6666666666666666, + "no_speech_prob": 0.5899588465690613, + "confidence": 0.135, + "words": [ + { + "text": "hahahaha", + "start": 0.32, + "end": 1.58, + "confidence": 0.135 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/medium_auto/laugh2.mp3.words.json b/tests/expected/medium_auto/laugh2.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..e5f230cf5c013509e0720146e6a854c190f18660 --- /dev/null +++ b/tests/expected/medium_auto/laugh2.mp3.words.json @@ -0,0 +1,32 @@ +{ + "text": " Hehehe", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.22, + "end": 0.64, + "text": " Hehehe", + "tokens": [ + 50364, + 634, + 23500, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -1.008833885192871, + "compression_ratio": 0.5, + "no_speech_prob": 0.3699853718280792, + "confidence": 0.334, + "words": [ + { + "text": "Hehehe", + "start": 0.22, + "end": 0.64, + "confidence": 0.334 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/medium_auto/punctuations.mp3.words.json b/tests/expected/medium_auto/punctuations.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..440342c5261720c292eb5ec0f1827124decf0caf --- /dev/null +++ b/tests/expected/medium_auto/punctuations.mp3.words.json @@ -0,0 +1,68 @@ +{ + "text": " Dis-moi, est-ce que l'avion vole ?", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.38, + "end": 2.76, + "text": " Dis-moi, est-ce que l'avion vole ?", + "tokens": [ + 50364, + 4208, + 12, + 29292, + 11, + 871, + 12, + 384, + 631, + 287, + 6, + 706, + 313, + 49877, + 2506, + 50496 + ], + "temperature": 0.0, + "avg_logprob": -0.26328243928797107, + "compression_ratio": 0.8095238095238095, + "no_speech_prob": 0.03882359713315964, + 
"confidence": 0.928, + "words": [ + { + "text": "Dis-moi,", + "start": 0.38, + "end": 1.1, + "confidence": 0.809 + }, + { + "text": "est-ce", + "start": 1.28, + "end": 1.5, + "confidence": 0.968 + }, + { + "text": "que", + "start": 1.5, + "end": 1.66, + "confidence": 0.978 + }, + { + "text": "l'avion", + "start": 1.66, + "end": 2.04, + "confidence": 0.993 + }, + { + "text": "vole ?", + "start": 2.04, + "end": 2.76, + "confidence": 0.898 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_auto/radio_short.mp3.words.json b/tests/expected/medium_auto/radio_short.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..ba165ce1628c259ec549b21be93a25d0db83293a --- /dev/null +++ b/tests/expected/medium_auto/radio_short.mp3.words.json @@ -0,0 +1,1531 @@ +{ + "text": "3212122222222211111111111111111111111111111111111111111111111", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.04, + "end": 0.7, + "text": "3", + "tokens": [ + 50364, + 18, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.28562339869412506, + "compression_ratio": 1.0, + "no_speech_prob": 0.8409799933433533, + "confidence": 0.029, + "words": [ + { + "text": "3", + "start": 0.04, + "end": 0.7, + "confidence": 0.029 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 1.96, + "end": 3.02, + "text": "2", + "tokens": [ + 50464, + 17, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.28562339869412506, + "compression_ratio": 1.0, + "no_speech_prob": 0.8409799933433533, + "confidence": 0.691, + "words": [ + { + "text": "2", + "start": 1.96, + "end": 3.02, + "confidence": 0.691 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 3.72, + "end": 4.92, + "text": "1", + "tokens": [ + 50564, + 16, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.28562339869412506, + "compression_ratio": 1.0, + "no_speech_prob": 0.8409799933433533, + "confidence": 0.949, + "words": [ + { + "text": "1", + "start": 3.72, + "end": 4.92, + 
"confidence": 0.949 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 5.5, + "end": 7.08, + "text": "2", + "tokens": [ + 50664, + 17, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.28562339869412506, + "compression_ratio": 1.0, + "no_speech_prob": 0.8409799933433533, + "confidence": 0.278, + "words": [ + { + "text": "2", + "start": 5.5, + "end": 7.08, + "confidence": 0.278 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 8.46, + "end": 9.34, + "text": "1", + "tokens": [ + 50764, + 16, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.28562339869412506, + "compression_ratio": 1.0, + "no_speech_prob": 0.8409799933433533, + "confidence": 0.702, + "words": [ + { + "text": "1", + "start": 8.46, + "end": 9.34, + "confidence": 0.702 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 9.78, + "end": 11.34, + "text": "2", + "tokens": [ + 50864, + 17, + 50964 + ], + "temperature": 0.0, + "avg_logprob": -0.28562339869412506, + "compression_ratio": 1.0, + "no_speech_prob": 0.8409799933433533, + "confidence": 0.919, + "words": [ + { + "text": "2", + "start": 9.78, + "end": 11.34, + "confidence": 0.919 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 11.8, + "end": 13.4, + "text": "2", + "tokens": [ + 50964, + 17, + 51064 + ], + "temperature": 0.0, + "avg_logprob": -0.28562339869412506, + "compression_ratio": 1.0, + "no_speech_prob": 0.8409799933433533, + "confidence": 0.657, + "words": [ + { + "text": "2", + "start": 11.8, + "end": 13.4, + "confidence": 0.657 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 13.68, + "end": 15.54, + "text": "2", + "tokens": [ + 51064, + 17, + 51164 + ], + "temperature": 0.0, + "avg_logprob": -0.28562339869412506, + "compression_ratio": 1.0, + "no_speech_prob": 0.8409799933433533, + "confidence": 0.773, + "words": [ + { + "text": "2", + "start": 13.68, + "end": 15.54, + "confidence": 0.773 + } + ] + }, + { + "id": 8, + "seek": 0, + "start": 15.54, + "end": 17.2, + "text": "2", + "tokens": [ + 51164, + 17, + 51264 + ], + 
"temperature": 0.0, + "avg_logprob": -0.28562339869412506, + "compression_ratio": 1.0, + "no_speech_prob": 0.8409799933433533, + "confidence": 0.905, + "words": [ + { + "text": "2", + "start": 15.54, + "end": 17.2, + "confidence": 0.905 + } + ] + }, + { + "id": 9, + "seek": 0, + "start": 17.5, + "end": 19.24, + "text": "2", + "tokens": [ + 51264, + 17, + 51364 + ], + "temperature": 0.0, + "avg_logprob": -0.28562339869412506, + "compression_ratio": 1.0, + "no_speech_prob": 0.8409799933433533, + "confidence": 0.809, + "words": [ + { + "text": "2", + "start": 17.5, + "end": 19.24, + "confidence": 0.809 + } + ] + }, + { + "id": 10, + "seek": 0, + "start": 19.64, + "end": 20.8, + "text": "2", + "tokens": [ + 51364, + 17, + 51464 + ], + "temperature": 0.0, + "avg_logprob": -0.28562339869412506, + "compression_ratio": 1.0, + "no_speech_prob": 0.8409799933433533, + "confidence": 0.626, + "words": [ + { + "text": "2", + "start": 19.64, + "end": 20.8, + "confidence": 0.626 + } + ] + }, + { + "id": 11, + "seek": 0, + "start": 22.06, + "end": 22.9, + "text": "2", + "tokens": [ + 51464, + 17, + 51564 + ], + "temperature": 0.0, + "avg_logprob": -0.28562339869412506, + "compression_ratio": 1.0, + "no_speech_prob": 0.8409799933433533, + "confidence": 0.834, + "words": [ + { + "text": "2", + "start": 22.06, + "end": 22.9, + "confidence": 0.834 + } + ] + }, + { + "id": 12, + "seek": 0, + "start": 23.56, + "end": 25.08, + "text": "2", + "tokens": [ + 51564, + 17, + 51664 + ], + "temperature": 0.0, + "avg_logprob": -0.28562339869412506, + "compression_ratio": 1.0, + "no_speech_prob": 0.8409799933433533, + "confidence": 0.974, + "words": [ + { + "text": "2", + "start": 23.56, + "end": 25.08, + "confidence": 0.974 + } + ] + }, + { + "id": 13, + "seek": 0, + "start": 26.48, + "end": 27.08, + "text": "2", + "tokens": [ + 51664, + 17, + 51764 + ], + "temperature": 0.0, + "avg_logprob": -0.28562339869412506, + "compression_ratio": 1.0, + "no_speech_prob": 0.8409799933433533, + "confidence": 
0.971, + "words": [ + { + "text": "2", + "start": 26.48, + "end": 27.08, + "confidence": 0.971 + } + ] + }, + { + "id": 14, + "seek": 2800, + "start": 28.12, + "end": 29.62, + "text": "1", + "tokens": [ + 50364, + 16, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.11717794158241966, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18292438983917236, + "confidence": 0.316, + "words": [ + { + "text": "1", + "start": 28.12, + "end": 29.62, + "confidence": 0.316 + } + ] + }, + { + "id": 15, + "seek": 2800, + "start": 30.84, + "end": 31.14, + "text": "1", + "tokens": [ + 50464, + 16, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.11717794158241966, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18292438983917236, + "confidence": 0.545, + "words": [ + { + "text": "1", + "start": 30.84, + "end": 31.14, + "confidence": 0.545 + } + ] + }, + { + "id": 16, + "seek": 2800, + "start": 32.06, + "end": 33.18, + "text": "1", + "tokens": [ + 50564, + 16, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.11717794158241966, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18292438983917236, + "confidence": 0.913, + "words": [ + { + "text": "1", + "start": 32.06, + "end": 33.18, + "confidence": 0.913 + } + ] + }, + { + "id": 17, + "seek": 2800, + "start": 33.5, + "end": 35.1, + "text": "1", + "tokens": [ + 50664, + 16, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.11717794158241966, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18292438983917236, + "confidence": 0.887, + "words": [ + { + "text": "1", + "start": 33.5, + "end": 35.1, + "confidence": 0.887 + } + ] + }, + { + "id": 18, + "seek": 2800, + "start": 36.18, + "end": 36.64, + "text": "1", + "tokens": [ + 50764, + 16, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.11717794158241966, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18292438983917236, + "confidence": 0.957, + "words": [ + { + "text": "1", + 
"start": 36.18, + "end": 36.64, + "confidence": 0.957 + } + ] + }, + { + "id": 19, + "seek": 2800, + "start": 37.5, + "end": 38.46, + "text": "1", + "tokens": [ + 50864, + 16, + 50964 + ], + "temperature": 0.0, + "avg_logprob": -0.11717794158241966, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18292438983917236, + "confidence": 0.958, + "words": [ + { + "text": "1", + "start": 37.5, + "end": 38.46, + "confidence": 0.958 + } + ] + }, + { + "id": 20, + "seek": 2800, + "start": 39.5, + "end": 41.14, + "text": "1", + "tokens": [ + 50964, + 16, + 51064 + ], + "temperature": 0.0, + "avg_logprob": -0.11717794158241966, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18292438983917236, + "confidence": 0.966, + "words": [ + { + "text": "1", + "start": 39.5, + "end": 41.14, + "confidence": 0.966 + } + ] + }, + { + "id": 21, + "seek": 2800, + "start": 41.5, + "end": 43.38, + "text": "1", + "tokens": [ + 51064, + 16, + 51164 + ], + "temperature": 0.0, + "avg_logprob": -0.11717794158241966, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18292438983917236, + "confidence": 0.973, + "words": [ + { + "text": "1", + "start": 41.5, + "end": 43.38, + "confidence": 0.973 + } + ] + }, + { + "id": 22, + "seek": 2800, + "start": 43.88, + "end": 44.68, + "text": "1", + "tokens": [ + 51164, + 16, + 51264 + ], + "temperature": 0.0, + "avg_logprob": -0.11717794158241966, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18292438983917236, + "confidence": 0.946, + "words": [ + { + "text": "1", + "start": 43.88, + "end": 44.68, + "confidence": 0.946 + } + ] + }, + { + "id": 23, + "seek": 2800, + "start": 45.5, + "end": 47.16, + "text": "1", + "tokens": [ + 51264, + 16, + 51364 + ], + "temperature": 0.0, + "avg_logprob": -0.11717794158241966, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18292438983917236, + "confidence": 0.96, + "words": [ + { + "text": "1", + "start": 45.5, + "end": 47.16, + "confidence": 
0.96 + } + ] + }, + { + "id": 24, + "seek": 2800, + "start": 47.94, + "end": 48.8, + "text": "1", + "tokens": [ + 51364, + 16, + 51464 + ], + "temperature": 0.0, + "avg_logprob": -0.11717794158241966, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18292438983917236, + "confidence": 0.965, + "words": [ + { + "text": "1", + "start": 47.94, + "end": 48.8, + "confidence": 0.965 + } + ] + }, + { + "id": 25, + "seek": 2800, + "start": 50.22, + "end": 50.96, + "text": "1", + "tokens": [ + 51464, + 16, + 51564 + ], + "temperature": 0.0, + "avg_logprob": -0.11717794158241966, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18292438983917236, + "confidence": 0.967, + "words": [ + { + "text": "1", + "start": 50.22, + "end": 50.96, + "confidence": 0.967 + } + ] + }, + { + "id": 26, + "seek": 2800, + "start": 52.22, + "end": 52.84, + "text": "1", + "tokens": [ + 51564, + 16, + 51664 + ], + "temperature": 0.0, + "avg_logprob": -0.11717794158241966, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18292438983917236, + "confidence": 0.963, + "words": [ + { + "text": "1", + "start": 52.22, + "end": 52.84, + "confidence": 0.963 + } + ] + }, + { + "id": 27, + "seek": 2800, + "start": 53.5, + "end": 54.78, + "text": "1", + "tokens": [ + 51664, + 16, + 51764 + ], + "temperature": 0.0, + "avg_logprob": -0.11717794158241966, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.18292438983917236, + "confidence": 0.945, + "words": [ + { + "text": "1", + "start": 53.5, + "end": 54.78, + "confidence": 0.945 + } + ] + }, + { + "id": 28, + "seek": 5600, + "start": 56.56, + "end": 57.24, + "text": "1", + "tokens": [ + 50364, + 16, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.12248417464169589, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.1875864416360855, + "confidence": 0.425, + "words": [ + { + "text": "1", + "start": 56.56, + "end": 57.24, + "confidence": 0.425 + } + ] + }, + { + "id": 29, + "seek": 
5600, + "start": 57.64, + "end": 58.86, + "text": "1", + "tokens": [ + 50464, + 16, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.12248417464169589, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.1875864416360855, + "confidence": 0.789, + "words": [ + { + "text": "1", + "start": 57.64, + "end": 58.86, + "confidence": 0.789 + } + ] + }, + { + "id": 30, + "seek": 5600, + "start": 60.08, + "end": 60.92, + "text": "1", + "tokens": [ + 50564, + 16, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.12248417464169589, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.1875864416360855, + "confidence": 0.953, + "words": [ + { + "text": "1", + "start": 60.08, + "end": 60.92, + "confidence": 0.953 + } + ] + }, + { + "id": 31, + "seek": 5600, + "start": 61.5, + "end": 62.62, + "text": "1", + "tokens": [ + 50664, + 16, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.12248417464169589, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.1875864416360855, + "confidence": 0.949, + "words": [ + { + "text": "1", + "start": 61.5, + "end": 62.62, + "confidence": 0.949 + } + ] + }, + { + "id": 32, + "seek": 5600, + "start": 64.36, + "end": 65.06, + "text": "1", + "tokens": [ + 50764, + 16, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.12248417464169589, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.1875864416360855, + "confidence": 0.954, + "words": [ + { + "text": "1", + "start": 64.36, + "end": 65.06, + "confidence": 0.954 + } + ] + }, + { + "id": 33, + "seek": 5600, + "start": 65.5, + "end": 66.92, + "text": "1", + "tokens": [ + 50864, + 16, + 50964 + ], + "temperature": 0.0, + "avg_logprob": -0.12248417464169589, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.1875864416360855, + "confidence": 0.952, + "words": [ + { + "text": "1", + "start": 65.5, + "end": 66.92, + "confidence": 0.952 + } + ] + }, + { + "id": 34, + "seek": 5600, + "start": 67.96, + "end": 69.06, + 
"text": "1", + "tokens": [ + 50964, + 16, + 51064 + ], + "temperature": 0.0, + "avg_logprob": -0.12248417464169589, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.1875864416360855, + "confidence": 0.965, + "words": [ + { + "text": "1", + "start": 67.96, + "end": 69.06, + "confidence": 0.965 + } + ] + }, + { + "id": 35, + "seek": 5600, + "start": 70.38, + "end": 71.69, + "text": "1", + "tokens": [ + 51064, + 16, + 51164 + ], + "temperature": 0.0, + "avg_logprob": -0.12248417464169589, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.1875864416360855, + "confidence": 0.956, + "words": [ + { + "text": "1", + "start": 70.38, + "end": 71.69, + "confidence": 0.956 + } + ] + }, + { + "id": 36, + "seek": 5600, + "start": 71.69, + "end": 73.56, + "text": "1", + "tokens": [ + 51164, + 16, + 51264 + ], + "temperature": 0.0, + "avg_logprob": -0.12248417464169589, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.1875864416360855, + "confidence": 0.957, + "words": [ + { + "text": "1", + "start": 71.69, + "end": 73.56, + "confidence": 0.957 + } + ] + }, + { + "id": 37, + "seek": 5600, + "start": 73.56, + "end": 75.34, + "text": "1", + "tokens": [ + 51264, + 16, + 51364 + ], + "temperature": 0.0, + "avg_logprob": -0.12248417464169589, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.1875864416360855, + "confidence": 0.955, + "words": [ + { + "text": "1", + "start": 73.56, + "end": 75.34, + "confidence": 0.955 + } + ] + }, + { + "id": 38, + "seek": 5600, + "start": 75.5, + "end": 76.42, + "text": "1", + "tokens": [ + 51364, + 16, + 51464 + ], + "temperature": 0.0, + "avg_logprob": -0.12248417464169589, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.1875864416360855, + "confidence": 0.939, + "words": [ + { + "text": "1", + "start": 75.5, + "end": 76.42, + "confidence": 0.939 + } + ] + }, + { + "id": 39, + "seek": 5600, + "start": 78.14, + "end": 78.78, + "text": "1", + "tokens": [ + 51464, + 16, + 
51564 + ], + "temperature": 0.0, + "avg_logprob": -0.12248417464169589, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.1875864416360855, + "confidence": 0.936, + "words": [ + { + "text": "1", + "start": 78.14, + "end": 78.78, + "confidence": 0.936 + } + ] + }, + { + "id": 40, + "seek": 5600, + "start": 80.08, + "end": 80.86, + "text": "1", + "tokens": [ + 51564, + 16, + 51664 + ], + "temperature": 0.0, + "avg_logprob": -0.12248417464169589, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.1875864416360855, + "confidence": 0.922, + "words": [ + { + "text": "1", + "start": 80.08, + "end": 80.86, + "confidence": 0.922 + } + ] + }, + { + "id": 41, + "seek": 5600, + "start": 81.5, + "end": 82.84, + "text": "1", + "tokens": [ + 51664, + 16, + 51764 + ], + "temperature": 0.0, + "avg_logprob": -0.12248417464169589, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.1875864416360855, + "confidence": 0.896, + "words": [ + { + "text": "1", + "start": 81.5, + "end": 82.84, + "confidence": 0.896 + } + ] + }, + { + "id": 42, + "seek": 8400, + "start": 84.82, + "end": 85.6, + "text": "1", + "tokens": [ + 50364, + 16, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.10892029242082076, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.1820167452096939, + "confidence": 0.793, + "words": [ + { + "text": "1", + "start": 84.82, + "end": 85.6, + "confidence": 0.793 + } + ] + }, + { + "id": 43, + "seek": 8400, + "start": 85.6, + "end": 86.8, + "text": "1", + "tokens": [ + 50464, + 16, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.10892029242082076, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.1820167452096939, + "confidence": 0.88, + "words": [ + { + "text": "1", + "start": 85.6, + "end": 86.8, + "confidence": 0.88 + } + ] + }, + { + "id": 44, + "seek": 8400, + "start": 87.76, + "end": 88.72, + "text": "1", + "tokens": [ + 50564, + 16, + 50664 + ], + "temperature": 0.0, + "avg_logprob": 
-0.10892029242082076, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.1820167452096939, + "confidence": 0.954, + "words": [ + { + "text": "1", + "start": 87.76, + "end": 88.72, + "confidence": 0.954 + } + ] + }, + { + "id": 45, + "seek": 8400, + "start": 89.5, + "end": 90.52, + "text": "1", + "tokens": [ + 50664, + 16, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.10892029242082076, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.1820167452096939, + "confidence": 0.951, + "words": [ + { + "text": "1", + "start": 89.5, + "end": 90.52, + "confidence": 0.951 + } + ] + }, + { + "id": 46, + "seek": 8400, + "start": 91.5, + "end": 93.02, + "text": "1", + "tokens": [ + 50764, + 16, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.10892029242082076, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.1820167452096939, + "confidence": 0.91, + "words": [ + { + "text": "1", + "start": 91.5, + "end": 93.02, + "confidence": 0.91 + } + ] + }, + { + "id": 47, + "seek": 8400, + "start": 93.68, + "end": 95.36, + "text": "1", + "tokens": [ + 50864, + 16, + 50964 + ], + "temperature": 0.0, + "avg_logprob": -0.10892029242082076, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.1820167452096939, + "confidence": 0.944, + "words": [ + { + "text": "1", + "start": 93.68, + "end": 95.36, + "confidence": 0.944 + } + ] + }, + { + "id": 48, + "seek": 8400, + "start": 95.86, + "end": 97.84, + "text": "1", + "tokens": [ + 50964, + 16, + 51064 + ], + "temperature": 0.0, + "avg_logprob": -0.10892029242082076, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.1820167452096939, + "confidence": 0.967, + "words": [ + { + "text": "1", + "start": 95.86, + "end": 97.84, + "confidence": 0.967 + } + ] + }, + { + "id": 49, + "seek": 8400, + "start": 97.84, + "end": 98.98, + "text": "1", + "tokens": [ + 51064, + 16, + 51164 + ], + "temperature": 0.0, + "avg_logprob": -0.10892029242082076, + "compression_ratio": 
1.2727272727272727, + "no_speech_prob": 0.1820167452096939, + "confidence": 0.976, + "words": [ + { + "text": "1", + "start": 97.84, + "end": 98.98, + "confidence": 0.976 + } + ] + }, + { + "id": 50, + "seek": 8400, + "start": 99.5, + "end": 101.1, + "text": "1", + "tokens": [ + 51164, + 16, + 51264 + ], + "temperature": 0.0, + "avg_logprob": -0.10892029242082076, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.1820167452096939, + "confidence": 0.981, + "words": [ + { + "text": "1", + "start": 99.5, + "end": 101.1, + "confidence": 0.981 + } + ] + }, + { + "id": 51, + "seek": 8400, + "start": 101.5, + "end": 103.48, + "text": "1", + "tokens": [ + 51264, + 16, + 51364 + ], + "temperature": 0.0, + "avg_logprob": -0.10892029242082076, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.1820167452096939, + "confidence": 0.985, + "words": [ + { + "text": "1", + "start": 101.5, + "end": 103.48, + "confidence": 0.985 + } + ] + }, + { + "id": 52, + "seek": 8400, + "start": 103.5, + "end": 105.44, + "text": "1", + "tokens": [ + 51364, + 16, + 51464 + ], + "temperature": 0.0, + "avg_logprob": -0.10892029242082076, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.1820167452096939, + "confidence": 0.987, + "words": [ + { + "text": "1", + "start": 103.5, + "end": 105.44, + "confidence": 0.987 + } + ] + }, + { + "id": 53, + "seek": 8400, + "start": 105.58, + "end": 107.08, + "text": "1", + "tokens": [ + 51464, + 16, + 51564 + ], + "temperature": 0.0, + "avg_logprob": -0.10892029242082076, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.1820167452096939, + "confidence": 0.956, + "words": [ + { + "text": "1", + "start": 105.58, + "end": 107.08, + "confidence": 0.956 + } + ] + }, + { + "id": 54, + "seek": 8400, + "start": 108.32, + "end": 108.98, + "text": "1", + "tokens": [ + 51564, + 16, + 51664 + ], + "temperature": 0.0, + "avg_logprob": -0.10892029242082076, + "compression_ratio": 1.2727272727272727, + 
"no_speech_prob": 0.1820167452096939, + "confidence": 0.946, + "words": [ + { + "text": "1", + "start": 108.32, + "end": 108.98, + "confidence": 0.946 + } + ] + }, + { + "id": 55, + "seek": 8400, + "start": 109.5, + "end": 110.4, + "text": "1", + "tokens": [ + 51664, + 16, + 51764 + ], + "temperature": 0.0, + "avg_logprob": -0.10892029242082076, + "compression_ratio": 1.2727272727272727, + "no_speech_prob": 0.1820167452096939, + "confidence": 0.888, + "words": [ + { + "text": "1", + "start": 109.5, + "end": 110.4, + "confidence": 0.888 + } + ] + }, + { + "id": 56, + "seek": 11200, + "start": 112.66, + "end": 113.32, + "text": "1", + "tokens": [ + 50364, + 16, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.13131670653820038, + "compression_ratio": 0.45454545454545453, + "no_speech_prob": 0.27970418334007263, + "confidence": 0.917, + "words": [ + { + "text": "1", + "start": 112.66, + "end": 113.32, + "confidence": 0.917 + } + ] + }, + { + "id": 57, + "seek": 11200, + "start": 113.98, + "end": 114.82, + "text": "1", + "tokens": [ + 50464, + 16, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.13131670653820038, + "compression_ratio": 0.45454545454545453, + "no_speech_prob": 0.27970418334007263, + "confidence": 0.888, + "words": [ + { + "text": "1", + "start": 113.98, + "end": 114.82, + "confidence": 0.888 + } + ] + }, + { + "id": 58, + "seek": 11200, + "start": 116.1, + "end": 117.3, + "text": "1", + "tokens": [ + 50564, + 16, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.13131670653820038, + "compression_ratio": 0.45454545454545453, + "no_speech_prob": 0.27970418334007263, + "confidence": 0.979, + "words": [ + { + "text": "1", + "start": 116.1, + "end": 117.3, + "confidence": 0.979 + } + ] + }, + { + "id": 59, + "seek": 11200, + "start": 117.5, + "end": 118.68, + "text": "1", + "tokens": [ + 50664, + 16, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.13131670653820038, + "compression_ratio": 0.45454545454545453, + "no_speech_prob": 
0.27970418334007263, + "confidence": 0.981, + "words": [ + { + "text": "1", + "start": 117.5, + "end": 118.68, + "confidence": 0.981 + } + ] + }, + { + "id": 60, + "seek": 11200, + "start": 119.5, + "end": 120.86, + "text": "1", + "tokens": [ + 50764, + 16, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.13131670653820038, + "compression_ratio": 0.45454545454545453, + "no_speech_prob": 0.27970418334007263, + "confidence": 0.973, + "words": [ + { + "text": "1", + "start": 119.5, + "end": 120.86, + "confidence": 0.973 + } + ] + } + ], + "language": "zh" +} \ No newline at end of file diff --git a/tests/expected/medium_auto/smartphone.mp3.words.json b/tests/expected/medium_auto/smartphone.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..487ef4822e529a0eedef411d70eb22aba4b88bab --- /dev/null +++ b/tests/expected/medium_auto/smartphone.mp3.words.json @@ -0,0 +1,4899 @@ +{ + "text": " C'est évident ce que dit Nicolas, mais je ne me l'étais jamais formulé comme ça. Ce qui fait la force du smartphone, c'est pas seulement l'accumulation des fonctions, mais la manière dont elles interagissent entre elles. Ce qui dit d'ailleurs sur la photo, c'est hyper convaincant. Alors évidemment, il faudrait ajouter les interfaces. L'écran tactile a été beaucoup très souvent mentionné. Mais bon, il faut dire qu'il profite aussi de 20 ans pendant lesquels les ordinateurs nous ont appris à cliquer sur des icônes. Sauf que le smartphone ajoute le toucher, ce qui rend le contact plus direct, plus sensible. Et puis évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté touffu de la navigation web pour aller directement au but. Bref, tout ça, ce sont les conditions qui permettent de créer cet objet dont Nicolas dit qu'il est vraisemblablement inédit dans l'histoire de l'humanité. Mais ça, ça soulève une autre interrogation. 
Est-ce que le fait que cet objet soit inédit induit que notre rapport à lui est aussi un rapport inédit ? Je veux dire, est-ce que le rapport qu'on a au smartphone est comparable à celui qu'on entretenait à d'autres objets techniques comme la voiture ou le téléphone ? Il n'y a pas d'équivalent en fait. Et donc cette espèce de nouveauté dans la relation à l'objet, c'est fascinant et terrifiant. Parce qu'on a l'impression, comme le disent les utilisateurs et les services, d'être dépendant de cet objet, d'induire en fait une espèce de relation, de médiation avec le monde qui rend de l'ampleur et qui amène aussi à des formes de rejet. Donc à objet inédit, rapport inédit. Et ce rapport, si j'en crois à Nicolas, serait caractérisé par un mélange de dépendance et de rejet. Bon, en vrai, il faudrait remonter très très finement toute l'histoire des objets techniques et de leur insertion dans nos vies pour déterminer si ce rapport est totalement inédit. Mais j'ai l'impression comme ça que Nicolas ne se trompe pas vraiment. Pour autant que je sache, il y a eu plein de discussions autour de la voiture ou même du téléphone. Mais la dépendance n'était pas du même ordre. Donc le rejet non plus n'était pas du même ordre. On peut adorer sa bagnole, en avoir besoin pour plein de choses. Le soir, quand on va se coucher, on la laisse. On ne l'a pas dans la main quand on est au lit, on ne l'emmène pas au chiottes. On pouvait être énervé par son môme qui occupait la ligne de téléphone pendant une heure chaque soir pour discuter avec un copain. Mais ça ne ressemblait pas à ce qu'on peut ressentir à voir ce même môme aujourd'hui, continuellement avec son smartphone dans la main, comme si c'était une sorte de pacemaker externe, comme si le lâcher allait entraîner sa mort immédiate. Bon, je dis ça pour le môme, mais c'est évidemment valable pour nous aussi. Donc, rapport inédit, d'accord. Mais pourquoi a-t-on l'impression qu'on n'en sortira jamais ? 
Est-ce qu'il faut en remettre la faute sur les gens qui ont créé cet outil merveilleux et diabolique, et diabolique parce que merveilleux ? Les économistes parlent de dépendance du sentier. C'est l'idée qu'on met sur un sentier qui a été établi, soit volontairement en marchant dessus, soit en définissant des bornes, en définissant une signalétique.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.4, + "end": 3.66, + "text": " C'est évident ce que dit Nicolas, mais je ne me l'étais jamais formulé comme ça.", + "tokens": [ + 50364, + 383, + 6, + 377, + 20090, + 1078, + 1769, + 631, + 6176, + 38268, + 11, + 2420, + 1506, + 408, + 385, + 287, + 6, + 22824, + 14540, + 49990, + 526, + 5173, + 2788, + 13, + 50539 + ], + "temperature": 0.0, + "avg_logprob": -0.23626650879714664, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.15368737280368805, + "confidence": 0.935, + "words": [ + { + "text": "C'est", + "start": 0.4, + "end": 0.64, + "confidence": 0.961 + }, + { + "text": "évident", + "start": 0.64, + "end": 0.9, + "confidence": 0.984 + }, + { + "text": "ce", + "start": 0.9, + "end": 1.0, + "confidence": 0.663 + }, + { + "text": "que", + "start": 1.0, + "end": 1.08, + "confidence": 0.989 + }, + { + "text": "dit", + "start": 1.08, + "end": 1.2, + "confidence": 0.994 + }, + { + "text": "Nicolas,", + "start": 1.2, + "end": 1.48, + "confidence": 0.91 + }, + { + "text": "mais", + "start": 1.7, + "end": 2.04, + "confidence": 0.979 + }, + { + "text": "je", + "start": 2.04, + "end": 2.26, + "confidence": 0.981 + }, + { + "text": "ne", + "start": 2.26, + "end": 2.34, + "confidence": 0.837 + }, + { + "text": "me", + "start": 2.34, + "end": 2.36, + "confidence": 0.821 + }, + { + "text": "l'étais", + "start": 2.36, + "end": 2.58, + "confidence": 0.971 + }, + { + "text": "jamais", + "start": 2.58, + "end": 2.88, + "confidence": 0.989 + }, + { + "text": "formulé", + "start": 2.88, + "end": 3.26, + "confidence": 0.908 + }, + { + "text": "comme", + "start": 
3.26, + "end": 3.44, + "confidence": 0.993 + }, + { + "text": "ça.", + "start": 3.44, + "end": 3.66, + "confidence": 0.975 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 4.16, + "end": 7.94, + "text": " Ce qui fait la force du smartphone, c'est pas seulement l'accumulation des fonctions,", + "tokens": [ + 50549, + 8257, + 1956, + 3887, + 635, + 3464, + 1581, + 13307, + 11, + 269, + 6, + 377, + 1736, + 27772, + 287, + 6, + 8476, + 449, + 2776, + 730, + 17290, + 3916, + 11, + 50756 + ], + "temperature": 0.0, + "avg_logprob": -0.23626650879714664, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.15368737280368805, + "confidence": 0.93, + "words": [ + { + "text": "Ce", + "start": 4.16, + "end": 4.28, + "confidence": 0.952 + }, + { + "text": "qui", + "start": 4.28, + "end": 4.36, + "confidence": 0.958 + }, + { + "text": "fait", + "start": 4.36, + "end": 4.5, + "confidence": 0.567 + }, + { + "text": "la", + "start": 4.5, + "end": 4.76, + "confidence": 0.972 + }, + { + "text": "force", + "start": 4.76, + "end": 5.02, + "confidence": 0.999 + }, + { + "text": "du", + "start": 5.02, + "end": 5.22, + "confidence": 0.996 + }, + { + "text": "smartphone,", + "start": 5.22, + "end": 5.7, + "confidence": 0.912 + }, + { + "text": "c'est", + "start": 6.04, + "end": 6.16, + "confidence": 0.879 + }, + { + "text": "pas", + "start": 6.16, + "end": 6.26, + "confidence": 0.991 + }, + { + "text": "seulement", + "start": 6.26, + "end": 6.54, + "confidence": 0.999 + }, + { + "text": "l'accumulation", + "start": 6.54, + "end": 7.4, + "confidence": 0.958 + }, + { + "text": "des", + "start": 7.4, + "end": 7.58, + "confidence": 0.983 + }, + { + "text": "fonctions,", + "start": 7.58, + "end": 7.94, + "confidence": 0.987 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 8.32, + "end": 10.88, + "text": " mais la manière dont elles interagissent entre elles.", + "tokens": [ + 50756, + 2420, + 635, + 22267, + 9400, + 23576, + 728, + 559, + 25450, + 3962, + 23576, + 13, + 50906 
+ ], + "temperature": 0.0, + "avg_logprob": -0.23626650879714664, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.15368737280368805, + "confidence": 0.977, + "words": [ + { + "text": "mais", + "start": 8.32, + "end": 8.48, + "confidence": 0.992 + }, + { + "text": "la", + "start": 8.48, + "end": 8.7, + "confidence": 0.995 + }, + { + "text": "manière", + "start": 8.7, + "end": 8.94, + "confidence": 0.999 + }, + { + "text": "dont", + "start": 8.94, + "end": 9.08, + "confidence": 0.978 + }, + { + "text": "elles", + "start": 9.08, + "end": 9.48, + "confidence": 0.967 + }, + { + "text": "interagissent", + "start": 9.48, + "end": 10.38, + "confidence": 0.964 + }, + { + "text": "entre", + "start": 10.38, + "end": 10.7, + "confidence": 0.955 + }, + { + "text": "elles.", + "start": 10.7, + "end": 10.88, + "confidence": 0.989 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 10.96, + "end": 13.0, + "text": " Ce qui dit d'ailleurs sur la photo, c'est hyper convaincant.", + "tokens": [ + 50906, + 8257, + 1956, + 6176, + 274, + 6, + 19400, + 1022, + 635, + 5052, + 11, + 269, + 6, + 377, + 9848, + 3754, + 491, + 66, + 394, + 13, + 51006 + ], + "temperature": 0.0, + "avg_logprob": -0.23626650879714664, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.15368737280368805, + "confidence": 0.906, + "words": [ + { + "text": "Ce", + "start": 10.96, + "end": 11.16, + "confidence": 0.608 + }, + { + "text": "qui", + "start": 11.16, + "end": 11.26, + "confidence": 0.769 + }, + { + "text": "dit", + "start": 11.26, + "end": 11.38, + "confidence": 0.983 + }, + { + "text": "d'ailleurs", + "start": 11.38, + "end": 11.58, + "confidence": 0.985 + }, + { + "text": "sur", + "start": 11.58, + "end": 11.72, + "confidence": 0.48 + }, + { + "text": "la", + "start": 11.72, + "end": 11.82, + "confidence": 0.984 + }, + { + "text": "photo,", + "start": 11.82, + "end": 12.0, + "confidence": 0.994 + }, + { + "text": "c'est", + "start": 12.14, + "end": 12.2, + "confidence": 
0.997 + }, + { + "text": "hyper", + "start": 12.2, + "end": 12.48, + "confidence": 0.993 + }, + { + "text": "convaincant.", + "start": 12.48, + "end": 13.0, + "confidence": 0.982 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 13.38, + "end": 16.04, + "text": " Alors évidemment, il faudrait ajouter les interfaces.", + "tokens": [ + 51006, + 9946, + 24724, + 11, + 1930, + 38694, + 8645, + 17680, + 23985, + 1512, + 28416, + 13, + 51166 + ], + "temperature": 0.0, + "avg_logprob": -0.23626650879714664, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.15368737280368805, + "confidence": 0.912, + "words": [ + { + "text": "Alors", + "start": 13.38, + "end": 13.58, + "confidence": 0.584 + }, + { + "text": "évidemment,", + "start": 13.58, + "end": 13.86, + "confidence": 0.832 + }, + { + "text": "il", + "start": 14.26, + "end": 14.42, + "confidence": 0.953 + }, + { + "text": "faudrait", + "start": 14.42, + "end": 14.76, + "confidence": 0.996 + }, + { + "text": "ajouter", + "start": 14.76, + "end": 15.2, + "confidence": 0.992 + }, + { + "text": "les", + "start": 15.2, + "end": 15.6, + "confidence": 0.985 + }, + { + "text": "interfaces.", + "start": 15.6, + "end": 16.04, + "confidence": 0.984 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 16.22, + "end": 19.36, + "text": " L'écran tactile a été beaucoup très souvent mentionné.", + "tokens": [ + 51166, + 441, + 6, + 9062, + 4257, + 47319, + 257, + 8862, + 8796, + 5732, + 20847, + 2152, + 15055, + 13, + 51331 + ], + "temperature": 0.0, + "avg_logprob": -0.23626650879714664, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.15368737280368805, + "confidence": 0.923, + "words": [ + { + "text": "L'écran", + "start": 16.22, + "end": 16.78, + "confidence": 0.996 + }, + { + "text": "tactile", + "start": 16.78, + "end": 17.1, + "confidence": 0.986 + }, + { + "text": "a", + "start": 17.1, + "end": 17.3, + "confidence": 0.98 + }, + { + "text": "été", + "start": 17.3, + "end": 17.84, + "confidence": 
0.974 + }, + { + "text": "beaucoup", + "start": 17.84, + "end": 18.28, + "confidence": 0.976 + }, + { + "text": "très", + "start": 18.28, + "end": 18.62, + "confidence": 0.447 + }, + { + "text": "souvent", + "start": 18.62, + "end": 18.9, + "confidence": 0.996 + }, + { + "text": "mentionné.", + "start": 18.9, + "end": 19.36, + "confidence": 0.978 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 20.02, + "end": 25.44, + "text": " Mais bon, il faut dire qu'il profite aussi de 20 ans pendant lesquels les ordinateurs nous ont appris à cliquer sur des icônes.", + "tokens": [ + 51331, + 6313, + 4428, + 11, + 1930, + 8487, + 1264, + 421, + 6, + 388, + 1740, + 642, + 6212, + 368, + 945, + 1567, + 17338, + 1512, + 358, + 1625, + 1512, + 4792, + 13923, + 2156, + 4666, + 6592, + 724, + 5714, + 1531, + 596, + 23909, + 1022, + 730, + 4376, + 2851, + 4081, + 13, + 51631 + ], + "temperature": 0.0, + "avg_logprob": -0.23626650879714664, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.15368737280368805, + "confidence": 0.92, + "words": [ + { + "text": "Mais", + "start": 20.02, + "end": 20.26, + "confidence": 0.943 + }, + { + "text": "bon,", + "start": 20.26, + "end": 20.5, + "confidence": 0.666 + }, + { + "text": "il", + "start": 20.52, + "end": 20.62, + "confidence": 0.99 + }, + { + "text": "faut", + "start": 20.62, + "end": 20.7, + "confidence": 0.99 + }, + { + "text": "dire", + "start": 20.7, + "end": 20.82, + "confidence": 0.995 + }, + { + "text": "qu'il", + "start": 20.82, + "end": 21.04, + "confidence": 0.88 + }, + { + "text": "profite", + "start": 21.04, + "end": 21.3, + "confidence": 0.995 + }, + { + "text": "aussi", + "start": 21.3, + "end": 21.7, + "confidence": 0.972 + }, + { + "text": "de", + "start": 21.7, + "end": 21.94, + "confidence": 0.97 + }, + { + "text": "20", + "start": 21.94, + "end": 22.16, + "confidence": 0.812 + }, + { + "text": "ans", + "start": 22.16, + "end": 22.3, + "confidence": 0.997 + }, + { + "text": "pendant", + "start": 22.3, + 
"end": 22.54, + "confidence": 0.723 + }, + { + "text": "lesquels", + "start": 22.54, + "end": 22.92, + "confidence": 0.98 + }, + { + "text": "les", + "start": 22.92, + "end": 23.14, + "confidence": 0.71 + }, + { + "text": "ordinateurs", + "start": 23.14, + "end": 23.58, + "confidence": 0.966 + }, + { + "text": "nous", + "start": 23.58, + "end": 23.74, + "confidence": 0.602 + }, + { + "text": "ont", + "start": 23.74, + "end": 23.88, + "confidence": 0.974 + }, + { + "text": "appris", + "start": 23.88, + "end": 24.12, + "confidence": 0.991 + }, + { + "text": "à", + "start": 24.12, + "end": 24.26, + "confidence": 0.828 + }, + { + "text": "cliquer", + "start": 24.26, + "end": 24.54, + "confidence": 0.989 + }, + { + "text": "sur", + "start": 24.54, + "end": 24.72, + "confidence": 0.984 + }, + { + "text": "des", + "start": 24.72, + "end": 24.92, + "confidence": 0.971 + }, + { + "text": "icônes.", + "start": 24.92, + "end": 25.44, + "confidence": 0.992 + } + ] + }, + { + "id": 7, + "seek": 2534, + "start": 25.54, + "end": 30.64, + "text": " Sauf que le smartphone ajoute le toucher, ce qui rend le contact plus direct, plus sensible.", + "tokens": [ + 50364, + 318, + 9507, + 631, + 476, + 13307, + 17680, + 14040, + 476, + 2557, + 260, + 11, + 1769, + 1956, + 6125, + 476, + 3385, + 1804, + 2047, + 11, + 1804, + 25380, + 13, + 50639 + ], + "temperature": 0.0, + "avg_logprob": -0.10786590413150625, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 0.18929888308048248, + "confidence": 0.968, + "words": [ + { + "text": "Sauf", + "start": 25.54, + "end": 25.82, + "confidence": 0.974 + }, + { + "text": "que", + "start": 25.82, + "end": 26.3, + "confidence": 0.996 + }, + { + "text": "le", + "start": 26.3, + "end": 26.66, + "confidence": 0.671 + }, + { + "text": "smartphone", + "start": 26.66, + "end": 27.08, + "confidence": 0.995 + }, + { + "text": "ajoute", + "start": 27.08, + "end": 27.48, + "confidence": 0.99 + }, + { + "text": "le", + "start": 27.48, + "end": 27.66, 
+ "confidence": 0.989 + }, + { + "text": "toucher,", + "start": 27.66, + "end": 28.06, + "confidence": 0.987 + }, + { + "text": "ce", + "start": 28.14, + "end": 28.16, + "confidence": 0.952 + }, + { + "text": "qui", + "start": 28.16, + "end": 28.3, + "confidence": 1.0 + }, + { + "text": "rend", + "start": 28.3, + "end": 28.54, + "confidence": 0.994 + }, + { + "text": "le", + "start": 28.54, + "end": 28.8, + "confidence": 0.992 + }, + { + "text": "contact", + "start": 28.8, + "end": 29.16, + "confidence": 0.999 + }, + { + "text": "plus", + "start": 29.16, + "end": 29.58, + "confidence": 0.983 + }, + { + "text": "direct,", + "start": 29.58, + "end": 30.0, + "confidence": 0.991 + }, + { + "text": "plus", + "start": 30.1, + "end": 30.24, + "confidence": 0.99 + }, + { + "text": "sensible.", + "start": 30.24, + "end": 30.64, + "confidence": 0.997 + } + ] + }, + { + "id": 8, + "seek": 2534, + "start": 31.04, + "end": 37.82, + "text": " Et puis évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté touffu de la navigation web pour aller directement au but.", + "tokens": [ + 50639, + 3790, + 9093, + 24724, + 11, + 1930, + 38694, + 8645, + 16421, + 6212, + 730, + 5821, + 1956, + 21540, + 317, + 368, + 21234, + 1193, + 476, + 18437, + 10095, + 602, + 84, + 368, + 635, + 17346, + 3670, + 2016, + 8722, + 37297, + 1609, + 457, + 13, + 50989 + ], + "temperature": 0.0, + "avg_logprob": -0.10786590413150625, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 0.18929888308048248, + "confidence": 0.893, + "words": [ + { + "text": "Et", + "start": 31.04, + "end": 31.24, + "confidence": 0.964 + }, + { + "text": "puis", + "start": 31.24, + "end": 31.34, + "confidence": 0.967 + }, + { + "text": "évidemment,", + "start": 31.34, + "end": 31.62, + "confidence": 0.87 + }, + { + "text": "il", + "start": 31.66, + "end": 31.72, + "confidence": 0.99 + }, + { + "text": "faudrait", + "start": 31.72, + "end": 31.94, + "confidence": 0.996 + }, + { + 
"text": "parler", + "start": 31.94, + "end": 32.16, + "confidence": 0.864 + }, + { + "text": "aussi", + "start": 32.16, + "end": 32.34, + "confidence": 0.971 + }, + { + "text": "des", + "start": 32.34, + "end": 32.48, + "confidence": 0.993 + }, + { + "text": "applications", + "start": 32.48, + "end": 32.92, + "confidence": 0.993 + }, + { + "text": "qui", + "start": 32.92, + "end": 33.22, + "confidence": 0.564 + }, + { + "text": "permettent", + "start": 33.22, + "end": 33.76, + "confidence": 0.99 + }, + { + "text": "de", + "start": 33.76, + "end": 33.98, + "confidence": 0.871 + }, + { + "text": "contourner", + "start": 33.98, + "end": 34.42, + "confidence": 0.953 + }, + { + "text": "le", + "start": 34.42, + "end": 34.54, + "confidence": 0.75 + }, + { + "text": "côté", + "start": 34.54, + "end": 34.78, + "confidence": 0.985 + }, + { + "text": "touffu", + "start": 34.78, + "end": 35.32, + "confidence": 0.726 + }, + { + "text": "de", + "start": 35.32, + "end": 35.7, + "confidence": 0.882 + }, + { + "text": "la", + "start": 35.7, + "end": 35.82, + "confidence": 0.991 + }, + { + "text": "navigation", + "start": 35.82, + "end": 36.3, + "confidence": 0.992 + }, + { + "text": "web", + "start": 36.3, + "end": 36.58, + "confidence": 0.847 + }, + { + "text": "pour", + "start": 36.58, + "end": 36.76, + "confidence": 0.59 + }, + { + "text": "aller", + "start": 36.76, + "end": 37.16, + "confidence": 0.981 + }, + { + "text": "directement", + "start": 37.16, + "end": 37.54, + "confidence": 0.997 + }, + { + "text": "au", + "start": 37.54, + "end": 37.7, + "confidence": 0.969 + }, + { + "text": "but.", + "start": 37.7, + "end": 37.82, + "confidence": 0.995 + } + ] + }, + { + "id": 9, + "seek": 2534, + "start": 37.9, + "end": 46.6, + "text": " Bref, tout ça, ce sont les conditions qui permettent de créer cet objet dont Nicolas dit qu'il est vraisemblablement inédit dans l'histoire de l'humanité.", + "tokens": [ + 50989, + 49957, + 11, + 3486, + 2788, + 11, + 1769, + 4900, + 1512, + 
4487, + 1956, + 21540, + 317, + 368, + 32062, + 8603, + 14964, + 9400, + 38268, + 6176, + 421, + 6, + 388, + 871, + 6070, + 271, + 443, + 5199, + 712, + 518, + 294, + 7811, + 270, + 2680, + 287, + 6, + 29093, + 368, + 287, + 6, + 18796, + 5066, + 13, + 51439 + ], + "temperature": 0.0, + "avg_logprob": -0.10786590413150625, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 0.18929888308048248, + "confidence": 0.973, + "words": [ + { + "text": "Bref,", + "start": 37.9, + "end": 38.24, + "confidence": 0.984 + }, + { + "text": "tout", + "start": 38.88, + "end": 39.02, + "confidence": 0.7 + }, + { + "text": "ça,", + "start": 39.02, + "end": 39.4, + "confidence": 0.995 + }, + { + "text": "ce", + "start": 39.4, + "end": 39.72, + "confidence": 0.989 + }, + { + "text": "sont", + "start": 39.72, + "end": 39.92, + "confidence": 0.999 + }, + { + "text": "les", + "start": 39.92, + "end": 40.22, + "confidence": 0.992 + }, + { + "text": "conditions", + "start": 40.22, + "end": 40.7, + "confidence": 0.995 + }, + { + "text": "qui", + "start": 40.7, + "end": 41.02, + "confidence": 0.997 + }, + { + "text": "permettent", + "start": 41.02, + "end": 41.44, + "confidence": 0.996 + }, + { + "text": "de", + "start": 41.44, + "end": 41.74, + "confidence": 0.997 + }, + { + "text": "créer", + "start": 41.74, + "end": 42.1, + "confidence": 0.999 + }, + { + "text": "cet", + "start": 42.1, + "end": 42.38, + "confidence": 0.997 + }, + { + "text": "objet", + "start": 42.38, + "end": 42.64, + "confidence": 0.994 + }, + { + "text": "dont", + "start": 42.64, + "end": 42.84, + "confidence": 0.789 + }, + { + "text": "Nicolas", + "start": 42.84, + "end": 43.24, + "confidence": 0.988 + }, + { + "text": "dit", + "start": 43.24, + "end": 43.52, + "confidence": 0.986 + }, + { + "text": "qu'il", + "start": 43.52, + "end": 43.74, + "confidence": 0.982 + }, + { + "text": "est", + "start": 43.74, + "end": 43.94, + "confidence": 0.99 + }, + { + "text": "vraisemblablement", + "start": 43.94, + "end": 
44.86, + "confidence": 0.99 + }, + { + "text": "inédit", + "start": 44.86, + "end": 45.44, + "confidence": 0.98 + }, + { + "text": "dans", + "start": 45.44, + "end": 45.72, + "confidence": 0.967 + }, + { + "text": "l'histoire", + "start": 45.72, + "end": 46.02, + "confidence": 0.958 + }, + { + "text": "de", + "start": 46.02, + "end": 46.14, + "confidence": 0.999 + }, + { + "text": "l'humanité.", + "start": 46.14, + "end": 46.6, + "confidence": 0.992 + } + ] + }, + { + "id": 10, + "seek": 2534, + "start": 47.02, + "end": 48.78, + "text": " Mais ça, ça soulève une autre interrogation.", + "tokens": [ + 51439, + 6313, + 2788, + 11, + 2788, + 5133, + 31397, + 2251, + 15081, + 24871, + 399, + 13, + 51539 + ], + "temperature": 0.0, + "avg_logprob": -0.10786590413150625, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 0.18929888308048248, + "confidence": 0.969, + "words": [ + { + "text": "Mais", + "start": 47.02, + "end": 47.28, + "confidence": 0.84 + }, + { + "text": "ça,", + "start": 47.28, + "end": 47.48, + "confidence": 0.942 + }, + { + "text": "ça", + "start": 47.62, + "end": 47.64, + "confidence": 0.976 + }, + { + "text": "soulève", + "start": 47.64, + "end": 47.86, + "confidence": 0.993 + }, + { + "text": "une", + "start": 47.86, + "end": 48.02, + "confidence": 0.998 + }, + { + "text": "autre", + "start": 48.02, + "end": 48.2, + "confidence": 0.999 + }, + { + "text": "interrogation.", + "start": 48.2, + "end": 48.78, + "confidence": 0.997 + } + ] + }, + { + "id": 11, + "seek": 4884, + "start": 49.36, + "end": 55.5, + "text": " Est-ce que le fait que cet objet soit inédit induit que notre rapport à lui est aussi un rapport inédit ?", + "tokens": [ + 50389, + 4410, + 12, + 384, + 631, + 476, + 3887, + 631, + 8603, + 14964, + 12703, + 294, + 7811, + 270, + 13716, + 270, + 631, + 10349, + 18018, + 1531, + 8783, + 871, + 6212, + 517, + 18018, + 294, + 7811, + 270, + 2506, + 50689 + ], + "temperature": 0.0, + "avg_logprob": -0.0824541612104936, + 
"compression_ratio": 1.5960784313725491, + "no_speech_prob": 0.09794807434082031, + "confidence": 0.986, + "words": [ + { + "text": "Est-ce", + "start": 49.36, + "end": 49.64, + "confidence": 0.978 + }, + { + "text": "que", + "start": 49.64, + "end": 49.76, + "confidence": 0.991 + }, + { + "text": "le", + "start": 49.76, + "end": 49.82, + "confidence": 0.993 + }, + { + "text": "fait", + "start": 49.82, + "end": 50.0, + "confidence": 0.999 + }, + { + "text": "que", + "start": 50.0, + "end": 50.14, + "confidence": 0.991 + }, + { + "text": "cet", + "start": 50.14, + "end": 50.32, + "confidence": 0.99 + }, + { + "text": "objet", + "start": 50.32, + "end": 50.66, + "confidence": 0.997 + }, + { + "text": "soit", + "start": 50.66, + "end": 51.12, + "confidence": 0.995 + }, + { + "text": "inédit", + "start": 51.12, + "end": 51.78, + "confidence": 0.995 + }, + { + "text": "induit", + "start": 51.78, + "end": 52.32, + "confidence": 0.955 + }, + { + "text": "que", + "start": 52.32, + "end": 52.42, + "confidence": 0.984 + }, + { + "text": "notre", + "start": 52.42, + "end": 52.78, + "confidence": 0.995 + }, + { + "text": "rapport", + "start": 52.78, + "end": 53.28, + "confidence": 0.997 + }, + { + "text": "à", + "start": 53.28, + "end": 53.46, + "confidence": 0.978 + }, + { + "text": "lui", + "start": 53.46, + "end": 53.68, + "confidence": 0.999 + }, + { + "text": "est", + "start": 53.68, + "end": 54.14, + "confidence": 0.907 + }, + { + "text": "aussi", + "start": 54.14, + "end": 54.52, + "confidence": 0.996 + }, + { + "text": "un", + "start": 54.52, + "end": 54.72, + "confidence": 0.994 + }, + { + "text": "rapport", + "start": 54.72, + "end": 55.0, + "confidence": 0.997 + }, + { + "text": "inédit ?", + "start": 55.0, + "end": 55.5, + "confidence": 0.996 + } + ] + }, + { + "id": 12, + "seek": 4884, + "start": 55.76, + "end": 63.4, + "text": " Je veux dire, est-ce que le rapport qu'on a au smartphone est comparable à celui qu'on entretenait à d'autres objets techniques comme la 
voiture ou le téléphone ?", + "tokens": [ + 50689, + 2588, + 16389, + 1264, + 11, + 871, + 12, + 384, + 631, + 476, + 18018, + 421, + 6, + 266, + 257, + 1609, + 13307, + 871, + 25323, + 1531, + 22829, + 421, + 6, + 266, + 3962, + 1147, + 1001, + 1531, + 274, + 6, + 16752, + 1111, + 25349, + 7512, + 5173, + 635, + 38859, + 2820, + 476, + 47159, + 2506, + 51089 + ], + "temperature": 0.0, + "avg_logprob": -0.0824541612104936, + "compression_ratio": 1.5960784313725491, + "no_speech_prob": 0.09794807434082031, + "confidence": 0.96, + "words": [ + { + "text": "Je", + "start": 55.76, + "end": 55.9, + "confidence": 0.904 + }, + { + "text": "veux", + "start": 55.9, + "end": 56.0, + "confidence": 0.989 + }, + { + "text": "dire,", + "start": 56.0, + "end": 56.12, + "confidence": 0.997 + }, + { + "text": "est-ce", + "start": 56.22, + "end": 56.38, + "confidence": 0.99 + }, + { + "text": "que", + "start": 56.38, + "end": 56.5, + "confidence": 0.991 + }, + { + "text": "le", + "start": 56.5, + "end": 56.66, + "confidence": 0.996 + }, + { + "text": "rapport", + "start": 56.66, + "end": 56.86, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 56.86, + "end": 57.08, + "confidence": 0.985 + }, + { + "text": "a", + "start": 57.08, + "end": 57.16, + "confidence": 0.99 + }, + { + "text": "au", + "start": 57.16, + "end": 57.28, + "confidence": 0.966 + }, + { + "text": "smartphone", + "start": 57.28, + "end": 57.6, + "confidence": 0.995 + }, + { + "text": "est", + "start": 57.6, + "end": 57.86, + "confidence": 0.91 + }, + { + "text": "comparable", + "start": 57.86, + "end": 58.3, + "confidence": 0.996 + }, + { + "text": "à", + "start": 58.3, + "end": 58.5, + "confidence": 0.946 + }, + { + "text": "celui", + "start": 58.5, + "end": 58.66, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 58.66, + "end": 58.96, + "confidence": 0.989 + }, + { + "text": "entretenait", + "start": 58.96, + "end": 59.36, + "confidence": 0.926 + }, + { + "text": "à", + "start": 59.36, + 
"end": 59.48, + "confidence": 0.951 + }, + { + "text": "d'autres", + "start": 59.48, + "end": 59.7, + "confidence": 0.997 + }, + { + "text": "objets", + "start": 59.7, + "end": 59.98, + "confidence": 0.99 + }, + { + "text": "techniques", + "start": 59.98, + "end": 60.44, + "confidence": 0.984 + }, + { + "text": "comme", + "start": 60.44, + "end": 60.96, + "confidence": 0.496 + }, + { + "text": "la", + "start": 60.96, + "end": 61.52, + "confidence": 0.987 + }, + { + "text": "voiture", + "start": 61.52, + "end": 62.08, + "confidence": 1.0 + }, + { + "text": "ou", + "start": 62.08, + "end": 62.52, + "confidence": 0.95 + }, + { + "text": "le", + "start": 62.52, + "end": 62.74, + "confidence": 0.998 + }, + { + "text": "téléphone ?", + "start": 62.74, + "end": 63.4, + "confidence": 0.999 + } + ] + }, + { + "id": 13, + "seek": 4884, + "start": 65.36, + "end": 66.62, + "text": " Il n'y a pas d'équivalent en fait.", + "tokens": [ + 51189, + 4416, + 297, + 6, + 88, + 257, + 1736, + 274, + 6, + 20183, + 3576, + 317, + 465, + 3887, + 13, + 51239 + ], + "temperature": 0.0, + "avg_logprob": -0.0824541612104936, + "compression_ratio": 1.5960784313725491, + "no_speech_prob": 0.09794807434082031, + "confidence": 0.94, + "words": [ + { + "text": "Il", + "start": 65.36, + "end": 65.5, + "confidence": 0.849 + }, + { + "text": "n'y", + "start": 65.5, + "end": 65.54, + "confidence": 0.979 + }, + { + "text": "a", + "start": 65.54, + "end": 65.56, + "confidence": 0.993 + }, + { + "text": "pas", + "start": 65.56, + "end": 65.66, + "confidence": 0.999 + }, + { + "text": "d'équivalent", + "start": 65.66, + "end": 66.3, + "confidence": 0.995 + }, + { + "text": "en", + "start": 66.3, + "end": 66.44, + "confidence": 0.582 + }, + { + "text": "fait.", + "start": 66.44, + "end": 66.62, + "confidence": 0.997 + } + ] + }, + { + "id": 14, + "seek": 4884, + "start": 66.88, + "end": 71.24, + "text": " Et donc cette espèce de nouveauté dans la relation à l'objet, c'est fascinant et terrifiant.", + 
"tokens": [ + 51239, + 3790, + 5926, + 5550, + 7089, + 30236, + 368, + 11456, + 1375, + 526, + 2680, + 635, + 9721, + 1531, + 287, + 6, + 996, + 7108, + 11, + 269, + 6, + 377, + 7184, + 259, + 394, + 1030, + 7245, + 351, + 5798, + 13, + 51489 + ], + "temperature": 0.0, + "avg_logprob": -0.0824541612104936, + "compression_ratio": 1.5960784313725491, + "no_speech_prob": 0.09794807434082031, + "confidence": 0.957, + "words": [ + { + "text": "Et", + "start": 66.88, + "end": 67.02, + "confidence": 0.654 + }, + { + "text": "donc", + "start": 67.02, + "end": 67.12, + "confidence": 0.901 + }, + { + "text": "cette", + "start": 67.12, + "end": 67.3, + "confidence": 0.713 + }, + { + "text": "espèce", + "start": 67.3, + "end": 67.54, + "confidence": 0.996 + }, + { + "text": "de", + "start": 67.54, + "end": 67.76, + "confidence": 0.999 + }, + { + "text": "nouveauté", + "start": 67.76, + "end": 68.48, + "confidence": 0.98 + }, + { + "text": "dans", + "start": 68.48, + "end": 68.84, + "confidence": 0.982 + }, + { + "text": "la", + "start": 68.84, + "end": 68.96, + "confidence": 0.995 + }, + { + "text": "relation", + "start": 68.96, + "end": 69.24, + "confidence": 0.998 + }, + { + "text": "à", + "start": 69.24, + "end": 69.38, + "confidence": 0.997 + }, + { + "text": "l'objet,", + "start": 69.38, + "end": 70.0, + "confidence": 0.998 + }, + { + "text": "c'est", + "start": 70.28, + "end": 70.38, + "confidence": 0.977 + }, + { + "text": "fascinant", + "start": 70.38, + "end": 70.68, + "confidence": 0.98 + }, + { + "text": "et", + "start": 70.68, + "end": 70.76, + "confidence": 0.973 + }, + { + "text": "terrifiant.", + "start": 70.76, + "end": 71.24, + "confidence": 0.977 + } + ] + }, + { + "id": 15, + "seek": 7134, + "start": 71.62, + "end": 76.42, + "text": " Parce qu'on a l'impression, comme le disent les utilisateurs et les services, d'être dépendant de cet objet,", + "tokens": [ + 50389, + 20429, + 421, + 6, + 266, + 257, + 287, + 6, + 36107, + 11, + 5173, + 476, + 37313, + 1512, 
+ 33643, + 25929, + 1030, + 1512, + 3328, + 11, + 274, + 6, + 9498, + 45768, + 394, + 368, + 8603, + 14964, + 11, + 50639 + ], + "temperature": 0.0, + "avg_logprob": -0.11225989713507184, + "compression_ratio": 1.5975609756097562, + "no_speech_prob": 0.015181643888354301, + "confidence": 0.862, + "words": [ + { + "text": "Parce", + "start": 71.62, + "end": 71.9, + "confidence": 0.843 + }, + { + "text": "qu'on", + "start": 71.9, + "end": 72.4, + "confidence": 0.968 + }, + { + "text": "a", + "start": 72.4, + "end": 72.54, + "confidence": 0.979 + }, + { + "text": "l'impression,", + "start": 72.54, + "end": 72.9, + "confidence": 0.997 + }, + { + "text": "comme", + "start": 73.66, + "end": 73.9, + "confidence": 0.982 + }, + { + "text": "le", + "start": 73.9, + "end": 74.02, + "confidence": 0.984 + }, + { + "text": "disent", + "start": 74.02, + "end": 74.22, + "confidence": 0.997 + }, + { + "text": "les", + "start": 74.22, + "end": 74.52, + "confidence": 0.996 + }, + { + "text": "utilisateurs", + "start": 74.52, + "end": 74.84, + "confidence": 0.998 + }, + { + "text": "et", + "start": 74.84, + "end": 74.94, + "confidence": 0.845 + }, + { + "text": "les", + "start": 74.94, + "end": 75.02, + "confidence": 0.783 + }, + { + "text": "services,", + "start": 75.02, + "end": 75.18, + "confidence": 0.129 + }, + { + "text": "d'être", + "start": 75.22, + "end": 75.5, + "confidence": 0.935 + }, + { + "text": "dépendant", + "start": 75.5, + "end": 75.98, + "confidence": 0.709 + }, + { + "text": "de", + "start": 75.98, + "end": 76.08, + "confidence": 0.977 + }, + { + "text": "cet", + "start": 76.08, + "end": 76.26, + "confidence": 0.998 + }, + { + "text": "objet,", + "start": 76.26, + "end": 76.42, + "confidence": 0.997 + } + ] + }, + { + "id": 16, + "seek": 7134, + "start": 76.42, + "end": 83.3, + "text": " d'induire en fait une espèce de relation, de médiation avec le monde qui rend de l'ampleur et qui amène aussi à des formes de rejet.", + "tokens": [ + 50639, + 274, + 6, + 471, + 
43612, + 465, + 3887, + 2251, + 7089, + 30236, + 368, + 9721, + 11, + 368, + 42436, + 399, + 4163, + 476, + 10431, + 1956, + 6125, + 368, + 287, + 6, + 335, + 781, + 374, + 1030, + 1956, + 669, + 18832, + 6212, + 1531, + 730, + 1254, + 279, + 368, + 319, + 7108, + 13, + 50989 + ], + "temperature": 0.0, + "avg_logprob": -0.11225989713507184, + "compression_ratio": 1.5975609756097562, + "no_speech_prob": 0.015181643888354301, + "confidence": 0.923, + "words": [ + { + "text": "d'induire", + "start": 76.42, + "end": 77.1, + "confidence": 0.964 + }, + { + "text": "en", + "start": 77.1, + "end": 77.18, + "confidence": 0.72 + }, + { + "text": "fait", + "start": 77.18, + "end": 77.34, + "confidence": 0.997 + }, + { + "text": "une", + "start": 77.34, + "end": 77.5, + "confidence": 0.979 + }, + { + "text": "espèce", + "start": 77.5, + "end": 77.88, + "confidence": 0.997 + }, + { + "text": "de", + "start": 77.88, + "end": 78.28, + "confidence": 0.997 + }, + { + "text": "relation,", + "start": 78.28, + "end": 78.54, + "confidence": 0.422 + }, + { + "text": "de", + "start": 78.66, + "end": 78.96, + "confidence": 0.992 + }, + { + "text": "médiation", + "start": 78.96, + "end": 79.52, + "confidence": 0.997 + }, + { + "text": "avec", + "start": 79.52, + "end": 79.76, + "confidence": 0.954 + }, + { + "text": "le", + "start": 79.76, + "end": 80.02, + "confidence": 0.998 + }, + { + "text": "monde", + "start": 80.02, + "end": 80.3, + "confidence": 0.996 + }, + { + "text": "qui", + "start": 80.3, + "end": 81.24, + "confidence": 0.661 + }, + { + "text": "rend", + "start": 81.24, + "end": 81.66, + "confidence": 0.968 + }, + { + "text": "de", + "start": 81.66, + "end": 81.74, + "confidence": 0.688 + }, + { + "text": "l'ampleur", + "start": 81.74, + "end": 82.02, + "confidence": 0.995 + }, + { + "text": "et", + "start": 82.02, + "end": 82.1, + "confidence": 0.952 + }, + { + "text": "qui", + "start": 82.1, + "end": 82.2, + "confidence": 0.987 + }, + { + "text": "amène", + "start": 82.2, + 
"end": 82.38, + "confidence": 0.971 + }, + { + "text": "aussi", + "start": 82.38, + "end": 82.54, + "confidence": 0.946 + }, + { + "text": "à", + "start": 82.54, + "end": 82.62, + "confidence": 0.959 + }, + { + "text": "des", + "start": 82.62, + "end": 82.7, + "confidence": 0.992 + }, + { + "text": "formes", + "start": 82.7, + "end": 82.88, + "confidence": 0.995 + }, + { + "text": "de", + "start": 82.88, + "end": 83.04, + "confidence": 0.998 + }, + { + "text": "rejet.", + "start": 83.04, + "end": 83.3, + "confidence": 0.802 + } + ] + }, + { + "id": 17, + "seek": 7134, + "start": 83.92, + "end": 87.7, + "text": " Donc à objet inédit, rapport inédit.", + "tokens": [ + 50989, + 7477, + 1531, + 14964, + 294, + 7811, + 270, + 11, + 18018, + 294, + 7811, + 270, + 13, + 51189 + ], + "temperature": 0.0, + "avg_logprob": -0.11225989713507184, + "compression_ratio": 1.5975609756097562, + "no_speech_prob": 0.015181643888354301, + "confidence": 0.901, + "words": [ + { + "text": "Donc", + "start": 83.92, + "end": 84.46, + "confidence": 0.984 + }, + { + "text": "à", + "start": 84.46, + "end": 84.98, + "confidence": 0.483 + }, + { + "text": "objet", + "start": 84.98, + "end": 85.46, + "confidence": 0.77 + }, + { + "text": "inédit,", + "start": 85.46, + "end": 86.2, + "confidence": 0.994 + }, + { + "text": "rapport", + "start": 86.54, + "end": 86.9, + "confidence": 0.986 + }, + { + "text": "inédit.", + "start": 86.9, + "end": 87.7, + "confidence": 0.998 + } + ] + }, + { + "id": 18, + "seek": 7134, + "start": 88.02, + "end": 94.92, + "text": " Et ce rapport, si j'en crois à Nicolas, serait caractérisé par un mélange de dépendance et de rejet.", + "tokens": [ + 51189, + 3790, + 1769, + 18018, + 11, + 1511, + 361, + 6, + 268, + 21724, + 1531, + 38268, + 11, + 23139, + 1032, + 578, + 4198, + 22118, + 971, + 517, + 41953, + 933, + 368, + 45768, + 719, + 1030, + 368, + 319, + 7108, + 13, + 51539 + ], + "temperature": 0.0, + "avg_logprob": -0.11225989713507184, + "compression_ratio": 
1.5975609756097562, + "no_speech_prob": 0.015181643888354301, + "confidence": 0.975, + "words": [ + { + "text": "Et", + "start": 88.02, + "end": 88.62, + "confidence": 0.989 + }, + { + "text": "ce", + "start": 88.62, + "end": 88.9, + "confidence": 0.985 + }, + { + "text": "rapport,", + "start": 88.9, + "end": 89.32, + "confidence": 0.998 + }, + { + "text": "si", + "start": 89.38, + "end": 89.56, + "confidence": 0.999 + }, + { + "text": "j'en", + "start": 89.56, + "end": 89.78, + "confidence": 0.997 + }, + { + "text": "crois", + "start": 89.78, + "end": 89.88, + "confidence": 0.988 + }, + { + "text": "à", + "start": 89.88, + "end": 90.06, + "confidence": 0.724 + }, + { + "text": "Nicolas,", + "start": 90.06, + "end": 90.24, + "confidence": 0.997 + }, + { + "text": "serait", + "start": 90.7, + "end": 91.0, + "confidence": 0.904 + }, + { + "text": "caractérisé", + "start": 91.0, + "end": 91.8, + "confidence": 0.994 + }, + { + "text": "par", + "start": 91.8, + "end": 92.22, + "confidence": 0.995 + }, + { + "text": "un", + "start": 92.22, + "end": 92.52, + "confidence": 0.996 + }, + { + "text": "mélange", + "start": 92.52, + "end": 93.04, + "confidence": 0.999 + }, + { + "text": "de", + "start": 93.04, + "end": 93.46, + "confidence": 0.998 + }, + { + "text": "dépendance", + "start": 93.46, + "end": 94.12, + "confidence": 0.938 + }, + { + "text": "et", + "start": 94.12, + "end": 94.54, + "confidence": 0.998 + }, + { + "text": "de", + "start": 94.54, + "end": 94.72, + "confidence": 0.999 + }, + { + "text": "rejet.", + "start": 94.72, + "end": 94.92, + "confidence": 0.988 + } + ] + }, + { + "id": 19, + "seek": 9484, + "start": 95.74, + "end": 102.82, + "text": " Bon, en vrai, il faudrait remonter très très finement toute l'histoire des objets techniques et de leur insertion dans nos vies", + "tokens": [ + 50389, + 7368, + 11, + 465, + 17815, + 11, + 1930, + 38694, + 8645, + 890, + 41806, + 5732, + 5732, + 962, + 1712, + 14953, + 287, + 6, + 29093, + 730, + 1111, + 25349, + 
7512, + 1030, + 368, + 9580, + 8969, + 313, + 2680, + 3269, + 371, + 530, + 50739 + ], + "temperature": 0.0, + "avg_logprob": -0.07819620768229167, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434891313314438, + "confidence": 0.932, + "words": [ + { + "text": "Bon,", + "start": 95.74, + "end": 96.0, + "confidence": 0.751 + }, + { + "text": "en", + "start": 96.34, + "end": 96.54, + "confidence": 0.992 + }, + { + "text": "vrai,", + "start": 96.54, + "end": 96.86, + "confidence": 0.991 + }, + { + "text": "il", + "start": 97.06, + "end": 97.18, + "confidence": 0.992 + }, + { + "text": "faudrait", + "start": 97.18, + "end": 97.58, + "confidence": 0.996 + }, + { + "text": "remonter", + "start": 97.58, + "end": 98.06, + "confidence": 0.995 + }, + { + "text": "très", + "start": 98.06, + "end": 98.58, + "confidence": 0.995 + }, + { + "text": "très", + "start": 98.58, + "end": 98.84, + "confidence": 0.757 + }, + { + "text": "finement", + "start": 98.84, + "end": 99.38, + "confidence": 0.823 + }, + { + "text": "toute", + "start": 99.38, + "end": 99.7, + "confidence": 0.937 + }, + { + "text": "l'histoire", + "start": 99.7, + "end": 100.08, + "confidence": 0.996 + }, + { + "text": "des", + "start": 100.08, + "end": 100.26, + "confidence": 0.991 + }, + { + "text": "objets", + "start": 100.26, + "end": 100.52, + "confidence": 0.998 + }, + { + "text": "techniques", + "start": 100.52, + "end": 100.94, + "confidence": 0.982 + }, + { + "text": "et", + "start": 100.94, + "end": 101.54, + "confidence": 0.512 + }, + { + "text": "de", + "start": 101.54, + "end": 101.7, + "confidence": 0.984 + }, + { + "text": "leur", + "start": 101.7, + "end": 101.84, + "confidence": 0.818 + }, + { + "text": "insertion", + "start": 101.84, + "end": 102.34, + "confidence": 0.994 + }, + { + "text": "dans", + "start": 102.34, + "end": 102.5, + "confidence": 0.969 + }, + { + "text": "nos", + "start": 102.5, + "end": 102.66, + "confidence": 0.998 + }, + { + "text": "vies", + "start": 
102.66, + "end": 102.82, + "confidence": 0.998 + } + ] + }, + { + "id": 20, + "seek": 9484, + "start": 102.86, + "end": 105.72, + "text": " pour déterminer si ce rapport est totalement inédit.", + "tokens": [ + 50739, + 2016, + 2795, + 29725, + 260, + 1511, + 1769, + 18018, + 871, + 45203, + 294, + 7811, + 270, + 13, + 50889 + ], + "temperature": 0.0, + "avg_logprob": -0.07819620768229167, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434891313314438, + "confidence": 0.995, + "words": [ + { + "text": "pour", + "start": 102.86, + "end": 103.1, + "confidence": 0.989 + }, + { + "text": "déterminer", + "start": 103.1, + "end": 103.64, + "confidence": 0.995 + }, + { + "text": "si", + "start": 103.64, + "end": 103.76, + "confidence": 0.986 + }, + { + "text": "ce", + "start": 103.76, + "end": 103.94, + "confidence": 0.991 + }, + { + "text": "rapport", + "start": 103.94, + "end": 104.26, + "confidence": 0.998 + }, + { + "text": "est", + "start": 104.26, + "end": 104.84, + "confidence": 0.997 + }, + { + "text": "totalement", + "start": 104.84, + "end": 105.3, + "confidence": 0.998 + }, + { + "text": "inédit.", + "start": 105.3, + "end": 105.72, + "confidence": 0.998 + } + ] + }, + { + "id": 21, + "seek": 9484, + "start": 106.14, + "end": 109.32, + "text": " Mais j'ai l'impression comme ça que Nicolas ne se trompe pas vraiment.", + "tokens": [ + 50889, + 6313, + 361, + 6, + 1301, + 287, + 6, + 36107, + 5173, + 2788, + 631, + 38268, + 408, + 369, + 504, + 298, + 494, + 1736, + 8322, + 13, + 51089 + ], + "temperature": 0.0, + "avg_logprob": -0.07819620768229167, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434891313314438, + "confidence": 0.932, + "words": [ + { + "text": "Mais", + "start": 106.14, + "end": 106.4, + "confidence": 0.975 + }, + { + "text": "j'ai", + "start": 106.4, + "end": 106.92, + "confidence": 0.956 + }, + { + "text": "l'impression", + "start": 106.92, + "end": 107.38, + "confidence": 0.999 + }, + { + "text": 
"comme", + "start": 107.38, + "end": 107.58, + "confidence": 0.513 + }, + { + "text": "ça", + "start": 107.58, + "end": 107.82, + "confidence": 0.969 + }, + { + "text": "que", + "start": 107.82, + "end": 108.14, + "confidence": 0.973 + }, + { + "text": "Nicolas", + "start": 108.14, + "end": 108.48, + "confidence": 0.983 + }, + { + "text": "ne", + "start": 108.48, + "end": 108.6, + "confidence": 0.72 + }, + { + "text": "se", + "start": 108.6, + "end": 108.72, + "confidence": 0.992 + }, + { + "text": "trompe", + "start": 108.72, + "end": 108.88, + "confidence": 0.993 + }, + { + "text": "pas", + "start": 108.88, + "end": 109.08, + "confidence": 0.999 + }, + { + "text": "vraiment.", + "start": 109.08, + "end": 109.32, + "confidence": 0.989 + } + ] + }, + { + "id": 22, + "seek": 9484, + "start": 109.94, + "end": 115.06, + "text": " Pour autant que je sache, il y a eu plein de discussions autour de la voiture ou même du téléphone.", + "tokens": [ + 51089, + 8732, + 34081, + 631, + 1506, + 262, + 6000, + 11, + 1930, + 288, + 257, + 2228, + 21088, + 368, + 11088, + 30249, + 368, + 635, + 38859, + 2820, + 5698, + 1581, + 47159, + 13, + 51389 + ], + "temperature": 0.0, + "avg_logprob": -0.07819620768229167, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434891313314438, + "confidence": 0.964, + "words": [ + { + "text": "Pour", + "start": 109.94, + "end": 110.1, + "confidence": 0.995 + }, + { + "text": "autant", + "start": 110.1, + "end": 110.24, + "confidence": 1.0 + }, + { + "text": "que", + "start": 110.24, + "end": 110.38, + "confidence": 0.984 + }, + { + "text": "je", + "start": 110.38, + "end": 110.52, + "confidence": 0.998 + }, + { + "text": "sache,", + "start": 110.52, + "end": 110.84, + "confidence": 0.952 + }, + { + "text": "il", + "start": 111.08, + "end": 111.16, + "confidence": 0.994 + }, + { + "text": "y", + "start": 111.16, + "end": 111.3, + "confidence": 0.995 + }, + { + "text": "a", + "start": 111.3, + "end": 111.32, + "confidence": 0.993 
+ }, + { + "text": "eu", + "start": 111.32, + "end": 111.62, + "confidence": 0.998 + }, + { + "text": "plein", + "start": 111.62, + "end": 111.9, + "confidence": 0.966 + }, + { + "text": "de", + "start": 111.9, + "end": 112.14, + "confidence": 0.997 + }, + { + "text": "discussions", + "start": 112.14, + "end": 112.66, + "confidence": 0.83 + }, + { + "text": "autour", + "start": 112.66, + "end": 113.02, + "confidence": 0.995 + }, + { + "text": "de", + "start": 113.02, + "end": 113.38, + "confidence": 0.996 + }, + { + "text": "la", + "start": 113.38, + "end": 113.52, + "confidence": 0.998 + }, + { + "text": "voiture", + "start": 113.52, + "end": 113.88, + "confidence": 1.0 + }, + { + "text": "ou", + "start": 113.88, + "end": 114.06, + "confidence": 0.69 + }, + { + "text": "même", + "start": 114.06, + "end": 114.34, + "confidence": 0.995 + }, + { + "text": "du", + "start": 114.34, + "end": 114.64, + "confidence": 0.992 + }, + { + "text": "téléphone.", + "start": 114.64, + "end": 115.06, + "confidence": 0.999 + } + ] + }, + { + "id": 23, + "seek": 9484, + "start": 115.48, + "end": 117.7, + "text": " Mais la dépendance n'était pas du même ordre.", + "tokens": [ + 51389, + 6313, + 635, + 45768, + 719, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 4792, + 265, + 13, + 51489 + ], + "temperature": 0.0, + "avg_logprob": -0.07819620768229167, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434891313314438, + "confidence": 0.992, + "words": [ + { + "text": "Mais", + "start": 115.48, + "end": 115.78, + "confidence": 0.993 + }, + { + "text": "la", + "start": 115.78, + "end": 116.04, + "confidence": 0.943 + }, + { + "text": "dépendance", + "start": 116.04, + "end": 116.42, + "confidence": 0.996 + }, + { + "text": "n'était", + "start": 116.42, + "end": 116.7, + "confidence": 0.994 + }, + { + "text": "pas", + "start": 116.7, + "end": 117.0, + "confidence": 0.998 + }, + { + "text": "du", + "start": 117.0, + "end": 117.2, + "confidence": 0.996 + }, + { + "text": 
"même", + "start": 117.2, + "end": 117.44, + "confidence": 0.998 + }, + { + "text": "ordre.", + "start": 117.44, + "end": 117.7, + "confidence": 0.999 + } + ] + }, + { + "id": 24, + "seek": 9484, + "start": 117.72, + "end": 119.78, + "text": " Donc le rejet non plus n'était pas du même ordre.", + "tokens": [ + 51489, + 7477, + 476, + 319, + 7108, + 2107, + 1804, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 4792, + 265, + 13, + 51589 + ], + "temperature": 0.0, + "avg_logprob": -0.07819620768229167, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434891313314438, + "confidence": 0.979, + "words": [ + { + "text": "Donc", + "start": 117.72, + "end": 118.02, + "confidence": 0.959 + }, + { + "text": "le", + "start": 118.02, + "end": 118.4, + "confidence": 0.819 + }, + { + "text": "rejet", + "start": 118.4, + "end": 118.64, + "confidence": 0.999 + }, + { + "text": "non", + "start": 118.64, + "end": 118.82, + "confidence": 0.982 + }, + { + "text": "plus", + "start": 118.82, + "end": 118.94, + "confidence": 0.994 + }, + { + "text": "n'était", + "start": 118.94, + "end": 119.18, + "confidence": 0.991 + }, + { + "text": "pas", + "start": 119.18, + "end": 119.3, + "confidence": 0.998 + }, + { + "text": "du", + "start": 119.3, + "end": 119.42, + "confidence": 0.995 + }, + { + "text": "même", + "start": 119.42, + "end": 119.58, + "confidence": 0.999 + }, + { + "text": "ordre.", + "start": 119.58, + "end": 119.78, + "confidence": 0.999 + } + ] + }, + { + "id": 25, + "seek": 9484, + "start": 120.04, + "end": 123.1, + "text": " On peut adorer sa bagnole, en avoir besoin pour plein de choses.", + "tokens": [ + 51589, + 1282, + 5977, + 614, + 17618, + 601, + 3411, + 1771, + 306, + 11, + 465, + 10853, + 19207, + 2016, + 21088, + 368, + 14488, + 13, + 51789 + ], + "temperature": 0.0, + "avg_logprob": -0.07819620768229167, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434891313314438, + "confidence": 0.971, + "words": [ + { + "text": "On", + 
"start": 120.04, + "end": 120.16, + "confidence": 0.757 + }, + { + "text": "peut", + "start": 120.16, + "end": 120.36, + "confidence": 0.997 + }, + { + "text": "adorer", + "start": 120.36, + "end": 120.68, + "confidence": 0.991 + }, + { + "text": "sa", + "start": 120.68, + "end": 120.88, + "confidence": 0.98 + }, + { + "text": "bagnole,", + "start": 120.88, + "end": 121.34, + "confidence": 0.985 + }, + { + "text": "en", + "start": 121.42, + "end": 121.56, + "confidence": 0.971 + }, + { + "text": "avoir", + "start": 121.56, + "end": 121.8, + "confidence": 0.997 + }, + { + "text": "besoin", + "start": 121.8, + "end": 122.12, + "confidence": 0.999 + }, + { + "text": "pour", + "start": 122.12, + "end": 122.46, + "confidence": 0.989 + }, + { + "text": "plein", + "start": 122.46, + "end": 122.7, + "confidence": 0.987 + }, + { + "text": "de", + "start": 122.7, + "end": 122.78, + "confidence": 0.997 + }, + { + "text": "choses.", + "start": 122.78, + "end": 123.1, + "confidence": 0.99 + } + ] + }, + { + "id": 26, + "seek": 12334, + "start": 123.38, + "end": 126.44, + "text": " Le soir, quand on va se coucher, on la laisse.", + "tokens": [ + 50389, + 1456, + 27105, + 11, + 6932, + 322, + 2773, + 369, + 1384, + 6759, + 11, + 322, + 635, + 30969, + 13, + 50539 + ], + "temperature": 0.0, + "avg_logprob": -0.08275167058023174, + "compression_ratio": 1.6818181818181819, + "no_speech_prob": 0.01115426979959011, + "confidence": 0.94, + "words": [ + { + "text": "Le", + "start": 123.38, + "end": 123.96, + "confidence": 0.645 + }, + { + "text": "soir,", + "start": 123.96, + "end": 124.54, + "confidence": 0.998 + }, + { + "text": "quand", + "start": 124.74, + "end": 124.94, + "confidence": 0.982 + }, + { + "text": "on", + "start": 124.94, + "end": 125.06, + "confidence": 0.996 + }, + { + "text": "va", + "start": 125.06, + "end": 125.16, + "confidence": 0.99 + }, + { + "text": "se", + "start": 125.16, + "end": 125.26, + "confidence": 0.989 + }, + { + "text": "coucher,", + "start": 
125.26, + "end": 125.7, + "confidence": 0.989 + }, + { + "text": "on", + "start": 125.94, + "end": 126.04, + "confidence": 0.992 + }, + { + "text": "la", + "start": 126.04, + "end": 126.18, + "confidence": 0.852 + }, + { + "text": "laisse.", + "start": 126.18, + "end": 126.44, + "confidence": 0.999 + } + ] + }, + { + "id": 27, + "seek": 12334, + "start": 127.0, + "end": 130.3, + "text": " On ne l'a pas dans la main quand on est au lit, on ne l'emmène pas au chiottes.", + "tokens": [ + 50539, + 1282, + 408, + 287, + 6, + 64, + 1736, + 2680, + 635, + 2135, + 6932, + 322, + 871, + 1609, + 7997, + 11, + 322, + 408, + 287, + 6, + 443, + 76, + 18832, + 1736, + 1609, + 13228, + 1521, + 279, + 13, + 50739 + ], + "temperature": 0.0, + "avg_logprob": -0.08275167058023174, + "compression_ratio": 1.6818181818181819, + "no_speech_prob": 0.01115426979959011, + "confidence": 0.918, + "words": [ + { + "text": "On", + "start": 127.0, + "end": 127.34, + "confidence": 0.967 + }, + { + "text": "ne", + "start": 127.34, + "end": 127.46, + "confidence": 0.801 + }, + { + "text": "l'a", + "start": 127.46, + "end": 127.56, + "confidence": 0.929 + }, + { + "text": "pas", + "start": 127.56, + "end": 127.68, + "confidence": 0.999 + }, + { + "text": "dans", + "start": 127.68, + "end": 127.86, + "confidence": 0.992 + }, + { + "text": "la", + "start": 127.86, + "end": 128.08, + "confidence": 0.996 + }, + { + "text": "main", + "start": 128.08, + "end": 128.26, + "confidence": 0.999 + }, + { + "text": "quand", + "start": 128.26, + "end": 128.48, + "confidence": 0.936 + }, + { + "text": "on", + "start": 128.48, + "end": 128.6, + "confidence": 0.993 + }, + { + "text": "est", + "start": 128.6, + "end": 128.74, + "confidence": 0.992 + }, + { + "text": "au", + "start": 128.74, + "end": 128.94, + "confidence": 0.976 + }, + { + "text": "lit,", + "start": 128.94, + "end": 129.1, + "confidence": 0.999 + }, + { + "text": "on", + "start": 129.16, + "end": 129.28, + "confidence": 0.43 + }, + { + "text": "ne", 
+ "start": 129.28, + "end": 129.3, + "confidence": 0.949 + }, + { + "text": "l'emmène", + "start": 129.3, + "end": 129.58, + "confidence": 0.993 + }, + { + "text": "pas", + "start": 129.58, + "end": 129.7, + "confidence": 0.997 + }, + { + "text": "au", + "start": 129.7, + "end": 129.88, + "confidence": 0.72 + }, + { + "text": "chiottes.", + "start": 129.88, + "end": 130.3, + "confidence": 0.886 + } + ] + }, + { + "id": 28, + "seek": 12334, + "start": 130.84, + "end": 136.88, + "text": " On pouvait être énervé par son môme qui occupait la ligne de téléphone pendant une heure chaque soir pour discuter avec un copain.", + "tokens": [ + 50739, + 1282, + 45913, + 7418, + 45045, + 15797, + 971, + 1872, + 275, + 2851, + 1398, + 1956, + 8073, + 1001, + 635, + 34207, + 368, + 47159, + 17338, + 2251, + 30027, + 18920, + 27105, + 2016, + 2983, + 20314, + 4163, + 517, + 2971, + 491, + 13, + 51039 + ], + "temperature": 0.0, + "avg_logprob": -0.08275167058023174, + "compression_ratio": 1.6818181818181819, + "no_speech_prob": 0.01115426979959011, + "confidence": 0.925, + "words": [ + { + "text": "On", + "start": 130.84, + "end": 131.06, + "confidence": 0.996 + }, + { + "text": "pouvait", + "start": 131.06, + "end": 131.26, + "confidence": 0.989 + }, + { + "text": "être", + "start": 131.26, + "end": 131.58, + "confidence": 0.995 + }, + { + "text": "énervé", + "start": 131.58, + "end": 132.22, + "confidence": 0.896 + }, + { + "text": "par", + "start": 132.22, + "end": 132.46, + "confidence": 0.992 + }, + { + "text": "son", + "start": 132.46, + "end": 132.72, + "confidence": 0.998 + }, + { + "text": "môme", + "start": 132.72, + "end": 133.08, + "confidence": 0.758 + }, + { + "text": "qui", + "start": 133.08, + "end": 133.34, + "confidence": 0.915 + }, + { + "text": "occupait", + "start": 133.34, + "end": 133.74, + "confidence": 0.992 + }, + { + "text": "la", + "start": 133.74, + "end": 133.86, + "confidence": 0.986 + }, + { + "text": "ligne", + "start": 133.86, + "end": 134.06, + 
"confidence": 0.999 + }, + { + "text": "de", + "start": 134.06, + "end": 134.22, + "confidence": 0.995 + }, + { + "text": "téléphone", + "start": 134.22, + "end": 134.6, + "confidence": 0.992 + }, + { + "text": "pendant", + "start": 134.6, + "end": 134.92, + "confidence": 0.709 + }, + { + "text": "une", + "start": 134.92, + "end": 135.16, + "confidence": 0.78 + }, + { + "text": "heure", + "start": 135.16, + "end": 135.34, + "confidence": 0.995 + }, + { + "text": "chaque", + "start": 135.34, + "end": 135.58, + "confidence": 0.982 + }, + { + "text": "soir", + "start": 135.58, + "end": 135.8, + "confidence": 0.995 + }, + { + "text": "pour", + "start": 135.8, + "end": 135.98, + "confidence": 0.66 + }, + { + "text": "discuter", + "start": 135.98, + "end": 136.3, + "confidence": 0.997 + }, + { + "text": "avec", + "start": 136.3, + "end": 136.5, + "confidence": 0.995 + }, + { + "text": "un", + "start": 136.5, + "end": 136.66, + "confidence": 0.997 + }, + { + "text": "copain.", + "start": 136.66, + "end": 136.88, + "confidence": 0.996 + } + ] + }, + { + "id": 29, + "seek": 12334, + "start": 137.26, + "end": 141.86, + "text": " Mais ça ne ressemblait pas à ce qu'on peut ressentir à voir ce même môme aujourd'hui,", + "tokens": [ + 51039, + 6313, + 2788, + 408, + 725, + 15750, + 35235, + 1736, + 1531, + 1769, + 421, + 6, + 266, + 5977, + 24689, + 317, + 347, + 1531, + 10695, + 1769, + 5698, + 275, + 2851, + 1398, + 14023, + 6, + 10556, + 11, + 51289 + ], + "temperature": 0.0, + "avg_logprob": -0.08275167058023174, + "compression_ratio": 1.6818181818181819, + "no_speech_prob": 0.01115426979959011, + "confidence": 0.956, + "words": [ + { + "text": "Mais", + "start": 137.26, + "end": 137.5, + "confidence": 0.989 + }, + { + "text": "ça", + "start": 137.5, + "end": 137.68, + "confidence": 0.928 + }, + { + "text": "ne", + "start": 137.68, + "end": 137.88, + "confidence": 0.999 + }, + { + "text": "ressemblait", + "start": 137.88, + "end": 138.42, + "confidence": 0.993 + }, + { + 
"text": "pas", + "start": 138.42, + "end": 138.78, + "confidence": 0.994 + }, + { + "text": "à", + "start": 138.78, + "end": 138.9, + "confidence": 0.989 + }, + { + "text": "ce", + "start": 138.9, + "end": 138.98, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 138.98, + "end": 139.12, + "confidence": 0.976 + }, + { + "text": "peut", + "start": 139.12, + "end": 139.48, + "confidence": 0.979 + }, + { + "text": "ressentir", + "start": 139.48, + "end": 140.1, + "confidence": 0.996 + }, + { + "text": "à", + "start": 140.1, + "end": 140.32, + "confidence": 0.575 + }, + { + "text": "voir", + "start": 140.32, + "end": 140.46, + "confidence": 0.728 + }, + { + "text": "ce", + "start": 140.46, + "end": 140.7, + "confidence": 0.985 + }, + { + "text": "même", + "start": 140.7, + "end": 140.94, + "confidence": 0.982 + }, + { + "text": "môme", + "start": 140.94, + "end": 141.28, + "confidence": 0.997 + }, + { + "text": "aujourd'hui,", + "start": 141.28, + "end": 141.86, + "confidence": 0.994 + } + ] + }, + { + "id": 30, + "seek": 12334, + "start": 142.16, + "end": 144.22, + "text": " continuellement avec son smartphone dans la main,", + "tokens": [ + 51289, + 2354, + 285, + 1712, + 4163, + 1872, + 13307, + 2680, + 635, + 2135, + 11, + 51414 + ], + "temperature": 0.0, + "avg_logprob": -0.08275167058023174, + "compression_ratio": 1.6818181818181819, + "no_speech_prob": 0.01115426979959011, + "confidence": 0.961, + "words": [ + { + "text": "continuellement", + "start": 142.16, + "end": 142.94, + "confidence": 0.963 + }, + { + "text": "avec", + "start": 142.94, + "end": 143.2, + "confidence": 0.844 + }, + { + "text": "son", + "start": 143.2, + "end": 143.42, + "confidence": 0.994 + }, + { + "text": "smartphone", + "start": 143.42, + "end": 143.76, + "confidence": 0.972 + }, + { + "text": "dans", + "start": 143.76, + "end": 143.92, + "confidence": 0.963 + }, + { + "text": "la", + "start": 143.92, + "end": 144.02, + "confidence": 0.997 + }, + { + "text": "main,", + 
"start": 144.02, + "end": 144.22, + "confidence": 0.997 + } + ] + }, + { + "id": 31, + "seek": 12334, + "start": 144.34, + "end": 148.8, + "text": " comme si c'était une sorte de pacemaker externe, comme si le lâcher allait entraîner sa mort immédiate.", + "tokens": [ + 51414, + 5173, + 1511, + 269, + 6, + 9743, + 2251, + 25559, + 368, + 15165, + 49523, + 454, + 391, + 716, + 11, + 5173, + 1511, + 476, + 48835, + 6759, + 439, + 1001, + 22284, + 7517, + 1193, + 601, + 6599, + 3397, + 526, + 4504, + 473, + 13, + 51639 + ], + "temperature": 0.0, + "avg_logprob": -0.08275167058023174, + "compression_ratio": 1.6818181818181819, + "no_speech_prob": 0.01115426979959011, + "confidence": 0.973, + "words": [ + { + "text": "comme", + "start": 144.34, + "end": 144.56, + "confidence": 0.985 + }, + { + "text": "si", + "start": 144.56, + "end": 144.66, + "confidence": 0.971 + }, + { + "text": "c'était", + "start": 144.66, + "end": 144.86, + "confidence": 0.987 + }, + { + "text": "une", + "start": 144.86, + "end": 144.98, + "confidence": 0.983 + }, + { + "text": "sorte", + "start": 144.98, + "end": 145.14, + "confidence": 0.992 + }, + { + "text": "de", + "start": 145.14, + "end": 145.34, + "confidence": 0.989 + }, + { + "text": "pacemaker", + "start": 145.34, + "end": 145.82, + "confidence": 0.931 + }, + { + "text": "externe,", + "start": 145.82, + "end": 146.4, + "confidence": 0.99 + }, + { + "text": "comme", + "start": 146.44, + "end": 146.6, + "confidence": 0.721 + }, + { + "text": "si", + "start": 146.6, + "end": 146.74, + "confidence": 0.993 + }, + { + "text": "le", + "start": 146.74, + "end": 147.0, + "confidence": 0.996 + }, + { + "text": "lâcher", + "start": 147.0, + "end": 147.36, + "confidence": 0.968 + }, + { + "text": "allait", + "start": 147.36, + "end": 147.58, + "confidence": 0.991 + }, + { + "text": "entraîner", + "start": 147.58, + "end": 147.88, + "confidence": 0.982 + }, + { + "text": "sa", + "start": 147.88, + "end": 148.02, + "confidence": 0.999 + }, + { + 
"text": "mort", + "start": 148.02, + "end": 148.3, + "confidence": 0.998 + }, + { + "text": "immédiate.", + "start": 148.3, + "end": 148.8, + "confidence": 0.998 + } + ] + }, + { + "id": 32, + "seek": 12334, + "start": 148.94, + "end": 152.02, + "text": " Bon, je dis ça pour le môme, mais c'est évidemment valable pour nous aussi.", + "tokens": [ + 51639, + 7368, + 11, + 1506, + 717, + 2788, + 2016, + 476, + 275, + 2851, + 1398, + 11, + 2420, + 269, + 6, + 377, + 24724, + 1323, + 712, + 2016, + 4666, + 6212, + 13, + 51839 + ], + "temperature": 0.0, + "avg_logprob": -0.08275167058023174, + "compression_ratio": 1.6818181818181819, + "no_speech_prob": 0.01115426979959011, + "confidence": 0.973, + "words": [ + { + "text": "Bon,", + "start": 148.94, + "end": 149.24, + "confidence": 0.804 + }, + { + "text": "je", + "start": 149.26, + "end": 149.34, + "confidence": 0.933 + }, + { + "text": "dis", + "start": 149.34, + "end": 149.46, + "confidence": 0.982 + }, + { + "text": "ça", + "start": 149.46, + "end": 149.64, + "confidence": 0.992 + }, + { + "text": "pour", + "start": 149.64, + "end": 149.78, + "confidence": 0.997 + }, + { + "text": "le", + "start": 149.78, + "end": 149.9, + "confidence": 0.994 + }, + { + "text": "môme,", + "start": 149.9, + "end": 150.1, + "confidence": 0.999 + }, + { + "text": "mais", + "start": 150.32, + "end": 150.5, + "confidence": 0.904 + }, + { + "text": "c'est", + "start": 150.5, + "end": 150.84, + "confidence": 0.979 + }, + { + "text": "évidemment", + "start": 150.84, + "end": 151.18, + "confidence": 0.979 + }, + { + "text": "valable", + "start": 151.18, + "end": 151.48, + "confidence": 0.997 + }, + { + "text": "pour", + "start": 151.48, + "end": 151.64, + "confidence": 0.996 + }, + { + "text": "nous", + "start": 151.64, + "end": 151.78, + "confidence": 0.998 + }, + { + "text": "aussi.", + "start": 151.78, + "end": 152.02, + "confidence": 0.997 + } + ] + }, + { + "id": 33, + "seek": 15284, + "start": 153.14, + "end": 154.68, + "text": " Donc, 
rapport inédit, d'accord.", + "tokens": [ + 50389, + 7477, + 11, + 18018, + 294, + 7811, + 270, + 11, + 274, + 6, + 19947, + 13, + 50489 + ], + "temperature": 0.0, + "avg_logprob": -0.1240098174189178, + "compression_ratio": 1.6703296703296704, + "no_speech_prob": 0.07031125575304031, + "confidence": 0.813, + "words": [ + { + "text": "Donc,", + "start": 153.14, + "end": 153.24, + "confidence": 0.232 + }, + { + "text": "rapport", + "start": 153.26, + "end": 153.62, + "confidence": 0.855 + }, + { + "text": "inédit,", + "start": 153.62, + "end": 154.18, + "confidence": 0.992 + }, + { + "text": "d'accord.", + "start": 154.28, + "end": 154.68, + "confidence": 0.997 + } + ] + }, + { + "id": 34, + "seek": 15284, + "start": 155.64, + "end": 158.52, + "text": " Mais pourquoi a-t-on l'impression qu'on n'en sortira jamais ?", + "tokens": [ + 50489, + 6313, + 19934, + 257, + 12, + 83, + 12, + 266, + 287, + 6, + 36107, + 421, + 6, + 266, + 297, + 6, + 268, + 1333, + 4271, + 14540, + 2506, + 50639 + ], + "temperature": 0.0, + "avg_logprob": -0.1240098174189178, + "compression_ratio": 1.6703296703296704, + "no_speech_prob": 0.07031125575304031, + "confidence": 0.958, + "words": [ + { + "text": "Mais", + "start": 155.64, + "end": 155.88, + "confidence": 0.991 + }, + { + "text": "pourquoi", + "start": 155.88, + "end": 156.36, + "confidence": 0.992 + }, + { + "text": "a-t-on", + "start": 156.36, + "end": 156.7, + "confidence": 0.95 + }, + { + "text": "l'impression", + "start": 156.7, + "end": 157.06, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 157.06, + "end": 157.28, + "confidence": 0.99 + }, + { + "text": "n'en", + "start": 157.28, + "end": 157.5, + "confidence": 0.859 + }, + { + "text": "sortira", + "start": 157.5, + "end": 157.86, + "confidence": 0.974 + }, + { + "text": "jamais ?", + "start": 157.86, + "end": 158.52, + "confidence": 0.997 + } + ] + }, + { + "id": 35, + "seek": 15284, + "start": 159.12, + "end": 165.36, + "text": " Est-ce qu'il faut en remettre 
la faute sur les gens qui ont créé cet outil merveilleux et diabolique, et diabolique parce que merveilleux ?", + "tokens": [ + 50639, + 4410, + 12, + 384, + 421, + 6, + 388, + 8487, + 465, + 890, + 40681, + 635, + 2050, + 1169, + 1022, + 1512, + 10668, + 1956, + 6592, + 15609, + 526, + 8603, + 484, + 388, + 3551, + 303, + 3409, + 2449, + 1030, + 1026, + 14923, + 1925, + 11, + 1030, + 1026, + 14923, + 1925, + 6992, + 631, + 3551, + 303, + 3409, + 2449, + 2506, + 50989 + ], + "temperature": 0.0, + "avg_logprob": -0.1240098174189178, + "compression_ratio": 1.6703296703296704, + "no_speech_prob": 0.07031125575304031, + "confidence": 0.959, + "words": [ + { + "text": "Est-ce", + "start": 159.12, + "end": 159.34, + "confidence": 0.987 + }, + { + "text": "qu'il", + "start": 159.34, + "end": 159.46, + "confidence": 0.993 + }, + { + "text": "faut", + "start": 159.46, + "end": 159.64, + "confidence": 0.998 + }, + { + "text": "en", + "start": 159.64, + "end": 159.88, + "confidence": 0.961 + }, + { + "text": "remettre", + "start": 159.88, + "end": 160.14, + "confidence": 0.999 + }, + { + "text": "la", + "start": 160.14, + "end": 160.46, + "confidence": 0.995 + }, + { + "text": "faute", + "start": 160.46, + "end": 160.64, + "confidence": 0.986 + }, + { + "text": "sur", + "start": 160.64, + "end": 161.06, + "confidence": 0.982 + }, + { + "text": "les", + "start": 161.06, + "end": 161.3, + "confidence": 0.991 + }, + { + "text": "gens", + "start": 161.3, + "end": 161.46, + "confidence": 0.998 + }, + { + "text": "qui", + "start": 161.46, + "end": 161.56, + "confidence": 0.724 + }, + { + "text": "ont", + "start": 161.56, + "end": 161.82, + "confidence": 0.996 + }, + { + "text": "créé", + "start": 161.82, + "end": 162.28, + "confidence": 0.985 + }, + { + "text": "cet", + "start": 162.28, + "end": 162.48, + "confidence": 0.816 + }, + { + "text": "outil", + "start": 162.48, + "end": 162.78, + "confidence": 0.993 + }, + { + "text": "merveilleux", + "start": 162.78, + "end": 163.36, + 
"confidence": 0.981 + }, + { + "text": "et", + "start": 163.36, + "end": 163.5, + "confidence": 0.983 + }, + { + "text": "diabolique,", + "start": 163.5, + "end": 163.84, + "confidence": 0.991 + }, + { + "text": "et", + "start": 163.86, + "end": 164.0, + "confidence": 0.555 + }, + { + "text": "diabolique", + "start": 164.0, + "end": 164.4, + "confidence": 0.949 + }, + { + "text": "parce", + "start": 164.4, + "end": 164.66, + "confidence": 0.927 + }, + { + "text": "que", + "start": 164.66, + "end": 164.86, + "confidence": 0.979 + }, + { + "text": "merveilleux ?", + "start": 164.86, + "end": 165.36, + "confidence": 0.997 + } + ] + }, + { + "id": 36, + "seek": 15284, + "start": 166.84, + "end": 168.82, + "text": " Les économistes parlent de dépendance du sentier.", + "tokens": [ + 51039, + 6965, + 31171, + 22368, + 13734, + 317, + 368, + 45768, + 719, + 1581, + 2279, + 811, + 13, + 51139 + ], + "temperature": 0.0, + "avg_logprob": -0.1240098174189178, + "compression_ratio": 1.6703296703296704, + "no_speech_prob": 0.07031125575304031, + "confidence": 0.986, + "words": [ + { + "text": "Les", + "start": 166.84, + "end": 167.08, + "confidence": 0.945 + }, + { + "text": "économistes", + "start": 167.08, + "end": 167.46, + "confidence": 0.997 + }, + { + "text": "parlent", + "start": 167.46, + "end": 167.72, + "confidence": 0.993 + }, + { + "text": "de", + "start": 167.72, + "end": 167.88, + "confidence": 0.992 + }, + { + "text": "dépendance", + "start": 167.88, + "end": 168.34, + "confidence": 0.986 + }, + { + "text": "du", + "start": 168.34, + "end": 168.52, + "confidence": 0.996 + }, + { + "text": "sentier.", + "start": 168.52, + "end": 168.82, + "confidence": 0.983 + } + ] + }, + { + "id": 37, + "seek": 15284, + "start": 168.98, + "end": 177.38, + "text": " C'est l'idée qu'on met sur un sentier qui a été établi, soit volontairement en marchant dessus, soit en définissant des bornes, en définissant une signalétique.", + "tokens": [ + 51139, + 383, + 6, + 377, + 287, + 6, 
+ 34281, + 421, + 6, + 266, + 1131, + 1022, + 517, + 2279, + 811, + 1956, + 257, + 8862, + 4823, + 455, + 2081, + 11, + 12703, + 40005, + 9020, + 518, + 465, + 8368, + 394, + 30677, + 11, + 12703, + 465, + 40763, + 29492, + 730, + 4232, + 279, + 11, + 465, + 40763, + 29492, + 2251, + 6358, + 42379, + 13, + 51589 + ], + "temperature": 0.0, + "avg_logprob": -0.1240098174189178, + "compression_ratio": 1.6703296703296704, + "no_speech_prob": 0.07031125575304031, + "confidence": 0.907, + "words": [ + { + "text": "C'est", + "start": 168.98, + "end": 169.2, + "confidence": 0.996 + }, + { + "text": "l'idée", + "start": 169.2, + "end": 169.38, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 169.38, + "end": 169.74, + "confidence": 0.7 + }, + { + "text": "met", + "start": 169.74, + "end": 169.88, + "confidence": 0.648 + }, + { + "text": "sur", + "start": 169.88, + "end": 170.1, + "confidence": 0.995 + }, + { + "text": "un", + "start": 170.1, + "end": 170.52, + "confidence": 0.997 + }, + { + "text": "sentier", + "start": 170.52, + "end": 170.84, + "confidence": 0.991 + }, + { + "text": "qui", + "start": 170.84, + "end": 170.9, + "confidence": 0.955 + }, + { + "text": "a", + "start": 170.9, + "end": 171.0, + "confidence": 0.969 + }, + { + "text": "été", + "start": 171.0, + "end": 171.14, + "confidence": 0.994 + }, + { + "text": "établi,", + "start": 171.14, + "end": 171.5, + "confidence": 0.995 + }, + { + "text": "soit", + "start": 171.76, + "end": 172.1, + "confidence": 0.527 + }, + { + "text": "volontairement", + "start": 172.1, + "end": 172.7, + "confidence": 0.987 + }, + { + "text": "en", + "start": 172.7, + "end": 172.86, + "confidence": 0.927 + }, + { + "text": "marchant", + "start": 172.86, + "end": 173.1, + "confidence": 0.997 + }, + { + "text": "dessus,", + "start": 173.1, + "end": 173.48, + "confidence": 0.995 + }, + { + "text": "soit", + "start": 173.86, + "end": 174.28, + "confidence": 0.749 + }, + { + "text": "en", + "start": 174.28, + "end": 175.16, 
+ "confidence": 0.967 + }, + { + "text": "définissant", + "start": 175.16, + "end": 175.54, + "confidence": 0.981 + }, + { + "text": "des", + "start": 175.54, + "end": 175.74, + "confidence": 0.99 + }, + { + "text": "bornes,", + "start": 175.74, + "end": 175.98, + "confidence": 0.977 + }, + { + "text": "en", + "start": 176.04, + "end": 176.14, + "confidence": 0.672 + }, + { + "text": "définissant", + "start": 176.14, + "end": 176.66, + "confidence": 0.996 + }, + { + "text": "une", + "start": 176.66, + "end": 176.94, + "confidence": 0.983 + }, + { + "text": "signalétique.", + "start": 176.94, + "end": 177.38, + "confidence": 0.686 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_fr.cpu/radio_short.mp3.words.json b/tests/expected/medium_fr.cpu/radio_short.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..55a81cf3b19c05b84cbe3d2f0fc3db1d81a0f42e --- /dev/null +++ b/tests/expected/medium_fr.cpu/radio_short.mp3.words.json @@ -0,0 +1,1977 @@ +{ + "text": " Le plus important au poker ce ne sont pas les cartes, c'est ce que vous en faites. Winamax, la référence du poker en ligne. Bonsoir à toutes et tous, vous êtes sur BFM TV, nous sommes en direct, c'est bien sûr BFM story avec tout ce qui fait l'actualité. Durant 60 minutes ce sont des gros plans, des analyses, des réactions que nous vous proposons. Comment Eric Verth peut-il encore soutenir la réforme des retraites alors qu'il est englué dans sa propre affaire, l'affaire Verth-Bettancourt? Question posée par les leaders de la CFDT et la CGT. Réponse de Nicolas Sarkozy, Eric Verth portera le débat sur les retraites, on en parle dans BFM story avec le numéro de la CFDT. Et puis il y a une bataille qui a démarré, celle entre Marine Le Pen et Bruno Gognich, la bataille de la succession de Jean-Marie Le Pen à la tête du Front National. 
La tournée de campagne de Marine Le Pen commence aujourd'hui dans le Var, Marine Le Pen sera en direct dans BFM story. Restez avec nous Marine Le Pen dans moins de 3 minutes, à tout de suite. Musique L'actualité c'est aussi aujourd'hui un dernier adieu, dernier adieu à Laurent Fignon, c'était au cimetière du Père Lachaise à Paris. L'ancien double vainqueur du Tour de France, vaincu par le cancer à 50 ans, a été incinéré en petit comité aujourd'hui.", + "segments": [ + { + "id": 0, + "seek": 3000, + "start": 30.86, + "end": 34.26, + "text": " Le plus important au poker ce ne sont pas les cartes, c'est ce que vous en faites.", + "tokens": [ + 1456, + 1804, + 1021, + 1609, + 36863, + 1769, + 408, + 4900, + 1736, + 1512, + 5467, + 279, + 11, + 269, + 6, + 377, + 1769, + 631, + 2630, + 465, + 29902, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.1935427300283842, + "compression_ratio": 1.5084745762711864, + "no_speech_prob": 0.5593915581703186, + "confidence": 0.92, + "words": [ + { + "text": "Le", + "start": 30.86, + "end": 31.02, + "confidence": 0.535 + }, + { + "text": "plus", + "start": 31.02, + "end": 31.12, + "confidence": 0.99 + }, + { + "text": "important", + "start": 31.12, + "end": 31.48, + "confidence": 0.993 + }, + { + "text": "au", + "start": 31.48, + "end": 31.64, + "confidence": 0.946 + }, + { + "text": "poker", + "start": 31.64, + "end": 31.84, + "confidence": 0.992 + }, + { + "text": "ce", + "start": 31.84, + "end": 32.02, + "confidence": 0.581 + }, + { + "text": "ne", + "start": 32.02, + "end": 32.06, + "confidence": 0.961 + }, + { + "text": "sont", + "start": 32.06, + "end": 32.16, + "confidence": 0.991 + }, + { + "text": "pas", + "start": 32.16, + "end": 32.28, + "confidence": 0.991 + }, + { + "text": "les", + "start": 32.28, + "end": 32.42, + "confidence": 0.969 + }, + { + "text": "cartes,", + "start": 32.42, + "end": 33.56, + "confidence": 0.996 + }, + { + "text": "c'est", + "start": 33.56, + "end": 33.62, + "confidence": 0.95 + }, + { + 
"text": "ce", + "start": 33.62, + "end": 33.66, + "confidence": 0.99 + }, + { + "text": "que", + "start": 33.66, + "end": 33.7, + "confidence": 0.991 + }, + { + "text": "vous", + "start": 33.7, + "end": 33.86, + "confidence": 0.991 + }, + { + "text": "en", + "start": 33.86, + "end": 34.04, + "confidence": 0.982 + }, + { + "text": "faites.", + "start": 34.04, + "end": 34.26, + "confidence": 0.894 + } + ] + }, + { + "id": 1, + "seek": 3000, + "start": 36.18, + "end": 38.76, + "text": " Winamax, la référence du poker en ligne.", + "tokens": [ + 10427, + 2404, + 87, + 11, + 635, + 30170, + 41635, + 1581, + 36863, + 465, + 34207, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.1935427300283842, + "compression_ratio": 1.5084745762711864, + "no_speech_prob": 0.5593915581703186, + "confidence": 0.967, + "words": [ + { + "text": "Winamax,", + "start": 36.18, + "end": 37.28, + "confidence": 0.905 + }, + { + "text": "la", + "start": 37.28, + "end": 37.32, + "confidence": 0.988 + }, + { + "text": "référence", + "start": 37.32, + "end": 37.78, + "confidence": 0.998 + }, + { + "text": "du", + "start": 37.78, + "end": 37.96, + "confidence": 0.992 + }, + { + "text": "poker", + "start": 37.96, + "end": 38.22, + "confidence": 0.998 + }, + { + "text": "en", + "start": 38.22, + "end": 38.4, + "confidence": 0.993 + }, + { + "text": "ligne.", + "start": 38.4, + "end": 38.76, + "confidence": 0.993 + } + ] + }, + { + "id": 2, + "seek": 3000, + "start": 44.94, + "end": 51.38, + "text": " Bonsoir à toutes et tous, vous êtes sur BFM TV, nous sommes en direct, c'est bien sûr BFM story avec tout ce qui fait l'actualité.", + "tokens": [ + 7368, + 539, + 347, + 1531, + 14437, + 1030, + 8317, + 11, + 2630, + 18935, + 1022, + 363, + 37, + 44, + 3558, + 11, + 4666, + 25232, + 465, + 2047, + 11, + 269, + 6, + 377, + 3610, + 18143, + 363, + 37, + 44, + 1657, + 4163, + 3486, + 1769, + 1956, + 3887, + 287, + 6, + 578, + 901, + 5066, + 13 + ], + "temperature": 0.0, + "avg_logprob": 
-0.1935427300283842, + "compression_ratio": 1.5084745762711864, + "no_speech_prob": 0.5593915581703186, + "confidence": 0.907, + "words": [ + { + "text": "Bonsoir", + "start": 44.94, + "end": 45.4, + "confidence": 0.955 + }, + { + "text": "à", + "start": 45.4, + "end": 45.84, + "confidence": 0.54 + }, + { + "text": "toutes", + "start": 45.84, + "end": 46.06, + "confidence": 0.917 + }, + { + "text": "et", + "start": 46.06, + "end": 46.14, + "confidence": 0.988 + }, + { + "text": "tous,", + "start": 46.14, + "end": 46.7, + "confidence": 0.981 + }, + { + "text": "vous", + "start": 46.7, + "end": 46.74, + "confidence": 0.97 + }, + { + "text": "êtes", + "start": 46.74, + "end": 46.9, + "confidence": 0.995 + }, + { + "text": "sur", + "start": 46.9, + "end": 47.04, + "confidence": 0.989 + }, + { + "text": "BFM", + "start": 47.04, + "end": 47.56, + "confidence": 0.987 + }, + { + "text": "TV,", + "start": 47.56, + "end": 47.88, + "confidence": 0.853 + }, + { + "text": "nous", + "start": 47.88, + "end": 47.92, + "confidence": 0.829 + }, + { + "text": "sommes", + "start": 47.92, + "end": 48.14, + "confidence": 0.988 + }, + { + "text": "en", + "start": 48.14, + "end": 48.2, + "confidence": 0.986 + }, + { + "text": "direct,", + "start": 48.2, + "end": 48.74, + "confidence": 0.998 + }, + { + "text": "c'est", + "start": 48.74, + "end": 48.8, + "confidence": 0.939 + }, + { + "text": "bien", + "start": 48.8, + "end": 48.94, + "confidence": 0.971 + }, + { + "text": "sûr", + "start": 48.94, + "end": 49.2, + "confidence": 0.906 + }, + { + "text": "BFM", + "start": 49.2, + "end": 49.84, + "confidence": 0.95 + }, + { + "text": "story", + "start": 49.84, + "end": 50.04, + "confidence": 0.356 + }, + { + "text": "avec", + "start": 50.04, + "end": 50.24, + "confidence": 0.553 + }, + { + "text": "tout", + "start": 50.24, + "end": 50.42, + "confidence": 0.93 + }, + { + "text": "ce", + "start": 50.42, + "end": 50.46, + "confidence": 0.995 + }, + { + "text": "qui", + "start": 50.46, + "end": 
50.56, + "confidence": 0.952 + }, + { + "text": "fait", + "start": 50.56, + "end": 50.72, + "confidence": 0.961 + }, + { + "text": "l'actualité.", + "start": 50.72, + "end": 51.38, + "confidence": 0.994 + } + ] + }, + { + "id": 3, + "seek": 3000, + "start": 51.5, + "end": 56.11, + "text": " Durant 60 minutes ce sont des gros plans, des analyses, des réactions que nous vous proposons.", + "tokens": [ + 13710, + 394, + 4060, + 2077, + 1769, + 4900, + 730, + 18638, + 5482, + 11, + 730, + 37560, + 11, + 730, + 3960, + 12299, + 631, + 4666, + 2630, + 7532, + 892, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.1935427300283842, + "compression_ratio": 1.5084745762711864, + "no_speech_prob": 0.5593915581703186, + "confidence": 0.947, + "words": [ + { + "text": "Durant", + "start": 51.5, + "end": 52.3, + "confidence": 0.935 + }, + { + "text": "60", + "start": 52.3, + "end": 52.7, + "confidence": 0.922 + }, + { + "text": "minutes", + "start": 52.7, + "end": 53.08, + "confidence": 0.912 + }, + { + "text": "ce", + "start": 53.08, + "end": 53.26, + "confidence": 0.752 + }, + { + "text": "sont", + "start": 53.26, + "end": 53.58, + "confidence": 0.979 + }, + { + "text": "des", + "start": 53.58, + "end": 53.86, + "confidence": 0.99 + }, + { + "text": "gros", + "start": 53.86, + "end": 54.02, + "confidence": 0.969 + }, + { + "text": "plans,", + "start": 54.02, + "end": 54.32, + "confidence": 0.958 + }, + { + "text": "des", + "start": 54.32, + "end": 54.44, + "confidence": 0.986 + }, + { + "text": "analyses,", + "start": 54.44, + "end": 54.78, + "confidence": 0.913 + }, + { + "text": "des", + "start": 54.78, + "end": 54.86, + "confidence": 0.995 + }, + { + "text": "réactions", + "start": 54.86, + "end": 55.22, + "confidence": 0.996 + }, + { + "text": "que", + "start": 55.22, + "end": 55.4, + "confidence": 0.882 + }, + { + "text": "nous", + "start": 55.4, + "end": 55.54, + "confidence": 0.947 + }, + { + "text": "vous", + "start": 55.54, + "end": 55.64, + "confidence": 0.99 + }, 
+ { + "text": "proposons.", + "start": 55.64, + "end": 56.11, + "confidence": 0.991 + } + ] + }, + { + "id": 4, + "seek": 5600, + "start": 56.11, + "end": 63.98, + "text": " Comment Eric Verth peut-il encore soutenir la réforme des retraites alors qu'il est englué dans sa propre affaire, l'affaire Verth-Bettancourt?", + "tokens": [ + 16328, + 9336, + 4281, + 392, + 5977, + 12, + 388, + 10122, + 29350, + 268, + 347, + 635, + 3960, + 44562, + 730, + 49356, + 3324, + 11246, + 421, + 6, + 388, + 871, + 1741, + 2781, + 526, + 2680, + 601, + 35221, + 2096, + 9020, + 11, + 287, + 6, + 2518, + 9020, + 4281, + 392, + 12, + 33, + 3093, + 4463, + 33403, + 2506 + ], + "temperature": 0.0, + "avg_logprob": -0.12757631117297757, + "compression_ratio": 1.5705329153605017, + "no_speech_prob": 6.333013880066574e-05, + "confidence": 0.858, + "words": [ + { + "text": "Comment", + "start": 56.11, + "end": 56.94, + "confidence": 0.966 + }, + { + "text": "Eric", + "start": 56.94, + "end": 57.2, + "confidence": 0.888 + }, + { + "text": "Verth", + "start": 57.2, + "end": 57.36, + "confidence": 0.376 + }, + { + "text": "peut-il", + "start": 57.36, + "end": 57.8, + "confidence": 0.951 + }, + { + "text": "encore", + "start": 57.8, + "end": 58.36, + "confidence": 0.971 + }, + { + "text": "soutenir", + "start": 58.36, + "end": 58.62, + "confidence": 0.997 + }, + { + "text": "la", + "start": 58.62, + "end": 58.72, + "confidence": 0.98 + }, + { + "text": "réforme", + "start": 58.72, + "end": 59.18, + "confidence": 0.996 + }, + { + "text": "des", + "start": 59.18, + "end": 59.38, + "confidence": 0.994 + }, + { + "text": "retraites", + "start": 59.38, + "end": 59.78, + "confidence": 0.988 + }, + { + "text": "alors", + "start": 59.78, + "end": 59.94, + "confidence": 0.508 + }, + { + "text": "qu'il", + "start": 59.94, + "end": 60.72, + "confidence": 0.965 + }, + { + "text": "est", + "start": 60.72, + "end": 60.86, + "confidence": 0.975 + }, + { + "text": "englué", + "start": 60.86, + "end": 61.74, + 
"confidence": 0.945 + }, + { + "text": "dans", + "start": 61.74, + "end": 61.92, + "confidence": 0.97 + }, + { + "text": "sa", + "start": 61.92, + "end": 62.42, + "confidence": 0.871 + }, + { + "text": "propre", + "start": 62.42, + "end": 62.74, + "confidence": 0.992 + }, + { + "text": "affaire,", + "start": 62.74, + "end": 62.98, + "confidence": 0.993 + }, + { + "text": "l'affaire", + "start": 62.98, + "end": 63.24, + "confidence": 0.903 + }, + { + "text": "Verth-Bettancourt?", + "start": 63.24, + "end": 63.98, + "confidence": 0.714 + } + ] + }, + { + "id": 5, + "seek": 5600, + "start": 64.0, + "end": 67.18, + "text": " Question posée par les leaders de la CFDT et la CGT.", + "tokens": [ + 14464, + 1366, + 3856, + 971, + 1512, + 3523, + 368, + 635, + 21792, + 35, + 51, + 1030, + 635, + 38007, + 51, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.12757631117297757, + "compression_ratio": 1.5705329153605017, + "no_speech_prob": 6.333013880066574e-05, + "confidence": 0.896, + "words": [ + { + "text": "Question", + "start": 64.0, + "end": 64.3, + "confidence": 0.639 + }, + { + "text": "posée", + "start": 64.3, + "end": 64.76, + "confidence": 0.977 + }, + { + "text": "par", + "start": 64.76, + "end": 64.94, + "confidence": 0.989 + }, + { + "text": "les", + "start": 64.94, + "end": 65.08, + "confidence": 0.966 + }, + { + "text": "leaders", + "start": 65.08, + "end": 65.66, + "confidence": 0.977 + }, + { + "text": "de", + "start": 65.66, + "end": 65.9, + "confidence": 0.988 + }, + { + "text": "la", + "start": 65.9, + "end": 66.1, + "confidence": 0.967 + }, + { + "text": "CFDT", + "start": 66.1, + "end": 66.54, + "confidence": 0.994 + }, + { + "text": "et", + "start": 66.54, + "end": 66.62, + "confidence": 0.586 + }, + { + "text": "la", + "start": 66.62, + "end": 66.66, + "confidence": 0.627 + }, + { + "text": "CGT.", + "start": 66.66, + "end": 67.18, + "confidence": 0.987 + } + ] + }, + { + "id": 6, + "seek": 5600, + "start": 67.44, + "end": 76.42, + "text": " 
Réponse de Nicolas Sarkozy, Eric Verth portera le débat sur les retraites, on en parle dans BFM story avec le numéro de la CFDT.", + "tokens": [ + 41587, + 3739, + 368, + 38268, + 318, + 809, + 78, + 1229, + 11, + 9336, + 4281, + 392, + 1515, + 23833, + 476, + 2795, + 11980, + 1022, + 1512, + 49356, + 3324, + 11, + 322, + 465, + 18508, + 2680, + 363, + 37, + 44, + 1657, + 4163, + 476, + 49525, + 368, + 635, + 21792, + 35, + 51, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.12757631117297757, + "compression_ratio": 1.5705329153605017, + "no_speech_prob": 6.333013880066574e-05, + "confidence": 0.968, + "words": [ + { + "text": "Réponse", + "start": 67.44, + "end": 67.94, + "confidence": 0.957 + }, + { + "text": "de", + "start": 67.94, + "end": 68.1, + "confidence": 0.966 + }, + { + "text": "Nicolas", + "start": 68.1, + "end": 68.36, + "confidence": 0.934 + }, + { + "text": "Sarkozy,", + "start": 68.36, + "end": 69.06, + "confidence": 0.988 + }, + { + "text": "Eric", + "start": 69.06, + "end": 69.24, + "confidence": 0.962 + }, + { + "text": "Verth", + "start": 69.24, + "end": 69.8, + "confidence": 0.988 + }, + { + "text": "portera", + "start": 69.8, + "end": 70.64, + "confidence": 0.966 + }, + { + "text": "le", + "start": 70.64, + "end": 71.0, + "confidence": 0.99 + }, + { + "text": "débat", + "start": 71.0, + "end": 71.4, + "confidence": 0.996 + }, + { + "text": "sur", + "start": 71.4, + "end": 71.66, + "confidence": 0.99 + }, + { + "text": "les", + "start": 71.66, + "end": 71.82, + "confidence": 0.997 + }, + { + "text": "retraites,", + "start": 71.82, + "end": 72.34, + "confidence": 0.996 + }, + { + "text": "on", + "start": 72.34, + "end": 72.46, + "confidence": 0.842 + }, + { + "text": "en", + "start": 72.46, + "end": 72.74, + "confidence": 0.948 + }, + { + "text": "parle", + "start": 72.74, + "end": 72.92, + "confidence": 0.996 + }, + { + "text": "dans", + "start": 72.92, + "end": 73.42, + "confidence": 0.983 + }, + { + "text": "BFM", + "start": 73.42, + 
"end": 74.6, + "confidence": 0.992 + }, + { + "text": "story", + "start": 74.6, + "end": 74.7, + "confidence": 0.756 + }, + { + "text": "avec", + "start": 74.7, + "end": 75.08, + "confidence": 0.837 + }, + { + "text": "le", + "start": 75.08, + "end": 75.26, + "confidence": 0.984 + }, + { + "text": "numéro", + "start": 75.26, + "end": 75.52, + "confidence": 0.997 + }, + { + "text": "de", + "start": 75.52, + "end": 75.76, + "confidence": 0.996 + }, + { + "text": "la", + "start": 75.76, + "end": 75.9, + "confidence": 0.99 + }, + { + "text": "CFDT.", + "start": 75.9, + "end": 76.42, + "confidence": 0.998 + } + ] + }, + { + "id": 7, + "seek": 5600, + "start": 76.5, + "end": 83.0, + "text": " Et puis il y a une bataille qui a démarré, celle entre Marine Le Pen et Bruno Gognich, la bataille de la succession de Jean-Marie Le Pen à la tête du Front National.", + "tokens": [ + 3790, + 9093, + 1930, + 288, + 257, + 2251, + 272, + 3274, + 3409, + 1956, + 257, + 22761, + 2284, + 526, + 11, + 25722, + 3962, + 20415, + 1456, + 10571, + 1030, + 23046, + 460, + 2912, + 480, + 11, + 635, + 272, + 3274, + 3409, + 368, + 635, + 36624, + 368, + 13854, + 12, + 16639, + 414, + 1456, + 10571, + 1531, + 635, + 24661, + 1581, + 17348, + 4862, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.12757631117297757, + "compression_ratio": 1.5705329153605017, + "no_speech_prob": 6.333013880066574e-05, + "confidence": 0.872, + "words": [ + { + "text": "Et", + "start": 76.5, + "end": 76.96, + "confidence": 0.912 + }, + { + "text": "puis", + "start": 76.96, + "end": 77.08, + "confidence": 0.958 + }, + { + "text": "il", + "start": 77.08, + "end": 77.14, + "confidence": 0.794 + }, + { + "text": "y", + "start": 77.14, + "end": 77.18, + "confidence": 0.994 + }, + { + "text": "a", + "start": 77.18, + "end": 77.22, + "confidence": 0.986 + }, + { + "text": "une", + "start": 77.22, + "end": 77.3, + "confidence": 0.996 + }, + { + "text": "bataille", + "start": 77.3, + "end": 77.56, + "confidence": 0.996 + 
}, + { + "text": "qui", + "start": 77.56, + "end": 77.68, + "confidence": 0.997 + }, + { + "text": "a", + "start": 77.68, + "end": 77.72, + "confidence": 0.985 + }, + { + "text": "démarré,", + "start": 77.72, + "end": 78.16, + "confidence": 0.987 + }, + { + "text": "celle", + "start": 78.16, + "end": 78.38, + "confidence": 0.997 + }, + { + "text": "entre", + "start": 78.38, + "end": 78.64, + "confidence": 0.976 + }, + { + "text": "Marine", + "start": 78.64, + "end": 78.96, + "confidence": 0.987 + }, + { + "text": "Le", + "start": 78.96, + "end": 79.12, + "confidence": 0.986 + }, + { + "text": "Pen", + "start": 79.12, + "end": 79.16, + "confidence": 0.987 + }, + { + "text": "et", + "start": 79.16, + "end": 79.32, + "confidence": 0.997 + }, + { + "text": "Bruno", + "start": 79.32, + "end": 79.54, + "confidence": 0.986 + }, + { + "text": "Gognich,", + "start": 79.54, + "end": 80.2, + "confidence": 0.24 + }, + { + "text": "la", + "start": 80.2, + "end": 80.32, + "confidence": 0.676 + }, + { + "text": "bataille", + "start": 80.32, + "end": 80.54, + "confidence": 0.998 + }, + { + "text": "de", + "start": 80.54, + "end": 80.72, + "confidence": 0.975 + }, + { + "text": "la", + "start": 80.72, + "end": 80.92, + "confidence": 0.988 + }, + { + "text": "succession", + "start": 80.92, + "end": 81.18, + "confidence": 0.983 + }, + { + "text": "de", + "start": 81.18, + "end": 81.44, + "confidence": 0.984 + }, + { + "text": "Jean-Marie", + "start": 81.44, + "end": 81.7, + "confidence": 0.95 + }, + { + "text": "Le", + "start": 81.7, + "end": 81.94, + "confidence": 0.996 + }, + { + "text": "Pen", + "start": 81.94, + "end": 81.98, + "confidence": 0.999 + }, + { + "text": "à", + "start": 81.98, + "end": 82.12, + "confidence": 0.976 + }, + { + "text": "la", + "start": 82.12, + "end": 82.28, + "confidence": 0.995 + }, + { + "text": "tête", + "start": 82.28, + "end": 82.32, + "confidence": 0.926 + }, + { + "text": "du", + "start": 82.32, + "end": 82.48, + "confidence": 0.997 + }, + { + 
"text": "Front", + "start": 82.48, + "end": 82.64, + "confidence": 0.775 + }, + { + "text": "National.", + "start": 82.64, + "end": 83.0, + "confidence": 0.836 + } + ] + }, + { + "id": 8, + "seek": 8300, + "start": 83.3, + "end": 88.82, + "text": " La tournée de campagne de Marine Le Pen commence aujourd'hui dans le Var, Marine Le Pen sera en direct dans BFM story.", + "tokens": [ + 2369, + 3512, + 77, + 3856, + 368, + 2255, + 13887, + 368, + 20415, + 1456, + 10571, + 18137, + 14023, + 6, + 10556, + 2680, + 476, + 14662, + 11, + 20415, + 1456, + 10571, + 15021, + 465, + 2047, + 2680, + 363, + 37, + 44, + 1657, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.10741670781915838, + "compression_ratio": 1.3591549295774648, + "no_speech_prob": 0.00011412434105295688, + "confidence": 0.958, + "words": [ + { + "text": "La", + "start": 83.3, + "end": 83.56, + "confidence": 0.98 + }, + { + "text": "tournée", + "start": 83.56, + "end": 84.14, + "confidence": 0.995 + }, + { + "text": "de", + "start": 84.14, + "end": 84.36, + "confidence": 0.947 + }, + { + "text": "campagne", + "start": 84.36, + "end": 84.84, + "confidence": 0.984 + }, + { + "text": "de", + "start": 84.84, + "end": 84.9, + "confidence": 0.977 + }, + { + "text": "Marine", + "start": 84.9, + "end": 85.18, + "confidence": 0.997 + }, + { + "text": "Le", + "start": 85.18, + "end": 85.26, + "confidence": 0.998 + }, + { + "text": "Pen", + "start": 85.26, + "end": 85.44, + "confidence": 0.999 + }, + { + "text": "commence", + "start": 85.44, + "end": 85.78, + "confidence": 0.98 + }, + { + "text": "aujourd'hui", + "start": 85.78, + "end": 86.14, + "confidence": 0.991 + }, + { + "text": "dans", + "start": 86.14, + "end": 86.24, + "confidence": 0.984 + }, + { + "text": "le", + "start": 86.24, + "end": 86.38, + "confidence": 0.913 + }, + { + "text": "Var,", + "start": 86.38, + "end": 86.82, + "confidence": 0.523 + }, + { + "text": "Marine", + "start": 86.82, + "end": 86.96, + "confidence": 0.993 + }, + { + "text": "Le", 
+ "start": 86.96, + "end": 87.04, + "confidence": 0.993 + }, + { + "text": "Pen", + "start": 87.04, + "end": 87.22, + "confidence": 0.999 + }, + { + "text": "sera", + "start": 87.22, + "end": 87.48, + "confidence": 0.991 + }, + { + "text": "en", + "start": 87.48, + "end": 87.68, + "confidence": 0.991 + }, + { + "text": "direct", + "start": 87.68, + "end": 87.98, + "confidence": 0.997 + }, + { + "text": "dans", + "start": 87.98, + "end": 88.3, + "confidence": 0.935 + }, + { + "text": "BFM", + "start": 88.3, + "end": 88.72, + "confidence": 0.997 + }, + { + "text": "story.", + "start": 88.72, + "end": 88.82, + "confidence": 0.827 + } + ] + }, + { + "id": 9, + "seek": 8300, + "start": 89.0, + "end": 92.15, + "text": " Restez avec nous Marine Le Pen dans moins de 3 minutes, à tout de suite.", + "tokens": [ + 13094, + 4371, + 4163, + 4666, + 20415, + 1456, + 10571, + 2680, + 13099, + 368, + 805, + 2077, + 11, + 1531, + 3486, + 368, + 14205, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.10741670781915838, + "compression_ratio": 1.3591549295774648, + "no_speech_prob": 0.00011412434105295688, + "confidence": 0.937, + "words": [ + { + "text": "Restez", + "start": 89.0, + "end": 89.64, + "confidence": 0.966 + }, + { + "text": "avec", + "start": 89.64, + "end": 89.68, + "confidence": 0.997 + }, + { + "text": "nous", + "start": 89.68, + "end": 89.94, + "confidence": 0.968 + }, + { + "text": "Marine", + "start": 89.94, + "end": 90.18, + "confidence": 0.906 + }, + { + "text": "Le", + "start": 90.18, + "end": 90.4, + "confidence": 0.998 + }, + { + "text": "Pen", + "start": 90.4, + "end": 90.68, + "confidence": 0.999 + }, + { + "text": "dans", + "start": 90.68, + "end": 90.88, + "confidence": 0.836 + }, + { + "text": "moins", + "start": 90.88, + "end": 91.18, + "confidence": 0.993 + }, + { + "text": "de", + "start": 91.18, + "end": 91.28, + "confidence": 0.991 + }, + { + "text": "3", + "start": 91.28, + "end": 91.46, + "confidence": 0.609 + }, + { + "text": "minutes,", + 
"start": 91.46, + "end": 91.86, + "confidence": 0.986 + }, + { + "text": "à", + "start": 91.86, + "end": 91.9, + "confidence": 0.962 + }, + { + "text": "tout", + "start": 91.9, + "end": 92.04, + "confidence": 0.918 + }, + { + "text": "de", + "start": 92.04, + "end": 92.1, + "confidence": 0.999 + }, + { + "text": "suite.", + "start": 92.1, + "end": 92.15, + "confidence": 0.999 + } + ] + }, + { + "id": 10, + "seek": 9200, + "start": 92.15, + "end": 93.74, + "text": " Musique", + "tokens": [ + 3569, + 1925 + ], + "temperature": 0.0, + "avg_logprob": -0.15070751414579503, + "compression_ratio": 1.4545454545454546, + "no_speech_prob": 8.21087378426455e-05, + "confidence": 0.328, + "words": [ + { + "text": "Musique", + "start": 92.15, + "end": 93.74, + "confidence": 0.328 + } + ] + }, + { + "id": 11, + "seek": 9200, + "start": 106.7, + "end": 113.5, + "text": " L'actualité c'est aussi aujourd'hui un dernier adieu, dernier adieu à Laurent Fignon, c'était au cimetière du Père Lachaise à Paris.", + "tokens": [ + 441, + 6, + 578, + 901, + 5066, + 269, + 6, + 377, + 6212, + 14023, + 6, + 10556, + 517, + 29332, + 614, + 19347, + 11, + 29332, + 614, + 19347, + 1531, + 49357, + 479, + 41846, + 11, + 269, + 6, + 9743, + 1609, + 269, + 26123, + 10195, + 1581, + 430, + 4212, + 441, + 27442, + 908, + 1531, + 8380, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.15070751414579503, + "compression_ratio": 1.4545454545454546, + "no_speech_prob": 8.21087378426455e-05, + "confidence": 0.898, + "words": [ + { + "text": "L'actualité", + "start": 106.7, + "end": 107.44, + "confidence": 0.868 + }, + { + "text": "c'est", + "start": 107.44, + "end": 107.66, + "confidence": 0.84 + }, + { + "text": "aussi", + "start": 107.66, + "end": 108.02, + "confidence": 0.804 + }, + { + "text": "aujourd'hui", + "start": 108.02, + "end": 108.38, + "confidence": 0.928 + }, + { + "text": "un", + "start": 108.38, + "end": 108.48, + "confidence": 0.887 + }, + { + "text": "dernier", + "start": 108.48, + "end": 
108.8, + "confidence": 0.991 + }, + { + "text": "adieu,", + "start": 108.8, + "end": 109.2, + "confidence": 0.982 + }, + { + "text": "dernier", + "start": 109.2, + "end": 109.38, + "confidence": 0.501 + }, + { + "text": "adieu", + "start": 109.38, + "end": 109.96, + "confidence": 0.997 + }, + { + "text": "à", + "start": 109.96, + "end": 110.12, + "confidence": 0.978 + }, + { + "text": "Laurent", + "start": 110.12, + "end": 110.42, + "confidence": 0.94 + }, + { + "text": "Fignon,", + "start": 110.42, + "end": 111.12, + "confidence": 0.816 + }, + { + "text": "c'était", + "start": 111.12, + "end": 111.24, + "confidence": 0.972 + }, + { + "text": "au", + "start": 111.24, + "end": 111.46, + "confidence": 0.99 + }, + { + "text": "cimetière", + "start": 111.46, + "end": 111.94, + "confidence": 0.984 + }, + { + "text": "du", + "start": 111.94, + "end": 112.14, + "confidence": 0.963 + }, + { + "text": "Père", + "start": 112.14, + "end": 112.36, + "confidence": 0.892 + }, + { + "text": "Lachaise", + "start": 112.36, + "end": 112.88, + "confidence": 0.864 + }, + { + "text": "à", + "start": 112.88, + "end": 113.12, + "confidence": 0.795 + }, + { + "text": "Paris.", + "start": 113.12, + "end": 113.5, + "confidence": 0.998 + } + ] + }, + { + "id": 12, + "seek": 11400, + "start": 114.02, + "end": 120.46, + "text": " L'ancien double vainqueur du Tour de France, vaincu par le cancer à 50 ans, a été incinéré en petit comité aujourd'hui.", + "tokens": [ + 50364, + 441, + 6, + 38840, + 268, + 3834, + 22240, + 1077, + 374, + 1581, + 13077, + 368, + 6190, + 11, + 22240, + 12032, + 971, + 476, + 5592, + 1531, + 2625, + 1567, + 11, + 257, + 8862, + 834, + 259, + 29071, + 465, + 9686, + 395, + 5066, + 14023, + 6, + 10556, + 13, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.10655183541147333, + "compression_ratio": 1.1160714285714286, + "no_speech_prob": 0.0002510416379664093, + "confidence": 0.933, + "words": [ + { + "text": "L'ancien", + "start": 114.02, + "end": 114.36, + 
"confidence": 0.735 + }, + { + "text": "double", + "start": 114.36, + "end": 114.64, + "confidence": 0.991 + }, + { + "text": "vainqueur", + "start": 114.64, + "end": 115.08, + "confidence": 0.996 + }, + { + "text": "du", + "start": 115.08, + "end": 115.18, + "confidence": 0.971 + }, + { + "text": "Tour", + "start": 115.18, + "end": 115.36, + "confidence": 0.828 + }, + { + "text": "de", + "start": 115.36, + "end": 115.5, + "confidence": 0.992 + }, + { + "text": "France,", + "start": 115.5, + "end": 116.32, + "confidence": 0.999 + }, + { + "text": "vaincu", + "start": 116.32, + "end": 116.48, + "confidence": 0.989 + }, + { + "text": "par", + "start": 116.48, + "end": 116.6, + "confidence": 0.997 + }, + { + "text": "le", + "start": 116.6, + "end": 116.74, + "confidence": 0.994 + }, + { + "text": "cancer", + "start": 116.74, + "end": 117.06, + "confidence": 0.966 + }, + { + "text": "à", + "start": 117.06, + "end": 117.24, + "confidence": 0.964 + }, + { + "text": "50", + "start": 117.24, + "end": 117.54, + "confidence": 0.977 + }, + { + "text": "ans,", + "start": 117.54, + "end": 118.34, + "confidence": 0.989 + }, + { + "text": "a", + "start": 118.34, + "end": 118.38, + "confidence": 0.989 + }, + { + "text": "été", + "start": 118.38, + "end": 118.56, + "confidence": 0.996 + }, + { + "text": "incinéré", + "start": 118.56, + "end": 119.24, + "confidence": 0.965 + }, + { + "text": "en", + "start": 119.24, + "end": 119.48, + "confidence": 0.913 + }, + { + "text": "petit", + "start": 119.48, + "end": 119.74, + "confidence": 0.883 + }, + { + "text": "comité", + "start": 119.74, + "end": 120.18, + "confidence": 0.978 + }, + { + "text": "aujourd'hui.", + "start": 120.18, + "end": 120.46, + "confidence": 0.929 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_fr.cpu/smartphone.mp3.words.json b/tests/expected/medium_fr.cpu/smartphone.mp3.words.json new file mode 100644 index 
0000000000000000000000000000000000000000..9faa961d5cf4a38b6ac04b76010aeaddb28cee89 --- /dev/null +++ b/tests/expected/medium_fr.cpu/smartphone.mp3.words.json @@ -0,0 +1,4802 @@ +{ + "text": " C'est évident ce que dit Nicolas, mais je ne me l'étais jamais formulé comme ça. Ce qui fait la force du smartphone, c'est pas seulement l'accumulation des fonctions, mais la manière dont elles interagissent entre elles. Ce qui dit d'ailleurs sur la photo, c'est hyper convaincant. Alors évidemment, il faudrait ajouter les interfaces. L'écran tactile a été beaucoup très souvent mentionné. Mais bon, il faut dire qu'il profite aussi de 20 ans pendant lesquels les ordinateurs nous ont appris à cliquer sur des icônes. Sauf que le smartphone ajoute le toucher, ce qui rend le contact plus direct, plus sensible. Et puis évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté touffu de la navigation web pour aller directement au but. Bref, tout ça, ce sont les conditions qui permettent de créer cet objet dont Nicolas dit qu'il est vraisemblablement inédit dans l'histoire de l'humanité. Mais ça, ça soulève une autre interrogation. Est-ce que le fait que cet objet soit inédit induit que notre rapport à lui est aussi un rapport inédit? Je veux dire, est-ce que le rapport qu'on a au smartphone est comparable à celui qu'on entretenait à d'autres objets techniques comme la voiture ou le téléphone? Il n'y a pas d'équivalent en fait. Et donc cette espèce de nouveauté dans la relation à l'objet, c'est fascinant et terrifiant. Parce qu'on a l'impression, comme le disent les utilisateurs et les services, d'être dépendants de cet objet, d'induire en fait une espèce de relation, de médiation avec le monde qui rend de l'ampleur et qui amène aussi à des formes de rejet. Donc, à objet inédit, rapport inédit. Et ce rapport, si j'en crois Nicolas, serait caractérisé par un mélange de dépendance et de rejet. 
Bon, en vrai, il faudrait remonter très très finement toute l'histoire des objets techniques et de leur insertion dans nos vies pour déterminer si ce rapport est totalement inédit. Mais j'ai l'impression comme ça que Nicolas ne se trompe pas vraiment. Pour autant que je sache, il y a eu plein de discussions autour de la voiture ou même du téléphone. Mais la dépendance n'était pas du même ordre. Donc le rejet non plus n'était pas du même ordre. On peut adorer sa bagnole, en avoir besoin pour plein de choses. Et bien, le soir, quand on va se coucher, on la laisse. On ne l'a pas dans la main quand on est au lit, on ne l'emmène pas au chiottes. On pouvait être énervé par son môme qui occupait la ligne de téléphone pendant une heure chaque soir pour discuter avec un copain. Mais ça ne ressemblait pas à ce qu'on peut ressentir à voir ce même môme aujourd'hui, continuellement avec son smartphone dans la main, comme si c'était une sorte de pacemaker externe, comme si le lâcher allait entraîner sa mort immédiate. Bon, je dis ça pour le môme, mais c'est évidemment valable pour nous aussi. Donc, rapport inédit. D'accord. Mais pourquoi a-t-on l'impression qu'on n'en sortira jamais? Est-ce qu'il faut en remettre la faute sur les gens qui ont créé cet outil merveilleux et diabolique, et diabolique parce que merveilleux? Les économistes parlent de dépendance du sentier. 
C'est l'idée qu'on est sur un sentier qui a été établi, soit volontairement en marchant dessus, soit en définissant des bornes, en définissant une signalétique.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.38, + "end": 3.62, + "text": " C'est évident ce que dit Nicolas, mais je ne me l'étais jamais formulé comme ça.", + "tokens": [ + 383, + 6, + 377, + 20090, + 1078, + 1769, + 631, + 6176, + 38268, + 11, + 2420, + 1506, + 408, + 385, + 287, + 6, + 22824, + 14540, + 49990, + 526, + 5173, + 2788, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.935, + "words": [ + { + "text": "C'est", + "start": 0.38, + "end": 0.58, + "confidence": 0.961 + }, + { + "text": "évident", + "start": 0.58, + "end": 0.88, + "confidence": 0.984 + }, + { + "text": "ce", + "start": 0.88, + "end": 1.02, + "confidence": 0.663 + }, + { + "text": "que", + "start": 1.02, + "end": 1.08, + "confidence": 0.989 + }, + { + "text": "dit", + "start": 1.08, + "end": 1.2, + "confidence": 0.994 + }, + { + "text": "Nicolas,", + "start": 1.2, + "end": 1.78, + "confidence": 0.91 + }, + { + "text": "mais", + "start": 1.78, + "end": 1.9, + "confidence": 0.979 + }, + { + "text": "je", + "start": 1.9, + "end": 2.24, + "confidence": 0.982 + }, + { + "text": "ne", + "start": 2.24, + "end": 2.34, + "confidence": 0.835 + }, + { + "text": "me", + "start": 2.34, + "end": 2.38, + "confidence": 0.82 + }, + { + "text": "l'étais", + "start": 2.38, + "end": 2.58, + "confidence": 0.971 + }, + { + "text": "jamais", + "start": 2.58, + "end": 2.84, + "confidence": 0.989 + }, + { + "text": "formulé", + "start": 2.84, + "end": 3.26, + "confidence": 0.909 + }, + { + "text": "comme", + "start": 3.26, + "end": 3.42, + "confidence": 0.993 + }, + { + "text": "ça.", + "start": 3.42, + "end": 3.62, + "confidence": 0.975 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 4.08, + "end": 7.92, + "text": " 
Ce qui fait la force du smartphone, c'est pas seulement l'accumulation des fonctions,", + "tokens": [ + 8257, + 1956, + 3887, + 635, + 3464, + 1581, + 13307, + 11, + 269, + 6, + 377, + 1736, + 27772, + 287, + 6, + 8476, + 449, + 2776, + 730, + 17290, + 3916, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.93, + "words": [ + { + "text": "Ce", + "start": 4.08, + "end": 4.26, + "confidence": 0.952 + }, + { + "text": "qui", + "start": 4.26, + "end": 4.34, + "confidence": 0.958 + }, + { + "text": "fait", + "start": 4.34, + "end": 4.48, + "confidence": 0.565 + }, + { + "text": "la", + "start": 4.48, + "end": 4.66, + "confidence": 0.971 + }, + { + "text": "force", + "start": 4.66, + "end": 5.0, + "confidence": 0.999 + }, + { + "text": "du", + "start": 5.0, + "end": 5.2, + "confidence": 0.996 + }, + { + "text": "smartphone,", + "start": 5.2, + "end": 5.88, + "confidence": 0.911 + }, + { + "text": "c'est", + "start": 5.88, + "end": 6.12, + "confidence": 0.88 + }, + { + "text": "pas", + "start": 6.12, + "end": 6.26, + "confidence": 0.992 + }, + { + "text": "seulement", + "start": 6.26, + "end": 6.52, + "confidence": 0.999 + }, + { + "text": "l'accumulation", + "start": 6.52, + "end": 7.38, + "confidence": 0.958 + }, + { + "text": "des", + "start": 7.38, + "end": 7.56, + "confidence": 0.983 + }, + { + "text": "fonctions,", + "start": 7.56, + "end": 7.92, + "confidence": 0.987 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 8.32, + "end": 10.88, + "text": " mais la manière dont elles interagissent entre elles.", + "tokens": [ + 2420, + 635, + 22267, + 9400, + 23576, + 728, + 559, + 25450, + 3962, + 23576, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.977, + "words": [ + { + "text": "mais", + "start": 8.32, + "end": 8.44, + 
"confidence": 0.992 + }, + { + "text": "la", + "start": 8.44, + "end": 8.6, + "confidence": 0.995 + }, + { + "text": "manière", + "start": 8.6, + "end": 8.9, + "confidence": 0.999 + }, + { + "text": "dont", + "start": 8.9, + "end": 9.1, + "confidence": 0.978 + }, + { + "text": "elles", + "start": 9.1, + "end": 9.48, + "confidence": 0.967 + }, + { + "text": "interagissent", + "start": 9.48, + "end": 10.32, + "confidence": 0.964 + }, + { + "text": "entre", + "start": 10.32, + "end": 10.58, + "confidence": 0.956 + }, + { + "text": "elles.", + "start": 10.58, + "end": 10.88, + "confidence": 0.99 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 10.96, + "end": 13.0, + "text": " Ce qui dit d'ailleurs sur la photo, c'est hyper convaincant.", + "tokens": [ + 8257, + 1956, + 6176, + 274, + 6, + 19400, + 1022, + 635, + 5052, + 11, + 269, + 6, + 377, + 9848, + 3754, + 491, + 66, + 394, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.906, + "words": [ + { + "text": "Ce", + "start": 10.96, + "end": 11.16, + "confidence": 0.607 + }, + { + "text": "qui", + "start": 11.16, + "end": 11.22, + "confidence": 0.765 + }, + { + "text": "dit", + "start": 11.22, + "end": 11.4, + "confidence": 0.983 + }, + { + "text": "d'ailleurs", + "start": 11.4, + "end": 11.56, + "confidence": 0.985 + }, + { + "text": "sur", + "start": 11.56, + "end": 11.72, + "confidence": 0.477 + }, + { + "text": "la", + "start": 11.72, + "end": 11.78, + "confidence": 0.984 + }, + { + "text": "photo,", + "start": 11.78, + "end": 12.12, + "confidence": 0.994 + }, + { + "text": "c'est", + "start": 12.12, + "end": 12.2, + "confidence": 0.997 + }, + { + "text": "hyper", + "start": 12.2, + "end": 12.42, + "confidence": 0.993 + }, + { + "text": "convaincant.", + "start": 12.42, + "end": 13.0, + "confidence": 0.982 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 13.34, + "end": 16.02, + "text": " 
Alors évidemment, il faudrait ajouter les interfaces.", + "tokens": [ + 9946, + 24724, + 11, + 1930, + 38694, + 8645, + 17680, + 23985, + 1512, + 28416, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.913, + "words": [ + { + "text": "Alors", + "start": 13.34, + "end": 13.62, + "confidence": 0.585 + }, + { + "text": "évidemment,", + "start": 13.62, + "end": 14.34, + "confidence": 0.832 + }, + { + "text": "il", + "start": 14.34, + "end": 14.38, + "confidence": 0.952 + }, + { + "text": "faudrait", + "start": 14.38, + "end": 14.74, + "confidence": 0.996 + }, + { + "text": "ajouter", + "start": 14.74, + "end": 15.16, + "confidence": 0.992 + }, + { + "text": "les", + "start": 15.16, + "end": 15.52, + "confidence": 0.985 + }, + { + "text": "interfaces.", + "start": 15.52, + "end": 16.02, + "confidence": 0.984 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 16.22, + "end": 19.36, + "text": " L'écran tactile a été beaucoup très souvent mentionné.", + "tokens": [ + 441, + 6, + 9062, + 4257, + 47319, + 257, + 8862, + 8796, + 5732, + 20847, + 2152, + 15055, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.923, + "words": [ + { + "text": "L'écran", + "start": 16.22, + "end": 16.7, + "confidence": 0.996 + }, + { + "text": "tactile", + "start": 16.7, + "end": 17.06, + "confidence": 0.986 + }, + { + "text": "a", + "start": 17.06, + "end": 17.26, + "confidence": 0.98 + }, + { + "text": "été", + "start": 17.26, + "end": 17.88, + "confidence": 0.975 + }, + { + "text": "beaucoup", + "start": 17.88, + "end": 18.28, + "confidence": 0.976 + }, + { + "text": "très", + "start": 18.28, + "end": 18.62, + "confidence": 0.447 + }, + { + "text": "souvent", + "start": 18.62, + "end": 18.9, + "confidence": 0.996 + }, + { + "text": "mentionné.", + 
"start": 18.9, + "end": 19.36, + "confidence": 0.978 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 19.84, + "end": 25.26, + "text": " Mais bon, il faut dire qu'il profite aussi de 20 ans pendant lesquels les ordinateurs nous ont appris à cliquer sur des icônes.", + "tokens": [ + 6313, + 4428, + 11, + 1930, + 8487, + 1264, + 421, + 6, + 388, + 1740, + 642, + 6212, + 368, + 945, + 1567, + 17338, + 1512, + 358, + 1625, + 1512, + 4792, + 13923, + 2156, + 4666, + 6592, + 724, + 5714, + 1531, + 596, + 23909, + 1022, + 730, + 4376, + 2851, + 4081, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.23666970461409614, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.1567964404821396, + "confidence": 0.92, + "words": [ + { + "text": "Mais", + "start": 19.84, + "end": 20.22, + "confidence": 0.944 + }, + { + "text": "bon,", + "start": 20.22, + "end": 20.52, + "confidence": 0.667 + }, + { + "text": "il", + "start": 20.52, + "end": 20.6, + "confidence": 0.99 + }, + { + "text": "faut", + "start": 20.6, + "end": 20.7, + "confidence": 0.99 + }, + { + "text": "dire", + "start": 20.7, + "end": 20.84, + "confidence": 0.995 + }, + { + "text": "qu'il", + "start": 20.84, + "end": 20.96, + "confidence": 0.88 + }, + { + "text": "profite", + "start": 20.96, + "end": 21.26, + "confidence": 0.995 + }, + { + "text": "aussi", + "start": 21.26, + "end": 21.68, + "confidence": 0.972 + }, + { + "text": "de", + "start": 21.68, + "end": 21.9, + "confidence": 0.97 + }, + { + "text": "20", + "start": 21.9, + "end": 22.1, + "confidence": 0.812 + }, + { + "text": "ans", + "start": 22.1, + "end": 22.32, + "confidence": 0.997 + }, + { + "text": "pendant", + "start": 22.32, + "end": 22.48, + "confidence": 0.72 + }, + { + "text": "lesquels", + "start": 22.48, + "end": 22.92, + "confidence": 0.98 + }, + { + "text": "les", + "start": 22.92, + "end": 23.04, + "confidence": 0.709 + }, + { + "text": "ordinateurs", + "start": 23.04, + "end": 23.54, + "confidence": 0.965 + }, + { + "text": 
"nous", + "start": 23.54, + "end": 23.72, + "confidence": 0.602 + }, + { + "text": "ont", + "start": 23.72, + "end": 23.82, + "confidence": 0.974 + }, + { + "text": "appris", + "start": 23.82, + "end": 24.1, + "confidence": 0.991 + }, + { + "text": "à", + "start": 24.1, + "end": 24.24, + "confidence": 0.829 + }, + { + "text": "cliquer", + "start": 24.24, + "end": 24.5, + "confidence": 0.989 + }, + { + "text": "sur", + "start": 24.5, + "end": 24.66, + "confidence": 0.984 + }, + { + "text": "des", + "start": 24.66, + "end": 24.94, + "confidence": 0.971 + }, + { + "text": "icônes.", + "start": 24.94, + "end": 25.26, + "confidence": 0.992 + } + ] + }, + { + "id": 7, + "seek": 2534, + "start": 25.42, + "end": 30.64, + "text": " Sauf que le smartphone ajoute le toucher, ce qui rend le contact plus direct, plus sensible.", + "tokens": [ + 318, + 9507, + 631, + 476, + 13307, + 17680, + 14040, + 476, + 2557, + 260, + 11, + 1769, + 1956, + 6125, + 476, + 3385, + 1804, + 2047, + 11, + 1804, + 25380, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07915337880452473, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 3.457161710684886e-06, + "confidence": 0.969, + "words": [ + { + "text": "Sauf", + "start": 25.42, + "end": 25.76, + "confidence": 0.99 + }, + { + "text": "que", + "start": 25.76, + "end": 26.26, + "confidence": 0.996 + }, + { + "text": "le", + "start": 26.26, + "end": 26.66, + "confidence": 0.631 + }, + { + "text": "smartphone", + "start": 26.66, + "end": 27.06, + "confidence": 0.996 + }, + { + "text": "ajoute", + "start": 27.06, + "end": 27.44, + "confidence": 0.991 + }, + { + "text": "le", + "start": 27.44, + "end": 27.62, + "confidence": 0.992 + }, + { + "text": "toucher,", + "start": 27.62, + "end": 28.18, + "confidence": 0.988 + }, + { + "text": "ce", + "start": 28.18, + "end": 28.22, + "confidence": 0.99 + }, + { + "text": "qui", + "start": 28.22, + "end": 28.28, + "confidence": 1.0 + }, + { + "text": "rend", + "start": 28.28, + "end": 28.48, 
+ "confidence": 0.994 + }, + { + "text": "le", + "start": 28.48, + "end": 28.68, + "confidence": 0.993 + }, + { + "text": "contact", + "start": 28.68, + "end": 29.1, + "confidence": 0.999 + }, + { + "text": "plus", + "start": 29.1, + "end": 29.46, + "confidence": 0.985 + }, + { + "text": "direct,", + "start": 29.46, + "end": 30.22, + "confidence": 0.995 + }, + { + "text": "plus", + "start": 30.22, + "end": 30.26, + "confidence": 0.994 + }, + { + "text": "sensible.", + "start": 30.26, + "end": 30.64, + "confidence": 0.997 + } + ] + }, + { + "id": 8, + "seek": 2534, + "start": 31.04, + "end": 37.82, + "text": " Et puis évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté touffu de la navigation web pour aller directement au but.", + "tokens": [ + 3790, + 9093, + 24724, + 11, + 1930, + 38694, + 8645, + 16421, + 6212, + 730, + 5821, + 1956, + 21540, + 317, + 368, + 21234, + 1193, + 476, + 18437, + 10095, + 602, + 84, + 368, + 635, + 17346, + 3670, + 2016, + 8722, + 37297, + 1609, + 457, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07915337880452473, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 3.457161710684886e-06, + "confidence": 0.892, + "words": [ + { + "text": "Et", + "start": 31.04, + "end": 31.22, + "confidence": 0.97 + }, + { + "text": "puis", + "start": 31.22, + "end": 31.36, + "confidence": 0.971 + }, + { + "text": "évidemment,", + "start": 31.36, + "end": 31.7, + "confidence": 0.875 + }, + { + "text": "il", + "start": 31.7, + "end": 31.74, + "confidence": 0.993 + }, + { + "text": "faudrait", + "start": 31.74, + "end": 31.94, + "confidence": 0.995 + }, + { + "text": "parler", + "start": 31.94, + "end": 32.12, + "confidence": 0.84 + }, + { + "text": "aussi", + "start": 32.12, + "end": 32.34, + "confidence": 0.977 + }, + { + "text": "des", + "start": 32.34, + "end": 32.48, + "confidence": 0.994 + }, + { + "text": "applications", + "start": 32.48, + "end": 32.9, + "confidence": 0.993 + }, + { + 
"text": "qui", + "start": 32.9, + "end": 33.18, + "confidence": 0.481 + }, + { + "text": "permettent", + "start": 33.18, + "end": 33.74, + "confidence": 0.992 + }, + { + "text": "de", + "start": 33.74, + "end": 33.96, + "confidence": 0.885 + }, + { + "text": "contourner", + "start": 33.96, + "end": 34.42, + "confidence": 0.958 + }, + { + "text": "le", + "start": 34.42, + "end": 34.52, + "confidence": 0.775 + }, + { + "text": "côté", + "start": 34.52, + "end": 34.8, + "confidence": 0.984 + }, + { + "text": "touffu", + "start": 34.8, + "end": 35.32, + "confidence": 0.741 + }, + { + "text": "de", + "start": 35.32, + "end": 35.72, + "confidence": 0.882 + }, + { + "text": "la", + "start": 35.72, + "end": 35.78, + "confidence": 0.992 + }, + { + "text": "navigation", + "start": 35.78, + "end": 36.24, + "confidence": 0.994 + }, + { + "text": "web", + "start": 36.24, + "end": 36.6, + "confidence": 0.854 + }, + { + "text": "pour", + "start": 36.6, + "end": 36.78, + "confidence": 0.585 + }, + { + "text": "aller", + "start": 36.78, + "end": 36.98, + "confidence": 0.987 + }, + { + "text": "directement", + "start": 36.98, + "end": 37.52, + "confidence": 0.997 + }, + { + "text": "au", + "start": 37.52, + "end": 37.68, + "confidence": 0.967 + }, + { + "text": "but.", + "start": 37.68, + "end": 37.82, + "confidence": 0.995 + } + ] + }, + { + "id": 9, + "seek": 2534, + "start": 37.82, + "end": 46.54, + "text": " Bref, tout ça, ce sont les conditions qui permettent de créer cet objet dont Nicolas dit qu'il est vraisemblablement inédit dans l'histoire de l'humanité.", + "tokens": [ + 49957, + 11, + 3486, + 2788, + 11, + 1769, + 4900, + 1512, + 4487, + 1956, + 21540, + 317, + 368, + 32062, + 8603, + 14964, + 9400, + 38268, + 6176, + 421, + 6, + 388, + 871, + 6070, + 271, + 443, + 5199, + 712, + 518, + 294, + 7811, + 270, + 2680, + 287, + 6, + 29093, + 368, + 287, + 6, + 18796, + 5066, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07915337880452473, + "compression_ratio": 
1.6254416961130742, + "no_speech_prob": 3.457161710684886e-06, + "confidence": 0.974, + "words": [ + { + "text": "Bref,", + "start": 37.82, + "end": 38.76, + "confidence": 0.987 + }, + { + "text": "tout", + "start": 38.76, + "end": 38.98, + "confidence": 0.711 + }, + { + "text": "ça,", + "start": 38.98, + "end": 39.42, + "confidence": 0.995 + }, + { + "text": "ce", + "start": 39.42, + "end": 39.7, + "confidence": 0.993 + }, + { + "text": "sont", + "start": 39.7, + "end": 39.88, + "confidence": 0.999 + }, + { + "text": "les", + "start": 39.88, + "end": 40.16, + "confidence": 0.991 + }, + { + "text": "conditions", + "start": 40.16, + "end": 40.68, + "confidence": 0.995 + }, + { + "text": "qui", + "start": 40.68, + "end": 40.96, + "confidence": 0.997 + }, + { + "text": "permettent", + "start": 40.96, + "end": 41.46, + "confidence": 0.997 + }, + { + "text": "de", + "start": 41.46, + "end": 41.6, + "confidence": 0.998 + }, + { + "text": "créer", + "start": 41.6, + "end": 42.06, + "confidence": 0.998 + }, + { + "text": "cet", + "start": 42.06, + "end": 42.38, + "confidence": 0.998 + }, + { + "text": "objet", + "start": 42.38, + "end": 42.6, + "confidence": 0.994 + }, + { + "text": "dont", + "start": 42.6, + "end": 42.8, + "confidence": 0.779 + }, + { + "text": "Nicolas", + "start": 42.8, + "end": 43.26, + "confidence": 0.989 + }, + { + "text": "dit", + "start": 43.26, + "end": 43.5, + "confidence": 0.986 + }, + { + "text": "qu'il", + "start": 43.5, + "end": 43.7, + "confidence": 0.983 + }, + { + "text": "est", + "start": 43.7, + "end": 43.88, + "confidence": 0.991 + }, + { + "text": "vraisemblablement", + "start": 43.88, + "end": 44.98, + "confidence": 0.991 + }, + { + "text": "inédit", + "start": 44.98, + "end": 45.38, + "confidence": 0.98 + }, + { + "text": "dans", + "start": 45.38, + "end": 45.7, + "confidence": 0.969 + }, + { + "text": "l'histoire", + "start": 45.7, + "end": 45.98, + "confidence": 0.957 + }, + { + "text": "de", + "start": 45.98, + "end": 46.18, + 
"confidence": 0.999 + }, + { + "text": "l'humanité.", + "start": 46.18, + "end": 46.54, + "confidence": 0.992 + } + ] + }, + { + "id": 10, + "seek": 2534, + "start": 46.54, + "end": 48.82, + "text": " Mais ça, ça soulève une autre interrogation.", + "tokens": [ + 6313, + 2788, + 11, + 2788, + 5133, + 31397, + 2251, + 15081, + 24871, + 399, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07915337880452473, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 3.457161710684886e-06, + "confidence": 0.969, + "words": [ + { + "text": "Mais", + "start": 46.54, + "end": 47.24, + "confidence": 0.846 + }, + { + "text": "ça,", + "start": 47.24, + "end": 47.72, + "confidence": 0.935 + }, + { + "text": "ça", + "start": 47.72, + "end": 47.76, + "confidence": 0.977 + }, + { + "text": "soulève", + "start": 47.76, + "end": 47.84, + "confidence": 0.993 + }, + { + "text": "une", + "start": 47.84, + "end": 48.02, + "confidence": 0.998 + }, + { + "text": "autre", + "start": 48.02, + "end": 48.26, + "confidence": 0.999 + }, + { + "text": "interrogation.", + "start": 48.26, + "end": 48.82, + "confidence": 0.997 + } + ] + }, + { + "id": 11, + "seek": 4884, + "start": 49.22, + "end": 55.46, + "text": " Est-ce que le fait que cet objet soit inédit induit que notre rapport à lui est aussi un rapport inédit?", + "tokens": [ + 4410, + 12, + 384, + 631, + 476, + 3887, + 631, + 8603, + 14964, + 12703, + 294, + 7811, + 270, + 13716, + 270, + 631, + 10349, + 18018, + 1531, + 8783, + 871, + 6212, + 517, + 18018, + 294, + 7811, + 270, + 2506 + ], + "temperature": 0.0, + "avg_logprob": -0.10746372298689078, + "compression_ratio": 1.662379421221865, + "no_speech_prob": 1.3105129255563952e-05, + "confidence": 0.988, + "words": [ + { + "text": "Est-ce", + "start": 49.22, + "end": 49.62, + "confidence": 0.982 + }, + { + "text": "que", + "start": 49.62, + "end": 49.72, + "confidence": 0.991 + }, + { + "text": "le", + "start": 49.72, + "end": 49.82, + "confidence": 0.993 + }, + { + "text": 
"fait", + "start": 49.82, + "end": 49.98, + "confidence": 0.999 + }, + { + "text": "que", + "start": 49.98, + "end": 50.14, + "confidence": 0.991 + }, + { + "text": "cet", + "start": 50.14, + "end": 50.32, + "confidence": 0.991 + }, + { + "text": "objet", + "start": 50.32, + "end": 50.66, + "confidence": 0.997 + }, + { + "text": "soit", + "start": 50.66, + "end": 51.12, + "confidence": 0.995 + }, + { + "text": "inédit", + "start": 51.12, + "end": 51.8, + "confidence": 0.996 + }, + { + "text": "induit", + "start": 51.8, + "end": 52.32, + "confidence": 0.977 + }, + { + "text": "que", + "start": 52.32, + "end": 52.42, + "confidence": 0.983 + }, + { + "text": "notre", + "start": 52.42, + "end": 52.72, + "confidence": 0.996 + }, + { + "text": "rapport", + "start": 52.72, + "end": 53.28, + "confidence": 0.997 + }, + { + "text": "à", + "start": 53.28, + "end": 53.44, + "confidence": 0.978 + }, + { + "text": "lui", + "start": 53.44, + "end": 53.66, + "confidence": 0.999 + }, + { + "text": "est", + "start": 53.66, + "end": 54.02, + "confidence": 0.916 + }, + { + "text": "aussi", + "start": 54.02, + "end": 54.54, + "confidence": 0.995 + }, + { + "text": "un", + "start": 54.54, + "end": 54.7, + "confidence": 0.995 + }, + { + "text": "rapport", + "start": 54.7, + "end": 55.0, + "confidence": 0.996 + }, + { + "text": "inédit?", + "start": 55.0, + "end": 55.46, + "confidence": 0.996 + } + ] + }, + { + "id": 12, + "seek": 4884, + "start": 55.46, + "end": 63.12, + "text": " Je veux dire, est-ce que le rapport qu'on a au smartphone est comparable à celui qu'on entretenait à d'autres objets techniques comme la voiture ou le téléphone?", + "tokens": [ + 2588, + 16389, + 1264, + 11, + 871, + 12, + 384, + 631, + 476, + 18018, + 421, + 6, + 266, + 257, + 1609, + 13307, + 871, + 25323, + 1531, + 22829, + 421, + 6, + 266, + 3962, + 1147, + 1001, + 1531, + 274, + 6, + 16752, + 1111, + 25349, + 7512, + 5173, + 635, + 38859, + 2820, + 476, + 47159, + 2506 + ], + "temperature": 0.0, + 
"avg_logprob": -0.10746372298689078, + "compression_ratio": 1.662379421221865, + "no_speech_prob": 1.3105129255563952e-05, + "confidence": 0.965, + "words": [ + { + "text": "Je", + "start": 55.46, + "end": 55.9, + "confidence": 0.88 + }, + { + "text": "veux", + "start": 55.9, + "end": 56.0, + "confidence": 0.988 + }, + { + "text": "dire,", + "start": 56.0, + "end": 56.24, + "confidence": 0.997 + }, + { + "text": "est-ce", + "start": 56.24, + "end": 56.36, + "confidence": 0.99 + }, + { + "text": "que", + "start": 56.36, + "end": 56.42, + "confidence": 0.99 + }, + { + "text": "le", + "start": 56.42, + "end": 56.58, + "confidence": 0.995 + }, + { + "text": "rapport", + "start": 56.58, + "end": 56.88, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 56.88, + "end": 57.04, + "confidence": 0.986 + }, + { + "text": "a", + "start": 57.04, + "end": 57.18, + "confidence": 0.989 + }, + { + "text": "au", + "start": 57.18, + "end": 57.28, + "confidence": 0.968 + }, + { + "text": "smartphone", + "start": 57.28, + "end": 57.6, + "confidence": 0.994 + }, + { + "text": "est", + "start": 57.6, + "end": 57.92, + "confidence": 0.95 + }, + { + "text": "comparable", + "start": 57.92, + "end": 58.24, + "confidence": 0.997 + }, + { + "text": "à", + "start": 58.24, + "end": 58.48, + "confidence": 0.949 + }, + { + "text": "celui", + "start": 58.48, + "end": 58.66, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 58.66, + "end": 58.9, + "confidence": 0.989 + }, + { + "text": "entretenait", + "start": 58.9, + "end": 59.32, + "confidence": 0.929 + }, + { + "text": "à", + "start": 59.32, + "end": 59.46, + "confidence": 0.958 + }, + { + "text": "d'autres", + "start": 59.46, + "end": 59.7, + "confidence": 0.997 + }, + { + "text": "objets", + "start": 59.7, + "end": 59.96, + "confidence": 0.991 + }, + { + "text": "techniques", + "start": 59.96, + "end": 60.46, + "confidence": 0.983 + }, + { + "text": "comme", + "start": 60.46, + "end": 60.88, + "confidence": 0.587 + }, + { 
+ "text": "la", + "start": 60.88, + "end": 61.5, + "confidence": 0.987 + }, + { + "text": "voiture", + "start": 61.5, + "end": 62.06, + "confidence": 1.0 + }, + { + "text": "ou", + "start": 62.06, + "end": 62.36, + "confidence": 0.945 + }, + { + "text": "le", + "start": 62.36, + "end": 62.68, + "confidence": 0.998 + }, + { + "text": "téléphone?", + "start": 62.68, + "end": 63.12, + "confidence": 0.999 + } + ] + }, + { + "id": 13, + "seek": 4884, + "start": 63.36, + "end": 66.66, + "text": " Il n'y a pas d'équivalent en fait.", + "tokens": [ + 4416, + 297, + 6, + 88, + 257, + 1736, + 274, + 6, + 20183, + 3576, + 317, + 465, + 3887, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.10746372298689078, + "compression_ratio": 1.662379421221865, + "no_speech_prob": 1.3105129255563952e-05, + "confidence": 0.936, + "words": [ + { + "text": "Il", + "start": 63.36, + "end": 65.42, + "confidence": 0.779 + }, + { + "text": "n'y", + "start": 65.42, + "end": 65.48, + "confidence": 0.978 + }, + { + "text": "a", + "start": 65.48, + "end": 65.54, + "confidence": 0.992 + }, + { + "text": "pas", + "start": 65.54, + "end": 65.66, + "confidence": 0.999 + }, + { + "text": "d'équivalent", + "start": 65.66, + "end": 66.22, + "confidence": 0.995 + }, + { + "text": "en", + "start": 66.22, + "end": 66.42, + "confidence": 0.601 + }, + { + "text": "fait.", + "start": 66.42, + "end": 66.66, + "confidence": 0.996 + } + ] + }, + { + "id": 14, + "seek": 4884, + "start": 66.88, + "end": 71.52, + "text": " Et donc cette espèce de nouveauté dans la relation à l'objet, c'est fascinant et terrifiant.", + "tokens": [ + 3790, + 5926, + 5550, + 7089, + 30236, + 368, + 11456, + 1375, + 526, + 2680, + 635, + 9721, + 1531, + 287, + 6, + 996, + 7108, + 11, + 269, + 6, + 377, + 7184, + 259, + 394, + 1030, + 7245, + 351, + 5798, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.10746372298689078, + "compression_ratio": 1.662379421221865, + "no_speech_prob": 1.3105129255563952e-05, + "confidence": 0.953, + 
"words": [ + { + "text": "Et", + "start": 66.88, + "end": 66.98, + "confidence": 0.599 + }, + { + "text": "donc", + "start": 66.98, + "end": 67.08, + "confidence": 0.901 + }, + { + "text": "cette", + "start": 67.08, + "end": 67.28, + "confidence": 0.71 + }, + { + "text": "espèce", + "start": 67.28, + "end": 67.54, + "confidence": 0.995 + }, + { + "text": "de", + "start": 67.54, + "end": 67.68, + "confidence": 0.999 + }, + { + "text": "nouveauté", + "start": 67.68, + "end": 68.48, + "confidence": 0.979 + }, + { + "text": "dans", + "start": 68.48, + "end": 68.66, + "confidence": 0.98 + }, + { + "text": "la", + "start": 68.66, + "end": 68.94, + "confidence": 0.995 + }, + { + "text": "relation", + "start": 68.94, + "end": 69.22, + "confidence": 0.998 + }, + { + "text": "à", + "start": 69.22, + "end": 69.38, + "confidence": 0.997 + }, + { + "text": "l'objet,", + "start": 69.38, + "end": 70.24, + "confidence": 0.997 + }, + { + "text": "c'est", + "start": 70.24, + "end": 70.38, + "confidence": 0.98 + }, + { + "text": "fascinant", + "start": 70.38, + "end": 70.64, + "confidence": 0.978 + }, + { + "text": "et", + "start": 70.64, + "end": 70.76, + "confidence": 0.964 + }, + { + "text": "terrifiant.", + "start": 70.76, + "end": 71.52, + "confidence": 0.977 + } + ] + }, + { + "id": 15, + "seek": 4884, + "start": 71.62, + "end": 76.48, + "text": " Parce qu'on a l'impression, comme le disent les utilisateurs et les services, d'être dépendants de cet objet,", + "tokens": [ + 20429, + 421, + 6, + 266, + 257, + 287, + 6, + 36107, + 11, + 5173, + 476, + 37313, + 1512, + 33643, + 25929, + 1030, + 1512, + 3328, + 11, + 274, + 6, + 9498, + 45768, + 1719, + 368, + 8603, + 14964, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.10746372298689078, + "compression_ratio": 1.662379421221865, + "no_speech_prob": 1.3105129255563952e-05, + "confidence": 0.813, + "words": [ + { + "text": "Parce", + "start": 71.62, + "end": 71.86, + "confidence": 0.512 + }, + { + "text": "qu'on", + "start": 
71.86, + "end": 72.12, + "confidence": 0.937 + }, + { + "text": "a", + "start": 72.12, + "end": 72.44, + "confidence": 0.982 + }, + { + "text": "l'impression,", + "start": 72.44, + "end": 73.56, + "confidence": 0.998 + }, + { + "text": "comme", + "start": 73.56, + "end": 73.84, + "confidence": 0.964 + }, + { + "text": "le", + "start": 73.84, + "end": 74.0, + "confidence": 0.984 + }, + { + "text": "disent", + "start": 74.0, + "end": 74.2, + "confidence": 0.998 + }, + { + "text": "les", + "start": 74.2, + "end": 74.4, + "confidence": 0.994 + }, + { + "text": "utilisateurs", + "start": 74.4, + "end": 74.84, + "confidence": 0.997 + }, + { + "text": "et", + "start": 74.84, + "end": 74.96, + "confidence": 0.329 + }, + { + "text": "les", + "start": 74.96, + "end": 75.0, + "confidence": 0.773 + }, + { + "text": "services,", + "start": 75.0, + "end": 75.22, + "confidence": 0.214 + }, + { + "text": "d'être", + "start": 75.22, + "end": 75.42, + "confidence": 0.766 + }, + { + "text": "dépendants", + "start": 75.42, + "end": 75.96, + "confidence": 0.789 + }, + { + "text": "de", + "start": 75.96, + "end": 76.08, + "confidence": 0.986 + }, + { + "text": "cet", + "start": 76.08, + "end": 76.26, + "confidence": 0.996 + }, + { + "text": "objet,", + "start": 76.26, + "end": 76.48, + "confidence": 0.996 + } + ] + }, + { + "id": 16, + "seek": 7684, + "start": 76.86, + "end": 83.26, + "text": " d'induire en fait une espèce de relation, de médiation avec le monde qui rend de l'ampleur et qui amène aussi à des formes de rejet.", + "tokens": [ + 274, + 6, + 471, + 43612, + 465, + 3887, + 2251, + 7089, + 30236, + 368, + 9721, + 11, + 368, + 42436, + 399, + 4163, + 476, + 10431, + 1956, + 6125, + 368, + 287, + 6, + 335, + 781, + 374, + 1030, + 1956, + 669, + 18832, + 6212, + 1531, + 730, + 1254, + 279, + 368, + 319, + 7108, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06937986261704389, + "compression_ratio": 1.6787003610108304, + "no_speech_prob": 5.948771558905719e-06, + 
"confidence": 0.917, + "words": [ + { + "text": "d'induire", + "start": 76.86, + "end": 77.08, + "confidence": 0.841 + }, + { + "text": "en", + "start": 77.08, + "end": 77.24, + "confidence": 0.618 + }, + { + "text": "fait", + "start": 77.24, + "end": 77.34, + "confidence": 0.994 + }, + { + "text": "une", + "start": 77.34, + "end": 77.52, + "confidence": 0.983 + }, + { + "text": "espèce", + "start": 77.52, + "end": 77.88, + "confidence": 0.996 + }, + { + "text": "de", + "start": 77.88, + "end": 78.48, + "confidence": 0.997 + }, + { + "text": "relation,", + "start": 78.48, + "end": 78.6, + "confidence": 0.596 + }, + { + "text": "de", + "start": 78.6, + "end": 78.94, + "confidence": 0.987 + }, + { + "text": "médiation", + "start": 78.94, + "end": 79.52, + "confidence": 0.997 + }, + { + "text": "avec", + "start": 79.52, + "end": 79.74, + "confidence": 0.967 + }, + { + "text": "le", + "start": 79.74, + "end": 79.92, + "confidence": 0.998 + }, + { + "text": "monde", + "start": 79.92, + "end": 80.64, + "confidence": 0.992 + }, + { + "text": "qui", + "start": 80.64, + "end": 81.1, + "confidence": 0.696 + }, + { + "text": "rend", + "start": 81.1, + "end": 81.64, + "confidence": 0.91 + }, + { + "text": "de", + "start": 81.64, + "end": 81.78, + "confidence": 0.712 + }, + { + "text": "l'ampleur", + "start": 81.78, + "end": 82.02, + "confidence": 0.987 + }, + { + "text": "et", + "start": 82.02, + "end": 82.12, + "confidence": 0.931 + }, + { + "text": "qui", + "start": 82.12, + "end": 82.24, + "confidence": 0.976 + }, + { + "text": "amène", + "start": 82.24, + "end": 82.36, + "confidence": 0.973 + }, + { + "text": "aussi", + "start": 82.36, + "end": 82.56, + "confidence": 0.938 + }, + { + "text": "à", + "start": 82.56, + "end": 82.62, + "confidence": 0.941 + }, + { + "text": "des", + "start": 82.62, + "end": 82.72, + "confidence": 0.992 + }, + { + "text": "formes", + "start": 82.72, + "end": 82.9, + "confidence": 0.993 + }, + { + "text": "de", + "start": 82.9, + "end": 83.02, + 
"confidence": 0.998 + }, + { + "text": "rejet.", + "start": 83.02, + "end": 83.26, + "confidence": 0.883 + } + ] + }, + { + "id": 17, + "seek": 7684, + "start": 83.94, + "end": 87.8, + "text": " Donc, à objet inédit, rapport inédit.", + "tokens": [ + 7477, + 11, + 1531, + 14964, + 294, + 7811, + 270, + 11, + 18018, + 294, + 7811, + 270, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06937986261704389, + "compression_ratio": 1.6787003610108304, + "no_speech_prob": 5.948771558905719e-06, + "confidence": 0.962, + "words": [ + { + "text": "Donc,", + "start": 83.94, + "end": 84.94, + "confidence": 0.971 + }, + { + "text": "à", + "start": 84.94, + "end": 84.98, + "confidence": 0.88 + }, + { + "text": "objet", + "start": 84.98, + "end": 85.36, + "confidence": 0.828 + }, + { + "text": "inédit,", + "start": 85.36, + "end": 86.56, + "confidence": 0.993 + }, + { + "text": "rapport", + "start": 86.56, + "end": 87.0, + "confidence": 0.981 + }, + { + "text": "inédit.", + "start": 87.0, + "end": 87.8, + "confidence": 0.998 + } + ] + }, + { + "id": 18, + "seek": 7684, + "start": 88.02, + "end": 95.14, + "text": " Et ce rapport, si j'en crois Nicolas, serait caractérisé par un mélange de dépendance et de rejet.", + "tokens": [ + 3790, + 1769, + 18018, + 11, + 1511, + 361, + 6, + 268, + 21724, + 38268, + 11, + 23139, + 1032, + 578, + 4198, + 22118, + 971, + 517, + 41953, + 933, + 368, + 45768, + 719, + 1030, + 368, + 319, + 7108, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06937986261704389, + "compression_ratio": 1.6787003610108304, + "no_speech_prob": 5.948771558905719e-06, + "confidence": 0.972, + "words": [ + { + "text": "Et", + "start": 88.02, + "end": 88.48, + "confidence": 0.992 + }, + { + "text": "ce", + "start": 88.48, + "end": 88.86, + "confidence": 0.975 + }, + { + "text": "rapport,", + "start": 88.86, + "end": 89.28, + "confidence": 0.998 + }, + { + "text": "si", + "start": 89.28, + "end": 89.56, + "confidence": 0.999 + }, + { + "text": "j'en", + "start": 
89.56, + "end": 89.84, + "confidence": 0.996 + }, + { + "text": "crois", + "start": 89.84, + "end": 89.88, + "confidence": 0.984 + }, + { + "text": "Nicolas,", + "start": 89.88, + "end": 90.54, + "confidence": 0.681 + }, + { + "text": "serait", + "start": 90.54, + "end": 90.94, + "confidence": 0.885 + }, + { + "text": "caractérisé", + "start": 90.94, + "end": 91.8, + "confidence": 0.993 + }, + { + "text": "par", + "start": 91.8, + "end": 92.12, + "confidence": 0.997 + }, + { + "text": "un", + "start": 92.12, + "end": 92.44, + "confidence": 0.997 + }, + { + "text": "mélange", + "start": 92.44, + "end": 92.98, + "confidence": 0.999 + }, + { + "text": "de", + "start": 92.98, + "end": 93.4, + "confidence": 0.998 + }, + { + "text": "dépendance", + "start": 93.4, + "end": 94.24, + "confidence": 0.953 + }, + { + "text": "et", + "start": 94.24, + "end": 94.54, + "confidence": 0.998 + }, + { + "text": "de", + "start": 94.54, + "end": 94.68, + "confidence": 0.999 + }, + { + "text": "rejet.", + "start": 94.68, + "end": 95.14, + "confidence": 0.993 + } + ] + }, + { + "id": 19, + "seek": 7684, + "start": 95.78, + "end": 102.86, + "text": " Bon, en vrai, il faudrait remonter très très finement toute l'histoire des objets techniques et de leur insertion dans nos vies", + "tokens": [ + 7368, + 11, + 465, + 17815, + 11, + 1930, + 38694, + 8645, + 890, + 41806, + 5732, + 5732, + 962, + 1712, + 14953, + 287, + 6, + 29093, + 730, + 1111, + 25349, + 7512, + 1030, + 368, + 9580, + 8969, + 313, + 2680, + 3269, + 371, + 530 + ], + "temperature": 0.0, + "avg_logprob": -0.06937986261704389, + "compression_ratio": 1.6787003610108304, + "no_speech_prob": 5.948771558905719e-06, + "confidence": 0.941, + "words": [ + { + "text": "Bon,", + "start": 95.78, + "end": 96.38, + "confidence": 0.792 + }, + { + "text": "en", + "start": 96.38, + "end": 96.52, + "confidence": 0.998 + }, + { + "text": "vrai,", + "start": 96.52, + "end": 97.14, + "confidence": 0.994 + }, + { + "text": "il", + "start": 97.14, 
+ "end": 97.18, + "confidence": 0.998 + }, + { + "text": "faudrait", + "start": 97.18, + "end": 97.58, + "confidence": 0.997 + }, + { + "text": "remonter", + "start": 97.58, + "end": 98.08, + "confidence": 0.997 + }, + { + "text": "très", + "start": 98.08, + "end": 98.6, + "confidence": 0.997 + }, + { + "text": "très", + "start": 98.6, + "end": 98.7, + "confidence": 0.768 + }, + { + "text": "finement", + "start": 98.7, + "end": 99.32, + "confidence": 0.849 + }, + { + "text": "toute", + "start": 99.32, + "end": 99.7, + "confidence": 0.984 + }, + { + "text": "l'histoire", + "start": 99.7, + "end": 100.06, + "confidence": 0.997 + }, + { + "text": "des", + "start": 100.06, + "end": 100.24, + "confidence": 0.998 + }, + { + "text": "objets", + "start": 100.24, + "end": 100.48, + "confidence": 0.999 + }, + { + "text": "techniques", + "start": 100.48, + "end": 101.02, + "confidence": 0.984 + }, + { + "text": "et", + "start": 101.02, + "end": 101.48, + "confidence": 0.527 + }, + { + "text": "de", + "start": 101.48, + "end": 101.68, + "confidence": 0.994 + }, + { + "text": "leur", + "start": 101.68, + "end": 101.84, + "confidence": 0.833 + }, + { + "text": "insertion", + "start": 101.84, + "end": 102.32, + "confidence": 0.994 + }, + { + "text": "dans", + "start": 102.32, + "end": 102.48, + "confidence": 0.99 + }, + { + "text": "nos", + "start": 102.48, + "end": 102.66, + "confidence": 0.998 + }, + { + "text": "vies", + "start": 102.66, + "end": 102.86, + "confidence": 0.998 + } + ] + }, + { + "id": 20, + "seek": 7684, + "start": 102.9, + "end": 105.74, + "text": " pour déterminer si ce rapport est totalement inédit.", + "tokens": [ + 2016, + 2795, + 29725, + 260, + 1511, + 1769, + 18018, + 871, + 45203, + 294, + 7811, + 270, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06937986261704389, + "compression_ratio": 1.6787003610108304, + "no_speech_prob": 5.948771558905719e-06, + "confidence": 0.981, + "words": [ + { + "text": "pour", + "start": 102.9, + "end": 103.06, + 
"confidence": 0.822 + }, + { + "text": "déterminer", + "start": 103.06, + "end": 103.66, + "confidence": 0.997 + }, + { + "text": "si", + "start": 103.66, + "end": 103.76, + "confidence": 0.992 + }, + { + "text": "ce", + "start": 103.76, + "end": 103.94, + "confidence": 0.997 + }, + { + "text": "rapport", + "start": 103.94, + "end": 104.26, + "confidence": 0.997 + }, + { + "text": "est", + "start": 104.26, + "end": 104.74, + "confidence": 0.998 + }, + { + "text": "totalement", + "start": 104.74, + "end": 105.3, + "confidence": 0.999 + }, + { + "text": "inédit.", + "start": 105.3, + "end": 105.74, + "confidence": 0.999 + } + ] + }, + { + "id": 21, + "seek": 10584, + "start": 106.1, + "end": 109.34, + "text": " Mais j'ai l'impression comme ça que Nicolas ne se trompe pas vraiment.", + "tokens": [ + 6313, + 361, + 6, + 1301, + 287, + 6, + 36107, + 5173, + 2788, + 631, + 38268, + 408, + 369, + 504, + 298, + 494, + 1736, + 8322, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07386374800172571, + "compression_ratio": 1.685512367491166, + "no_speech_prob": 5.46344235772267e-05, + "confidence": 0.94, + "words": [ + { + "text": "Mais", + "start": 106.1, + "end": 106.36, + "confidence": 0.947 + }, + { + "text": "j'ai", + "start": 106.36, + "end": 106.92, + "confidence": 0.941 + }, + { + "text": "l'impression", + "start": 106.92, + "end": 107.36, + "confidence": 0.996 + }, + { + "text": "comme", + "start": 107.36, + "end": 107.56, + "confidence": 0.641 + }, + { + "text": "ça", + "start": 107.56, + "end": 107.82, + "confidence": 0.978 + }, + { + "text": "que", + "start": 107.82, + "end": 107.96, + "confidence": 0.976 + }, + { + "text": "Nicolas", + "start": 107.96, + "end": 108.46, + "confidence": 0.985 + }, + { + "text": "ne", + "start": 108.46, + "end": 108.66, + "confidence": 0.726 + }, + { + "text": "se", + "start": 108.66, + "end": 108.7, + "confidence": 0.991 + }, + { + "text": "trompe", + "start": 108.7, + "end": 108.88, + "confidence": 0.995 + }, + { + "text": 
"pas", + "start": 108.88, + "end": 109.08, + "confidence": 0.999 + }, + { + "text": "vraiment.", + "start": 109.08, + "end": 109.34, + "confidence": 0.991 + } + ] + }, + { + "id": 22, + "seek": 10584, + "start": 109.88, + "end": 114.98, + "text": " Pour autant que je sache, il y a eu plein de discussions autour de la voiture ou même du téléphone.", + "tokens": [ + 8732, + 34081, + 631, + 1506, + 262, + 6000, + 11, + 1930, + 288, + 257, + 2228, + 21088, + 368, + 11088, + 30249, + 368, + 635, + 38859, + 2820, + 5698, + 1581, + 47159, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07386374800172571, + "compression_ratio": 1.685512367491166, + "no_speech_prob": 5.46344235772267e-05, + "confidence": 0.969, + "words": [ + { + "text": "Pour", + "start": 109.88, + "end": 110.08, + "confidence": 0.997 + }, + { + "text": "autant", + "start": 110.08, + "end": 110.24, + "confidence": 1.0 + }, + { + "text": "que", + "start": 110.24, + "end": 110.42, + "confidence": 0.988 + }, + { + "text": "je", + "start": 110.42, + "end": 110.52, + "confidence": 0.998 + }, + { + "text": "sache,", + "start": 110.52, + "end": 111.14, + "confidence": 0.963 + }, + { + "text": "il", + "start": 111.14, + "end": 111.18, + "confidence": 0.997 + }, + { + "text": "y", + "start": 111.18, + "end": 111.32, + "confidence": 0.992 + }, + { + "text": "a", + "start": 111.32, + "end": 111.36, + "confidence": 0.993 + }, + { + "text": "eu", + "start": 111.36, + "end": 111.68, + "confidence": 0.998 + }, + { + "text": "plein", + "start": 111.68, + "end": 111.88, + "confidence": 0.974 + }, + { + "text": "de", + "start": 111.88, + "end": 112.06, + "confidence": 0.997 + }, + { + "text": "discussions", + "start": 112.06, + "end": 112.6, + "confidence": 0.799 + }, + { + "text": "autour", + "start": 112.6, + "end": 112.94, + "confidence": 0.995 + }, + { + "text": "de", + "start": 112.94, + "end": 113.46, + "confidence": 0.997 + }, + { + "text": "la", + "start": 113.46, + "end": 113.52, + "confidence": 0.998 + }, + { 
+ "text": "voiture", + "start": 113.52, + "end": 113.86, + "confidence": 1.0 + }, + { + "text": "ou", + "start": 113.86, + "end": 114.06, + "confidence": 0.765 + }, + { + "text": "même", + "start": 114.06, + "end": 114.44, + "confidence": 0.996 + }, + { + "text": "du", + "start": 114.44, + "end": 114.6, + "confidence": 0.995 + }, + { + "text": "téléphone.", + "start": 114.6, + "end": 114.98, + "confidence": 0.999 + } + ] + }, + { + "id": 23, + "seek": 10584, + "start": 115.34, + "end": 119.84, + "text": " Mais la dépendance n'était pas du même ordre. Donc le rejet non plus n'était pas du même ordre.", + "tokens": [ + 6313, + 635, + 45768, + 719, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 4792, + 265, + 13, + 7477, + 476, + 319, + 7108, + 2107, + 1804, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 4792, + 265, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07386374800172571, + "compression_ratio": 1.685512367491166, + "no_speech_prob": 5.46344235772267e-05, + "confidence": 0.972, + "words": [ + { + "text": "Mais", + "start": 115.34, + "end": 115.72, + "confidence": 0.994 + }, + { + "text": "la", + "start": 115.72, + "end": 116.02, + "confidence": 0.937 + }, + { + "text": "dépendance", + "start": 116.02, + "end": 116.4, + "confidence": 0.997 + }, + { + "text": "n'était", + "start": 116.4, + "end": 116.62, + "confidence": 0.994 + }, + { + "text": "pas", + "start": 116.62, + "end": 117.0, + "confidence": 0.998 + }, + { + "text": "du", + "start": 117.0, + "end": 117.16, + "confidence": 0.995 + }, + { + "text": "même", + "start": 117.16, + "end": 117.46, + "confidence": 0.999 + }, + { + "text": "ordre.", + "start": 117.46, + "end": 117.78, + "confidence": 0.999 + }, + { + "text": "Donc", + "start": 117.78, + "end": 117.98, + "confidence": 0.804 + }, + { + "text": "le", + "start": 117.98, + "end": 118.34, + "confidence": 0.715 + }, + { + "text": "rejet", + "start": 118.34, + "end": 118.62, + "confidence": 0.999 + }, + { + "text": "non", + "start": 118.62, + "end": 
118.78, + "confidence": 0.975 + }, + { + "text": "plus", + "start": 118.78, + "end": 118.94, + "confidence": 0.996 + }, + { + "text": "n'était", + "start": 118.94, + "end": 119.12, + "confidence": 0.987 + }, + { + "text": "pas", + "start": 119.12, + "end": 119.3, + "confidence": 0.998 + }, + { + "text": "du", + "start": 119.3, + "end": 119.38, + "confidence": 0.995 + }, + { + "text": "même", + "start": 119.38, + "end": 119.56, + "confidence": 0.999 + }, + { + "text": "ordre.", + "start": 119.56, + "end": 119.84, + "confidence": 0.999 + } + ] + }, + { + "id": 24, + "seek": 10584, + "start": 119.98, + "end": 123.02, + "text": " On peut adorer sa bagnole, en avoir besoin pour plein de choses.", + "tokens": [ + 1282, + 5977, + 614, + 17618, + 601, + 3411, + 1771, + 306, + 11, + 465, + 10853, + 19207, + 2016, + 21088, + 368, + 14488, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07386374800172571, + "compression_ratio": 1.685512367491166, + "no_speech_prob": 5.46344235772267e-05, + "confidence": 0.991, + "words": [ + { + "text": "On", + "start": 119.98, + "end": 120.18, + "confidence": 0.996 + }, + { + "text": "peut", + "start": 120.18, + "end": 120.38, + "confidence": 0.997 + }, + { + "text": "adorer", + "start": 120.38, + "end": 120.66, + "confidence": 0.99 + }, + { + "text": "sa", + "start": 120.66, + "end": 120.88, + "confidence": 0.985 + }, + { + "text": "bagnole,", + "start": 120.88, + "end": 121.46, + "confidence": 0.984 + }, + { + "text": "en", + "start": 121.46, + "end": 121.56, + "confidence": 0.989 + }, + { + "text": "avoir", + "start": 121.56, + "end": 121.74, + "confidence": 0.998 + }, + { + "text": "besoin", + "start": 121.74, + "end": 122.1, + "confidence": 0.999 + }, + { + "text": "pour", + "start": 122.1, + "end": 122.34, + "confidence": 0.987 + }, + { + "text": "plein", + "start": 122.34, + "end": 122.68, + "confidence": 0.989 + }, + { + "text": "de", + "start": 122.68, + "end": 122.8, + "confidence": 0.998 + }, + { + "text": "choses.", + 
"start": 122.8, + "end": 123.02, + "confidence": 0.989 + } + ] + }, + { + "id": 25, + "seek": 10584, + "start": 123.28, + "end": 126.36, + "text": " Et bien, le soir, quand on va se coucher, on la laisse.", + "tokens": [ + 3790, + 3610, + 11, + 476, + 27105, + 11, + 6932, + 322, + 2773, + 369, + 1384, + 6759, + 11, + 322, + 635, + 30969, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07386374800172571, + "compression_ratio": 1.685512367491166, + "no_speech_prob": 5.46344235772267e-05, + "confidence": 0.895, + "words": [ + { + "text": "Et", + "start": 123.28, + "end": 123.46, + "confidence": 0.667 + }, + { + "text": "bien,", + "start": 123.46, + "end": 123.86, + "confidence": 0.469 + }, + { + "text": "le", + "start": 123.86, + "end": 123.98, + "confidence": 0.996 + }, + { + "text": "soir,", + "start": 123.98, + "end": 124.68, + "confidence": 0.999 + }, + { + "text": "quand", + "start": 124.68, + "end": 124.9, + "confidence": 0.997 + }, + { + "text": "on", + "start": 124.9, + "end": 125.02, + "confidence": 0.998 + }, + { + "text": "va", + "start": 125.02, + "end": 125.14, + "confidence": 0.996 + }, + { + "text": "se", + "start": 125.14, + "end": 125.38, + "confidence": 0.988 + }, + { + "text": "coucher,", + "start": 125.38, + "end": 125.8, + "confidence": 0.987 + }, + { + "text": "on", + "start": 125.8, + "end": 126.02, + "confidence": 0.995 + }, + { + "text": "la", + "start": 126.02, + "end": 126.22, + "confidence": 0.802 + }, + { + "text": "laisse.", + "start": 126.22, + "end": 126.36, + "confidence": 0.999 + } + ] + }, + { + "id": 26, + "seek": 10584, + "start": 126.98, + "end": 130.48, + "text": " On ne l'a pas dans la main quand on est au lit, on ne l'emmène pas au chiottes.", + "tokens": [ + 1282, + 408, + 287, + 6, + 64, + 1736, + 2680, + 635, + 2135, + 6932, + 322, + 871, + 1609, + 7997, + 11, + 322, + 408, + 287, + 6, + 443, + 76, + 18832, + 1736, + 1609, + 13228, + 1521, + 279, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.07386374800172571, + 
"compression_ratio": 1.685512367491166, + "no_speech_prob": 5.46344235772267e-05, + "confidence": 0.914, + "words": [ + { + "text": "On", + "start": 126.98, + "end": 127.32, + "confidence": 0.954 + }, + { + "text": "ne", + "start": 127.32, + "end": 127.36, + "confidence": 0.803 + }, + { + "text": "l'a", + "start": 127.36, + "end": 127.48, + "confidence": 0.974 + }, + { + "text": "pas", + "start": 127.48, + "end": 127.68, + "confidence": 0.999 + }, + { + "text": "dans", + "start": 127.68, + "end": 127.8, + "confidence": 0.996 + }, + { + "text": "la", + "start": 127.8, + "end": 128.06, + "confidence": 0.994 + }, + { + "text": "main", + "start": 128.06, + "end": 128.26, + "confidence": 0.999 + }, + { + "text": "quand", + "start": 128.26, + "end": 128.44, + "confidence": 0.935 + }, + { + "text": "on", + "start": 128.44, + "end": 128.62, + "confidence": 0.997 + }, + { + "text": "est", + "start": 128.62, + "end": 128.68, + "confidence": 0.993 + }, + { + "text": "au", + "start": 128.68, + "end": 129.04, + "confidence": 0.984 + }, + { + "text": "lit,", + "start": 129.04, + "end": 129.14, + "confidence": 0.999 + }, + { + "text": "on", + "start": 129.14, + "end": 129.26, + "confidence": 0.427 + }, + { + "text": "ne", + "start": 129.26, + "end": 129.3, + "confidence": 0.963 + }, + { + "text": "l'emmène", + "start": 129.3, + "end": 129.5, + "confidence": 0.992 + }, + { + "text": "pas", + "start": 129.5, + "end": 129.68, + "confidence": 0.997 + }, + { + "text": "au", + "start": 129.68, + "end": 129.86, + "confidence": 0.674 + }, + { + "text": "chiottes.", + "start": 129.86, + "end": 130.48, + "confidence": 0.828 + } + ] + }, + { + "id": 27, + "seek": 13084, + "start": 130.86, + "end": 136.9, + "text": " On pouvait être énervé par son môme qui occupait la ligne de téléphone pendant une heure chaque soir pour discuter avec un copain.", + "tokens": [ + 1282, + 45913, + 7418, + 45045, + 15797, + 971, + 1872, + 275, + 2851, + 1398, + 1956, + 8073, + 1001, + 635, + 34207, + 368, + 
47159, + 17338, + 2251, + 30027, + 18920, + 27105, + 2016, + 2983, + 20314, + 4163, + 517, + 2971, + 491, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06199159333200166, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.6126719856401905e-05, + "confidence": 0.955, + "words": [ + { + "text": "On", + "start": 130.86, + "end": 131.04, + "confidence": 0.983 + }, + { + "text": "pouvait", + "start": 131.04, + "end": 131.28, + "confidence": 0.989 + }, + { + "text": "être", + "start": 131.28, + "end": 131.48, + "confidence": 0.996 + }, + { + "text": "énervé", + "start": 131.48, + "end": 132.22, + "confidence": 0.906 + }, + { + "text": "par", + "start": 132.22, + "end": 132.44, + "confidence": 0.991 + }, + { + "text": "son", + "start": 132.44, + "end": 132.7, + "confidence": 0.998 + }, + { + "text": "môme", + "start": 132.7, + "end": 133.1, + "confidence": 0.832 + }, + { + "text": "qui", + "start": 133.1, + "end": 133.34, + "confidence": 0.919 + }, + { + "text": "occupait", + "start": 133.34, + "end": 133.76, + "confidence": 0.991 + }, + { + "text": "la", + "start": 133.76, + "end": 133.8, + "confidence": 0.992 + }, + { + "text": "ligne", + "start": 133.8, + "end": 134.08, + "confidence": 0.999 + }, + { + "text": "de", + "start": 134.08, + "end": 134.14, + "confidence": 0.997 + }, + { + "text": "téléphone", + "start": 134.14, + "end": 134.6, + "confidence": 0.985 + }, + { + "text": "pendant", + "start": 134.6, + "end": 134.82, + "confidence": 0.981 + }, + { + "text": "une", + "start": 134.82, + "end": 135.2, + "confidence": 0.87 + }, + { + "text": "heure", + "start": 135.2, + "end": 135.36, + "confidence": 0.998 + }, + { + "text": "chaque", + "start": 135.36, + "end": 135.54, + "confidence": 0.991 + }, + { + "text": "soir", + "start": 135.54, + "end": 135.8, + "confidence": 0.996 + }, + { + "text": "pour", + "start": 135.8, + "end": 135.96, + "confidence": 0.79 + }, + { + "text": "discuter", + "start": 135.96, + "end": 136.28, + "confidence": 0.997 + 
}, + { + "text": "avec", + "start": 136.28, + "end": 136.48, + "confidence": 0.996 + }, + { + "text": "un", + "start": 136.48, + "end": 136.6, + "confidence": 0.999 + }, + { + "text": "copain.", + "start": 136.6, + "end": 136.9, + "confidence": 0.998 + } + ] + }, + { + "id": 28, + "seek": 13084, + "start": 137.28, + "end": 141.88, + "text": " Mais ça ne ressemblait pas à ce qu'on peut ressentir à voir ce même môme aujourd'hui,", + "tokens": [ + 6313, + 2788, + 408, + 725, + 15750, + 35235, + 1736, + 1531, + 1769, + 421, + 6, + 266, + 5977, + 24689, + 317, + 347, + 1531, + 10695, + 1769, + 5698, + 275, + 2851, + 1398, + 14023, + 6, + 10556, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.06199159333200166, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.6126719856401905e-05, + "confidence": 0.959, + "words": [ + { + "text": "Mais", + "start": 137.28, + "end": 137.46, + "confidence": 0.993 + }, + { + "text": "ça", + "start": 137.46, + "end": 137.68, + "confidence": 0.938 + }, + { + "text": "ne", + "start": 137.68, + "end": 137.94, + "confidence": 0.998 + }, + { + "text": "ressemblait", + "start": 137.94, + "end": 138.4, + "confidence": 0.991 + }, + { + "text": "pas", + "start": 138.4, + "end": 138.76, + "confidence": 0.995 + }, + { + "text": "à", + "start": 138.76, + "end": 138.94, + "confidence": 0.988 + }, + { + "text": "ce", + "start": 138.94, + "end": 138.98, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 138.98, + "end": 139.1, + "confidence": 0.986 + }, + { + "text": "peut", + "start": 139.1, + "end": 139.48, + "confidence": 0.988 + }, + { + "text": "ressentir", + "start": 139.48, + "end": 140.12, + "confidence": 0.997 + }, + { + "text": "à", + "start": 140.12, + "end": 140.32, + "confidence": 0.498 + }, + { + "text": "voir", + "start": 140.32, + "end": 140.46, + "confidence": 0.855 + }, + { + "text": "ce", + "start": 140.46, + "end": 140.68, + "confidence": 0.989 + }, + { + "text": "même", + "start": 140.68, + "end": 140.94, 
+ "confidence": 0.984 + }, + { + "text": "môme", + "start": 140.94, + "end": 141.34, + "confidence": 0.998 + }, + { + "text": "aujourd'hui,", + "start": 141.34, + "end": 141.88, + "confidence": 0.988 + } + ] + }, + { + "id": 29, + "seek": 13084, + "start": 142.14, + "end": 146.3, + "text": " continuellement avec son smartphone dans la main, comme si c'était une sorte de pacemaker externe,", + "tokens": [ + 2354, + 285, + 1712, + 4163, + 1872, + 13307, + 2680, + 635, + 2135, + 11, + 5173, + 1511, + 269, + 6, + 9743, + 2251, + 25559, + 368, + 15165, + 49523, + 454, + 391, + 716, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.06199159333200166, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.6126719856401905e-05, + "confidence": 0.963, + "words": [ + { + "text": "continuellement", + "start": 142.14, + "end": 142.94, + "confidence": 0.971 + }, + { + "text": "avec", + "start": 142.94, + "end": 143.18, + "confidence": 0.805 + }, + { + "text": "son", + "start": 143.18, + "end": 143.38, + "confidence": 0.995 + }, + { + "text": "smartphone", + "start": 143.38, + "end": 143.76, + "confidence": 0.978 + }, + { + "text": "dans", + "start": 143.76, + "end": 143.94, + "confidence": 0.979 + }, + { + "text": "la", + "start": 143.94, + "end": 144.0, + "confidence": 0.996 + }, + { + "text": "main,", + "start": 144.0, + "end": 144.36, + "confidence": 0.998 + }, + { + "text": "comme", + "start": 144.36, + "end": 144.52, + "confidence": 0.835 + }, + { + "text": "si", + "start": 144.52, + "end": 144.64, + "confidence": 0.975 + }, + { + "text": "c'était", + "start": 144.64, + "end": 144.82, + "confidence": 0.991 + }, + { + "text": "une", + "start": 144.82, + "end": 145.06, + "confidence": 0.989 + }, + { + "text": "sorte", + "start": 145.06, + "end": 145.22, + "confidence": 0.997 + }, + { + "text": "de", + "start": 145.22, + "end": 145.3, + "confidence": 0.982 + }, + { + "text": "pacemaker", + "start": 145.3, + "end": 145.82, + "confidence": 0.917 + }, + { + "text": 
"externe,", + "start": 145.82, + "end": 146.3, + "confidence": 0.992 + } + ] + }, + { + "id": 30, + "seek": 13084, + "start": 146.34, + "end": 148.84, + "text": " comme si le lâcher allait entraîner sa mort immédiate.", + "tokens": [ + 5173, + 1511, + 476, + 48835, + 6759, + 439, + 1001, + 22284, + 7517, + 1193, + 601, + 6599, + 3397, + 526, + 4504, + 473, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06199159333200166, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.6126719856401905e-05, + "confidence": 0.989, + "words": [ + { + "text": "comme", + "start": 146.34, + "end": 146.58, + "confidence": 0.996 + }, + { + "text": "si", + "start": 146.58, + "end": 146.76, + "confidence": 0.994 + }, + { + "text": "le", + "start": 146.76, + "end": 146.84, + "confidence": 0.997 + }, + { + "text": "lâcher", + "start": 146.84, + "end": 147.36, + "confidence": 0.969 + }, + { + "text": "allait", + "start": 147.36, + "end": 147.56, + "confidence": 0.993 + }, + { + "text": "entraîner", + "start": 147.56, + "end": 147.86, + "confidence": 0.978 + }, + { + "text": "sa", + "start": 147.86, + "end": 148.0, + "confidence": 0.999 + }, + { + "text": "mort", + "start": 148.0, + "end": 148.22, + "confidence": 0.998 + }, + { + "text": "immédiate.", + "start": 148.22, + "end": 148.84, + "confidence": 0.997 + } + ] + }, + { + "id": 31, + "seek": 13084, + "start": 149.04, + "end": 151.96, + "text": " Bon, je dis ça pour le môme, mais c'est évidemment valable pour nous aussi.", + "tokens": [ + 7368, + 11, + 1506, + 717, + 2788, + 2016, + 476, + 275, + 2851, + 1398, + 11, + 2420, + 269, + 6, + 377, + 24724, + 1323, + 712, + 2016, + 4666, + 6212, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.06199159333200166, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.6126719856401905e-05, + "confidence": 0.974, + "words": [ + { + "text": "Bon,", + "start": 149.04, + "end": 149.28, + "confidence": 0.918 + }, + { + "text": "je", + "start": 149.28, + "end": 149.32, + 
"confidence": 0.934 + }, + { + "text": "dis", + "start": 149.32, + "end": 149.46, + "confidence": 0.988 + }, + { + "text": "ça", + "start": 149.46, + "end": 149.64, + "confidence": 0.994 + }, + { + "text": "pour", + "start": 149.64, + "end": 149.74, + "confidence": 0.997 + }, + { + "text": "le", + "start": 149.74, + "end": 149.88, + "confidence": 0.995 + }, + { + "text": "môme,", + "start": 149.88, + "end": 150.32, + "confidence": 0.998 + }, + { + "text": "mais", + "start": 150.32, + "end": 150.52, + "confidence": 0.791 + }, + { + "text": "c'est", + "start": 150.52, + "end": 150.82, + "confidence": 0.981 + }, + { + "text": "évidemment", + "start": 150.82, + "end": 151.14, + "confidence": 0.98 + }, + { + "text": "valable", + "start": 151.14, + "end": 151.48, + "confidence": 0.998 + }, + { + "text": "pour", + "start": 151.48, + "end": 151.62, + "confidence": 0.997 + }, + { + "text": "nous", + "start": 151.62, + "end": 151.76, + "confidence": 0.999 + }, + { + "text": "aussi.", + "start": 151.76, + "end": 151.96, + "confidence": 0.996 + } + ] + }, + { + "id": 32, + "seek": 13084, + "start": 152.34, + "end": 158.22, + "text": " Donc, rapport inédit. D'accord. 
Mais pourquoi a-t-on l'impression qu'on n'en sortira jamais?", + "tokens": [ + 7477, + 11, + 18018, + 294, + 7811, + 270, + 13, + 413, + 6, + 19947, + 13, + 6313, + 19934, + 257, + 12, + 83, + 12, + 266, + 287, + 6, + 36107, + 421, + 6, + 266, + 297, + 6, + 268, + 1333, + 4271, + 14540, + 2506 + ], + "temperature": 0.0, + "avg_logprob": -0.06199159333200166, + "compression_ratio": 1.6127167630057804, + "no_speech_prob": 1.6126719856401905e-05, + "confidence": 0.948, + "words": [ + { + "text": "Donc,", + "start": 152.34, + "end": 153.46, + "confidence": 0.991 + }, + { + "text": "rapport", + "start": 153.46, + "end": 153.66, + "confidence": 0.976 + }, + { + "text": "inédit.", + "start": 153.66, + "end": 154.24, + "confidence": 0.996 + }, + { + "text": "D'accord.", + "start": 154.24, + "end": 155.48, + "confidence": 0.978 + }, + { + "text": "Mais", + "start": 155.48, + "end": 155.82, + "confidence": 0.557 + }, + { + "text": "pourquoi", + "start": 155.82, + "end": 156.32, + "confidence": 0.994 + }, + { + "text": "a-t-on", + "start": 156.32, + "end": 156.68, + "confidence": 0.94 + }, + { + "text": "l'impression", + "start": 156.68, + "end": 157.06, + "confidence": 0.999 + }, + { + "text": "qu'on", + "start": 157.06, + "end": 157.26, + "confidence": 0.995 + }, + { + "text": "n'en", + "start": 157.26, + "end": 157.44, + "confidence": 0.878 + }, + { + "text": "sortira", + "start": 157.44, + "end": 157.9, + "confidence": 0.989 + }, + { + "text": "jamais?", + "start": 157.9, + "end": 158.22, + "confidence": 0.998 + } + ] + }, + { + "id": 33, + "seek": 15884, + "start": 158.86, + "end": 165.32, + "text": " Est-ce qu'il faut en remettre la faute sur les gens qui ont créé cet outil merveilleux et diabolique, et diabolique parce que merveilleux?", + "tokens": [ + 4410, + 12, + 384, + 421, + 6, + 388, + 8487, + 465, + 890, + 40681, + 635, + 2050, + 1169, + 1022, + 1512, + 10668, + 1956, + 6592, + 15609, + 526, + 8603, + 484, + 388, + 3551, + 303, + 3409, + 2449, + 1030, + 1026, + 
14923, + 1925, + 11, + 1030, + 1026, + 14923, + 1925, + 6992, + 631, + 3551, + 303, + 3409, + 2449, + 2506 + ], + "temperature": 0.0, + "avg_logprob": -0.079788723507443, + "compression_ratio": 1.6517857142857142, + "no_speech_prob": 5.4980162531137466e-05, + "confidence": 0.962, + "words": [ + { + "text": "Est-ce", + "start": 158.86, + "end": 159.34, + "confidence": 0.981 + }, + { + "text": "qu'il", + "start": 159.34, + "end": 159.46, + "confidence": 0.997 + }, + { + "text": "faut", + "start": 159.46, + "end": 159.62, + "confidence": 0.999 + }, + { + "text": "en", + "start": 159.62, + "end": 159.78, + "confidence": 0.97 + }, + { + "text": "remettre", + "start": 159.78, + "end": 160.12, + "confidence": 0.999 + }, + { + "text": "la", + "start": 160.12, + "end": 160.34, + "confidence": 0.998 + }, + { + "text": "faute", + "start": 160.34, + "end": 160.66, + "confidence": 0.986 + }, + { + "text": "sur", + "start": 160.66, + "end": 160.94, + "confidence": 0.995 + }, + { + "text": "les", + "start": 160.94, + "end": 161.28, + "confidence": 0.995 + }, + { + "text": "gens", + "start": 161.28, + "end": 161.44, + "confidence": 1.0 + }, + { + "text": "qui", + "start": 161.44, + "end": 161.58, + "confidence": 0.984 + }, + { + "text": "ont", + "start": 161.58, + "end": 161.72, + "confidence": 0.998 + }, + { + "text": "créé", + "start": 161.72, + "end": 162.3, + "confidence": 0.99 + }, + { + "text": "cet", + "start": 162.3, + "end": 162.46, + "confidence": 0.852 + }, + { + "text": "outil", + "start": 162.46, + "end": 162.78, + "confidence": 0.99 + }, + { + "text": "merveilleux", + "start": 162.78, + "end": 163.34, + "confidence": 0.994 + }, + { + "text": "et", + "start": 163.34, + "end": 163.5, + "confidence": 0.954 + }, + { + "text": "diabolique,", + "start": 163.5, + "end": 163.86, + "confidence": 0.992 + }, + { + "text": "et", + "start": 163.86, + "end": 163.92, + "confidence": 0.54 + }, + { + "text": "diabolique", + "start": 163.92, + "end": 164.4, + "confidence": 0.951 + }, 
+ { + "text": "parce", + "start": 164.4, + "end": 164.66, + "confidence": 0.703 + }, + { + "text": "que", + "start": 164.66, + "end": 164.84, + "confidence": 0.99 + }, + { + "text": "merveilleux?", + "start": 164.84, + "end": 165.32, + "confidence": 0.997 + } + ] + }, + { + "id": 34, + "seek": 15884, + "start": 166.34, + "end": 168.82, + "text": " Les économistes parlent de dépendance du sentier.", + "tokens": [ + 6965, + 31171, + 22368, + 13734, + 317, + 368, + 45768, + 719, + 1581, + 2279, + 811, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.079788723507443, + "compression_ratio": 1.6517857142857142, + "no_speech_prob": 5.4980162531137466e-05, + "confidence": 0.984, + "words": [ + { + "text": "Les", + "start": 166.34, + "end": 167.04, + "confidence": 0.926 + }, + { + "text": "économistes", + "start": 167.04, + "end": 167.48, + "confidence": 0.998 + }, + { + "text": "parlent", + "start": 167.48, + "end": 167.68, + "confidence": 0.995 + }, + { + "text": "de", + "start": 167.68, + "end": 167.82, + "confidence": 0.995 + }, + { + "text": "dépendance", + "start": 167.82, + "end": 168.36, + "confidence": 0.985 + }, + { + "text": "du", + "start": 168.36, + "end": 168.5, + "confidence": 0.997 + }, + { + "text": "sentier.", + "start": 168.5, + "end": 168.82, + "confidence": 0.978 + } + ] + }, + { + "id": 35, + "seek": 16884, + "start": 168.86, + "end": 177.42, + "text": " C'est l'idée qu'on est sur un sentier qui a été établi, soit volontairement en marchant dessus, soit en définissant des bornes, en définissant une signalétique.", + "tokens": [ + 50364, + 383, + 6, + 377, + 287, + 6, + 34281, + 421, + 6, + 266, + 871, + 1022, + 517, + 2279, + 811, + 1956, + 257, + 8862, + 4823, + 455, + 2081, + 11, + 12703, + 40005, + 9020, + 518, + 465, + 8368, + 394, + 30677, + 11, + 12703, + 465, + 40763, + 29492, + 730, + 4232, + 279, + 11, + 465, + 40763, + 29492, + 2251, + 6358, + 42379, + 13, + 51414 + ], + "temperature": 0.0, + "avg_logprob": -0.10415018598238628, + 
"compression_ratio": 1.3916666666666666, + "no_speech_prob": 3.958350498578511e-05, + "confidence": 0.93, + "words": [ + { + "text": "C'est", + "start": 168.86, + "end": 169.1, + "confidence": 0.952 + }, + { + "text": "l'idée", + "start": 169.1, + "end": 169.34, + "confidence": 0.992 + }, + { + "text": "qu'on", + "start": 169.34, + "end": 169.7, + "confidence": 0.825 + }, + { + "text": "est", + "start": 169.7, + "end": 169.88, + "confidence": 0.631 + }, + { + "text": "sur", + "start": 169.88, + "end": 170.0, + "confidence": 0.993 + }, + { + "text": "un", + "start": 170.0, + "end": 170.54, + "confidence": 0.998 + }, + { + "text": "sentier", + "start": 170.54, + "end": 170.78, + "confidence": 0.99 + }, + { + "text": "qui", + "start": 170.78, + "end": 170.84, + "confidence": 0.961 + }, + { + "text": "a", + "start": 170.84, + "end": 170.94, + "confidence": 0.98 + }, + { + "text": "été", + "start": 170.94, + "end": 171.12, + "confidence": 0.996 + }, + { + "text": "établi,", + "start": 171.12, + "end": 171.9, + "confidence": 0.995 + }, + { + "text": "soit", + "start": 171.9, + "end": 172.12, + "confidence": 0.989 + }, + { + "text": "volontairement", + "start": 172.12, + "end": 172.72, + "confidence": 0.965 + }, + { + "text": "en", + "start": 172.72, + "end": 172.8, + "confidence": 0.941 + }, + { + "text": "marchant", + "start": 172.8, + "end": 173.06, + "confidence": 0.997 + }, + { + "text": "dessus,", + "start": 173.06, + "end": 174.24, + "confidence": 0.972 + }, + { + "text": "soit", + "start": 174.24, + "end": 174.92, + "confidence": 0.996 + }, + { + "text": "en", + "start": 174.92, + "end": 175.36, + "confidence": 0.991 + }, + { + "text": "définissant", + "start": 175.36, + "end": 175.5, + "confidence": 0.983 + }, + { + "text": "des", + "start": 175.5, + "end": 175.76, + "confidence": 0.984 + }, + { + "text": "bornes,", + "start": 175.76, + "end": 176.04, + "confidence": 0.975 + }, + { + "text": "en", + "start": 176.04, + "end": 176.08, + "confidence": 0.68 + }, + { 
+ "text": "définissant", + "start": 176.08, + "end": 176.58, + "confidence": 0.997 + }, + { + "text": "une", + "start": 176.58, + "end": 176.82, + "confidence": 0.944 + }, + { + "text": "signalétique.", + "start": 176.82, + "end": 177.42, + "confidence": 0.642 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_fr/bonjour.wav.words.json b/tests/expected/medium_fr/bonjour.wav.words.json new file mode 100644 index 0000000000000000000000000000000000000000..e0e9ea07a37c2520ea8ae3e722c2da4b0422e019 --- /dev/null +++ b/tests/expected/medium_fr/bonjour.wav.words.json @@ -0,0 +1,32 @@ +{ + "text": " Bonjour !", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.14, + "end": 0.94, + "text": " Bonjour !", + "tokens": [ + 50364, + 25431, + 2298, + 50402 + ], + "temperature": 0.0, + "avg_logprob": -0.7049755573272705, + "compression_ratio": 0.5294117647058824, + "no_speech_prob": 0.08610370755195618, + "confidence": 0.964, + "words": [ + { + "text": "Bonjour !", + "start": 0.14, + "end": 0.94, + "confidence": 0.964 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_fr/bonjour_vous_allez_bien.mp3.words.json b/tests/expected/medium_fr/bonjour_vous_allez_bien.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..5f571a743d7dc16d453a1c173965af6e6189638d --- /dev/null +++ b/tests/expected/medium_fr/bonjour_vous_allez_bien.mp3.words.json @@ -0,0 +1,134 @@ +{ + "text": " Bonjour ! Est-ce que vous allez bien ? Bonjour ! Est-ce que vous allez bien ?", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.42, + "end": 3.46, + "text": " Bonjour ! 
Est-ce que vous allez bien ?", + "tokens": [ + 50364, + 25431, + 2298, + 4410, + 12, + 384, + 631, + 2630, + 18146, + 3610, + 2506, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.3616662392249474, + "compression_ratio": 0.8260869565217391, + "no_speech_prob": 0.07169149070978165, + "confidence": 0.936, + "words": [ + { + "text": "Bonjour !", + "start": 0.42, + "end": 1.92, + "confidence": 0.874 + }, + { + "text": "Est-ce", + "start": 1.92, + "end": 2.16, + "confidence": 0.885 + }, + { + "text": "que", + "start": 2.16, + "end": 2.24, + "confidence": 0.988 + }, + { + "text": "vous", + "start": 2.24, + "end": 2.38, + "confidence": 0.996 + }, + { + "text": "allez", + "start": 2.38, + "end": 2.58, + "confidence": 0.99 + }, + { + "text": "bien ?", + "start": 2.58, + "end": 3.46, + "confidence": 0.999 + } + ] + }, + { + "id": 1, + "seek": 3000, + "start": 32.94, + "end": 35.86, + "text": " Bonjour ! Est-ce que vous allez bien ?", + "tokens": [ + 50364, + 25431, + 2298, + 4410, + 12, + 384, + 631, + 2630, + 18146, + 3610, + 2506, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.3065794431246244, + "compression_ratio": 0.8260869565217391, + "no_speech_prob": 0.40451279282569885, + "confidence": 0.933, + "words": [ + { + "text": "Bonjour !", + "start": 32.94, + "end": 34.44, + "confidence": 0.741 + }, + { + "text": "Est-ce", + "start": 34.44, + "end": 34.7, + "confidence": 0.92 + }, + { + "text": "que", + "start": 34.7, + "end": 34.76, + "confidence": 0.996 + }, + { + "text": "vous", + "start": 34.76, + "end": 34.9, + "confidence": 0.998 + }, + { + "text": "allez", + "start": 34.9, + "end": 35.1, + "confidence": 0.997 + }, + { + "text": "bien ?", + "start": 35.1, + "end": 35.86, + "confidence": 0.999 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_fr/empty.mp3.words.json b/tests/expected/medium_fr/empty.mp3.words.json new file mode 100644 index 
0000000000000000000000000000000000000000..fbfaeba7cd1b2b0b58b614830d66f2d88312cdaf --- /dev/null +++ b/tests/expected/medium_fr/empty.mp3.words.json @@ -0,0 +1,31 @@ +{ + "text": " ...", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.08, + "end": 1.58, + "text": " ...", + "tokens": [ + 50364, + 1097, + 50518 + ], + "temperature": 0.0, + "avg_logprob": -0.8881432414054871, + "compression_ratio": 0.2727272727272727, + "no_speech_prob": 0.43768733739852905, + "confidence": 0.128, + "words": [ + { + "text": "...", + "start": 0.08, + "end": 1.58, + "confidence": 0.128 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_fr/gaenswein15.mp3.words.json b/tests/expected/medium_fr/gaenswein15.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..7ca1dffaeffe0e51a12c5e42c59e6cfe830c9a60 --- /dev/null +++ b/tests/expected/medium_fr/gaenswein15.mp3.words.json @@ -0,0 +1,31 @@ +{ + "text": " ...", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.0, + "end": 8.46, + "text": " ...", + "tokens": [ + 50364, + 1097, + 51122 + ], + "temperature": 0.0, + "avg_logprob": -1.0787408351898193, + "compression_ratio": 0.2727272727272727, + "no_speech_prob": 0.2541781961917877, + "confidence": 0.133, + "words": [ + { + "text": "...", + "start": 0.0, + "end": 8.46, + "confidence": 0.133 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_fr/gloria.mp3.words.json b/tests/expected/medium_fr/gloria.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..0d4073d4d6a44e2bd9102c82ec2af7b15b1d799a --- /dev/null +++ b/tests/expected/medium_fr/gloria.mp3.words.json @@ -0,0 +1,31 @@ +{ + "text": " ...", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 20.18, + "end": 20.2, + "text": " ...", + "tokens": [ + 50364, + 1097, + 51392 + ], + "temperature": 0.0, + "avg_logprob": -1.4416621923446655, + "compression_ratio": 
0.2727272727272727, + "no_speech_prob": 0.05016394704580307, + "confidence": 0.158, + "words": [ + { + "text": "...", + "start": 20.18, + "end": 20.2, + "confidence": 0.158 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_fr/laugh1.mp3.words.json b/tests/expected/medium_fr/laugh1.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..aa75af47ad68c614c7666bfbc54a3e08a51515c2 --- /dev/null +++ b/tests/expected/medium_fr/laugh1.mp3.words.json @@ -0,0 +1,32 @@ +{ + "text": " Rires", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.0, + "end": 1.7, + "text": " Rires", + "tokens": [ + 50364, + 497, + 3145, + 50576 + ], + "temperature": 0.0, + "avg_logprob": -1.0570931434631348, + "compression_ratio": 0.38461538461538464, + "no_speech_prob": 0.5899588465690613, + "confidence": 0.254, + "words": [ + { + "text": "Rires", + "start": 0.0, + "end": 1.7, + "confidence": 0.254 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_fr/laugh2.mp3.words.json b/tests/expected/medium_fr/laugh2.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..4f7c6b071c17ac35631490d3a409e982a2935014 --- /dev/null +++ b/tests/expected/medium_fr/laugh2.mp3.words.json @@ -0,0 +1,32 @@ +{ + "text": " Hehehe", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.22, + "end": 0.64, + "text": " Hehehe", + "tokens": [ + 50364, + 634, + 23500, + 50398 + ], + "temperature": 0.0, + "avg_logprob": -1.2407761573791505, + "compression_ratio": 0.5, + "no_speech_prob": 0.3699853718280792, + "confidence": 0.173, + "words": [ + { + "text": "Hehehe", + "start": 0.22, + "end": 0.64, + "confidence": 0.173 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_fr/punctuations.mp3.words.json b/tests/expected/medium_fr/punctuations.mp3.words.json new file mode 100644 index 
0000000000000000000000000000000000000000..440342c5261720c292eb5ec0f1827124decf0caf --- /dev/null +++ b/tests/expected/medium_fr/punctuations.mp3.words.json @@ -0,0 +1,68 @@ +{ + "text": " Dis-moi, est-ce que l'avion vole ?", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.38, + "end": 2.76, + "text": " Dis-moi, est-ce que l'avion vole ?", + "tokens": [ + 50364, + 4208, + 12, + 29292, + 11, + 871, + 12, + 384, + 631, + 287, + 6, + 706, + 313, + 49877, + 2506, + 50496 + ], + "temperature": 0.0, + "avg_logprob": -0.26328243928797107, + "compression_ratio": 0.8095238095238095, + "no_speech_prob": 0.03882359713315964, + "confidence": 0.928, + "words": [ + { + "text": "Dis-moi,", + "start": 0.38, + "end": 1.1, + "confidence": 0.809 + }, + { + "text": "est-ce", + "start": 1.28, + "end": 1.5, + "confidence": 0.968 + }, + { + "text": "que", + "start": 1.5, + "end": 1.66, + "confidence": 0.978 + }, + { + "text": "l'avion", + "start": 1.66, + "end": 2.04, + "confidence": 0.993 + }, + { + "text": "vole ?", + "start": 2.04, + "end": 2.76, + "confidence": 0.898 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_fr/radio_short.mp3.words.json b/tests/expected/medium_fr/radio_short.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..c2ba6413a8a603ecdfa7fd6925e49ab7a4503755 --- /dev/null +++ b/tests/expected/medium_fr/radio_short.mp3.words.json @@ -0,0 +1,1642 @@ +{ + "text": " Le plus important au poker ce ne sont pas les cartes, c'est ce que vous en faites. Winamax, la référence du poker en ligne. Bonsoir à toutes et tous, vous êtes sur BFM TV, nous sommes en direct, c'est bien sûr BFM story avec tout ce qui fait l'actualité. Durant 60 minutes ce sont des gros plans, des analyses, des réactions que nous vous proposons. Comment Eric Verth peut-il encore soutenir la réforme des retraites alors qu'il est englué dans sa propre affaire, l'affaire Verth-Bettancourt ? 
Question posée par les leaders de la CFDT et la CGT. Réponse de Nicolas Sarkozy, Eric Verth portera le débat sur les retraites, on en parle dans BFM story avec le numéro de la CFDT. Et puis il y a une bataille qui a démarré, celle entre Marine Le Pen et Bruno Gognich, la bataille de la succession de Jean-Marie Le Pen à la tête du Front National. La tournée de campagne de Marine Le Pen commence aujourd'hui dans le Var, Marine Le Pen sera en direct dans BFM story. Restez avec nous Marine Le Pen dans moins de 3 minutes, à tout de suite. ...", + "segments": [ + { + "id": 0, + "seek": 3000, + "start": 30.88, + "end": 34.26, + "text": " Le plus important au poker ce ne sont pas les cartes, c'est ce que vous en faites.", + "tokens": [ + 50364, + 1456, + 1804, + 1021, + 1609, + 36863, + 1769, + 408, + 4900, + 1736, + 1512, + 5467, + 279, + 11, + 269, + 6, + 377, + 1769, + 631, + 2630, + 465, + 29902, + 13, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.1932395863755841, + "compression_ratio": 1.5084745762711864, + "no_speech_prob": 0.5535648465156555, + "confidence": 0.92, + "words": [ + { + "text": "Le", + "start": 30.88, + "end": 31.02, + "confidence": 0.534 + }, + { + "text": "plus", + "start": 31.02, + "end": 31.18, + "confidence": 0.99 + }, + { + "text": "important", + "start": 31.18, + "end": 31.5, + "confidence": 0.993 + }, + { + "text": "au", + "start": 31.5, + "end": 31.62, + "confidence": 0.946 + }, + { + "text": "poker", + "start": 31.62, + "end": 31.84, + "confidence": 0.992 + }, + { + "text": "ce", + "start": 31.84, + "end": 32.0, + "confidence": 0.583 + }, + { + "text": "ne", + "start": 32.0, + "end": 32.06, + "confidence": 0.961 + }, + { + "text": "sont", + "start": 32.06, + "end": 32.16, + "confidence": 0.991 + }, + { + "text": "pas", + "start": 32.16, + "end": 32.3, + "confidence": 0.991 + }, + { + "text": "les", + "start": 32.3, + "end": 32.46, + "confidence": 0.969 + }, + { + "text": "cartes,", + "start": 32.46, + "end": 33.26, + "confidence": 
0.996 + }, + { + "text": "c'est", + "start": 33.48, + "end": 33.62, + "confidence": 0.95 + }, + { + "text": "ce", + "start": 33.62, + "end": 33.66, + "confidence": 0.99 + }, + { + "text": "que", + "start": 33.66, + "end": 33.76, + "confidence": 0.991 + }, + { + "text": "vous", + "start": 33.76, + "end": 33.9, + "confidence": 0.991 + }, + { + "text": "en", + "start": 33.9, + "end": 34.06, + "confidence": 0.982 + }, + { + "text": "faites.", + "start": 34.06, + "end": 34.26, + "confidence": 0.893 + } + ] + }, + { + "id": 1, + "seek": 3000, + "start": 36.26, + "end": 38.74, + "text": " Winamax, la référence du poker en ligne.", + "tokens": [ + 50664, + 10427, + 2404, + 87, + 11, + 635, + 30170, + 41635, + 1581, + 36863, + 465, + 34207, + 13, + 50814 + ], + "temperature": 0.0, + "avg_logprob": -0.1932395863755841, + "compression_ratio": 1.5084745762711864, + "no_speech_prob": 0.5535648465156555, + "confidence": 0.967, + "words": [ + { + "text": "Winamax,", + "start": 36.26, + "end": 37.04, + "confidence": 0.905 + }, + { + "text": "la", + "start": 37.26, + "end": 37.36, + "confidence": 0.988 + }, + { + "text": "référence", + "start": 37.36, + "end": 37.78, + "confidence": 0.998 + }, + { + "text": "du", + "start": 37.78, + "end": 37.96, + "confidence": 0.992 + }, + { + "text": "poker", + "start": 37.96, + "end": 38.22, + "confidence": 0.998 + }, + { + "text": "en", + "start": 38.22, + "end": 38.44, + "confidence": 0.993 + }, + { + "text": "ligne.", + "start": 38.44, + "end": 38.74, + "confidence": 0.993 + } + ] + }, + { + "id": 2, + "seek": 3000, + "start": 44.9, + "end": 51.38, + "text": " Bonsoir à toutes et tous, vous êtes sur BFM TV, nous sommes en direct, c'est bien sûr BFM story avec tout ce qui fait l'actualité.", + "tokens": [ + 51114, + 7368, + 539, + 347, + 1531, + 14437, + 1030, + 8317, + 11, + 2630, + 18935, + 1022, + 363, + 37, + 44, + 3558, + 11, + 4666, + 25232, + 465, + 2047, + 11, + 269, + 6, + 377, + 3610, + 18143, + 363, + 37, + 44, + 1657, + 4163, + 
3486, + 1769, + 1956, + 3887, + 287, + 6, + 578, + 901, + 5066, + 13, + 51414 + ], + "temperature": 0.0, + "avg_logprob": -0.1932395863755841, + "compression_ratio": 1.5084745762711864, + "no_speech_prob": 0.5535648465156555, + "confidence": 0.907, + "words": [ + { + "text": "Bonsoir", + "start": 44.9, + "end": 45.42, + "confidence": 0.955 + }, + { + "text": "à", + "start": 45.42, + "end": 45.84, + "confidence": 0.542 + }, + { + "text": "toutes", + "start": 45.84, + "end": 46.06, + "confidence": 0.916 + }, + { + "text": "et", + "start": 46.06, + "end": 46.26, + "confidence": 0.988 + }, + { + "text": "tous,", + "start": 46.26, + "end": 46.5, + "confidence": 0.981 + }, + { + "text": "vous", + "start": 46.6, + "end": 46.74, + "confidence": 0.97 + }, + { + "text": "êtes", + "start": 46.74, + "end": 46.88, + "confidence": 0.995 + }, + { + "text": "sur", + "start": 46.88, + "end": 47.1, + "confidence": 0.989 + }, + { + "text": "BFM", + "start": 47.1, + "end": 47.52, + "confidence": 0.987 + }, + { + "text": "TV,", + "start": 47.52, + "end": 47.72, + "confidence": 0.853 + }, + { + "text": "nous", + "start": 47.84, + "end": 47.92, + "confidence": 0.829 + }, + { + "text": "sommes", + "start": 47.92, + "end": 48.14, + "confidence": 0.988 + }, + { + "text": "en", + "start": 48.14, + "end": 48.28, + "confidence": 0.987 + }, + { + "text": "direct,", + "start": 48.28, + "end": 48.52, + "confidence": 0.998 + }, + { + "text": "c'est", + "start": 48.7, + "end": 48.76, + "confidence": 0.939 + }, + { + "text": "bien", + "start": 48.76, + "end": 48.98, + "confidence": 0.971 + }, + { + "text": "sûr", + "start": 48.98, + "end": 49.22, + "confidence": 0.906 + }, + { + "text": "BFM", + "start": 49.22, + "end": 49.82, + "confidence": 0.949 + }, + { + "text": "story", + "start": 49.82, + "end": 50.04, + "confidence": 0.356 + }, + { + "text": "avec", + "start": 50.04, + "end": 50.26, + "confidence": 0.551 + }, + { + "text": "tout", + "start": 50.26, + "end": 50.42, + "confidence": 0.931 + }, 
+ { + "text": "ce", + "start": 50.42, + "end": 50.52, + "confidence": 0.995 + }, + { + "text": "qui", + "start": 50.52, + "end": 50.58, + "confidence": 0.952 + }, + { + "text": "fait", + "start": 50.58, + "end": 50.72, + "confidence": 0.961 + }, + { + "text": "l'actualité.", + "start": 50.72, + "end": 51.38, + "confidence": 0.994 + } + ] + }, + { + "id": 3, + "seek": 3000, + "start": 51.96, + "end": 56.22, + "text": " Durant 60 minutes ce sont des gros plans, des analyses, des réactions que nous vous proposons.", + "tokens": [ + 51414, + 13710, + 394, + 4060, + 2077, + 1769, + 4900, + 730, + 18638, + 5482, + 11, + 730, + 37560, + 11, + 730, + 3960, + 12299, + 631, + 4666, + 2630, + 7532, + 892, + 13, + 51664 + ], + "temperature": 0.0, + "avg_logprob": -0.1932395863755841, + "compression_ratio": 1.5084745762711864, + "no_speech_prob": 0.5535648465156555, + "confidence": 0.947, + "words": [ + { + "text": "Durant", + "start": 51.96, + "end": 52.32, + "confidence": 0.935 + }, + { + "text": "60", + "start": 52.32, + "end": 52.76, + "confidence": 0.922 + }, + { + "text": "minutes", + "start": 52.76, + "end": 53.08, + "confidence": 0.912 + }, + { + "text": "ce", + "start": 53.08, + "end": 53.28, + "confidence": 0.748 + }, + { + "text": "sont", + "start": 53.28, + "end": 53.62, + "confidence": 0.979 + }, + { + "text": "des", + "start": 53.62, + "end": 53.84, + "confidence": 0.99 + }, + { + "text": "gros", + "start": 53.84, + "end": 54.06, + "confidence": 0.969 + }, + { + "text": "plans,", + "start": 54.06, + "end": 54.2, + "confidence": 0.958 + }, + { + "text": "des", + "start": 54.32, + "end": 54.44, + "confidence": 0.986 + }, + { + "text": "analyses,", + "start": 54.44, + "end": 54.68, + "confidence": 0.914 + }, + { + "text": "des", + "start": 54.78, + "end": 54.86, + "confidence": 0.995 + }, + { + "text": "réactions", + "start": 54.86, + "end": 55.22, + "confidence": 0.996 + }, + { + "text": "que", + "start": 55.22, + "end": 55.4, + "confidence": 0.881 + }, + { + 
"text": "nous", + "start": 55.4, + "end": 55.52, + "confidence": 0.947 + }, + { + "text": "vous", + "start": 55.52, + "end": 55.78, + "confidence": 0.99 + }, + { + "text": "proposons.", + "start": 55.78, + "end": 56.22, + "confidence": 0.991 + } + ] + }, + { + "id": 4, + "seek": 5600, + "start": 56.66, + "end": 64.23, + "text": " Comment Eric Verth peut-il encore soutenir la réforme des retraites alors qu'il est englué dans sa propre affaire, l'affaire Verth-Bettancourt ?", + "tokens": [ + 50364, + 16328, + 9336, + 4281, + 392, + 5977, + 12, + 388, + 10122, + 29350, + 268, + 347, + 635, + 3960, + 44562, + 730, + 49356, + 3324, + 11246, + 421, + 6, + 388, + 871, + 1741, + 2781, + 526, + 2680, + 601, + 35221, + 2096, + 9020, + 11, + 287, + 6, + 2518, + 9020, + 4281, + 392, + 12, + 33, + 3093, + 4463, + 33403, + 2506, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.13356954513057587, + "compression_ratio": 1.573667711598746, + "no_speech_prob": 0.4252333343029022, + "confidence": 0.854, + "words": [ + { + "text": "Comment", + "start": 56.66, + "end": 56.96, + "confidence": 0.944 + }, + { + "text": "Eric", + "start": 56.96, + "end": 57.2, + "confidence": 0.877 + }, + { + "text": "Verth", + "start": 57.2, + "end": 57.36, + "confidence": 0.369 + }, + { + "text": "peut-il", + "start": 57.36, + "end": 57.8, + "confidence": 0.947 + }, + { + "text": "encore", + "start": 57.8, + "end": 58.22, + "confidence": 0.954 + }, + { + "text": "soutenir", + "start": 58.22, + "end": 58.62, + "confidence": 0.997 + }, + { + "text": "la", + "start": 58.62, + "end": 58.76, + "confidence": 0.977 + }, + { + "text": "réforme", + "start": 58.76, + "end": 59.2, + "confidence": 0.996 + }, + { + "text": "des", + "start": 59.2, + "end": 59.42, + "confidence": 0.992 + }, + { + "text": "retraites", + "start": 59.42, + "end": 59.76, + "confidence": 0.99 + }, + { + "text": "alors", + "start": 59.76, + "end": 59.98, + "confidence": 0.492 + }, + { + "text": "qu'il", + "start": 59.98, + "end": 
60.74, + "confidence": 0.959 + }, + { + "text": "est", + "start": 60.74, + "end": 60.9, + "confidence": 0.973 + }, + { + "text": "englué", + "start": 60.9, + "end": 61.7, + "confidence": 0.945 + }, + { + "text": "dans", + "start": 61.7, + "end": 61.96, + "confidence": 0.961 + }, + { + "text": "sa", + "start": 61.96, + "end": 62.44, + "confidence": 0.848 + }, + { + "text": "propre", + "start": 62.44, + "end": 62.68, + "confidence": 0.992 + }, + { + "text": "affaire,", + "start": 62.68, + "end": 62.94, + "confidence": 0.993 + }, + { + "text": "l'affaire", + "start": 63.02, + "end": 63.3, + "confidence": 0.909 + }, + { + "text": "Verth-Bettancourt ?", + "start": 63.3, + "end": 64.23, + "confidence": 0.711 + } + ] + }, + { + "id": 5, + "seek": 5600, + "start": 64.23, + "end": 67.18, + "text": " Question posée par les leaders de la CFDT et la CGT.", + "tokens": [ + 50764, + 14464, + 1366, + 3856, + 971, + 1512, + 3523, + 368, + 635, + 21792, + 35, + 51, + 1030, + 635, + 38007, + 51, + 13, + 50914 + ], + "temperature": 0.0, + "avg_logprob": -0.13356954513057587, + "compression_ratio": 1.573667711598746, + "no_speech_prob": 0.4252333343029022, + "confidence": 0.895, + "words": [ + { + "text": "Question", + "start": 64.23, + "end": 64.34, + "confidence": 0.63 + }, + { + "text": "posée", + "start": 64.34, + "end": 64.74, + "confidence": 0.977 + }, + { + "text": "par", + "start": 64.74, + "end": 64.96, + "confidence": 0.988 + }, + { + "text": "les", + "start": 64.96, + "end": 65.12, + "confidence": 0.965 + }, + { + "text": "leaders", + "start": 65.12, + "end": 65.66, + "confidence": 0.977 + }, + { + "text": "de", + "start": 65.66, + "end": 65.92, + "confidence": 0.986 + }, + { + "text": "la", + "start": 65.92, + "end": 66.1, + "confidence": 0.967 + }, + { + "text": "CFDT", + "start": 66.1, + "end": 66.52, + "confidence": 0.994 + }, + { + "text": "et", + "start": 66.52, + "end": 66.6, + "confidence": 0.606 + }, + { + "text": "la", + "start": 66.6, + "end": 66.74, + 
"confidence": 0.609 + }, + { + "text": "CGT.", + "start": 66.74, + "end": 67.18, + "confidence": 0.987 + } + ] + }, + { + "id": 6, + "seek": 5600, + "start": 67.5, + "end": 76.38, + "text": " Réponse de Nicolas Sarkozy, Eric Verth portera le débat sur les retraites, on en parle dans BFM story avec le numéro de la CFDT.", + "tokens": [ + 50914, + 41587, + 3739, + 368, + 38268, + 318, + 809, + 78, + 1229, + 11, + 9336, + 4281, + 392, + 1515, + 23833, + 476, + 2795, + 11980, + 1022, + 1512, + 49356, + 3324, + 11, + 322, + 465, + 18508, + 2680, + 363, + 37, + 44, + 1657, + 4163, + 476, + 49525, + 368, + 635, + 21792, + 35, + 51, + 13, + 51364 + ], + "temperature": 0.0, + "avg_logprob": -0.13356954513057587, + "compression_ratio": 1.573667711598746, + "no_speech_prob": 0.4252333343029022, + "confidence": 0.968, + "words": [ + { + "text": "Réponse", + "start": 67.5, + "end": 67.96, + "confidence": 0.947 + }, + { + "text": "de", + "start": 67.96, + "end": 68.14, + "confidence": 0.962 + }, + { + "text": "Nicolas", + "start": 68.14, + "end": 68.38, + "confidence": 0.931 + }, + { + "text": "Sarkozy,", + "start": 68.38, + "end": 68.88, + "confidence": 0.987 + }, + { + "text": "Eric", + "start": 68.98, + "end": 69.24, + "confidence": 0.962 + }, + { + "text": "Verth", + "start": 69.24, + "end": 69.84, + "confidence": 0.986 + }, + { + "text": "portera", + "start": 69.84, + "end": 70.44, + "confidence": 0.966 + }, + { + "text": "le", + "start": 70.44, + "end": 71.06, + "confidence": 0.989 + }, + { + "text": "débat", + "start": 71.06, + "end": 71.46, + "confidence": 0.996 + }, + { + "text": "sur", + "start": 71.46, + "end": 71.7, + "confidence": 0.988 + }, + { + "text": "les", + "start": 71.7, + "end": 71.88, + "confidence": 0.997 + }, + { + "text": "retraites,", + "start": 71.88, + "end": 72.26, + "confidence": 0.997 + }, + { + "text": "on", + "start": 72.36, + "end": 72.5, + "confidence": 0.865 + }, + { + "text": "en", + "start": 72.5, + "end": 72.68, + "confidence": 0.951 + }, 
+ { + "text": "parle", + "start": 72.68, + "end": 73.1, + "confidence": 0.996 + }, + { + "text": "dans", + "start": 73.1, + "end": 73.56, + "confidence": 0.981 + }, + { + "text": "BFM", + "start": 73.56, + "end": 74.46, + "confidence": 0.991 + }, + { + "text": "story", + "start": 74.46, + "end": 74.76, + "confidence": 0.761 + }, + { + "text": "avec", + "start": 74.76, + "end": 75.12, + "confidence": 0.845 + }, + { + "text": "le", + "start": 75.12, + "end": 75.32, + "confidence": 0.982 + }, + { + "text": "numéro", + "start": 75.32, + "end": 75.54, + "confidence": 0.997 + }, + { + "text": "de", + "start": 75.54, + "end": 75.76, + "confidence": 0.996 + }, + { + "text": "la", + "start": 75.76, + "end": 75.9, + "confidence": 0.987 + }, + { + "text": "CFDT.", + "start": 75.9, + "end": 76.38, + "confidence": 0.998 + } + ] + }, + { + "id": 7, + "seek": 5600, + "start": 76.8, + "end": 83.04, + "text": " Et puis il y a une bataille qui a démarré, celle entre Marine Le Pen et Bruno Gognich, la bataille de la succession de Jean-Marie Le Pen à la tête du Front National.", + "tokens": [ + 51364, + 3790, + 9093, + 1930, + 288, + 257, + 2251, + 272, + 3274, + 3409, + 1956, + 257, + 22761, + 2284, + 526, + 11, + 25722, + 3962, + 20415, + 1456, + 10571, + 1030, + 23046, + 460, + 2912, + 480, + 11, + 635, + 272, + 3274, + 3409, + 368, + 635, + 36624, + 368, + 13854, + 12, + 16639, + 414, + 1456, + 10571, + 1531, + 635, + 24661, + 1581, + 17348, + 4862, + 13, + 51714 + ], + "temperature": 0.0, + "avg_logprob": -0.13356954513057587, + "compression_ratio": 1.573667711598746, + "no_speech_prob": 0.4252333343029022, + "confidence": 0.871, + "words": [ + { + "text": "Et", + "start": 76.8, + "end": 76.96, + "confidence": 0.883 + }, + { + "text": "puis", + "start": 76.96, + "end": 77.06, + "confidence": 0.952 + }, + { + "text": "il", + "start": 77.06, + "end": 77.18, + "confidence": 0.792 + }, + { + "text": "y", + "start": 77.18, + "end": 77.2, + "confidence": 0.995 + }, + { + "text": "a", + 
"start": 77.2, + "end": 77.22, + "confidence": 0.986 + }, + { + "text": "une", + "start": 77.22, + "end": 77.28, + "confidence": 0.995 + }, + { + "text": "bataille", + "start": 77.28, + "end": 77.56, + "confidence": 0.996 + }, + { + "text": "qui", + "start": 77.56, + "end": 77.72, + "confidence": 0.997 + }, + { + "text": "a", + "start": 77.72, + "end": 77.76, + "confidence": 0.985 + }, + { + "text": "démarré,", + "start": 77.76, + "end": 78.16, + "confidence": 0.988 + }, + { + "text": "celle", + "start": 78.24, + "end": 78.4, + "confidence": 0.997 + }, + { + "text": "entre", + "start": 78.4, + "end": 78.64, + "confidence": 0.975 + }, + { + "text": "Marine", + "start": 78.64, + "end": 78.94, + "confidence": 0.987 + }, + { + "text": "Le", + "start": 78.94, + "end": 79.12, + "confidence": 0.986 + }, + { + "text": "Pen", + "start": 79.12, + "end": 79.18, + "confidence": 0.986 + }, + { + "text": "et", + "start": 79.18, + "end": 79.34, + "confidence": 0.997 + }, + { + "text": "Bruno", + "start": 79.34, + "end": 79.54, + "confidence": 0.986 + }, + { + "text": "Gognich,", + "start": 79.54, + "end": 80.02, + "confidence": 0.239 + }, + { + "text": "la", + "start": 80.2, + "end": 80.32, + "confidence": 0.71 + }, + { + "text": "bataille", + "start": 80.32, + "end": 80.58, + "confidence": 0.998 + }, + { + "text": "de", + "start": 80.58, + "end": 80.72, + "confidence": 0.976 + }, + { + "text": "la", + "start": 80.72, + "end": 80.84, + "confidence": 0.987 + }, + { + "text": "succession", + "start": 80.84, + "end": 81.22, + "confidence": 0.982 + }, + { + "text": "de", + "start": 81.22, + "end": 81.44, + "confidence": 0.985 + }, + { + "text": "Jean-Marie", + "start": 81.44, + "end": 81.72, + "confidence": 0.95 + }, + { + "text": "Le", + "start": 81.72, + "end": 81.9, + "confidence": 0.996 + }, + { + "text": "Pen", + "start": 81.9, + "end": 82.0, + "confidence": 0.999 + }, + { + "text": "à", + "start": 82.0, + "end": 82.16, + "confidence": 0.977 + }, + { + "text": "la", + "start": 
82.16, + "end": 82.22, + "confidence": 0.995 + }, + { + "text": "tête", + "start": 82.22, + "end": 82.32, + "confidence": 0.926 + }, + { + "text": "du", + "start": 82.32, + "end": 82.46, + "confidence": 0.997 + }, + { + "text": "Front", + "start": 82.46, + "end": 82.66, + "confidence": 0.777 + }, + { + "text": "National.", + "start": 82.66, + "end": 83.04, + "confidence": 0.822 + } + ] + }, + { + "id": 8, + "seek": 8300, + "start": 83.34, + "end": 88.88, + "text": " La tournée de campagne de Marine Le Pen commence aujourd'hui dans le Var, Marine Le Pen sera en direct dans BFM story.", + "tokens": [ + 50364, + 2369, + 3512, + 77, + 3856, + 368, + 2255, + 13887, + 368, + 20415, + 1456, + 10571, + 18137, + 14023, + 6, + 10556, + 2680, + 476, + 14662, + 11, + 20415, + 1456, + 10571, + 15021, + 465, + 2047, + 2680, + 363, + 37, + 44, + 1657, + 13, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.1127170822837136, + "compression_ratio": 1.3591549295774648, + "no_speech_prob": 0.4064846634864807, + "confidence": 0.954, + "words": [ + { + "text": "La", + "start": 83.34, + "end": 83.68, + "confidence": 0.921 + }, + { + "text": "tournée", + "start": 83.68, + "end": 84.16, + "confidence": 0.994 + }, + { + "text": "de", + "start": 84.16, + "end": 84.48, + "confidence": 0.936 + }, + { + "text": "campagne", + "start": 84.48, + "end": 84.8, + "confidence": 0.981 + }, + { + "text": "de", + "start": 84.8, + "end": 84.98, + "confidence": 0.971 + }, + { + "text": "Marine", + "start": 84.98, + "end": 85.14, + "confidence": 0.997 + }, + { + "text": "Le", + "start": 85.14, + "end": 85.32, + "confidence": 0.997 + }, + { + "text": "Pen", + "start": 85.32, + "end": 85.44, + "confidence": 0.999 + }, + { + "text": "commence", + "start": 85.44, + "end": 85.8, + "confidence": 0.971 + }, + { + "text": "aujourd'hui", + "start": 85.8, + "end": 86.14, + "confidence": 0.988 + }, + { + "text": "dans", + "start": 86.14, + "end": 86.28, + "confidence": 0.979 + }, + { + "text": "le", + "start": 
86.28, + "end": 86.46, + "confidence": 0.923 + }, + { + "text": "Var,", + "start": 86.46, + "end": 86.64, + "confidence": 0.533 + }, + { + "text": "Marine", + "start": 86.74, + "end": 86.96, + "confidence": 0.992 + }, + { + "text": "Le", + "start": 86.96, + "end": 87.12, + "confidence": 0.992 + }, + { + "text": "Pen", + "start": 87.12, + "end": 87.22, + "confidence": 0.999 + }, + { + "text": "sera", + "start": 87.22, + "end": 87.48, + "confidence": 0.989 + }, + { + "text": "en", + "start": 87.48, + "end": 87.76, + "confidence": 0.989 + }, + { + "text": "direct", + "start": 87.76, + "end": 88.04, + "confidence": 0.997 + }, + { + "text": "dans", + "start": 88.04, + "end": 88.34, + "confidence": 0.925 + }, + { + "text": "BFM", + "start": 88.34, + "end": 88.68, + "confidence": 0.996 + }, + { + "text": "story.", + "start": 88.68, + "end": 88.88, + "confidence": 0.831 + } + ] + }, + { + "id": 9, + "seek": 8300, + "start": 89.02, + "end": 92.28, + "text": " Restez avec nous Marine Le Pen dans moins de 3 minutes, à tout de suite.", + "tokens": [ + 50664, + 13094, + 4371, + 4163, + 4666, + 20415, + 1456, + 10571, + 2680, + 13099, + 368, + 805, + 2077, + 11, + 1531, + 3486, + 368, + 14205, + 13, + 50814 + ], + "temperature": 0.0, + "avg_logprob": -0.1127170822837136, + "compression_ratio": 1.3591549295774648, + "no_speech_prob": 0.4064846634864807, + "confidence": 0.936, + "words": [ + { + "text": "Restez", + "start": 89.02, + "end": 89.4, + "confidence": 0.967 + }, + { + "text": "avec", + "start": 89.4, + "end": 89.7, + "confidence": 0.997 + }, + { + "text": "nous", + "start": 89.7, + "end": 89.94, + "confidence": 0.956 + }, + { + "text": "Marine", + "start": 89.94, + "end": 90.24, + "confidence": 0.912 + }, + { + "text": "Le", + "start": 90.24, + "end": 90.52, + "confidence": 0.997 + }, + { + "text": "Pen", + "start": 90.52, + "end": 90.68, + "confidence": 0.999 + }, + { + "text": "dans", + "start": 90.68, + "end": 90.98, + "confidence": 0.843 + }, + { + "text": "moins", + 
"start": 90.98, + "end": 91.2, + "confidence": 0.992 + }, + { + "text": "de", + "start": 91.2, + "end": 91.34, + "confidence": 0.991 + }, + { + "text": "3", + "start": 91.34, + "end": 91.52, + "confidence": 0.595 + }, + { + "text": "minutes,", + "start": 91.52, + "end": 91.8, + "confidence": 0.985 + }, + { + "text": "à", + "start": 91.82, + "end": 91.98, + "confidence": 0.961 + }, + { + "text": "tout", + "start": 91.98, + "end": 92.1, + "confidence": 0.917 + }, + { + "text": "de", + "start": 92.1, + "end": 92.14, + "confidence": 0.999 + }, + { + "text": "suite.", + "start": 92.14, + "end": 92.28, + "confidence": 0.999 + } + ] + }, + { + "id": 10, + "seek": 9200, + "start": 92.28, + "end": 93.08, + "text": " ...", + "tokens": [ + 50364, + 1097, + 51064 + ], + "temperature": 0.0, + "avg_logprob": -0.6883271932601929, + "compression_ratio": 0.2727272727272727, + "no_speech_prob": 0.8045242428779602, + "confidence": 0.153, + "words": [ + { + "text": "...", + "start": 92.28, + "end": 93.08, + "confidence": 0.153 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/medium_fr/smartphone.mp3.words.json b/tests/expected/medium_fr/smartphone.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..487ef4822e529a0eedef411d70eb22aba4b88bab --- /dev/null +++ b/tests/expected/medium_fr/smartphone.mp3.words.json @@ -0,0 +1,4899 @@ +{ + "text": " C'est évident ce que dit Nicolas, mais je ne me l'étais jamais formulé comme ça. Ce qui fait la force du smartphone, c'est pas seulement l'accumulation des fonctions, mais la manière dont elles interagissent entre elles. Ce qui dit d'ailleurs sur la photo, c'est hyper convaincant. Alors évidemment, il faudrait ajouter les interfaces. L'écran tactile a été beaucoup très souvent mentionné. Mais bon, il faut dire qu'il profite aussi de 20 ans pendant lesquels les ordinateurs nous ont appris à cliquer sur des icônes. 
Sauf que le smartphone ajoute le toucher, ce qui rend le contact plus direct, plus sensible. Et puis évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté touffu de la navigation web pour aller directement au but. Bref, tout ça, ce sont les conditions qui permettent de créer cet objet dont Nicolas dit qu'il est vraisemblablement inédit dans l'histoire de l'humanité. Mais ça, ça soulève une autre interrogation. Est-ce que le fait que cet objet soit inédit induit que notre rapport à lui est aussi un rapport inédit ? Je veux dire, est-ce que le rapport qu'on a au smartphone est comparable à celui qu'on entretenait à d'autres objets techniques comme la voiture ou le téléphone ? Il n'y a pas d'équivalent en fait. Et donc cette espèce de nouveauté dans la relation à l'objet, c'est fascinant et terrifiant. Parce qu'on a l'impression, comme le disent les utilisateurs et les services, d'être dépendant de cet objet, d'induire en fait une espèce de relation, de médiation avec le monde qui rend de l'ampleur et qui amène aussi à des formes de rejet. Donc à objet inédit, rapport inédit. Et ce rapport, si j'en crois à Nicolas, serait caractérisé par un mélange de dépendance et de rejet. Bon, en vrai, il faudrait remonter très très finement toute l'histoire des objets techniques et de leur insertion dans nos vies pour déterminer si ce rapport est totalement inédit. Mais j'ai l'impression comme ça que Nicolas ne se trompe pas vraiment. Pour autant que je sache, il y a eu plein de discussions autour de la voiture ou même du téléphone. Mais la dépendance n'était pas du même ordre. Donc le rejet non plus n'était pas du même ordre. On peut adorer sa bagnole, en avoir besoin pour plein de choses. Le soir, quand on va se coucher, on la laisse. On ne l'a pas dans la main quand on est au lit, on ne l'emmène pas au chiottes. On pouvait être énervé par son môme qui occupait la ligne de téléphone pendant une heure chaque soir pour discuter avec un copain. 
Mais ça ne ressemblait pas à ce qu'on peut ressentir à voir ce même môme aujourd'hui, continuellement avec son smartphone dans la main, comme si c'était une sorte de pacemaker externe, comme si le lâcher allait entraîner sa mort immédiate. Bon, je dis ça pour le môme, mais c'est évidemment valable pour nous aussi. Donc, rapport inédit, d'accord. Mais pourquoi a-t-on l'impression qu'on n'en sortira jamais ? Est-ce qu'il faut en remettre la faute sur les gens qui ont créé cet outil merveilleux et diabolique, et diabolique parce que merveilleux ? Les économistes parlent de dépendance du sentier. C'est l'idée qu'on met sur un sentier qui a été établi, soit volontairement en marchant dessus, soit en définissant des bornes, en définissant une signalétique.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.4, + "end": 3.66, + "text": " C'est évident ce que dit Nicolas, mais je ne me l'étais jamais formulé comme ça.", + "tokens": [ + 50364, + 383, + 6, + 377, + 20090, + 1078, + 1769, + 631, + 6176, + 38268, + 11, + 2420, + 1506, + 408, + 385, + 287, + 6, + 22824, + 14540, + 49990, + 526, + 5173, + 2788, + 13, + 50539 + ], + "temperature": 0.0, + "avg_logprob": -0.23626650879714664, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.15368737280368805, + "confidence": 0.935, + "words": [ + { + "text": "C'est", + "start": 0.4, + "end": 0.64, + "confidence": 0.961 + }, + { + "text": "évident", + "start": 0.64, + "end": 0.9, + "confidence": 0.984 + }, + { + "text": "ce", + "start": 0.9, + "end": 1.0, + "confidence": 0.663 + }, + { + "text": "que", + "start": 1.0, + "end": 1.08, + "confidence": 0.989 + }, + { + "text": "dit", + "start": 1.08, + "end": 1.2, + "confidence": 0.994 + }, + { + "text": "Nicolas,", + "start": 1.2, + "end": 1.48, + "confidence": 0.91 + }, + { + "text": "mais", + "start": 1.7, + "end": 2.04, + "confidence": 0.979 + }, + { + "text": "je", + "start": 2.04, + "end": 2.26, + "confidence": 0.981 + }, + { + "text": "ne", + "start": 2.26, 
+ "end": 2.34, + "confidence": 0.837 + }, + { + "text": "me", + "start": 2.34, + "end": 2.36, + "confidence": 0.821 + }, + { + "text": "l'étais", + "start": 2.36, + "end": 2.58, + "confidence": 0.971 + }, + { + "text": "jamais", + "start": 2.58, + "end": 2.88, + "confidence": 0.989 + }, + { + "text": "formulé", + "start": 2.88, + "end": 3.26, + "confidence": 0.908 + }, + { + "text": "comme", + "start": 3.26, + "end": 3.44, + "confidence": 0.993 + }, + { + "text": "ça.", + "start": 3.44, + "end": 3.66, + "confidence": 0.975 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 4.16, + "end": 7.94, + "text": " Ce qui fait la force du smartphone, c'est pas seulement l'accumulation des fonctions,", + "tokens": [ + 50549, + 8257, + 1956, + 3887, + 635, + 3464, + 1581, + 13307, + 11, + 269, + 6, + 377, + 1736, + 27772, + 287, + 6, + 8476, + 449, + 2776, + 730, + 17290, + 3916, + 11, + 50756 + ], + "temperature": 0.0, + "avg_logprob": -0.23626650879714664, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.15368737280368805, + "confidence": 0.93, + "words": [ + { + "text": "Ce", + "start": 4.16, + "end": 4.28, + "confidence": 0.952 + }, + { + "text": "qui", + "start": 4.28, + "end": 4.36, + "confidence": 0.958 + }, + { + "text": "fait", + "start": 4.36, + "end": 4.5, + "confidence": 0.567 + }, + { + "text": "la", + "start": 4.5, + "end": 4.76, + "confidence": 0.972 + }, + { + "text": "force", + "start": 4.76, + "end": 5.02, + "confidence": 0.999 + }, + { + "text": "du", + "start": 5.02, + "end": 5.22, + "confidence": 0.996 + }, + { + "text": "smartphone,", + "start": 5.22, + "end": 5.7, + "confidence": 0.912 + }, + { + "text": "c'est", + "start": 6.04, + "end": 6.16, + "confidence": 0.879 + }, + { + "text": "pas", + "start": 6.16, + "end": 6.26, + "confidence": 0.991 + }, + { + "text": "seulement", + "start": 6.26, + "end": 6.54, + "confidence": 0.999 + }, + { + "text": "l'accumulation", + "start": 6.54, + "end": 7.4, + "confidence": 0.958 + }, + { + "text": 
"des", + "start": 7.4, + "end": 7.58, + "confidence": 0.983 + }, + { + "text": "fonctions,", + "start": 7.58, + "end": 7.94, + "confidence": 0.987 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 8.32, + "end": 10.88, + "text": " mais la manière dont elles interagissent entre elles.", + "tokens": [ + 50756, + 2420, + 635, + 22267, + 9400, + 23576, + 728, + 559, + 25450, + 3962, + 23576, + 13, + 50906 + ], + "temperature": 0.0, + "avg_logprob": -0.23626650879714664, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.15368737280368805, + "confidence": 0.977, + "words": [ + { + "text": "mais", + "start": 8.32, + "end": 8.48, + "confidence": 0.992 + }, + { + "text": "la", + "start": 8.48, + "end": 8.7, + "confidence": 0.995 + }, + { + "text": "manière", + "start": 8.7, + "end": 8.94, + "confidence": 0.999 + }, + { + "text": "dont", + "start": 8.94, + "end": 9.08, + "confidence": 0.978 + }, + { + "text": "elles", + "start": 9.08, + "end": 9.48, + "confidence": 0.967 + }, + { + "text": "interagissent", + "start": 9.48, + "end": 10.38, + "confidence": 0.964 + }, + { + "text": "entre", + "start": 10.38, + "end": 10.7, + "confidence": 0.955 + }, + { + "text": "elles.", + "start": 10.7, + "end": 10.88, + "confidence": 0.989 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 10.96, + "end": 13.0, + "text": " Ce qui dit d'ailleurs sur la photo, c'est hyper convaincant.", + "tokens": [ + 50906, + 8257, + 1956, + 6176, + 274, + 6, + 19400, + 1022, + 635, + 5052, + 11, + 269, + 6, + 377, + 9848, + 3754, + 491, + 66, + 394, + 13, + 51006 + ], + "temperature": 0.0, + "avg_logprob": -0.23626650879714664, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.15368737280368805, + "confidence": 0.906, + "words": [ + { + "text": "Ce", + "start": 10.96, + "end": 11.16, + "confidence": 0.608 + }, + { + "text": "qui", + "start": 11.16, + "end": 11.26, + "confidence": 0.769 + }, + { + "text": "dit", + "start": 11.26, + "end": 11.38, + "confidence": 0.983 + }, + 
{ + "text": "d'ailleurs", + "start": 11.38, + "end": 11.58, + "confidence": 0.985 + }, + { + "text": "sur", + "start": 11.58, + "end": 11.72, + "confidence": 0.48 + }, + { + "text": "la", + "start": 11.72, + "end": 11.82, + "confidence": 0.984 + }, + { + "text": "photo,", + "start": 11.82, + "end": 12.0, + "confidence": 0.994 + }, + { + "text": "c'est", + "start": 12.14, + "end": 12.2, + "confidence": 0.997 + }, + { + "text": "hyper", + "start": 12.2, + "end": 12.48, + "confidence": 0.993 + }, + { + "text": "convaincant.", + "start": 12.48, + "end": 13.0, + "confidence": 0.982 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 13.38, + "end": 16.04, + "text": " Alors évidemment, il faudrait ajouter les interfaces.", + "tokens": [ + 51006, + 9946, + 24724, + 11, + 1930, + 38694, + 8645, + 17680, + 23985, + 1512, + 28416, + 13, + 51166 + ], + "temperature": 0.0, + "avg_logprob": -0.23626650879714664, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.15368737280368805, + "confidence": 0.912, + "words": [ + { + "text": "Alors", + "start": 13.38, + "end": 13.58, + "confidence": 0.584 + }, + { + "text": "évidemment,", + "start": 13.58, + "end": 13.86, + "confidence": 0.832 + }, + { + "text": "il", + "start": 14.26, + "end": 14.42, + "confidence": 0.953 + }, + { + "text": "faudrait", + "start": 14.42, + "end": 14.76, + "confidence": 0.996 + }, + { + "text": "ajouter", + "start": 14.76, + "end": 15.2, + "confidence": 0.992 + }, + { + "text": "les", + "start": 15.2, + "end": 15.6, + "confidence": 0.985 + }, + { + "text": "interfaces.", + "start": 15.6, + "end": 16.04, + "confidence": 0.984 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 16.22, + "end": 19.36, + "text": " L'écran tactile a été beaucoup très souvent mentionné.", + "tokens": [ + 51166, + 441, + 6, + 9062, + 4257, + 47319, + 257, + 8862, + 8796, + 5732, + 20847, + 2152, + 15055, + 13, + 51331 + ], + "temperature": 0.0, + "avg_logprob": -0.23626650879714664, + "compression_ratio": 
1.6238532110091743, + "no_speech_prob": 0.15368737280368805, + "confidence": 0.923, + "words": [ + { + "text": "L'écran", + "start": 16.22, + "end": 16.78, + "confidence": 0.996 + }, + { + "text": "tactile", + "start": 16.78, + "end": 17.1, + "confidence": 0.986 + }, + { + "text": "a", + "start": 17.1, + "end": 17.3, + "confidence": 0.98 + }, + { + "text": "été", + "start": 17.3, + "end": 17.84, + "confidence": 0.974 + }, + { + "text": "beaucoup", + "start": 17.84, + "end": 18.28, + "confidence": 0.976 + }, + { + "text": "très", + "start": 18.28, + "end": 18.62, + "confidence": 0.447 + }, + { + "text": "souvent", + "start": 18.62, + "end": 18.9, + "confidence": 0.996 + }, + { + "text": "mentionné.", + "start": 18.9, + "end": 19.36, + "confidence": 0.978 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 20.02, + "end": 25.44, + "text": " Mais bon, il faut dire qu'il profite aussi de 20 ans pendant lesquels les ordinateurs nous ont appris à cliquer sur des icônes.", + "tokens": [ + 51331, + 6313, + 4428, + 11, + 1930, + 8487, + 1264, + 421, + 6, + 388, + 1740, + 642, + 6212, + 368, + 945, + 1567, + 17338, + 1512, + 358, + 1625, + 1512, + 4792, + 13923, + 2156, + 4666, + 6592, + 724, + 5714, + 1531, + 596, + 23909, + 1022, + 730, + 4376, + 2851, + 4081, + 13, + 51631 + ], + "temperature": 0.0, + "avg_logprob": -0.23626650879714664, + "compression_ratio": 1.6238532110091743, + "no_speech_prob": 0.15368737280368805, + "confidence": 0.92, + "words": [ + { + "text": "Mais", + "start": 20.02, + "end": 20.26, + "confidence": 0.943 + }, + { + "text": "bon,", + "start": 20.26, + "end": 20.5, + "confidence": 0.666 + }, + { + "text": "il", + "start": 20.52, + "end": 20.62, + "confidence": 0.99 + }, + { + "text": "faut", + "start": 20.62, + "end": 20.7, + "confidence": 0.99 + }, + { + "text": "dire", + "start": 20.7, + "end": 20.82, + "confidence": 0.995 + }, + { + "text": "qu'il", + "start": 20.82, + "end": 21.04, + "confidence": 0.88 + }, + { + "text": "profite", + "start": 
21.04, + "end": 21.3, + "confidence": 0.995 + }, + { + "text": "aussi", + "start": 21.3, + "end": 21.7, + "confidence": 0.972 + }, + { + "text": "de", + "start": 21.7, + "end": 21.94, + "confidence": 0.97 + }, + { + "text": "20", + "start": 21.94, + "end": 22.16, + "confidence": 0.812 + }, + { + "text": "ans", + "start": 22.16, + "end": 22.3, + "confidence": 0.997 + }, + { + "text": "pendant", + "start": 22.3, + "end": 22.54, + "confidence": 0.723 + }, + { + "text": "lesquels", + "start": 22.54, + "end": 22.92, + "confidence": 0.98 + }, + { + "text": "les", + "start": 22.92, + "end": 23.14, + "confidence": 0.71 + }, + { + "text": "ordinateurs", + "start": 23.14, + "end": 23.58, + "confidence": 0.966 + }, + { + "text": "nous", + "start": 23.58, + "end": 23.74, + "confidence": 0.602 + }, + { + "text": "ont", + "start": 23.74, + "end": 23.88, + "confidence": 0.974 + }, + { + "text": "appris", + "start": 23.88, + "end": 24.12, + "confidence": 0.991 + }, + { + "text": "à", + "start": 24.12, + "end": 24.26, + "confidence": 0.828 + }, + { + "text": "cliquer", + "start": 24.26, + "end": 24.54, + "confidence": 0.989 + }, + { + "text": "sur", + "start": 24.54, + "end": 24.72, + "confidence": 0.984 + }, + { + "text": "des", + "start": 24.72, + "end": 24.92, + "confidence": 0.971 + }, + { + "text": "icônes.", + "start": 24.92, + "end": 25.44, + "confidence": 0.992 + } + ] + }, + { + "id": 7, + "seek": 2534, + "start": 25.54, + "end": 30.64, + "text": " Sauf que le smartphone ajoute le toucher, ce qui rend le contact plus direct, plus sensible.", + "tokens": [ + 50364, + 318, + 9507, + 631, + 476, + 13307, + 17680, + 14040, + 476, + 2557, + 260, + 11, + 1769, + 1956, + 6125, + 476, + 3385, + 1804, + 2047, + 11, + 1804, + 25380, + 13, + 50639 + ], + "temperature": 0.0, + "avg_logprob": -0.10786590413150625, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 0.18929888308048248, + "confidence": 0.968, + "words": [ + { + "text": "Sauf", + "start": 25.54, + "end": 
25.82, + "confidence": 0.974 + }, + { + "text": "que", + "start": 25.82, + "end": 26.3, + "confidence": 0.996 + }, + { + "text": "le", + "start": 26.3, + "end": 26.66, + "confidence": 0.671 + }, + { + "text": "smartphone", + "start": 26.66, + "end": 27.08, + "confidence": 0.995 + }, + { + "text": "ajoute", + "start": 27.08, + "end": 27.48, + "confidence": 0.99 + }, + { + "text": "le", + "start": 27.48, + "end": 27.66, + "confidence": 0.989 + }, + { + "text": "toucher,", + "start": 27.66, + "end": 28.06, + "confidence": 0.987 + }, + { + "text": "ce", + "start": 28.14, + "end": 28.16, + "confidence": 0.952 + }, + { + "text": "qui", + "start": 28.16, + "end": 28.3, + "confidence": 1.0 + }, + { + "text": "rend", + "start": 28.3, + "end": 28.54, + "confidence": 0.994 + }, + { + "text": "le", + "start": 28.54, + "end": 28.8, + "confidence": 0.992 + }, + { + "text": "contact", + "start": 28.8, + "end": 29.16, + "confidence": 0.999 + }, + { + "text": "plus", + "start": 29.16, + "end": 29.58, + "confidence": 0.983 + }, + { + "text": "direct,", + "start": 29.58, + "end": 30.0, + "confidence": 0.991 + }, + { + "text": "plus", + "start": 30.1, + "end": 30.24, + "confidence": 0.99 + }, + { + "text": "sensible.", + "start": 30.24, + "end": 30.64, + "confidence": 0.997 + } + ] + }, + { + "id": 8, + "seek": 2534, + "start": 31.04, + "end": 37.82, + "text": " Et puis évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté touffu de la navigation web pour aller directement au but.", + "tokens": [ + 50639, + 3790, + 9093, + 24724, + 11, + 1930, + 38694, + 8645, + 16421, + 6212, + 730, + 5821, + 1956, + 21540, + 317, + 368, + 21234, + 1193, + 476, + 18437, + 10095, + 602, + 84, + 368, + 635, + 17346, + 3670, + 2016, + 8722, + 37297, + 1609, + 457, + 13, + 50989 + ], + "temperature": 0.0, + "avg_logprob": -0.10786590413150625, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 0.18929888308048248, + "confidence": 0.893, + "words": [ + { + 
"text": "Et", + "start": 31.04, + "end": 31.24, + "confidence": 0.964 + }, + { + "text": "puis", + "start": 31.24, + "end": 31.34, + "confidence": 0.967 + }, + { + "text": "évidemment,", + "start": 31.34, + "end": 31.62, + "confidence": 0.87 + }, + { + "text": "il", + "start": 31.66, + "end": 31.72, + "confidence": 0.99 + }, + { + "text": "faudrait", + "start": 31.72, + "end": 31.94, + "confidence": 0.996 + }, + { + "text": "parler", + "start": 31.94, + "end": 32.16, + "confidence": 0.864 + }, + { + "text": "aussi", + "start": 32.16, + "end": 32.34, + "confidence": 0.971 + }, + { + "text": "des", + "start": 32.34, + "end": 32.48, + "confidence": 0.993 + }, + { + "text": "applications", + "start": 32.48, + "end": 32.92, + "confidence": 0.993 + }, + { + "text": "qui", + "start": 32.92, + "end": 33.22, + "confidence": 0.564 + }, + { + "text": "permettent", + "start": 33.22, + "end": 33.76, + "confidence": 0.99 + }, + { + "text": "de", + "start": 33.76, + "end": 33.98, + "confidence": 0.871 + }, + { + "text": "contourner", + "start": 33.98, + "end": 34.42, + "confidence": 0.953 + }, + { + "text": "le", + "start": 34.42, + "end": 34.54, + "confidence": 0.75 + }, + { + "text": "côté", + "start": 34.54, + "end": 34.78, + "confidence": 0.985 + }, + { + "text": "touffu", + "start": 34.78, + "end": 35.32, + "confidence": 0.726 + }, + { + "text": "de", + "start": 35.32, + "end": 35.7, + "confidence": 0.882 + }, + { + "text": "la", + "start": 35.7, + "end": 35.82, + "confidence": 0.991 + }, + { + "text": "navigation", + "start": 35.82, + "end": 36.3, + "confidence": 0.992 + }, + { + "text": "web", + "start": 36.3, + "end": 36.58, + "confidence": 0.847 + }, + { + "text": "pour", + "start": 36.58, + "end": 36.76, + "confidence": 0.59 + }, + { + "text": "aller", + "start": 36.76, + "end": 37.16, + "confidence": 0.981 + }, + { + "text": "directement", + "start": 37.16, + "end": 37.54, + "confidence": 0.997 + }, + { + "text": "au", + "start": 37.54, + "end": 37.7, + "confidence": 
0.969 + }, + { + "text": "but.", + "start": 37.7, + "end": 37.82, + "confidence": 0.995 + } + ] + }, + { + "id": 9, + "seek": 2534, + "start": 37.9, + "end": 46.6, + "text": " Bref, tout ça, ce sont les conditions qui permettent de créer cet objet dont Nicolas dit qu'il est vraisemblablement inédit dans l'histoire de l'humanité.", + "tokens": [ + 50989, + 49957, + 11, + 3486, + 2788, + 11, + 1769, + 4900, + 1512, + 4487, + 1956, + 21540, + 317, + 368, + 32062, + 8603, + 14964, + 9400, + 38268, + 6176, + 421, + 6, + 388, + 871, + 6070, + 271, + 443, + 5199, + 712, + 518, + 294, + 7811, + 270, + 2680, + 287, + 6, + 29093, + 368, + 287, + 6, + 18796, + 5066, + 13, + 51439 + ], + "temperature": 0.0, + "avg_logprob": -0.10786590413150625, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 0.18929888308048248, + "confidence": 0.973, + "words": [ + { + "text": "Bref,", + "start": 37.9, + "end": 38.24, + "confidence": 0.984 + }, + { + "text": "tout", + "start": 38.88, + "end": 39.02, + "confidence": 0.7 + }, + { + "text": "ça,", + "start": 39.02, + "end": 39.4, + "confidence": 0.995 + }, + { + "text": "ce", + "start": 39.4, + "end": 39.72, + "confidence": 0.989 + }, + { + "text": "sont", + "start": 39.72, + "end": 39.92, + "confidence": 0.999 + }, + { + "text": "les", + "start": 39.92, + "end": 40.22, + "confidence": 0.992 + }, + { + "text": "conditions", + "start": 40.22, + "end": 40.7, + "confidence": 0.995 + }, + { + "text": "qui", + "start": 40.7, + "end": 41.02, + "confidence": 0.997 + }, + { + "text": "permettent", + "start": 41.02, + "end": 41.44, + "confidence": 0.996 + }, + { + "text": "de", + "start": 41.44, + "end": 41.74, + "confidence": 0.997 + }, + { + "text": "créer", + "start": 41.74, + "end": 42.1, + "confidence": 0.999 + }, + { + "text": "cet", + "start": 42.1, + "end": 42.38, + "confidence": 0.997 + }, + { + "text": "objet", + "start": 42.38, + "end": 42.64, + "confidence": 0.994 + }, + { + "text": "dont", + "start": 42.64, + "end": 42.84, + 
"confidence": 0.789 + }, + { + "text": "Nicolas", + "start": 42.84, + "end": 43.24, + "confidence": 0.988 + }, + { + "text": "dit", + "start": 43.24, + "end": 43.52, + "confidence": 0.986 + }, + { + "text": "qu'il", + "start": 43.52, + "end": 43.74, + "confidence": 0.982 + }, + { + "text": "est", + "start": 43.74, + "end": 43.94, + "confidence": 0.99 + }, + { + "text": "vraisemblablement", + "start": 43.94, + "end": 44.86, + "confidence": 0.99 + }, + { + "text": "inédit", + "start": 44.86, + "end": 45.44, + "confidence": 0.98 + }, + { + "text": "dans", + "start": 45.44, + "end": 45.72, + "confidence": 0.967 + }, + { + "text": "l'histoire", + "start": 45.72, + "end": 46.02, + "confidence": 0.958 + }, + { + "text": "de", + "start": 46.02, + "end": 46.14, + "confidence": 0.999 + }, + { + "text": "l'humanité.", + "start": 46.14, + "end": 46.6, + "confidence": 0.992 + } + ] + }, + { + "id": 10, + "seek": 2534, + "start": 47.02, + "end": 48.78, + "text": " Mais ça, ça soulève une autre interrogation.", + "tokens": [ + 51439, + 6313, + 2788, + 11, + 2788, + 5133, + 31397, + 2251, + 15081, + 24871, + 399, + 13, + 51539 + ], + "temperature": 0.0, + "avg_logprob": -0.10786590413150625, + "compression_ratio": 1.6254416961130742, + "no_speech_prob": 0.18929888308048248, + "confidence": 0.969, + "words": [ + { + "text": "Mais", + "start": 47.02, + "end": 47.28, + "confidence": 0.84 + }, + { + "text": "ça,", + "start": 47.28, + "end": 47.48, + "confidence": 0.942 + }, + { + "text": "ça", + "start": 47.62, + "end": 47.64, + "confidence": 0.976 + }, + { + "text": "soulève", + "start": 47.64, + "end": 47.86, + "confidence": 0.993 + }, + { + "text": "une", + "start": 47.86, + "end": 48.02, + "confidence": 0.998 + }, + { + "text": "autre", + "start": 48.02, + "end": 48.2, + "confidence": 0.999 + }, + { + "text": "interrogation.", + "start": 48.2, + "end": 48.78, + "confidence": 0.997 + } + ] + }, + { + "id": 11, + "seek": 4884, + "start": 49.36, + "end": 55.5, + "text": " Est-ce que 
le fait que cet objet soit inédit induit que notre rapport à lui est aussi un rapport inédit ?", + "tokens": [ + 50389, + 4410, + 12, + 384, + 631, + 476, + 3887, + 631, + 8603, + 14964, + 12703, + 294, + 7811, + 270, + 13716, + 270, + 631, + 10349, + 18018, + 1531, + 8783, + 871, + 6212, + 517, + 18018, + 294, + 7811, + 270, + 2506, + 50689 + ], + "temperature": 0.0, + "avg_logprob": -0.0824541612104936, + "compression_ratio": 1.5960784313725491, + "no_speech_prob": 0.09794807434082031, + "confidence": 0.986, + "words": [ + { + "text": "Est-ce", + "start": 49.36, + "end": 49.64, + "confidence": 0.978 + }, + { + "text": "que", + "start": 49.64, + "end": 49.76, + "confidence": 0.991 + }, + { + "text": "le", + "start": 49.76, + "end": 49.82, + "confidence": 0.993 + }, + { + "text": "fait", + "start": 49.82, + "end": 50.0, + "confidence": 0.999 + }, + { + "text": "que", + "start": 50.0, + "end": 50.14, + "confidence": 0.991 + }, + { + "text": "cet", + "start": 50.14, + "end": 50.32, + "confidence": 0.99 + }, + { + "text": "objet", + "start": 50.32, + "end": 50.66, + "confidence": 0.997 + }, + { + "text": "soit", + "start": 50.66, + "end": 51.12, + "confidence": 0.995 + }, + { + "text": "inédit", + "start": 51.12, + "end": 51.78, + "confidence": 0.995 + }, + { + "text": "induit", + "start": 51.78, + "end": 52.32, + "confidence": 0.955 + }, + { + "text": "que", + "start": 52.32, + "end": 52.42, + "confidence": 0.984 + }, + { + "text": "notre", + "start": 52.42, + "end": 52.78, + "confidence": 0.995 + }, + { + "text": "rapport", + "start": 52.78, + "end": 53.28, + "confidence": 0.997 + }, + { + "text": "à", + "start": 53.28, + "end": 53.46, + "confidence": 0.978 + }, + { + "text": "lui", + "start": 53.46, + "end": 53.68, + "confidence": 0.999 + }, + { + "text": "est", + "start": 53.68, + "end": 54.14, + "confidence": 0.907 + }, + { + "text": "aussi", + "start": 54.14, + "end": 54.52, + "confidence": 0.996 + }, + { + "text": "un", + "start": 54.52, + "end": 54.72, + 
"confidence": 0.994 + }, + { + "text": "rapport", + "start": 54.72, + "end": 55.0, + "confidence": 0.997 + }, + { + "text": "inédit ?", + "start": 55.0, + "end": 55.5, + "confidence": 0.996 + } + ] + }, + { + "id": 12, + "seek": 4884, + "start": 55.76, + "end": 63.4, + "text": " Je veux dire, est-ce que le rapport qu'on a au smartphone est comparable à celui qu'on entretenait à d'autres objets techniques comme la voiture ou le téléphone ?", + "tokens": [ + 50689, + 2588, + 16389, + 1264, + 11, + 871, + 12, + 384, + 631, + 476, + 18018, + 421, + 6, + 266, + 257, + 1609, + 13307, + 871, + 25323, + 1531, + 22829, + 421, + 6, + 266, + 3962, + 1147, + 1001, + 1531, + 274, + 6, + 16752, + 1111, + 25349, + 7512, + 5173, + 635, + 38859, + 2820, + 476, + 47159, + 2506, + 51089 + ], + "temperature": 0.0, + "avg_logprob": -0.0824541612104936, + "compression_ratio": 1.5960784313725491, + "no_speech_prob": 0.09794807434082031, + "confidence": 0.96, + "words": [ + { + "text": "Je", + "start": 55.76, + "end": 55.9, + "confidence": 0.904 + }, + { + "text": "veux", + "start": 55.9, + "end": 56.0, + "confidence": 0.989 + }, + { + "text": "dire,", + "start": 56.0, + "end": 56.12, + "confidence": 0.997 + }, + { + "text": "est-ce", + "start": 56.22, + "end": 56.38, + "confidence": 0.99 + }, + { + "text": "que", + "start": 56.38, + "end": 56.5, + "confidence": 0.991 + }, + { + "text": "le", + "start": 56.5, + "end": 56.66, + "confidence": 0.996 + }, + { + "text": "rapport", + "start": 56.66, + "end": 56.86, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 56.86, + "end": 57.08, + "confidence": 0.985 + }, + { + "text": "a", + "start": 57.08, + "end": 57.16, + "confidence": 0.99 + }, + { + "text": "au", + "start": 57.16, + "end": 57.28, + "confidence": 0.966 + }, + { + "text": "smartphone", + "start": 57.28, + "end": 57.6, + "confidence": 0.995 + }, + { + "text": "est", + "start": 57.6, + "end": 57.86, + "confidence": 0.91 + }, + { + "text": "comparable", + "start": 57.86, + 
"end": 58.3, + "confidence": 0.996 + }, + { + "text": "à", + "start": 58.3, + "end": 58.5, + "confidence": 0.946 + }, + { + "text": "celui", + "start": 58.5, + "end": 58.66, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 58.66, + "end": 58.96, + "confidence": 0.989 + }, + { + "text": "entretenait", + "start": 58.96, + "end": 59.36, + "confidence": 0.926 + }, + { + "text": "à", + "start": 59.36, + "end": 59.48, + "confidence": 0.951 + }, + { + "text": "d'autres", + "start": 59.48, + "end": 59.7, + "confidence": 0.997 + }, + { + "text": "objets", + "start": 59.7, + "end": 59.98, + "confidence": 0.99 + }, + { + "text": "techniques", + "start": 59.98, + "end": 60.44, + "confidence": 0.984 + }, + { + "text": "comme", + "start": 60.44, + "end": 60.96, + "confidence": 0.496 + }, + { + "text": "la", + "start": 60.96, + "end": 61.52, + "confidence": 0.987 + }, + { + "text": "voiture", + "start": 61.52, + "end": 62.08, + "confidence": 1.0 + }, + { + "text": "ou", + "start": 62.08, + "end": 62.52, + "confidence": 0.95 + }, + { + "text": "le", + "start": 62.52, + "end": 62.74, + "confidence": 0.998 + }, + { + "text": "téléphone ?", + "start": 62.74, + "end": 63.4, + "confidence": 0.999 + } + ] + }, + { + "id": 13, + "seek": 4884, + "start": 65.36, + "end": 66.62, + "text": " Il n'y a pas d'équivalent en fait.", + "tokens": [ + 51189, + 4416, + 297, + 6, + 88, + 257, + 1736, + 274, + 6, + 20183, + 3576, + 317, + 465, + 3887, + 13, + 51239 + ], + "temperature": 0.0, + "avg_logprob": -0.0824541612104936, + "compression_ratio": 1.5960784313725491, + "no_speech_prob": 0.09794807434082031, + "confidence": 0.94, + "words": [ + { + "text": "Il", + "start": 65.36, + "end": 65.5, + "confidence": 0.849 + }, + { + "text": "n'y", + "start": 65.5, + "end": 65.54, + "confidence": 0.979 + }, + { + "text": "a", + "start": 65.54, + "end": 65.56, + "confidence": 0.993 + }, + { + "text": "pas", + "start": 65.56, + "end": 65.66, + "confidence": 0.999 + }, + { + "text": 
"d'équivalent", + "start": 65.66, + "end": 66.3, + "confidence": 0.995 + }, + { + "text": "en", + "start": 66.3, + "end": 66.44, + "confidence": 0.582 + }, + { + "text": "fait.", + "start": 66.44, + "end": 66.62, + "confidence": 0.997 + } + ] + }, + { + "id": 14, + "seek": 4884, + "start": 66.88, + "end": 71.24, + "text": " Et donc cette espèce de nouveauté dans la relation à l'objet, c'est fascinant et terrifiant.", + "tokens": [ + 51239, + 3790, + 5926, + 5550, + 7089, + 30236, + 368, + 11456, + 1375, + 526, + 2680, + 635, + 9721, + 1531, + 287, + 6, + 996, + 7108, + 11, + 269, + 6, + 377, + 7184, + 259, + 394, + 1030, + 7245, + 351, + 5798, + 13, + 51489 + ], + "temperature": 0.0, + "avg_logprob": -0.0824541612104936, + "compression_ratio": 1.5960784313725491, + "no_speech_prob": 0.09794807434082031, + "confidence": 0.957, + "words": [ + { + "text": "Et", + "start": 66.88, + "end": 67.02, + "confidence": 0.654 + }, + { + "text": "donc", + "start": 67.02, + "end": 67.12, + "confidence": 0.901 + }, + { + "text": "cette", + "start": 67.12, + "end": 67.3, + "confidence": 0.713 + }, + { + "text": "espèce", + "start": 67.3, + "end": 67.54, + "confidence": 0.996 + }, + { + "text": "de", + "start": 67.54, + "end": 67.76, + "confidence": 0.999 + }, + { + "text": "nouveauté", + "start": 67.76, + "end": 68.48, + "confidence": 0.98 + }, + { + "text": "dans", + "start": 68.48, + "end": 68.84, + "confidence": 0.982 + }, + { + "text": "la", + "start": 68.84, + "end": 68.96, + "confidence": 0.995 + }, + { + "text": "relation", + "start": 68.96, + "end": 69.24, + "confidence": 0.998 + }, + { + "text": "à", + "start": 69.24, + "end": 69.38, + "confidence": 0.997 + }, + { + "text": "l'objet,", + "start": 69.38, + "end": 70.0, + "confidence": 0.998 + }, + { + "text": "c'est", + "start": 70.28, + "end": 70.38, + "confidence": 0.977 + }, + { + "text": "fascinant", + "start": 70.38, + "end": 70.68, + "confidence": 0.98 + }, + { + "text": "et", + "start": 70.68, + "end": 70.76, + 
"confidence": 0.973 + }, + { + "text": "terrifiant.", + "start": 70.76, + "end": 71.24, + "confidence": 0.977 + } + ] + }, + { + "id": 15, + "seek": 7134, + "start": 71.62, + "end": 76.42, + "text": " Parce qu'on a l'impression, comme le disent les utilisateurs et les services, d'être dépendant de cet objet,", + "tokens": [ + 50389, + 20429, + 421, + 6, + 266, + 257, + 287, + 6, + 36107, + 11, + 5173, + 476, + 37313, + 1512, + 33643, + 25929, + 1030, + 1512, + 3328, + 11, + 274, + 6, + 9498, + 45768, + 394, + 368, + 8603, + 14964, + 11, + 50639 + ], + "temperature": 0.0, + "avg_logprob": -0.11225989713507184, + "compression_ratio": 1.5975609756097562, + "no_speech_prob": 0.015181643888354301, + "confidence": 0.862, + "words": [ + { + "text": "Parce", + "start": 71.62, + "end": 71.9, + "confidence": 0.843 + }, + { + "text": "qu'on", + "start": 71.9, + "end": 72.4, + "confidence": 0.968 + }, + { + "text": "a", + "start": 72.4, + "end": 72.54, + "confidence": 0.979 + }, + { + "text": "l'impression,", + "start": 72.54, + "end": 72.9, + "confidence": 0.997 + }, + { + "text": "comme", + "start": 73.66, + "end": 73.9, + "confidence": 0.982 + }, + { + "text": "le", + "start": 73.9, + "end": 74.02, + "confidence": 0.984 + }, + { + "text": "disent", + "start": 74.02, + "end": 74.22, + "confidence": 0.997 + }, + { + "text": "les", + "start": 74.22, + "end": 74.52, + "confidence": 0.996 + }, + { + "text": "utilisateurs", + "start": 74.52, + "end": 74.84, + "confidence": 0.998 + }, + { + "text": "et", + "start": 74.84, + "end": 74.94, + "confidence": 0.845 + }, + { + "text": "les", + "start": 74.94, + "end": 75.02, + "confidence": 0.783 + }, + { + "text": "services,", + "start": 75.02, + "end": 75.18, + "confidence": 0.129 + }, + { + "text": "d'être", + "start": 75.22, + "end": 75.5, + "confidence": 0.935 + }, + { + "text": "dépendant", + "start": 75.5, + "end": 75.98, + "confidence": 0.709 + }, + { + "text": "de", + "start": 75.98, + "end": 76.08, + "confidence": 0.977 + }, + 
{ + "text": "cet", + "start": 76.08, + "end": 76.26, + "confidence": 0.998 + }, + { + "text": "objet,", + "start": 76.26, + "end": 76.42, + "confidence": 0.997 + } + ] + }, + { + "id": 16, + "seek": 7134, + "start": 76.42, + "end": 83.3, + "text": " d'induire en fait une espèce de relation, de médiation avec le monde qui rend de l'ampleur et qui amène aussi à des formes de rejet.", + "tokens": [ + 50639, + 274, + 6, + 471, + 43612, + 465, + 3887, + 2251, + 7089, + 30236, + 368, + 9721, + 11, + 368, + 42436, + 399, + 4163, + 476, + 10431, + 1956, + 6125, + 368, + 287, + 6, + 335, + 781, + 374, + 1030, + 1956, + 669, + 18832, + 6212, + 1531, + 730, + 1254, + 279, + 368, + 319, + 7108, + 13, + 50989 + ], + "temperature": 0.0, + "avg_logprob": -0.11225989713507184, + "compression_ratio": 1.5975609756097562, + "no_speech_prob": 0.015181643888354301, + "confidence": 0.923, + "words": [ + { + "text": "d'induire", + "start": 76.42, + "end": 77.1, + "confidence": 0.964 + }, + { + "text": "en", + "start": 77.1, + "end": 77.18, + "confidence": 0.72 + }, + { + "text": "fait", + "start": 77.18, + "end": 77.34, + "confidence": 0.997 + }, + { + "text": "une", + "start": 77.34, + "end": 77.5, + "confidence": 0.979 + }, + { + "text": "espèce", + "start": 77.5, + "end": 77.88, + "confidence": 0.997 + }, + { + "text": "de", + "start": 77.88, + "end": 78.28, + "confidence": 0.997 + }, + { + "text": "relation,", + "start": 78.28, + "end": 78.54, + "confidence": 0.422 + }, + { + "text": "de", + "start": 78.66, + "end": 78.96, + "confidence": 0.992 + }, + { + "text": "médiation", + "start": 78.96, + "end": 79.52, + "confidence": 0.997 + }, + { + "text": "avec", + "start": 79.52, + "end": 79.76, + "confidence": 0.954 + }, + { + "text": "le", + "start": 79.76, + "end": 80.02, + "confidence": 0.998 + }, + { + "text": "monde", + "start": 80.02, + "end": 80.3, + "confidence": 0.996 + }, + { + "text": "qui", + "start": 80.3, + "end": 81.24, + "confidence": 0.661 + }, + { + "text": "rend", + 
"start": 81.24, + "end": 81.66, + "confidence": 0.968 + }, + { + "text": "de", + "start": 81.66, + "end": 81.74, + "confidence": 0.688 + }, + { + "text": "l'ampleur", + "start": 81.74, + "end": 82.02, + "confidence": 0.995 + }, + { + "text": "et", + "start": 82.02, + "end": 82.1, + "confidence": 0.952 + }, + { + "text": "qui", + "start": 82.1, + "end": 82.2, + "confidence": 0.987 + }, + { + "text": "amène", + "start": 82.2, + "end": 82.38, + "confidence": 0.971 + }, + { + "text": "aussi", + "start": 82.38, + "end": 82.54, + "confidence": 0.946 + }, + { + "text": "à", + "start": 82.54, + "end": 82.62, + "confidence": 0.959 + }, + { + "text": "des", + "start": 82.62, + "end": 82.7, + "confidence": 0.992 + }, + { + "text": "formes", + "start": 82.7, + "end": 82.88, + "confidence": 0.995 + }, + { + "text": "de", + "start": 82.88, + "end": 83.04, + "confidence": 0.998 + }, + { + "text": "rejet.", + "start": 83.04, + "end": 83.3, + "confidence": 0.802 + } + ] + }, + { + "id": 17, + "seek": 7134, + "start": 83.92, + "end": 87.7, + "text": " Donc à objet inédit, rapport inédit.", + "tokens": [ + 50989, + 7477, + 1531, + 14964, + 294, + 7811, + 270, + 11, + 18018, + 294, + 7811, + 270, + 13, + 51189 + ], + "temperature": 0.0, + "avg_logprob": -0.11225989713507184, + "compression_ratio": 1.5975609756097562, + "no_speech_prob": 0.015181643888354301, + "confidence": 0.901, + "words": [ + { + "text": "Donc", + "start": 83.92, + "end": 84.46, + "confidence": 0.984 + }, + { + "text": "à", + "start": 84.46, + "end": 84.98, + "confidence": 0.483 + }, + { + "text": "objet", + "start": 84.98, + "end": 85.46, + "confidence": 0.77 + }, + { + "text": "inédit,", + "start": 85.46, + "end": 86.2, + "confidence": 0.994 + }, + { + "text": "rapport", + "start": 86.54, + "end": 86.9, + "confidence": 0.986 + }, + { + "text": "inédit.", + "start": 86.9, + "end": 87.7, + "confidence": 0.998 + } + ] + }, + { + "id": 18, + "seek": 7134, + "start": 88.02, + "end": 94.92, + "text": " Et ce rapport, 
si j'en crois à Nicolas, serait caractérisé par un mélange de dépendance et de rejet.", + "tokens": [ + 51189, + 3790, + 1769, + 18018, + 11, + 1511, + 361, + 6, + 268, + 21724, + 1531, + 38268, + 11, + 23139, + 1032, + 578, + 4198, + 22118, + 971, + 517, + 41953, + 933, + 368, + 45768, + 719, + 1030, + 368, + 319, + 7108, + 13, + 51539 + ], + "temperature": 0.0, + "avg_logprob": -0.11225989713507184, + "compression_ratio": 1.5975609756097562, + "no_speech_prob": 0.015181643888354301, + "confidence": 0.975, + "words": [ + { + "text": "Et", + "start": 88.02, + "end": 88.62, + "confidence": 0.989 + }, + { + "text": "ce", + "start": 88.62, + "end": 88.9, + "confidence": 0.985 + }, + { + "text": "rapport,", + "start": 88.9, + "end": 89.32, + "confidence": 0.998 + }, + { + "text": "si", + "start": 89.38, + "end": 89.56, + "confidence": 0.999 + }, + { + "text": "j'en", + "start": 89.56, + "end": 89.78, + "confidence": 0.997 + }, + { + "text": "crois", + "start": 89.78, + "end": 89.88, + "confidence": 0.988 + }, + { + "text": "à", + "start": 89.88, + "end": 90.06, + "confidence": 0.724 + }, + { + "text": "Nicolas,", + "start": 90.06, + "end": 90.24, + "confidence": 0.997 + }, + { + "text": "serait", + "start": 90.7, + "end": 91.0, + "confidence": 0.904 + }, + { + "text": "caractérisé", + "start": 91.0, + "end": 91.8, + "confidence": 0.994 + }, + { + "text": "par", + "start": 91.8, + "end": 92.22, + "confidence": 0.995 + }, + { + "text": "un", + "start": 92.22, + "end": 92.52, + "confidence": 0.996 + }, + { + "text": "mélange", + "start": 92.52, + "end": 93.04, + "confidence": 0.999 + }, + { + "text": "de", + "start": 93.04, + "end": 93.46, + "confidence": 0.998 + }, + { + "text": "dépendance", + "start": 93.46, + "end": 94.12, + "confidence": 0.938 + }, + { + "text": "et", + "start": 94.12, + "end": 94.54, + "confidence": 0.998 + }, + { + "text": "de", + "start": 94.54, + "end": 94.72, + "confidence": 0.999 + }, + { + "text": "rejet.", + "start": 94.72, + "end": 94.92, + 
"confidence": 0.988 + } + ] + }, + { + "id": 19, + "seek": 9484, + "start": 95.74, + "end": 102.82, + "text": " Bon, en vrai, il faudrait remonter très très finement toute l'histoire des objets techniques et de leur insertion dans nos vies", + "tokens": [ + 50389, + 7368, + 11, + 465, + 17815, + 11, + 1930, + 38694, + 8645, + 890, + 41806, + 5732, + 5732, + 962, + 1712, + 14953, + 287, + 6, + 29093, + 730, + 1111, + 25349, + 7512, + 1030, + 368, + 9580, + 8969, + 313, + 2680, + 3269, + 371, + 530, + 50739 + ], + "temperature": 0.0, + "avg_logprob": -0.07819620768229167, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434891313314438, + "confidence": 0.932, + "words": [ + { + "text": "Bon,", + "start": 95.74, + "end": 96.0, + "confidence": 0.751 + }, + { + "text": "en", + "start": 96.34, + "end": 96.54, + "confidence": 0.992 + }, + { + "text": "vrai,", + "start": 96.54, + "end": 96.86, + "confidence": 0.991 + }, + { + "text": "il", + "start": 97.06, + "end": 97.18, + "confidence": 0.992 + }, + { + "text": "faudrait", + "start": 97.18, + "end": 97.58, + "confidence": 0.996 + }, + { + "text": "remonter", + "start": 97.58, + "end": 98.06, + "confidence": 0.995 + }, + { + "text": "très", + "start": 98.06, + "end": 98.58, + "confidence": 0.995 + }, + { + "text": "très", + "start": 98.58, + "end": 98.84, + "confidence": 0.757 + }, + { + "text": "finement", + "start": 98.84, + "end": 99.38, + "confidence": 0.823 + }, + { + "text": "toute", + "start": 99.38, + "end": 99.7, + "confidence": 0.937 + }, + { + "text": "l'histoire", + "start": 99.7, + "end": 100.08, + "confidence": 0.996 + }, + { + "text": "des", + "start": 100.08, + "end": 100.26, + "confidence": 0.991 + }, + { + "text": "objets", + "start": 100.26, + "end": 100.52, + "confidence": 0.998 + }, + { + "text": "techniques", + "start": 100.52, + "end": 100.94, + "confidence": 0.982 + }, + { + "text": "et", + "start": 100.94, + "end": 101.54, + "confidence": 0.512 + }, + { + "text": "de", + 
"start": 101.54, + "end": 101.7, + "confidence": 0.984 + }, + { + "text": "leur", + "start": 101.7, + "end": 101.84, + "confidence": 0.818 + }, + { + "text": "insertion", + "start": 101.84, + "end": 102.34, + "confidence": 0.994 + }, + { + "text": "dans", + "start": 102.34, + "end": 102.5, + "confidence": 0.969 + }, + { + "text": "nos", + "start": 102.5, + "end": 102.66, + "confidence": 0.998 + }, + { + "text": "vies", + "start": 102.66, + "end": 102.82, + "confidence": 0.998 + } + ] + }, + { + "id": 20, + "seek": 9484, + "start": 102.86, + "end": 105.72, + "text": " pour déterminer si ce rapport est totalement inédit.", + "tokens": [ + 50739, + 2016, + 2795, + 29725, + 260, + 1511, + 1769, + 18018, + 871, + 45203, + 294, + 7811, + 270, + 13, + 50889 + ], + "temperature": 0.0, + "avg_logprob": -0.07819620768229167, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434891313314438, + "confidence": 0.995, + "words": [ + { + "text": "pour", + "start": 102.86, + "end": 103.1, + "confidence": 0.989 + }, + { + "text": "déterminer", + "start": 103.1, + "end": 103.64, + "confidence": 0.995 + }, + { + "text": "si", + "start": 103.64, + "end": 103.76, + "confidence": 0.986 + }, + { + "text": "ce", + "start": 103.76, + "end": 103.94, + "confidence": 0.991 + }, + { + "text": "rapport", + "start": 103.94, + "end": 104.26, + "confidence": 0.998 + }, + { + "text": "est", + "start": 104.26, + "end": 104.84, + "confidence": 0.997 + }, + { + "text": "totalement", + "start": 104.84, + "end": 105.3, + "confidence": 0.998 + }, + { + "text": "inédit.", + "start": 105.3, + "end": 105.72, + "confidence": 0.998 + } + ] + }, + { + "id": 21, + "seek": 9484, + "start": 106.14, + "end": 109.32, + "text": " Mais j'ai l'impression comme ça que Nicolas ne se trompe pas vraiment.", + "tokens": [ + 50889, + 6313, + 361, + 6, + 1301, + 287, + 6, + 36107, + 5173, + 2788, + 631, + 38268, + 408, + 369, + 504, + 298, + 494, + 1736, + 8322, + 13, + 51089 + ], + "temperature": 0.0, + 
"avg_logprob": -0.07819620768229167, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434891313314438, + "confidence": 0.932, + "words": [ + { + "text": "Mais", + "start": 106.14, + "end": 106.4, + "confidence": 0.975 + }, + { + "text": "j'ai", + "start": 106.4, + "end": 106.92, + "confidence": 0.956 + }, + { + "text": "l'impression", + "start": 106.92, + "end": 107.38, + "confidence": 0.999 + }, + { + "text": "comme", + "start": 107.38, + "end": 107.58, + "confidence": 0.513 + }, + { + "text": "ça", + "start": 107.58, + "end": 107.82, + "confidence": 0.969 + }, + { + "text": "que", + "start": 107.82, + "end": 108.14, + "confidence": 0.973 + }, + { + "text": "Nicolas", + "start": 108.14, + "end": 108.48, + "confidence": 0.983 + }, + { + "text": "ne", + "start": 108.48, + "end": 108.6, + "confidence": 0.72 + }, + { + "text": "se", + "start": 108.6, + "end": 108.72, + "confidence": 0.992 + }, + { + "text": "trompe", + "start": 108.72, + "end": 108.88, + "confidence": 0.993 + }, + { + "text": "pas", + "start": 108.88, + "end": 109.08, + "confidence": 0.999 + }, + { + "text": "vraiment.", + "start": 109.08, + "end": 109.32, + "confidence": 0.989 + } + ] + }, + { + "id": 22, + "seek": 9484, + "start": 109.94, + "end": 115.06, + "text": " Pour autant que je sache, il y a eu plein de discussions autour de la voiture ou même du téléphone.", + "tokens": [ + 51089, + 8732, + 34081, + 631, + 1506, + 262, + 6000, + 11, + 1930, + 288, + 257, + 2228, + 21088, + 368, + 11088, + 30249, + 368, + 635, + 38859, + 2820, + 5698, + 1581, + 47159, + 13, + 51389 + ], + "temperature": 0.0, + "avg_logprob": -0.07819620768229167, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434891313314438, + "confidence": 0.964, + "words": [ + { + "text": "Pour", + "start": 109.94, + "end": 110.1, + "confidence": 0.995 + }, + { + "text": "autant", + "start": 110.1, + "end": 110.24, + "confidence": 1.0 + }, + { + "text": "que", + "start": 110.24, + "end": 110.38, + 
"confidence": 0.984 + }, + { + "text": "je", + "start": 110.38, + "end": 110.52, + "confidence": 0.998 + }, + { + "text": "sache,", + "start": 110.52, + "end": 110.84, + "confidence": 0.952 + }, + { + "text": "il", + "start": 111.08, + "end": 111.16, + "confidence": 0.994 + }, + { + "text": "y", + "start": 111.16, + "end": 111.3, + "confidence": 0.995 + }, + { + "text": "a", + "start": 111.3, + "end": 111.32, + "confidence": 0.993 + }, + { + "text": "eu", + "start": 111.32, + "end": 111.62, + "confidence": 0.998 + }, + { + "text": "plein", + "start": 111.62, + "end": 111.9, + "confidence": 0.966 + }, + { + "text": "de", + "start": 111.9, + "end": 112.14, + "confidence": 0.997 + }, + { + "text": "discussions", + "start": 112.14, + "end": 112.66, + "confidence": 0.83 + }, + { + "text": "autour", + "start": 112.66, + "end": 113.02, + "confidence": 0.995 + }, + { + "text": "de", + "start": 113.02, + "end": 113.38, + "confidence": 0.996 + }, + { + "text": "la", + "start": 113.38, + "end": 113.52, + "confidence": 0.998 + }, + { + "text": "voiture", + "start": 113.52, + "end": 113.88, + "confidence": 1.0 + }, + { + "text": "ou", + "start": 113.88, + "end": 114.06, + "confidence": 0.69 + }, + { + "text": "même", + "start": 114.06, + "end": 114.34, + "confidence": 0.995 + }, + { + "text": "du", + "start": 114.34, + "end": 114.64, + "confidence": 0.992 + }, + { + "text": "téléphone.", + "start": 114.64, + "end": 115.06, + "confidence": 0.999 + } + ] + }, + { + "id": 23, + "seek": 9484, + "start": 115.48, + "end": 117.7, + "text": " Mais la dépendance n'était pas du même ordre.", + "tokens": [ + 51389, + 6313, + 635, + 45768, + 719, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 4792, + 265, + 13, + 51489 + ], + "temperature": 0.0, + "avg_logprob": -0.07819620768229167, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434891313314438, + "confidence": 0.992, + "words": [ + { + "text": "Mais", + "start": 115.48, + "end": 115.78, + "confidence": 0.993 + }, + { 
+ "text": "la", + "start": 115.78, + "end": 116.04, + "confidence": 0.943 + }, + { + "text": "dépendance", + "start": 116.04, + "end": 116.42, + "confidence": 0.996 + }, + { + "text": "n'était", + "start": 116.42, + "end": 116.7, + "confidence": 0.994 + }, + { + "text": "pas", + "start": 116.7, + "end": 117.0, + "confidence": 0.998 + }, + { + "text": "du", + "start": 117.0, + "end": 117.2, + "confidence": 0.996 + }, + { + "text": "même", + "start": 117.2, + "end": 117.44, + "confidence": 0.998 + }, + { + "text": "ordre.", + "start": 117.44, + "end": 117.7, + "confidence": 0.999 + } + ] + }, + { + "id": 24, + "seek": 9484, + "start": 117.72, + "end": 119.78, + "text": " Donc le rejet non plus n'était pas du même ordre.", + "tokens": [ + 51489, + 7477, + 476, + 319, + 7108, + 2107, + 1804, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 4792, + 265, + 13, + 51589 + ], + "temperature": 0.0, + "avg_logprob": -0.07819620768229167, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434891313314438, + "confidence": 0.979, + "words": [ + { + "text": "Donc", + "start": 117.72, + "end": 118.02, + "confidence": 0.959 + }, + { + "text": "le", + "start": 118.02, + "end": 118.4, + "confidence": 0.819 + }, + { + "text": "rejet", + "start": 118.4, + "end": 118.64, + "confidence": 0.999 + }, + { + "text": "non", + "start": 118.64, + "end": 118.82, + "confidence": 0.982 + }, + { + "text": "plus", + "start": 118.82, + "end": 118.94, + "confidence": 0.994 + }, + { + "text": "n'était", + "start": 118.94, + "end": 119.18, + "confidence": 0.991 + }, + { + "text": "pas", + "start": 119.18, + "end": 119.3, + "confidence": 0.998 + }, + { + "text": "du", + "start": 119.3, + "end": 119.42, + "confidence": 0.995 + }, + { + "text": "même", + "start": 119.42, + "end": 119.58, + "confidence": 0.999 + }, + { + "text": "ordre.", + "start": 119.58, + "end": 119.78, + "confidence": 0.999 + } + ] + }, + { + "id": 25, + "seek": 9484, + "start": 120.04, + "end": 123.1, + "text": " On peut 
adorer sa bagnole, en avoir besoin pour plein de choses.", + "tokens": [ + 51589, + 1282, + 5977, + 614, + 17618, + 601, + 3411, + 1771, + 306, + 11, + 465, + 10853, + 19207, + 2016, + 21088, + 368, + 14488, + 13, + 51789 + ], + "temperature": 0.0, + "avg_logprob": -0.07819620768229167, + "compression_ratio": 1.6719745222929936, + "no_speech_prob": 0.017434891313314438, + "confidence": 0.971, + "words": [ + { + "text": "On", + "start": 120.04, + "end": 120.16, + "confidence": 0.757 + }, + { + "text": "peut", + "start": 120.16, + "end": 120.36, + "confidence": 0.997 + }, + { + "text": "adorer", + "start": 120.36, + "end": 120.68, + "confidence": 0.991 + }, + { + "text": "sa", + "start": 120.68, + "end": 120.88, + "confidence": 0.98 + }, + { + "text": "bagnole,", + "start": 120.88, + "end": 121.34, + "confidence": 0.985 + }, + { + "text": "en", + "start": 121.42, + "end": 121.56, + "confidence": 0.971 + }, + { + "text": "avoir", + "start": 121.56, + "end": 121.8, + "confidence": 0.997 + }, + { + "text": "besoin", + "start": 121.8, + "end": 122.12, + "confidence": 0.999 + }, + { + "text": "pour", + "start": 122.12, + "end": 122.46, + "confidence": 0.989 + }, + { + "text": "plein", + "start": 122.46, + "end": 122.7, + "confidence": 0.987 + }, + { + "text": "de", + "start": 122.7, + "end": 122.78, + "confidence": 0.997 + }, + { + "text": "choses.", + "start": 122.78, + "end": 123.1, + "confidence": 0.99 + } + ] + }, + { + "id": 26, + "seek": 12334, + "start": 123.38, + "end": 126.44, + "text": " Le soir, quand on va se coucher, on la laisse.", + "tokens": [ + 50389, + 1456, + 27105, + 11, + 6932, + 322, + 2773, + 369, + 1384, + 6759, + 11, + 322, + 635, + 30969, + 13, + 50539 + ], + "temperature": 0.0, + "avg_logprob": -0.08275167058023174, + "compression_ratio": 1.6818181818181819, + "no_speech_prob": 0.01115426979959011, + "confidence": 0.94, + "words": [ + { + "text": "Le", + "start": 123.38, + "end": 123.96, + "confidence": 0.645 + }, + { + "text": "soir,", + 
"start": 123.96, + "end": 124.54, + "confidence": 0.998 + }, + { + "text": "quand", + "start": 124.74, + "end": 124.94, + "confidence": 0.982 + }, + { + "text": "on", + "start": 124.94, + "end": 125.06, + "confidence": 0.996 + }, + { + "text": "va", + "start": 125.06, + "end": 125.16, + "confidence": 0.99 + }, + { + "text": "se", + "start": 125.16, + "end": 125.26, + "confidence": 0.989 + }, + { + "text": "coucher,", + "start": 125.26, + "end": 125.7, + "confidence": 0.989 + }, + { + "text": "on", + "start": 125.94, + "end": 126.04, + "confidence": 0.992 + }, + { + "text": "la", + "start": 126.04, + "end": 126.18, + "confidence": 0.852 + }, + { + "text": "laisse.", + "start": 126.18, + "end": 126.44, + "confidence": 0.999 + } + ] + }, + { + "id": 27, + "seek": 12334, + "start": 127.0, + "end": 130.3, + "text": " On ne l'a pas dans la main quand on est au lit, on ne l'emmène pas au chiottes.", + "tokens": [ + 50539, + 1282, + 408, + 287, + 6, + 64, + 1736, + 2680, + 635, + 2135, + 6932, + 322, + 871, + 1609, + 7997, + 11, + 322, + 408, + 287, + 6, + 443, + 76, + 18832, + 1736, + 1609, + 13228, + 1521, + 279, + 13, + 50739 + ], + "temperature": 0.0, + "avg_logprob": -0.08275167058023174, + "compression_ratio": 1.6818181818181819, + "no_speech_prob": 0.01115426979959011, + "confidence": 0.918, + "words": [ + { + "text": "On", + "start": 127.0, + "end": 127.34, + "confidence": 0.967 + }, + { + "text": "ne", + "start": 127.34, + "end": 127.46, + "confidence": 0.801 + }, + { + "text": "l'a", + "start": 127.46, + "end": 127.56, + "confidence": 0.929 + }, + { + "text": "pas", + "start": 127.56, + "end": 127.68, + "confidence": 0.999 + }, + { + "text": "dans", + "start": 127.68, + "end": 127.86, + "confidence": 0.992 + }, + { + "text": "la", + "start": 127.86, + "end": 128.08, + "confidence": 0.996 + }, + { + "text": "main", + "start": 128.08, + "end": 128.26, + "confidence": 0.999 + }, + { + "text": "quand", + "start": 128.26, + "end": 128.48, + "confidence": 0.936 + }, + 
{ + "text": "on", + "start": 128.48, + "end": 128.6, + "confidence": 0.993 + }, + { + "text": "est", + "start": 128.6, + "end": 128.74, + "confidence": 0.992 + }, + { + "text": "au", + "start": 128.74, + "end": 128.94, + "confidence": 0.976 + }, + { + "text": "lit,", + "start": 128.94, + "end": 129.1, + "confidence": 0.999 + }, + { + "text": "on", + "start": 129.16, + "end": 129.28, + "confidence": 0.43 + }, + { + "text": "ne", + "start": 129.28, + "end": 129.3, + "confidence": 0.949 + }, + { + "text": "l'emmène", + "start": 129.3, + "end": 129.58, + "confidence": 0.993 + }, + { + "text": "pas", + "start": 129.58, + "end": 129.7, + "confidence": 0.997 + }, + { + "text": "au", + "start": 129.7, + "end": 129.88, + "confidence": 0.72 + }, + { + "text": "chiottes.", + "start": 129.88, + "end": 130.3, + "confidence": 0.886 + } + ] + }, + { + "id": 28, + "seek": 12334, + "start": 130.84, + "end": 136.88, + "text": " On pouvait être énervé par son môme qui occupait la ligne de téléphone pendant une heure chaque soir pour discuter avec un copain.", + "tokens": [ + 50739, + 1282, + 45913, + 7418, + 45045, + 15797, + 971, + 1872, + 275, + 2851, + 1398, + 1956, + 8073, + 1001, + 635, + 34207, + 368, + 47159, + 17338, + 2251, + 30027, + 18920, + 27105, + 2016, + 2983, + 20314, + 4163, + 517, + 2971, + 491, + 13, + 51039 + ], + "temperature": 0.0, + "avg_logprob": -0.08275167058023174, + "compression_ratio": 1.6818181818181819, + "no_speech_prob": 0.01115426979959011, + "confidence": 0.925, + "words": [ + { + "text": "On", + "start": 130.84, + "end": 131.06, + "confidence": 0.996 + }, + { + "text": "pouvait", + "start": 131.06, + "end": 131.26, + "confidence": 0.989 + }, + { + "text": "être", + "start": 131.26, + "end": 131.58, + "confidence": 0.995 + }, + { + "text": "énervé", + "start": 131.58, + "end": 132.22, + "confidence": 0.896 + }, + { + "text": "par", + "start": 132.22, + "end": 132.46, + "confidence": 0.992 + }, + { + "text": "son", + "start": 132.46, + "end": 132.72, 
+ "confidence": 0.998 + }, + { + "text": "môme", + "start": 132.72, + "end": 133.08, + "confidence": 0.758 + }, + { + "text": "qui", + "start": 133.08, + "end": 133.34, + "confidence": 0.915 + }, + { + "text": "occupait", + "start": 133.34, + "end": 133.74, + "confidence": 0.992 + }, + { + "text": "la", + "start": 133.74, + "end": 133.86, + "confidence": 0.986 + }, + { + "text": "ligne", + "start": 133.86, + "end": 134.06, + "confidence": 0.999 + }, + { + "text": "de", + "start": 134.06, + "end": 134.22, + "confidence": 0.995 + }, + { + "text": "téléphone", + "start": 134.22, + "end": 134.6, + "confidence": 0.992 + }, + { + "text": "pendant", + "start": 134.6, + "end": 134.92, + "confidence": 0.709 + }, + { + "text": "une", + "start": 134.92, + "end": 135.16, + "confidence": 0.78 + }, + { + "text": "heure", + "start": 135.16, + "end": 135.34, + "confidence": 0.995 + }, + { + "text": "chaque", + "start": 135.34, + "end": 135.58, + "confidence": 0.982 + }, + { + "text": "soir", + "start": 135.58, + "end": 135.8, + "confidence": 0.995 + }, + { + "text": "pour", + "start": 135.8, + "end": 135.98, + "confidence": 0.66 + }, + { + "text": "discuter", + "start": 135.98, + "end": 136.3, + "confidence": 0.997 + }, + { + "text": "avec", + "start": 136.3, + "end": 136.5, + "confidence": 0.995 + }, + { + "text": "un", + "start": 136.5, + "end": 136.66, + "confidence": 0.997 + }, + { + "text": "copain.", + "start": 136.66, + "end": 136.88, + "confidence": 0.996 + } + ] + }, + { + "id": 29, + "seek": 12334, + "start": 137.26, + "end": 141.86, + "text": " Mais ça ne ressemblait pas à ce qu'on peut ressentir à voir ce même môme aujourd'hui,", + "tokens": [ + 51039, + 6313, + 2788, + 408, + 725, + 15750, + 35235, + 1736, + 1531, + 1769, + 421, + 6, + 266, + 5977, + 24689, + 317, + 347, + 1531, + 10695, + 1769, + 5698, + 275, + 2851, + 1398, + 14023, + 6, + 10556, + 11, + 51289 + ], + "temperature": 0.0, + "avg_logprob": -0.08275167058023174, + "compression_ratio": 
1.6818181818181819, + "no_speech_prob": 0.01115426979959011, + "confidence": 0.956, + "words": [ + { + "text": "Mais", + "start": 137.26, + "end": 137.5, + "confidence": 0.989 + }, + { + "text": "ça", + "start": 137.5, + "end": 137.68, + "confidence": 0.928 + }, + { + "text": "ne", + "start": 137.68, + "end": 137.88, + "confidence": 0.999 + }, + { + "text": "ressemblait", + "start": 137.88, + "end": 138.42, + "confidence": 0.993 + }, + { + "text": "pas", + "start": 138.42, + "end": 138.78, + "confidence": 0.994 + }, + { + "text": "à", + "start": 138.78, + "end": 138.9, + "confidence": 0.989 + }, + { + "text": "ce", + "start": 138.9, + "end": 138.98, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 138.98, + "end": 139.12, + "confidence": 0.976 + }, + { + "text": "peut", + "start": 139.12, + "end": 139.48, + "confidence": 0.979 + }, + { + "text": "ressentir", + "start": 139.48, + "end": 140.1, + "confidence": 0.996 + }, + { + "text": "à", + "start": 140.1, + "end": 140.32, + "confidence": 0.575 + }, + { + "text": "voir", + "start": 140.32, + "end": 140.46, + "confidence": 0.728 + }, + { + "text": "ce", + "start": 140.46, + "end": 140.7, + "confidence": 0.985 + }, + { + "text": "même", + "start": 140.7, + "end": 140.94, + "confidence": 0.982 + }, + { + "text": "môme", + "start": 140.94, + "end": 141.28, + "confidence": 0.997 + }, + { + "text": "aujourd'hui,", + "start": 141.28, + "end": 141.86, + "confidence": 0.994 + } + ] + }, + { + "id": 30, + "seek": 12334, + "start": 142.16, + "end": 144.22, + "text": " continuellement avec son smartphone dans la main,", + "tokens": [ + 51289, + 2354, + 285, + 1712, + 4163, + 1872, + 13307, + 2680, + 635, + 2135, + 11, + 51414 + ], + "temperature": 0.0, + "avg_logprob": -0.08275167058023174, + "compression_ratio": 1.6818181818181819, + "no_speech_prob": 0.01115426979959011, + "confidence": 0.961, + "words": [ + { + "text": "continuellement", + "start": 142.16, + "end": 142.94, + "confidence": 0.963 + }, + { + "text": 
"avec", + "start": 142.94, + "end": 143.2, + "confidence": 0.844 + }, + { + "text": "son", + "start": 143.2, + "end": 143.42, + "confidence": 0.994 + }, + { + "text": "smartphone", + "start": 143.42, + "end": 143.76, + "confidence": 0.972 + }, + { + "text": "dans", + "start": 143.76, + "end": 143.92, + "confidence": 0.963 + }, + { + "text": "la", + "start": 143.92, + "end": 144.02, + "confidence": 0.997 + }, + { + "text": "main,", + "start": 144.02, + "end": 144.22, + "confidence": 0.997 + } + ] + }, + { + "id": 31, + "seek": 12334, + "start": 144.34, + "end": 148.8, + "text": " comme si c'était une sorte de pacemaker externe, comme si le lâcher allait entraîner sa mort immédiate.", + "tokens": [ + 51414, + 5173, + 1511, + 269, + 6, + 9743, + 2251, + 25559, + 368, + 15165, + 49523, + 454, + 391, + 716, + 11, + 5173, + 1511, + 476, + 48835, + 6759, + 439, + 1001, + 22284, + 7517, + 1193, + 601, + 6599, + 3397, + 526, + 4504, + 473, + 13, + 51639 + ], + "temperature": 0.0, + "avg_logprob": -0.08275167058023174, + "compression_ratio": 1.6818181818181819, + "no_speech_prob": 0.01115426979959011, + "confidence": 0.973, + "words": [ + { + "text": "comme", + "start": 144.34, + "end": 144.56, + "confidence": 0.985 + }, + { + "text": "si", + "start": 144.56, + "end": 144.66, + "confidence": 0.971 + }, + { + "text": "c'était", + "start": 144.66, + "end": 144.86, + "confidence": 0.987 + }, + { + "text": "une", + "start": 144.86, + "end": 144.98, + "confidence": 0.983 + }, + { + "text": "sorte", + "start": 144.98, + "end": 145.14, + "confidence": 0.992 + }, + { + "text": "de", + "start": 145.14, + "end": 145.34, + "confidence": 0.989 + }, + { + "text": "pacemaker", + "start": 145.34, + "end": 145.82, + "confidence": 0.931 + }, + { + "text": "externe,", + "start": 145.82, + "end": 146.4, + "confidence": 0.99 + }, + { + "text": "comme", + "start": 146.44, + "end": 146.6, + "confidence": 0.721 + }, + { + "text": "si", + "start": 146.6, + "end": 146.74, + "confidence": 0.993 + }, 
+ { + "text": "le", + "start": 146.74, + "end": 147.0, + "confidence": 0.996 + }, + { + "text": "lâcher", + "start": 147.0, + "end": 147.36, + "confidence": 0.968 + }, + { + "text": "allait", + "start": 147.36, + "end": 147.58, + "confidence": 0.991 + }, + { + "text": "entraîner", + "start": 147.58, + "end": 147.88, + "confidence": 0.982 + }, + { + "text": "sa", + "start": 147.88, + "end": 148.02, + "confidence": 0.999 + }, + { + "text": "mort", + "start": 148.02, + "end": 148.3, + "confidence": 0.998 + }, + { + "text": "immédiate.", + "start": 148.3, + "end": 148.8, + "confidence": 0.998 + } + ] + }, + { + "id": 32, + "seek": 12334, + "start": 148.94, + "end": 152.02, + "text": " Bon, je dis ça pour le môme, mais c'est évidemment valable pour nous aussi.", + "tokens": [ + 51639, + 7368, + 11, + 1506, + 717, + 2788, + 2016, + 476, + 275, + 2851, + 1398, + 11, + 2420, + 269, + 6, + 377, + 24724, + 1323, + 712, + 2016, + 4666, + 6212, + 13, + 51839 + ], + "temperature": 0.0, + "avg_logprob": -0.08275167058023174, + "compression_ratio": 1.6818181818181819, + "no_speech_prob": 0.01115426979959011, + "confidence": 0.973, + "words": [ + { + "text": "Bon,", + "start": 148.94, + "end": 149.24, + "confidence": 0.804 + }, + { + "text": "je", + "start": 149.26, + "end": 149.34, + "confidence": 0.933 + }, + { + "text": "dis", + "start": 149.34, + "end": 149.46, + "confidence": 0.982 + }, + { + "text": "ça", + "start": 149.46, + "end": 149.64, + "confidence": 0.992 + }, + { + "text": "pour", + "start": 149.64, + "end": 149.78, + "confidence": 0.997 + }, + { + "text": "le", + "start": 149.78, + "end": 149.9, + "confidence": 0.994 + }, + { + "text": "môme,", + "start": 149.9, + "end": 150.1, + "confidence": 0.999 + }, + { + "text": "mais", + "start": 150.32, + "end": 150.5, + "confidence": 0.904 + }, + { + "text": "c'est", + "start": 150.5, + "end": 150.84, + "confidence": 0.979 + }, + { + "text": "évidemment", + "start": 150.84, + "end": 151.18, + "confidence": 0.979 + }, + { + 
"text": "valable", + "start": 151.18, + "end": 151.48, + "confidence": 0.997 + }, + { + "text": "pour", + "start": 151.48, + "end": 151.64, + "confidence": 0.996 + }, + { + "text": "nous", + "start": 151.64, + "end": 151.78, + "confidence": 0.998 + }, + { + "text": "aussi.", + "start": 151.78, + "end": 152.02, + "confidence": 0.997 + } + ] + }, + { + "id": 33, + "seek": 15284, + "start": 153.14, + "end": 154.68, + "text": " Donc, rapport inédit, d'accord.", + "tokens": [ + 50389, + 7477, + 11, + 18018, + 294, + 7811, + 270, + 11, + 274, + 6, + 19947, + 13, + 50489 + ], + "temperature": 0.0, + "avg_logprob": -0.1240098174189178, + "compression_ratio": 1.6703296703296704, + "no_speech_prob": 0.07031125575304031, + "confidence": 0.813, + "words": [ + { + "text": "Donc,", + "start": 153.14, + "end": 153.24, + "confidence": 0.232 + }, + { + "text": "rapport", + "start": 153.26, + "end": 153.62, + "confidence": 0.855 + }, + { + "text": "inédit,", + "start": 153.62, + "end": 154.18, + "confidence": 0.992 + }, + { + "text": "d'accord.", + "start": 154.28, + "end": 154.68, + "confidence": 0.997 + } + ] + }, + { + "id": 34, + "seek": 15284, + "start": 155.64, + "end": 158.52, + "text": " Mais pourquoi a-t-on l'impression qu'on n'en sortira jamais ?", + "tokens": [ + 50489, + 6313, + 19934, + 257, + 12, + 83, + 12, + 266, + 287, + 6, + 36107, + 421, + 6, + 266, + 297, + 6, + 268, + 1333, + 4271, + 14540, + 2506, + 50639 + ], + "temperature": 0.0, + "avg_logprob": -0.1240098174189178, + "compression_ratio": 1.6703296703296704, + "no_speech_prob": 0.07031125575304031, + "confidence": 0.958, + "words": [ + { + "text": "Mais", + "start": 155.64, + "end": 155.88, + "confidence": 0.991 + }, + { + "text": "pourquoi", + "start": 155.88, + "end": 156.36, + "confidence": 0.992 + }, + { + "text": "a-t-on", + "start": 156.36, + "end": 156.7, + "confidence": 0.95 + }, + { + "text": "l'impression", + "start": 156.7, + "end": 157.06, + "confidence": 0.998 + }, + { + "text": "qu'on", + 
"start": 157.06, + "end": 157.28, + "confidence": 0.99 + }, + { + "text": "n'en", + "start": 157.28, + "end": 157.5, + "confidence": 0.859 + }, + { + "text": "sortira", + "start": 157.5, + "end": 157.86, + "confidence": 0.974 + }, + { + "text": "jamais ?", + "start": 157.86, + "end": 158.52, + "confidence": 0.997 + } + ] + }, + { + "id": 35, + "seek": 15284, + "start": 159.12, + "end": 165.36, + "text": " Est-ce qu'il faut en remettre la faute sur les gens qui ont créé cet outil merveilleux et diabolique, et diabolique parce que merveilleux ?", + "tokens": [ + 50639, + 4410, + 12, + 384, + 421, + 6, + 388, + 8487, + 465, + 890, + 40681, + 635, + 2050, + 1169, + 1022, + 1512, + 10668, + 1956, + 6592, + 15609, + 526, + 8603, + 484, + 388, + 3551, + 303, + 3409, + 2449, + 1030, + 1026, + 14923, + 1925, + 11, + 1030, + 1026, + 14923, + 1925, + 6992, + 631, + 3551, + 303, + 3409, + 2449, + 2506, + 50989 + ], + "temperature": 0.0, + "avg_logprob": -0.1240098174189178, + "compression_ratio": 1.6703296703296704, + "no_speech_prob": 0.07031125575304031, + "confidence": 0.959, + "words": [ + { + "text": "Est-ce", + "start": 159.12, + "end": 159.34, + "confidence": 0.987 + }, + { + "text": "qu'il", + "start": 159.34, + "end": 159.46, + "confidence": 0.993 + }, + { + "text": "faut", + "start": 159.46, + "end": 159.64, + "confidence": 0.998 + }, + { + "text": "en", + "start": 159.64, + "end": 159.88, + "confidence": 0.961 + }, + { + "text": "remettre", + "start": 159.88, + "end": 160.14, + "confidence": 0.999 + }, + { + "text": "la", + "start": 160.14, + "end": 160.46, + "confidence": 0.995 + }, + { + "text": "faute", + "start": 160.46, + "end": 160.64, + "confidence": 0.986 + }, + { + "text": "sur", + "start": 160.64, + "end": 161.06, + "confidence": 0.982 + }, + { + "text": "les", + "start": 161.06, + "end": 161.3, + "confidence": 0.991 + }, + { + "text": "gens", + "start": 161.3, + "end": 161.46, + "confidence": 0.998 + }, + { + "text": "qui", + "start": 161.46, + "end": 
161.56, + "confidence": 0.724 + }, + { + "text": "ont", + "start": 161.56, + "end": 161.82, + "confidence": 0.996 + }, + { + "text": "créé", + "start": 161.82, + "end": 162.28, + "confidence": 0.985 + }, + { + "text": "cet", + "start": 162.28, + "end": 162.48, + "confidence": 0.816 + }, + { + "text": "outil", + "start": 162.48, + "end": 162.78, + "confidence": 0.993 + }, + { + "text": "merveilleux", + "start": 162.78, + "end": 163.36, + "confidence": 0.981 + }, + { + "text": "et", + "start": 163.36, + "end": 163.5, + "confidence": 0.983 + }, + { + "text": "diabolique,", + "start": 163.5, + "end": 163.84, + "confidence": 0.991 + }, + { + "text": "et", + "start": 163.86, + "end": 164.0, + "confidence": 0.555 + }, + { + "text": "diabolique", + "start": 164.0, + "end": 164.4, + "confidence": 0.949 + }, + { + "text": "parce", + "start": 164.4, + "end": 164.66, + "confidence": 0.927 + }, + { + "text": "que", + "start": 164.66, + "end": 164.86, + "confidence": 0.979 + }, + { + "text": "merveilleux ?", + "start": 164.86, + "end": 165.36, + "confidence": 0.997 + } + ] + }, + { + "id": 36, + "seek": 15284, + "start": 166.84, + "end": 168.82, + "text": " Les économistes parlent de dépendance du sentier.", + "tokens": [ + 51039, + 6965, + 31171, + 22368, + 13734, + 317, + 368, + 45768, + 719, + 1581, + 2279, + 811, + 13, + 51139 + ], + "temperature": 0.0, + "avg_logprob": -0.1240098174189178, + "compression_ratio": 1.6703296703296704, + "no_speech_prob": 0.07031125575304031, + "confidence": 0.986, + "words": [ + { + "text": "Les", + "start": 166.84, + "end": 167.08, + "confidence": 0.945 + }, + { + "text": "économistes", + "start": 167.08, + "end": 167.46, + "confidence": 0.997 + }, + { + "text": "parlent", + "start": 167.46, + "end": 167.72, + "confidence": 0.993 + }, + { + "text": "de", + "start": 167.72, + "end": 167.88, + "confidence": 0.992 + }, + { + "text": "dépendance", + "start": 167.88, + "end": 168.34, + "confidence": 0.986 + }, + { + "text": "du", + "start": 
168.34, + "end": 168.52, + "confidence": 0.996 + }, + { + "text": "sentier.", + "start": 168.52, + "end": 168.82, + "confidence": 0.983 + } + ] + }, + { + "id": 37, + "seek": 15284, + "start": 168.98, + "end": 177.38, + "text": " C'est l'idée qu'on met sur un sentier qui a été établi, soit volontairement en marchant dessus, soit en définissant des bornes, en définissant une signalétique.", + "tokens": [ + 51139, + 383, + 6, + 377, + 287, + 6, + 34281, + 421, + 6, + 266, + 1131, + 1022, + 517, + 2279, + 811, + 1956, + 257, + 8862, + 4823, + 455, + 2081, + 11, + 12703, + 40005, + 9020, + 518, + 465, + 8368, + 394, + 30677, + 11, + 12703, + 465, + 40763, + 29492, + 730, + 4232, + 279, + 11, + 465, + 40763, + 29492, + 2251, + 6358, + 42379, + 13, + 51589 + ], + "temperature": 0.0, + "avg_logprob": -0.1240098174189178, + "compression_ratio": 1.6703296703296704, + "no_speech_prob": 0.07031125575304031, + "confidence": 0.907, + "words": [ + { + "text": "C'est", + "start": 168.98, + "end": 169.2, + "confidence": 0.996 + }, + { + "text": "l'idée", + "start": 169.2, + "end": 169.38, + "confidence": 0.998 + }, + { + "text": "qu'on", + "start": 169.38, + "end": 169.74, + "confidence": 0.7 + }, + { + "text": "met", + "start": 169.74, + "end": 169.88, + "confidence": 0.648 + }, + { + "text": "sur", + "start": 169.88, + "end": 170.1, + "confidence": 0.995 + }, + { + "text": "un", + "start": 170.1, + "end": 170.52, + "confidence": 0.997 + }, + { + "text": "sentier", + "start": 170.52, + "end": 170.84, + "confidence": 0.991 + }, + { + "text": "qui", + "start": 170.84, + "end": 170.9, + "confidence": 0.955 + }, + { + "text": "a", + "start": 170.9, + "end": 171.0, + "confidence": 0.969 + }, + { + "text": "été", + "start": 171.0, + "end": 171.14, + "confidence": 0.994 + }, + { + "text": "établi,", + "start": 171.14, + "end": 171.5, + "confidence": 0.995 + }, + { + "text": "soit", + "start": 171.76, + "end": 172.1, + "confidence": 0.527 + }, + { + "text": "volontairement", + "start": 
172.1, + "end": 172.7, + "confidence": 0.987 + }, + { + "text": "en", + "start": 172.7, + "end": 172.86, + "confidence": 0.927 + }, + { + "text": "marchant", + "start": 172.86, + "end": 173.1, + "confidence": 0.997 + }, + { + "text": "dessus,", + "start": 173.1, + "end": 173.48, + "confidence": 0.995 + }, + { + "text": "soit", + "start": 173.86, + "end": 174.28, + "confidence": 0.749 + }, + { + "text": "en", + "start": 174.28, + "end": 175.16, + "confidence": 0.967 + }, + { + "text": "définissant", + "start": 175.16, + "end": 175.54, + "confidence": 0.981 + }, + { + "text": "des", + "start": 175.54, + "end": 175.74, + "confidence": 0.99 + }, + { + "text": "bornes,", + "start": 175.74, + "end": 175.98, + "confidence": 0.977 + }, + { + "text": "en", + "start": 176.04, + "end": 176.14, + "confidence": 0.672 + }, + { + "text": "définissant", + "start": 176.14, + "end": 176.66, + "confidence": 0.996 + }, + { + "text": "une", + "start": 176.66, + "end": 176.94, + "confidence": 0.983 + }, + { + "text": "signalétique.", + "start": 176.94, + "end": 177.38, + "confidence": 0.686 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/naive.cpu/accurate_apollo11.mp3.words.json b/tests/expected/naive.cpu/accurate_apollo11.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..960a425ed608e1568f8c2f523148e8ea33ebb33c --- /dev/null +++ b/tests/expected/naive.cpu/accurate_apollo11.mp3.words.json @@ -0,0 +1,2375 @@ +{ + "text": " Apollo 11, Houston, we got a recommendation for you on your Soyuz-EA GLEM-E-G-E-A. All right. Okay. Yeah, sir. Yeah, sir. Let's take that camera. Let's say it makes it want to go on the helmet we were going to have in B-1. The other one. And you can put the other one on the mic helmet. We'll show it to you in a quick screen. Over. All right. Yeah, sir. All right. Yeah, sir. There's a better helmet than B-1. That's the other one. Nice. Let's go in there. At least we're safe. 
We've got them in there. Helmet bags. And, uh, I guess we have helmets in the helmet bag. At least it's in the helmet bag. Right here. Right here. Yeah, we're taking it next day out of the field up. Hey, we were... You want to hack me on this? Hey, we were... You want to hack me on this? With a cover, I tried it already. Okay, fine. We weren't sure of that. Just a suggestion. We thought we'd... You could check it out. It's not much of an order to turn that. So, uh, I guess we're going to come up with this. Let us know. Okay. No problem. Okay. No problem. We'll let you know when the end of... None. Okay.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.64, + "end": 6.78, + "text": " Apollo 11, Houston, we got a recommendation for you on your Soyuz-EA GLEM-E-G-E-A.", + "tokens": [ + 25187, + 2975, + 11, + 18717, + 11, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 24758, + 3334, + 12, + 36, + 32, + 460, + 2634, + 44, + 12, + 36, + 12, + 38, + 12, + 36, + 12, + 32, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5336953440020161, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.45046156644821167, + "confidence": 0.578, + "words": [ + { + "text": "Apollo", + "start": 0.64, + "end": 0.98, + "confidence": 0.426 + }, + { + "text": "11,", + "start": 0.98, + "end": 1.54, + "confidence": 0.842 + }, + { + "text": "Houston,", + "start": 1.54, + "end": 1.96, + "confidence": 0.666 + }, + { + "text": "we", + "start": 1.96, + "end": 2.0, + "confidence": 0.983 + }, + { + "text": "got", + "start": 2.0, + "end": 2.16, + "confidence": 0.742 + }, + { + "text": "a", + "start": 2.16, + "end": 2.38, + "confidence": 0.993 + }, + { + "text": "recommendation", + "start": 2.38, + "end": 3.1, + "confidence": 0.977 + }, + { + "text": "for", + "start": 3.1, + "end": 3.54, + "confidence": 0.94 + }, + { + "text": "you", + "start": 3.54, + "end": 3.88, + "confidence": 0.993 + }, + { + "text": "on", + "start": 3.88, + "end": 4.26, + "confidence": 0.918 + }, 
+ { + "text": "your", + "start": 4.26, + "end": 4.3, + "confidence": 0.976 + }, + { + "text": "Soyuz-EA", + "start": 4.3, + "end": 5.38, + "confidence": 0.453 + }, + { + "text": "GLEM-E-G-E-A.", + "start": 5.38, + "end": 6.78, + "confidence": 0.436 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 7.78, + "end": 10.46, + "text": " All right.", + "tokens": [ + 1057, + 558, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5336953440020161, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.45046156644821167, + "confidence": 0.158, + "words": [ + { + "text": "All", + "start": 7.78, + "end": 7.82, + "confidence": 0.007 + }, + { + "text": "right.", + "start": 7.82, + "end": 10.46, + "confidence": 0.727 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 10.82, + "end": 11.38, + "text": " Okay.", + "tokens": [ + 1033, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5336953440020161, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.45046156644821167, + "confidence": 0.105, + "words": [ + { + "text": "Okay.", + "start": 10.82, + "end": 11.38, + "confidence": 0.105 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 12.02, + "end": 13.44, + "text": " Yeah, sir.", + "tokens": [ + 865, + 11, + 4735, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5336953440020161, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.45046156644821167, + "confidence": 0.101, + "words": [ + { + "text": "Yeah,", + "start": 12.02, + "end": 12.76, + "confidence": 0.076 + }, + { + "text": "sir.", + "start": 12.76, + "end": 13.44, + "confidence": 0.135 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 13.58, + "end": 14.02, + "text": " Yeah, sir.", + "tokens": [ + 865, + 11, + 4735, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5336953440020161, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.45046156644821167, + "confidence": 0.052, + "words": [ + { + "text": "Yeah,", + "start": 13.58, + "end": 13.84, + "confidence": 
0.108 + }, + { + "text": "sir.", + "start": 13.84, + "end": 14.02, + "confidence": 0.025 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 14.04, + "end": 14.6, + "text": " Let's take that camera.", + "tokens": [ + 961, + 311, + 747, + 300, + 2799, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5336953440020161, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.45046156644821167, + "confidence": 0.082, + "words": [ + { + "text": "Let's", + "start": 14.04, + "end": 14.26, + "confidence": 0.043 + }, + { + "text": "take", + "start": 14.26, + "end": 14.3, + "confidence": 0.04 + }, + { + "text": "that", + "start": 14.3, + "end": 14.36, + "confidence": 0.055 + }, + { + "text": "camera.", + "start": 14.36, + "end": 14.6, + "confidence": 0.269 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 14.62, + "end": 19.16, + "text": " Let's say it makes it want to go on the helmet we were going to have in B-1.", + "tokens": [ + 961, + 311, + 584, + 309, + 1669, + 309, + 528, + 281, + 352, + 322, + 264, + 15922, + 321, + 645, + 516, + 281, + 362, + 294, + 363, + 12, + 16, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5336953440020161, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.45046156644821167, + "confidence": 0.311, + "words": [ + { + "text": "Let's", + "start": 14.62, + "end": 15.34, + "confidence": 0.181 + }, + { + "text": "say", + "start": 15.34, + "end": 15.56, + "confidence": 0.451 + }, + { + "text": "it", + "start": 15.56, + "end": 15.76, + "confidence": 0.043 + }, + { + "text": "makes", + "start": 15.76, + "end": 15.8, + "confidence": 0.62 + }, + { + "text": "it", + "start": 15.8, + "end": 16.04, + "confidence": 0.943 + }, + { + "text": "want", + "start": 16.04, + "end": 16.22, + "confidence": 0.118 + }, + { + "text": "to", + "start": 16.22, + "end": 16.34, + "confidence": 0.953 + }, + { + "text": "go", + "start": 16.34, + "end": 16.46, + "confidence": 0.775 + }, + { + "text": "on", + "start": 16.46, + "end": 16.74, + 
"confidence": 0.939 + }, + { + "text": "the", + "start": 16.74, + "end": 16.88, + "confidence": 0.532 + }, + { + "text": "helmet", + "start": 16.88, + "end": 17.36, + "confidence": 0.003 + }, + { + "text": "we", + "start": 17.36, + "end": 17.64, + "confidence": 0.018 + }, + { + "text": "were", + "start": 17.64, + "end": 17.86, + "confidence": 0.255 + }, + { + "text": "going", + "start": 17.86, + "end": 18.06, + "confidence": 0.654 + }, + { + "text": "to", + "start": 18.06, + "end": 18.26, + "confidence": 0.966 + }, + { + "text": "have", + "start": 18.26, + "end": 18.3, + "confidence": 0.967 + }, + { + "text": "in", + "start": 18.3, + "end": 18.46, + "confidence": 0.899 + }, + { + "text": "B-1.", + "start": 18.46, + "end": 19.16, + "confidence": 0.489 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 19.4, + "end": 19.94, + "text": " The other one.", + "tokens": [ + 440, + 661, + 472, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5336953440020161, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.45046156644821167, + "confidence": 0.127, + "words": [ + { + "text": "The", + "start": 19.4, + "end": 19.44, + "confidence": 0.004 + }, + { + "text": "other", + "start": 19.44, + "end": 19.7, + "confidence": 0.144 + }, + { + "text": "one.", + "start": 19.7, + "end": 19.94, + "confidence": 0.656 + } + ] + }, + { + "id": 8, + "seek": 0, + "start": 20.12, + "end": 22.92, + "text": " And you can put the other one on the mic helmet.", + "tokens": [ + 400, + 291, + 393, + 829, + 264, + 661, + 472, + 322, + 264, + 3123, + 15922, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5336953440020161, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.45046156644821167, + "confidence": 0.394, + "words": [ + { + "text": "And", + "start": 20.12, + "end": 20.26, + "confidence": 0.358 + }, + { + "text": "you", + "start": 20.26, + "end": 20.38, + "confidence": 0.915 + }, + { + "text": "can", + "start": 20.38, + "end": 20.54, + "confidence": 0.122 + }, + 
{ + "text": "put", + "start": 20.54, + "end": 20.7, + "confidence": 0.92 + }, + { + "text": "the", + "start": 20.7, + "end": 20.88, + "confidence": 0.981 + }, + { + "text": "other", + "start": 20.88, + "end": 21.06, + "confidence": 0.997 + }, + { + "text": "one", + "start": 21.06, + "end": 21.26, + "confidence": 0.97 + }, + { + "text": "on", + "start": 21.26, + "end": 22.14, + "confidence": 0.971 + }, + { + "text": "the", + "start": 22.14, + "end": 22.18, + "confidence": 0.085 + }, + { + "text": "mic", + "start": 22.18, + "end": 22.62, + "confidence": 0.679 + }, + { + "text": "helmet.", + "start": 22.62, + "end": 22.92, + "confidence": 0.085 + } + ] + }, + { + "id": 9, + "seek": 0, + "start": 22.94, + "end": 24.58, + "text": " We'll show it to you in a quick screen.", + "tokens": [ + 492, + 603, + 855, + 309, + 281, + 291, + 294, + 257, + 1702, + 2568, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5336953440020161, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.45046156644821167, + "confidence": 0.124, + "words": [ + { + "text": "We'll", + "start": 22.94, + "end": 23.32, + "confidence": 0.098 + }, + { + "text": "show", + "start": 23.32, + "end": 23.36, + "confidence": 0.008 + }, + { + "text": "it", + "start": 23.36, + "end": 23.54, + "confidence": 0.167 + }, + { + "text": "to", + "start": 23.54, + "end": 23.64, + "confidence": 0.268 + }, + { + "text": "you", + "start": 23.64, + "end": 23.8, + "confidence": 0.656 + }, + { + "text": "in", + "start": 23.8, + "end": 23.98, + "confidence": 0.246 + }, + { + "text": "a", + "start": 23.98, + "end": 24.02, + "confidence": 0.683 + }, + { + "text": "quick", + "start": 24.02, + "end": 24.14, + "confidence": 0.022 + }, + { + "text": "screen.", + "start": 24.14, + "end": 24.58, + "confidence": 0.114 + } + ] + }, + { + "id": 10, + "seek": 0, + "start": 24.94, + "end": 25.18, + "text": " Over.", + "tokens": [ + 4886, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5336953440020161, + 
"compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.45046156644821167, + "confidence": 0.117, + "words": [ + { + "text": "Over.", + "start": 24.94, + "end": 25.18, + "confidence": 0.117 + } + ] + }, + { + "id": 11, + "seek": 2600, + "start": 25.52, + "end": 27.48, + "text": " All right.", + "tokens": [ + 1057, + 558, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.094, + "words": [ + { + "text": "All", + "start": 25.52, + "end": 25.56, + "confidence": 0.002 + }, + { + "text": "right.", + "start": 25.56, + "end": 27.48, + "confidence": 0.622 + } + ] + }, + { + "id": 12, + "seek": 2600, + "start": 31.32, + "end": 32.12, + "text": " Yeah, sir.", + "tokens": [ + 865, + 11, + 4735, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.045, + "words": [ + { + "text": "Yeah,", + "start": 31.32, + "end": 31.6, + "confidence": 0.032 + }, + { + "text": "sir.", + "start": 31.6, + "end": 32.12, + "confidence": 0.063 + } + ] + }, + { + "id": 13, + "seek": 2600, + "start": 32.66, + "end": 33.06, + "text": " All right.", + "tokens": [ + 1057, + 558, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.199, + "words": [ + { + "text": "All", + "start": 32.66, + "end": 32.94, + "confidence": 0.025 + }, + { + "text": "right.", + "start": 32.94, + "end": 33.06, + "confidence": 0.56 + } + ] + }, + { + "id": 14, + "seek": 2600, + "start": 33.08, + "end": 33.56, + "text": " Yeah, sir.", + "tokens": [ + 865, + 11, + 4735, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.036, + "words": [ + { + 
"text": "Yeah,", + "start": 33.08, + "end": 33.38, + "confidence": 0.145 + }, + { + "text": "sir.", + "start": 33.38, + "end": 33.56, + "confidence": 0.009 + } + ] + }, + { + "id": 15, + "seek": 2600, + "start": 33.58, + "end": 35.12, + "text": " There's a better helmet than B-1.", + "tokens": [ + 821, + 311, + 257, + 1101, + 15922, + 813, + 363, + 12, + 16, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.046, + "words": [ + { + "text": "There's", + "start": 33.58, + "end": 33.96, + "confidence": 0.035 + }, + { + "text": "a", + "start": 33.96, + "end": 34.0, + "confidence": 0.302 + }, + { + "text": "better", + "start": 34.0, + "end": 34.04, + "confidence": 0.037 + }, + { + "text": "helmet", + "start": 34.04, + "end": 34.36, + "confidence": 0.009 + }, + { + "text": "than", + "start": 34.36, + "end": 34.7, + "confidence": 0.386 + }, + { + "text": "B-1.", + "start": 34.7, + "end": 35.12, + "confidence": 0.031 + } + ] + }, + { + "id": 16, + "seek": 2600, + "start": 35.14, + "end": 35.96, + "text": " That's the other one.", + "tokens": [ + 663, + 311, + 264, + 661, + 472, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.259, + "words": [ + { + "text": "That's", + "start": 35.14, + "end": 35.48, + "confidence": 0.114 + }, + { + "text": "the", + "start": 35.48, + "end": 35.52, + "confidence": 0.609 + }, + { + "text": "other", + "start": 35.52, + "end": 35.7, + "confidence": 0.954 + }, + { + "text": "one.", + "start": 35.7, + "end": 35.96, + "confidence": 0.2 + } + ] + }, + { + "id": 17, + "seek": 2600, + "start": 35.98, + "end": 36.24, + "text": " Nice.", + "tokens": [ + 5490, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 
0.0011296384036540985, + "confidence": 0.008, + "words": [ + { + "text": "Nice.", + "start": 35.98, + "end": 36.24, + "confidence": 0.008 + } + ] + }, + { + "id": 18, + "seek": 2600, + "start": 37.5, + "end": 38.44, + "text": " Let's go in there.", + "tokens": [ + 961, + 311, + 352, + 294, + 456, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.127, + "words": [ + { + "text": "Let's", + "start": 37.5, + "end": 38.16, + "confidence": 0.024 + }, + { + "text": "go", + "start": 38.16, + "end": 38.2, + "confidence": 0.624 + }, + { + "text": "in", + "start": 38.2, + "end": 38.3, + "confidence": 0.037 + }, + { + "text": "there.", + "start": 38.3, + "end": 38.44, + "confidence": 0.571 + } + ] + }, + { + "id": 19, + "seek": 2600, + "start": 38.6, + "end": 39.26, + "text": " At least we're safe.", + "tokens": [ + 1711, + 1935, + 321, + 434, + 3273, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.14, + "words": [ + { + "text": "At", + "start": 38.6, + "end": 38.64, + "confidence": 0.002 + }, + { + "text": "least", + "start": 38.64, + "end": 38.84, + "confidence": 0.269 + }, + { + "text": "we're", + "start": 38.84, + "end": 39.18, + "confidence": 0.138 + }, + { + "text": "safe.", + "start": 39.18, + "end": 39.26, + "confidence": 0.802 + } + ] + }, + { + "id": 20, + "seek": 2600, + "start": 39.92, + "end": 40.52, + "text": " We've got them in there.", + "tokens": [ + 492, + 600, + 658, + 552, + 294, + 456, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.071, + "words": [ + { + "text": "We've", + "start": 39.92, + "end": 40.36, + "confidence": 0.042 + }, + { + "text": "got", + "start": 40.36, + "end": 40.4, + 
"confidence": 0.682 + }, + { + "text": "them", + "start": 40.4, + "end": 40.44, + "confidence": 0.004 + }, + { + "text": "in", + "start": 40.44, + "end": 40.48, + "confidence": 0.041 + }, + { + "text": "there.", + "start": 40.48, + "end": 40.52, + "confidence": 0.216 + } + ] + }, + { + "id": 21, + "seek": 2600, + "start": 40.52, + "end": 41.1, + "text": " Helmet bags.", + "tokens": [ + 6128, + 5537, + 10405, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.002, + "words": [ + { + "text": "Helmet", + "start": 40.52, + "end": 40.8, + "confidence": 0.002 + }, + { + "text": "bags.", + "start": 40.8, + "end": 41.1, + "confidence": 0.002 + } + ] + }, + { + "id": 22, + "seek": 2600, + "start": 41.12, + "end": 43.1, + "text": " And, uh, I guess we have helmets in the helmet bag.", + "tokens": [ + 400, + 11, + 2232, + 11, + 286, + 2041, + 321, + 362, + 42022, + 294, + 264, + 15922, + 3411, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.036, + "words": [ + { + "text": "And,", + "start": 41.12, + "end": 41.5, + "confidence": 0.02 + }, + { + "text": "uh,", + "start": 41.5, + "end": 41.54, + "confidence": 0.55 + }, + { + "text": "I", + "start": 41.54, + "end": 41.58, + "confidence": 0.409 + }, + { + "text": "guess", + "start": 41.58, + "end": 41.64, + "confidence": 0.031 + }, + { + "text": "we", + "start": 41.64, + "end": 41.68, + "confidence": 0.059 + }, + { + "text": "have", + "start": 41.68, + "end": 41.72, + "confidence": 0.018 + }, + { + "text": "helmets", + "start": 41.72, + "end": 41.76, + "confidence": 0.0 + }, + { + "text": "in", + "start": 41.76, + "end": 42.08, + "confidence": 0.025 + }, + { + "text": "the", + "start": 42.08, + "end": 42.38, + "confidence": 0.214 + }, + { + "text": "helmet", + "start": 42.38, + "end": 
43.06, + "confidence": 0.001 + }, + { + "text": "bag.", + "start": 43.06, + "end": 43.1, + "confidence": 0.611 + } + ] + }, + { + "id": 23, + "seek": 2600, + "start": 43.56, + "end": 47.42, + "text": " At least it's in the helmet bag.", + "tokens": [ + 1711, + 1935, + 309, + 311, + 294, + 264, + 15922, + 3411, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.051, + "words": [ + { + "text": "At", + "start": 43.56, + "end": 44.06, + "confidence": 0.001 + }, + { + "text": "least", + "start": 44.06, + "end": 44.32, + "confidence": 0.325 + }, + { + "text": "it's", + "start": 44.32, + "end": 44.9, + "confidence": 0.169 + }, + { + "text": "in", + "start": 44.9, + "end": 45.46, + "confidence": 0.03 + }, + { + "text": "the", + "start": 45.46, + "end": 45.5, + "confidence": 0.345 + }, + { + "text": "helmet", + "start": 45.5, + "end": 46.54, + "confidence": 0.004 + }, + { + "text": "bag.", + "start": 46.54, + "end": 47.42, + "confidence": 0.095 + } + ] + }, + { + "id": 24, + "seek": 2600, + "start": 48.22, + "end": 48.44, + "text": " Right here.", + "tokens": [ + 1779, + 510, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.02, + "words": [ + { + "text": "Right", + "start": 48.22, + "end": 48.26, + "confidence": 0.0 + }, + { + "text": "here.", + "start": 48.26, + "end": 48.44, + "confidence": 0.167 + } + ] + }, + { + "id": 25, + "seek": 2600, + "start": 48.46, + "end": 48.76, + "text": " Right here.", + "tokens": [ + 1779, + 510, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.018, + "words": [ + { + "text": "Right", + "start": 48.46, + "end": 48.56, + "confidence": 0.001 + }, + { + "text": "here.", + "start": 
48.56, + "end": 48.76, + "confidence": 0.078 + } + ] + }, + { + "id": 26, + "seek": 2600, + "start": 48.78, + "end": 55.4, + "text": " Yeah, we're taking it next day out of the field up.", + "tokens": [ + 865, + 11, + 321, + 434, + 1940, + 309, + 958, + 786, + 484, + 295, + 264, + 2519, + 493, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5317274729410807, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011296384036540985, + "confidence": 0.285, + "words": [ + { + "text": "Yeah,", + "start": 48.78, + "end": 52.1, + "confidence": 0.124 + }, + { + "text": "we're", + "start": 52.1, + "end": 53.64, + "confidence": 0.622 + }, + { + "text": "taking", + "start": 53.64, + "end": 53.68, + "confidence": 0.352 + }, + { + "text": "it", + "start": 53.68, + "end": 53.76, + "confidence": 0.114 + }, + { + "text": "next", + "start": 53.76, + "end": 53.98, + "confidence": 0.411 + }, + { + "text": "day", + "start": 53.98, + "end": 54.2, + "confidence": 0.888 + }, + { + "text": "out", + "start": 54.2, + "end": 54.32, + "confidence": 0.322 + }, + { + "text": "of", + "start": 54.32, + "end": 54.56, + "confidence": 0.473 + }, + { + "text": "the", + "start": 54.56, + "end": 54.7, + "confidence": 0.357 + }, + { + "text": "field", + "start": 54.7, + "end": 55.2, + "confidence": 0.051 + }, + { + "text": "up.", + "start": 55.2, + "end": 55.4, + "confidence": 0.31 + } + ] + }, + { + "id": 27, + "seek": 5500, + "start": 55.42, + "end": 56.32, + "text": " Hey, we were...", + "tokens": [ + 1911, + 11, + 321, + 645, + 485 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.009, + "words": [ + { + "text": "Hey,", + "start": 55.42, + "end": 56.24, + "confidence": 0.015 + }, + { + "text": "we", + "start": 56.24, + "end": 56.28, + "confidence": 0.014 + }, + { + "text": "were...", + "start": 56.28, + "end": 56.32, + "confidence": 0.004 + } + ] + }, + { + "id": 28, 
+ "seek": 5500, + "start": 56.5, + "end": 57.52, + "text": " You want to hack me on this?", + "tokens": [ + 509, + 528, + 281, + 10339, + 385, + 322, + 341, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.022, + "words": [ + { + "text": "You", + "start": 56.5, + "end": 56.64, + "confidence": 0.006 + }, + { + "text": "want", + "start": 56.64, + "end": 57.02, + "confidence": 0.007 + }, + { + "text": "to", + "start": 57.02, + "end": 57.36, + "confidence": 0.345 + }, + { + "text": "hack", + "start": 57.36, + "end": 57.4, + "confidence": 0.0 + }, + { + "text": "me", + "start": 57.4, + "end": 57.44, + "confidence": 0.088 + }, + { + "text": "on", + "start": 57.44, + "end": 57.48, + "confidence": 0.004 + }, + { + "text": "this?", + "start": 57.48, + "end": 57.52, + "confidence": 0.177 + } + ] + }, + { + "id": 29, + "seek": 5500, + "start": 57.52, + "end": 58.44, + "text": " Hey, we were...", + "tokens": [ + 1911, + 11, + 321, + 645, + 485 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.048, + "words": [ + { + "text": "Hey,", + "start": 57.52, + "end": 57.98, + "confidence": 0.033 + }, + { + "text": "we", + "start": 57.98, + "end": 58.14, + "confidence": 0.391 + }, + { + "text": "were...", + "start": 58.14, + "end": 58.44, + "confidence": 0.024 + } + ] + }, + { + "id": 30, + "seek": 5500, + "start": 58.46, + "end": 59.44, + "text": " You want to hack me on this?", + "tokens": [ + 509, + 528, + 281, + 10339, + 385, + 322, + 341, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.04, + "words": [ + { + "text": "You", + "start": 58.46, + "end": 58.7, + "confidence": 0.014 + }, + { + "text": "want", + "start": 
58.7, + "end": 58.74, + "confidence": 0.017 + }, + { + "text": "to", + "start": 58.74, + "end": 58.78, + "confidence": 0.336 + }, + { + "text": "hack", + "start": 58.78, + "end": 58.82, + "confidence": 0.0 + }, + { + "text": "me", + "start": 58.82, + "end": 59.0, + "confidence": 0.417 + }, + { + "text": "on", + "start": 59.0, + "end": 59.2, + "confidence": 0.277 + }, + { + "text": "this?", + "start": 59.2, + "end": 59.44, + "confidence": 0.138 + } + ] + }, + { + "id": 31, + "seek": 5500, + "start": 59.76, + "end": 61.48, + "text": " With a cover, I tried it already.", + "tokens": [ + 2022, + 257, + 2060, + 11, + 286, + 3031, + 309, + 1217, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.064, + "words": [ + { + "text": "With", + "start": 59.76, + "end": 60.12, + "confidence": 0.027 + }, + { + "text": "a", + "start": 60.12, + "end": 60.84, + "confidence": 0.553 + }, + { + "text": "cover,", + "start": 60.84, + "end": 61.32, + "confidence": 0.167 + }, + { + "text": "I", + "start": 61.32, + "end": 61.36, + "confidence": 0.846 + }, + { + "text": "tried", + "start": 61.36, + "end": 61.4, + "confidence": 0.046 + }, + { + "text": "it", + "start": 61.4, + "end": 61.44, + "confidence": 0.011 + }, + { + "text": "already.", + "start": 61.44, + "end": 61.48, + "confidence": 0.01 + } + ] + }, + { + "id": 32, + "seek": 5500, + "start": 62.38, + "end": 62.46, + "text": " Okay, fine.", + "tokens": [ + 1033, + 11, + 2489, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.062, + "words": [ + { + "text": "Okay,", + "start": 62.38, + "end": 62.42, + "confidence": 0.06 + }, + { + "text": "fine.", + "start": 62.42, + "end": 62.46, + "confidence": 0.065 + } + ] + }, + { + "id": 33, + "seek": 5500, + "start": 62.56, + "end": 63.44, + "text": " 
We weren't sure of that.", + "tokens": [ + 492, + 4999, + 380, + 988, + 295, + 300, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.043, + "words": [ + { + "text": "We", + "start": 62.56, + "end": 63.06, + "confidence": 0.003 + }, + { + "text": "weren't", + "start": 63.06, + "end": 63.1, + "confidence": 0.155 + }, + { + "text": "sure", + "start": 63.1, + "end": 63.14, + "confidence": 0.01 + }, + { + "text": "of", + "start": 63.14, + "end": 63.18, + "confidence": 0.018 + }, + { + "text": "that.", + "start": 63.18, + "end": 63.44, + "confidence": 0.144 + } + ] + }, + { + "id": 34, + "seek": 5500, + "start": 63.46, + "end": 63.78, + "text": " Just a suggestion.", + "tokens": [ + 1449, + 257, + 16541, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.004, + "words": [ + { + "text": "Just", + "start": 63.46, + "end": 63.5, + "confidence": 0.003 + }, + { + "text": "a", + "start": 63.5, + "end": 63.74, + "confidence": 0.015 + }, + { + "text": "suggestion.", + "start": 63.74, + "end": 63.78, + "confidence": 0.002 + } + ] + }, + { + "id": 35, + "seek": 5500, + "start": 63.78, + "end": 65.26, + "text": " We thought we'd...", + "tokens": [ + 492, + 1194, + 321, + 1116, + 485 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.011, + "words": [ + { + "text": "We", + "start": 63.78, + "end": 64.9, + "confidence": 0.003 + }, + { + "text": "thought", + "start": 64.9, + "end": 65.2, + "confidence": 0.001 + }, + { + "text": "we'd...", + "start": 65.2, + "end": 65.26, + "confidence": 0.036 + } + ] + }, + { + "id": 36, + "seek": 5500, + "start": 66.26, + "end": 66.46, + "text": " You could check it out.", + "tokens": [ + 
509, + 727, + 1520, + 309, + 484, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.03, + "words": [ + { + "text": "You", + "start": 66.26, + "end": 66.3, + "confidence": 0.004 + }, + { + "text": "could", + "start": 66.3, + "end": 66.34, + "confidence": 0.002 + }, + { + "text": "check", + "start": 66.34, + "end": 66.38, + "confidence": 0.003 + }, + { + "text": "it", + "start": 66.38, + "end": 66.42, + "confidence": 0.16 + }, + { + "text": "out.", + "start": 66.42, + "end": 66.46, + "confidence": 0.442 + } + ] + }, + { + "id": 37, + "seek": 5500, + "start": 66.82, + "end": 69.34, + "text": " It's not much of an order to turn that.", + "tokens": [ + 467, + 311, + 406, + 709, + 295, + 364, + 1668, + 281, + 1261, + 300, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.136, + "words": [ + { + "text": "It's", + "start": 66.82, + "end": 67.32, + "confidence": 0.035 + }, + { + "text": "not", + "start": 67.32, + "end": 68.36, + "confidence": 0.025 + }, + { + "text": "much", + "start": 68.36, + "end": 68.62, + "confidence": 0.683 + }, + { + "text": "of", + "start": 68.62, + "end": 68.8, + "confidence": 0.59 + }, + { + "text": "an", + "start": 68.8, + "end": 68.86, + "confidence": 0.137 + }, + { + "text": "order", + "start": 68.86, + "end": 68.96, + "confidence": 0.142 + }, + { + "text": "to", + "start": 68.96, + "end": 69.16, + "confidence": 0.043 + }, + { + "text": "turn", + "start": 69.16, + "end": 69.2, + "confidence": 0.1 + }, + { + "text": "that.", + "start": 69.2, + "end": 69.34, + "confidence": 0.536 + } + ] + }, + { + "id": 38, + "seek": 5500, + "start": 70.32, + "end": 72.02, + "text": " So, uh, I guess we're going to come up with this.", + "tokens": [ + 407, + 11, + 2232, + 11, + 286, + 2041, + 321, + 434, + 516, + 
281, + 808, + 493, + 365, + 341, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.495, + "words": [ + { + "text": "So,", + "start": 70.32, + "end": 70.36, + "confidence": 0.528 + }, + { + "text": "uh,", + "start": 70.36, + "end": 70.5, + "confidence": 0.497 + }, + { + "text": "I", + "start": 70.5, + "end": 70.54, + "confidence": 0.958 + }, + { + "text": "guess", + "start": 70.54, + "end": 70.84, + "confidence": 0.986 + }, + { + "text": "we're", + "start": 70.84, + "end": 71.12, + "confidence": 0.504 + }, + { + "text": "going", + "start": 71.12, + "end": 71.3, + "confidence": 0.143 + }, + { + "text": "to", + "start": 71.3, + "end": 71.48, + "confidence": 0.956 + }, + { + "text": "come", + "start": 71.48, + "end": 71.52, + "confidence": 0.547 + }, + { + "text": "up", + "start": 71.52, + "end": 71.7, + "confidence": 0.796 + }, + { + "text": "with", + "start": 71.7, + "end": 71.88, + "confidence": 0.807 + }, + { + "text": "this.", + "start": 71.88, + "end": 72.02, + "confidence": 0.181 + } + ] + }, + { + "id": 39, + "seek": 5500, + "start": 72.04, + "end": 72.52, + "text": " Let us know.", + "tokens": [ + 961, + 505, + 458, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.323, + "words": [ + { + "text": "Let", + "start": 72.04, + "end": 72.14, + "confidence": 0.076 + }, + { + "text": "us", + "start": 72.14, + "end": 72.34, + "confidence": 0.304 + }, + { + "text": "know.", + "start": 72.34, + "end": 72.52, + "confidence": 0.686 + } + ] + }, + { + "id": 40, + "seek": 5500, + "start": 72.54, + "end": 72.7, + "text": " Okay.", + "tokens": [ + 1033, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.026, 
+ "words": [ + { + "text": "Okay.", + "start": 72.54, + "end": 72.7, + "confidence": 0.026 + } + ] + }, + { + "id": 41, + "seek": 5500, + "start": 74.2, + "end": 75.16, + "text": " No problem.", + "tokens": [ + 883, + 1154, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.082, + "words": [ + { + "text": "No", + "start": 74.2, + "end": 74.42, + "confidence": 0.004 + }, + { + "text": "problem.", + "start": 74.42, + "end": 75.16, + "confidence": 0.351 + } + ] + }, + { + "id": 42, + "seek": 5500, + "start": 75.18, + "end": 75.9, + "text": " Okay.", + "tokens": [ + 1033, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.237, + "words": [ + { + "text": "Okay.", + "start": 75.18, + "end": 75.9, + "confidence": 0.237 + } + ] + }, + { + "id": 43, + "seek": 5500, + "start": 75.92, + "end": 76.64, + "text": " No problem.", + "tokens": [ + 883, + 1154, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.56, + "words": [ + { + "text": "No", + "start": 75.92, + "end": 76.22, + "confidence": 0.366 + }, + { + "text": "problem.", + "start": 76.22, + "end": 76.64, + "confidence": 0.692 + } + ] + }, + { + "id": 44, + "seek": 5500, + "start": 76.66, + "end": 78.1, + "text": " We'll let you know when the end of...", + "tokens": [ + 492, + 603, + 718, + 291, + 458, + 562, + 264, + 917, + 295, + 485 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.367, + "words": [ + { + "text": "We'll", + "start": 76.66, + "end": 76.82, + "confidence": 0.091 + }, + { + "text": "let", + "start": 76.82, + "end": 77.04, 
+ "confidence": 0.723 + }, + { + "text": "you", + "start": 77.04, + "end": 77.18, + "confidence": 0.296 + }, + { + "text": "know", + "start": 77.18, + "end": 77.26, + "confidence": 0.984 + }, + { + "text": "when", + "start": 77.26, + "end": 77.38, + "confidence": 0.596 + }, + { + "text": "the", + "start": 77.38, + "end": 77.44, + "confidence": 0.683 + }, + { + "text": "end", + "start": 77.44, + "end": 77.56, + "confidence": 0.937 + }, + { + "text": "of...", + "start": 77.56, + "end": 78.1, + "confidence": 0.257 + } + ] + }, + { + "id": 45, + "seek": 5500, + "start": 78.12, + "end": 78.34, + "text": " None.", + "tokens": [ + 14492, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.32385124256408293, + "compression_ratio": 1.7088607594936709, + "no_speech_prob": 0.0003275917551945895, + "confidence": 0.017, + "words": [ + { + "text": "None.", + "start": 78.12, + "end": 78.34, + "confidence": 0.017 + } + ] + }, + { + "id": 46, + "seek": 7900, + "start": 78.52, + "end": 82.18, + "text": " Okay.", + "tokens": [ + 50364, + 1033, + 13, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -0.7262506484985352, + "compression_ratio": 0.38461538461538464, + "no_speech_prob": 0.0013847488444298506, + "confidence": 0.036, + "words": [ + { + "text": "Okay.", + "start": 78.52, + "end": 82.18, + "confidence": 0.036 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/naive.cpu/naive_apollo11.mp3.words.json b/tests/expected/naive.cpu/naive_apollo11.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..e4eef5136cb849167d2c3d19c38438515c0e4232 --- /dev/null +++ b/tests/expected/naive.cpu/naive_apollo11.mp3.words.json @@ -0,0 +1,3814 @@ +{ + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA. Alright, okay, we like to say that they make the one that's on the helmet we're going to have in B1. And you can put the other one on the mic helmet with those GVA blizzard frames. 
Alright, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, 
got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.64, + "end": 6.68, + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA.", + "tokens": [ + 25187, + 2975, + 11, + 18717, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 24758, + 3334, + 12, + 20914, + 460, + 2634, + 15454, + 460, + 20914, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.7224321867290296, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.45045843720436096, + "confidence": 0.49, + "words": [ + { + "text": "Apollo", + "start": 0.64, + "end": 0.98, + "confidence": 0.426 + }, + { + "text": "11,", + "start": 0.98, + "end": 1.54, + "confidence": 0.842 + }, + { + "text": "Houston", + "start": 1.54, + "end": 1.8, + "confidence": 0.974 + }, + { + "text": "we", + "start": 1.8, + "end": 1.98, + "confidence": 0.453 + }, + { + "text": "got", + "start": 1.98, + "end": 2.16, + "confidence": 0.789 + }, + { + "text": "a", + "start": 2.16, + "end": 2.36, + "confidence": 0.992 + }, + { + "text": "recommendation", + "start": 2.36, + "end": 3.1, + "confidence": 0.971 + }, + { + "text": "for", + "start": 3.1, + "end": 3.54, + "confidence": 0.944 + }, + { + "text": "you", + "start": 3.54, + "end": 3.88, + "confidence": 0.99 + }, + { + "text": "on", + "start": 3.88, + "end": 4.26, + "confidence": 0.935 + }, + { + "text": "your", + "start": 4.26, + "end": 4.3, + "confidence": 0.974 + }, + { + "text": "Soyuz-VA", + "start": 4.3, + "end": 5.38, + "confidence": 0.325 + }, + { + "text": "GLEME", + "start": 5.38, + "end": 6.02, + "confidence": 0.171 + }, + { + "text": "GVA.", + "start": 6.02, + "end": 6.68, + "confidence": 0.318 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 6.7, + "end": 19.16, + "text": " Alright, okay, we 
like to say that they make the one that's on the helmet we're going to have in B1.", + "tokens": [ + 2798, + 11, + 1392, + 11, + 321, + 411, + 281, + 584, + 300, + 436, + 652, + 264, + 472, + 300, + 311, + 322, + 264, + 15922, + 321, + 434, + 516, + 281, + 362, + 294, + 363, + 16, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.7224321867290296, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.45045843720436096, + "confidence": 0.324, + "words": [ + { + "text": "Alright,", + "start": 6.7, + "end": 12.02, + "confidence": 0.083 + }, + { + "text": "okay,", + "start": 12.02, + "end": 12.82, + "confidence": 0.321 + }, + { + "text": "we", + "start": 12.82, + "end": 13.04, + "confidence": 0.365 + }, + { + "text": "like", + "start": 13.04, + "end": 13.5, + "confidence": 0.475 + }, + { + "text": "to", + "start": 13.5, + "end": 13.68, + "confidence": 0.124 + }, + { + "text": "say", + "start": 13.68, + "end": 14.98, + "confidence": 0.127 + }, + { + "text": "that", + "start": 14.98, + "end": 15.56, + "confidence": 0.244 + }, + { + "text": "they", + "start": 15.56, + "end": 15.6, + "confidence": 0.387 + }, + { + "text": "make", + "start": 15.6, + "end": 15.78, + "confidence": 0.155 + }, + { + "text": "the", + "start": 15.78, + "end": 15.92, + "confidence": 0.172 + }, + { + "text": "one", + "start": 15.92, + "end": 16.1, + "confidence": 0.507 + }, + { + "text": "that's", + "start": 16.1, + "end": 16.36, + "confidence": 0.385 + }, + { + "text": "on", + "start": 16.36, + "end": 16.6, + "confidence": 0.324 + }, + { + "text": "the", + "start": 16.6, + "end": 16.84, + "confidence": 0.35 + }, + { + "text": "helmet", + "start": 16.84, + "end": 17.36, + "confidence": 0.218 + }, + { + "text": "we're", + "start": 17.36, + "end": 17.86, + "confidence": 0.361 + }, + { + "text": "going", + "start": 17.86, + "end": 18.06, + "confidence": 0.435 + }, + { + "text": "to", + "start": 18.06, + "end": 18.22, + "confidence": 0.496 + }, + { + "text": "have", + "start": 18.22, + 
"end": 18.26, + "confidence": 0.831 + }, + { + "text": "in", + "start": 18.26, + "end": 18.48, + "confidence": 0.761 + }, + { + "text": "B1.", + "start": 18.48, + "end": 19.16, + "confidence": 0.731 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 20.12, + "end": 24.64, + "text": " And you can put the other one on the mic helmet with those GVA blizzard frames.", + "tokens": [ + 400, + 291, + 393, + 829, + 264, + 661, + 472, + 322, + 264, + 3123, + 15922, + 365, + 729, + 460, + 20914, + 888, + 31062, + 12083, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.7224321867290296, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.45045843720436096, + "confidence": 0.119, + "words": [ + { + "text": "And", + "start": 20.12, + "end": 20.16, + "confidence": 0.27 + }, + { + "text": "you", + "start": 20.16, + "end": 20.36, + "confidence": 0.884 + }, + { + "text": "can", + "start": 20.36, + "end": 20.54, + "confidence": 0.388 + }, + { + "text": "put", + "start": 20.54, + "end": 20.72, + "confidence": 0.929 + }, + { + "text": "the", + "start": 20.72, + "end": 20.9, + "confidence": 0.98 + }, + { + "text": "other", + "start": 20.9, + "end": 21.06, + "confidence": 0.995 + }, + { + "text": "one", + "start": 21.06, + "end": 21.26, + "confidence": 0.964 + }, + { + "text": "on", + "start": 21.26, + "end": 21.8, + "confidence": 0.962 + }, + { + "text": "the", + "start": 21.8, + "end": 21.84, + "confidence": 0.185 + }, + { + "text": "mic", + "start": 21.84, + "end": 22.6, + "confidence": 0.682 + }, + { + "text": "helmet", + "start": 22.6, + "end": 22.96, + "confidence": 0.011 + }, + { + "text": "with", + "start": 22.96, + "end": 23.2, + "confidence": 0.055 + }, + { + "text": "those", + "start": 23.2, + "end": 23.46, + "confidence": 0.084 + }, + { + "text": "GVA", + "start": 23.46, + "end": 23.88, + "confidence": 0.0 + }, + { + "text": "blizzard", + "start": 23.88, + "end": 24.36, + "confidence": 0.026 + }, + { + "text": "frames.", + "start": 24.36, + "end": 24.64, + 
"confidence": 0.179 + } + ] + }, + { + "id": 3, + "seek": 2500, + "start": 24.86, + "end": 54.62, + "text": " Alright, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 2798, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 
658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.1079042222764757, + "compression_ratio": 24.096774193548388, + "no_speech_prob": 0.00111382023897022, + "confidence": 0.887, + "words": [ + { + "text": "Alright,", + "start": 24.86, + "end": 32.0, + "confidence": 0.092 + }, + { + "text": "got", + "start": 32.0, + "end": 32.04, + "confidence": 0.107 + }, + { + "text": "them,", + "start": 32.04, + "end": 32.58, + "confidence": 0.194 + }, + { + "text": "got", + "start": 32.58, + "end": 33.06, + "confidence": 0.39 + }, + { + "text": "them,", + "start": 33.06, + "end": 33.78, + "confidence": 0.616 + }, + { + "text": "got", + "start": 33.78, + "end": 33.82, + "confidence": 0.641 + }, + { + "text": "them,", + "start": 33.82, + "end": 34.52, + "confidence": 0.506 + }, + { + "text": "got", + "start": 34.52, + "end": 34.74, + "confidence": 0.662 + }, + { + "text": "them,", + "start": 34.74, + "end": 34.78, + "confidence": 0.507 + }, + { + "text": "got", + "start": 34.78, + "end": 35.02, + "confidence": 0.754 + }, + { + "text": "them,", + "start": 35.02, + "end": 35.06, + "confidence": 0.589 + }, + { + "text": "got", + "start": 35.06, + "end": 35.1, + "confidence": 0.779 + }, + { + "text": "them,", + "start": 35.1, + "end": 35.14, + "confidence": 0.705 + }, + { + "text": "got", + "start": 35.14, + "end": 35.34, + "confidence": 0.86 + }, + { + "text": "them,", + "start": 35.34, + "end": 35.98, + "confidence": 0.82 + }, + { + "text": "got", + "start": 35.98, + "end": 36.02, + "confidence": 0.916 + }, + { + "text": "them,", + "start": 36.02, + "end": 36.06, + "confidence": 0.76 + }, + { + "text": "got", + "start": 
36.06, + "end": 36.1, + "confidence": 0.479 + }, + { + "text": "them,", + "start": 36.1, + "end": 36.14, + "confidence": 0.846 + }, + { + "text": "got", + "start": 36.14, + "end": 36.18, + "confidence": 0.916 + }, + { + "text": "them,", + "start": 36.18, + "end": 36.22, + "confidence": 0.89 + }, + { + "text": "got", + "start": 36.22, + "end": 36.26, + "confidence": 0.928 + }, + { + "text": "them,", + "start": 36.26, + "end": 36.3, + "confidence": 0.903 + }, + { + "text": "got", + "start": 36.3, + "end": 36.34, + "confidence": 0.932 + }, + { + "text": "them,", + "start": 36.34, + "end": 36.38, + "confidence": 0.91 + }, + { + "text": "got", + "start": 36.38, + "end": 36.42, + "confidence": 0.944 + }, + { + "text": "them,", + "start": 36.42, + "end": 36.46, + "confidence": 0.904 + }, + { + "text": "got", + "start": 36.46, + "end": 36.5, + "confidence": 0.942 + }, + { + "text": "them,", + "start": 36.5, + "end": 36.54, + "confidence": 0.903 + }, + { + "text": "got", + "start": 36.54, + "end": 36.58, + "confidence": 0.935 + }, + { + "text": "them,", + "start": 36.58, + "end": 36.62, + "confidence": 0.912 + }, + { + "text": "got", + "start": 36.62, + "end": 36.66, + "confidence": 0.93 + }, + { + "text": "them,", + "start": 36.66, + "end": 36.7, + "confidence": 0.918 + }, + { + "text": "got", + "start": 36.7, + "end": 36.74, + "confidence": 0.93 + }, + { + "text": "them,", + "start": 36.74, + "end": 36.78, + "confidence": 0.923 + }, + { + "text": "got", + "start": 36.78, + "end": 36.82, + "confidence": 0.936 + }, + { + "text": "them,", + "start": 36.82, + "end": 36.86, + "confidence": 0.93 + }, + { + "text": "got", + "start": 36.86, + "end": 36.9, + "confidence": 0.938 + }, + { + "text": "them,", + "start": 36.9, + "end": 36.94, + "confidence": 0.937 + }, + { + "text": "got", + "start": 36.94, + "end": 36.98, + "confidence": 0.944 + }, + { + "text": "them,", + "start": 36.98, + "end": 37.02, + "confidence": 0.942 + }, + { + "text": "got", + "start": 37.02, + "end": 37.06, 
+ "confidence": 0.946 + }, + { + "text": "them,", + "start": 37.06, + "end": 37.1, + "confidence": 0.945 + }, + { + "text": "got", + "start": 37.1, + "end": 37.14, + "confidence": 0.949 + }, + { + "text": "them,", + "start": 37.14, + "end": 37.18, + "confidence": 0.948 + }, + { + "text": "got", + "start": 37.18, + "end": 37.22, + "confidence": 0.95 + }, + { + "text": "them,", + "start": 37.22, + "end": 37.26, + "confidence": 0.951 + }, + { + "text": "got", + "start": 37.26, + "end": 37.3, + "confidence": 0.952 + }, + { + "text": "them,", + "start": 37.3, + "end": 37.34, + "confidence": 0.953 + }, + { + "text": "got", + "start": 37.34, + "end": 37.38, + "confidence": 0.953 + }, + { + "text": "them,", + "start": 37.38, + "end": 37.42, + "confidence": 0.956 + }, + { + "text": "got", + "start": 37.42, + "end": 37.46, + "confidence": 0.953 + }, + { + "text": "them,", + "start": 37.46, + "end": 37.5, + "confidence": 0.957 + }, + { + "text": "got", + "start": 37.5, + "end": 37.54, + "confidence": 0.955 + }, + { + "text": "them,", + "start": 37.54, + "end": 37.58, + "confidence": 0.958 + }, + { + "text": "got", + "start": 37.58, + "end": 37.62, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 37.62, + "end": 37.66, + "confidence": 0.961 + }, + { + "text": "got", + "start": 37.66, + "end": 37.7, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 37.7, + "end": 37.74, + "confidence": 0.961 + }, + { + "text": "got", + "start": 37.74, + "end": 37.78, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 37.78, + "end": 37.82, + "confidence": 0.962 + }, + { + "text": "got", + "start": 37.82, + "end": 37.86, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 37.86, + "end": 37.9, + "confidence": 0.964 + }, + { + "text": "got", + "start": 37.9, + "end": 37.94, + "confidence": 0.958 + }, + { + "text": "them,", + "start": 37.94, + "end": 37.98, + "confidence": 0.965 + }, + { + "text": "got", + "start": 37.98, + "end": 38.02, + "confidence": 
0.959 + }, + { + "text": "them,", + "start": 38.02, + "end": 38.06, + "confidence": 0.966 + }, + { + "text": "got", + "start": 38.06, + "end": 38.1, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 38.1, + "end": 38.14, + "confidence": 0.968 + }, + { + "text": "got", + "start": 38.14, + "end": 38.18, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 38.18, + "end": 38.22, + "confidence": 0.969 + }, + { + "text": "got", + "start": 38.22, + "end": 38.26, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 38.26, + "end": 38.3, + "confidence": 0.968 + }, + { + "text": "got", + "start": 38.3, + "end": 38.34, + "confidence": 0.964 + }, + { + "text": "them,", + "start": 38.34, + "end": 38.38, + "confidence": 0.971 + }, + { + "text": "got", + "start": 38.38, + "end": 38.42, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 38.42, + "end": 38.46, + "confidence": 0.972 + }, + { + "text": "got", + "start": 38.46, + "end": 38.5, + "confidence": 0.968 + }, + { + "text": "them,", + "start": 38.5, + "end": 38.54, + "confidence": 0.974 + }, + { + "text": "got", + "start": 38.54, + "end": 38.58, + "confidence": 0.969 + }, + { + "text": "them,", + "start": 38.58, + "end": 38.62, + "confidence": 0.974 + }, + { + "text": "got", + "start": 38.62, + "end": 38.66, + "confidence": 0.971 + }, + { + "text": "them,", + "start": 38.66, + "end": 38.7, + "confidence": 0.974 + }, + { + "text": "got", + "start": 38.7, + "end": 38.74, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 38.74, + "end": 38.78, + "confidence": 0.976 + }, + { + "text": "got", + "start": 38.78, + "end": 38.82, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 38.82, + "end": 38.86, + "confidence": 0.977 + }, + { + "text": "got", + "start": 38.86, + "end": 38.9, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 38.9, + "end": 38.94, + "confidence": 0.978 + }, + { + "text": "got", + "start": 38.94, + "end": 38.98, + "confidence": 0.978 + }, + { + 
"text": "them,", + "start": 38.98, + "end": 39.02, + "confidence": 0.978 + }, + { + "text": "got", + "start": 39.02, + "end": 39.06, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 39.06, + "end": 39.1, + "confidence": 0.979 + }, + { + "text": "got", + "start": 39.1, + "end": 39.14, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 39.14, + "end": 39.18, + "confidence": 0.981 + }, + { + "text": "got", + "start": 39.18, + "end": 39.22, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 39.22, + "end": 39.26, + "confidence": 0.982 + }, + { + "text": "got", + "start": 39.26, + "end": 39.3, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 39.3, + "end": 39.34, + "confidence": 0.982 + }, + { + "text": "got", + "start": 39.34, + "end": 39.38, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 39.38, + "end": 39.42, + "confidence": 0.983 + }, + { + "text": "got", + "start": 39.42, + "end": 39.46, + "confidence": 0.988 + }, + { + "text": "them,", + "start": 39.46, + "end": 39.5, + "confidence": 0.982 + }, + { + "text": "got", + "start": 39.5, + "end": 39.54, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 39.54, + "end": 39.58, + "confidence": 0.983 + }, + { + "text": "got", + "start": 39.58, + "end": 39.62, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 39.62, + "end": 39.66, + "confidence": 0.984 + }, + { + "text": "got", + "start": 39.66, + "end": 39.7, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 39.7, + "end": 39.74, + "confidence": 0.985 + }, + { + "text": "got", + "start": 39.74, + "end": 39.78, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 39.78, + "end": 39.82, + "confidence": 0.985 + }, + { + "text": "got", + "start": 39.82, + "end": 39.86, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 39.86, + "end": 39.9, + "confidence": 0.985 + }, + { + "text": "got", + "start": 39.9, + "end": 39.94, + "confidence": 0.992 + }, + { + "text": "them,", + 
"start": 39.94, + "end": 39.98, + "confidence": 0.985 + }, + { + "text": "got", + "start": 39.98, + "end": 40.02, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.02, + "end": 40.06, + "confidence": 0.986 + }, + { + "text": "got", + "start": 40.06, + "end": 40.1, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 40.1, + "end": 40.14, + "confidence": 0.986 + }, + { + "text": "got", + "start": 40.14, + "end": 40.18, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 40.18, + "end": 40.22, + "confidence": 0.987 + }, + { + "text": "got", + "start": 40.22, + "end": 40.26, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 40.26, + "end": 40.3, + "confidence": 0.987 + }, + { + "text": "got", + "start": 40.3, + "end": 40.34, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.34, + "end": 40.38, + "confidence": 0.987 + }, + { + "text": "got", + "start": 40.38, + "end": 40.42, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.42, + "end": 40.46, + "confidence": 0.987 + }, + { + "text": "got", + "start": 40.46, + "end": 40.5, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.5, + "end": 40.54, + "confidence": 0.986 + }, + { + "text": "got", + "start": 40.54, + "end": 40.58, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 40.58, + "end": 40.76, + "confidence": 0.988 + }, + { + "text": "got", + "start": 40.76, + "end": 40.8, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.8, + "end": 40.84, + "confidence": 0.988 + }, + { + "text": "got", + "start": 40.84, + "end": 40.88, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 40.88, + "end": 40.92, + "confidence": 0.989 + }, + { + "text": "got", + "start": 40.92, + "end": 40.96, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 40.96, + "end": 41.0, + "confidence": 0.989 + }, + { + "text": "got", + "start": 41.0, + "end": 41.04, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 41.04, + 
"end": 41.08, + "confidence": 0.989 + }, + { + "text": "got", + "start": 41.08, + "end": 41.12, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 41.12, + "end": 42.32, + "confidence": 0.989 + }, + { + "text": "got", + "start": 42.32, + "end": 42.68, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 42.68, + "end": 42.9, + "confidence": 0.989 + }, + { + "text": "got", + "start": 42.9, + "end": 44.08, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 44.08, + "end": 44.96, + "confidence": 0.989 + }, + { + "text": "got", + "start": 44.96, + "end": 46.9, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 46.9, + "end": 47.82, + "confidence": 0.989 + }, + { + "text": "got", + "start": 47.82, + "end": 48.56, + "confidence": 0.994 + }, + { + "text": "them", + "start": 48.56, + "end": 54.62, + "confidence": 0.996 + } + ] + }, + { + "id": 4, + "seek": 5500, + "start": 54.64, + "end": 84.6, + "text": " got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 
658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.053046889369263245, + "compression_ratio": 29.52, + "no_speech_prob": 0.24410122632980347, + "confidence": 0.839, + "words": [ + { + "text": "got", + "start": 54.64, + "end": 54.82, + "confidence": 0.004 + }, + { + "text": "them,", + "start": 54.82, + "end": 56.36, + "confidence": 0.007 + }, + { + "text": "got", + "start": 56.36, + "end": 58.46, + "confidence": 0.037 + }, + { + "text": "them,", + "start": 58.46, + "end": 59.7, + "confidence": 0.349 + }, + { + "text": "got", + "start": 59.7, + "end": 60.0, + "confidence": 0.616 + }, + { + "text": "them,", + "start": 60.0, + "end": 60.04, + "confidence": 0.574 + }, + { + "text": "got", + "start": 60.04, + "end": 60.08, + "confidence": 0.808 + }, + { + "text": "them,", + "start": 60.08, + "end": 60.12, + 
"confidence": 0.636 + }, + { + "text": "got", + "start": 60.12, + "end": 60.16, + "confidence": 0.875 + }, + { + "text": "them,", + "start": 60.16, + "end": 60.2, + "confidence": 0.676 + }, + { + "text": "got", + "start": 60.2, + "end": 60.24, + "confidence": 0.9 + }, + { + "text": "them,", + "start": 60.24, + "end": 60.42, + "confidence": 0.689 + }, + { + "text": "got", + "start": 60.42, + "end": 60.46, + "confidence": 0.871 + }, + { + "text": "them,", + "start": 60.46, + "end": 60.5, + "confidence": 0.757 + }, + { + "text": "got", + "start": 60.5, + "end": 60.62, + "confidence": 0.904 + }, + { + "text": "them,", + "start": 60.62, + "end": 61.78, + "confidence": 0.825 + }, + { + "text": "got", + "start": 61.78, + "end": 61.82, + "confidence": 0.926 + }, + { + "text": "them,", + "start": 61.82, + "end": 61.86, + "confidence": 0.58 + }, + { + "text": "got", + "start": 61.86, + "end": 61.9, + "confidence": 0.123 + }, + { + "text": "them,", + "start": 61.9, + "end": 61.94, + "confidence": 0.75 + }, + { + "text": "got", + "start": 61.94, + "end": 61.98, + "confidence": 0.772 + }, + { + "text": "them,", + "start": 61.98, + "end": 62.02, + "confidence": 0.81 + }, + { + "text": "got", + "start": 62.02, + "end": 62.06, + "confidence": 0.808 + }, + { + "text": "them,", + "start": 62.06, + "end": 62.1, + "confidence": 0.828 + }, + { + "text": "got", + "start": 62.1, + "end": 62.14, + "confidence": 0.82 + }, + { + "text": "them,", + "start": 62.14, + "end": 62.18, + "confidence": 0.852 + }, + { + "text": "got", + "start": 62.18, + "end": 62.22, + "confidence": 0.858 + }, + { + "text": "them,", + "start": 62.22, + "end": 62.26, + "confidence": 0.886 + }, + { + "text": "got", + "start": 62.26, + "end": 62.3, + "confidence": 0.899 + }, + { + "text": "them,", + "start": 62.3, + "end": 62.34, + "confidence": 0.901 + }, + { + "text": "got", + "start": 62.34, + "end": 62.38, + "confidence": 0.89 + }, + { + "text": "them,", + "start": 62.38, + "end": 62.42, + "confidence": 0.905 + }, 
+ { + "text": "got", + "start": 62.42, + "end": 62.46, + "confidence": 0.868 + }, + { + "text": "them,", + "start": 62.46, + "end": 62.5, + "confidence": 0.919 + }, + { + "text": "got", + "start": 62.5, + "end": 62.54, + "confidence": 0.881 + }, + { + "text": "them,", + "start": 62.54, + "end": 62.58, + "confidence": 0.929 + }, + { + "text": "got", + "start": 62.58, + "end": 62.62, + "confidence": 0.893 + }, + { + "text": "them,", + "start": 62.62, + "end": 62.66, + "confidence": 0.938 + }, + { + "text": "got", + "start": 62.66, + "end": 62.7, + "confidence": 0.906 + }, + { + "text": "them,", + "start": 62.7, + "end": 62.74, + "confidence": 0.943 + }, + { + "text": "got", + "start": 62.74, + "end": 62.78, + "confidence": 0.913 + }, + { + "text": "them,", + "start": 62.78, + "end": 62.82, + "confidence": 0.949 + }, + { + "text": "got", + "start": 62.82, + "end": 62.86, + "confidence": 0.919 + }, + { + "text": "them,", + "start": 62.86, + "end": 62.9, + "confidence": 0.952 + }, + { + "text": "got", + "start": 62.9, + "end": 62.94, + "confidence": 0.926 + }, + { + "text": "them,", + "start": 62.94, + "end": 62.98, + "confidence": 0.956 + }, + { + "text": "got", + "start": 62.98, + "end": 63.02, + "confidence": 0.932 + }, + { + "text": "them,", + "start": 63.02, + "end": 63.06, + "confidence": 0.958 + }, + { + "text": "got", + "start": 63.06, + "end": 63.1, + "confidence": 0.936 + }, + { + "text": "them,", + "start": 63.1, + "end": 63.14, + "confidence": 0.961 + }, + { + "text": "got", + "start": 63.14, + "end": 63.18, + "confidence": 0.938 + }, + { + "text": "them,", + "start": 63.18, + "end": 63.22, + "confidence": 0.964 + }, + { + "text": "got", + "start": 63.22, + "end": 63.26, + "confidence": 0.939 + }, + { + "text": "them,", + "start": 63.26, + "end": 63.3, + "confidence": 0.964 + }, + { + "text": "got", + "start": 63.3, + "end": 63.34, + "confidence": 0.941 + }, + { + "text": "them,", + "start": 63.34, + "end": 63.38, + "confidence": 0.965 + }, + { + "text": 
"got", + "start": 63.38, + "end": 63.42, + "confidence": 0.943 + }, + { + "text": "them,", + "start": 63.42, + "end": 63.46, + "confidence": 0.967 + }, + { + "text": "got", + "start": 63.46, + "end": 63.5, + "confidence": 0.944 + }, + { + "text": "them,", + "start": 63.5, + "end": 63.54, + "confidence": 0.968 + }, + { + "text": "got", + "start": 63.54, + "end": 63.58, + "confidence": 0.945 + }, + { + "text": "them,", + "start": 63.58, + "end": 63.62, + "confidence": 0.968 + }, + { + "text": "got", + "start": 63.62, + "end": 63.66, + "confidence": 0.947 + }, + { + "text": "them,", + "start": 63.66, + "end": 63.7, + "confidence": 0.97 + }, + { + "text": "got", + "start": 63.7, + "end": 63.74, + "confidence": 0.947 + }, + { + "text": "them,", + "start": 63.74, + "end": 63.78, + "confidence": 0.971 + }, + { + "text": "got", + "start": 63.78, + "end": 63.82, + "confidence": 0.949 + }, + { + "text": "them,", + "start": 63.82, + "end": 63.86, + "confidence": 0.971 + }, + { + "text": "got", + "start": 63.86, + "end": 63.9, + "confidence": 0.95 + }, + { + "text": "them,", + "start": 63.9, + "end": 63.94, + "confidence": 0.971 + }, + { + "text": "got", + "start": 63.94, + "end": 63.98, + "confidence": 0.952 + }, + { + "text": "them,", + "start": 63.98, + "end": 64.02, + "confidence": 0.97 + }, + { + "text": "got", + "start": 64.02, + "end": 64.06, + "confidence": 0.953 + }, + { + "text": "them,", + "start": 64.06, + "end": 64.1, + "confidence": 0.972 + }, + { + "text": "got", + "start": 64.1, + "end": 64.14, + "confidence": 0.955 + }, + { + "text": "them,", + "start": 64.14, + "end": 64.18, + "confidence": 0.972 + }, + { + "text": "got", + "start": 64.18, + "end": 64.22, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 64.22, + "end": 64.26, + "confidence": 0.973 + }, + { + "text": "got", + "start": 64.26, + "end": 64.3, + "confidence": 0.958 + }, + { + "text": "them,", + "start": 64.3, + "end": 64.34, + "confidence": 0.974 + }, + { + "text": "got", + "start": 
64.34, + "end": 64.38, + "confidence": 0.959 + }, + { + "text": "them,", + "start": 64.38, + "end": 64.42, + "confidence": 0.974 + }, + { + "text": "got", + "start": 64.42, + "end": 64.46, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 64.46, + "end": 64.5, + "confidence": 0.974 + }, + { + "text": "got", + "start": 64.5, + "end": 64.54, + "confidence": 0.964 + }, + { + "text": "them,", + "start": 64.54, + "end": 64.58, + "confidence": 0.974 + }, + { + "text": "got", + "start": 64.58, + "end": 64.62, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 64.62, + "end": 64.66, + "confidence": 0.975 + }, + { + "text": "got", + "start": 64.66, + "end": 64.7, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 64.7, + "end": 64.74, + "confidence": 0.975 + }, + { + "text": "got", + "start": 64.74, + "end": 64.78, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 64.78, + "end": 64.82, + "confidence": 0.977 + }, + { + "text": "got", + "start": 64.82, + "end": 64.86, + "confidence": 0.971 + }, + { + "text": "them,", + "start": 64.86, + "end": 64.9, + "confidence": 0.977 + }, + { + "text": "got", + "start": 64.9, + "end": 64.94, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 64.94, + "end": 64.98, + "confidence": 0.976 + }, + { + "text": "got", + "start": 64.98, + "end": 65.02, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 65.02, + "end": 65.06, + "confidence": 0.976 + }, + { + "text": "got", + "start": 65.06, + "end": 65.1, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 65.1, + "end": 65.14, + "confidence": 0.978 + }, + { + "text": "got", + "start": 65.14, + "end": 65.18, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 65.18, + "end": 65.22, + "confidence": 0.978 + }, + { + "text": "got", + "start": 65.22, + "end": 65.26, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 65.26, + "end": 65.3, + "confidence": 0.979 + }, + { + "text": "got", + "start": 65.3, + "end": 
65.34, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 65.34, + "end": 65.38, + "confidence": 0.979 + }, + { + "text": "got", + "start": 65.38, + "end": 65.42, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 65.42, + "end": 65.46, + "confidence": 0.978 + }, + { + "text": "got", + "start": 65.46, + "end": 65.5, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 65.5, + "end": 65.54, + "confidence": 0.98 + }, + { + "text": "got", + "start": 65.54, + "end": 65.58, + "confidence": 0.983 + }, + { + "text": "them,", + "start": 65.58, + "end": 65.62, + "confidence": 0.979 + }, + { + "text": "got", + "start": 65.62, + "end": 65.66, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 65.66, + "end": 65.7, + "confidence": 0.98 + }, + { + "text": "got", + "start": 65.7, + "end": 65.74, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 65.74, + "end": 65.78, + "confidence": 0.98 + }, + { + "text": "got", + "start": 65.78, + "end": 65.82, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 65.82, + "end": 65.86, + "confidence": 0.981 + }, + { + "text": "got", + "start": 65.86, + "end": 65.9, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 65.9, + "end": 65.94, + "confidence": 0.98 + }, + { + "text": "got", + "start": 65.94, + "end": 65.98, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 65.98, + "end": 66.02, + "confidence": 0.982 + }, + { + "text": "got", + "start": 66.02, + "end": 66.3, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 66.3, + "end": 66.34, + "confidence": 0.982 + }, + { + "text": "got", + "start": 66.34, + "end": 66.54, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 66.54, + "end": 66.58, + "confidence": 0.982 + }, + { + "text": "got", + "start": 66.58, + "end": 66.62, + "confidence": 0.988 + }, + { + "text": "them,", + "start": 66.62, + "end": 66.66, + "confidence": 0.981 + }, + { + "text": "got", + "start": 66.66, + "end": 67.38, + 
"confidence": 0.989 + }, + { + "text": "them,", + "start": 67.38, + "end": 67.42, + "confidence": 0.982 + }, + { + "text": "got", + "start": 67.42, + "end": 67.46, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 67.46, + "end": 67.5, + "confidence": 0.982 + }, + { + "text": "got", + "start": 67.5, + "end": 67.54, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 67.54, + "end": 67.72, + "confidence": 0.983 + }, + { + "text": "got", + "start": 67.72, + "end": 67.76, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 67.76, + "end": 67.8, + "confidence": 0.984 + }, + { + "text": "got", + "start": 67.8, + "end": 68.9, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 68.9, + "end": 69.8, + "confidence": 0.983 + }, + { + "text": "got", + "start": 69.8, + "end": 70.66, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 70.66, + "end": 70.94, + "confidence": 0.985 + }, + { + "text": "got", + "start": 70.94, + "end": 71.08, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 71.08, + "end": 72.02, + "confidence": 0.984 + }, + { + "text": "got", + "start": 72.02, + "end": 72.08, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 72.08, + "end": 72.84, + "confidence": 0.984 + }, + { + "text": "got", + "start": 72.84, + "end": 77.32, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 77.32, + "end": 79.8, + "confidence": 0.985 + }, + { + "text": "got", + "start": 79.8, + "end": 79.84, + "confidence": 0.992 + }, + { + "text": "them", + "start": 79.84, + "end": 84.6, + "confidence": 0.994 + } + ] + }, + { + "id": 5, + "seek": 8500, + "start": 85.24, + "end": 91.16, + "text": " got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got 
them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.04965524716227578, + 
"compression_ratio": 29.52, + "no_speech_prob": 0.6971923112869263, + "confidence": 0.851, + "words": [ + { + "text": "got", + "start": 85.24, + "end": 85.28, + "confidence": 0.0 + }, + { + "text": "them,", + "start": 85.28, + "end": 85.32, + "confidence": 0.024 + }, + { + "text": "got", + "start": 85.32, + "end": 85.36, + "confidence": 0.839 + }, + { + "text": "them,", + "start": 85.36, + "end": 85.4, + "confidence": 0.564 + }, + { + "text": "got", + "start": 85.4, + "end": 85.44, + "confidence": 0.755 + }, + { + "text": "them,", + "start": 85.44, + "end": 85.48, + "confidence": 0.442 + }, + { + "text": "got", + "start": 85.48, + "end": 85.52, + "confidence": 0.859 + }, + { + "text": "them,", + "start": 85.52, + "end": 85.56, + "confidence": 0.389 + }, + { + "text": "got", + "start": 85.56, + "end": 85.6, + "confidence": 0.895 + }, + { + "text": "them,", + "start": 85.6, + "end": 85.64, + "confidence": 0.376 + }, + { + "text": "got", + "start": 85.64, + "end": 85.68, + "confidence": 0.896 + }, + { + "text": "them,", + "start": 85.68, + "end": 85.72, + "confidence": 0.39 + }, + { + "text": "got", + "start": 85.72, + "end": 85.76, + "confidence": 0.871 + }, + { + "text": "them,", + "start": 85.76, + "end": 85.8, + "confidence": 0.468 + }, + { + "text": "got", + "start": 85.8, + "end": 85.84, + "confidence": 0.894 + }, + { + "text": "them,", + "start": 85.84, + "end": 85.88, + "confidence": 0.576 + }, + { + "text": "got", + "start": 85.88, + "end": 85.92, + "confidence": 0.914 + }, + { + "text": "them,", + "start": 85.92, + "end": 85.96, + "confidence": 0.645 + }, + { + "text": "got", + "start": 85.96, + "end": 86.0, + "confidence": 0.633 + }, + { + "text": "them,", + "start": 86.0, + "end": 86.04, + "confidence": 0.746 + }, + { + "text": "got", + "start": 86.04, + "end": 86.08, + "confidence": 0.829 + }, + { + "text": "them,", + "start": 86.08, + "end": 86.12, + "confidence": 0.842 + }, + { + "text": "got", + "start": 86.12, + "end": 86.16, + "confidence": 0.906 + 
}, + { + "text": "them,", + "start": 86.16, + "end": 86.2, + "confidence": 0.89 + }, + { + "text": "got", + "start": 86.2, + "end": 86.24, + "confidence": 0.924 + }, + { + "text": "them,", + "start": 86.24, + "end": 86.28, + "confidence": 0.916 + }, + { + "text": "got", + "start": 86.28, + "end": 86.32, + "confidence": 0.94 + }, + { + "text": "them,", + "start": 86.32, + "end": 86.36, + "confidence": 0.934 + }, + { + "text": "got", + "start": 86.36, + "end": 86.4, + "confidence": 0.95 + }, + { + "text": "them,", + "start": 86.4, + "end": 86.44, + "confidence": 0.946 + }, + { + "text": "got", + "start": 86.44, + "end": 86.48, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 86.48, + "end": 86.52, + "confidence": 0.946 + }, + { + "text": "got", + "start": 86.52, + "end": 86.56, + "confidence": 0.955 + }, + { + "text": "them,", + "start": 86.56, + "end": 86.6, + "confidence": 0.946 + }, + { + "text": "got", + "start": 86.6, + "end": 86.64, + "confidence": 0.947 + }, + { + "text": "them,", + "start": 86.64, + "end": 86.68, + "confidence": 0.948 + }, + { + "text": "got", + "start": 86.68, + "end": 86.72, + "confidence": 0.941 + }, + { + "text": "them,", + "start": 86.72, + "end": 86.76, + "confidence": 0.953 + }, + { + "text": "got", + "start": 86.76, + "end": 86.8, + "confidence": 0.942 + }, + { + "text": "them,", + "start": 86.8, + "end": 86.84, + "confidence": 0.956 + }, + { + "text": "got", + "start": 86.84, + "end": 86.88, + "confidence": 0.942 + }, + { + "text": "them,", + "start": 86.88, + "end": 86.92, + "confidence": 0.961 + }, + { + "text": "got", + "start": 86.92, + "end": 86.96, + "confidence": 0.942 + }, + { + "text": "them,", + "start": 86.96, + "end": 87.0, + "confidence": 0.964 + }, + { + "text": "got", + "start": 87.0, + "end": 87.04, + "confidence": 0.945 + }, + { + "text": "them,", + "start": 87.04, + "end": 87.08, + "confidence": 0.967 + }, + { + "text": "got", + "start": 87.08, + "end": 87.12, + "confidence": 0.948 + }, + { + "text": 
"them,", + "start": 87.12, + "end": 87.16, + "confidence": 0.969 + }, + { + "text": "got", + "start": 87.16, + "end": 87.2, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 87.2, + "end": 87.24, + "confidence": 0.972 + }, + { + "text": "got", + "start": 87.24, + "end": 87.28, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 87.28, + "end": 87.32, + "confidence": 0.976 + }, + { + "text": "got", + "start": 87.32, + "end": 87.36, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 87.36, + "end": 87.4, + "confidence": 0.976 + }, + { + "text": "got", + "start": 87.4, + "end": 87.44, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 87.44, + "end": 87.48, + "confidence": 0.977 + }, + { + "text": "got", + "start": 87.48, + "end": 87.52, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 87.52, + "end": 87.56, + "confidence": 0.978 + }, + { + "text": "got", + "start": 87.56, + "end": 87.6, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 87.6, + "end": 87.64, + "confidence": 0.98 + }, + { + "text": "got", + "start": 87.64, + "end": 87.68, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 87.68, + "end": 87.72, + "confidence": 0.98 + }, + { + "text": "got", + "start": 87.72, + "end": 87.76, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 87.76, + "end": 87.8, + "confidence": 0.981 + }, + { + "text": "got", + "start": 87.8, + "end": 87.84, + "confidence": 0.958 + }, + { + "text": "them,", + "start": 87.84, + "end": 87.88, + "confidence": 0.982 + }, + { + "text": "got", + "start": 87.88, + "end": 87.92, + "confidence": 0.959 + }, + { + "text": "them,", + "start": 87.92, + "end": 87.96, + "confidence": 0.983 + }, + { + "text": "got", + "start": 87.96, + "end": 88.0, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 88.0, + "end": 88.04, + "confidence": 0.983 + }, + { + "text": "got", + "start": 88.04, + "end": 88.08, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 
88.08, + "end": 88.12, + "confidence": 0.983 + }, + { + "text": "got", + "start": 88.12, + "end": 88.16, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 88.16, + "end": 88.2, + "confidence": 0.984 + }, + { + "text": "got", + "start": 88.2, + "end": 88.24, + "confidence": 0.966 + }, + { + "text": "them,", + "start": 88.24, + "end": 88.28, + "confidence": 0.984 + }, + { + "text": "got", + "start": 88.28, + "end": 88.32, + "confidence": 0.968 + }, + { + "text": "them,", + "start": 88.32, + "end": 88.36, + "confidence": 0.986 + }, + { + "text": "got", + "start": 88.36, + "end": 88.4, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 88.4, + "end": 88.44, + "confidence": 0.986 + }, + { + "text": "got", + "start": 88.44, + "end": 88.48, + "confidence": 0.971 + }, + { + "text": "them,", + "start": 88.48, + "end": 88.52, + "confidence": 0.986 + }, + { + "text": "got", + "start": 88.52, + "end": 88.56, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 88.56, + "end": 88.6, + "confidence": 0.987 + }, + { + "text": "got", + "start": 88.6, + "end": 88.64, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 88.64, + "end": 88.68, + "confidence": 0.987 + }, + { + "text": "got", + "start": 88.68, + "end": 88.72, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 88.72, + "end": 88.76, + "confidence": 0.988 + }, + { + "text": "got", + "start": 88.76, + "end": 88.8, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 88.8, + "end": 88.84, + "confidence": 0.988 + }, + { + "text": "got", + "start": 88.84, + "end": 88.88, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 88.88, + "end": 88.92, + "confidence": 0.989 + }, + { + "text": "got", + "start": 88.92, + "end": 88.96, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 88.96, + "end": 89.0, + "confidence": 0.989 + }, + { + "text": "got", + "start": 89.0, + "end": 89.04, + "confidence": 0.983 + }, + { + "text": "them,", + "start": 89.04, + "end": 
89.08, + "confidence": 0.989 + }, + { + "text": "got", + "start": 89.08, + "end": 89.12, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 89.12, + "end": 89.16, + "confidence": 0.989 + }, + { + "text": "got", + "start": 89.16, + "end": 89.2, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 89.2, + "end": 89.24, + "confidence": 0.99 + }, + { + "text": "got", + "start": 89.24, + "end": 89.28, + "confidence": 0.988 + }, + { + "text": "them,", + "start": 89.28, + "end": 89.32, + "confidence": 0.991 + }, + { + "text": "got", + "start": 89.32, + "end": 89.36, + "confidence": 0.988 + }, + { + "text": "them,", + "start": 89.36, + "end": 89.4, + "confidence": 0.991 + }, + { + "text": "got", + "start": 89.4, + "end": 89.44, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 89.44, + "end": 89.48, + "confidence": 0.991 + }, + { + "text": "got", + "start": 89.48, + "end": 89.52, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 89.52, + "end": 89.56, + "confidence": 0.991 + }, + { + "text": "got", + "start": 89.56, + "end": 89.6, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 89.6, + "end": 89.64, + "confidence": 0.992 + }, + { + "text": "got", + "start": 89.64, + "end": 89.68, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 89.68, + "end": 89.72, + "confidence": 0.992 + }, + { + "text": "got", + "start": 89.72, + "end": 89.76, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 89.76, + "end": 89.8, + "confidence": 0.992 + }, + { + "text": "got", + "start": 89.8, + "end": 89.84, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 89.84, + "end": 89.88, + "confidence": 0.992 + }, + { + "text": "got", + "start": 89.88, + "end": 89.92, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 89.92, + "end": 89.96, + "confidence": 0.993 + }, + { + "text": "got", + "start": 89.96, + "end": 90.0, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 90.0, + "end": 90.04, + 
"confidence": 0.993 + }, + { + "text": "got", + "start": 90.04, + "end": 90.08, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 90.08, + "end": 90.12, + "confidence": 0.993 + }, + { + "text": "got", + "start": 90.12, + "end": 90.16, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 90.16, + "end": 90.2, + "confidence": 0.993 + }, + { + "text": "got", + "start": 90.2, + "end": 90.24, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 90.24, + "end": 90.28, + "confidence": 0.994 + }, + { + "text": "got", + "start": 90.28, + "end": 90.32, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 90.32, + "end": 90.36, + "confidence": 0.993 + }, + { + "text": "got", + "start": 90.36, + "end": 90.4, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 90.4, + "end": 90.44, + "confidence": 0.993 + }, + { + "text": "got", + "start": 90.44, + "end": 90.48, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 90.48, + "end": 90.52, + "confidence": 0.994 + }, + { + "text": "got", + "start": 90.52, + "end": 90.56, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 90.56, + "end": 90.6, + "confidence": 0.994 + }, + { + "text": "got", + "start": 90.6, + "end": 90.64, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 90.64, + "end": 90.68, + "confidence": 0.994 + }, + { + "text": "got", + "start": 90.68, + "end": 90.72, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 90.72, + "end": 90.76, + "confidence": 0.994 + }, + { + "text": "got", + "start": 90.76, + "end": 90.8, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 90.8, + "end": 90.84, + "confidence": 0.994 + }, + { + "text": "got", + "start": 90.84, + "end": 90.88, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 90.88, + "end": 90.92, + "confidence": 0.994 + }, + { + "text": "got", + "start": 90.92, + "end": 90.96, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 90.96, + "end": 91.0, + "confidence": 
0.994 + }, + { + "text": "got", + "start": 91.0, + "end": 91.04, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 91.04, + "end": 91.08, + "confidence": 0.994 + }, + { + "text": "got", + "start": 91.08, + "end": 91.12, + "confidence": 0.995 + }, + { + "text": "them", + "start": 91.12, + "end": 91.16, + "confidence": 0.998 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/naive.cuda/accurate_apollo11.mp3.words.json b/tests/expected/naive.cuda/accurate_apollo11.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..5541228558b3a625658b695ccc9b63cdd745f928 --- /dev/null +++ b/tests/expected/naive.cuda/accurate_apollo11.mp3.words.json @@ -0,0 +1,2438 @@ +{ + "text": " Apollo 11, Houston, we got a recommendation for you on your Soyuz-EA GLEM-E-G-E-A. All right. Okay. Yeah, sir. Yeah, sir. Let's take that camera. Let's say it makes it want to go on the helmet we were going to have in B-1. The other one. And you can put the other one on the mic helmet. We'll show it to you in a quick screen. Over. All right. Yeah, sir. All right. Yeah, sir. There's a better helmet than B-1. That's the other one. Nice. Let's go in there. At least we're safe. We've got them in there. Helmet bags. And, uh, I guess we have helmets in the helmet bag. At least it's in the helmet bag. Right here. Right here. Yeah, we're taking it next day out of the field up. Yeah, we were. You want to hack me on that? Yeah, we were. You want to hack me on that? With the cover, I tried it already. Okay, fine. We weren't sure of that. Just a suggestion. We thought we'd, uh, say you could check it out. It's not much of an order to turn that. So, uh, I guess we're going to come up with this. Let us know. Okay. No problem. Okay. No problem. No problem. No problem. No problem. No problem. No problem. 
No problem.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.64, + "end": 6.76, + "text": " Apollo 11, Houston, we got a recommendation for you on your Soyuz-EA GLEM-E-G-E-A.", + "tokens": [ + 25187, + 2975, + 11, + 18717, + 11, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 24758, + 3334, + 12, + 36, + 32, + 460, + 2634, + 44, + 12, + 36, + 12, + 38, + 12, + 36, + 12, + 32, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5338267664755544, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.4499393105506897, + "confidence": 0.578, + "words": [ + { + "text": "Apollo", + "start": 0.64, + "end": 0.98, + "confidence": 0.425 + }, + { + "text": "11,", + "start": 0.98, + "end": 1.52, + "confidence": 0.842 + }, + { + "text": "Houston,", + "start": 1.52, + "end": 1.94, + "confidence": 0.665 + }, + { + "text": "we", + "start": 1.94, + "end": 1.98, + "confidence": 0.983 + }, + { + "text": "got", + "start": 1.98, + "end": 2.16, + "confidence": 0.743 + }, + { + "text": "a", + "start": 2.16, + "end": 2.32, + "confidence": 0.993 + }, + { + "text": "recommendation", + "start": 2.32, + "end": 3.08, + "confidence": 0.977 + }, + { + "text": "for", + "start": 3.08, + "end": 3.5, + "confidence": 0.94 + }, + { + "text": "you", + "start": 3.5, + "end": 3.72, + "confidence": 0.993 + }, + { + "text": "on", + "start": 3.72, + "end": 4.26, + "confidence": 0.918 + }, + { + "text": "your", + "start": 4.26, + "end": 4.3, + "confidence": 0.976 + }, + { + "text": "Soyuz-EA", + "start": 4.3, + "end": 5.36, + "confidence": 0.455 + }, + { + "text": "GLEM-E-G-E-A.", + "start": 5.36, + "end": 6.76, + "confidence": 0.436 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 10.82, + "end": 10.9, + "text": " All right.", + "tokens": [ + 1057, + 558, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5338267664755544, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.4499393105506897, + "confidence": 0.149, + "words": [ + { + "text": "All", + "start": 
10.82, + "end": 10.86, + "confidence": 0.006 + }, + { + "text": "right.", + "start": 10.86, + "end": 10.9, + "confidence": 0.72 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 12.02, + "end": 12.06, + "text": " Okay.", + "tokens": [ + 1033, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5338267664755544, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.4499393105506897, + "confidence": 0.168, + "words": [ + { + "text": "Okay.", + "start": 12.02, + "end": 12.06, + "confidence": 0.168 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 12.08, + "end": 13.44, + "text": " Yeah, sir.", + "tokens": [ + 865, + 11, + 4735, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5338267664755544, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.4499393105506897, + "confidence": 0.131, + "words": [ + { + "text": "Yeah,", + "start": 12.08, + "end": 12.68, + "confidence": 0.079 + }, + { + "text": "sir.", + "start": 12.68, + "end": 13.44, + "confidence": 0.218 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 13.66, + "end": 14.28, + "text": " Yeah, sir.", + "tokens": [ + 865, + 11, + 4735, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5338267664755544, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.4499393105506897, + "confidence": 0.047, + "words": [ + { + "text": "Yeah,", + "start": 13.66, + "end": 13.82, + "confidence": 0.11 + }, + { + "text": "sir.", + "start": 13.82, + "end": 14.28, + "confidence": 0.02 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 14.3, + "end": 14.98, + "text": " Let's take that camera.", + "tokens": [ + 961, + 311, + 747, + 300, + 2799, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5338267664755544, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.4499393105506897, + "confidence": 0.033, + "words": [ + { + "text": "Let's", + "start": 14.3, + "end": 14.54, + "confidence": 0.036 + }, + { + "text": "take", + "start": 14.54, + "end": 14.58, + "confidence": 0.026 + }, 
+ { + "text": "that", + "start": 14.58, + "end": 14.62, + "confidence": 0.031 + }, + { + "text": "camera.", + "start": 14.62, + "end": 14.98, + "confidence": 0.034 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 15.0, + "end": 19.22, + "text": " Let's say it makes it want to go on the helmet we were going to have in B-1.", + "tokens": [ + 961, + 311, + 584, + 309, + 1669, + 309, + 528, + 281, + 352, + 322, + 264, + 15922, + 321, + 645, + 516, + 281, + 362, + 294, + 363, + 12, + 16, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5338267664755544, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.4499393105506897, + "confidence": 0.299, + "words": [ + { + "text": "Let's", + "start": 15.0, + "end": 15.38, + "confidence": 0.273 + }, + { + "text": "say", + "start": 15.38, + "end": 15.58, + "confidence": 0.349 + }, + { + "text": "it", + "start": 15.58, + "end": 15.76, + "confidence": 0.01 + }, + { + "text": "makes", + "start": 15.76, + "end": 15.8, + "confidence": 0.736 + }, + { + "text": "it", + "start": 15.8, + "end": 16.04, + "confidence": 0.921 + }, + { + "text": "want", + "start": 16.04, + "end": 16.16, + "confidence": 0.297 + }, + { + "text": "to", + "start": 16.16, + "end": 16.34, + "confidence": 0.97 + }, + { + "text": "go", + "start": 16.34, + "end": 16.44, + "confidence": 0.932 + }, + { + "text": "on", + "start": 16.44, + "end": 16.74, + "confidence": 0.912 + }, + { + "text": "the", + "start": 16.74, + "end": 16.86, + "confidence": 0.496 + }, + { + "text": "helmet", + "start": 16.86, + "end": 17.34, + "confidence": 0.005 + }, + { + "text": "we", + "start": 17.34, + "end": 17.64, + "confidence": 0.01 + }, + { + "text": "were", + "start": 17.64, + "end": 17.86, + "confidence": 0.08 + }, + { + "text": "going", + "start": 17.86, + "end": 18.04, + "confidence": 0.658 + }, + { + "text": "to", + "start": 18.04, + "end": 18.2, + "confidence": 0.966 + }, + { + "text": "have", + "start": 18.2, + "end": 18.26, + "confidence": 0.957 + }, + { + 
"text": "in", + "start": 18.26, + "end": 18.64, + "confidence": 0.863 + }, + { + "text": "B-1.", + "start": 18.64, + "end": 19.22, + "confidence": 0.506 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 19.38, + "end": 19.98, + "text": " The other one.", + "tokens": [ + 440, + 661, + 472, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5338267664755544, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.4499393105506897, + "confidence": 0.13, + "words": [ + { + "text": "The", + "start": 19.38, + "end": 19.42, + "confidence": 0.005 + }, + { + "text": "other", + "start": 19.42, + "end": 19.68, + "confidence": 0.186 + }, + { + "text": "one.", + "start": 19.68, + "end": 19.98, + "confidence": 0.586 + } + ] + }, + { + "id": 8, + "seek": 0, + "start": 20.12, + "end": 22.9, + "text": " And you can put the other one on the mic helmet.", + "tokens": [ + 400, + 291, + 393, + 829, + 264, + 661, + 472, + 322, + 264, + 3123, + 15922, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5338267664755544, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.4499393105506897, + "confidence": 0.366, + "words": [ + { + "text": "And", + "start": 20.12, + "end": 20.26, + "confidence": 0.343 + }, + { + "text": "you", + "start": 20.26, + "end": 20.38, + "confidence": 0.89 + }, + { + "text": "can", + "start": 20.38, + "end": 20.54, + "confidence": 0.087 + }, + { + "text": "put", + "start": 20.54, + "end": 20.7, + "confidence": 0.901 + }, + { + "text": "the", + "start": 20.7, + "end": 20.86, + "confidence": 0.979 + }, + { + "text": "other", + "start": 20.86, + "end": 21.06, + "confidence": 0.997 + }, + { + "text": "one", + "start": 21.06, + "end": 21.26, + "confidence": 0.973 + }, + { + "text": "on", + "start": 21.26, + "end": 21.9, + "confidence": 0.968 + }, + { + "text": "the", + "start": 21.9, + "end": 21.94, + "confidence": 0.09 + }, + { + "text": "mic", + "start": 21.94, + "end": 22.58, + "confidence": 0.682 + }, + { + "text": "helmet.", + "start": 
22.58, + "end": 22.9, + "confidence": 0.066 + } + ] + }, + { + "id": 9, + "seek": 0, + "start": 22.92, + "end": 24.8, + "text": " We'll show it to you in a quick screen.", + "tokens": [ + 492, + 603, + 855, + 309, + 281, + 291, + 294, + 257, + 1702, + 2568, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5338267664755544, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.4499393105506897, + "confidence": 0.122, + "words": [ + { + "text": "We'll", + "start": 22.92, + "end": 23.22, + "confidence": 0.08 + }, + { + "text": "show", + "start": 23.22, + "end": 23.32, + "confidence": 0.011 + }, + { + "text": "it", + "start": 23.32, + "end": 23.54, + "confidence": 0.15 + }, + { + "text": "to", + "start": 23.54, + "end": 23.64, + "confidence": 0.255 + }, + { + "text": "you", + "start": 23.64, + "end": 23.78, + "confidence": 0.703 + }, + { + "text": "in", + "start": 23.78, + "end": 23.96, + "confidence": 0.231 + }, + { + "text": "a", + "start": 23.96, + "end": 24.12, + "confidence": 0.736 + }, + { + "text": "quick", + "start": 24.12, + "end": 24.16, + "confidence": 0.018 + }, + { + "text": "screen.", + "start": 24.16, + "end": 24.8, + "confidence": 0.122 + } + ] + }, + { + "id": 10, + "seek": 0, + "start": 24.94, + "end": 25.26, + "text": " Over.", + "tokens": [ + 4886, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5338267664755544, + "compression_ratio": 1.4690265486725664, + "no_speech_prob": 0.4499393105506897, + "confidence": 0.125, + "words": [ + { + "text": "Over.", + "start": 24.94, + "end": 25.26, + "confidence": 0.125 + } + ] + }, + { + "id": 11, + "seek": 2600, + "start": 25.52, + "end": 27.52, + "text": " All right.", + "tokens": [ + 1057, + 558, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.108, + "words": [ + { + "text": "All", + "start": 25.52, + "end": 26.14, + "confidence": 0.003 + }, + { + "text": "right.", + 
"start": 26.14, + "end": 27.52, + "confidence": 0.677 + } + ] + }, + { + "id": 12, + "seek": 2600, + "start": 31.32, + "end": 32.48, + "text": " Yeah, sir.", + "tokens": [ + 865, + 11, + 4735, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.045, + "words": [ + { + "text": "Yeah,", + "start": 31.32, + "end": 31.56, + "confidence": 0.033 + }, + { + "text": "sir.", + "start": 31.56, + "end": 32.48, + "confidence": 0.062 + } + ] + }, + { + "id": 13, + "seek": 2600, + "start": 32.66, + "end": 33.06, + "text": " All right.", + "tokens": [ + 1057, + 558, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.223, + "words": [ + { + "text": "All", + "start": 32.66, + "end": 32.94, + "confidence": 0.047 + }, + { + "text": "right.", + "start": 32.94, + "end": 33.06, + "confidence": 0.488 + } + ] + }, + { + "id": 14, + "seek": 2600, + "start": 33.08, + "end": 34.2, + "text": " Yeah, sir.", + "tokens": [ + 865, + 11, + 4735, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.035, + "words": [ + { + "text": "Yeah,", + "start": 33.08, + "end": 33.36, + "confidence": 0.144 + }, + { + "text": "sir.", + "start": 33.36, + "end": 34.2, + "confidence": 0.009 + } + ] + }, + { + "id": 15, + "seek": 2600, + "start": 34.52, + "end": 35.4, + "text": " There's a better helmet than B-1.", + "tokens": [ + 821, + 311, + 257, + 1101, + 15922, + 813, + 363, + 12, + 16, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.014, + "words": [ + { + "text": "There's", + "start": 34.52, + "end": 34.56, + "confidence": 0.046 + 
}, + { + "text": "a", + "start": 34.56, + "end": 34.64, + "confidence": 0.037 + }, + { + "text": "better", + "start": 34.64, + "end": 34.94, + "confidence": 0.0 + }, + { + "text": "helmet", + "start": 34.94, + "end": 34.98, + "confidence": 0.0 + }, + { + "text": "than", + "start": 34.98, + "end": 35.02, + "confidence": 0.179 + }, + { + "text": "B-1.", + "start": 35.02, + "end": 35.4, + "confidence": 0.033 + } + ] + }, + { + "id": 16, + "seek": 2600, + "start": 35.42, + "end": 35.94, + "text": " That's the other one.", + "tokens": [ + 663, + 311, + 264, + 661, + 472, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.162, + "words": [ + { + "text": "That's", + "start": 35.42, + "end": 35.48, + "confidence": 0.082 + }, + { + "text": "the", + "start": 35.48, + "end": 35.52, + "confidence": 0.167 + }, + { + "text": "other", + "start": 35.52, + "end": 35.68, + "confidence": 0.412 + }, + { + "text": "one.", + "start": 35.68, + "end": 35.94, + "confidence": 0.197 + } + ] + }, + { + "id": 17, + "seek": 2600, + "start": 35.96, + "end": 36.34, + "text": " Nice.", + "tokens": [ + 5490, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.007, + "words": [ + { + "text": "Nice.", + "start": 35.96, + "end": 36.34, + "confidence": 0.007 + } + ] + }, + { + "id": 18, + "seek": 2600, + "start": 37.68, + "end": 38.48, + "text": " Let's go in there.", + "tokens": [ + 961, + 311, + 352, + 294, + 456, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.125, + "words": [ + { + "text": "Let's", + "start": 37.68, + "end": 38.16, + "confidence": 0.022 + }, + { + "text": "go", + "start": 38.16, + "end": 38.2, + "confidence": 0.626 + }, 
+ { + "text": "in", + "start": 38.2, + "end": 38.24, + "confidence": 0.037 + }, + { + "text": "there.", + "start": 38.24, + "end": 38.48, + "confidence": 0.579 + } + ] + }, + { + "id": 19, + "seek": 2600, + "start": 38.6, + "end": 39.24, + "text": " At least we're safe.", + "tokens": [ + 1711, + 1935, + 321, + 434, + 3273, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.125, + "words": [ + { + "text": "At", + "start": 38.6, + "end": 38.64, + "confidence": 0.001 + }, + { + "text": "least", + "start": 38.64, + "end": 38.86, + "confidence": 0.311 + }, + { + "text": "we're", + "start": 38.86, + "end": 39.08, + "confidence": 0.12 + }, + { + "text": "safe.", + "start": 39.08, + "end": 39.24, + "confidence": 0.799 + } + ] + }, + { + "id": 20, + "seek": 2600, + "start": 39.94, + "end": 40.54, + "text": " We've got them in there.", + "tokens": [ + 492, + 600, + 658, + 552, + 294, + 456, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.076, + "words": [ + { + "text": "We've", + "start": 39.94, + "end": 40.38, + "confidence": 0.06 + }, + { + "text": "got", + "start": 40.38, + "end": 40.42, + "confidence": 0.737 + }, + { + "text": "them", + "start": 40.42, + "end": 40.46, + "confidence": 0.004 + }, + { + "text": "in", + "start": 40.46, + "end": 40.5, + "confidence": 0.039 + }, + { + "text": "there.", + "start": 40.5, + "end": 40.54, + "confidence": 0.198 + } + ] + }, + { + "id": 21, + "seek": 2600, + "start": 40.54, + "end": 41.28, + "text": " Helmet bags.", + "tokens": [ + 6128, + 5537, + 10405, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.001, + "words": [ + { + "text": "Helmet", + "start": 40.54, + 
"end": 40.78, + "confidence": 0.001 + }, + { + "text": "bags.", + "start": 40.78, + "end": 41.28, + "confidence": 0.002 + } + ] + }, + { + "id": 22, + "seek": 2600, + "start": 43.52, + "end": 47.74, + "text": " And, uh, I guess we have helmets in the helmet bag.", + "tokens": [ + 400, + 11, + 2232, + 11, + 286, + 2041, + 321, + 362, + 42022, + 294, + 264, + 15922, + 3411, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.208, + "words": [ + { + "text": "And,", + "start": 43.52, + "end": 43.56, + "confidence": 0.087 + }, + { + "text": "uh,", + "start": 43.56, + "end": 44.02, + "confidence": 0.767 + }, + { + "text": "I", + "start": 44.02, + "end": 44.06, + "confidence": 0.321 + }, + { + "text": "guess", + "start": 44.06, + "end": 44.32, + "confidence": 0.236 + }, + { + "text": "we", + "start": 44.32, + "end": 45.0, + "confidence": 0.078 + }, + { + "text": "have", + "start": 45.0, + "end": 45.6, + "confidence": 0.06 + }, + { + "text": "helmets", + "start": 45.6, + "end": 46.62, + "confidence": 0.004 + }, + { + "text": "in", + "start": 46.62, + "end": 46.82, + "confidence": 0.6 + }, + { + "text": "the", + "start": 46.82, + "end": 46.98, + "confidence": 0.614 + }, + { + "text": "helmet", + "start": 46.98, + "end": 47.2, + "confidence": 0.823 + }, + { + "text": "bag.", + "start": 47.2, + "end": 47.74, + "confidence": 0.363 + } + ] + }, + { + "id": 23, + "seek": 2600, + "start": 48.24, + "end": 49.84, + "text": " At least it's in the helmet bag.", + "tokens": [ + 1711, + 1935, + 309, + 311, + 294, + 264, + 15922, + 3411, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.112, + "words": [ + { + "text": "At", + "start": 48.24, + "end": 48.38, + "confidence": 0.064 + }, + { + "text": "least", + "start": 48.38, + "end": 48.66, + 
"confidence": 0.795 + }, + { + "text": "it's", + "start": 48.66, + "end": 49.02, + "confidence": 0.347 + }, + { + "text": "in", + "start": 49.02, + "end": 49.06, + "confidence": 0.172 + }, + { + "text": "the", + "start": 49.06, + "end": 49.1, + "confidence": 0.242 + }, + { + "text": "helmet", + "start": 49.1, + "end": 49.22, + "confidence": 0.003 + }, + { + "text": "bag.", + "start": 49.22, + "end": 49.84, + "confidence": 0.062 + } + ] + }, + { + "id": 24, + "seek": 2600, + "start": 50.22, + "end": 50.94, + "text": " Right here.", + "tokens": [ + 1779, + 510, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.099, + "words": [ + { + "text": "Right", + "start": 50.22, + "end": 50.48, + "confidence": 0.006 + }, + { + "text": "here.", + "start": 50.48, + "end": 50.94, + "confidence": 0.416 + } + ] + }, + { + "id": 25, + "seek": 2600, + "start": 51.54, + "end": 52.24, + "text": " Right here.", + "tokens": [ + 1779, + 510, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.04, + "words": [ + { + "text": "Right", + "start": 51.54, + "end": 51.8, + "confidence": 0.008 + }, + { + "text": "here.", + "start": 51.8, + "end": 52.24, + "confidence": 0.091 + } + ] + }, + { + "id": 26, + "seek": 2600, + "start": 53.2, + "end": 55.42, + "text": " Yeah, we're taking it next day out of the field up.", + "tokens": [ + 865, + 11, + 321, + 434, + 1940, + 309, + 958, + 786, + 484, + 295, + 264, + 2519, + 493, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5128642573501124, + "compression_ratio": 1.728643216080402, + "no_speech_prob": 0.0011020583333447576, + "confidence": 0.367, + "words": [ + { + "text": "Yeah,", + "start": 53.2, + "end": 53.38, + "confidence": 0.47 + }, + { + "text": "we're", + "start": 53.38, + "end": 53.7, + "confidence": 
0.742 + }, + { + "text": "taking", + "start": 53.7, + "end": 53.74, + "confidence": 0.322 + }, + { + "text": "it", + "start": 53.74, + "end": 53.86, + "confidence": 0.082 + }, + { + "text": "next", + "start": 53.86, + "end": 53.96, + "confidence": 0.545 + }, + { + "text": "day", + "start": 53.96, + "end": 54.16, + "confidence": 0.817 + }, + { + "text": "out", + "start": 54.16, + "end": 54.32, + "confidence": 0.29 + }, + { + "text": "of", + "start": 54.32, + "end": 54.54, + "confidence": 0.351 + }, + { + "text": "the", + "start": 54.54, + "end": 54.68, + "confidence": 0.432 + }, + { + "text": "field", + "start": 54.68, + "end": 55.2, + "confidence": 0.233 + }, + { + "text": "up.", + "start": 55.2, + "end": 55.42, + "confidence": 0.235 + } + ] + }, + { + "id": 27, + "seek": 5500, + "start": 55.44, + "end": 56.12, + "text": " Yeah, we were.", + "tokens": [ + 865, + 11, + 321, + 645, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.02, + "words": [ + { + "text": "Yeah,", + "start": 55.44, + "end": 55.48, + "confidence": 0.024 + }, + { + "text": "we", + "start": 55.48, + "end": 55.52, + "confidence": 0.022 + }, + { + "text": "were.", + "start": 55.52, + "end": 56.12, + "confidence": 0.017 + } + ] + }, + { + "id": 28, + "seek": 5500, + "start": 56.5, + "end": 57.48, + "text": " You want to hack me on that?", + "tokens": [ + 509, + 528, + 281, + 10339, + 385, + 322, + 300, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.019, + "words": [ + { + "text": "You", + "start": 56.5, + "end": 56.62, + "confidence": 0.007 + }, + { + "text": "want", + "start": 56.62, + "end": 57.0, + "confidence": 0.01 + }, + { + "text": "to", + "start": 57.0, + "end": 57.22, + "confidence": 0.257 + }, + { + "text": "hack", + "start": 57.22, + 
"end": 57.36, + "confidence": 0.0 + }, + { + "text": "me", + "start": 57.36, + "end": 57.4, + "confidence": 0.054 + }, + { + "text": "on", + "start": 57.4, + "end": 57.44, + "confidence": 0.005 + }, + { + "text": "that?", + "start": 57.44, + "end": 57.48, + "confidence": 0.22 + } + ] + }, + { + "id": 29, + "seek": 5500, + "start": 57.48, + "end": 58.26, + "text": " Yeah, we were.", + "tokens": [ + 865, + 11, + 321, + 645, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.047, + "words": [ + { + "text": "Yeah,", + "start": 57.48, + "end": 57.96, + "confidence": 0.029 + }, + { + "text": "we", + "start": 57.96, + "end": 58.06, + "confidence": 0.096 + }, + { + "text": "were.", + "start": 58.06, + "end": 58.26, + "confidence": 0.053 + } + ] + }, + { + "id": 30, + "seek": 5500, + "start": 58.28, + "end": 59.48, + "text": " You want to hack me on that?", + "tokens": [ + 509, + 528, + 281, + 10339, + 385, + 322, + 300, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.055, + "words": [ + { + "text": "You", + "start": 58.28, + "end": 58.46, + "confidence": 0.029 + }, + { + "text": "want", + "start": 58.46, + "end": 58.6, + "confidence": 0.107 + }, + { + "text": "to", + "start": 58.6, + "end": 58.64, + "confidence": 0.376 + }, + { + "text": "hack", + "start": 58.64, + "end": 58.76, + "confidence": 0.0 + }, + { + "text": "me", + "start": 58.76, + "end": 58.96, + "confidence": 0.442 + }, + { + "text": "on", + "start": 58.96, + "end": 59.18, + "confidence": 0.266 + }, + { + "text": "that?", + "start": 59.18, + "end": 59.48, + "confidence": 0.186 + } + ] + }, + { + "id": 31, + "seek": 5500, + "start": 59.5, + "end": 61.96, + "text": " With the cover, I tried it already.", + "tokens": [ + 2022, + 264, + 2060, + 11, + 286, + 3031, + 
309, + 1217, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.202, + "words": [ + { + "text": "With", + "start": 59.5, + "end": 60.1, + "confidence": 0.012 + }, + { + "text": "the", + "start": 60.1, + "end": 60.82, + "confidence": 0.238 + }, + { + "text": "cover,", + "start": 60.82, + "end": 61.3, + "confidence": 0.165 + }, + { + "text": "I", + "start": 61.3, + "end": 61.34, + "confidence": 0.826 + }, + { + "text": "tried", + "start": 61.34, + "end": 61.52, + "confidence": 0.019 + }, + { + "text": "it", + "start": 61.52, + "end": 61.72, + "confidence": 0.699 + }, + { + "text": "already.", + "start": 61.72, + "end": 61.96, + "confidence": 0.832 + } + ] + }, + { + "id": 32, + "seek": 5500, + "start": 62.56, + "end": 63.16, + "text": " Okay, fine.", + "tokens": [ + 1033, + 11, + 2489, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.542, + "words": [ + { + "text": "Okay,", + "start": 62.56, + "end": 62.98, + "confidence": 0.49 + }, + { + "text": "fine.", + "start": 62.98, + "end": 63.16, + "confidence": 0.601 + } + ] + }, + { + "id": 33, + "seek": 5500, + "start": 63.2, + "end": 64.16, + "text": " We weren't sure of that.", + "tokens": [ + 492, + 4999, + 380, + 988, + 295, + 300, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.75, + "words": [ + { + "text": "We", + "start": 63.2, + "end": 63.36, + "confidence": 0.468 + }, + { + "text": "weren't", + "start": 63.36, + "end": 63.6, + "confidence": 0.939 + }, + { + "text": "sure", + "start": 63.6, + "end": 63.8, + "confidence": 0.916 + }, + { + "text": "of", + "start": 63.8, + "end": 63.88, + "confidence": 0.594 + }, + { + "text": "that.", + 
"start": 63.88, + "end": 64.16, + "confidence": 0.771 + } + ] + }, + { + "id": 34, + "seek": 5500, + "start": 64.48, + "end": 65.14, + "text": " Just a suggestion.", + "tokens": [ + 1449, + 257, + 16541, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.163, + "words": [ + { + "text": "Just", + "start": 64.48, + "end": 64.64, + "confidence": 0.011 + }, + { + "text": "a", + "start": 64.64, + "end": 64.78, + "confidence": 0.428 + }, + { + "text": "suggestion.", + "start": 64.78, + "end": 65.14, + "confidence": 0.386 + } + ] + }, + { + "id": 35, + "seek": 5500, + "start": 65.16, + "end": 68.04, + "text": " We thought we'd, uh, say you could check it out.", + "tokens": [ + 492, + 1194, + 321, + 1116, + 11, + 2232, + 11, + 584, + 291, + 727, + 1520, + 309, + 484, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.546, + "words": [ + { + "text": "We", + "start": 65.16, + "end": 65.42, + "confidence": 0.164 + }, + { + "text": "thought", + "start": 65.42, + "end": 65.62, + "confidence": 0.835 + }, + { + "text": "we'd,", + "start": 65.62, + "end": 66.14, + "confidence": 0.391 + }, + { + "text": "uh,", + "start": 66.14, + "end": 66.96, + "confidence": 0.678 + }, + { + "text": "say", + "start": 66.96, + "end": 67.0, + "confidence": 0.181 + }, + { + "text": "you", + "start": 67.0, + "end": 67.14, + "confidence": 0.825 + }, + { + "text": "could", + "start": 67.14, + "end": 67.3, + "confidence": 0.715 + }, + { + "text": "check", + "start": 67.3, + "end": 67.44, + "confidence": 0.637 + }, + { + "text": "it", + "start": 67.44, + "end": 67.66, + "confidence": 0.982 + }, + { + "text": "out.", + "start": 67.66, + "end": 68.04, + "confidence": 0.908 + } + ] + }, + { + "id": 36, + "seek": 5500, + "start": 68.24, + "end": 69.46, + "text": " 
It's not much of an order to turn that.", + "tokens": [ + 467, + 311, + 406, + 709, + 295, + 364, + 1668, + 281, + 1261, + 300, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.139, + "words": [ + { + "text": "It's", + "start": 68.24, + "end": 68.46, + "confidence": 0.077 + }, + { + "text": "not", + "start": 68.46, + "end": 68.5, + "confidence": 0.09 + }, + { + "text": "much", + "start": 68.5, + "end": 68.64, + "confidence": 0.342 + }, + { + "text": "of", + "start": 68.64, + "end": 68.78, + "confidence": 0.388 + }, + { + "text": "an", + "start": 68.78, + "end": 68.9, + "confidence": 0.399 + }, + { + "text": "order", + "start": 68.9, + "end": 68.94, + "confidence": 0.018 + }, + { + "text": "to", + "start": 68.94, + "end": 69.14, + "confidence": 0.122 + }, + { + "text": "turn", + "start": 69.14, + "end": 69.18, + "confidence": 0.447 + }, + { + "text": "that.", + "start": 69.18, + "end": 69.46, + "confidence": 0.116 + } + ] + }, + { + "id": 37, + "seek": 5500, + "start": 70.38, + "end": 71.98, + "text": " So, uh, I guess we're going to come up with this.", + "tokens": [ + 407, + 11, + 2232, + 11, + 286, + 2041, + 321, + 434, + 516, + 281, + 808, + 493, + 365, + 341, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.449, + "words": [ + { + "text": "So,", + "start": 70.38, + "end": 70.42, + "confidence": 0.514 + }, + { + "text": "uh,", + "start": 70.42, + "end": 70.5, + "confidence": 0.38 + }, + { + "text": "I", + "start": 70.5, + "end": 70.54, + "confidence": 0.946 + }, + { + "text": "guess", + "start": 70.54, + "end": 70.8, + "confidence": 0.99 + }, + { + "text": "we're", + "start": 70.8, + "end": 71.1, + "confidence": 0.534 + }, + { + "text": "going", + "start": 71.1, + "end": 71.28, + "confidence": 0.106 + }, + { + 
"text": "to", + "start": 71.28, + "end": 71.42, + "confidence": 0.915 + }, + { + "text": "come", + "start": 71.42, + "end": 71.48, + "confidence": 0.48 + }, + { + "text": "up", + "start": 71.48, + "end": 71.68, + "confidence": 0.579 + }, + { + "text": "with", + "start": 71.68, + "end": 71.88, + "confidence": 0.809 + }, + { + "text": "this.", + "start": 71.88, + "end": 71.98, + "confidence": 0.166 + } + ] + }, + { + "id": 38, + "seek": 5500, + "start": 72.0, + "end": 72.8, + "text": " Let us know.", + "tokens": [ + 961, + 505, + 458, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.45, + "words": [ + { + "text": "Let", + "start": 72.0, + "end": 72.14, + "confidence": 0.221 + }, + { + "text": "us", + "start": 72.14, + "end": 72.3, + "confidence": 0.272 + }, + { + "text": "know.", + "start": 72.3, + "end": 72.8, + "confidence": 0.826 + } + ] + }, + { + "id": 39, + "seek": 5500, + "start": 72.82, + "end": 73.02, + "text": " Okay.", + "tokens": [ + 1033, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.049, + "words": [ + { + "text": "Okay.", + "start": 72.82, + "end": 73.02, + "confidence": 0.049 + } + ] + }, + { + "id": 40, + "seek": 5500, + "start": 74.2, + "end": 75.48, + "text": " No problem.", + "tokens": [ + 883, + 1154, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.077, + "words": [ + { + "text": "No", + "start": 74.2, + "end": 74.24, + "confidence": 0.005 + }, + { + "text": "problem.", + "start": 74.24, + "end": 75.48, + "confidence": 0.295 + } + ] + }, + { + "id": 41, + "seek": 5500, + "start": 75.72, + "end": 75.94, + "text": " Okay.", + "tokens": [ + 1033, + 13 + ], + 
"temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.263, + "words": [ + { + "text": "Okay.", + "start": 75.72, + "end": 75.94, + "confidence": 0.263 + } + ] + }, + { + "id": 42, + "seek": 5500, + "start": 75.96, + "end": 76.66, + "text": " No problem.", + "tokens": [ + 883, + 1154, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.588, + "words": [ + { + "text": "No", + "start": 75.96, + "end": 76.24, + "confidence": 0.447 + }, + { + "text": "problem.", + "start": 76.24, + "end": 76.66, + "confidence": 0.673 + } + ] + }, + { + "id": 43, + "seek": 5500, + "start": 78.42, + "end": 78.5, + "text": " No problem.", + "tokens": [ + 883, + 1154, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.031, + "words": [ + { + "text": "No", + "start": 78.42, + "end": 78.46, + "confidence": 0.032 + }, + { + "text": "problem.", + "start": 78.46, + "end": 78.5, + "confidence": 0.03 + } + ] + }, + { + "id": 44, + "seek": 5500, + "start": 78.5, + "end": 79.28, + "text": " No problem.", + "tokens": [ + 883, + 1154, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 0.00039152163662947714, + "confidence": 0.013, + "words": [ + { + "text": "No", + "start": 78.5, + "end": 78.7, + "confidence": 0.001 + }, + { + "text": "problem.", + "start": 78.7, + "end": 79.28, + "confidence": 0.044 + } + ] + }, + { + "id": 45, + "seek": 5500, + "start": 79.3, + "end": 80.3, + "text": " No problem.", + "tokens": [ + 883, + 1154, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.31608172647314137, + "compression_ratio": 1.8318181818181818, + "no_speech_prob": 
0.00039152163662947714, + "confidence": 0.017, + "words": [ + { + "text": "No", + "start": 79.3, + "end": 79.48, + "confidence": 0.002 + }, + { + "text": "problem.", + "start": 79.48, + "end": 80.3, + "confidence": 0.056 + } + ] + }, + { + "id": 46, + "seek": 8000, + "start": 80.32, + "end": 84.94, + "text": " No problem.", + "tokens": [ + 883, + 1154, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3978840112686157, + "compression_ratio": 1.5217391304347827, + "no_speech_prob": 0.0009465877083130181, + "confidence": 0.029, + "words": [ + { + "text": "No", + "start": 80.32, + "end": 80.54, + "confidence": 0.004 + }, + { + "text": "problem.", + "start": 80.54, + "end": 84.94, + "confidence": 0.082 + } + ] + }, + { + "id": 47, + "seek": 8000, + "start": 85.24, + "end": 87.1, + "text": " No problem.", + "tokens": [ + 883, + 1154, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.3978840112686157, + "compression_ratio": 1.5217391304347827, + "no_speech_prob": 0.0009465877083130181, + "confidence": 0.03, + "words": [ + { + "text": "No", + "start": 85.24, + "end": 85.4, + "confidence": 0.002 + }, + { + "text": "problem.", + "start": 85.4, + "end": 87.1, + "confidence": 0.128 + } + ] + }, + { + "id": 48, + "seek": 8800, + "start": 87.52, + "end": 88.98, + "text": " No problem.", + "tokens": [ + 50364, + 883, + 1154, + 13, + 51464 + ], + "temperature": 0.0, + "avg_logprob": -0.649966835975647, + "compression_ratio": 0.5789473684210527, + "no_speech_prob": 0.0005261301412247121, + "confidence": 0.037, + "words": [ + { + "text": "No", + "start": 87.52, + "end": 88.9, + "confidence": 0.003 + }, + { + "text": "problem.", + "start": 88.9, + "end": 88.98, + "confidence": 0.137 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/naive.cuda/naive_apollo11.mp3.words.json b/tests/expected/naive.cuda/naive_apollo11.mp3.words.json new file mode 100644 index 
0000000000000000000000000000000000000000..1d7628fb1df3720ce20a998436c90ebb59cb86c7 --- /dev/null +++ b/tests/expected/naive.cuda/naive_apollo11.mp3.words.json @@ -0,0 +1,3814 @@ +{ + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA. Alright, okay, we like to say that they make the one that's on the helmet we're going to have in B1. And you can put the other one on the mic helmet with those GVA blizzard frames. Alright, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them got them, got them, got them, got them, got them, got them, 
got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.64, + "end": 6.84, + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA.", + "tokens": [ + 25187, + 2975, + 11, + 18717, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 24758, + 3334, + 12, + 20914, + 460, + 2634, + 15454, + 460, + 20914, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.7235679626464844, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.44993799924850464, + "confidence": 0.494, + "words": [ + { + "text": "Apollo", + "start": 0.64, + "end": 0.98, + "confidence": 0.425 + }, + { + "text": "11,", + "start": 0.98, + "end": 1.52, + "confidence": 0.842 + }, + { + "text": "Houston", + "start": 1.52, + "end": 1.78, + "confidence": 0.974 + }, + { + "text": "we", + "start": 1.78, + "end": 1.98, + "confidence": 0.453 + }, + { + "text": "got", + "start": 1.98, + "end": 2.16, + "confidence": 0.791 + }, + { + "text": "a", + "start": 2.16, + "end": 2.32, + "confidence": 0.992 + }, + { + "text": "recommendation", + "start": 2.32, + "end": 3.08, + "confidence": 0.97 + }, + { + "text": "for", + "start": 3.08, + "end": 3.5, + "confidence": 0.945 + }, + { + "text": "you", + "start": 3.5, + "end": 3.72, + "confidence": 0.99 + }, + { + "text": "on", + "start": 3.72, + "end": 4.2, + 
"confidence": 0.935 + }, + { + "text": "your", + "start": 4.2, + "end": 4.24, + "confidence": 0.974 + }, + { + "text": "Soyuz-VA", + "start": 4.24, + "end": 5.32, + "confidence": 0.327 + }, + { + "text": "GLEME", + "start": 5.32, + "end": 6.02, + "confidence": 0.178 + }, + { + "text": "GVA.", + "start": 6.02, + "end": 6.84, + "confidence": 0.321 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 10.82, + "end": 19.28, + "text": " Alright, okay, we like to say that they make the one that's on the helmet we're going to have in B1.", + "tokens": [ + 2798, + 11, + 1392, + 11, + 321, + 411, + 281, + 584, + 300, + 436, + 652, + 264, + 472, + 300, + 311, + 322, + 264, + 15922, + 321, + 434, + 516, + 281, + 362, + 294, + 363, + 16, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.7235679626464844, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.44993799924850464, + "confidence": 0.335, + "words": [ + { + "text": "Alright,", + "start": 10.82, + "end": 12.06, + "confidence": 0.126 + }, + { + "text": "okay,", + "start": 12.06, + "end": 13.06, + "confidence": 0.394 + }, + { + "text": "we", + "start": 13.06, + "end": 13.1, + "confidence": 0.478 + }, + { + "text": "like", + "start": 13.1, + "end": 13.48, + "confidence": 0.493 + }, + { + "text": "to", + "start": 13.48, + "end": 13.66, + "confidence": 0.128 + }, + { + "text": "say", + "start": 13.66, + "end": 14.94, + "confidence": 0.084 + }, + { + "text": "that", + "start": 14.94, + "end": 15.54, + "confidence": 0.225 + }, + { + "text": "they", + "start": 15.54, + "end": 15.58, + "confidence": 0.328 + }, + { + "text": "make", + "start": 15.58, + "end": 15.78, + "confidence": 0.162 + }, + { + "text": "the", + "start": 15.78, + "end": 15.92, + "confidence": 0.226 + }, + { + "text": "one", + "start": 15.92, + "end": 16.1, + "confidence": 0.559 + }, + { + "text": "that's", + "start": 16.1, + "end": 16.36, + "confidence": 0.367 + }, + { + "text": "on", + "start": 16.36, + "end": 16.6, + "confidence": 0.387 + }, + { 
+ "text": "the", + "start": 16.6, + "end": 16.84, + "confidence": 0.354 + }, + { + "text": "helmet", + "start": 16.84, + "end": 17.36, + "confidence": 0.205 + }, + { + "text": "we're", + "start": 17.36, + "end": 17.84, + "confidence": 0.297 + }, + { + "text": "going", + "start": 17.84, + "end": 18.04, + "confidence": 0.432 + }, + { + "text": "to", + "start": 18.04, + "end": 18.18, + "confidence": 0.448 + }, + { + "text": "have", + "start": 18.18, + "end": 18.24, + "confidence": 0.761 + }, + { + "text": "in", + "start": 18.24, + "end": 18.46, + "confidence": 0.743 + }, + { + "text": "B1.", + "start": 18.46, + "end": 19.28, + "confidence": 0.753 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 19.38, + "end": 24.7, + "text": " And you can put the other one on the mic helmet with those GVA blizzard frames.", + "tokens": [ + 400, + 291, + 393, + 829, + 264, + 661, + 472, + 322, + 264, + 3123, + 15922, + 365, + 729, + 460, + 20914, + 888, + 31062, + 12083, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.7235679626464844, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.44993799924850464, + "confidence": 0.123, + "words": [ + { + "text": "And", + "start": 19.38, + "end": 20.14, + "confidence": 0.321 + }, + { + "text": "you", + "start": 20.14, + "end": 20.36, + "confidence": 0.904 + }, + { + "text": "can", + "start": 20.36, + "end": 20.54, + "confidence": 0.363 + }, + { + "text": "put", + "start": 20.54, + "end": 20.72, + "confidence": 0.939 + }, + { + "text": "the", + "start": 20.72, + "end": 20.86, + "confidence": 0.978 + }, + { + "text": "other", + "start": 20.86, + "end": 21.06, + "confidence": 0.995 + }, + { + "text": "one", + "start": 21.06, + "end": 21.26, + "confidence": 0.965 + }, + { + "text": "on", + "start": 21.26, + "end": 21.68, + "confidence": 0.961 + }, + { + "text": "the", + "start": 21.68, + "end": 21.74, + "confidence": 0.199 + }, + { + "text": "mic", + "start": 21.74, + "end": 22.58, + "confidence": 0.694 + }, + { + "text": 
"helmet", + "start": 22.58, + "end": 22.94, + "confidence": 0.007 + }, + { + "text": "with", + "start": 22.94, + "end": 23.2, + "confidence": 0.059 + }, + { + "text": "those", + "start": 23.2, + "end": 23.46, + "confidence": 0.046 + }, + { + "text": "GVA", + "start": 23.46, + "end": 23.86, + "confidence": 0.001 + }, + { + "text": "blizzard", + "start": 23.86, + "end": 24.32, + "confidence": 0.032 + }, + { + "text": "frames.", + "start": 24.32, + "end": 24.7, + "confidence": 0.231 + } + ] + }, + { + "id": 3, + "seek": 2500, + "start": 31.36, + "end": 54.68, + "text": " Alright, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 2798, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 
11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.10793833414713541, + "compression_ratio": 24.096774193548388, + "no_speech_prob": 0.0011014570482075214, + "confidence": 0.887, + "words": [ + { + "text": "Alright,", + "start": 31.36, + "end": 31.98, + "confidence": 0.077 + }, + { + "text": "got", + "start": 31.98, + "end": 32.02, + "confidence": 0.169 + }, + { + "text": "them,", + "start": 32.02, + "end": 32.52, + "confidence": 0.189 + }, + { + "text": "got", + "start": 32.52, + "end": 33.0, + "confidence": 0.429 + }, + { + "text": "them,", + "start": 33.0, + "end": 33.78, + "confidence": 0.613 + }, + { + "text": "got", + "start": 33.78, + "end": 33.82, + "confidence": 0.67 + }, + { + "text": "them,", + "start": 33.82, + "end": 34.56, + "confidence": 0.495 + }, + { + "text": "got", + "start": 34.56, + "end": 34.74, + "confidence": 0.664 + }, + { + "text": "them,", + "start": 34.74, + "end": 34.78, + "confidence": 0.486 + }, + { + "text": "got", + "start": 34.78, + "end": 35.02, + "confidence": 0.756 + }, + { + "text": "them,", + "start": 35.02, + "end": 35.06, + "confidence": 0.565 + }, + { + "text": "got", + "start": 35.06, + "end": 35.1, + 
"confidence": 0.781 + }, + { + "text": "them,", + "start": 35.1, + "end": 35.14, + "confidence": 0.684 + }, + { + "text": "got", + "start": 35.14, + "end": 35.32, + "confidence": 0.858 + }, + { + "text": "them,", + "start": 35.32, + "end": 35.98, + "confidence": 0.803 + }, + { + "text": "got", + "start": 35.98, + "end": 36.02, + "confidence": 0.917 + }, + { + "text": "them,", + "start": 36.02, + "end": 36.06, + "confidence": 0.767 + }, + { + "text": "got", + "start": 36.06, + "end": 36.1, + "confidence": 0.584 + }, + { + "text": "them,", + "start": 36.1, + "end": 36.14, + "confidence": 0.843 + }, + { + "text": "got", + "start": 36.14, + "end": 36.18, + "confidence": 0.926 + }, + { + "text": "them,", + "start": 36.18, + "end": 36.22, + "confidence": 0.887 + }, + { + "text": "got", + "start": 36.22, + "end": 36.26, + "confidence": 0.936 + }, + { + "text": "them,", + "start": 36.26, + "end": 36.4, + "confidence": 0.903 + }, + { + "text": "got", + "start": 36.4, + "end": 36.44, + "confidence": 0.939 + }, + { + "text": "them,", + "start": 36.44, + "end": 36.48, + "confidence": 0.909 + }, + { + "text": "got", + "start": 36.48, + "end": 36.52, + "confidence": 0.948 + }, + { + "text": "them,", + "start": 36.52, + "end": 36.56, + "confidence": 0.902 + }, + { + "text": "got", + "start": 36.56, + "end": 36.6, + "confidence": 0.942 + }, + { + "text": "them,", + "start": 36.6, + "end": 36.64, + "confidence": 0.904 + }, + { + "text": "got", + "start": 36.64, + "end": 36.68, + "confidence": 0.938 + }, + { + "text": "them,", + "start": 36.68, + "end": 36.72, + "confidence": 0.911 + }, + { + "text": "got", + "start": 36.72, + "end": 36.76, + "confidence": 0.932 + }, + { + "text": "them,", + "start": 36.76, + "end": 36.8, + "confidence": 0.918 + }, + { + "text": "got", + "start": 36.8, + "end": 36.84, + "confidence": 0.932 + }, + { + "text": "them,", + "start": 36.84, + "end": 36.88, + "confidence": 0.923 + }, + { + "text": "got", + "start": 36.88, + "end": 36.92, + "confidence": 
0.937 + }, + { + "text": "them,", + "start": 36.92, + "end": 36.96, + "confidence": 0.93 + }, + { + "text": "got", + "start": 36.96, + "end": 37.0, + "confidence": 0.938 + }, + { + "text": "them,", + "start": 37.0, + "end": 37.04, + "confidence": 0.936 + }, + { + "text": "got", + "start": 37.04, + "end": 37.08, + "confidence": 0.943 + }, + { + "text": "them,", + "start": 37.08, + "end": 37.12, + "confidence": 0.941 + }, + { + "text": "got", + "start": 37.12, + "end": 37.16, + "confidence": 0.945 + }, + { + "text": "them,", + "start": 37.16, + "end": 37.2, + "confidence": 0.944 + }, + { + "text": "got", + "start": 37.2, + "end": 37.24, + "confidence": 0.948 + }, + { + "text": "them,", + "start": 37.24, + "end": 37.28, + "confidence": 0.947 + }, + { + "text": "got", + "start": 37.28, + "end": 37.32, + "confidence": 0.949 + }, + { + "text": "them,", + "start": 37.32, + "end": 37.36, + "confidence": 0.95 + }, + { + "text": "got", + "start": 37.36, + "end": 37.4, + "confidence": 0.95 + }, + { + "text": "them,", + "start": 37.4, + "end": 37.44, + "confidence": 0.953 + }, + { + "text": "got", + "start": 37.44, + "end": 37.48, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 37.48, + "end": 37.52, + "confidence": 0.956 + }, + { + "text": "got", + "start": 37.52, + "end": 37.56, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 37.56, + "end": 37.6, + "confidence": 0.956 + }, + { + "text": "got", + "start": 37.6, + "end": 37.64, + "confidence": 0.953 + }, + { + "text": "them,", + "start": 37.64, + "end": 37.68, + "confidence": 0.957 + }, + { + "text": "got", + "start": 37.68, + "end": 37.72, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 37.72, + "end": 37.76, + "confidence": 0.961 + }, + { + "text": "got", + "start": 37.76, + "end": 37.8, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 37.8, + "end": 37.84, + "confidence": 0.961 + }, + { + "text": "got", + "start": 37.84, + "end": 37.88, + "confidence": 0.955 + }, + { + 
"text": "them,", + "start": 37.88, + "end": 37.92, + "confidence": 0.962 + }, + { + "text": "got", + "start": 37.92, + "end": 37.96, + "confidence": 0.955 + }, + { + "text": "them,", + "start": 37.96, + "end": 38.0, + "confidence": 0.964 + }, + { + "text": "got", + "start": 38.0, + "end": 38.04, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 38.04, + "end": 38.08, + "confidence": 0.965 + }, + { + "text": "got", + "start": 38.08, + "end": 38.12, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 38.12, + "end": 38.16, + "confidence": 0.965 + }, + { + "text": "got", + "start": 38.16, + "end": 38.2, + "confidence": 0.959 + }, + { + "text": "them,", + "start": 38.2, + "end": 38.24, + "confidence": 0.968 + }, + { + "text": "got", + "start": 38.24, + "end": 38.28, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 38.28, + "end": 38.32, + "confidence": 0.969 + }, + { + "text": "got", + "start": 38.32, + "end": 38.36, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 38.36, + "end": 38.4, + "confidence": 0.968 + }, + { + "text": "got", + "start": 38.4, + "end": 38.44, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 38.44, + "end": 38.48, + "confidence": 0.971 + }, + { + "text": "got", + "start": 38.48, + "end": 38.52, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 38.52, + "end": 38.56, + "confidence": 0.972 + }, + { + "text": "got", + "start": 38.56, + "end": 38.6, + "confidence": 0.966 + }, + { + "text": "them,", + "start": 38.6, + "end": 38.64, + "confidence": 0.974 + }, + { + "text": "got", + "start": 38.64, + "end": 38.68, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 38.68, + "end": 38.72, + "confidence": 0.974 + }, + { + "text": "got", + "start": 38.72, + "end": 38.76, + "confidence": 0.969 + }, + { + "text": "them,", + "start": 38.76, + "end": 38.8, + "confidence": 0.974 + }, + { + "text": "got", + "start": 38.8, + "end": 38.84, + "confidence": 0.972 + }, + { + "text": "them,", 
+ "start": 38.84, + "end": 38.88, + "confidence": 0.976 + }, + { + "text": "got", + "start": 38.88, + "end": 38.92, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 38.92, + "end": 38.96, + "confidence": 0.978 + }, + { + "text": "got", + "start": 38.96, + "end": 39.0, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 39.0, + "end": 39.04, + "confidence": 0.978 + }, + { + "text": "got", + "start": 39.04, + "end": 39.08, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 39.08, + "end": 39.12, + "confidence": 0.979 + }, + { + "text": "got", + "start": 39.12, + "end": 39.16, + "confidence": 0.979 + }, + { + "text": "them,", + "start": 39.16, + "end": 39.2, + "confidence": 0.979 + }, + { + "text": "got", + "start": 39.2, + "end": 39.24, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 39.24, + "end": 39.28, + "confidence": 0.981 + }, + { + "text": "got", + "start": 39.28, + "end": 39.32, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 39.32, + "end": 39.36, + "confidence": 0.982 + }, + { + "text": "got", + "start": 39.36, + "end": 39.4, + "confidence": 0.983 + }, + { + "text": "them,", + "start": 39.4, + "end": 39.44, + "confidence": 0.983 + }, + { + "text": "got", + "start": 39.44, + "end": 39.48, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 39.48, + "end": 39.52, + "confidence": 0.983 + }, + { + "text": "got", + "start": 39.52, + "end": 39.56, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 39.56, + "end": 39.6, + "confidence": 0.983 + }, + { + "text": "got", + "start": 39.6, + "end": 39.64, + "confidence": 0.988 + }, + { + "text": "them,", + "start": 39.64, + "end": 39.68, + "confidence": 0.983 + }, + { + "text": "got", + "start": 39.68, + "end": 39.72, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 39.72, + "end": 39.76, + "confidence": 0.985 + }, + { + "text": "got", + "start": 39.76, + "end": 39.8, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 39.8, 
+ "end": 39.84, + "confidence": 0.985 + }, + { + "text": "got", + "start": 39.84, + "end": 39.88, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 39.88, + "end": 39.92, + "confidence": 0.985 + }, + { + "text": "got", + "start": 39.92, + "end": 39.96, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 39.96, + "end": 40.0, + "confidence": 0.986 + }, + { + "text": "got", + "start": 40.0, + "end": 40.04, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 40.04, + "end": 40.08, + "confidence": 0.986 + }, + { + "text": "got", + "start": 40.08, + "end": 40.12, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.12, + "end": 40.16, + "confidence": 0.986 + }, + { + "text": "got", + "start": 40.16, + "end": 40.2, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 40.2, + "end": 40.24, + "confidence": 0.987 + }, + { + "text": "got", + "start": 40.24, + "end": 40.28, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 40.28, + "end": 40.32, + "confidence": 0.987 + }, + { + "text": "got", + "start": 40.32, + "end": 40.36, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 40.36, + "end": 40.4, + "confidence": 0.987 + }, + { + "text": "got", + "start": 40.4, + "end": 40.44, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 40.44, + "end": 40.48, + "confidence": 0.987 + }, + { + "text": "got", + "start": 40.48, + "end": 40.52, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.52, + "end": 40.56, + "confidence": 0.988 + }, + { + "text": "got", + "start": 40.56, + "end": 40.6, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.6, + "end": 40.64, + "confidence": 0.987 + }, + { + "text": "got", + "start": 40.64, + "end": 40.68, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.68, + "end": 40.86, + "confidence": 0.989 + }, + { + "text": "got", + "start": 40.86, + "end": 40.9, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.9, + "end": 40.94, + 
"confidence": 0.989 + }, + { + "text": "got", + "start": 40.94, + "end": 40.98, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.98, + "end": 41.24, + "confidence": 0.989 + }, + { + "text": "got", + "start": 41.24, + "end": 41.36, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 41.36, + "end": 42.3, + "confidence": 0.989 + }, + { + "text": "got", + "start": 42.3, + "end": 42.5, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 42.5, + "end": 42.98, + "confidence": 0.989 + }, + { + "text": "got", + "start": 42.98, + "end": 44.02, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 44.02, + "end": 44.42, + "confidence": 0.989 + }, + { + "text": "got", + "start": 44.42, + "end": 44.78, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 44.78, + "end": 45.16, + "confidence": 0.989 + }, + { + "text": "got", + "start": 45.16, + "end": 46.22, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 46.22, + "end": 46.52, + "confidence": 0.99 + }, + { + "text": "got", + "start": 46.52, + "end": 46.72, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 46.72, + "end": 47.72, + "confidence": 0.99 + }, + { + "text": "got", + "start": 47.72, + "end": 48.58, + "confidence": 0.993 + }, + { + "text": "them", + "start": 48.58, + "end": 54.68, + "confidence": 0.996 + } + ] + }, + { + "id": 4, + "seek": 5500, + "start": 72.0, + "end": 84.66, + "text": " got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, 
got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.05313390894321048, + "compression_ratio": 29.52, + "no_speech_prob": 0.24548843502998352, + "confidence": 0.84, + "words": [ + { + "text": "got", + "start": 72.0, + "end": 72.04, + "confidence": 0.005 + }, + { + "text": "them,", + "start": 72.04, + "end": 72.08, + "confidence": 0.007 
+ }, + { + "text": "got", + "start": 72.08, + "end": 72.12, + "confidence": 0.049 + }, + { + "text": "them,", + "start": 72.12, + "end": 72.16, + "confidence": 0.35 + }, + { + "text": "got", + "start": 72.16, + "end": 72.2, + "confidence": 0.65 + }, + { + "text": "them,", + "start": 72.2, + "end": 72.24, + "confidence": 0.549 + }, + { + "text": "got", + "start": 72.24, + "end": 72.28, + "confidence": 0.837 + }, + { + "text": "them,", + "start": 72.28, + "end": 72.32, + "confidence": 0.614 + }, + { + "text": "got", + "start": 72.32, + "end": 72.36, + "confidence": 0.891 + }, + { + "text": "them,", + "start": 72.36, + "end": 72.4, + "confidence": 0.665 + }, + { + "text": "got", + "start": 72.4, + "end": 72.44, + "confidence": 0.909 + }, + { + "text": "them,", + "start": 72.44, + "end": 72.48, + "confidence": 0.689 + }, + { + "text": "got", + "start": 72.48, + "end": 72.52, + "confidence": 0.861 + }, + { + "text": "them,", + "start": 72.52, + "end": 72.56, + "confidence": 0.772 + }, + { + "text": "got", + "start": 72.56, + "end": 72.6, + "confidence": 0.91 + }, + { + "text": "them,", + "start": 72.6, + "end": 72.64, + "confidence": 0.84 + }, + { + "text": "got", + "start": 72.64, + "end": 72.68, + "confidence": 0.933 + }, + { + "text": "them,", + "start": 72.68, + "end": 72.72, + "confidence": 0.633 + }, + { + "text": "got", + "start": 72.72, + "end": 72.76, + "confidence": 0.173 + }, + { + "text": "them,", + "start": 72.76, + "end": 72.8, + "confidence": 0.776 + }, + { + "text": "got", + "start": 72.8, + "end": 72.84, + "confidence": 0.824 + }, + { + "text": "them,", + "start": 72.84, + "end": 72.88, + "confidence": 0.832 + }, + { + "text": "got", + "start": 72.88, + "end": 72.92, + "confidence": 0.846 + }, + { + "text": "them,", + "start": 72.92, + "end": 72.96, + "confidence": 0.849 + }, + { + "text": "got", + "start": 72.96, + "end": 73.0, + "confidence": 0.846 + }, + { + "text": "them,", + "start": 73.0, + "end": 73.04, + "confidence": 0.867 + }, + { + "text": 
"got", + "start": 73.04, + "end": 73.08, + "confidence": 0.87 + }, + { + "text": "them,", + "start": 73.08, + "end": 73.12, + "confidence": 0.893 + }, + { + "text": "got", + "start": 73.12, + "end": 73.16, + "confidence": 0.902 + }, + { + "text": "them,", + "start": 73.16, + "end": 73.2, + "confidence": 0.905 + }, + { + "text": "got", + "start": 73.2, + "end": 73.24, + "confidence": 0.898 + }, + { + "text": "them,", + "start": 73.24, + "end": 73.28, + "confidence": 0.906 + }, + { + "text": "got", + "start": 73.28, + "end": 73.32, + "confidence": 0.876 + }, + { + "text": "them,", + "start": 73.32, + "end": 73.36, + "confidence": 0.916 + }, + { + "text": "got", + "start": 73.36, + "end": 73.4, + "confidence": 0.885 + }, + { + "text": "them,", + "start": 73.4, + "end": 73.44, + "confidence": 0.923 + }, + { + "text": "got", + "start": 73.44, + "end": 73.48, + "confidence": 0.892 + }, + { + "text": "them,", + "start": 73.48, + "end": 73.52, + "confidence": 0.931 + }, + { + "text": "got", + "start": 73.52, + "end": 73.56, + "confidence": 0.902 + }, + { + "text": "them,", + "start": 73.56, + "end": 73.6, + "confidence": 0.935 + }, + { + "text": "got", + "start": 73.6, + "end": 73.64, + "confidence": 0.908 + }, + { + "text": "them,", + "start": 73.64, + "end": 73.68, + "confidence": 0.941 + }, + { + "text": "got", + "start": 73.68, + "end": 73.72, + "confidence": 0.912 + }, + { + "text": "them,", + "start": 73.72, + "end": 73.76, + "confidence": 0.944 + }, + { + "text": "got", + "start": 73.76, + "end": 73.8, + "confidence": 0.918 + }, + { + "text": "them,", + "start": 73.8, + "end": 73.84, + "confidence": 0.948 + }, + { + "text": "got", + "start": 73.84, + "end": 73.88, + "confidence": 0.923 + }, + { + "text": "them,", + "start": 73.88, + "end": 73.92, + "confidence": 0.95 + }, + { + "text": "got", + "start": 73.92, + "end": 73.96, + "confidence": 0.926 + }, + { + "text": "them,", + "start": 73.96, + "end": 74.0, + "confidence": 0.953 + }, + { + "text": "got", + "start": 
74.0, + "end": 74.04, + "confidence": 0.927 + }, + { + "text": "them,", + "start": 74.04, + "end": 74.08, + "confidence": 0.956 + }, + { + "text": "got", + "start": 74.08, + "end": 74.12, + "confidence": 0.927 + }, + { + "text": "them,", + "start": 74.12, + "end": 74.16, + "confidence": 0.956 + }, + { + "text": "got", + "start": 74.16, + "end": 74.2, + "confidence": 0.928 + }, + { + "text": "them,", + "start": 74.2, + "end": 74.24, + "confidence": 0.957 + }, + { + "text": "got", + "start": 74.24, + "end": 74.28, + "confidence": 0.93 + }, + { + "text": "them,", + "start": 74.28, + "end": 74.32, + "confidence": 0.959 + }, + { + "text": "got", + "start": 74.32, + "end": 74.36, + "confidence": 0.93 + }, + { + "text": "them,", + "start": 74.36, + "end": 74.4, + "confidence": 0.96 + }, + { + "text": "got", + "start": 74.4, + "end": 74.44, + "confidence": 0.931 + }, + { + "text": "them,", + "start": 74.44, + "end": 74.48, + "confidence": 0.961 + }, + { + "text": "got", + "start": 74.48, + "end": 74.52, + "confidence": 0.933 + }, + { + "text": "them,", + "start": 74.52, + "end": 74.56, + "confidence": 0.962 + }, + { + "text": "got", + "start": 74.56, + "end": 74.6, + "confidence": 0.933 + }, + { + "text": "them,", + "start": 74.6, + "end": 74.64, + "confidence": 0.963 + }, + { + "text": "got", + "start": 74.64, + "end": 74.68, + "confidence": 0.934 + }, + { + "text": "them,", + "start": 74.68, + "end": 74.72, + "confidence": 0.963 + }, + { + "text": "got", + "start": 74.72, + "end": 74.76, + "confidence": 0.935 + }, + { + "text": "them,", + "start": 74.76, + "end": 74.8, + "confidence": 0.964 + }, + { + "text": "got", + "start": 74.8, + "end": 74.84, + "confidence": 0.937 + }, + { + "text": "them,", + "start": 74.84, + "end": 74.88, + "confidence": 0.964 + }, + { + "text": "got", + "start": 74.88, + "end": 74.92, + "confidence": 0.939 + }, + { + "text": "them,", + "start": 74.92, + "end": 74.96, + "confidence": 0.965 + }, + { + "text": "got", + "start": 74.96, + "end": 
75.0, + "confidence": 0.941 + }, + { + "text": "them,", + "start": 75.0, + "end": 75.04, + "confidence": 0.966 + }, + { + "text": "got", + "start": 75.04, + "end": 75.08, + "confidence": 0.943 + }, + { + "text": "them,", + "start": 75.08, + "end": 75.12, + "confidence": 0.967 + }, + { + "text": "got", + "start": 75.12, + "end": 75.16, + "confidence": 0.945 + }, + { + "text": "them,", + "start": 75.16, + "end": 75.2, + "confidence": 0.968 + }, + { + "text": "got", + "start": 75.2, + "end": 75.24, + "confidence": 0.946 + }, + { + "text": "them,", + "start": 75.24, + "end": 75.28, + "confidence": 0.968 + }, + { + "text": "got", + "start": 75.28, + "end": 75.32, + "confidence": 0.949 + }, + { + "text": "them,", + "start": 75.32, + "end": 75.36, + "confidence": 0.969 + }, + { + "text": "got", + "start": 75.36, + "end": 75.4, + "confidence": 0.952 + }, + { + "text": "them,", + "start": 75.4, + "end": 75.44, + "confidence": 0.969 + }, + { + "text": "got", + "start": 75.44, + "end": 75.48, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 75.48, + "end": 75.52, + "confidence": 0.97 + }, + { + "text": "got", + "start": 75.52, + "end": 75.56, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 75.56, + "end": 75.6, + "confidence": 0.971 + }, + { + "text": "got", + "start": 75.6, + "end": 75.64, + "confidence": 0.96 + }, + { + "text": "them,", + "start": 75.64, + "end": 75.68, + "confidence": 0.973 + }, + { + "text": "got", + "start": 75.68, + "end": 75.72, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 75.72, + "end": 75.76, + "confidence": 0.973 + }, + { + "text": "got", + "start": 75.76, + "end": 75.8, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 75.8, + "end": 75.84, + "confidence": 0.973 + }, + { + "text": "got", + "start": 75.84, + "end": 75.88, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 75.88, + "end": 75.92, + "confidence": 0.973 + }, + { + "text": "got", + "start": 75.92, + "end": 75.96, + 
"confidence": 0.969 + }, + { + "text": "them,", + "start": 75.96, + "end": 76.0, + "confidence": 0.975 + }, + { + "text": "got", + "start": 76.0, + "end": 76.04, + "confidence": 0.971 + }, + { + "text": "them,", + "start": 76.04, + "end": 76.08, + "confidence": 0.975 + }, + { + "text": "got", + "start": 76.08, + "end": 76.12, + "confidence": 0.972 + }, + { + "text": "them,", + "start": 76.12, + "end": 76.16, + "confidence": 0.976 + }, + { + "text": "got", + "start": 76.16, + "end": 76.2, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 76.2, + "end": 76.24, + "confidence": 0.976 + }, + { + "text": "got", + "start": 76.24, + "end": 76.28, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 76.28, + "end": 76.32, + "confidence": 0.975 + }, + { + "text": "got", + "start": 76.32, + "end": 76.36, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 76.36, + "end": 76.4, + "confidence": 0.977 + }, + { + "text": "got", + "start": 76.4, + "end": 76.44, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 76.44, + "end": 76.48, + "confidence": 0.977 + }, + { + "text": "got", + "start": 76.48, + "end": 76.52, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 76.52, + "end": 76.56, + "confidence": 0.978 + }, + { + "text": "got", + "start": 76.56, + "end": 76.6, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 76.6, + "end": 76.64, + "confidence": 0.978 + }, + { + "text": "got", + "start": 76.64, + "end": 76.68, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 76.68, + "end": 76.72, + "confidence": 0.979 + }, + { + "text": "got", + "start": 76.72, + "end": 76.76, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 76.76, + "end": 76.8, + "confidence": 0.978 + }, + { + "text": "got", + "start": 76.8, + "end": 76.84, + "confidence": 0.983 + }, + { + "text": "them,", + "start": 76.84, + "end": 76.88, + "confidence": 0.98 + }, + { + "text": "got", + "start": 76.88, + "end": 76.92, + "confidence": 0.984 
+ }, + { + "text": "them,", + "start": 76.92, + "end": 76.96, + "confidence": 0.98 + }, + { + "text": "got", + "start": 76.96, + "end": 77.0, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 77.0, + "end": 77.04, + "confidence": 0.98 + }, + { + "text": "got", + "start": 77.04, + "end": 77.08, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 77.08, + "end": 77.12, + "confidence": 0.98 + }, + { + "text": "got", + "start": 77.12, + "end": 77.16, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 77.16, + "end": 77.2, + "confidence": 0.98 + }, + { + "text": "got", + "start": 77.2, + "end": 77.24, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 77.24, + "end": 77.28, + "confidence": 0.981 + }, + { + "text": "got", + "start": 77.28, + "end": 77.32, + "confidence": 0.988 + }, + { + "text": "them,", + "start": 77.32, + "end": 77.36, + "confidence": 0.982 + }, + { + "text": "got", + "start": 77.36, + "end": 77.4, + "confidence": 0.988 + }, + { + "text": "them,", + "start": 77.4, + "end": 77.44, + "confidence": 0.983 + }, + { + "text": "got", + "start": 77.44, + "end": 77.48, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 77.48, + "end": 77.52, + "confidence": 0.982 + }, + { + "text": "got", + "start": 77.52, + "end": 77.56, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 77.56, + "end": 77.6, + "confidence": 0.984 + }, + { + "text": "got", + "start": 77.6, + "end": 77.64, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 77.64, + "end": 78.74, + "confidence": 0.983 + }, + { + "text": "got", + "start": 78.74, + "end": 78.8, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 78.8, + "end": 79.8, + "confidence": 0.984 + }, + { + "text": "got", + "start": 79.8, + "end": 80.08, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 80.08, + "end": 83.98, + "confidence": 0.984 + }, + { + "text": "got", + "start": 83.98, + "end": 84.16, + "confidence": 0.991 + }, + { + "text": 
"them", + "start": 84.16, + "end": 84.66, + "confidence": 0.994 + } + ] + }, + { + "id": 5, + "seek": 8500, + "start": 84.68, + "end": 94.48, + "text": " got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 
658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.04979915789959142, + "compression_ratio": 29.52, + "no_speech_prob": 0.6971050500869751, + "confidence": 0.848, + "words": [ + { + "text": "got", + "start": 84.68, + "end": 85.42, + "confidence": 0.0 + }, + { + "text": "them,", + "start": 85.42, + "end": 85.92, + "confidence": 0.023 + }, + { + "text": "got", + "start": 85.92, + "end": 86.74, + "confidence": 0.821 + }, + { + "text": "them,", + "start": 86.74, + "end": 87.2, + "confidence": 0.604 + }, + { + "text": "got", + "start": 87.2, + "end": 88.62, + "confidence": 0.84 + }, + { + "text": "them,", + "start": 88.62, + "end": 88.66, + "confidence": 0.451 + }, + { + "text": "got", + "start": 88.66, + "end": 88.84, + "confidence": 0.887 + }, + { + "text": "them,", + "start": 88.84, + "end": 88.88, + "confidence": 0.382 + }, + { + "text": "got", + "start": 88.88, + "end": 88.92, + "confidence": 0.902 + }, + { + "text": "them,", + "start": 88.92, + "end": 88.96, + "confidence": 0.353 + }, + { + "text": "got", + "start": 88.96, + "end": 89.0, + "confidence": 0.896 + }, + { + "text": "them,", + "start": 89.0, + "end": 89.04, + "confidence": 0.352 + }, + { + "text": "got", + "start": 89.04, + "end": 89.08, + "confidence": 0.869 + }, + { + "text": "them,", + "start": 89.08, + "end": 89.12, + "confidence": 0.423 + }, + { + "text": "got", + "start": 89.12, + "end": 89.16, + "confidence": 0.889 + }, + { + "text": "them,", + "start": 89.16, + "end": 89.2, + "confidence": 0.528 + }, + { + "text": "got", + "start": 89.2, + "end": 89.24, + "confidence": 0.91 + }, + { + "text": "them,", + "start": 
89.24, + "end": 89.28, + "confidence": 0.619 + }, + { + "text": "got", + "start": 89.28, + "end": 89.32, + "confidence": 0.65 + }, + { + "text": "them,", + "start": 89.32, + "end": 89.36, + "confidence": 0.71 + }, + { + "text": "got", + "start": 89.36, + "end": 89.4, + "confidence": 0.835 + }, + { + "text": "them,", + "start": 89.4, + "end": 89.44, + "confidence": 0.815 + }, + { + "text": "got", + "start": 89.44, + "end": 89.48, + "confidence": 0.908 + }, + { + "text": "them,", + "start": 89.48, + "end": 89.52, + "confidence": 0.874 + }, + { + "text": "got", + "start": 89.52, + "end": 89.56, + "confidence": 0.925 + }, + { + "text": "them,", + "start": 89.56, + "end": 89.6, + "confidence": 0.907 + }, + { + "text": "got", + "start": 89.6, + "end": 89.64, + "confidence": 0.941 + }, + { + "text": "them,", + "start": 89.64, + "end": 89.68, + "confidence": 0.927 + }, + { + "text": "got", + "start": 89.68, + "end": 89.72, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 89.72, + "end": 89.76, + "confidence": 0.941 + }, + { + "text": "got", + "start": 89.76, + "end": 89.8, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 89.8, + "end": 89.84, + "confidence": 0.942 + }, + { + "text": "got", + "start": 89.84, + "end": 89.88, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 89.88, + "end": 89.92, + "confidence": 0.943 + }, + { + "text": "got", + "start": 89.92, + "end": 89.96, + "confidence": 0.95 + }, + { + "text": "them,", + "start": 89.96, + "end": 90.0, + "confidence": 0.945 + }, + { + "text": "got", + "start": 90.0, + "end": 90.04, + "confidence": 0.945 + }, + { + "text": "them,", + "start": 90.04, + "end": 90.08, + "confidence": 0.952 + }, + { + "text": "got", + "start": 90.08, + "end": 90.12, + "confidence": 0.946 + }, + { + "text": "them,", + "start": 90.12, + "end": 90.16, + "confidence": 0.955 + }, + { + "text": "got", + "start": 90.16, + "end": 90.2, + "confidence": 0.947 + }, + { + "text": "them,", + "start": 90.2, + "end": 
90.24, + "confidence": 0.961 + }, + { + "text": "got", + "start": 90.24, + "end": 90.28, + "confidence": 0.946 + }, + { + "text": "them,", + "start": 90.28, + "end": 90.32, + "confidence": 0.964 + }, + { + "text": "got", + "start": 90.32, + "end": 90.36, + "confidence": 0.949 + }, + { + "text": "them,", + "start": 90.36, + "end": 90.4, + "confidence": 0.967 + }, + { + "text": "got", + "start": 90.4, + "end": 90.44, + "confidence": 0.952 + }, + { + "text": "them,", + "start": 90.44, + "end": 90.48, + "confidence": 0.97 + }, + { + "text": "got", + "start": 90.48, + "end": 90.52, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 90.52, + "end": 90.56, + "confidence": 0.973 + }, + { + "text": "got", + "start": 90.56, + "end": 90.6, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 90.6, + "end": 90.64, + "confidence": 0.977 + }, + { + "text": "got", + "start": 90.64, + "end": 90.68, + "confidence": 0.953 + }, + { + "text": "them,", + "start": 90.68, + "end": 90.72, + "confidence": 0.977 + }, + { + "text": "got", + "start": 90.72, + "end": 90.76, + "confidence": 0.953 + }, + { + "text": "them,", + "start": 90.76, + "end": 90.8, + "confidence": 0.978 + }, + { + "text": "got", + "start": 90.8, + "end": 90.84, + "confidence": 0.955 + }, + { + "text": "them,", + "start": 90.84, + "end": 90.88, + "confidence": 0.979 + }, + { + "text": "got", + "start": 90.88, + "end": 90.92, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 90.92, + "end": 90.96, + "confidence": 0.981 + }, + { + "text": "got", + "start": 90.96, + "end": 91.0, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 91.0, + "end": 91.04, + "confidence": 0.981 + }, + { + "text": "got", + "start": 91.04, + "end": 91.08, + "confidence": 0.958 + }, + { + "text": "them,", + "start": 91.08, + "end": 91.12, + "confidence": 0.982 + }, + { + "text": "got", + "start": 91.12, + "end": 91.16, + "confidence": 0.958 + }, + { + "text": "them,", + "start": 91.16, + "end": 91.2, + 
"confidence": 0.983 + }, + { + "text": "got", + "start": 91.2, + "end": 91.24, + "confidence": 0.959 + }, + { + "text": "them,", + "start": 91.24, + "end": 91.28, + "confidence": 0.983 + }, + { + "text": "got", + "start": 91.28, + "end": 91.32, + "confidence": 0.96 + }, + { + "text": "them,", + "start": 91.32, + "end": 91.36, + "confidence": 0.984 + }, + { + "text": "got", + "start": 91.36, + "end": 91.4, + "confidence": 0.962 + }, + { + "text": "them,", + "start": 91.4, + "end": 91.44, + "confidence": 0.983 + }, + { + "text": "got", + "start": 91.44, + "end": 91.48, + "confidence": 0.964 + }, + { + "text": "them,", + "start": 91.48, + "end": 91.52, + "confidence": 0.985 + }, + { + "text": "got", + "start": 91.52, + "end": 91.56, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 91.56, + "end": 91.6, + "confidence": 0.985 + }, + { + "text": "got", + "start": 91.6, + "end": 91.64, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 91.64, + "end": 91.68, + "confidence": 0.986 + }, + { + "text": "got", + "start": 91.68, + "end": 91.72, + "confidence": 0.969 + }, + { + "text": "them,", + "start": 91.72, + "end": 91.76, + "confidence": 0.986 + }, + { + "text": "got", + "start": 91.76, + "end": 91.8, + "confidence": 0.969 + }, + { + "text": "them,", + "start": 91.8, + "end": 91.84, + "confidence": 0.987 + }, + { + "text": "got", + "start": 91.84, + "end": 91.88, + "confidence": 0.972 + }, + { + "text": "them,", + "start": 91.88, + "end": 91.92, + "confidence": 0.987 + }, + { + "text": "got", + "start": 91.92, + "end": 91.96, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 91.96, + "end": 92.0, + "confidence": 0.987 + }, + { + "text": "got", + "start": 92.0, + "end": 92.04, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 92.04, + "end": 92.08, + "confidence": 0.988 + }, + { + "text": "got", + "start": 92.08, + "end": 92.12, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 92.12, + "end": 92.16, + "confidence": 
0.988 + }, + { + "text": "got", + "start": 92.16, + "end": 92.2, + "confidence": 0.979 + }, + { + "text": "them,", + "start": 92.2, + "end": 92.24, + "confidence": 0.989 + }, + { + "text": "got", + "start": 92.24, + "end": 92.28, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 92.28, + "end": 92.32, + "confidence": 0.989 + }, + { + "text": "got", + "start": 92.32, + "end": 92.36, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 92.36, + "end": 92.4, + "confidence": 0.989 + }, + { + "text": "got", + "start": 92.4, + "end": 92.44, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 92.44, + "end": 92.48, + "confidence": 0.989 + }, + { + "text": "got", + "start": 92.48, + "end": 92.52, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 92.52, + "end": 92.56, + "confidence": 0.99 + }, + { + "text": "got", + "start": 92.56, + "end": 92.6, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 92.6, + "end": 92.64, + "confidence": 0.991 + }, + { + "text": "got", + "start": 92.64, + "end": 92.68, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 92.68, + "end": 92.72, + "confidence": 0.991 + }, + { + "text": "got", + "start": 92.72, + "end": 92.76, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 92.76, + "end": 92.8, + "confidence": 0.991 + }, + { + "text": "got", + "start": 92.8, + "end": 92.84, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 92.84, + "end": 92.88, + "confidence": 0.991 + }, + { + "text": "got", + "start": 92.88, + "end": 92.92, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 92.92, + "end": 92.96, + "confidence": 0.992 + }, + { + "text": "got", + "start": 92.96, + "end": 93.0, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 93.0, + "end": 93.04, + "confidence": 0.992 + }, + { + "text": "got", + "start": 93.04, + "end": 93.08, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 93.08, + "end": 93.12, + "confidence": 0.992 + }, + { + 
"text": "got", + "start": 93.12, + "end": 93.16, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 93.16, + "end": 93.2, + "confidence": 0.993 + }, + { + "text": "got", + "start": 93.2, + "end": 93.24, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 93.24, + "end": 93.28, + "confidence": 0.993 + }, + { + "text": "got", + "start": 93.28, + "end": 93.32, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 93.32, + "end": 93.36, + "confidence": 0.993 + }, + { + "text": "got", + "start": 93.36, + "end": 93.4, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 93.4, + "end": 93.44, + "confidence": 0.993 + }, + { + "text": "got", + "start": 93.44, + "end": 93.48, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 93.48, + "end": 93.52, + "confidence": 0.994 + }, + { + "text": "got", + "start": 93.52, + "end": 93.56, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 93.56, + "end": 93.6, + "confidence": 0.994 + }, + { + "text": "got", + "start": 93.6, + "end": 93.64, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 93.64, + "end": 93.68, + "confidence": 0.993 + }, + { + "text": "got", + "start": 93.68, + "end": 93.72, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 93.72, + "end": 93.76, + "confidence": 0.994 + }, + { + "text": "got", + "start": 93.76, + "end": 93.8, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 93.8, + "end": 93.84, + "confidence": 0.994 + }, + { + "text": "got", + "start": 93.84, + "end": 93.88, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 93.88, + "end": 93.92, + "confidence": 0.994 + }, + { + "text": "got", + "start": 93.92, + "end": 93.96, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 93.96, + "end": 94.0, + "confidence": 0.994 + }, + { + "text": "got", + "start": 94.0, + "end": 94.04, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 94.04, + "end": 94.08, + "confidence": 0.994 + }, + { + "text": "got", + 
"start": 94.08, + "end": 94.12, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 94.12, + "end": 94.16, + "confidence": 0.995 + }, + { + "text": "got", + "start": 94.16, + "end": 94.2, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 94.2, + "end": 94.24, + "confidence": 0.994 + }, + { + "text": "got", + "start": 94.24, + "end": 94.28, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 94.28, + "end": 94.32, + "confidence": 0.994 + }, + { + "text": "got", + "start": 94.32, + "end": 94.36, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 94.36, + "end": 94.4, + "confidence": 0.994 + }, + { + "text": "got", + "start": 94.4, + "end": 94.44, + "confidence": 0.995 + }, + { + "text": "them", + "start": 94.44, + "end": 94.48, + "confidence": 0.998 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/naive/accurate_apollo11.mp3.words.json b/tests/expected/naive/accurate_apollo11.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..f0cf1ef6d3d7467988994c5788156f699c361d05 --- /dev/null +++ b/tests/expected/naive/accurate_apollo11.mp3.words.json @@ -0,0 +1,2421 @@ +{ + "text": " Apollo 11, Houston, we got a recommendation for you on your Soyuz-EA GLEM-E-G-E-A. All right. Okay. Yeah, sir. I'll take that camera. They make the one that's on the helmet we were going to have in B-1. The other one. And you can put the other one on Mike's helmet. We'll show it to you in a blink of an eye. Over. All right, sir. All right, sir. There's a bit of an helmet that's in B-1. That's the other one, Mike's helmet. Mike, go in there. At least we're safe. We've got him in there. Helmet tag. And, uh, we've got him in there. Yeah. Helmet. In the helmet tag. At least it's in the helmet tag. Roger. Roger. We're thinking Nick and me on it. Yeah, we're thinking Nick and me on it. See you later. Hey, we were going to hang me on it. We were going to hang you on it with a cover. 
I tried it already. OK, fine. We weren't sure of that. It's just a suggestion. We thought we'd take you to check it out. It's not much of an order to turn that. So, uh, I guess we're going to come up with this. Let us know. OK, no problem. OK, no problem. We'll let you know when the end of the lunch. Thank you.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.54, + "end": 6.66, + "text": " Apollo 11, Houston, we got a recommendation for you on your Soyuz-EA GLEM-E-G-E-A.", + "tokens": [ + 50364, + 25187, + 2975, + 11, + 18717, + 11, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 24758, + 3334, + 12, + 36, + 32, + 460, + 2634, + 44, + 12, + 36, + 12, + 38, + 12, + 36, + 12, + 32, + 13, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -0.5374142219280374, + "compression_ratio": 1.4337899543378996, + "no_speech_prob": 0.44995221495628357, + "confidence": 0.578, + "words": [ + { + "text": "Apollo", + "start": 0.54, + "end": 0.92, + "confidence": 0.425 + }, + { + "text": "11,", + "start": 0.92, + "end": 1.26, + "confidence": 0.842 + }, + { + "text": "Houston,", + "start": 1.52, + "end": 1.72, + "confidence": 0.665 + }, + { + "text": "we", + "start": 1.9, + "end": 1.94, + "confidence": 0.983 + }, + { + "text": "got", + "start": 1.94, + "end": 2.1, + "confidence": 0.743 + }, + { + "text": "a", + "start": 2.1, + "end": 2.26, + "confidence": 0.993 + }, + { + "text": "recommendation", + "start": 2.26, + "end": 2.86, + "confidence": 0.977 + }, + { + "text": "for", + "start": 2.86, + "end": 3.44, + "confidence": 0.94 + }, + { + "text": "you", + "start": 3.44, + "end": 3.6, + "confidence": 0.993 + }, + { + "text": "on", + "start": 3.6, + "end": 3.74, + "confidence": 0.918 + }, + { + "text": "your", + "start": 3.74, + "end": 3.94, + "confidence": 0.976 + }, + { + "text": "Soyuz-EA", + "start": 3.94, + "end": 5.26, + "confidence": 0.455 + }, + { + "text": "GLEM-E-G-E-A.", + "start": 5.26, + "end": 6.66, + "confidence": 0.436 + } + ] + }, + { + "id": 1, + 
"seek": 0, + "start": 10.8, + "end": 11.08, + "text": " All right.", + "tokens": [ + 50714, + 1057, + 558, + 13, + 50914 + ], + "temperature": 0.0, + "avg_logprob": -0.5374142219280374, + "compression_ratio": 1.4337899543378996, + "no_speech_prob": 0.44995221495628357, + "confidence": 0.14, + "words": [ + { + "text": "All", + "start": 10.8, + "end": 10.82, + "confidence": 0.005 + }, + { + "text": "right.", + "start": 10.82, + "end": 11.08, + "confidence": 0.747 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 12.2, + "end": 12.22, + "text": " Okay.", + "tokens": [ + 50914, + 1033, + 13, + 50964 + ], + "temperature": 0.0, + "avg_logprob": -0.5374142219280374, + "compression_ratio": 1.4337899543378996, + "no_speech_prob": 0.44995221495628357, + "confidence": 0.287, + "words": [ + { + "text": "Okay.", + "start": 12.2, + "end": 12.22, + "confidence": 0.287 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 12.22, + "end": 13.38, + "text": " Yeah, sir.", + "tokens": [ + 50964, + 865, + 11, + 4735, + 13, + 51014 + ], + "temperature": 0.0, + "avg_logprob": -0.5374142219280374, + "compression_ratio": 1.4337899543378996, + "no_speech_prob": 0.44995221495628357, + "confidence": 0.124, + "words": [ + { + "text": "Yeah,", + "start": 12.22, + "end": 12.64, + "confidence": 0.113 + }, + { + "text": "sir.", + "start": 12.76, + "end": 13.38, + "confidence": 0.137 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 13.38, + "end": 14.5, + "text": " I'll take that camera.", + "tokens": [ + 51014, + 286, + 603, + 747, + 300, + 2799, + 13, + 51114 + ], + "temperature": 0.0, + "avg_logprob": -0.5374142219280374, + "compression_ratio": 1.4337899543378996, + "no_speech_prob": 0.44995221495628357, + "confidence": 0.187, + "words": [ + { + "text": "I'll", + "start": 13.38, + "end": 13.78, + "confidence": 0.076 + }, + { + "text": "take", + "start": 13.78, + "end": 14.0, + "confidence": 0.182 + }, + { + "text": "that", + "start": 14.0, + "end": 14.18, + "confidence": 0.179 + }, + { + 
"text": "camera.", + "start": 14.18, + "end": 14.5, + "confidence": 0.471 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 14.5, + "end": 19.08, + "text": " They make the one that's on the helmet we were going to have in B-1.", + "tokens": [ + 51114, + 814, + 652, + 264, + 472, + 300, + 311, + 322, + 264, + 15922, + 321, + 645, + 516, + 281, + 362, + 294, + 363, + 12, + 16, + 13, + 51314 + ], + "temperature": 0.0, + "avg_logprob": -0.5374142219280374, + "compression_ratio": 1.4337899543378996, + "no_speech_prob": 0.44995221495628357, + "confidence": 0.262, + "words": [ + { + "text": "They", + "start": 14.5, + "end": 14.92, + "confidence": 0.036 + }, + { + "text": "make", + "start": 14.92, + "end": 15.7, + "confidence": 0.356 + }, + { + "text": "the", + "start": 15.7, + "end": 15.86, + "confidence": 0.081 + }, + { + "text": "one", + "start": 15.86, + "end": 16.08, + "confidence": 0.553 + }, + { + "text": "that's", + "start": 16.08, + "end": 16.26, + "confidence": 0.408 + }, + { + "text": "on", + "start": 16.26, + "end": 16.46, + "confidence": 0.746 + }, + { + "text": "the", + "start": 16.46, + "end": 16.74, + "confidence": 0.445 + }, + { + "text": "helmet", + "start": 16.74, + "end": 17.24, + "confidence": 0.026 + }, + { + "text": "we", + "start": 17.24, + "end": 17.52, + "confidence": 0.006 + }, + { + "text": "were", + "start": 17.52, + "end": 17.78, + "confidence": 0.095 + }, + { + "text": "going", + "start": 17.78, + "end": 17.94, + "confidence": 0.451 + }, + { + "text": "to", + "start": 17.94, + "end": 17.98, + "confidence": 0.969 + }, + { + "text": "have", + "start": 17.98, + "end": 18.16, + "confidence": 0.965 + }, + { + "text": "in", + "start": 18.16, + "end": 18.36, + "confidence": 0.8 + }, + { + "text": "B-1.", + "start": 18.36, + "end": 19.08, + "confidence": 0.479 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 19.08, + "end": 19.84, + "text": " The other one.", + "tokens": [ + 51314, + 440, + 661, + 472, + 13, + 51364 + ], + "temperature": 0.0, + 
"avg_logprob": -0.5374142219280374, + "compression_ratio": 1.4337899543378996, + "no_speech_prob": 0.44995221495628357, + "confidence": 0.126, + "words": [ + { + "text": "The", + "start": 19.08, + "end": 19.38, + "confidence": 0.004 + }, + { + "text": "other", + "start": 19.38, + "end": 19.62, + "confidence": 0.141 + }, + { + "text": "one.", + "start": 19.62, + "end": 19.84, + "confidence": 0.691 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 20.04, + "end": 22.82, + "text": " And you can put the other one on Mike's helmet.", + "tokens": [ + 51364, + 400, + 291, + 393, + 829, + 264, + 661, + 472, + 322, + 6602, + 311, + 15922, + 13, + 51514 + ], + "temperature": 0.0, + "avg_logprob": -0.5374142219280374, + "compression_ratio": 1.4337899543378996, + "no_speech_prob": 0.44995221495628357, + "confidence": 0.523, + "words": [ + { + "text": "And", + "start": 20.04, + "end": 20.22, + "confidence": 0.344 + }, + { + "text": "you", + "start": 20.22, + "end": 20.32, + "confidence": 0.892 + }, + { + "text": "can", + "start": 20.32, + "end": 20.48, + "confidence": 0.126 + }, + { + "text": "put", + "start": 20.48, + "end": 20.64, + "confidence": 0.894 + }, + { + "text": "the", + "start": 20.64, + "end": 20.84, + "confidence": 0.979 + }, + { + "text": "other", + "start": 20.84, + "end": 21.0, + "confidence": 0.996 + }, + { + "text": "one", + "start": 21.0, + "end": 21.18, + "confidence": 0.972 + }, + { + "text": "on", + "start": 21.18, + "end": 21.38, + "confidence": 0.969 + }, + { + "text": "Mike's", + "start": 21.38, + "end": 22.72, + "confidence": 0.232 + }, + { + "text": "helmet.", + "start": 22.72, + "end": 22.82, + "confidence": 0.494 + } + ] + }, + { + "id": 8, + "seek": 0, + "start": 22.82, + "end": 24.76, + "text": " We'll show it to you in a blink of an eye.", + "tokens": [ + 51514, + 492, + 603, + 855, + 309, + 281, + 291, + 294, + 257, + 24667, + 295, + 364, + 3313, + 13, + 51614 + ], + "temperature": 0.0, + "avg_logprob": -0.5374142219280374, + 
"compression_ratio": 1.4337899543378996, + "no_speech_prob": 0.44995221495628357, + "confidence": 0.19, + "words": [ + { + "text": "We'll", + "start": 22.82, + "end": 23.08, + "confidence": 0.07 + }, + { + "text": "show", + "start": 23.08, + "end": 23.24, + "confidence": 0.005 + }, + { + "text": "it", + "start": 23.24, + "end": 23.38, + "confidence": 0.133 + }, + { + "text": "to", + "start": 23.38, + "end": 23.52, + "confidence": 0.369 + }, + { + "text": "you", + "start": 23.52, + "end": 23.66, + "confidence": 0.663 + }, + { + "text": "in", + "start": 23.66, + "end": 23.86, + "confidence": 0.277 + }, + { + "text": "a", + "start": 23.86, + "end": 23.88, + "confidence": 0.752 + }, + { + "text": "blink", + "start": 23.88, + "end": 24.06, + "confidence": 0.004 + }, + { + "text": "of", + "start": 24.06, + "end": 24.28, + "confidence": 0.929 + }, + { + "text": "an", + "start": 24.28, + "end": 24.44, + "confidence": 0.943 + }, + { + "text": "eye.", + "start": 24.44, + "end": 24.76, + "confidence": 0.89 + } + ] + }, + { + "id": 9, + "seek": 0, + "start": 24.86, + "end": 25.1, + "text": " Over.", + "tokens": [ + 51614, + 4886, + 13, + 51664 + ], + "temperature": 0.0, + "avg_logprob": -0.5374142219280374, + "compression_ratio": 1.4337899543378996, + "no_speech_prob": 0.44995221495628357, + "confidence": 0.117, + "words": [ + { + "text": "Over.", + "start": 24.86, + "end": 25.1, + "confidence": 0.117 + } + ] + }, + { + "id": 10, + "seek": 3000, + "start": 31.3, + "end": 32.08, + "text": " All right, sir.", + "tokens": [ + 50414, + 1057, + 558, + 11, + 4735, + 13, + 50514 + ], + "temperature": 0.0, + "avg_logprob": -0.4253494538456561, + "compression_ratio": 1.9318181818181819, + "no_speech_prob": 0.06506537646055222, + "confidence": 0.106, + "words": [ + { + "text": "All", + "start": 31.3, + "end": 31.52, + "confidence": 0.022 + }, + { + "text": "right,", + "start": 31.52, + "end": 31.62, + "confidence": 0.647 + }, + { + "text": "sir.", + "start": 31.74, + "end": 32.08, + 
"confidence": 0.038 + } + ] + }, + { + "id": 11, + "seek": 3000, + "start": 32.62, + "end": 33.3, + "text": " All right, sir.", + "tokens": [ + 50514, + 1057, + 558, + 11, + 4735, + 13, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.4253494538456561, + "compression_ratio": 1.9318181818181819, + "no_speech_prob": 0.06506537646055222, + "confidence": 0.044, + "words": [ + { + "text": "All", + "start": 32.62, + "end": 32.84, + "confidence": 0.014 + }, + { + "text": "right,", + "start": 32.84, + "end": 32.92, + "confidence": 0.831 + }, + { + "text": "sir.", + "start": 33.08, + "end": 33.3, + "confidence": 0.004 + } + ] + }, + { + "id": 12, + "seek": 3000, + "start": 33.5, + "end": 34.98, + "text": " There's a bit of an helmet that's in B-1.", + "tokens": [ + 50564, + 821, + 311, + 257, + 857, + 295, + 364, + 15922, + 300, + 311, + 294, + 363, + 12, + 16, + 13, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.4253494538456561, + "compression_ratio": 1.9318181818181819, + "no_speech_prob": 0.06506537646055222, + "confidence": 0.078, + "words": [ + { + "text": "There's", + "start": 33.5, + "end": 33.58, + "confidence": 0.05 + }, + { + "text": "a", + "start": 33.58, + "end": 33.6, + "confidence": 0.352 + }, + { + "text": "bit", + "start": 33.6, + "end": 33.74, + "confidence": 0.164 + }, + { + "text": "of", + "start": 33.74, + "end": 33.88, + "confidence": 0.681 + }, + { + "text": "an", + "start": 33.88, + "end": 34.02, + "confidence": 0.374 + }, + { + "text": "helmet", + "start": 34.02, + "end": 34.2, + "confidence": 0.002 + }, + { + "text": "that's", + "start": 34.2, + "end": 34.46, + "confidence": 0.207 + }, + { + "text": "in", + "start": 34.46, + "end": 34.64, + "confidence": 0.439 + }, + { + "text": "B-1.", + "start": 34.64, + "end": 34.98, + "confidence": 0.021 + } + ] + }, + { + "id": 13, + "seek": 3000, + "start": 35.5, + "end": 36.44, + "text": " That's the other one, Mike's helmet.", + "tokens": [ + 50664, + 663, + 311, + 264, + 661, + 472, + 11, + 
6602, + 311, + 15922, + 13, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.4253494538456561, + "compression_ratio": 1.9318181818181819, + "no_speech_prob": 0.06506537646055222, + "confidence": 0.036, + "words": [ + { + "text": "That's", + "start": 35.5, + "end": 35.72, + "confidence": 0.114 + }, + { + "text": "the", + "start": 35.72, + "end": 35.74, + "confidence": 0.009 + }, + { + "text": "other", + "start": 35.74, + "end": 35.76, + "confidence": 0.011 + }, + { + "text": "one,", + "start": 35.76, + "end": 35.82, + "confidence": 0.531 + }, + { + "text": "Mike's", + "start": 35.92, + "end": 36.2, + "confidence": 0.074 + }, + { + "text": "helmet.", + "start": 36.2, + "end": 36.44, + "confidence": 0.001 + } + ] + }, + { + "id": 14, + "seek": 3000, + "start": 37.64, + "end": 38.36, + "text": " Mike, go in there.", + "tokens": [ + 50764, + 6602, + 11, + 352, + 294, + 456, + 13, + 50814 + ], + "temperature": 0.0, + "avg_logprob": -0.4253494538456561, + "compression_ratio": 1.9318181818181819, + "no_speech_prob": 0.06506537646055222, + "confidence": 0.15, + "words": [ + { + "text": "Mike,", + "start": 37.64, + "end": 37.84, + "confidence": 0.06 + }, + { + "text": "go", + "start": 37.98, + "end": 38.12, + "confidence": 0.784 + }, + { + "text": "in", + "start": 38.12, + "end": 38.24, + "confidence": 0.081 + }, + { + "text": "there.", + "start": 38.24, + "end": 38.36, + "confidence": 0.221 + } + ] + }, + { + "id": 15, + "seek": 3000, + "start": 38.52, + "end": 39.22, + "text": " At least we're safe.", + "tokens": [ + 50814, + 1711, + 1935, + 321, + 434, + 3273, + 13, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.4253494538456561, + "compression_ratio": 1.9318181818181819, + "no_speech_prob": 0.06506537646055222, + "confidence": 0.193, + "words": [ + { + "text": "At", + "start": 38.52, + "end": 38.72, + "confidence": 0.008 + }, + { + "text": "least", + "start": 38.72, + "end": 38.84, + "confidence": 0.893 + }, + { + "text": "we're", + "start": 38.84, + "end": 
39.0, + "confidence": 0.112 + }, + { + "text": "safe.", + "start": 39.0, + "end": 39.22, + "confidence": 0.753 + } + ] + }, + { + "id": 16, + "seek": 3000, + "start": 39.86, + "end": 40.98, + "text": " We've got him in there.", + "tokens": [ + 50864, + 492, + 600, + 658, + 796, + 294, + 456, + 13, + 50964 + ], + "temperature": 0.0, + "avg_logprob": -0.4253494538456561, + "compression_ratio": 1.9318181818181819, + "no_speech_prob": 0.06506537646055222, + "confidence": 0.454, + "words": [ + { + "text": "We've", + "start": 39.86, + "end": 40.38, + "confidence": 0.178 + }, + { + "text": "got", + "start": 40.38, + "end": 40.52, + "confidence": 0.888 + }, + { + "text": "him", + "start": 40.52, + "end": 40.64, + "confidence": 0.481 + }, + { + "text": "in", + "start": 40.64, + "end": 40.74, + "confidence": 0.89 + }, + { + "text": "there.", + "start": 40.74, + "end": 40.98, + "confidence": 0.576 + } + ] + }, + { + "id": 17, + "seek": 3000, + "start": 41.5, + "end": 41.94, + "text": " Helmet tag.", + "tokens": [ + 50964, + 6128, + 5537, + 6162, + 13, + 51014 + ], + "temperature": 0.0, + "avg_logprob": -0.4253494538456561, + "compression_ratio": 1.9318181818181819, + "no_speech_prob": 0.06506537646055222, + "confidence": 0.015, + "words": [ + { + "text": "Helmet", + "start": 41.5, + "end": 41.64, + "confidence": 0.002 + }, + { + "text": "tag.", + "start": 41.64, + "end": 41.94, + "confidence": 0.094 + } + ] + }, + { + "id": 18, + "seek": 3000, + "start": 42.5, + "end": 44.68, + "text": " And, uh, we've got him in there.", + "tokens": [ + 51014, + 400, + 11, + 2232, + 11, + 321, + 600, + 658, + 796, + 294, + 456, + 13, + 51114 + ], + "temperature": 0.0, + "avg_logprob": -0.4253494538456561, + "compression_ratio": 1.9318181818181819, + "no_speech_prob": 0.06506537646055222, + "confidence": 0.088, + "words": [ + { + "text": "And,", + "start": 42.5, + "end": 43.34, + "confidence": 0.22 + }, + { + "text": "uh,", + "start": 43.62, + "end": 43.76, + "confidence": 0.725 + }, + { + 
"text": "we've", + "start": 43.88, + "end": 44.16, + "confidence": 0.044 + }, + { + "text": "got", + "start": 44.16, + "end": 44.52, + "confidence": 0.258 + }, + { + "text": "him", + "start": 44.52, + "end": 44.54, + "confidence": 0.001 + }, + { + "text": "in", + "start": 44.54, + "end": 44.56, + "confidence": 0.194 + }, + { + "text": "there.", + "start": 44.56, + "end": 44.68, + "confidence": 0.032 + } + ] + }, + { + "id": 19, + "seek": 3000, + "start": 44.68, + "end": 45.4, + "text": " Yeah.", + "tokens": [ + 51114, + 865, + 13, + 51164 + ], + "temperature": 0.0, + "avg_logprob": -0.4253494538456561, + "compression_ratio": 1.9318181818181819, + "no_speech_prob": 0.06506537646055222, + "confidence": 0.195, + "words": [ + { + "text": "Yeah.", + "start": 44.68, + "end": 45.4, + "confidence": 0.195 + } + ] + }, + { + "id": 20, + "seek": 3000, + "start": 46.22, + "end": 46.58, + "text": " Helmet.", + "tokens": [ + 51164, + 6128, + 5537, + 13, + 51214 + ], + "temperature": 0.0, + "avg_logprob": -0.4253494538456561, + "compression_ratio": 1.9318181818181819, + "no_speech_prob": 0.06506537646055222, + "confidence": 0.015, + "words": [ + { + "text": "Helmet.", + "start": 46.22, + "end": 46.58, + "confidence": 0.015 + } + ] + }, + { + "id": 21, + "seek": 3000, + "start": 46.58, + "end": 47.4, + "text": " In the helmet tag.", + "tokens": [ + 51214, + 682, + 264, + 15922, + 6162, + 13, + 51264 + ], + "temperature": 0.0, + "avg_logprob": -0.4253494538456561, + "compression_ratio": 1.9318181818181819, + "no_speech_prob": 0.06506537646055222, + "confidence": 0.044, + "words": [ + { + "text": "In", + "start": 46.58, + "end": 46.76, + "confidence": 0.008 + }, + { + "text": "the", + "start": 46.76, + "end": 46.92, + "confidence": 0.416 + }, + { + "text": "helmet", + "start": 46.92, + "end": 47.08, + "confidence": 0.146 + }, + { + "text": "tag.", + "start": 47.08, + "end": 47.4, + "confidence": 0.019 + } + ] + }, + { + "id": 22, + "seek": 3000, + "start": 48.2, + "end": 49.52, + 
"text": " At least it's in the helmet tag.", + "tokens": [ + 51264, + 1711, + 1935, + 309, + 311, + 294, + 264, + 15922, + 6162, + 13, + 51364 + ], + "temperature": 0.0, + "avg_logprob": -0.4253494538456561, + "compression_ratio": 1.9318181818181819, + "no_speech_prob": 0.06506537646055222, + "confidence": 0.097, + "words": [ + { + "text": "At", + "start": 48.2, + "end": 48.34, + "confidence": 0.095 + }, + { + "text": "least", + "start": 48.34, + "end": 48.6, + "confidence": 0.76 + }, + { + "text": "it's", + "start": 48.6, + "end": 48.86, + "confidence": 0.339 + }, + { + "text": "in", + "start": 48.86, + "end": 48.96, + "confidence": 0.151 + }, + { + "text": "the", + "start": 48.96, + "end": 49.1, + "confidence": 0.223 + }, + { + "text": "helmet", + "start": 49.1, + "end": 49.18, + "confidence": 0.006 + }, + { + "text": "tag.", + "start": 49.18, + "end": 49.52, + "confidence": 0.021 + } + ] + }, + { + "id": 23, + "seek": 3000, + "start": 50.12, + "end": 50.44, + "text": " Roger.", + "tokens": [ + 51364, + 17666, + 13, + 51414 + ], + "temperature": 0.0, + "avg_logprob": -0.4253494538456561, + "compression_ratio": 1.9318181818181819, + "no_speech_prob": 0.06506537646055222, + "confidence": 0.07, + "words": [ + { + "text": "Roger.", + "start": 50.12, + "end": 50.44, + "confidence": 0.07 + } + ] + }, + { + "id": 24, + "seek": 3000, + "start": 51.52, + "end": 51.74, + "text": " Roger.", + "tokens": [ + 51414, + 17666, + 13, + 51464 + ], + "temperature": 0.0, + "avg_logprob": -0.4253494538456561, + "compression_ratio": 1.9318181818181819, + "no_speech_prob": 0.06506537646055222, + "confidence": 0.006, + "words": [ + { + "text": "Roger.", + "start": 51.52, + "end": 51.74, + "confidence": 0.006 + } + ] + }, + { + "id": 25, + "seek": 3000, + "start": 51.74, + "end": 53.0, + "text": " We're thinking Nick and me on it.", + "tokens": [ + 51464, + 492, + 434, + 1953, + 9449, + 293, + 385, + 322, + 309, + 13, + 51514 + ], + "temperature": 0.0, + "avg_logprob": 
-0.4253494538456561, + "compression_ratio": 1.9318181818181819, + "no_speech_prob": 0.06506537646055222, + "confidence": 0.018, + "words": [ + { + "text": "We're", + "start": 51.74, + "end": 52.26, + "confidence": 0.046 + }, + { + "text": "thinking", + "start": 52.26, + "end": 52.28, + "confidence": 0.0 + }, + { + "text": "Nick", + "start": 52.28, + "end": 52.54, + "confidence": 0.0 + }, + { + "text": "and", + "start": 52.54, + "end": 52.66, + "confidence": 0.156 + }, + { + "text": "me", + "start": 52.66, + "end": 52.76, + "confidence": 0.021 + }, + { + "text": "on", + "start": 52.76, + "end": 52.92, + "confidence": 0.846 + }, + { + "text": "it.", + "start": 52.92, + "end": 53.0, + "confidence": 0.341 + } + ] + }, + { + "id": 26, + "seek": 3000, + "start": 53.06, + "end": 54.32, + "text": " Yeah, we're thinking Nick and me on it.", + "tokens": [ + 51514, + 865, + 11, + 321, + 434, + 1953, + 9449, + 293, + 385, + 322, + 309, + 13, + 51614 + ], + "temperature": 0.0, + "avg_logprob": -0.4253494538456561, + "compression_ratio": 1.9318181818181819, + "no_speech_prob": 0.06506537646055222, + "confidence": 0.156, + "words": [ + { + "text": "Yeah,", + "start": 53.06, + "end": 53.26, + "confidence": 0.698 + }, + { + "text": "we're", + "start": 53.36, + "end": 53.44, + "confidence": 0.676 + }, + { + "text": "thinking", + "start": 53.44, + "end": 53.62, + "confidence": 0.723 + }, + { + "text": "Nick", + "start": 53.62, + "end": 53.86, + "confidence": 0.0 + }, + { + "text": "and", + "start": 53.86, + "end": 54.02, + "confidence": 0.012 + }, + { + "text": "me", + "start": 54.02, + "end": 54.08, + "confidence": 0.039 + }, + { + "text": "on", + "start": 54.08, + "end": 54.22, + "confidence": 0.153 + }, + { + "text": "it.", + "start": 54.22, + "end": 54.32, + "confidence": 0.534 + } + ] + }, + { + "id": 27, + "seek": 3000, + "start": 54.5, + "end": 55.3, + "text": " See you later.", + "tokens": [ + 51614, + 3008, + 291, + 1780, + 13, + 51664 + ], + "temperature": 0.0, + 
"avg_logprob": -0.4253494538456561, + "compression_ratio": 1.9318181818181819, + "no_speech_prob": 0.06506537646055222, + "confidence": 0.261, + "words": [ + { + "text": "See", + "start": 54.5, + "end": 55.06, + "confidence": 0.018 + }, + { + "text": "you", + "start": 55.06, + "end": 55.18, + "confidence": 0.609 + }, + { + "text": "later.", + "start": 55.18, + "end": 55.3, + "confidence": 0.655 + } + ] + }, + { + "id": 28, + "seek": 3000, + "start": 56.46, + "end": 57.9, + "text": " Hey, we were going to hang me on it.", + "tokens": [ + 51664, + 1911, + 11, + 321, + 645, + 516, + 281, + 3967, + 385, + 322, + 309, + 13, + 51764 + ], + "temperature": 0.0, + "avg_logprob": -0.4253494538456561, + "compression_ratio": 1.9318181818181819, + "no_speech_prob": 0.06506537646055222, + "confidence": 0.311, + "words": [ + { + "text": "Hey,", + "start": 56.46, + "end": 56.6, + "confidence": 0.248 + }, + { + "text": "we", + "start": 56.66, + "end": 56.76, + "confidence": 0.807 + }, + { + "text": "were", + "start": 56.76, + "end": 56.9, + "confidence": 0.278 + }, + { + "text": "going", + "start": 56.9, + "end": 57.12, + "confidence": 0.21 + }, + { + "text": "to", + "start": 57.12, + "end": 57.26, + "confidence": 0.816 + }, + { + "text": "hang", + "start": 57.26, + "end": 57.36, + "confidence": 0.051 + }, + { + "text": "me", + "start": 57.36, + "end": 57.58, + "confidence": 0.374 + }, + { + "text": "on", + "start": 57.58, + "end": 57.76, + "confidence": 0.893 + }, + { + "text": "it.", + "start": 57.76, + "end": 57.9, + "confidence": 0.256 + } + ] + }, + { + "id": 29, + "seek": 5800, + "start": 57.9, + "end": 61.04, + "text": " We were going to hang you on it with a cover.", + "tokens": [ + 50364, + 492, + 645, + 516, + 281, + 3967, + 291, + 322, + 309, + 365, + 257, + 2060, + 13, + 50514 + ], + "temperature": 0.0, + "avg_logprob": -0.2743584679775551, + "compression_ratio": 1.613953488372093, + "no_speech_prob": 0.03351878747344017, + "confidence": 0.218, + "words": [ + { + 
"text": "We", + "start": 57.9, + "end": 58.04, + "confidence": 0.272 + }, + { + "text": "were", + "start": 58.04, + "end": 58.22, + "confidence": 0.281 + }, + { + "text": "going", + "start": 58.22, + "end": 58.46, + "confidence": 0.207 + }, + { + "text": "to", + "start": 58.46, + "end": 58.62, + "confidence": 0.12 + }, + { + "text": "hang", + "start": 58.62, + "end": 58.72, + "confidence": 0.458 + }, + { + "text": "you", + "start": 58.72, + "end": 58.94, + "confidence": 0.028 + }, + { + "text": "on", + "start": 58.94, + "end": 59.16, + "confidence": 0.86 + }, + { + "text": "it", + "start": 59.16, + "end": 59.46, + "confidence": 0.016 + }, + { + "text": "with", + "start": 59.46, + "end": 59.92, + "confidence": 0.336 + }, + { + "text": "a", + "start": 59.92, + "end": 60.36, + "confidence": 0.464 + }, + { + "text": "cover.", + "start": 60.36, + "end": 61.04, + "confidence": 0.466 + } + ] + }, + { + "id": 30, + "seek": 5800, + "start": 61.04, + "end": 61.8, + "text": " I tried it already.", + "tokens": [ + 50514, + 286, + 3031, + 309, + 1217, + 13, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.2743584679775551, + "compression_ratio": 1.613953488372093, + "no_speech_prob": 0.03351878747344017, + "confidence": 0.164, + "words": [ + { + "text": "I", + "start": 61.04, + "end": 61.16, + "confidence": 0.016 + }, + { + "text": "tried", + "start": 61.16, + "end": 61.44, + "confidence": 0.193 + }, + { + "text": "it", + "start": 61.44, + "end": 61.68, + "confidence": 0.089 + }, + { + "text": "already.", + "start": 61.68, + "end": 61.8, + "confidence": 0.659 + } + ] + }, + { + "id": 31, + "seek": 5800, + "start": 62.52, + "end": 63.06, + "text": " OK, fine.", + "tokens": [ + 50564, + 2264, + 11, + 2489, + 13, + 50614 + ], + "temperature": 0.0, + "avg_logprob": -0.2743584679775551, + "compression_ratio": 1.613953488372093, + "no_speech_prob": 0.03351878747344017, + "confidence": 0.353, + "words": [ + { + "text": "OK,", + "start": 62.52, + "end": 62.76, + "confidence": 
0.189 + }, + { + "text": "fine.", + "start": 62.88, + "end": 63.06, + "confidence": 0.657 + } + ] + }, + { + "id": 32, + "seek": 5800, + "start": 63.06, + "end": 63.98, + "text": " We weren't sure of that.", + "tokens": [ + 50614, + 492, + 4999, + 380, + 988, + 295, + 300, + 13, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.2743584679775551, + "compression_ratio": 1.613953488372093, + "no_speech_prob": 0.03351878747344017, + "confidence": 0.694, + "words": [ + { + "text": "We", + "start": 63.06, + "end": 63.26, + "confidence": 0.33 + }, + { + "text": "weren't", + "start": 63.26, + "end": 63.56, + "confidence": 0.844 + }, + { + "text": "sure", + "start": 63.56, + "end": 63.72, + "confidence": 0.907 + }, + { + "text": "of", + "start": 63.72, + "end": 63.84, + "confidence": 0.648 + }, + { + "text": "that.", + "start": 63.84, + "end": 63.98, + "confidence": 0.75 + } + ] + }, + { + "id": 33, + "seek": 5800, + "start": 63.98, + "end": 65.1, + "text": " It's just a suggestion.", + "tokens": [ + 50664, + 467, + 311, + 445, + 257, + 16541, + 13, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -0.2743584679775551, + "compression_ratio": 1.613953488372093, + "no_speech_prob": 0.03351878747344017, + "confidence": 0.203, + "words": [ + { + "text": "It's", + "start": 63.98, + "end": 64.6, + "confidence": 0.043 + }, + { + "text": "just", + "start": 64.6, + "end": 64.66, + "confidence": 0.141 + }, + { + "text": "a", + "start": 64.66, + "end": 64.78, + "confidence": 0.742 + }, + { + "text": "suggestion.", + "start": 64.78, + "end": 65.1, + "confidence": 0.602 + } + ] + }, + { + "id": 34, + "seek": 5800, + "start": 65.1, + "end": 67.7, + "text": " We thought we'd take you to check it out.", + "tokens": [ + 50714, + 492, + 1194, + 321, + 1116, + 747, + 291, + 281, + 1520, + 309, + 484, + 13, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.2743584679775551, + "compression_ratio": 1.613953488372093, + "no_speech_prob": 0.03351878747344017, + "confidence": 0.488, + 
"words": [ + { + "text": "We", + "start": 65.1, + "end": 65.32, + "confidence": 0.138 + }, + { + "text": "thought", + "start": 65.32, + "end": 65.54, + "confidence": 0.817 + }, + { + "text": "we'd", + "start": 65.54, + "end": 65.9, + "confidence": 0.781 + }, + { + "text": "take", + "start": 65.9, + "end": 66.14, + "confidence": 0.01 + }, + { + "text": "you", + "start": 66.14, + "end": 67.08, + "confidence": 0.821 + }, + { + "text": "to", + "start": 67.08, + "end": 67.24, + "confidence": 0.961 + }, + { + "text": "check", + "start": 67.24, + "end": 67.4, + "confidence": 0.834 + }, + { + "text": "it", + "start": 67.4, + "end": 67.58, + "confidence": 0.952 + }, + { + "text": "out.", + "start": 67.58, + "end": 67.7, + "confidence": 0.922 + } + ] + }, + { + "id": 35, + "seek": 5800, + "start": 68.2, + "end": 69.26, + "text": " It's not much of an order to turn that.", + "tokens": [ + 50864, + 467, + 311, + 406, + 709, + 295, + 364, + 1668, + 281, + 1261, + 300, + 13, + 50914 + ], + "temperature": 0.0, + "avg_logprob": -0.2743584679775551, + "compression_ratio": 1.613953488372093, + "no_speech_prob": 0.03351878747344017, + "confidence": 0.141, + "words": [ + { + "text": "It's", + "start": 68.2, + "end": 68.32, + "confidence": 0.065 + }, + { + "text": "not", + "start": 68.32, + "end": 68.4, + "confidence": 0.234 + }, + { + "text": "much", + "start": 68.4, + "end": 68.54, + "confidence": 0.283 + }, + { + "text": "of", + "start": 68.54, + "end": 68.68, + "confidence": 0.509 + }, + { + "text": "an", + "start": 68.68, + "end": 68.84, + "confidence": 0.278 + }, + { + "text": "order", + "start": 68.84, + "end": 68.88, + "confidence": 0.073 + }, + { + "text": "to", + "start": 68.88, + "end": 69.02, + "confidence": 0.058 + }, + { + "text": "turn", + "start": 69.02, + "end": 69.12, + "confidence": 0.179 + }, + { + "text": "that.", + "start": 69.12, + "end": 69.26, + "confidence": 0.121 + } + ] + }, + { + "id": 36, + "seek": 5800, + "start": 69.26, + "end": 71.92, + "text": " So, 
uh, I guess we're going to come up with this.", + "tokens": [ + 50914, + 407, + 11, + 2232, + 11, + 286, + 2041, + 321, + 434, + 516, + 281, + 808, + 493, + 365, + 341, + 13, + 51064 + ], + "temperature": 0.0, + "avg_logprob": -0.2743584679775551, + "compression_ratio": 1.613953488372093, + "no_speech_prob": 0.03351878747344017, + "confidence": 0.459, + "words": [ + { + "text": "So,", + "start": 69.26, + "end": 69.6, + "confidence": 0.443 + }, + { + "text": "uh,", + "start": 69.7, + "end": 69.84, + "confidence": 0.424 + }, + { + "text": "I", + "start": 70.36, + "end": 70.5, + "confidence": 0.955 + }, + { + "text": "guess", + "start": 70.5, + "end": 70.66, + "confidence": 0.982 + }, + { + "text": "we're", + "start": 70.66, + "end": 71.1, + "confidence": 0.523 + }, + { + "text": "going", + "start": 71.1, + "end": 71.24, + "confidence": 0.114 + }, + { + "text": "to", + "start": 71.24, + "end": 71.36, + "confidence": 0.944 + }, + { + "text": "come", + "start": 71.36, + "end": 71.44, + "confidence": 0.418 + }, + { + "text": "up", + "start": 71.44, + "end": 71.62, + "confidence": 0.738 + }, + { + "text": "with", + "start": 71.62, + "end": 71.78, + "confidence": 0.876 + }, + { + "text": "this.", + "start": 71.78, + "end": 71.92, + "confidence": 0.179 + } + ] + }, + { + "id": 37, + "seek": 5800, + "start": 71.92, + "end": 72.46, + "text": " Let us know.", + "tokens": [ + 51064, + 961, + 505, + 458, + 13, + 51164 + ], + "temperature": 0.0, + "avg_logprob": -0.2743584679775551, + "compression_ratio": 1.613953488372093, + "no_speech_prob": 0.03351878747344017, + "confidence": 0.334, + "words": [ + { + "text": "Let", + "start": 71.92, + "end": 72.08, + "confidence": 0.052 + }, + { + "text": "us", + "start": 72.08, + "end": 72.24, + "confidence": 0.376 + }, + { + "text": "know.", + "start": 72.24, + "end": 72.46, + "confidence": 0.8 + } + ] + }, + { + "id": 38, + "seek": 5800, + "start": 73.68, + "end": 75.18, + "text": " OK, no problem.", + "tokens": [ + 51164, + 2264, + 11, + 
572, + 1154, + 13, + 51214 + ], + "temperature": 0.0, + "avg_logprob": -0.2743584679775551, + "compression_ratio": 1.613953488372093, + "no_speech_prob": 0.03351878747344017, + "confidence": 0.129, + "words": [ + { + "text": "OK,", + "start": 73.68, + "end": 74.48, + "confidence": 0.019 + }, + { + "text": "no", + "start": 74.58, + "end": 74.86, + "confidence": 0.131 + }, + { + "text": "problem.", + "start": 74.86, + "end": 75.18, + "confidence": 0.843 + } + ] + }, + { + "id": 39, + "seek": 5800, + "start": 75.18, + "end": 76.46, + "text": " OK, no problem.", + "tokens": [ + 51214, + 2264, + 11, + 572, + 1154, + 13, + 51264 + ], + "temperature": 0.0, + "avg_logprob": -0.2743584679775551, + "compression_ratio": 1.613953488372093, + "no_speech_prob": 0.03351878747344017, + "confidence": 0.46, + "words": [ + { + "text": "OK,", + "start": 75.18, + "end": 75.8, + "confidence": 0.175 + }, + { + "text": "no", + "start": 75.94, + "end": 76.18, + "confidence": 0.945 + }, + { + "text": "problem.", + "start": 76.18, + "end": 76.46, + "confidence": 0.844 + } + ] + }, + { + "id": 40, + "seek": 5800, + "start": 76.46, + "end": 78.3, + "text": " We'll let you know when the end of the lunch.", + "tokens": [ + 51264, + 492, + 603, + 718, + 291, + 458, + 562, + 264, + 917, + 295, + 264, + 6349, + 13, + 51414 + ], + "temperature": 0.0, + "avg_logprob": -0.2743584679775551, + "compression_ratio": 1.613953488372093, + "no_speech_prob": 0.03351878747344017, + "confidence": 0.382, + "words": [ + { + "text": "We'll", + "start": 76.46, + "end": 76.76, + "confidence": 0.07 + }, + { + "text": "let", + "start": 76.76, + "end": 76.94, + "confidence": 0.496 + }, + { + "text": "you", + "start": 76.94, + "end": 77.1, + "confidence": 0.452 + }, + { + "text": "know", + "start": 77.1, + "end": 77.2, + "confidence": 0.982 + }, + { + "text": "when", + "start": 77.2, + "end": 77.32, + "confidence": 0.642 + }, + { + "text": "the", + "start": 77.32, + "end": 77.4, + "confidence": 0.838 + }, + { + "text": 
"end", + "start": 77.4, + "end": 77.52, + "confidence": 0.951 + }, + { + "text": "of", + "start": 77.52, + "end": 77.64, + "confidence": 0.984 + }, + { + "text": "the", + "start": 77.64, + "end": 78.12, + "confidence": 0.35 + }, + { + "text": "lunch.", + "start": 78.12, + "end": 78.3, + "confidence": 0.225 + } + ] + }, + { + "id": 41, + "seek": 8800, + "start": 88.82, + "end": 88.98, + "text": " Thank you.", + "tokens": [ + 50364, + 1044, + 291, + 13, + 50414 + ], + "temperature": 0.0, + "avg_logprob": -0.5118929942448934, + "compression_ratio": 0.5555555555555556, + "no_speech_prob": 0.987617552280426, + "confidence": 0.361, + "words": [ + { + "text": "Thank", + "start": 88.82, + "end": 88.94, + "confidence": 0.073 + }, + { + "text": "you.", + "start": 88.94, + "end": 88.98, + "confidence": 0.803 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/naive/naive_apollo11.mp3.words.json b/tests/expected/naive/naive_apollo11.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..e00c676200523f5674122cc04df12bc79a397390 --- /dev/null +++ b/tests/expected/naive/naive_apollo11.mp3.words.json @@ -0,0 +1,3820 @@ +{ + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA. Alright, okay, we like to say that they make the one that's on the helmet we're going to have in B1. And you can put the other one on the mic helmet with those GVA blizzard frames. 
Alright, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, 
got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.54, + "end": 6.56, + "text": " Apollo 11, Houston we got a recommendation for you on your Soyuz-VA GLEME GVA.", + "tokens": [ + 50364, + 25187, + 2975, + 11, + 18717, + 321, + 658, + 257, + 11879, + 337, + 291, + 322, + 428, + 24758, + 3334, + 12, + 20914, + 460, + 2634, + 15454, + 460, + 20914, + 13, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -0.7225993307013261, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.44998496770858765, + "confidence": 0.494, + "words": [ + { + "text": "Apollo", + "start": 0.54, + "end": 0.92, + "confidence": 0.425 + }, + { + "text": "11,", + "start": 0.92, + "end": 1.26, + "confidence": 0.842 + }, + { + "text": "Houston", + "start": 1.52, + "end": 1.72, + "confidence": 0.974 + }, + { + "text": "we", + "start": 1.72, + "end": 1.92, + "confidence": 0.453 + }, + { + "text": "got", + "start": 1.92, + "end": 2.1, + "confidence": 0.791 + }, + { + "text": "a", + "start": 2.1, + "end": 2.26, + "confidence": 0.992 + }, + { + "text": "recommendation", + "start": 2.26, + "end": 2.86, + "confidence": 0.97 + }, + { + "text": "for", + "start": 2.86, + "end": 3.44, + "confidence": 0.945 + }, + { + "text": "you", + "start": 3.44, + "end": 3.6, + "confidence": 0.99 + }, + { + "text": "on", + "start": 3.6, + "end": 3.72, + "confidence": 0.935 + }, + { + "text": "your", + "start": 3.72, + "end": 3.92, + "confidence": 0.974 + }, + { + "text": "Soyuz-VA", + "start": 3.92, + "end": 5.16, + "confidence": 0.327 + }, + { + "text": "GLEME", + "start": 5.16, + "end": 5.72, + "confidence": 0.178 + }, + { + "text": "GVA.", + "start": 5.72, + "end": 6.56, + "confidence": 0.321 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 10.8, + "end": 19.08, + "text": " 
Alright, okay, we like to say that they make the one that's on the helmet we're going to have in B1.", + "tokens": [ + 50714, + 2798, + 11, + 1392, + 11, + 321, + 411, + 281, + 584, + 300, + 436, + 652, + 264, + 472, + 300, + 311, + 322, + 264, + 15922, + 321, + 434, + 516, + 281, + 362, + 294, + 363, + 16, + 13, + 51314 + ], + "temperature": 0.0, + "avg_logprob": -0.7225993307013261, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.44998496770858765, + "confidence": 0.311, + "words": [ + { + "text": "Alright,", + "start": 10.8, + "end": 10.82, + "confidence": 0.055 + }, + { + "text": "okay,", + "start": 11.46, + "end": 12.2, + "confidence": 0.359 + }, + { + "text": "we", + "start": 12.44, + "end": 12.96, + "confidence": 0.347 + }, + { + "text": "like", + "start": 12.96, + "end": 13.28, + "confidence": 0.512 + }, + { + "text": "to", + "start": 13.28, + "end": 13.6, + "confidence": 0.156 + }, + { + "text": "say", + "start": 13.6, + "end": 14.9, + "confidence": 0.086 + }, + { + "text": "that", + "start": 14.9, + "end": 15.32, + "confidence": 0.273 + }, + { + "text": "they", + "start": 15.32, + "end": 15.46, + "confidence": 0.358 + }, + { + "text": "make", + "start": 15.46, + "end": 15.7, + "confidence": 0.127 + }, + { + "text": "the", + "start": 15.7, + "end": 15.86, + "confidence": 0.165 + }, + { + "text": "one", + "start": 15.86, + "end": 16.08, + "confidence": 0.428 + }, + { + "text": "that's", + "start": 16.08, + "end": 16.26, + "confidence": 0.357 + }, + { + "text": "on", + "start": 16.26, + "end": 16.46, + "confidence": 0.314 + }, + { + "text": "the", + "start": 16.46, + "end": 16.76, + "confidence": 0.498 + }, + { + "text": "helmet", + "start": 16.76, + "end": 17.26, + "confidence": 0.306 + }, + { + "text": "we're", + "start": 17.26, + "end": 17.78, + "confidence": 0.256 + }, + { + "text": "going", + "start": 17.78, + "end": 17.94, + "confidence": 0.478 + }, + { + "text": "to", + "start": 17.94, + "end": 18.1, + "confidence": 0.522 + }, + { + 
"text": "have", + "start": 18.1, + "end": 18.18, + "confidence": 0.804 + }, + { + "text": "in", + "start": 18.18, + "end": 18.36, + "confidence": 0.772 + }, + { + "text": "B1.", + "start": 18.36, + "end": 19.08, + "confidence": 0.738 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 19.08, + "end": 24.54, + "text": " And you can put the other one on the mic helmet with those GVA blizzard frames.", + "tokens": [ + 51314, + 400, + 291, + 393, + 829, + 264, + 661, + 472, + 322, + 264, + 3123, + 15922, + 365, + 729, + 460, + 20914, + 888, + 31062, + 12083, + 13, + 51614 + ], + "temperature": 0.0, + "avg_logprob": -0.7225993307013261, + "compression_ratio": 1.39247311827957, + "no_speech_prob": 0.44998496770858765, + "confidence": 0.129, + "words": [ + { + "text": "And", + "start": 19.08, + "end": 20.14, + "confidence": 0.232 + }, + { + "text": "you", + "start": 20.14, + "end": 20.32, + "confidence": 0.905 + }, + { + "text": "can", + "start": 20.32, + "end": 20.5, + "confidence": 0.422 + }, + { + "text": "put", + "start": 20.5, + "end": 20.64, + "confidence": 0.942 + }, + { + "text": "the", + "start": 20.64, + "end": 20.82, + "confidence": 0.983 + }, + { + "text": "other", + "start": 20.82, + "end": 21.0, + "confidence": 0.994 + }, + { + "text": "one", + "start": 21.0, + "end": 21.18, + "confidence": 0.965 + }, + { + "text": "on", + "start": 21.18, + "end": 21.36, + "confidence": 0.965 + }, + { + "text": "the", + "start": 21.36, + "end": 21.78, + "confidence": 0.187 + }, + { + "text": "mic", + "start": 21.78, + "end": 22.48, + "confidence": 0.586 + }, + { + "text": "helmet", + "start": 22.48, + "end": 22.82, + "confidence": 0.029 + }, + { + "text": "with", + "start": 22.82, + "end": 23.06, + "confidence": 0.047 + }, + { + "text": "those", + "start": 23.06, + "end": 23.3, + "confidence": 0.036 + }, + { + "text": "GVA", + "start": 23.3, + "end": 23.74, + "confidence": 0.001 + }, + { + "text": "blizzard", + "start": 23.74, + "end": 24.18, + "confidence": 0.022 + }, + { + 
"text": "frames.", + "start": 24.18, + "end": 24.54, + "confidence": 0.225 + } + ] + }, + { + "id": 3, + "seek": 2500, + "start": 31.34, + "end": 54.52, + "text": " Alright, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 2798, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, 
+ 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.11149242824978299, + "compression_ratio": 24.096774193548388, + "no_speech_prob": 0.4302051365375519, + "confidence": 0.885, + "words": [ + { + "text": "Alright,", + "start": 31.34, + "end": 31.52, + "confidence": 0.069 + }, + { + "text": "got", + "start": 31.82, + "end": 31.86, + "confidence": 0.144 + }, + { + "text": "them,", + "start": 31.86, + "end": 32.1, + "confidence": 0.194 + }, + { + "text": "got", + "start": 32.38, + "end": 32.88, + "confidence": 0.473 + }, + { + "text": "them,", + "start": 32.88, + "end": 33.48, + "confidence": 0.611 + }, + { + "text": "got", + "start": 33.5, + "end": 33.52, + "confidence": 0.574 + }, + { + "text": "them,", + "start": 33.52, + "end": 33.54, + "confidence": 0.51 + }, + { + "text": "got", + "start": 33.76, + "end": 33.78, + "confidence": 0.599 + }, + { + "text": "them,", + "start": 33.78, + "end": 33.88, + "confidence": 0.488 + }, + { + "text": "got", + "start": 34.12, + "end": 34.14, + "confidence": 0.724 + }, + { + "text": "them,", + "start": 34.14, + "end": 34.16, + "confidence": 0.567 + }, + { + "text": "got", + "start": 34.16, + "end": 34.18, + "confidence": 0.761 + }, + { + "text": "them,", + "start": 34.18, + "end": 34.34, + "confidence": 0.689 + }, + { + "text": "got", + "start": 34.34, + "end": 34.36, + "confidence": 0.851 + }, + { + "text": "them,", + "start": 34.36, + "end": 34.5, + "confidence": 0.813 + }, + { + "text": "got", + "start": 34.5, + "end": 34.64, + "confidence": 0.916 + }, + { + "text": "them,", + "start": 34.64, + "end": 34.66, + 
"confidence": 0.756 + }, + { + "text": "got", + "start": 34.82, + "end": 34.92, + "confidence": 0.467 + }, + { + "text": "them,", + "start": 34.92, + "end": 35.0, + "confidence": 0.851 + }, + { + "text": "got", + "start": 35.0, + "end": 35.02, + "confidence": 0.917 + }, + { + "text": "them,", + "start": 35.02, + "end": 35.1, + "confidence": 0.897 + }, + { + "text": "got", + "start": 35.1, + "end": 35.28, + "confidence": 0.923 + }, + { + "text": "them,", + "start": 35.28, + "end": 35.64, + "confidence": 0.911 + }, + { + "text": "got", + "start": 35.66, + "end": 35.68, + "confidence": 0.923 + }, + { + "text": "them,", + "start": 35.68, + "end": 35.7, + "confidence": 0.917 + }, + { + "text": "got", + "start": 35.74, + "end": 35.76, + "confidence": 0.935 + }, + { + "text": "them,", + "start": 35.76, + "end": 35.78, + "confidence": 0.915 + }, + { + "text": "got", + "start": 36.04, + "end": 36.36, + "confidence": 0.944 + }, + { + "text": "them,", + "start": 36.36, + "end": 36.68, + "confidence": 0.91 + }, + { + "text": "got", + "start": 36.68, + "end": 36.7, + "confidence": 0.934 + }, + { + "text": "them,", + "start": 36.7, + "end": 36.72, + "confidence": 0.916 + }, + { + "text": "got", + "start": 36.72, + "end": 36.74, + "confidence": 0.926 + }, + { + "text": "them,", + "start": 36.74, + "end": 36.76, + "confidence": 0.92 + }, + { + "text": "got", + "start": 36.76, + "end": 36.78, + "confidence": 0.926 + }, + { + "text": "them,", + "start": 36.78, + "end": 36.8, + "confidence": 0.924 + }, + { + "text": "got", + "start": 36.8, + "end": 36.82, + "confidence": 0.932 + }, + { + "text": "them,", + "start": 36.82, + "end": 36.84, + "confidence": 0.93 + }, + { + "text": "got", + "start": 36.84, + "end": 36.86, + "confidence": 0.935 + }, + { + "text": "them,", + "start": 36.86, + "end": 36.88, + "confidence": 0.937 + }, + { + "text": "got", + "start": 36.88, + "end": 37.46, + "confidence": 0.941 + }, + { + "text": "them,", + "start": 37.46, + "end": 37.82, + "confidence": 0.942 
+ }, + { + "text": "got", + "start": 37.82, + "end": 37.84, + "confidence": 0.943 + }, + { + "text": "them,", + "start": 37.84, + "end": 38.12, + "confidence": 0.945 + }, + { + "text": "got", + "start": 38.12, + "end": 38.14, + "confidence": 0.947 + }, + { + "text": "them,", + "start": 38.14, + "end": 38.16, + "confidence": 0.948 + }, + { + "text": "got", + "start": 38.16, + "end": 38.18, + "confidence": 0.948 + }, + { + "text": "them,", + "start": 38.18, + "end": 38.2, + "confidence": 0.951 + }, + { + "text": "got", + "start": 38.2, + "end": 38.22, + "confidence": 0.95 + }, + { + "text": "them,", + "start": 38.22, + "end": 38.54, + "confidence": 0.953 + }, + { + "text": "got", + "start": 38.54, + "end": 38.56, + "confidence": 0.952 + }, + { + "text": "them,", + "start": 38.56, + "end": 38.58, + "confidence": 0.956 + }, + { + "text": "got", + "start": 38.58, + "end": 38.6, + "confidence": 0.952 + }, + { + "text": "them,", + "start": 38.6, + "end": 38.62, + "confidence": 0.956 + }, + { + "text": "got", + "start": 38.62, + "end": 38.64, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 38.64, + "end": 38.66, + "confidence": 0.958 + }, + { + "text": "got", + "start": 38.66, + "end": 38.68, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 38.68, + "end": 38.7, + "confidence": 0.961 + }, + { + "text": "got", + "start": 38.7, + "end": 38.72, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 38.72, + "end": 38.74, + "confidence": 0.961 + }, + { + "text": "got", + "start": 38.74, + "end": 38.76, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 38.76, + "end": 38.78, + "confidence": 0.962 + }, + { + "text": "got", + "start": 38.78, + "end": 38.8, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 38.8, + "end": 38.82, + "confidence": 0.964 + }, + { + "text": "got", + "start": 38.82, + "end": 38.84, + "confidence": 0.958 + }, + { + "text": "them,", + "start": 38.84, + "end": 38.86, + "confidence": 0.964 + }, + { + 
"text": "got", + "start": 38.86, + "end": 38.88, + "confidence": 0.959 + }, + { + "text": "them,", + "start": 38.88, + "end": 38.9, + "confidence": 0.965 + }, + { + "text": "got", + "start": 38.9, + "end": 38.92, + "confidence": 0.961 + }, + { + "text": "them,", + "start": 38.92, + "end": 38.94, + "confidence": 0.968 + }, + { + "text": "got", + "start": 38.94, + "end": 38.96, + "confidence": 0.964 + }, + { + "text": "them,", + "start": 38.96, + "end": 38.98, + "confidence": 0.969 + }, + { + "text": "got", + "start": 38.98, + "end": 39.0, + "confidence": 0.964 + }, + { + "text": "them,", + "start": 39.0, + "end": 39.02, + "confidence": 0.968 + }, + { + "text": "got", + "start": 39.02, + "end": 39.04, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 39.04, + "end": 39.06, + "confidence": 0.971 + }, + { + "text": "got", + "start": 39.06, + "end": 39.08, + "confidence": 0.967 + }, + { + "text": "them,", + "start": 39.08, + "end": 39.1, + "confidence": 0.971 + }, + { + "text": "got", + "start": 39.1, + "end": 39.12, + "confidence": 0.969 + }, + { + "text": "them,", + "start": 39.12, + "end": 39.14, + "confidence": 0.974 + }, + { + "text": "got", + "start": 39.14, + "end": 39.16, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 39.16, + "end": 39.18, + "confidence": 0.974 + }, + { + "text": "got", + "start": 39.18, + "end": 39.2, + "confidence": 0.972 + }, + { + "text": "them,", + "start": 39.2, + "end": 39.22, + "confidence": 0.975 + }, + { + "text": "got", + "start": 39.22, + "end": 39.24, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 39.24, + "end": 39.26, + "confidence": 0.976 + }, + { + "text": "got", + "start": 39.26, + "end": 39.28, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 39.28, + "end": 39.3, + "confidence": 0.978 + }, + { + "text": "got", + "start": 39.3, + "end": 39.32, + "confidence": 0.978 + }, + { + "text": "them,", + "start": 39.32, + "end": 39.34, + "confidence": 0.979 + }, + { + "text": "got", + 
"start": 39.34, + "end": 39.36, + "confidence": 0.979 + }, + { + "text": "them,", + "start": 39.36, + "end": 39.38, + "confidence": 0.979 + }, + { + "text": "got", + "start": 39.38, + "end": 39.4, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 39.4, + "end": 39.42, + "confidence": 0.98 + }, + { + "text": "got", + "start": 39.42, + "end": 39.44, + "confidence": 0.983 + }, + { + "text": "them,", + "start": 39.44, + "end": 39.46, + "confidence": 0.981 + }, + { + "text": "got", + "start": 39.46, + "end": 39.48, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 39.48, + "end": 39.5, + "confidence": 0.982 + }, + { + "text": "got", + "start": 39.5, + "end": 39.52, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 39.52, + "end": 39.54, + "confidence": 0.983 + }, + { + "text": "got", + "start": 39.54, + "end": 39.56, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 39.56, + "end": 39.58, + "confidence": 0.984 + }, + { + "text": "got", + "start": 39.58, + "end": 39.6, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 39.6, + "end": 39.62, + "confidence": 0.983 + }, + { + "text": "got", + "start": 39.62, + "end": 39.64, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 39.64, + "end": 39.66, + "confidence": 0.984 + }, + { + "text": "got", + "start": 39.66, + "end": 39.68, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 39.68, + "end": 39.7, + "confidence": 0.985 + }, + { + "text": "got", + "start": 39.7, + "end": 39.72, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 39.72, + "end": 39.74, + "confidence": 0.986 + }, + { + "text": "got", + "start": 39.74, + "end": 39.76, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 39.76, + "end": 39.78, + "confidence": 0.986 + }, + { + "text": "got", + "start": 39.78, + "end": 39.8, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 39.8, + "end": 39.82, + "confidence": 0.986 + }, + { + "text": "got", + "start": 39.82, + 
"end": 39.84, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 39.84, + "end": 39.86, + "confidence": 0.986 + }, + { + "text": "got", + "start": 39.86, + "end": 39.88, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 39.88, + "end": 39.9, + "confidence": 0.987 + }, + { + "text": "got", + "start": 39.9, + "end": 39.92, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 39.92, + "end": 39.94, + "confidence": 0.987 + }, + { + "text": "got", + "start": 39.94, + "end": 39.96, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 39.96, + "end": 39.98, + "confidence": 0.988 + }, + { + "text": "got", + "start": 39.98, + "end": 40.0, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.0, + "end": 40.02, + "confidence": 0.988 + }, + { + "text": "got", + "start": 40.02, + "end": 40.42, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 40.42, + "end": 40.68, + "confidence": 0.988 + }, + { + "text": "got", + "start": 41.04, + "end": 41.06, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 41.06, + "end": 41.54, + "confidence": 0.988 + }, + { + "text": "got", + "start": 41.88, + "end": 41.9, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 41.9, + "end": 42.48, + "confidence": 0.987 + }, + { + "text": "got", + "start": 42.68, + "end": 42.7, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 42.7, + "end": 43.0, + "confidence": 0.989 + }, + { + "text": "got", + "start": 43.54, + "end": 44.06, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 44.06, + "end": 45.16, + "confidence": 0.989 + }, + { + "text": "got", + "start": 45.16, + "end": 45.46, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 45.46, + "end": 45.8, + "confidence": 0.99 + }, + { + "text": "got", + "start": 45.82, + "end": 46.22, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 46.22, + "end": 46.38, + "confidence": 0.989 + }, + { + "text": "got", + "start": 46.6, + "end": 47.0, + 
"confidence": 0.995 + }, + { + "text": "them,", + "start": 47.0, + "end": 47.76, + "confidence": 0.99 + }, + { + "text": "got", + "start": 48.18, + "end": 48.46, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 48.46, + "end": 48.9, + "confidence": 0.99 + }, + { + "text": "got", + "start": 48.9, + "end": 49.2, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 49.2, + "end": 49.84, + "confidence": 0.99 + }, + { + "text": "got", + "start": 50.36, + "end": 50.38, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 50.38, + "end": 50.76, + "confidence": 0.99 + }, + { + "text": "got", + "start": 51.5, + "end": 51.74, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 51.74, + "end": 52.34, + "confidence": 0.99 + }, + { + "text": "got", + "start": 52.44, + "end": 53.28, + "confidence": 0.994 + }, + { + "text": "them", + "start": 53.28, + "end": 54.52, + "confidence": 0.996 + } + ] + }, + { + "id": 4, + "seek": 5500, + "start": 54.52, + "end": 84.46, + "text": " got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 
658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + "avg_logprob": -0.05316366529250893, + "compression_ratio": 29.52, + "no_speech_prob": 0.2454655021429062, + "confidence": 0.844, + "words": [ + { + "text": "got", + "start": 54.52, + "end": 54.82, + "confidence": 0.002 + }, + { + "text": "them,", + "start": 54.82, + "end": 54.84, + "confidence": 0.016 + }, + { + "text": "got", + "start": 54.84, + "end": 55.14, + "confidence": 0.039 + }, + { + "text": "them,", + "start": 55.14, + "end": 55.56, + "confidence": 0.421 + }, + { + "text": "got", + "start": 55.56, + "end": 56.58, + "confidence": 0.645 + }, + { + "text": "them,", + "start": 56.58, + "end": 56.6, + "confidence": 0.595 + }, + { + "text": "got", + "start": 56.6, + "end": 57.36, + "confidence": 0.78 + }, 
+ { + "text": "them,", + "start": 57.36, + "end": 57.52, + "confidence": 0.639 + }, + { + "text": "got", + "start": 57.76, + "end": 58.22, + "confidence": 0.846 + }, + { + "text": "them,", + "start": 58.22, + "end": 58.26, + "confidence": 0.668 + }, + { + "text": "got", + "start": 58.74, + "end": 58.78, + "confidence": 0.876 + }, + { + "text": "them,", + "start": 58.78, + "end": 59.46, + "confidence": 0.672 + }, + { + "text": "got", + "start": 59.88, + "end": 59.9, + "confidence": 0.836 + }, + { + "text": "them,", + "start": 59.9, + "end": 60.32, + "confidence": 0.74 + }, + { + "text": "got", + "start": 60.52, + "end": 60.6, + "confidence": 0.884 + }, + { + "text": "them,", + "start": 60.6, + "end": 60.7, + "confidence": 0.814 + }, + { + "text": "got", + "start": 60.8, + "end": 60.98, + "confidence": 0.911 + }, + { + "text": "them,", + "start": 60.98, + "end": 61.78, + "confidence": 0.567 + }, + { + "text": "got", + "start": 61.78, + "end": 61.8, + "confidence": 0.122 + }, + { + "text": "them,", + "start": 61.8, + "end": 61.82, + "confidence": 0.748 + }, + { + "text": "got", + "start": 61.82, + "end": 61.84, + "confidence": 0.75 + }, + { + "text": "them,", + "start": 61.84, + "end": 61.86, + "confidence": 0.818 + }, + { + "text": "got", + "start": 61.86, + "end": 61.88, + "confidence": 0.797 + }, + { + "text": "them,", + "start": 61.88, + "end": 61.9, + "confidence": 0.841 + }, + { + "text": "got", + "start": 61.9, + "end": 61.92, + "confidence": 0.81 + }, + { + "text": "them,", + "start": 61.92, + "end": 61.94, + "confidence": 0.864 + }, + { + "text": "got", + "start": 61.94, + "end": 61.96, + "confidence": 0.846 + }, + { + "text": "them,", + "start": 61.96, + "end": 61.98, + "confidence": 0.893 + }, + { + "text": "got", + "start": 61.98, + "end": 62.0, + "confidence": 0.889 + }, + { + "text": "them,", + "start": 62.0, + "end": 62.02, + "confidence": 0.91 + }, + { + "text": "got", + "start": 62.02, + "end": 62.04, + "confidence": 0.896 + }, + { + "text": "them,", 
+ "start": 62.04, + "end": 62.06, + "confidence": 0.912 + }, + { + "text": "got", + "start": 62.06, + "end": 62.08, + "confidence": 0.877 + }, + { + "text": "them,", + "start": 62.08, + "end": 62.1, + "confidence": 0.923 + }, + { + "text": "got", + "start": 62.1, + "end": 62.12, + "confidence": 0.888 + }, + { + "text": "them,", + "start": 62.12, + "end": 62.14, + "confidence": 0.932 + }, + { + "text": "got", + "start": 62.14, + "end": 62.16, + "confidence": 0.896 + }, + { + "text": "them,", + "start": 62.16, + "end": 62.18, + "confidence": 0.94 + }, + { + "text": "got", + "start": 62.5, + "end": 62.72, + "confidence": 0.906 + }, + { + "text": "them,", + "start": 62.72, + "end": 62.74, + "confidence": 0.945 + }, + { + "text": "got", + "start": 62.74, + "end": 62.76, + "confidence": 0.912 + }, + { + "text": "them,", + "start": 62.76, + "end": 62.78, + "confidence": 0.95 + }, + { + "text": "got", + "start": 62.78, + "end": 62.8, + "confidence": 0.917 + }, + { + "text": "them,", + "start": 62.8, + "end": 62.82, + "confidence": 0.954 + }, + { + "text": "got", + "start": 62.82, + "end": 62.84, + "confidence": 0.923 + }, + { + "text": "them,", + "start": 62.84, + "end": 62.86, + "confidence": 0.957 + }, + { + "text": "got", + "start": 62.86, + "end": 62.88, + "confidence": 0.929 + }, + { + "text": "them,", + "start": 62.88, + "end": 62.9, + "confidence": 0.959 + }, + { + "text": "got", + "start": 62.9, + "end": 62.92, + "confidence": 0.933 + }, + { + "text": "them,", + "start": 62.92, + "end": 62.94, + "confidence": 0.962 + }, + { + "text": "got", + "start": 62.94, + "end": 62.96, + "confidence": 0.934 + }, + { + "text": "them,", + "start": 62.96, + "end": 62.98, + "confidence": 0.965 + }, + { + "text": "got", + "start": 62.98, + "end": 63.0, + "confidence": 0.935 + }, + { + "text": "them,", + "start": 63.0, + "end": 63.02, + "confidence": 0.965 + }, + { + "text": "got", + "start": 63.02, + "end": 63.04, + "confidence": 0.937 + }, + { + "text": "them,", + "start": 63.04, 
+ "end": 63.06, + "confidence": 0.966 + }, + { + "text": "got", + "start": 63.06, + "end": 63.08, + "confidence": 0.94 + }, + { + "text": "them,", + "start": 63.08, + "end": 63.1, + "confidence": 0.967 + }, + { + "text": "got", + "start": 63.1, + "end": 63.12, + "confidence": 0.94 + }, + { + "text": "them,", + "start": 63.12, + "end": 63.14, + "confidence": 0.969 + }, + { + "text": "got", + "start": 63.14, + "end": 63.16, + "confidence": 0.942 + }, + { + "text": "them,", + "start": 63.16, + "end": 63.18, + "confidence": 0.969 + }, + { + "text": "got", + "start": 63.18, + "end": 63.2, + "confidence": 0.943 + }, + { + "text": "them,", + "start": 63.2, + "end": 63.22, + "confidence": 0.97 + }, + { + "text": "got", + "start": 63.22, + "end": 63.24, + "confidence": 0.944 + }, + { + "text": "them,", + "start": 63.24, + "end": 63.26, + "confidence": 0.971 + }, + { + "text": "got", + "start": 63.26, + "end": 63.28, + "confidence": 0.946 + }, + { + "text": "them,", + "start": 63.28, + "end": 63.3, + "confidence": 0.971 + }, + { + "text": "got", + "start": 63.3, + "end": 63.32, + "confidence": 0.947 + }, + { + "text": "them,", + "start": 63.32, + "end": 63.34, + "confidence": 0.972 + }, + { + "text": "got", + "start": 63.34, + "end": 63.36, + "confidence": 0.949 + }, + { + "text": "them,", + "start": 63.36, + "end": 63.38, + "confidence": 0.971 + }, + { + "text": "got", + "start": 63.38, + "end": 63.4, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 63.4, + "end": 63.42, + "confidence": 0.973 + }, + { + "text": "got", + "start": 63.42, + "end": 63.44, + "confidence": 0.953 + }, + { + "text": "them,", + "start": 63.44, + "end": 63.46, + "confidence": 0.973 + }, + { + "text": "got", + "start": 63.46, + "end": 63.48, + "confidence": 0.955 + }, + { + "text": "them,", + "start": 63.48, + "end": 63.5, + "confidence": 0.974 + }, + { + "text": "got", + "start": 63.5, + "end": 63.52, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 63.52, + "end": 63.54, + 
"confidence": 0.974 + }, + { + "text": "got", + "start": 63.54, + "end": 63.56, + "confidence": 0.957 + }, + { + "text": "them,", + "start": 63.56, + "end": 63.58, + "confidence": 0.975 + }, + { + "text": "got", + "start": 63.58, + "end": 63.6, + "confidence": 0.959 + }, + { + "text": "them,", + "start": 63.6, + "end": 63.62, + "confidence": 0.975 + }, + { + "text": "got", + "start": 63.62, + "end": 63.64, + "confidence": 0.962 + }, + { + "text": "them,", + "start": 63.64, + "end": 63.66, + "confidence": 0.975 + }, + { + "text": "got", + "start": 63.66, + "end": 63.68, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 63.68, + "end": 63.7, + "confidence": 0.976 + }, + { + "text": "got", + "start": 63.7, + "end": 63.72, + "confidence": 0.965 + }, + { + "text": "them,", + "start": 63.72, + "end": 63.74, + "confidence": 0.976 + }, + { + "text": "got", + "start": 63.74, + "end": 63.76, + "confidence": 0.968 + }, + { + "text": "them,", + "start": 63.76, + "end": 63.78, + "confidence": 0.977 + }, + { + "text": "got", + "start": 63.78, + "end": 63.8, + "confidence": 0.97 + }, + { + "text": "them,", + "start": 63.8, + "end": 63.82, + "confidence": 0.977 + }, + { + "text": "got", + "start": 63.82, + "end": 63.84, + "confidence": 0.971 + }, + { + "text": "them,", + "start": 63.84, + "end": 63.86, + "confidence": 0.977 + }, + { + "text": "got", + "start": 63.86, + "end": 63.88, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 63.88, + "end": 63.9, + "confidence": 0.977 + }, + { + "text": "got", + "start": 63.9, + "end": 63.92, + "confidence": 0.975 + }, + { + "text": "them,", + "start": 63.92, + "end": 63.94, + "confidence": 0.979 + }, + { + "text": "got", + "start": 63.94, + "end": 63.96, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 63.96, + "end": 63.98, + "confidence": 0.979 + }, + { + "text": "got", + "start": 63.98, + "end": 64.0, + "confidence": 0.977 + }, + { + "text": "them,", + "start": 64.0, + "end": 64.02, + "confidence": 
0.979 + }, + { + "text": "got", + "start": 64.02, + "end": 64.04, + "confidence": 0.979 + }, + { + "text": "them,", + "start": 64.04, + "end": 64.06, + "confidence": 0.979 + }, + { + "text": "got", + "start": 64.06, + "end": 64.08, + "confidence": 0.979 + }, + { + "text": "them,", + "start": 64.08, + "end": 64.1, + "confidence": 0.979 + }, + { + "text": "got", + "start": 64.1, + "end": 64.12, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 64.12, + "end": 64.4, + "confidence": 0.98 + }, + { + "text": "got", + "start": 64.42, + "end": 64.58, + "confidence": 0.981 + }, + { + "text": "them,", + "start": 64.58, + "end": 64.64, + "confidence": 0.98 + }, + { + "text": "got", + "start": 64.74, + "end": 65.06, + "confidence": 0.982 + }, + { + "text": "them,", + "start": 65.06, + "end": 65.22, + "confidence": 0.98 + }, + { + "text": "got", + "start": 65.22, + "end": 65.98, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 65.98, + "end": 66.26, + "confidence": 0.981 + }, + { + "text": "got", + "start": 66.28, + "end": 67.32, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 67.32, + "end": 67.44, + "confidence": 0.982 + }, + { + "text": "got", + "start": 67.68, + "end": 67.7, + "confidence": 0.984 + }, + { + "text": "them,", + "start": 67.7, + "end": 67.88, + "confidence": 0.981 + }, + { + "text": "got", + "start": 67.92, + "end": 68.8, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 68.8, + "end": 69.12, + "confidence": 0.982 + }, + { + "text": "got", + "start": 69.26, + "end": 69.28, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 69.28, + "end": 69.42, + "confidence": 0.982 + }, + { + "text": "got", + "start": 69.82, + "end": 69.84, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 69.84, + "end": 70.16, + "confidence": 0.982 + }, + { + "text": "got", + "start": 70.16, + "end": 70.52, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 70.52, + "end": 71.38, + "confidence": 0.981 + }, + { + 
"text": "got", + "start": 71.38, + "end": 71.58, + "confidence": 0.988 + }, + { + "text": "them,", + "start": 71.58, + "end": 71.94, + "confidence": 0.982 + }, + { + "text": "got", + "start": 71.94, + "end": 72.12, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 72.12, + "end": 72.76, + "confidence": 0.983 + }, + { + "text": "got", + "start": 72.9, + "end": 73.56, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 73.56, + "end": 74.08, + "confidence": 0.983 + }, + { + "text": "got", + "start": 74.68, + "end": 75.08, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 75.08, + "end": 75.18, + "confidence": 0.984 + }, + { + "text": "got", + "start": 75.2, + "end": 76.42, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 76.42, + "end": 76.6, + "confidence": 0.983 + }, + { + "text": "got", + "start": 76.6, + "end": 76.8, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 76.8, + "end": 77.06, + "confidence": 0.985 + }, + { + "text": "got", + "start": 77.06, + "end": 77.44, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 77.44, + "end": 77.78, + "confidence": 0.984 + }, + { + "text": "got", + "start": 78.22, + "end": 78.24, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 78.24, + "end": 78.44, + "confidence": 0.985 + }, + { + "text": "got", + "start": 79.86, + "end": 79.9, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 79.9, + "end": 82.46, + "confidence": 0.985 + }, + { + "text": "got", + "start": 82.46, + "end": 84.44, + "confidence": 0.992 + }, + { + "text": "them", + "start": 84.44, + "end": 84.46, + "confidence": 0.994 + } + ] + }, + { + "id": 5, + "seek": 8500, + "start": 85.2, + "end": 91.86, + "text": " got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, 
got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them, got them", + "tokens": [ + 50364, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552, + 11, + 658, + 552 + ], + "temperature": 0.0, + 
"avg_logprob": -0.04273154596576776, + "compression_ratio": 29.52, + "no_speech_prob": 0.6358686685562134, + "confidence": 0.854, + "words": [ + { + "text": "got", + "start": 85.2, + "end": 85.36, + "confidence": 0.0 + }, + { + "text": "them,", + "start": 85.36, + "end": 85.42, + "confidence": 0.024 + }, + { + "text": "got", + "start": 85.68, + "end": 86.68, + "confidence": 0.791 + }, + { + "text": "them,", + "start": 86.68, + "end": 86.82, + "confidence": 0.583 + }, + { + "text": "got", + "start": 87.4, + "end": 87.6, + "confidence": 0.828 + }, + { + "text": "them,", + "start": 87.6, + "end": 88.02, + "confidence": 0.486 + }, + { + "text": "got", + "start": 88.02, + "end": 88.88, + "confidence": 0.881 + }, + { + "text": "them,", + "start": 88.88, + "end": 88.9, + "confidence": 0.425 + }, + { + "text": "got", + "start": 88.92, + "end": 88.94, + "confidence": 0.902 + }, + { + "text": "them,", + "start": 88.94, + "end": 88.96, + "confidence": 0.397 + }, + { + "text": "got", + "start": 88.96, + "end": 88.98, + "confidence": 0.897 + }, + { + "text": "them,", + "start": 88.98, + "end": 89.0, + "confidence": 0.4 + }, + { + "text": "got", + "start": 89.0, + "end": 89.02, + "confidence": 0.869 + }, + { + "text": "them,", + "start": 89.02, + "end": 89.06, + "confidence": 0.47 + }, + { + "text": "got", + "start": 89.18, + "end": 89.2, + "confidence": 0.89 + }, + { + "text": "them,", + "start": 89.2, + "end": 89.22, + "confidence": 0.57 + }, + { + "text": "got", + "start": 89.22, + "end": 89.24, + "confidence": 0.91 + }, + { + "text": "them,", + "start": 89.24, + "end": 89.26, + "confidence": 0.632 + }, + { + "text": "got", + "start": 89.26, + "end": 89.28, + "confidence": 0.598 + }, + { + "text": "them,", + "start": 89.28, + "end": 89.3, + "confidence": 0.743 + }, + { + "text": "got", + "start": 89.3, + "end": 89.32, + "confidence": 0.819 + }, + { + "text": "them,", + "start": 89.32, + "end": 89.34, + "confidence": 0.84 + }, + { + "text": "got", + "start": 89.34, + "end": 
89.36, + "confidence": 0.899 + }, + { + "text": "them,", + "start": 89.36, + "end": 89.38, + "confidence": 0.888 + }, + { + "text": "got", + "start": 89.38, + "end": 89.4, + "confidence": 0.918 + }, + { + "text": "them,", + "start": 89.4, + "end": 89.42, + "confidence": 0.914 + }, + { + "text": "got", + "start": 89.42, + "end": 89.44, + "confidence": 0.936 + }, + { + "text": "them,", + "start": 89.44, + "end": 89.46, + "confidence": 0.932 + }, + { + "text": "got", + "start": 89.46, + "end": 89.48, + "confidence": 0.947 + }, + { + "text": "them,", + "start": 89.48, + "end": 89.5, + "confidence": 0.944 + }, + { + "text": "got", + "start": 89.5, + "end": 89.52, + "confidence": 0.953 + }, + { + "text": "them,", + "start": 89.52, + "end": 89.54, + "confidence": 0.945 + }, + { + "text": "got", + "start": 89.54, + "end": 89.56, + "confidence": 0.955 + }, + { + "text": "them,", + "start": 89.56, + "end": 89.58, + "confidence": 0.944 + }, + { + "text": "got", + "start": 89.58, + "end": 89.6, + "confidence": 0.947 + }, + { + "text": "them,", + "start": 89.6, + "end": 89.62, + "confidence": 0.946 + }, + { + "text": "got", + "start": 89.62, + "end": 89.64, + "confidence": 0.94 + }, + { + "text": "them,", + "start": 89.64, + "end": 89.66, + "confidence": 0.952 + }, + { + "text": "got", + "start": 89.66, + "end": 89.68, + "confidence": 0.941 + }, + { + "text": "them,", + "start": 89.68, + "end": 89.7, + "confidence": 0.954 + }, + { + "text": "got", + "start": 89.7, + "end": 89.72, + "confidence": 0.941 + }, + { + "text": "them,", + "start": 89.72, + "end": 89.74, + "confidence": 0.96 + }, + { + "text": "got", + "start": 89.74, + "end": 89.76, + "confidence": 0.941 + }, + { + "text": "them,", + "start": 89.76, + "end": 89.78, + "confidence": 0.963 + }, + { + "text": "got", + "start": 89.78, + "end": 89.8, + "confidence": 0.945 + }, + { + "text": "them,", + "start": 89.8, + "end": 89.82, + "confidence": 0.966 + }, + { + "text": "got", + "start": 89.82, + "end": 89.84, + 
"confidence": 0.948 + }, + { + "text": "them,", + "start": 89.84, + "end": 89.86, + "confidence": 0.968 + }, + { + "text": "got", + "start": 89.86, + "end": 89.88, + "confidence": 0.95 + }, + { + "text": "them,", + "start": 89.88, + "end": 89.9, + "confidence": 0.971 + }, + { + "text": "got", + "start": 89.9, + "end": 89.92, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 89.92, + "end": 89.94, + "confidence": 0.975 + }, + { + "text": "got", + "start": 89.94, + "end": 89.96, + "confidence": 0.95 + }, + { + "text": "them,", + "start": 89.96, + "end": 89.98, + "confidence": 0.975 + }, + { + "text": "got", + "start": 89.98, + "end": 90.0, + "confidence": 0.951 + }, + { + "text": "them,", + "start": 90.0, + "end": 90.02, + "confidence": 0.977 + }, + { + "text": "got", + "start": 90.02, + "end": 90.04, + "confidence": 0.953 + }, + { + "text": "them,", + "start": 90.04, + "end": 90.06, + "confidence": 0.977 + }, + { + "text": "got", + "start": 90.06, + "end": 90.08, + "confidence": 0.954 + }, + { + "text": "them,", + "start": 90.08, + "end": 90.1, + "confidence": 0.979 + }, + { + "text": "got", + "start": 90.1, + "end": 90.12, + "confidence": 0.955 + }, + { + "text": "them,", + "start": 90.12, + "end": 90.14, + "confidence": 0.979 + }, + { + "text": "got", + "start": 90.14, + "end": 90.16, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 90.16, + "end": 90.18, + "confidence": 0.981 + }, + { + "text": "got", + "start": 90.18, + "end": 90.2, + "confidence": 0.956 + }, + { + "text": "them,", + "start": 90.2, + "end": 90.22, + "confidence": 0.982 + }, + { + "text": "got", + "start": 90.22, + "end": 90.24, + "confidence": 0.958 + }, + { + "text": "them,", + "start": 90.24, + "end": 90.26, + "confidence": 0.982 + }, + { + "text": "got", + "start": 90.26, + "end": 90.28, + "confidence": 0.959 + }, + { + "text": "them,", + "start": 90.28, + "end": 90.3, + "confidence": 0.982 + }, + { + "text": "got", + "start": 90.3, + "end": 90.32, + "confidence": 0.961 
+ }, + { + "text": "them,", + "start": 90.32, + "end": 90.34, + "confidence": 0.982 + }, + { + "text": "got", + "start": 90.34, + "end": 90.36, + "confidence": 0.963 + }, + { + "text": "them,", + "start": 90.36, + "end": 90.38, + "confidence": 0.984 + }, + { + "text": "got", + "start": 90.38, + "end": 90.4, + "confidence": 0.964 + }, + { + "text": "them,", + "start": 90.4, + "end": 90.42, + "confidence": 0.984 + }, + { + "text": "got", + "start": 90.42, + "end": 90.44, + "confidence": 0.966 + }, + { + "text": "them,", + "start": 90.44, + "end": 90.46, + "confidence": 0.985 + }, + { + "text": "got", + "start": 90.46, + "end": 90.48, + "confidence": 0.968 + }, + { + "text": "them,", + "start": 90.48, + "end": 90.5, + "confidence": 0.985 + }, + { + "text": "got", + "start": 90.5, + "end": 90.52, + "confidence": 0.969 + }, + { + "text": "them,", + "start": 90.52, + "end": 90.54, + "confidence": 0.986 + }, + { + "text": "got", + "start": 90.54, + "end": 90.56, + "confidence": 0.971 + }, + { + "text": "them,", + "start": 90.56, + "end": 90.58, + "confidence": 0.986 + }, + { + "text": "got", + "start": 90.58, + "end": 90.6, + "confidence": 0.973 + }, + { + "text": "them,", + "start": 90.6, + "end": 90.62, + "confidence": 0.987 + }, + { + "text": "got", + "start": 90.62, + "end": 90.64, + "confidence": 0.974 + }, + { + "text": "them,", + "start": 90.64, + "end": 90.66, + "confidence": 0.987 + }, + { + "text": "got", + "start": 90.66, + "end": 90.68, + "confidence": 0.976 + }, + { + "text": "them,", + "start": 90.68, + "end": 90.7, + "confidence": 0.987 + }, + { + "text": "got", + "start": 90.7, + "end": 90.72, + "confidence": 0.979 + }, + { + "text": "them,", + "start": 90.72, + "end": 90.74, + "confidence": 0.988 + }, + { + "text": "got", + "start": 90.74, + "end": 90.76, + "confidence": 0.98 + }, + { + "text": "them,", + "start": 90.76, + "end": 90.78, + "confidence": 0.989 + }, + { + "text": "got", + "start": 90.78, + "end": 90.8, + "confidence": 0.981 + }, + { + 
"text": "them,", + "start": 90.8, + "end": 90.82, + "confidence": 0.989 + }, + { + "text": "got", + "start": 90.82, + "end": 90.84, + "confidence": 0.983 + }, + { + "text": "them,", + "start": 90.84, + "end": 90.86, + "confidence": 0.989 + }, + { + "text": "got", + "start": 90.86, + "end": 90.88, + "confidence": 0.985 + }, + { + "text": "them,", + "start": 90.88, + "end": 90.9, + "confidence": 0.99 + }, + { + "text": "got", + "start": 90.9, + "end": 90.92, + "confidence": 0.986 + }, + { + "text": "them,", + "start": 90.92, + "end": 90.94, + "confidence": 0.99 + }, + { + "text": "got", + "start": 90.94, + "end": 90.96, + "confidence": 0.987 + }, + { + "text": "them,", + "start": 90.96, + "end": 90.98, + "confidence": 0.991 + }, + { + "text": "got", + "start": 90.98, + "end": 91.0, + "confidence": 0.988 + }, + { + "text": "them,", + "start": 91.0, + "end": 91.02, + "confidence": 0.991 + }, + { + "text": "got", + "start": 91.02, + "end": 91.04, + "confidence": 0.989 + }, + { + "text": "them,", + "start": 91.04, + "end": 91.06, + "confidence": 0.991 + }, + { + "text": "got", + "start": 91.06, + "end": 91.08, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 91.08, + "end": 91.1, + "confidence": 0.992 + }, + { + "text": "got", + "start": 91.1, + "end": 91.12, + "confidence": 0.99 + }, + { + "text": "them,", + "start": 91.12, + "end": 91.14, + "confidence": 0.992 + }, + { + "text": "got", + "start": 91.14, + "end": 91.16, + "confidence": 0.991 + }, + { + "text": "them,", + "start": 91.16, + "end": 91.18, + "confidence": 0.992 + }, + { + "text": "got", + "start": 91.18, + "end": 91.2, + "confidence": 0.992 + }, + { + "text": "them,", + "start": 91.2, + "end": 91.22, + "confidence": 0.992 + }, + { + "text": "got", + "start": 91.22, + "end": 91.24, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 91.24, + "end": 91.26, + "confidence": 0.993 + }, + { + "text": "got", + "start": 91.26, + "end": 91.28, + "confidence": 0.993 + }, + { + "text": "them,", + 
"start": 91.28, + "end": 91.3, + "confidence": 0.992 + }, + { + "text": "got", + "start": 91.3, + "end": 91.32, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 91.32, + "end": 91.34, + "confidence": 0.993 + }, + { + "text": "got", + "start": 91.34, + "end": 91.36, + "confidence": 0.993 + }, + { + "text": "them,", + "start": 91.36, + "end": 91.38, + "confidence": 0.993 + }, + { + "text": "got", + "start": 91.38, + "end": 91.4, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 91.4, + "end": 91.42, + "confidence": 0.993 + }, + { + "text": "got", + "start": 91.42, + "end": 91.44, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 91.44, + "end": 91.46, + "confidence": 0.993 + }, + { + "text": "got", + "start": 91.46, + "end": 91.48, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 91.48, + "end": 91.5, + "confidence": 0.993 + }, + { + "text": "got", + "start": 91.5, + "end": 91.52, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 91.52, + "end": 91.54, + "confidence": 0.993 + }, + { + "text": "got", + "start": 91.54, + "end": 91.56, + "confidence": 0.994 + }, + { + "text": "them,", + "start": 91.56, + "end": 91.58, + "confidence": 0.994 + }, + { + "text": "got", + "start": 91.58, + "end": 91.6, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 91.6, + "end": 91.62, + "confidence": 0.994 + }, + { + "text": "got", + "start": 91.62, + "end": 91.64, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 91.64, + "end": 91.66, + "confidence": 0.994 + }, + { + "text": "got", + "start": 91.66, + "end": 91.68, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 91.68, + "end": 91.7, + "confidence": 0.994 + }, + { + "text": "got", + "start": 91.7, + "end": 91.72, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 91.72, + "end": 91.74, + "confidence": 0.994 + }, + { + "text": "got", + "start": 91.74, + "end": 91.76, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 91.76, + 
"end": 91.78, + "confidence": 0.994 + }, + { + "text": "got", + "start": 91.78, + "end": 91.8, + "confidence": 0.995 + }, + { + "text": "them,", + "start": 91.8, + "end": 91.82, + "confidence": 0.994 + }, + { + "text": "got", + "start": 91.82, + "end": 91.84, + "confidence": 0.995 + }, + { + "text": "them", + "start": 91.84, + "end": 91.86, + "confidence": 0.998 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/punctuations_no/bonjour.wav.csv b/tests/expected/punctuations_no/bonjour.wav.csv new file mode 100644 index 0000000000000000000000000000000000000000..5cbf6b2289864210ec6373166aa3e624eba1bde6 --- /dev/null +++ b/tests/expected/punctuations_no/bonjour.wav.csv @@ -0,0 +1 @@ +Bonjour !,0.14,0.94 diff --git a/tests/expected/punctuations_no/bonjour.wav.srt b/tests/expected/punctuations_no/bonjour.wav.srt new file mode 100644 index 0000000000000000000000000000000000000000..d8d205dc3daf944fcf8cbd38edc5f45287fb5510 --- /dev/null +++ b/tests/expected/punctuations_no/bonjour.wav.srt @@ -0,0 +1,4 @@ +1 +00:00:00,140 --> 00:00:00,940 +Bonjour ! + diff --git a/tests/expected/punctuations_no/bonjour.wav.tsv b/tests/expected/punctuations_no/bonjour.wav.tsv new file mode 100644 index 0000000000000000000000000000000000000000..4244f49ea0e650559f762b334550dd60e0a2f2a6 --- /dev/null +++ b/tests/expected/punctuations_no/bonjour.wav.tsv @@ -0,0 +1,2 @@ +start end text +140 940 Bonjour ! diff --git a/tests/expected/punctuations_no/bonjour.wav.txt b/tests/expected/punctuations_no/bonjour.wav.txt new file mode 100644 index 0000000000000000000000000000000000000000..6625d5f9893711f1c711cc5a3695219d1f4d9cc4 --- /dev/null +++ b/tests/expected/punctuations_no/bonjour.wav.txt @@ -0,0 +1 @@ +Bonjour ! 
diff --git a/tests/expected/punctuations_no/bonjour.wav.vtt b/tests/expected/punctuations_no/bonjour.wav.vtt new file mode 100644 index 0000000000000000000000000000000000000000..7e287580a17d47bd8d18089e9179177468d702ab --- /dev/null +++ b/tests/expected/punctuations_no/bonjour.wav.vtt @@ -0,0 +1,5 @@ +WEBVTT + +00:00.140 --> 00:00.940 +Bonjour ! + diff --git a/tests/expected/punctuations_no/bonjour.wav.words.csv b/tests/expected/punctuations_no/bonjour.wav.words.csv new file mode 100644 index 0000000000000000000000000000000000000000..2ebbe298c64c70895fa7d45404203bc3a58d99d0 --- /dev/null +++ b/tests/expected/punctuations_no/bonjour.wav.words.csv @@ -0,0 +1 @@ +Bonjour,0.14,0.94 diff --git a/tests/expected/punctuations_no/bonjour.wav.words.json b/tests/expected/punctuations_no/bonjour.wav.words.json new file mode 100644 index 0000000000000000000000000000000000000000..ca00b5248e7c46eff6131b8b2010f2a7573691da --- /dev/null +++ b/tests/expected/punctuations_no/bonjour.wav.words.json @@ -0,0 +1,32 @@ +{ + "text": " Bonjour !", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.14, + "end": 0.94, + "text": " Bonjour !", + "tokens": [ + 50364, + 25431, + 2298, + 50402 + ], + "temperature": 0.0, + "avg_logprob": -0.7049755573272705, + "compression_ratio": 0.5294117647058824, + "no_speech_prob": 0.08610370755195618, + "confidence": 0.964, + "words": [ + { + "text": "Bonjour", + "start": 0.14, + "end": 0.94, + "confidence": 0.964 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/punctuations_no/bonjour.wav.words.srt b/tests/expected/punctuations_no/bonjour.wav.words.srt new file mode 100644 index 0000000000000000000000000000000000000000..cd14abb1f5b082f428f7e68574088414fa661db8 --- /dev/null +++ b/tests/expected/punctuations_no/bonjour.wav.words.srt @@ -0,0 +1,4 @@ +1 +00:00:00,140 --> 00:00:00,940 +Bonjour + diff --git a/tests/expected/punctuations_no/bonjour.wav.words.tsv 
b/tests/expected/punctuations_no/bonjour.wav.words.tsv new file mode 100644 index 0000000000000000000000000000000000000000..2ddd00966ddffa056b18e3903294c7ef5795ffe1 --- /dev/null +++ b/tests/expected/punctuations_no/bonjour.wav.words.tsv @@ -0,0 +1,2 @@ +start end text +140 940 Bonjour diff --git a/tests/expected/punctuations_no/bonjour.wav.words.vtt b/tests/expected/punctuations_no/bonjour.wav.words.vtt new file mode 100644 index 0000000000000000000000000000000000000000..62824b031fa1a81beaf98f87974f3efca7c54b9b --- /dev/null +++ b/tests/expected/punctuations_no/bonjour.wav.words.vtt @@ -0,0 +1,5 @@ +WEBVTT + +00:00.140 --> 00:00.940 +Bonjour + diff --git a/tests/expected/punctuations_no/punctuations.mp3.csv b/tests/expected/punctuations_no/punctuations.mp3.csv new file mode 100644 index 0000000000000000000000000000000000000000..99139dd1ffcb8551acde7072c34682a561976ead --- /dev/null +++ b/tests/expected/punctuations_no/punctuations.mp3.csv @@ -0,0 +1 @@ +"Dis-moi, est-ce que l'avion vole ?",0.38,2.76 diff --git a/tests/expected/punctuations_no/punctuations.mp3.srt b/tests/expected/punctuations_no/punctuations.mp3.srt new file mode 100644 index 0000000000000000000000000000000000000000..877851ff9664283f0877d16b24551fa2d7a88226 --- /dev/null +++ b/tests/expected/punctuations_no/punctuations.mp3.srt @@ -0,0 +1,4 @@ +1 +00:00:00,380 --> 00:00:02,760 +Dis-moi, est-ce que l'avion vole ? + diff --git a/tests/expected/punctuations_no/punctuations.mp3.tsv b/tests/expected/punctuations_no/punctuations.mp3.tsv new file mode 100644 index 0000000000000000000000000000000000000000..58cf737627008302c822b962a66b0b7e46a3a1de --- /dev/null +++ b/tests/expected/punctuations_no/punctuations.mp3.tsv @@ -0,0 +1,2 @@ +start end text +380 2760 Dis-moi, est-ce que l'avion vole ? 
diff --git a/tests/expected/punctuations_no/punctuations.mp3.txt b/tests/expected/punctuations_no/punctuations.mp3.txt new file mode 100644 index 0000000000000000000000000000000000000000..6490b685ff0adf8d95873de48095732fd91c30fd --- /dev/null +++ b/tests/expected/punctuations_no/punctuations.mp3.txt @@ -0,0 +1 @@ +Dis-moi, est-ce que l'avion vole ? diff --git a/tests/expected/punctuations_no/punctuations.mp3.vtt b/tests/expected/punctuations_no/punctuations.mp3.vtt new file mode 100644 index 0000000000000000000000000000000000000000..74e8cfa02cc1b28b5ee2ff3c71bca6ce0e2c766a --- /dev/null +++ b/tests/expected/punctuations_no/punctuations.mp3.vtt @@ -0,0 +1,5 @@ +WEBVTT + +00:00.380 --> 00:02.760 +Dis-moi, est-ce que l'avion vole ? + diff --git a/tests/expected/punctuations_no/punctuations.mp3.words.csv b/tests/expected/punctuations_no/punctuations.mp3.words.csv new file mode 100644 index 0000000000000000000000000000000000000000..69362f890ad8d996e811cebb54bfabcdd64daf63 --- /dev/null +++ b/tests/expected/punctuations_no/punctuations.mp3.words.csv @@ -0,0 +1,5 @@ +Dis-moi,0.38,1.1 +est-ce,1.28,1.5 +que,1.5,1.66 +l'avion,1.66,2.04 +vole,2.04,2.76 diff --git a/tests/expected/punctuations_no/punctuations.mp3.words.json b/tests/expected/punctuations_no/punctuations.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..7237d01be62674e64d50e94665ae719544df0194 --- /dev/null +++ b/tests/expected/punctuations_no/punctuations.mp3.words.json @@ -0,0 +1,68 @@ +{ + "text": " Dis-moi, est-ce que l'avion vole ?", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.38, + "end": 2.76, + "text": " Dis-moi, est-ce que l'avion vole ?", + "tokens": [ + 50364, + 4208, + 12, + 29292, + 11, + 871, + 12, + 384, + 631, + 287, + 6, + 706, + 313, + 49877, + 2506, + 50496 + ], + "temperature": 0.0, + "avg_logprob": -0.26328243928797107, + "compression_ratio": 0.8095238095238095, + "no_speech_prob": 0.03882359713315964, + "confidence": 0.928, + "words": [ + { + 
"text": "Dis-moi", + "start": 0.38, + "end": 1.1, + "confidence": 0.809 + }, + { + "text": "est-ce", + "start": 1.28, + "end": 1.5, + "confidence": 0.968 + }, + { + "text": "que", + "start": 1.5, + "end": 1.66, + "confidence": 0.978 + }, + { + "text": "l'avion", + "start": 1.66, + "end": 2.04, + "confidence": 0.993 + }, + { + "text": "vole", + "start": 2.04, + "end": 2.76, + "confidence": 0.898 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/punctuations_no/punctuations.mp3.words.srt b/tests/expected/punctuations_no/punctuations.mp3.words.srt new file mode 100644 index 0000000000000000000000000000000000000000..9a0041e682bad02b37134d8a0d764cd07b64f608 --- /dev/null +++ b/tests/expected/punctuations_no/punctuations.mp3.words.srt @@ -0,0 +1,20 @@ +1 +00:00:00,380 --> 00:00:01,100 +Dis-moi + +2 +00:00:01,280 --> 00:00:01,500 +est-ce + +3 +00:00:01,500 --> 00:00:01,660 +que + +4 +00:00:01,660 --> 00:00:02,040 +l'avion + +5 +00:00:02,040 --> 00:00:02,760 +vole + diff --git a/tests/expected/punctuations_no/punctuations.mp3.words.tsv b/tests/expected/punctuations_no/punctuations.mp3.words.tsv new file mode 100644 index 0000000000000000000000000000000000000000..a9613c4fe6697b2aa5ea56eecd09fb9c18d32210 --- /dev/null +++ b/tests/expected/punctuations_no/punctuations.mp3.words.tsv @@ -0,0 +1,6 @@ +start end text +380 1100 Dis-moi +1280 1500 est-ce +1500 1660 que +1660 2040 l'avion +2040 2760 vole diff --git a/tests/expected/punctuations_no/punctuations.mp3.words.vtt b/tests/expected/punctuations_no/punctuations.mp3.words.vtt new file mode 100644 index 0000000000000000000000000000000000000000..abff0045258eeac08af804f7212f30c94c62c784 --- /dev/null +++ b/tests/expected/punctuations_no/punctuations.mp3.words.vtt @@ -0,0 +1,17 @@ +WEBVTT + +00:00.380 --> 00:01.100 +Dis-moi + +00:01.280 --> 00:01.500 +est-ce + +00:01.500 --> 00:01.660 +que + +00:01.660 --> 00:02.040 +l'avion + +00:02.040 --> 00:02.760 +vole + diff --git 
a/tests/expected/punctuations_yes/bonjour.wav.csv b/tests/expected/punctuations_yes/bonjour.wav.csv new file mode 100644 index 0000000000000000000000000000000000000000..5cbf6b2289864210ec6373166aa3e624eba1bde6 --- /dev/null +++ b/tests/expected/punctuations_yes/bonjour.wav.csv @@ -0,0 +1 @@ +Bonjour !,0.14,0.94 diff --git a/tests/expected/punctuations_yes/bonjour.wav.srt b/tests/expected/punctuations_yes/bonjour.wav.srt new file mode 100644 index 0000000000000000000000000000000000000000..d8d205dc3daf944fcf8cbd38edc5f45287fb5510 --- /dev/null +++ b/tests/expected/punctuations_yes/bonjour.wav.srt @@ -0,0 +1,4 @@ +1 +00:00:00,140 --> 00:00:00,940 +Bonjour ! + diff --git a/tests/expected/punctuations_yes/bonjour.wav.tsv b/tests/expected/punctuations_yes/bonjour.wav.tsv new file mode 100644 index 0000000000000000000000000000000000000000..4244f49ea0e650559f762b334550dd60e0a2f2a6 --- /dev/null +++ b/tests/expected/punctuations_yes/bonjour.wav.tsv @@ -0,0 +1,2 @@ +start end text +140 940 Bonjour ! diff --git a/tests/expected/punctuations_yes/bonjour.wav.txt b/tests/expected/punctuations_yes/bonjour.wav.txt new file mode 100644 index 0000000000000000000000000000000000000000..6625d5f9893711f1c711cc5a3695219d1f4d9cc4 --- /dev/null +++ b/tests/expected/punctuations_yes/bonjour.wav.txt @@ -0,0 +1 @@ +Bonjour ! diff --git a/tests/expected/punctuations_yes/bonjour.wav.vtt b/tests/expected/punctuations_yes/bonjour.wav.vtt new file mode 100644 index 0000000000000000000000000000000000000000..7e287580a17d47bd8d18089e9179177468d702ab --- /dev/null +++ b/tests/expected/punctuations_yes/bonjour.wav.vtt @@ -0,0 +1,5 @@ +WEBVTT + +00:00.140 --> 00:00.940 +Bonjour ! 
+ diff --git a/tests/expected/punctuations_yes/bonjour.wav.words.csv b/tests/expected/punctuations_yes/bonjour.wav.words.csv new file mode 100644 index 0000000000000000000000000000000000000000..5cbf6b2289864210ec6373166aa3e624eba1bde6 --- /dev/null +++ b/tests/expected/punctuations_yes/bonjour.wav.words.csv @@ -0,0 +1 @@ +Bonjour !,0.14,0.94 diff --git a/tests/expected/punctuations_yes/bonjour.wav.words.json b/tests/expected/punctuations_yes/bonjour.wav.words.json new file mode 100644 index 0000000000000000000000000000000000000000..e0e9ea07a37c2520ea8ae3e722c2da4b0422e019 --- /dev/null +++ b/tests/expected/punctuations_yes/bonjour.wav.words.json @@ -0,0 +1,32 @@ +{ + "text": " Bonjour !", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.14, + "end": 0.94, + "text": " Bonjour !", + "tokens": [ + 50364, + 25431, + 2298, + 50402 + ], + "temperature": 0.0, + "avg_logprob": -0.7049755573272705, + "compression_ratio": 0.5294117647058824, + "no_speech_prob": 0.08610370755195618, + "confidence": 0.964, + "words": [ + { + "text": "Bonjour !", + "start": 0.14, + "end": 0.94, + "confidence": 0.964 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/punctuations_yes/bonjour.wav.words.srt b/tests/expected/punctuations_yes/bonjour.wav.words.srt new file mode 100644 index 0000000000000000000000000000000000000000..d8d205dc3daf944fcf8cbd38edc5f45287fb5510 --- /dev/null +++ b/tests/expected/punctuations_yes/bonjour.wav.words.srt @@ -0,0 +1,4 @@ +1 +00:00:00,140 --> 00:00:00,940 +Bonjour ! + diff --git a/tests/expected/punctuations_yes/bonjour.wav.words.tsv b/tests/expected/punctuations_yes/bonjour.wav.words.tsv new file mode 100644 index 0000000000000000000000000000000000000000..4244f49ea0e650559f762b334550dd60e0a2f2a6 --- /dev/null +++ b/tests/expected/punctuations_yes/bonjour.wav.words.tsv @@ -0,0 +1,2 @@ +start end text +140 940 Bonjour ! 
diff --git a/tests/expected/punctuations_yes/bonjour.wav.words.vtt b/tests/expected/punctuations_yes/bonjour.wav.words.vtt new file mode 100644 index 0000000000000000000000000000000000000000..7e287580a17d47bd8d18089e9179177468d702ab --- /dev/null +++ b/tests/expected/punctuations_yes/bonjour.wav.words.vtt @@ -0,0 +1,5 @@ +WEBVTT + +00:00.140 --> 00:00.940 +Bonjour ! + diff --git a/tests/expected/punctuations_yes/punctuations.mp3.csv b/tests/expected/punctuations_yes/punctuations.mp3.csv new file mode 100644 index 0000000000000000000000000000000000000000..99139dd1ffcb8551acde7072c34682a561976ead --- /dev/null +++ b/tests/expected/punctuations_yes/punctuations.mp3.csv @@ -0,0 +1 @@ +"Dis-moi, est-ce que l'avion vole ?",0.38,2.76 diff --git a/tests/expected/punctuations_yes/punctuations.mp3.srt b/tests/expected/punctuations_yes/punctuations.mp3.srt new file mode 100644 index 0000000000000000000000000000000000000000..877851ff9664283f0877d16b24551fa2d7a88226 --- /dev/null +++ b/tests/expected/punctuations_yes/punctuations.mp3.srt @@ -0,0 +1,4 @@ +1 +00:00:00,380 --> 00:00:02,760 +Dis-moi, est-ce que l'avion vole ? + diff --git a/tests/expected/punctuations_yes/punctuations.mp3.tsv b/tests/expected/punctuations_yes/punctuations.mp3.tsv new file mode 100644 index 0000000000000000000000000000000000000000..58cf737627008302c822b962a66b0b7e46a3a1de --- /dev/null +++ b/tests/expected/punctuations_yes/punctuations.mp3.tsv @@ -0,0 +1,2 @@ +start end text +380 2760 Dis-moi, est-ce que l'avion vole ? diff --git a/tests/expected/punctuations_yes/punctuations.mp3.txt b/tests/expected/punctuations_yes/punctuations.mp3.txt new file mode 100644 index 0000000000000000000000000000000000000000..6490b685ff0adf8d95873de48095732fd91c30fd --- /dev/null +++ b/tests/expected/punctuations_yes/punctuations.mp3.txt @@ -0,0 +1 @@ +Dis-moi, est-ce que l'avion vole ? 
diff --git a/tests/expected/punctuations_yes/punctuations.mp3.vtt b/tests/expected/punctuations_yes/punctuations.mp3.vtt new file mode 100644 index 0000000000000000000000000000000000000000..74e8cfa02cc1b28b5ee2ff3c71bca6ce0e2c766a --- /dev/null +++ b/tests/expected/punctuations_yes/punctuations.mp3.vtt @@ -0,0 +1,5 @@ +WEBVTT + +00:00.380 --> 00:02.760 +Dis-moi, est-ce que l'avion vole ? + diff --git a/tests/expected/punctuations_yes/punctuations.mp3.words.csv b/tests/expected/punctuations_yes/punctuations.mp3.words.csv new file mode 100644 index 0000000000000000000000000000000000000000..c6a4613439d9ffda67122981bdfc9d5896416c73 --- /dev/null +++ b/tests/expected/punctuations_yes/punctuations.mp3.words.csv @@ -0,0 +1,5 @@ +"Dis-moi,",0.38,1.1 +est-ce,1.28,1.5 +que,1.5,1.66 +l'avion,1.66,2.04 +vole ?,2.04,2.76 diff --git a/tests/expected/punctuations_yes/punctuations.mp3.words.json b/tests/expected/punctuations_yes/punctuations.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..440342c5261720c292eb5ec0f1827124decf0caf --- /dev/null +++ b/tests/expected/punctuations_yes/punctuations.mp3.words.json @@ -0,0 +1,68 @@ +{ + "text": " Dis-moi, est-ce que l'avion vole ?", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.38, + "end": 2.76, + "text": " Dis-moi, est-ce que l'avion vole ?", + "tokens": [ + 50364, + 4208, + 12, + 29292, + 11, + 871, + 12, + 384, + 631, + 287, + 6, + 706, + 313, + 49877, + 2506, + 50496 + ], + "temperature": 0.0, + "avg_logprob": -0.26328243928797107, + "compression_ratio": 0.8095238095238095, + "no_speech_prob": 0.03882359713315964, + "confidence": 0.928, + "words": [ + { + "text": "Dis-moi,", + "start": 0.38, + "end": 1.1, + "confidence": 0.809 + }, + { + "text": "est-ce", + "start": 1.28, + "end": 1.5, + "confidence": 0.968 + }, + { + "text": "que", + "start": 1.5, + "end": 1.66, + "confidence": 0.978 + }, + { + "text": "l'avion", + "start": 1.66, + "end": 2.04, + "confidence": 0.993 + }, + { + "text": 
"vole ?", + "start": 2.04, + "end": 2.76, + "confidence": 0.898 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/punctuations_yes/punctuations.mp3.words.srt b/tests/expected/punctuations_yes/punctuations.mp3.words.srt new file mode 100644 index 0000000000000000000000000000000000000000..91c0fbf35dff3f5043392a4b6035510d073372d4 --- /dev/null +++ b/tests/expected/punctuations_yes/punctuations.mp3.words.srt @@ -0,0 +1,20 @@ +1 +00:00:00,380 --> 00:00:01,100 +Dis-moi, + +2 +00:00:01,280 --> 00:00:01,500 +est-ce + +3 +00:00:01,500 --> 00:00:01,660 +que + +4 +00:00:01,660 --> 00:00:02,040 +l'avion + +5 +00:00:02,040 --> 00:00:02,760 +vole ? + diff --git a/tests/expected/punctuations_yes/punctuations.mp3.words.tsv b/tests/expected/punctuations_yes/punctuations.mp3.words.tsv new file mode 100644 index 0000000000000000000000000000000000000000..f267961f36fcab7036a58e7e979625d89df087e4 --- /dev/null +++ b/tests/expected/punctuations_yes/punctuations.mp3.words.tsv @@ -0,0 +1,6 @@ +start end text +380 1100 Dis-moi, +1280 1500 est-ce +1500 1660 que +1660 2040 l'avion +2040 2760 vole ? diff --git a/tests/expected/punctuations_yes/punctuations.mp3.words.vtt b/tests/expected/punctuations_yes/punctuations.mp3.words.vtt new file mode 100644 index 0000000000000000000000000000000000000000..50f0eea419d6d089ff2203180ee9a556da330597 --- /dev/null +++ b/tests/expected/punctuations_yes/punctuations.mp3.words.vtt @@ -0,0 +1,17 @@ +WEBVTT + +00:00.380 --> 00:01.100 +Dis-moi, + +00:01.280 --> 00:01.500 +est-ce + +00:01.500 --> 00:01.660 +que + +00:01.660 --> 00:02.040 +l'avion + +00:02.040 --> 00:02.760 +vole ? 
+ diff --git a/tests/expected/small.en.cpu/arabic.mp3.words.json b/tests/expected/small.en.cpu/arabic.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..acb72a1674c019af75523e14dc77422914030a68 --- /dev/null +++ b/tests/expected/small.en.cpu/arabic.mp3.words.json @@ -0,0 +1,3346 @@ +{ + "text": " I am the one who is the one who is the one who is the one who is the one who is the one I am the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is 
the one who is the one who is the one who is the one who is the one who is the one who is", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 1.0, + "end": 7.72, + "text": " I am the one who is the one who is the one who is the one who is the one who is the one", + "tokens": [ + 314, + 716, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530 + ], + "temperature": 0.0, + "avg_logprob": -0.23478534274631077, + "compression_ratio": 24.294117647058822, + "no_speech_prob": 0.6507940292358398, + "confidence": 0.358, + "words": [ + { + "text": "I", + "start": 1.0, + "end": 3.16, + "confidence": 0.053 + }, + { + "text": "am", + "start": 3.16, + "end": 4.04, + "confidence": 0.152 + }, + { + "text": "the", + "start": 4.04, + "end": 4.08, + "confidence": 0.143 + }, + { + "text": "one", + "start": 4.08, + "end": 6.1, + "confidence": 0.085 + }, + { + "text": "who", + "start": 6.1, + "end": 6.28, + "confidence": 0.718 + }, + { + "text": "is", + "start": 6.28, + "end": 6.32, + "confidence": 0.12 + }, + { + "text": "the", + "start": 6.32, + "end": 6.36, + "confidence": 0.134 + }, + { + "text": "one", + "start": 6.36, + "end": 6.4, + "confidence": 0.204 + }, + { + "text": "who", + "start": 6.4, + "end": 6.44, + "confidence": 0.618 + }, + { + "text": "is", + "start": 6.44, + "end": 6.96, + "confidence": 0.319 + }, + { + "text": "the", + "start": 6.96, + "end": 7.0, + "confidence": 0.382 + }, + { + "text": "one", + "start": 7.0, + "end": 7.24, + "confidence": 0.443 + }, + { + "text": "who", + "start": 7.24, + "end": 7.28, + "confidence": 0.448 + }, + { + "text": "is", + "start": 7.28, + "end": 7.32, + "confidence": 0.518 + }, + { + "text": "the", + "start": 7.32, + "end": 7.36, + "confidence": 0.543 + }, + { + "text": "one", + "start": 7.36, + "end": 7.4, + "confidence": 0.723 + }, + { + "text": "who", + "start": 7.4, + "end": 7.44, + "confidence": 0.433 + }, + { + "text": "is", 
+ "start": 7.44, + "end": 7.48, + "confidence": 0.678 + }, + { + "text": "the", + "start": 7.48, + "end": 7.52, + "confidence": 0.636 + }, + { + "text": "one", + "start": 7.52, + "end": 7.56, + "confidence": 0.817 + }, + { + "text": "who", + "start": 7.56, + "end": 7.6, + "confidence": 0.505 + }, + { + "text": "is", + "start": 7.6, + "end": 7.64, + "confidence": 0.74 + }, + { + "text": "the", + "start": 7.64, + "end": 7.68, + "confidence": 0.697 + }, + { + "text": "one", + "start": 7.68, + "end": 7.72, + "confidence": 0.698 + } + ] + }, + { + "id": 1, + "seek": 700, + "start": 7.72, + "end": 37.02, + "text": " I am the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the", + "tokens": [ + 50363, + 314, + 716, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 
262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262 + ], + "temperature": 0.0, + "avg_logprob": -0.06210707770453559, + "compression_ratio": 24.5, + "no_speech_prob": 3.115955405519344e-05, + "confidence": 0.939, + "words": [ + { + "text": "I", + "start": 7.72, + "end": 9.42, + "confidence": 0.22 + }, + { + "text": "am", + "start": 9.42, + "end": 12.1, + "confidence": 0.81 + }, + { + "text": "the", + "start": 12.1, + "end": 12.48, + "confidence": 0.872 + }, + { + "text": "one", + "start": 12.48, + "end": 12.58, + "confidence": 0.927 + }, + { + "text": "who", + "start": 12.58, + "end": 13.4, + "confidence": 0.966 + }, + { + "text": "is", + "start": 13.4, + "end": 13.96, + "confidence": 0.934 + }, + { + "text": "the", + "start": 13.96, + "end": 14.56, + "confidence": 0.968 + }, + { + "text": "one", + "start": 14.56, + "end": 14.6, + "confidence": 0.989 + }, + { + "text": "who", + "start": 14.6, + "end": 14.64, + "confidence": 0.951 + }, + { + "text": "is", + "start": 14.64, + "end": 14.68, + 
"confidence": 0.965 + }, + { + "text": "the", + "start": 14.68, + "end": 15.04, + "confidence": 0.977 + }, + { + "text": "one", + "start": 15.04, + "end": 17.78, + "confidence": 0.993 + }, + { + "text": "who", + "start": 17.78, + "end": 17.82, + "confidence": 0.843 + }, + { + "text": "is", + "start": 17.82, + "end": 18.58, + "confidence": 0.977 + }, + { + "text": "the", + "start": 18.58, + "end": 18.62, + "confidence": 0.979 + }, + { + "text": "one", + "start": 18.62, + "end": 18.66, + "confidence": 0.991 + }, + { + "text": "who", + "start": 18.66, + "end": 19.46, + "confidence": 0.694 + }, + { + "text": "is", + "start": 19.46, + "end": 20.9, + "confidence": 0.979 + }, + { + "text": "the", + "start": 20.9, + "end": 20.94, + "confidence": 0.978 + }, + { + "text": "one", + "start": 20.94, + "end": 20.98, + "confidence": 0.99 + }, + { + "text": "who", + "start": 20.98, + "end": 21.02, + "confidence": 0.657 + }, + { + "text": "is", + "start": 21.02, + "end": 21.06, + "confidence": 0.977 + }, + { + "text": "the", + "start": 21.06, + "end": 21.52, + "confidence": 0.974 + }, + { + "text": "one", + "start": 21.52, + "end": 21.56, + "confidence": 0.983 + }, + { + "text": "who", + "start": 21.56, + "end": 21.6, + "confidence": 0.609 + }, + { + "text": "is", + "start": 21.6, + "end": 22.04, + "confidence": 0.978 + }, + { + "text": "the", + "start": 22.04, + "end": 22.5, + "confidence": 0.966 + }, + { + "text": "one", + "start": 22.5, + "end": 23.6, + "confidence": 0.986 + }, + { + "text": "who", + "start": 23.6, + "end": 24.76, + "confidence": 0.624 + }, + { + "text": "is", + "start": 24.76, + "end": 26.18, + "confidence": 0.978 + }, + { + "text": "the", + "start": 26.18, + "end": 26.8, + "confidence": 0.962 + }, + { + "text": "one", + "start": 26.8, + "end": 26.84, + "confidence": 0.984 + }, + { + "text": "who", + "start": 26.84, + "end": 26.88, + "confidence": 0.679 + }, + { + "text": "is", + "start": 26.88, + "end": 27.4, + "confidence": 0.978 + }, + { + "text": "the", + 
"start": 27.4, + "end": 28.14, + "confidence": 0.958 + }, + { + "text": "one", + "start": 28.14, + "end": 28.28, + "confidence": 0.98 + }, + { + "text": "who", + "start": 28.28, + "end": 28.32, + "confidence": 0.671 + }, + { + "text": "is", + "start": 28.32, + "end": 28.36, + "confidence": 0.979 + }, + { + "text": "the", + "start": 28.36, + "end": 28.4, + "confidence": 0.955 + }, + { + "text": "one", + "start": 28.4, + "end": 28.8, + "confidence": 0.975 + }, + { + "text": "who", + "start": 28.8, + "end": 29.14, + "confidence": 0.683 + }, + { + "text": "is", + "start": 29.14, + "end": 29.18, + "confidence": 0.978 + }, + { + "text": "the", + "start": 29.18, + "end": 29.22, + "confidence": 0.956 + }, + { + "text": "one", + "start": 29.22, + "end": 29.56, + "confidence": 0.972 + }, + { + "text": "who", + "start": 29.56, + "end": 29.74, + "confidence": 0.695 + }, + { + "text": "is", + "start": 29.74, + "end": 29.78, + "confidence": 0.978 + }, + { + "text": "the", + "start": 29.78, + "end": 29.82, + "confidence": 0.957 + }, + { + "text": "one", + "start": 29.82, + "end": 29.86, + "confidence": 0.971 + }, + { + "text": "who", + "start": 29.86, + "end": 29.9, + "confidence": 0.713 + }, + { + "text": "is", + "start": 29.9, + "end": 29.94, + "confidence": 0.98 + }, + { + "text": "the", + "start": 29.94, + "end": 29.98, + "confidence": 0.958 + }, + { + "text": "one", + "start": 29.98, + "end": 30.02, + "confidence": 0.971 + }, + { + "text": "who", + "start": 30.02, + "end": 30.06, + "confidence": 0.737 + }, + { + "text": "is", + "start": 30.06, + "end": 30.1, + "confidence": 0.982 + }, + { + "text": "the", + "start": 30.1, + "end": 30.14, + "confidence": 0.96 + }, + { + "text": "one", + "start": 30.14, + "end": 30.18, + "confidence": 0.971 + }, + { + "text": "who", + "start": 30.18, + "end": 30.22, + "confidence": 0.754 + }, + { + "text": "is", + "start": 30.22, + "end": 30.26, + "confidence": 0.984 + }, + { + "text": "the", + "start": 30.26, + "end": 30.3, + "confidence": 
0.962 + }, + { + "text": "one", + "start": 30.3, + "end": 30.34, + "confidence": 0.973 + }, + { + "text": "who", + "start": 30.34, + "end": 30.38, + "confidence": 0.759 + }, + { + "text": "is", + "start": 30.38, + "end": 30.42, + "confidence": 0.986 + }, + { + "text": "the", + "start": 30.42, + "end": 30.46, + "confidence": 0.964 + }, + { + "text": "one", + "start": 30.46, + "end": 30.5, + "confidence": 0.974 + }, + { + "text": "who", + "start": 30.5, + "end": 30.54, + "confidence": 0.765 + }, + { + "text": "is", + "start": 30.54, + "end": 30.58, + "confidence": 0.987 + }, + { + "text": "the", + "start": 30.58, + "end": 30.62, + "confidence": 0.966 + }, + { + "text": "one", + "start": 30.62, + "end": 30.66, + "confidence": 0.975 + }, + { + "text": "who", + "start": 30.66, + "end": 30.7, + "confidence": 0.774 + }, + { + "text": "is", + "start": 30.7, + "end": 30.74, + "confidence": 0.988 + }, + { + "text": "the", + "start": 30.74, + "end": 30.78, + "confidence": 0.969 + }, + { + "text": "one", + "start": 30.78, + "end": 30.82, + "confidence": 0.975 + }, + { + "text": "who", + "start": 30.82, + "end": 30.86, + "confidence": 0.801 + }, + { + "text": "is", + "start": 30.86, + "end": 30.9, + "confidence": 0.989 + }, + { + "text": "the", + "start": 30.9, + "end": 30.94, + "confidence": 0.97 + }, + { + "text": "one", + "start": 30.94, + "end": 30.98, + "confidence": 0.976 + }, + { + "text": "who", + "start": 30.98, + "end": 31.02, + "confidence": 0.818 + }, + { + "text": "is", + "start": 31.02, + "end": 31.06, + "confidence": 0.989 + }, + { + "text": "the", + "start": 31.06, + "end": 31.1, + "confidence": 0.972 + }, + { + "text": "one", + "start": 31.1, + "end": 31.14, + "confidence": 0.977 + }, + { + "text": "who", + "start": 31.14, + "end": 31.18, + "confidence": 0.838 + }, + { + "text": "is", + "start": 31.18, + "end": 31.22, + "confidence": 0.989 + }, + { + "text": "the", + "start": 31.22, + "end": 31.26, + "confidence": 0.973 + }, + { + "text": "one", + "start": 
31.26, + "end": 31.3, + "confidence": 0.977 + }, + { + "text": "who", + "start": 31.3, + "end": 31.34, + "confidence": 0.862 + }, + { + "text": "is", + "start": 31.34, + "end": 31.38, + "confidence": 0.989 + }, + { + "text": "the", + "start": 31.38, + "end": 31.42, + "confidence": 0.974 + }, + { + "text": "one", + "start": 31.42, + "end": 31.46, + "confidence": 0.977 + }, + { + "text": "who", + "start": 31.46, + "end": 31.5, + "confidence": 0.882 + }, + { + "text": "is", + "start": 31.5, + "end": 31.54, + "confidence": 0.989 + }, + { + "text": "the", + "start": 31.54, + "end": 31.58, + "confidence": 0.974 + }, + { + "text": "one", + "start": 31.58, + "end": 31.62, + "confidence": 0.977 + }, + { + "text": "who", + "start": 31.62, + "end": 31.66, + "confidence": 0.895 + }, + { + "text": "is", + "start": 31.66, + "end": 31.7, + "confidence": 0.989 + }, + { + "text": "the", + "start": 31.7, + "end": 31.74, + "confidence": 0.975 + }, + { + "text": "one", + "start": 31.74, + "end": 31.78, + "confidence": 0.977 + }, + { + "text": "who", + "start": 31.78, + "end": 31.82, + "confidence": 0.905 + }, + { + "text": "is", + "start": 31.82, + "end": 31.86, + "confidence": 0.989 + }, + { + "text": "the", + "start": 31.86, + "end": 31.9, + "confidence": 0.975 + }, + { + "text": "one", + "start": 31.9, + "end": 31.94, + "confidence": 0.977 + }, + { + "text": "who", + "start": 31.94, + "end": 31.98, + "confidence": 0.92 + }, + { + "text": "is", + "start": 31.98, + "end": 32.02, + "confidence": 0.99 + }, + { + "text": "the", + "start": 32.02, + "end": 32.06, + "confidence": 0.976 + }, + { + "text": "one", + "start": 32.06, + "end": 32.1, + "confidence": 0.978 + }, + { + "text": "who", + "start": 32.1, + "end": 32.14, + "confidence": 0.924 + }, + { + "text": "is", + "start": 32.14, + "end": 32.18, + "confidence": 0.99 + }, + { + "text": "the", + "start": 32.18, + "end": 32.22, + "confidence": 0.977 + }, + { + "text": "one", + "start": 32.22, + "end": 32.26, + "confidence": 0.978 + }, 
+ { + "text": "who", + "start": 32.26, + "end": 32.3, + "confidence": 0.932 + }, + { + "text": "is", + "start": 32.3, + "end": 32.34, + "confidence": 0.989 + }, + { + "text": "the", + "start": 32.34, + "end": 32.38, + "confidence": 0.976 + }, + { + "text": "one", + "start": 32.38, + "end": 32.42, + "confidence": 0.977 + }, + { + "text": "who", + "start": 32.42, + "end": 32.46, + "confidence": 0.936 + }, + { + "text": "is", + "start": 32.46, + "end": 32.5, + "confidence": 0.99 + }, + { + "text": "the", + "start": 32.5, + "end": 32.54, + "confidence": 0.977 + }, + { + "text": "one", + "start": 32.54, + "end": 32.58, + "confidence": 0.978 + }, + { + "text": "who", + "start": 32.58, + "end": 32.62, + "confidence": 0.94 + }, + { + "text": "is", + "start": 32.62, + "end": 32.66, + "confidence": 0.99 + }, + { + "text": "the", + "start": 32.66, + "end": 32.7, + "confidence": 0.977 + }, + { + "text": "one", + "start": 32.7, + "end": 32.74, + "confidence": 0.978 + }, + { + "text": "who", + "start": 32.74, + "end": 32.78, + "confidence": 0.942 + }, + { + "text": "is", + "start": 32.78, + "end": 32.82, + "confidence": 0.989 + }, + { + "text": "the", + "start": 32.82, + "end": 32.86, + "confidence": 0.977 + }, + { + "text": "one", + "start": 32.86, + "end": 32.9, + "confidence": 0.978 + }, + { + "text": "who", + "start": 32.9, + "end": 32.94, + "confidence": 0.941 + }, + { + "text": "is", + "start": 32.94, + "end": 32.98, + "confidence": 0.989 + }, + { + "text": "the", + "start": 32.98, + "end": 33.02, + "confidence": 0.978 + }, + { + "text": "one", + "start": 33.02, + "end": 33.06, + "confidence": 0.978 + }, + { + "text": "who", + "start": 33.06, + "end": 33.1, + "confidence": 0.942 + }, + { + "text": "is", + "start": 33.1, + "end": 33.14, + "confidence": 0.989 + }, + { + "text": "the", + "start": 33.14, + "end": 33.18, + "confidence": 0.978 + }, + { + "text": "one", + "start": 33.18, + "end": 33.22, + "confidence": 0.978 + }, + { + "text": "who", + "start": 33.22, + "end": 
33.26, + "confidence": 0.942 + }, + { + "text": "is", + "start": 33.26, + "end": 33.3, + "confidence": 0.99 + }, + { + "text": "the", + "start": 33.3, + "end": 33.34, + "confidence": 0.978 + }, + { + "text": "one", + "start": 33.34, + "end": 33.38, + "confidence": 0.978 + }, + { + "text": "who", + "start": 33.38, + "end": 33.42, + "confidence": 0.941 + }, + { + "text": "is", + "start": 33.42, + "end": 33.46, + "confidence": 0.989 + }, + { + "text": "the", + "start": 33.46, + "end": 33.5, + "confidence": 0.979 + }, + { + "text": "one", + "start": 33.5, + "end": 33.54, + "confidence": 0.977 + }, + { + "text": "who", + "start": 33.54, + "end": 33.58, + "confidence": 0.941 + }, + { + "text": "is", + "start": 33.58, + "end": 33.62, + "confidence": 0.989 + }, + { + "text": "the", + "start": 33.62, + "end": 33.66, + "confidence": 0.978 + }, + { + "text": "one", + "start": 33.66, + "end": 33.7, + "confidence": 0.977 + }, + { + "text": "who", + "start": 33.7, + "end": 33.74, + "confidence": 0.939 + }, + { + "text": "is", + "start": 33.74, + "end": 33.78, + "confidence": 0.989 + }, + { + "text": "the", + "start": 33.78, + "end": 33.82, + "confidence": 0.979 + }, + { + "text": "one", + "start": 33.82, + "end": 33.86, + "confidence": 0.977 + }, + { + "text": "who", + "start": 33.86, + "end": 33.9, + "confidence": 0.939 + }, + { + "text": "is", + "start": 33.9, + "end": 33.94, + "confidence": 0.989 + }, + { + "text": "the", + "start": 33.94, + "end": 33.98, + "confidence": 0.979 + }, + { + "text": "one", + "start": 33.98, + "end": 34.02, + "confidence": 0.977 + }, + { + "text": "who", + "start": 34.02, + "end": 34.06, + "confidence": 0.938 + }, + { + "text": "is", + "start": 34.06, + "end": 34.1, + "confidence": 0.989 + }, + { + "text": "the", + "start": 34.1, + "end": 34.14, + "confidence": 0.978 + }, + { + "text": "one", + "start": 34.14, + "end": 34.18, + "confidence": 0.977 + }, + { + "text": "who", + "start": 34.18, + "end": 34.22, + "confidence": 0.936 + }, + { + "text": 
"is", + "start": 34.22, + "end": 34.26, + "confidence": 0.989 + }, + { + "text": "the", + "start": 34.26, + "end": 34.3, + "confidence": 0.978 + }, + { + "text": "one", + "start": 34.3, + "end": 34.34, + "confidence": 0.977 + }, + { + "text": "who", + "start": 34.34, + "end": 34.38, + "confidence": 0.936 + }, + { + "text": "is", + "start": 34.38, + "end": 34.42, + "confidence": 0.989 + }, + { + "text": "the", + "start": 34.42, + "end": 34.46, + "confidence": 0.979 + }, + { + "text": "one", + "start": 34.46, + "end": 34.5, + "confidence": 0.976 + }, + { + "text": "who", + "start": 34.5, + "end": 34.54, + "confidence": 0.936 + }, + { + "text": "is", + "start": 34.54, + "end": 34.58, + "confidence": 0.988 + }, + { + "text": "the", + "start": 34.58, + "end": 34.62, + "confidence": 0.978 + }, + { + "text": "one", + "start": 34.62, + "end": 34.66, + "confidence": 0.977 + }, + { + "text": "who", + "start": 34.66, + "end": 34.7, + "confidence": 0.936 + }, + { + "text": "is", + "start": 34.7, + "end": 34.74, + "confidence": 0.988 + }, + { + "text": "the", + "start": 34.74, + "end": 34.78, + "confidence": 0.978 + }, + { + "text": "one", + "start": 34.78, + "end": 34.82, + "confidence": 0.976 + }, + { + "text": "who", + "start": 34.82, + "end": 34.86, + "confidence": 0.935 + }, + { + "text": "is", + "start": 34.86, + "end": 34.9, + "confidence": 0.988 + }, + { + "text": "the", + "start": 34.9, + "end": 34.94, + "confidence": 0.979 + }, + { + "text": "one", + "start": 34.94, + "end": 34.98, + "confidence": 0.976 + }, + { + "text": "who", + "start": 34.98, + "end": 35.02, + "confidence": 0.936 + }, + { + "text": "is", + "start": 35.02, + "end": 35.06, + "confidence": 0.987 + }, + { + "text": "the", + "start": 35.06, + "end": 35.1, + "confidence": 0.978 + }, + { + "text": "one", + "start": 35.1, + "end": 35.14, + "confidence": 0.976 + }, + { + "text": "who", + "start": 35.14, + "end": 35.18, + "confidence": 0.936 + }, + { + "text": "is", + "start": 35.18, + "end": 35.22, + 
"confidence": 0.987 + }, + { + "text": "the", + "start": 35.22, + "end": 35.26, + "confidence": 0.978 + }, + { + "text": "one", + "start": 35.26, + "end": 35.3, + "confidence": 0.976 + }, + { + "text": "who", + "start": 35.3, + "end": 35.34, + "confidence": 0.935 + }, + { + "text": "is", + "start": 35.34, + "end": 35.38, + "confidence": 0.987 + }, + { + "text": "the", + "start": 35.38, + "end": 35.42, + "confidence": 0.977 + }, + { + "text": "one", + "start": 35.42, + "end": 35.46, + "confidence": 0.976 + }, + { + "text": "who", + "start": 35.46, + "end": 35.5, + "confidence": 0.934 + }, + { + "text": "is", + "start": 35.5, + "end": 35.54, + "confidence": 0.986 + }, + { + "text": "the", + "start": 35.54, + "end": 35.58, + "confidence": 0.977 + }, + { + "text": "one", + "start": 35.58, + "end": 35.62, + "confidence": 0.976 + }, + { + "text": "who", + "start": 35.62, + "end": 35.66, + "confidence": 0.933 + }, + { + "text": "is", + "start": 35.66, + "end": 35.7, + "confidence": 0.985 + }, + { + "text": "the", + "start": 35.7, + "end": 35.74, + "confidence": 0.977 + }, + { + "text": "one", + "start": 35.74, + "end": 35.78, + "confidence": 0.974 + }, + { + "text": "who", + "start": 35.78, + "end": 35.82, + "confidence": 0.939 + }, + { + "text": "is", + "start": 35.82, + "end": 35.86, + "confidence": 0.986 + }, + { + "text": "the", + "start": 35.86, + "end": 35.9, + "confidence": 0.977 + }, + { + "text": "one", + "start": 35.9, + "end": 35.94, + "confidence": 0.975 + }, + { + "text": "who", + "start": 35.94, + "end": 35.98, + "confidence": 0.932 + }, + { + "text": "is", + "start": 35.98, + "end": 36.02, + "confidence": 0.984 + }, + { + "text": "the", + "start": 36.02, + "end": 36.06, + "confidence": 0.976 + }, + { + "text": "one", + "start": 36.06, + "end": 36.1, + "confidence": 0.975 + }, + { + "text": "who", + "start": 36.1, + "end": 36.14, + "confidence": 0.931 + }, + { + "text": "is", + "start": 36.14, + "end": 36.18, + "confidence": 0.984 + }, + { + "text": "the", + 
"start": 36.18, + "end": 36.22, + "confidence": 0.976 + }, + { + "text": "one", + "start": 36.22, + "end": 36.26, + "confidence": 0.975 + }, + { + "text": "who", + "start": 36.26, + "end": 36.3, + "confidence": 0.929 + }, + { + "text": "is", + "start": 36.3, + "end": 36.34, + "confidence": 0.983 + }, + { + "text": "the", + "start": 36.34, + "end": 36.38, + "confidence": 0.975 + }, + { + "text": "one", + "start": 36.38, + "end": 36.42, + "confidence": 0.975 + }, + { + "text": "who", + "start": 36.42, + "end": 36.46, + "confidence": 0.929 + }, + { + "text": "is", + "start": 36.46, + "end": 36.5, + "confidence": 0.982 + }, + { + "text": "the", + "start": 36.5, + "end": 36.54, + "confidence": 0.974 + }, + { + "text": "one", + "start": 36.54, + "end": 36.58, + "confidence": 0.975 + }, + { + "text": "who", + "start": 36.58, + "end": 36.62, + "confidence": 0.928 + }, + { + "text": "is", + "start": 36.62, + "end": 36.66, + "confidence": 0.982 + }, + { + "text": "the", + "start": 36.66, + "end": 36.7, + "confidence": 0.974 + }, + { + "text": "one", + "start": 36.7, + "end": 36.74, + "confidence": 0.975 + }, + { + "text": "who", + "start": 36.74, + "end": 36.92, + "confidence": 0.928 + }, + { + "text": "is", + "start": 36.92, + "end": 36.98, + "confidence": 0.981 + }, + { + "text": "the", + "start": 36.98, + "end": 37.02, + "confidence": 0.973 + } + ] + }, + { + "id": 2, + "seek": 3700, + "start": 37.02, + "end": 67.0, + "text": " one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the 
one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is", + "tokens": [ + 50363, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318 + ], + "temperature": 0.0, + "avg_logprob": -0.017284017139010958, + "compression_ratio": 26.93548387096774, + "no_speech_prob": 
0.09498446434736252, + "confidence": 0.983, + "words": [ + { + "text": "one", + "start": 37.02, + "end": 37.4, + "confidence": 0.964 + }, + { + "text": "who", + "start": 37.4, + "end": 39.86, + "confidence": 0.961 + }, + { + "text": "is", + "start": 39.86, + "end": 39.9, + "confidence": 0.977 + }, + { + "text": "the", + "start": 39.9, + "end": 39.94, + "confidence": 0.975 + }, + { + "text": "one", + "start": 39.94, + "end": 39.98, + "confidence": 0.964 + }, + { + "text": "who", + "start": 39.98, + "end": 40.02, + "confidence": 0.982 + }, + { + "text": "is", + "start": 40.02, + "end": 40.06, + "confidence": 0.989 + }, + { + "text": "the", + "start": 40.06, + "end": 40.1, + "confidence": 0.978 + }, + { + "text": "one", + "start": 40.1, + "end": 40.14, + "confidence": 0.98 + }, + { + "text": "who", + "start": 40.14, + "end": 40.18, + "confidence": 0.989 + }, + { + "text": "is", + "start": 40.18, + "end": 40.22, + "confidence": 0.992 + }, + { + "text": "the", + "start": 40.22, + "end": 40.26, + "confidence": 0.97 + }, + { + "text": "one", + "start": 40.26, + "end": 40.3, + "confidence": 0.982 + }, + { + "text": "who", + "start": 40.3, + "end": 40.34, + "confidence": 0.989 + }, + { + "text": "is", + "start": 40.34, + "end": 40.38, + "confidence": 0.993 + }, + { + "text": "the", + "start": 40.38, + "end": 40.42, + "confidence": 0.971 + }, + { + "text": "one", + "start": 40.42, + "end": 40.46, + "confidence": 0.984 + }, + { + "text": "who", + "start": 40.46, + "end": 40.5, + "confidence": 0.99 + }, + { + "text": "is", + "start": 40.5, + "end": 40.54, + "confidence": 0.994 + }, + { + "text": "the", + "start": 40.54, + "end": 40.58, + "confidence": 0.969 + }, + { + "text": "one", + "start": 40.58, + "end": 40.62, + "confidence": 0.985 + }, + { + "text": "who", + "start": 40.62, + "end": 40.66, + "confidence": 0.989 + }, + { + "text": "is", + "start": 40.66, + "end": 40.7, + "confidence": 0.991 + }, + { + "text": "the", + "start": 40.7, + "end": 40.74, + "confidence": 0.625 
+ }, + { + "text": "one", + "start": 40.74, + "end": 40.78, + "confidence": 0.947 + }, + { + "text": "who", + "start": 40.78, + "end": 40.82, + "confidence": 0.985 + }, + { + "text": "is", + "start": 40.82, + "end": 40.86, + "confidence": 0.99 + }, + { + "text": "the", + "start": 40.86, + "end": 40.9, + "confidence": 0.91 + }, + { + "text": "one", + "start": 40.9, + "end": 40.94, + "confidence": 0.984 + }, + { + "text": "who", + "start": 40.94, + "end": 40.98, + "confidence": 0.986 + }, + { + "text": "is", + "start": 40.98, + "end": 41.02, + "confidence": 0.994 + }, + { + "text": "the", + "start": 41.02, + "end": 41.06, + "confidence": 0.933 + }, + { + "text": "one", + "start": 41.06, + "end": 41.1, + "confidence": 0.985 + }, + { + "text": "who", + "start": 41.1, + "end": 41.14, + "confidence": 0.988 + }, + { + "text": "is", + "start": 41.14, + "end": 41.18, + "confidence": 0.996 + }, + { + "text": "the", + "start": 41.18, + "end": 41.22, + "confidence": 0.948 + }, + { + "text": "one", + "start": 41.22, + "end": 41.26, + "confidence": 0.987 + }, + { + "text": "who", + "start": 41.26, + "end": 41.3, + "confidence": 0.989 + }, + { + "text": "is", + "start": 41.3, + "end": 41.34, + "confidence": 0.997 + }, + { + "text": "the", + "start": 41.34, + "end": 41.38, + "confidence": 0.961 + }, + { + "text": "one", + "start": 41.38, + "end": 41.42, + "confidence": 0.988 + }, + { + "text": "who", + "start": 41.42, + "end": 41.46, + "confidence": 0.99 + }, + { + "text": "is", + "start": 41.46, + "end": 41.5, + "confidence": 0.997 + }, + { + "text": "the", + "start": 41.5, + "end": 41.54, + "confidence": 0.969 + }, + { + "text": "one", + "start": 41.54, + "end": 41.58, + "confidence": 0.989 + }, + { + "text": "who", + "start": 41.58, + "end": 41.62, + "confidence": 0.99 + }, + { + "text": "is", + "start": 41.62, + "end": 41.66, + "confidence": 0.998 + }, + { + "text": "the", + "start": 41.66, + "end": 41.7, + "confidence": 0.976 + }, + { + "text": "one", + "start": 41.7, + 
"end": 41.74, + "confidence": 0.99 + }, + { + "text": "who", + "start": 41.74, + "end": 41.78, + "confidence": 0.99 + }, + { + "text": "is", + "start": 41.78, + "end": 41.82, + "confidence": 0.998 + }, + { + "text": "the", + "start": 41.82, + "end": 41.86, + "confidence": 0.981 + }, + { + "text": "one", + "start": 41.86, + "end": 41.9, + "confidence": 0.99 + }, + { + "text": "who", + "start": 41.9, + "end": 41.94, + "confidence": 0.99 + }, + { + "text": "is", + "start": 41.94, + "end": 41.98, + "confidence": 0.998 + }, + { + "text": "the", + "start": 41.98, + "end": 42.02, + "confidence": 0.984 + }, + { + "text": "one", + "start": 42.02, + "end": 42.06, + "confidence": 0.991 + }, + { + "text": "who", + "start": 42.06, + "end": 42.1, + "confidence": 0.99 + }, + { + "text": "is", + "start": 42.1, + "end": 42.14, + "confidence": 0.998 + }, + { + "text": "the", + "start": 42.14, + "end": 42.18, + "confidence": 0.987 + }, + { + "text": "one", + "start": 42.18, + "end": 42.22, + "confidence": 0.991 + }, + { + "text": "who", + "start": 42.22, + "end": 42.26, + "confidence": 0.99 + }, + { + "text": "is", + "start": 42.26, + "end": 42.3, + "confidence": 0.998 + }, + { + "text": "the", + "start": 42.3, + "end": 42.34, + "confidence": 0.988 + }, + { + "text": "one", + "start": 42.34, + "end": 42.38, + "confidence": 0.991 + }, + { + "text": "who", + "start": 42.38, + "end": 42.42, + "confidence": 0.989 + }, + { + "text": "is", + "start": 42.42, + "end": 42.46, + "confidence": 0.999 + }, + { + "text": "the", + "start": 42.46, + "end": 42.5, + "confidence": 0.989 + }, + { + "text": "one", + "start": 42.5, + "end": 42.54, + "confidence": 0.991 + }, + { + "text": "who", + "start": 42.54, + "end": 42.58, + "confidence": 0.989 + }, + { + "text": "is", + "start": 42.58, + "end": 42.62, + "confidence": 0.999 + }, + { + "text": "the", + "start": 42.62, + "end": 42.66, + "confidence": 0.99 + }, + { + "text": "one", + "start": 42.66, + "end": 42.7, + "confidence": 0.991 + }, + { + 
"text": "who", + "start": 42.7, + "end": 42.74, + "confidence": 0.989 + }, + { + "text": "is", + "start": 42.74, + "end": 42.78, + "confidence": 0.999 + }, + { + "text": "the", + "start": 42.78, + "end": 42.82, + "confidence": 0.99 + }, + { + "text": "one", + "start": 42.82, + "end": 42.86, + "confidence": 0.992 + }, + { + "text": "who", + "start": 42.86, + "end": 42.9, + "confidence": 0.988 + }, + { + "text": "is", + "start": 42.9, + "end": 42.94, + "confidence": 0.999 + }, + { + "text": "the", + "start": 42.94, + "end": 42.98, + "confidence": 0.991 + }, + { + "text": "one", + "start": 42.98, + "end": 43.02, + "confidence": 0.991 + }, + { + "text": "who", + "start": 43.02, + "end": 43.06, + "confidence": 0.988 + }, + { + "text": "is", + "start": 43.06, + "end": 43.1, + "confidence": 0.999 + }, + { + "text": "the", + "start": 43.1, + "end": 43.14, + "confidence": 0.991 + }, + { + "text": "one", + "start": 43.14, + "end": 43.18, + "confidence": 0.991 + }, + { + "text": "who", + "start": 43.18, + "end": 43.22, + "confidence": 0.988 + }, + { + "text": "is", + "start": 43.22, + "end": 43.26, + "confidence": 0.999 + }, + { + "text": "the", + "start": 43.26, + "end": 43.3, + "confidence": 0.991 + }, + { + "text": "one", + "start": 43.3, + "end": 43.34, + "confidence": 0.991 + }, + { + "text": "who", + "start": 43.34, + "end": 43.38, + "confidence": 0.988 + }, + { + "text": "is", + "start": 43.38, + "end": 43.42, + "confidence": 0.999 + }, + { + "text": "the", + "start": 43.42, + "end": 43.46, + "confidence": 0.991 + }, + { + "text": "one", + "start": 43.46, + "end": 43.5, + "confidence": 0.991 + }, + { + "text": "who", + "start": 43.5, + "end": 43.54, + "confidence": 0.987 + }, + { + "text": "is", + "start": 43.54, + "end": 43.58, + "confidence": 0.999 + }, + { + "text": "the", + "start": 43.58, + "end": 43.62, + "confidence": 0.991 + }, + { + "text": "one", + "start": 43.62, + "end": 43.66, + "confidence": 0.991 + }, + { + "text": "who", + "start": 43.66, + "end": 43.7, 
+ "confidence": 0.987 + }, + { + "text": "is", + "start": 43.7, + "end": 43.74, + "confidence": 0.999 + }, + { + "text": "the", + "start": 43.74, + "end": 43.78, + "confidence": 0.991 + }, + { + "text": "one", + "start": 43.78, + "end": 43.82, + "confidence": 0.991 + }, + { + "text": "who", + "start": 43.82, + "end": 43.86, + "confidence": 0.987 + }, + { + "text": "is", + "start": 43.86, + "end": 43.9, + "confidence": 0.999 + }, + { + "text": "the", + "start": 43.9, + "end": 43.94, + "confidence": 0.991 + }, + { + "text": "one", + "start": 43.94, + "end": 43.98, + "confidence": 0.99 + }, + { + "text": "who", + "start": 43.98, + "end": 44.02, + "confidence": 0.986 + }, + { + "text": "is", + "start": 44.02, + "end": 44.06, + "confidence": 0.999 + }, + { + "text": "the", + "start": 44.06, + "end": 44.1, + "confidence": 0.991 + }, + { + "text": "one", + "start": 44.1, + "end": 44.14, + "confidence": 0.99 + }, + { + "text": "who", + "start": 44.14, + "end": 44.18, + "confidence": 0.986 + }, + { + "text": "is", + "start": 44.18, + "end": 44.22, + "confidence": 0.999 + }, + { + "text": "the", + "start": 44.22, + "end": 44.26, + "confidence": 0.991 + }, + { + "text": "one", + "start": 44.26, + "end": 44.3, + "confidence": 0.99 + }, + { + "text": "who", + "start": 44.3, + "end": 44.34, + "confidence": 0.985 + }, + { + "text": "is", + "start": 44.34, + "end": 44.38, + "confidence": 0.999 + }, + { + "text": "the", + "start": 44.38, + "end": 44.42, + "confidence": 0.991 + }, + { + "text": "one", + "start": 44.42, + "end": 44.46, + "confidence": 0.99 + }, + { + "text": "who", + "start": 44.46, + "end": 44.5, + "confidence": 0.985 + }, + { + "text": "is", + "start": 44.5, + "end": 44.54, + "confidence": 0.998 + }, + { + "text": "the", + "start": 44.54, + "end": 44.58, + "confidence": 0.991 + }, + { + "text": "one", + "start": 44.58, + "end": 44.62, + "confidence": 0.989 + }, + { + "text": "who", + "start": 44.62, + "end": 44.66, + "confidence": 0.984 + }, + { + "text": "is", + 
"start": 44.66, + "end": 44.7, + "confidence": 0.998 + }, + { + "text": "the", + "start": 44.7, + "end": 44.74, + "confidence": 0.991 + }, + { + "text": "one", + "start": 44.74, + "end": 44.78, + "confidence": 0.989 + }, + { + "text": "who", + "start": 44.78, + "end": 44.82, + "confidence": 0.983 + }, + { + "text": "is", + "start": 44.82, + "end": 44.86, + "confidence": 0.998 + }, + { + "text": "the", + "start": 44.86, + "end": 44.9, + "confidence": 0.991 + }, + { + "text": "one", + "start": 44.9, + "end": 44.94, + "confidence": 0.989 + }, + { + "text": "who", + "start": 44.94, + "end": 44.98, + "confidence": 0.983 + }, + { + "text": "is", + "start": 44.98, + "end": 45.02, + "confidence": 0.998 + }, + { + "text": "the", + "start": 45.02, + "end": 45.06, + "confidence": 0.991 + }, + { + "text": "one", + "start": 45.06, + "end": 45.1, + "confidence": 0.988 + }, + { + "text": "who", + "start": 45.1, + "end": 45.14, + "confidence": 0.982 + }, + { + "text": "is", + "start": 45.14, + "end": 45.18, + "confidence": 0.998 + }, + { + "text": "the", + "start": 45.18, + "end": 45.22, + "confidence": 0.99 + }, + { + "text": "one", + "start": 45.22, + "end": 45.26, + "confidence": 0.988 + }, + { + "text": "who", + "start": 45.26, + "end": 45.3, + "confidence": 0.98 + }, + { + "text": "is", + "start": 45.3, + "end": 45.34, + "confidence": 0.998 + }, + { + "text": "the", + "start": 45.34, + "end": 45.38, + "confidence": 0.99 + }, + { + "text": "one", + "start": 45.38, + "end": 45.42, + "confidence": 0.987 + }, + { + "text": "who", + "start": 45.42, + "end": 45.46, + "confidence": 0.979 + }, + { + "text": "is", + "start": 45.46, + "end": 45.5, + "confidence": 0.998 + }, + { + "text": "the", + "start": 45.5, + "end": 45.54, + "confidence": 0.99 + }, + { + "text": "one", + "start": 45.54, + "end": 45.58, + "confidence": 0.987 + }, + { + "text": "who", + "start": 45.58, + "end": 45.62, + "confidence": 0.978 + }, + { + "text": "is", + "start": 45.62, + "end": 45.66, + "confidence": 
0.997 + }, + { + "text": "the", + "start": 45.66, + "end": 45.7, + "confidence": 0.99 + }, + { + "text": "one", + "start": 45.7, + "end": 45.74, + "confidence": 0.986 + }, + { + "text": "who", + "start": 45.74, + "end": 45.78, + "confidence": 0.976 + }, + { + "text": "is", + "start": 45.78, + "end": 45.82, + "confidence": 0.997 + }, + { + "text": "the", + "start": 45.82, + "end": 45.86, + "confidence": 0.989 + }, + { + "text": "one", + "start": 45.86, + "end": 45.9, + "confidence": 0.986 + }, + { + "text": "who", + "start": 45.9, + "end": 45.94, + "confidence": 0.976 + }, + { + "text": "is", + "start": 45.94, + "end": 45.98, + "confidence": 0.997 + }, + { + "text": "the", + "start": 45.98, + "end": 46.02, + "confidence": 0.989 + }, + { + "text": "one", + "start": 46.02, + "end": 46.06, + "confidence": 0.986 + }, + { + "text": "who", + "start": 46.06, + "end": 46.1, + "confidence": 0.974 + }, + { + "text": "is", + "start": 46.1, + "end": 46.14, + "confidence": 0.997 + }, + { + "text": "the", + "start": 46.14, + "end": 46.18, + "confidence": 0.989 + }, + { + "text": "one", + "start": 46.18, + "end": 46.22, + "confidence": 0.985 + }, + { + "text": "who", + "start": 46.22, + "end": 46.26, + "confidence": 0.972 + }, + { + "text": "is", + "start": 46.26, + "end": 46.3, + "confidence": 0.996 + }, + { + "text": "the", + "start": 46.3, + "end": 46.34, + "confidence": 0.988 + }, + { + "text": "one", + "start": 46.34, + "end": 46.38, + "confidence": 0.984 + }, + { + "text": "who", + "start": 46.38, + "end": 46.42, + "confidence": 0.971 + }, + { + "text": "is", + "start": 46.42, + "end": 46.46, + "confidence": 0.996 + }, + { + "text": "the", + "start": 46.46, + "end": 46.5, + "confidence": 0.988 + }, + { + "text": "one", + "start": 46.5, + "end": 46.54, + "confidence": 0.984 + }, + { + "text": "who", + "start": 46.54, + "end": 46.58, + "confidence": 0.97 + }, + { + "text": "is", + "start": 46.58, + "end": 46.62, + "confidence": 0.996 + }, + { + "text": "the", + "start": 46.62, 
+ "end": 46.66, + "confidence": 0.987 + }, + { + "text": "one", + "start": 46.66, + "end": 46.7, + "confidence": 0.984 + }, + { + "text": "who", + "start": 46.7, + "end": 46.74, + "confidence": 0.968 + }, + { + "text": "is", + "start": 46.74, + "end": 46.78, + "confidence": 0.995 + }, + { + "text": "the", + "start": 46.78, + "end": 46.82, + "confidence": 0.987 + }, + { + "text": "one", + "start": 46.82, + "end": 46.86, + "confidence": 0.983 + }, + { + "text": "who", + "start": 46.86, + "end": 46.9, + "confidence": 0.966 + }, + { + "text": "is", + "start": 46.9, + "end": 46.94, + "confidence": 0.995 + }, + { + "text": "the", + "start": 46.94, + "end": 46.98, + "confidence": 0.986 + }, + { + "text": "one", + "start": 46.98, + "end": 47.02, + "confidence": 0.982 + }, + { + "text": "who", + "start": 47.02, + "end": 47.06, + "confidence": 0.964 + }, + { + "text": "is", + "start": 47.06, + "end": 47.1, + "confidence": 0.995 + }, + { + "text": "the", + "start": 47.1, + "end": 47.14, + "confidence": 0.986 + }, + { + "text": "one", + "start": 47.14, + "end": 47.18, + "confidence": 0.982 + }, + { + "text": "who", + "start": 47.18, + "end": 47.22, + "confidence": 0.963 + }, + { + "text": "is", + "start": 47.22, + "end": 47.26, + "confidence": 0.994 + }, + { + "text": "the", + "start": 47.26, + "end": 47.3, + "confidence": 0.985 + }, + { + "text": "one", + "start": 47.3, + "end": 47.34, + "confidence": 0.981 + }, + { + "text": "who", + "start": 47.34, + "end": 47.38, + "confidence": 0.959 + }, + { + "text": "is", + "start": 47.38, + "end": 47.42, + "confidence": 0.993 + }, + { + "text": "the", + "start": 47.42, + "end": 47.46, + "confidence": 0.984 + }, + { + "text": "one", + "start": 47.46, + "end": 47.5, + "confidence": 0.98 + }, + { + "text": "who", + "start": 47.5, + "end": 47.54, + "confidence": 0.958 + }, + { + "text": "is", + "start": 47.54, + "end": 47.58, + "confidence": 0.993 + }, + { + "text": "the", + "start": 47.58, + "end": 47.62, + "confidence": 0.983 + }, + { + 
"text": "one", + "start": 47.62, + "end": 47.66, + "confidence": 0.979 + }, + { + "text": "who", + "start": 47.66, + "end": 47.7, + "confidence": 0.955 + }, + { + "text": "is", + "start": 47.7, + "end": 47.74, + "confidence": 0.993 + }, + { + "text": "the", + "start": 47.74, + "end": 47.78, + "confidence": 0.982 + }, + { + "text": "one", + "start": 47.78, + "end": 47.82, + "confidence": 0.979 + }, + { + "text": "who", + "start": 47.82, + "end": 47.86, + "confidence": 0.953 + }, + { + "text": "is", + "start": 47.86, + "end": 47.9, + "confidence": 0.992 + }, + { + "text": "the", + "start": 47.9, + "end": 47.94, + "confidence": 0.982 + }, + { + "text": "one", + "start": 47.94, + "end": 47.98, + "confidence": 0.978 + }, + { + "text": "who", + "start": 47.98, + "end": 48.02, + "confidence": 0.952 + }, + { + "text": "is", + "start": 48.02, + "end": 48.06, + "confidence": 0.992 + }, + { + "text": "the", + "start": 48.06, + "end": 48.1, + "confidence": 0.981 + }, + { + "text": "one", + "start": 48.1, + "end": 48.14, + "confidence": 0.976 + }, + { + "text": "who", + "start": 48.14, + "end": 48.18, + "confidence": 0.95 + }, + { + "text": "is", + "start": 48.18, + "end": 48.22, + "confidence": 0.991 + }, + { + "text": "the", + "start": 48.22, + "end": 48.26, + "confidence": 0.98 + }, + { + "text": "one", + "start": 48.26, + "end": 48.3, + "confidence": 0.976 + }, + { + "text": "who", + "start": 48.3, + "end": 48.34, + "confidence": 0.949 + }, + { + "text": "is", + "start": 48.34, + "end": 48.38, + "confidence": 0.991 + }, + { + "text": "the", + "start": 48.38, + "end": 48.42, + "confidence": 0.979 + }, + { + "text": "one", + "start": 48.42, + "end": 48.46, + "confidence": 0.976 + }, + { + "text": "who", + "start": 48.46, + "end": 48.5, + "confidence": 0.944 + }, + { + "text": "is", + "start": 48.5, + "end": 48.54, + "confidence": 0.99 + }, + { + "text": "the", + "start": 48.54, + "end": 48.58, + "confidence": 0.979 + }, + { + "text": "one", + "start": 48.58, + "end": 48.62, + 
"confidence": 0.975 + }, + { + "text": "who", + "start": 48.62, + "end": 48.66, + "confidence": 0.945 + }, + { + "text": "is", + "start": 48.66, + "end": 67.0, + "confidence": 0.99 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/small.en/arabic.mp3.words.json b/tests/expected/small.en/arabic.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..2d0172794e7287bacdb551a50f63c0d96298b1a3 --- /dev/null +++ b/tests/expected/small.en/arabic.mp3.words.json @@ -0,0 +1,3348 @@ +{ + "text": " I am the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the 
one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 2.92, + "end": 7.72, + "text": " I am the one who is the one who is the one who is the one who is the one who is the one", + "tokens": [ + 50363, + 314, + 716, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 50713 + ], + "temperature": 0.0, + "avg_logprob": -0.2338263448079427, + "compression_ratio": 24.294117647058822, + "no_speech_prob": 0.6509259939193726, + "confidence": 0.358, + "words": [ + { + "text": "I", + "start": 2.92, + "end": 3.16, + "confidence": 0.053 + }, + { + "text": "am", + "start": 3.16, + "end": 4.04, + "confidence": 0.152 + }, + { + "text": "the", + "start": 4.04, + "end": 5.44, + "confidence": 0.144 + }, + { + "text": "one", + "start": 5.44, + "end": 6.1, + "confidence": 0.086 + }, + { + "text": "who", + "start": 6.1, + "end": 6.18, + "confidence": 0.718 + }, + { + "text": "is", + "start": 6.18, + "end": 6.22, + "confidence": 0.119 + }, + { + "text": "the", + "start": 6.22, + "end": 6.24, + "confidence": 0.134 + }, + { + "text": "one", + "start": 6.24, + "end": 6.72, + "confidence": 0.203 + }, + { + "text": "who", + "start": 6.72, + "end": 6.74, + "confidence": 0.618 + }, + { + "text": "is", + "start": 6.74, + "end": 7.18, + "confidence": 0.319 + }, + { + "text": "the", + "start": 7.18, + "end": 7.2, + "confidence": 0.38 + }, + { + "text": "one", + "start": 7.2, + "end": 7.48, + "confidence": 0.443 + }, + { + "text": "who", + "start": 7.48, + "end": 7.5, + "confidence": 0.447 + }, + { + "text": "is", + "start": 7.5, + "end": 7.52, + "confidence": 0.518 + }, + { 
+ "text": "the", + "start": 7.52, + "end": 7.54, + "confidence": 0.542 + }, + { + "text": "one", + "start": 7.54, + "end": 7.56, + "confidence": 0.724 + }, + { + "text": "who", + "start": 7.56, + "end": 7.58, + "confidence": 0.433 + }, + { + "text": "is", + "start": 7.58, + "end": 7.6, + "confidence": 0.677 + }, + { + "text": "the", + "start": 7.6, + "end": 7.62, + "confidence": 0.638 + }, + { + "text": "one", + "start": 7.62, + "end": 7.64, + "confidence": 0.817 + }, + { + "text": "who", + "start": 7.64, + "end": 7.66, + "confidence": 0.503 + }, + { + "text": "is", + "start": 7.66, + "end": 7.68, + "confidence": 0.74 + }, + { + "text": "the", + "start": 7.68, + "end": 7.7, + "confidence": 0.696 + }, + { + "text": "one", + "start": 7.7, + "end": 7.72, + "confidence": 0.697 + } + ] + }, + { + "id": 1, + "seek": 700, + "start": 7.72, + "end": 37.18, + "text": " who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the", + "tokens": [ + 50363, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, 
+ 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262 + ], + "temperature": 0.0, + "avg_logprob": -0.07860866122775607, + "compression_ratio": 26.93548387096774, + "no_speech_prob": 0.6889318823814392, + "confidence": 0.925, + "words": [ + { + "text": "who", + "start": 7.72, + "end": 7.74, + "confidence": 0.214 + }, + { + "text": "is", + "start": 7.74, + "end": 7.76, + "confidence": 0.892 + }, + { + "text": "the", + "start": 7.76, + "end": 7.78, + "confidence": 0.918 + }, + { + "text": "one", + "start": 7.78, + "end": 9.96, + "confidence": 0.955 + }, + { + "text": "who", + "start": 9.96, + "end": 13.46, + "confidence": 0.905 + }, + { + "text": "is", + "start": 13.46, + "end": 14.0, + "confidence": 0.908 + }, + { + "text": "the", + "start": 14.0, + 
"end": 14.38, + "confidence": 0.942 + }, + { + "text": "one", + "start": 14.38, + "end": 14.4, + "confidence": 0.977 + }, + { + "text": "who", + "start": 14.4, + "end": 14.42, + "confidence": 0.758 + }, + { + "text": "is", + "start": 14.42, + "end": 14.44, + "confidence": 0.932 + }, + { + "text": "the", + "start": 14.44, + "end": 15.24, + "confidence": 0.957 + }, + { + "text": "one", + "start": 15.24, + "end": 18.0, + "confidence": 0.982 + }, + { + "text": "who", + "start": 18.0, + "end": 18.02, + "confidence": 0.581 + }, + { + "text": "is", + "start": 18.02, + "end": 18.04, + "confidence": 0.944 + }, + { + "text": "the", + "start": 18.04, + "end": 18.06, + "confidence": 0.953 + }, + { + "text": "one", + "start": 18.06, + "end": 18.08, + "confidence": 0.98 + }, + { + "text": "who", + "start": 18.08, + "end": 19.38, + "confidence": 0.577 + }, + { + "text": "is", + "start": 19.38, + "end": 19.9, + "confidence": 0.953 + }, + { + "text": "the", + "start": 19.9, + "end": 19.92, + "confidence": 0.95 + }, + { + "text": "one", + "start": 19.92, + "end": 19.94, + "confidence": 0.98 + }, + { + "text": "who", + "start": 19.94, + "end": 19.96, + "confidence": 0.591 + }, + { + "text": "is", + "start": 19.96, + "end": 20.88, + "confidence": 0.95 + }, + { + "text": "the", + "start": 20.88, + "end": 21.28, + "confidence": 0.872 + }, + { + "text": "one", + "start": 21.28, + "end": 21.52, + "confidence": 0.902 + }, + { + "text": "who", + "start": 21.52, + "end": 21.54, + "confidence": 0.499 + }, + { + "text": "is", + "start": 21.54, + "end": 22.04, + "confidence": 0.943 + }, + { + "text": "the", + "start": 22.04, + "end": 22.24, + "confidence": 0.898 + }, + { + "text": "one", + "start": 22.24, + "end": 25.3, + "confidence": 0.98 + }, + { + "text": "who", + "start": 25.3, + "end": 25.32, + "confidence": 0.541 + }, + { + "text": "is", + "start": 25.32, + "end": 26.18, + "confidence": 0.953 + }, + { + "text": "the", + "start": 26.18, + "end": 26.64, + "confidence": 0.904 + }, + { + 
"text": "one", + "start": 26.64, + "end": 26.92, + "confidence": 0.976 + }, + { + "text": "who", + "start": 26.92, + "end": 26.94, + "confidence": 0.586 + }, + { + "text": "is", + "start": 26.94, + "end": 27.42, + "confidence": 0.953 + }, + { + "text": "the", + "start": 27.42, + "end": 28.08, + "confidence": 0.903 + }, + { + "text": "one", + "start": 28.08, + "end": 28.1, + "confidence": 0.968 + }, + { + "text": "who", + "start": 28.1, + "end": 28.12, + "confidence": 0.603 + }, + { + "text": "is", + "start": 28.12, + "end": 28.14, + "confidence": 0.956 + }, + { + "text": "the", + "start": 28.14, + "end": 28.16, + "confidence": 0.91 + }, + { + "text": "one", + "start": 28.16, + "end": 28.46, + "confidence": 0.963 + }, + { + "text": "who", + "start": 28.46, + "end": 28.48, + "confidence": 0.622 + }, + { + "text": "is", + "start": 28.48, + "end": 28.5, + "confidence": 0.96 + }, + { + "text": "the", + "start": 28.5, + "end": 28.52, + "confidence": 0.92 + }, + { + "text": "one", + "start": 28.52, + "end": 30.08, + "confidence": 0.961 + }, + { + "text": "who", + "start": 30.08, + "end": 30.1, + "confidence": 0.649 + }, + { + "text": "is", + "start": 30.1, + "end": 30.12, + "confidence": 0.965 + }, + { + "text": "the", + "start": 30.12, + "end": 30.3, + "confidence": 0.93 + }, + { + "text": "one", + "start": 30.3, + "end": 30.32, + "confidence": 0.961 + }, + { + "text": "who", + "start": 30.32, + "end": 31.0, + "confidence": 0.671 + }, + { + "text": "is", + "start": 31.0, + "end": 31.52, + "confidence": 0.971 + }, + { + "text": "the", + "start": 31.52, + "end": 31.54, + "confidence": 0.94 + }, + { + "text": "one", + "start": 31.54, + "end": 32.26, + "confidence": 0.962 + }, + { + "text": "who", + "start": 32.26, + "end": 32.48, + "confidence": 0.704 + }, + { + "text": "is", + "start": 32.48, + "end": 33.26, + "confidence": 0.976 + }, + { + "text": "the", + "start": 33.26, + "end": 33.28, + "confidence": 0.946 + }, + { + "text": "one", + "start": 33.28, + "end": 33.56, + 
"confidence": 0.964 + }, + { + "text": "who", + "start": 33.56, + "end": 33.86, + "confidence": 0.722 + }, + { + "text": "is", + "start": 33.86, + "end": 33.88, + "confidence": 0.979 + }, + { + "text": "the", + "start": 33.88, + "end": 33.9, + "confidence": 0.951 + }, + { + "text": "one", + "start": 33.9, + "end": 33.92, + "confidence": 0.966 + }, + { + "text": "who", + "start": 33.92, + "end": 33.94, + "confidence": 0.746 + }, + { + "text": "is", + "start": 33.94, + "end": 33.96, + "confidence": 0.982 + }, + { + "text": "the", + "start": 33.96, + "end": 33.98, + "confidence": 0.956 + }, + { + "text": "one", + "start": 33.98, + "end": 34.0, + "confidence": 0.967 + }, + { + "text": "who", + "start": 34.0, + "end": 34.02, + "confidence": 0.757 + }, + { + "text": "is", + "start": 34.02, + "end": 34.04, + "confidence": 0.984 + }, + { + "text": "the", + "start": 34.04, + "end": 34.06, + "confidence": 0.961 + }, + { + "text": "one", + "start": 34.06, + "end": 34.08, + "confidence": 0.969 + }, + { + "text": "who", + "start": 34.08, + "end": 34.1, + "confidence": 0.776 + }, + { + "text": "is", + "start": 34.1, + "end": 34.12, + "confidence": 0.986 + }, + { + "text": "the", + "start": 34.12, + "end": 34.14, + "confidence": 0.965 + }, + { + "text": "one", + "start": 34.14, + "end": 34.16, + "confidence": 0.97 + }, + { + "text": "who", + "start": 34.16, + "end": 34.18, + "confidence": 0.799 + }, + { + "text": "is", + "start": 34.18, + "end": 34.2, + "confidence": 0.987 + }, + { + "text": "the", + "start": 34.2, + "end": 34.22, + "confidence": 0.968 + }, + { + "text": "one", + "start": 34.22, + "end": 34.24, + "confidence": 0.971 + }, + { + "text": "who", + "start": 34.24, + "end": 34.26, + "confidence": 0.82 + }, + { + "text": "is", + "start": 34.26, + "end": 34.28, + "confidence": 0.987 + }, + { + "text": "the", + "start": 34.28, + "end": 34.3, + "confidence": 0.969 + }, + { + "text": "one", + "start": 34.3, + "end": 34.32, + "confidence": 0.972 + }, + { + "text": "who", + 
"start": 34.32, + "end": 34.34, + "confidence": 0.836 + }, + { + "text": "is", + "start": 34.34, + "end": 34.36, + "confidence": 0.988 + }, + { + "text": "the", + "start": 34.36, + "end": 34.38, + "confidence": 0.971 + }, + { + "text": "one", + "start": 34.38, + "end": 34.4, + "confidence": 0.972 + }, + { + "text": "who", + "start": 34.4, + "end": 34.42, + "confidence": 0.872 + }, + { + "text": "is", + "start": 34.42, + "end": 34.44, + "confidence": 0.988 + }, + { + "text": "the", + "start": 34.44, + "end": 34.46, + "confidence": 0.972 + }, + { + "text": "one", + "start": 34.46, + "end": 34.48, + "confidence": 0.973 + }, + { + "text": "who", + "start": 34.48, + "end": 34.5, + "confidence": 0.889 + }, + { + "text": "is", + "start": 34.5, + "end": 34.52, + "confidence": 0.988 + }, + { + "text": "the", + "start": 34.52, + "end": 34.54, + "confidence": 0.973 + }, + { + "text": "one", + "start": 34.54, + "end": 34.56, + "confidence": 0.973 + }, + { + "text": "who", + "start": 34.56, + "end": 34.58, + "confidence": 0.899 + }, + { + "text": "is", + "start": 34.58, + "end": 34.6, + "confidence": 0.988 + }, + { + "text": "the", + "start": 34.6, + "end": 34.62, + "confidence": 0.973 + }, + { + "text": "one", + "start": 34.62, + "end": 34.64, + "confidence": 0.973 + }, + { + "text": "who", + "start": 34.64, + "end": 34.66, + "confidence": 0.912 + }, + { + "text": "is", + "start": 34.66, + "end": 34.68, + "confidence": 0.989 + }, + { + "text": "the", + "start": 34.68, + "end": 34.7, + "confidence": 0.973 + }, + { + "text": "one", + "start": 34.7, + "end": 34.72, + "confidence": 0.973 + }, + { + "text": "who", + "start": 34.72, + "end": 34.74, + "confidence": 0.921 + }, + { + "text": "is", + "start": 34.74, + "end": 34.76, + "confidence": 0.988 + }, + { + "text": "the", + "start": 34.76, + "end": 34.78, + "confidence": 0.974 + }, + { + "text": "one", + "start": 34.78, + "end": 34.8, + "confidence": 0.974 + }, + { + "text": "who", + "start": 34.8, + "end": 34.82, + "confidence": 
0.927 + }, + { + "text": "is", + "start": 34.82, + "end": 34.84, + "confidence": 0.989 + }, + { + "text": "the", + "start": 34.84, + "end": 34.86, + "confidence": 0.974 + }, + { + "text": "one", + "start": 34.86, + "end": 34.88, + "confidence": 0.974 + }, + { + "text": "who", + "start": 34.88, + "end": 34.9, + "confidence": 0.935 + }, + { + "text": "is", + "start": 34.9, + "end": 34.92, + "confidence": 0.988 + }, + { + "text": "the", + "start": 34.92, + "end": 34.94, + "confidence": 0.974 + }, + { + "text": "one", + "start": 34.94, + "end": 34.96, + "confidence": 0.974 + }, + { + "text": "who", + "start": 34.96, + "end": 34.98, + "confidence": 0.934 + }, + { + "text": "is", + "start": 34.98, + "end": 35.0, + "confidence": 0.988 + }, + { + "text": "the", + "start": 35.0, + "end": 35.02, + "confidence": 0.975 + }, + { + "text": "one", + "start": 35.02, + "end": 35.04, + "confidence": 0.974 + }, + { + "text": "who", + "start": 35.04, + "end": 35.06, + "confidence": 0.937 + }, + { + "text": "is", + "start": 35.06, + "end": 35.08, + "confidence": 0.989 + }, + { + "text": "the", + "start": 35.08, + "end": 35.1, + "confidence": 0.976 + }, + { + "text": "one", + "start": 35.1, + "end": 35.12, + "confidence": 0.974 + }, + { + "text": "who", + "start": 35.12, + "end": 35.14, + "confidence": 0.937 + }, + { + "text": "is", + "start": 35.14, + "end": 35.16, + "confidence": 0.988 + }, + { + "text": "the", + "start": 35.16, + "end": 35.18, + "confidence": 0.976 + }, + { + "text": "one", + "start": 35.18, + "end": 35.2, + "confidence": 0.974 + }, + { + "text": "who", + "start": 35.2, + "end": 35.22, + "confidence": 0.934 + }, + { + "text": "is", + "start": 35.22, + "end": 35.24, + "confidence": 0.988 + }, + { + "text": "the", + "start": 35.24, + "end": 35.26, + "confidence": 0.976 + }, + { + "text": "one", + "start": 35.26, + "end": 35.28, + "confidence": 0.975 + }, + { + "text": "who", + "start": 35.28, + "end": 35.3, + "confidence": 0.933 + }, + { + "text": "is", + "start": 
35.3, + "end": 35.32, + "confidence": 0.988 + }, + { + "text": "the", + "start": 35.32, + "end": 35.34, + "confidence": 0.977 + }, + { + "text": "one", + "start": 35.34, + "end": 35.36, + "confidence": 0.975 + }, + { + "text": "who", + "start": 35.36, + "end": 35.38, + "confidence": 0.93 + }, + { + "text": "is", + "start": 35.38, + "end": 35.4, + "confidence": 0.988 + }, + { + "text": "the", + "start": 35.4, + "end": 35.42, + "confidence": 0.977 + }, + { + "text": "one", + "start": 35.42, + "end": 35.44, + "confidence": 0.974 + }, + { + "text": "who", + "start": 35.44, + "end": 35.46, + "confidence": 0.928 + }, + { + "text": "is", + "start": 35.46, + "end": 35.48, + "confidence": 0.988 + }, + { + "text": "the", + "start": 35.48, + "end": 35.5, + "confidence": 0.977 + }, + { + "text": "one", + "start": 35.5, + "end": 35.52, + "confidence": 0.974 + }, + { + "text": "who", + "start": 35.52, + "end": 35.54, + "confidence": 0.926 + }, + { + "text": "is", + "start": 35.54, + "end": 35.56, + "confidence": 0.988 + }, + { + "text": "the", + "start": 35.56, + "end": 35.58, + "confidence": 0.977 + }, + { + "text": "one", + "start": 35.58, + "end": 35.6, + "confidence": 0.974 + }, + { + "text": "who", + "start": 35.6, + "end": 35.62, + "confidence": 0.928 + }, + { + "text": "is", + "start": 35.62, + "end": 35.64, + "confidence": 0.988 + }, + { + "text": "the", + "start": 35.64, + "end": 35.66, + "confidence": 0.977 + }, + { + "text": "one", + "start": 35.66, + "end": 35.68, + "confidence": 0.974 + }, + { + "text": "who", + "start": 35.68, + "end": 35.7, + "confidence": 0.926 + }, + { + "text": "is", + "start": 35.7, + "end": 35.72, + "confidence": 0.988 + }, + { + "text": "the", + "start": 35.72, + "end": 35.74, + "confidence": 0.978 + }, + { + "text": "one", + "start": 35.74, + "end": 35.76, + "confidence": 0.974 + }, + { + "text": "who", + "start": 35.76, + "end": 35.78, + "confidence": 0.928 + }, + { + "text": "is", + "start": 35.78, + "end": 35.8, + "confidence": 0.988 + 
}, + { + "text": "the", + "start": 35.8, + "end": 35.82, + "confidence": 0.978 + }, + { + "text": "one", + "start": 35.82, + "end": 35.84, + "confidence": 0.973 + }, + { + "text": "who", + "start": 35.84, + "end": 35.86, + "confidence": 0.926 + }, + { + "text": "is", + "start": 35.86, + "end": 35.88, + "confidence": 0.988 + }, + { + "text": "the", + "start": 35.88, + "end": 35.9, + "confidence": 0.977 + }, + { + "text": "one", + "start": 35.9, + "end": 35.92, + "confidence": 0.974 + }, + { + "text": "who", + "start": 35.92, + "end": 35.94, + "confidence": 0.928 + }, + { + "text": "is", + "start": 35.94, + "end": 35.96, + "confidence": 0.987 + }, + { + "text": "the", + "start": 35.96, + "end": 35.98, + "confidence": 0.977 + }, + { + "text": "one", + "start": 35.98, + "end": 36.0, + "confidence": 0.973 + }, + { + "text": "who", + "start": 36.0, + "end": 36.02, + "confidence": 0.925 + }, + { + "text": "is", + "start": 36.02, + "end": 36.04, + "confidence": 0.987 + }, + { + "text": "the", + "start": 36.04, + "end": 36.06, + "confidence": 0.977 + }, + { + "text": "one", + "start": 36.06, + "end": 36.08, + "confidence": 0.973 + }, + { + "text": "who", + "start": 36.08, + "end": 36.1, + "confidence": 0.925 + }, + { + "text": "is", + "start": 36.1, + "end": 36.12, + "confidence": 0.987 + }, + { + "text": "the", + "start": 36.12, + "end": 36.14, + "confidence": 0.977 + }, + { + "text": "one", + "start": 36.14, + "end": 36.16, + "confidence": 0.974 + }, + { + "text": "who", + "start": 36.16, + "end": 36.18, + "confidence": 0.927 + }, + { + "text": "is", + "start": 36.18, + "end": 36.2, + "confidence": 0.987 + }, + { + "text": "the", + "start": 36.2, + "end": 36.22, + "confidence": 0.977 + }, + { + "text": "one", + "start": 36.22, + "end": 36.24, + "confidence": 0.973 + }, + { + "text": "who", + "start": 36.24, + "end": 36.26, + "confidence": 0.922 + }, + { + "text": "is", + "start": 36.26, + "end": 36.28, + "confidence": 0.987 + }, + { + "text": "the", + "start": 36.28, + 
"end": 36.3, + "confidence": 0.977 + }, + { + "text": "one", + "start": 36.3, + "end": 36.32, + "confidence": 0.972 + }, + { + "text": "who", + "start": 36.32, + "end": 36.34, + "confidence": 0.924 + }, + { + "text": "is", + "start": 36.34, + "end": 36.36, + "confidence": 0.986 + }, + { + "text": "the", + "start": 36.36, + "end": 36.38, + "confidence": 0.976 + }, + { + "text": "one", + "start": 36.38, + "end": 36.4, + "confidence": 0.972 + }, + { + "text": "who", + "start": 36.4, + "end": 36.42, + "confidence": 0.922 + }, + { + "text": "is", + "start": 36.42, + "end": 36.44, + "confidence": 0.985 + }, + { + "text": "the", + "start": 36.44, + "end": 36.46, + "confidence": 0.976 + }, + { + "text": "one", + "start": 36.46, + "end": 36.48, + "confidence": 0.973 + }, + { + "text": "who", + "start": 36.48, + "end": 36.5, + "confidence": 0.924 + }, + { + "text": "is", + "start": 36.5, + "end": 36.52, + "confidence": 0.985 + }, + { + "text": "the", + "start": 36.52, + "end": 36.54, + "confidence": 0.975 + }, + { + "text": "one", + "start": 36.54, + "end": 36.56, + "confidence": 0.973 + }, + { + "text": "who", + "start": 36.56, + "end": 36.58, + "confidence": 0.92 + }, + { + "text": "is", + "start": 36.58, + "end": 36.6, + "confidence": 0.983 + }, + { + "text": "the", + "start": 36.6, + "end": 36.62, + "confidence": 0.975 + }, + { + "text": "one", + "start": 36.62, + "end": 36.64, + "confidence": 0.971 + }, + { + "text": "who", + "start": 36.64, + "end": 36.66, + "confidence": 0.921 + }, + { + "text": "is", + "start": 36.66, + "end": 36.68, + "confidence": 0.983 + }, + { + "text": "the", + "start": 36.68, + "end": 36.7, + "confidence": 0.974 + }, + { + "text": "one", + "start": 36.7, + "end": 36.72, + "confidence": 0.973 + }, + { + "text": "who", + "start": 36.72, + "end": 36.74, + "confidence": 0.918 + }, + { + "text": "is", + "start": 36.74, + "end": 36.76, + "confidence": 0.982 + }, + { + "text": "the", + "start": 36.76, + "end": 36.78, + "confidence": 0.974 + }, + { + 
"text": "one", + "start": 36.78, + "end": 36.8, + "confidence": 0.973 + }, + { + "text": "who", + "start": 36.8, + "end": 36.82, + "confidence": 0.919 + }, + { + "text": "is", + "start": 36.82, + "end": 36.84, + "confidence": 0.982 + }, + { + "text": "the", + "start": 36.84, + "end": 36.86, + "confidence": 0.973 + }, + { + "text": "one", + "start": 36.86, + "end": 36.88, + "confidence": 0.973 + }, + { + "text": "who", + "start": 36.88, + "end": 36.9, + "confidence": 0.917 + }, + { + "text": "is", + "start": 36.9, + "end": 36.92, + "confidence": 0.981 + }, + { + "text": "the", + "start": 36.92, + "end": 36.94, + "confidence": 0.973 + }, + { + "text": "one", + "start": 36.94, + "end": 36.96, + "confidence": 0.972 + }, + { + "text": "who", + "start": 36.96, + "end": 36.98, + "confidence": 0.919 + }, + { + "text": "is", + "start": 36.98, + "end": 37.0, + "confidence": 0.98 + }, + { + "text": "the", + "start": 37.0, + "end": 37.02, + "confidence": 0.972 + }, + { + "text": "one", + "start": 37.02, + "end": 37.04, + "confidence": 0.972 + }, + { + "text": "who", + "start": 37.04, + "end": 37.06, + "confidence": 0.918 + }, + { + "text": "is", + "start": 37.06, + "end": 37.08, + "confidence": 0.979 + }, + { + "text": "the", + "start": 37.08, + "end": 37.1, + "confidence": 0.972 + }, + { + "text": "one", + "start": 37.1, + "end": 37.12, + "confidence": 0.972 + }, + { + "text": "who", + "start": 37.12, + "end": 37.14, + "confidence": 0.917 + }, + { + "text": "is", + "start": 37.14, + "end": 37.16, + "confidence": 0.978 + }, + { + "text": "the", + "start": 37.16, + "end": 37.18, + "confidence": 0.971 + } + ] + }, + { + "id": 2, + "seek": 3700, + "start": 37.18, + "end": 66.98, + "text": " one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who 
is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is the one who is", + "tokens": [ + 50363, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 
530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318, + 262, + 530, + 508, + 318 + ], + "temperature": 0.0, + "avg_logprob": -0.024531144036187065, + "compression_ratio": 26.93548387096774, + "no_speech_prob": 0.09981618076562881, + "confidence": 0.976, + "words": [ + { + "text": "one", + "start": 37.18, + "end": 37.32, + "confidence": 0.959 + }, + { + "text": "who", + "start": 37.32, + "end": 37.9, + "confidence": 0.954 + }, + { + "text": "is", + "start": 37.9, + "end": 37.92, + "confidence": 0.968 + }, + { + "text": "the", + "start": 37.92, + "end": 38.1, + "confidence": 0.962 + }, + { + "text": "one", + "start": 38.1, + "end": 38.6, + "confidence": 0.965 + }, + { + "text": "who", + "start": 38.6, + "end": 39.36, + "confidence": 0.979 + }, + { + "text": "is", + "start": 39.36, + "end": 39.58, + "confidence": 0.986 + }, + { + "text": "the", + "start": 39.58, + "end": 39.6, + "confidence": 0.969 + }, + { + "text": "one", + "start": 39.6, + "end": 39.62, + "confidence": 0.981 + }, + { + "text": "who", + "start": 39.62, + "end": 39.82, + "confidence": 0.985 + }, + { + "text": "is", + "start": 39.82, + "end": 40.12, + "confidence": 0.99 + }, + { + "text": "the", + "start": 40.12, + "end": 40.32, + "confidence": 0.957 + }, + { + "text": "one", + "start": 40.32, + "end": 40.62, + "confidence": 0.983 + }, + { + "text": "who", + "start": 40.62, + "end": 40.64, + "confidence": 0.984 + }, + { + "text": "is", + "start": 40.64, + "end": 40.66, + "confidence": 0.99 + }, + { + "text": "the", + "start": 40.66, + "end": 40.68, + "confidence": 0.956 + }, + { + "text": "one", + "start": 40.68, + "end": 40.74, + "confidence": 0.984 + }, + { + "text": "who", + "start": 40.74, + "end": 40.76, + "confidence": 0.984 + }, + { + "text": "is", + "start": 40.76, + "end": 40.78, + "confidence": 0.991 + }, + { + "text": "the", + "start": 40.78, + "end": 40.8, + "confidence": 0.953 + }, + { + "text": "one", + "start": 40.8, + "end": 40.82, + 
"confidence": 0.985 + }, + { + "text": "who", + "start": 40.82, + "end": 40.84, + "confidence": 0.979 + }, + { + "text": "is", + "start": 40.84, + "end": 40.86, + "confidence": 0.987 + }, + { + "text": "the", + "start": 40.86, + "end": 40.88, + "confidence": 0.495 + }, + { + "text": "one", + "start": 40.88, + "end": 41.5, + "confidence": 0.951 + }, + { + "text": "who", + "start": 41.5, + "end": 41.52, + "confidence": 0.968 + }, + { + "text": "is", + "start": 41.52, + "end": 41.54, + "confidence": 0.983 + }, + { + "text": "the", + "start": 41.54, + "end": 41.56, + "confidence": 0.844 + }, + { + "text": "one", + "start": 41.56, + "end": 41.58, + "confidence": 0.982 + }, + { + "text": "who", + "start": 41.58, + "end": 41.6, + "confidence": 0.973 + }, + { + "text": "is", + "start": 41.6, + "end": 41.62, + "confidence": 0.991 + }, + { + "text": "the", + "start": 41.62, + "end": 41.64, + "confidence": 0.888 + }, + { + "text": "one", + "start": 41.64, + "end": 41.66, + "confidence": 0.984 + }, + { + "text": "who", + "start": 41.66, + "end": 41.68, + "confidence": 0.978 + }, + { + "text": "is", + "start": 41.68, + "end": 41.7, + "confidence": 0.994 + }, + { + "text": "the", + "start": 41.7, + "end": 41.72, + "confidence": 0.915 + }, + { + "text": "one", + "start": 41.72, + "end": 41.74, + "confidence": 0.985 + }, + { + "text": "who", + "start": 41.74, + "end": 41.76, + "confidence": 0.981 + }, + { + "text": "is", + "start": 41.76, + "end": 41.78, + "confidence": 0.995 + }, + { + "text": "the", + "start": 41.78, + "end": 41.8, + "confidence": 0.937 + }, + { + "text": "one", + "start": 41.8, + "end": 41.82, + "confidence": 0.986 + }, + { + "text": "who", + "start": 41.82, + "end": 41.84, + "confidence": 0.983 + }, + { + "text": "is", + "start": 41.84, + "end": 41.86, + "confidence": 0.996 + }, + { + "text": "the", + "start": 41.86, + "end": 41.88, + "confidence": 0.95 + }, + { + "text": "one", + "start": 41.88, + "end": 41.9, + "confidence": 0.987 + }, + { + "text": "who", + 
"start": 41.9, + "end": 41.92, + "confidence": 0.983 + }, + { + "text": "is", + "start": 41.92, + "end": 41.94, + "confidence": 0.996 + }, + { + "text": "the", + "start": 41.94, + "end": 41.96, + "confidence": 0.961 + }, + { + "text": "one", + "start": 41.96, + "end": 41.98, + "confidence": 0.987 + }, + { + "text": "who", + "start": 41.98, + "end": 42.0, + "confidence": 0.984 + }, + { + "text": "is", + "start": 42.0, + "end": 42.02, + "confidence": 0.997 + }, + { + "text": "the", + "start": 42.02, + "end": 42.04, + "confidence": 0.969 + }, + { + "text": "one", + "start": 42.04, + "end": 42.06, + "confidence": 0.987 + }, + { + "text": "who", + "start": 42.06, + "end": 42.08, + "confidence": 0.983 + }, + { + "text": "is", + "start": 42.08, + "end": 42.1, + "confidence": 0.997 + }, + { + "text": "the", + "start": 42.1, + "end": 42.12, + "confidence": 0.975 + }, + { + "text": "one", + "start": 42.12, + "end": 42.14, + "confidence": 0.988 + }, + { + "text": "who", + "start": 42.14, + "end": 42.16, + "confidence": 0.983 + }, + { + "text": "is", + "start": 42.16, + "end": 42.18, + "confidence": 0.997 + }, + { + "text": "the", + "start": 42.18, + "end": 42.2, + "confidence": 0.979 + }, + { + "text": "one", + "start": 42.2, + "end": 42.22, + "confidence": 0.989 + }, + { + "text": "who", + "start": 42.22, + "end": 42.24, + "confidence": 0.983 + }, + { + "text": "is", + "start": 42.24, + "end": 42.26, + "confidence": 0.998 + }, + { + "text": "the", + "start": 42.26, + "end": 42.28, + "confidence": 0.982 + }, + { + "text": "one", + "start": 42.28, + "end": 42.3, + "confidence": 0.989 + }, + { + "text": "who", + "start": 42.3, + "end": 42.32, + "confidence": 0.982 + }, + { + "text": "is", + "start": 42.32, + "end": 42.34, + "confidence": 0.998 + }, + { + "text": "the", + "start": 42.34, + "end": 42.4, + "confidence": 0.983 + }, + { + "text": "one", + "start": 42.4, + "end": 42.56, + "confidence": 0.989 + }, + { + "text": "who", + "start": 42.56, + "end": 42.58, + "confidence": 
0.982 + }, + { + "text": "is", + "start": 42.58, + "end": 42.62, + "confidence": 0.998 + }, + { + "text": "the", + "start": 42.62, + "end": 42.64, + "confidence": 0.985 + }, + { + "text": "one", + "start": 42.64, + "end": 43.62, + "confidence": 0.989 + }, + { + "text": "who", + "start": 43.62, + "end": 43.64, + "confidence": 0.981 + }, + { + "text": "is", + "start": 43.64, + "end": 43.66, + "confidence": 0.998 + }, + { + "text": "the", + "start": 43.66, + "end": 43.68, + "confidence": 0.986 + }, + { + "text": "one", + "start": 43.68, + "end": 43.7, + "confidence": 0.989 + }, + { + "text": "who", + "start": 43.7, + "end": 43.72, + "confidence": 0.981 + }, + { + "text": "is", + "start": 43.72, + "end": 43.74, + "confidence": 0.998 + }, + { + "text": "the", + "start": 43.74, + "end": 43.76, + "confidence": 0.987 + }, + { + "text": "one", + "start": 43.76, + "end": 44.02, + "confidence": 0.989 + }, + { + "text": "who", + "start": 44.02, + "end": 44.04, + "confidence": 0.981 + }, + { + "text": "is", + "start": 44.04, + "end": 44.06, + "confidence": 0.998 + }, + { + "text": "the", + "start": 44.06, + "end": 44.08, + "confidence": 0.988 + }, + { + "text": "one", + "start": 44.08, + "end": 44.1, + "confidence": 0.989 + }, + { + "text": "who", + "start": 44.1, + "end": 44.12, + "confidence": 0.981 + }, + { + "text": "is", + "start": 44.12, + "end": 44.14, + "confidence": 0.998 + }, + { + "text": "the", + "start": 44.14, + "end": 44.16, + "confidence": 0.988 + }, + { + "text": "one", + "start": 44.16, + "end": 44.18, + "confidence": 0.989 + }, + { + "text": "who", + "start": 44.18, + "end": 44.2, + "confidence": 0.981 + }, + { + "text": "is", + "start": 44.2, + "end": 44.22, + "confidence": 0.998 + }, + { + "text": "the", + "start": 44.22, + "end": 44.24, + "confidence": 0.988 + }, + { + "text": "one", + "start": 44.24, + "end": 44.26, + "confidence": 0.989 + }, + { + "text": "who", + "start": 44.26, + "end": 44.28, + "confidence": 0.98 + }, + { + "text": "is", + "start": 
44.28, + "end": 44.3, + "confidence": 0.998 + }, + { + "text": "the", + "start": 44.3, + "end": 44.32, + "confidence": 0.988 + }, + { + "text": "one", + "start": 44.32, + "end": 44.34, + "confidence": 0.988 + }, + { + "text": "who", + "start": 44.34, + "end": 44.36, + "confidence": 0.98 + }, + { + "text": "is", + "start": 44.36, + "end": 44.38, + "confidence": 0.998 + }, + { + "text": "the", + "start": 44.38, + "end": 44.4, + "confidence": 0.988 + }, + { + "text": "one", + "start": 44.4, + "end": 44.42, + "confidence": 0.988 + }, + { + "text": "who", + "start": 44.42, + "end": 44.44, + "confidence": 0.98 + }, + { + "text": "is", + "start": 44.44, + "end": 44.46, + "confidence": 0.998 + }, + { + "text": "the", + "start": 44.46, + "end": 44.48, + "confidence": 0.988 + }, + { + "text": "one", + "start": 44.48, + "end": 44.54, + "confidence": 0.988 + }, + { + "text": "who", + "start": 44.54, + "end": 44.56, + "confidence": 0.98 + }, + { + "text": "is", + "start": 44.56, + "end": 44.58, + "confidence": 0.998 + }, + { + "text": "the", + "start": 44.58, + "end": 44.6, + "confidence": 0.988 + }, + { + "text": "one", + "start": 44.6, + "end": 45.16, + "confidence": 0.987 + }, + { + "text": "who", + "start": 45.16, + "end": 45.18, + "confidence": 0.98 + }, + { + "text": "is", + "start": 45.18, + "end": 45.2, + "confidence": 0.998 + }, + { + "text": "the", + "start": 45.2, + "end": 45.22, + "confidence": 0.988 + }, + { + "text": "one", + "start": 45.22, + "end": 45.24, + "confidence": 0.987 + }, + { + "text": "who", + "start": 45.24, + "end": 45.26, + "confidence": 0.979 + }, + { + "text": "is", + "start": 45.26, + "end": 45.28, + "confidence": 0.998 + }, + { + "text": "the", + "start": 45.28, + "end": 45.3, + "confidence": 0.988 + }, + { + "text": "one", + "start": 45.3, + "end": 45.32, + "confidence": 0.986 + }, + { + "text": "who", + "start": 45.32, + "end": 45.34, + "confidence": 0.978 + }, + { + "text": "is", + "start": 45.34, + "end": 45.36, + "confidence": 0.998 + }, + 
{ + "text": "the", + "start": 45.36, + "end": 45.38, + "confidence": 0.988 + }, + { + "text": "one", + "start": 45.38, + "end": 45.4, + "confidence": 0.986 + }, + { + "text": "who", + "start": 45.4, + "end": 45.42, + "confidence": 0.977 + }, + { + "text": "is", + "start": 45.42, + "end": 45.44, + "confidence": 0.998 + }, + { + "text": "the", + "start": 45.44, + "end": 45.46, + "confidence": 0.988 + }, + { + "text": "one", + "start": 45.46, + "end": 45.94, + "confidence": 0.985 + }, + { + "text": "who", + "start": 45.94, + "end": 45.96, + "confidence": 0.975 + }, + { + "text": "is", + "start": 45.96, + "end": 45.98, + "confidence": 0.997 + }, + { + "text": "the", + "start": 45.98, + "end": 46.0, + "confidence": 0.988 + }, + { + "text": "one", + "start": 46.0, + "end": 46.02, + "confidence": 0.985 + }, + { + "text": "who", + "start": 46.02, + "end": 46.04, + "confidence": 0.976 + }, + { + "text": "is", + "start": 46.04, + "end": 46.06, + "confidence": 0.997 + }, + { + "text": "the", + "start": 46.06, + "end": 46.08, + "confidence": 0.988 + }, + { + "text": "one", + "start": 46.08, + "end": 46.1, + "confidence": 0.984 + }, + { + "text": "who", + "start": 46.1, + "end": 46.12, + "confidence": 0.975 + }, + { + "text": "is", + "start": 46.12, + "end": 46.14, + "confidence": 0.997 + }, + { + "text": "the", + "start": 46.14, + "end": 46.16, + "confidence": 0.987 + }, + { + "text": "one", + "start": 46.16, + "end": 46.18, + "confidence": 0.984 + }, + { + "text": "who", + "start": 46.18, + "end": 46.2, + "confidence": 0.971 + }, + { + "text": "is", + "start": 46.2, + "end": 46.22, + "confidence": 0.997 + }, + { + "text": "the", + "start": 46.22, + "end": 46.24, + "confidence": 0.986 + }, + { + "text": "one", + "start": 46.24, + "end": 46.26, + "confidence": 0.983 + }, + { + "text": "who", + "start": 46.26, + "end": 46.28, + "confidence": 0.971 + }, + { + "text": "is", + "start": 46.28, + "end": 46.3, + "confidence": 0.996 + }, + { + "text": "the", + "start": 46.3, + "end": 
46.32, + "confidence": 0.986 + }, + { + "text": "one", + "start": 46.32, + "end": 46.84, + "confidence": 0.982 + }, + { + "text": "who", + "start": 46.84, + "end": 46.86, + "confidence": 0.969 + }, + { + "text": "is", + "start": 46.86, + "end": 46.88, + "confidence": 0.996 + }, + { + "text": "the", + "start": 46.88, + "end": 46.9, + "confidence": 0.986 + }, + { + "text": "one", + "start": 46.9, + "end": 46.92, + "confidence": 0.982 + }, + { + "text": "who", + "start": 46.92, + "end": 46.94, + "confidence": 0.966 + }, + { + "text": "is", + "start": 46.94, + "end": 46.96, + "confidence": 0.996 + }, + { + "text": "the", + "start": 46.96, + "end": 46.98, + "confidence": 0.985 + }, + { + "text": "one", + "start": 46.98, + "end": 47.1, + "confidence": 0.982 + }, + { + "text": "who", + "start": 47.1, + "end": 47.12, + "confidence": 0.966 + }, + { + "text": "is", + "start": 47.12, + "end": 47.14, + "confidence": 0.995 + }, + { + "text": "the", + "start": 47.14, + "end": 47.16, + "confidence": 0.985 + }, + { + "text": "one", + "start": 47.16, + "end": 47.18, + "confidence": 0.982 + }, + { + "text": "who", + "start": 47.18, + "end": 47.2, + "confidence": 0.963 + }, + { + "text": "is", + "start": 47.2, + "end": 47.22, + "confidence": 0.995 + }, + { + "text": "the", + "start": 47.22, + "end": 47.24, + "confidence": 0.984 + }, + { + "text": "one", + "start": 47.24, + "end": 47.26, + "confidence": 0.98 + }, + { + "text": "who", + "start": 47.26, + "end": 47.28, + "confidence": 0.959 + }, + { + "text": "is", + "start": 47.28, + "end": 47.3, + "confidence": 0.994 + }, + { + "text": "the", + "start": 47.3, + "end": 47.32, + "confidence": 0.983 + }, + { + "text": "one", + "start": 47.32, + "end": 47.34, + "confidence": 0.98 + }, + { + "text": "who", + "start": 47.34, + "end": 47.36, + "confidence": 0.958 + }, + { + "text": "is", + "start": 47.36, + "end": 47.38, + "confidence": 0.994 + }, + { + "text": "the", + "start": 47.38, + "end": 47.4, + "confidence": 0.983 + }, + { + "text": 
"one", + "start": 47.4, + "end": 47.6, + "confidence": 0.979 + }, + { + "text": "who", + "start": 47.6, + "end": 47.62, + "confidence": 0.955 + }, + { + "text": "is", + "start": 47.62, + "end": 47.64, + "confidence": 0.993 + }, + { + "text": "the", + "start": 47.64, + "end": 47.66, + "confidence": 0.982 + }, + { + "text": "one", + "start": 47.66, + "end": 47.68, + "confidence": 0.98 + }, + { + "text": "who", + "start": 47.68, + "end": 47.7, + "confidence": 0.956 + }, + { + "text": "is", + "start": 47.7, + "end": 47.72, + "confidence": 0.993 + }, + { + "text": "the", + "start": 47.72, + "end": 47.74, + "confidence": 0.981 + }, + { + "text": "one", + "start": 47.74, + "end": 47.76, + "confidence": 0.978 + }, + { + "text": "who", + "start": 47.76, + "end": 47.78, + "confidence": 0.949 + }, + { + "text": "is", + "start": 47.78, + "end": 47.8, + "confidence": 0.992 + }, + { + "text": "the", + "start": 47.8, + "end": 47.82, + "confidence": 0.98 + }, + { + "text": "one", + "start": 47.82, + "end": 49.1, + "confidence": 0.978 + }, + { + "text": "who", + "start": 49.1, + "end": 49.12, + "confidence": 0.948 + }, + { + "text": "is", + "start": 49.12, + "end": 49.14, + "confidence": 0.991 + }, + { + "text": "the", + "start": 49.14, + "end": 49.16, + "confidence": 0.98 + }, + { + "text": "one", + "start": 49.16, + "end": 49.26, + "confidence": 0.978 + }, + { + "text": "who", + "start": 49.26, + "end": 49.28, + "confidence": 0.949 + }, + { + "text": "is", + "start": 49.28, + "end": 49.3, + "confidence": 0.991 + }, + { + "text": "the", + "start": 49.3, + "end": 49.32, + "confidence": 0.979 + }, + { + "text": "one", + "start": 49.32, + "end": 49.34, + "confidence": 0.977 + }, + { + "text": "who", + "start": 49.34, + "end": 49.36, + "confidence": 0.943 + }, + { + "text": "is", + "start": 49.36, + "end": 49.38, + "confidence": 0.99 + }, + { + "text": "the", + "start": 49.38, + "end": 49.4, + "confidence": 0.977 + }, + { + "text": "one", + "start": 49.4, + "end": 49.42, + 
"confidence": 0.976 + }, + { + "text": "who", + "start": 49.42, + "end": 49.44, + "confidence": 0.94 + }, + { + "text": "is", + "start": 49.44, + "end": 49.46, + "confidence": 0.989 + }, + { + "text": "the", + "start": 49.46, + "end": 49.48, + "confidence": 0.977 + }, + { + "text": "one", + "start": 49.48, + "end": 49.5, + "confidence": 0.975 + }, + { + "text": "who", + "start": 49.5, + "end": 49.52, + "confidence": 0.94 + }, + { + "text": "is", + "start": 49.52, + "end": 49.54, + "confidence": 0.989 + }, + { + "text": "the", + "start": 49.54, + "end": 49.56, + "confidence": 0.975 + }, + { + "text": "one", + "start": 49.56, + "end": 49.58, + "confidence": 0.975 + }, + { + "text": "who", + "start": 49.58, + "end": 49.6, + "confidence": 0.935 + }, + { + "text": "is", + "start": 49.6, + "end": 49.62, + "confidence": 0.987 + }, + { + "text": "the", + "start": 49.62, + "end": 49.64, + "confidence": 0.974 + }, + { + "text": "one", + "start": 49.64, + "end": 49.66, + "confidence": 0.975 + }, + { + "text": "who", + "start": 49.66, + "end": 49.68, + "confidence": 0.936 + }, + { + "text": "is", + "start": 49.68, + "end": 49.7, + "confidence": 0.988 + }, + { + "text": "the", + "start": 49.7, + "end": 49.72, + "confidence": 0.974 + }, + { + "text": "one", + "start": 49.72, + "end": 49.74, + "confidence": 0.971 + }, + { + "text": "who", + "start": 49.74, + "end": 49.76, + "confidence": 0.931 + }, + { + "text": "is", + "start": 49.76, + "end": 49.78, + "confidence": 0.986 + }, + { + "text": "the", + "start": 49.78, + "end": 49.8, + "confidence": 0.971 + }, + { + "text": "one", + "start": 49.8, + "end": 49.82, + "confidence": 0.971 + }, + { + "text": "who", + "start": 49.82, + "end": 49.84, + "confidence": 0.928 + }, + { + "text": "is", + "start": 49.84, + "end": 49.86, + "confidence": 0.986 + }, + { + "text": "the", + "start": 49.86, + "end": 49.88, + "confidence": 0.971 + }, + { + "text": "one", + "start": 49.88, + "end": 49.9, + "confidence": 0.971 + }, + { + "text": "who", + 
"start": 49.9, + "end": 49.92, + "confidence": 0.923 + }, + { + "text": "is", + "start": 49.92, + "end": 49.94, + "confidence": 0.985 + }, + { + "text": "the", + "start": 49.94, + "end": 49.96, + "confidence": 0.97 + }, + { + "text": "one", + "start": 49.96, + "end": 66.64, + "confidence": 0.969 + }, + { + "text": "who", + "start": 66.64, + "end": 66.66, + "confidence": 0.925 + }, + { + "text": "is", + "start": 66.66, + "end": 66.98, + "confidence": 0.985 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/split_subtitles/punctuations.mp3_20.srt b/tests/expected/split_subtitles/punctuations.mp3_20.srt new file mode 100644 index 0000000000000000000000000000000000000000..607f0125cf1df1d150f8432a5ce5ad72669143df --- /dev/null +++ b/tests/expected/split_subtitles/punctuations.mp3_20.srt @@ -0,0 +1,12 @@ +1 +00:00:00,400 --> 00:00:01,360 +Dis-moi, + +2 +00:00:01,360 --> 00:00:02,020 +est-ce que l'avion + +3 +00:00:02,020 --> 00:00:02,340 +vole? + diff --git a/tests/expected/split_subtitles/punctuations.mp3_20.vtt b/tests/expected/split_subtitles/punctuations.mp3_20.vtt new file mode 100644 index 0000000000000000000000000000000000000000..d06e85098725e851017bd86f94fa529b5c66d030 --- /dev/null +++ b/tests/expected/split_subtitles/punctuations.mp3_20.vtt @@ -0,0 +1,11 @@ +WEBVTT + +00:00.400 --> 00:01.360 +Dis-moi, + +00:01.360 --> 00:02.020 +est-ce que l'avion + +00:02.020 --> 00:02.340 +vole? + diff --git a/tests/expected/split_subtitles/punctuations.mp3_50.srt b/tests/expected/split_subtitles/punctuations.mp3_50.srt new file mode 100644 index 0000000000000000000000000000000000000000..ffc9a2b42b2c510691ec27b303df2c88825c0aa4 --- /dev/null +++ b/tests/expected/split_subtitles/punctuations.mp3_50.srt @@ -0,0 +1,4 @@ +1 +00:00:00,400 --> 00:00:02,340 +Dis-moi, est-ce que l'avion vole? 
+ diff --git a/tests/expected/split_subtitles/punctuations.mp3_50.vtt b/tests/expected/split_subtitles/punctuations.mp3_50.vtt new file mode 100644 index 0000000000000000000000000000000000000000..83c7bd9a0253ca6a8d117803fac3a2bdc3b9e525 --- /dev/null +++ b/tests/expected/split_subtitles/punctuations.mp3_50.vtt @@ -0,0 +1,5 @@ +WEBVTT + +00:00.400 --> 00:02.340 +Dis-moi, est-ce que l'avion vole? + diff --git a/tests/expected/split_subtitles/punctuations.mp3_6.srt b/tests/expected/split_subtitles/punctuations.mp3_6.srt new file mode 100644 index 0000000000000000000000000000000000000000..0caea3d7e82a647c78c154e2d1879292e5442c9d --- /dev/null +++ b/tests/expected/split_subtitles/punctuations.mp3_6.srt @@ -0,0 +1,20 @@ +1 +00:00:00,400 --> 00:00:01,360 +Dis-moi, + +2 +00:00:01,360 --> 00:00:01,520 +est-ce + +3 +00:00:01,520 --> 00:00:01,640 +que + +4 +00:00:01,640 --> 00:00:02,020 +l'avion + +5 +00:00:02,020 --> 00:00:02,340 +vole? + diff --git a/tests/expected/split_subtitles/punctuations.mp3_6.vtt b/tests/expected/split_subtitles/punctuations.mp3_6.vtt new file mode 100644 index 0000000000000000000000000000000000000000..f255e42075a222812432fc82fe6622c86de71015 --- /dev/null +++ b/tests/expected/split_subtitles/punctuations.mp3_6.vtt @@ -0,0 +1,17 @@ +WEBVTT + +00:00.400 --> 00:01.360 +Dis-moi, + +00:01.360 --> 00:01.520 +est-ce + +00:01.520 --> 00:01.640 +que + +00:01.640 --> 00:02.020 +l'avion + +00:02.020 --> 00:02.340 +vole? 
+ diff --git a/tests/expected/split_subtitles/smartphone.mp3_20.srt b/tests/expected/split_subtitles/smartphone.mp3_20.srt new file mode 100644 index 0000000000000000000000000000000000000000..22c039440f6cb0beda8f9ccf043d766a717ac78a --- /dev/null +++ b/tests/expected/split_subtitles/smartphone.mp3_20.srt @@ -0,0 +1,812 @@ +1 +00:00:00,380 --> 00:00:01,080 +C'est évident ce que + +2 +00:00:01,080 --> 00:00:01,780 +dit Nicolas, + +3 +00:00:01,780 --> 00:00:02,380 +mais je ne me + +4 +00:00:02,380 --> 00:00:02,840 +l'étais jamais + +5 +00:00:02,840 --> 00:00:03,620 +formulé comme ça. + +6 +00:00:04,080 --> 00:00:05,000 +Ce qui fait la force + +7 +00:00:05,000 --> 00:00:05,880 +du smartphone, + +8 +00:00:05,880 --> 00:00:06,520 +c'est pas seulement + +9 +00:00:06,520 --> 00:00:07,560 +l'accumulation des + +10 +00:00:07,560 --> 00:00:07,920 +fonctions, + +11 +00:00:08,320 --> 00:00:09,100 +mais la manière dont + +12 +00:00:09,100 --> 00:00:10,320 +elles interagissent + +13 +00:00:10,320 --> 00:00:10,880 +entre elles. + +14 +00:00:10,960 --> 00:00:11,400 +Ce qui dit + +15 +00:00:11,400 --> 00:00:11,780 +d'ailleurs sur la + +16 +00:00:11,780 --> 00:00:12,120 +photo, + +17 +00:00:12,120 --> 00:00:13,000 +c'est hyper convaincant. + +18 +00:00:13,340 --> 00:00:14,340 +Alors évidemment, + +19 +00:00:14,340 --> 00:00:15,160 +il faudrait ajouter + +20 +00:00:15,160 --> 00:00:16,020 +les interfaces. + +21 +00:00:16,220 --> 00:00:17,260 +L'écran tactile a + +22 +00:00:17,260 --> 00:00:18,620 +été beaucoup très + +23 +00:00:18,620 --> 00:00:19,360 +souvent mentionné. + +24 +00:00:19,840 --> 00:00:20,520 +Mais bon, + +25 +00:00:20,520 --> 00:00:20,960 +il faut dire qu'il + +26 +00:00:20,960 --> 00:00:22,100 +profite aussi de 20 + +27 +00:00:22,100 --> 00:00:22,920 +ans pendant lesquels + +28 +00:00:22,920 --> 00:00:23,720 +les ordinateurs nous + +29 +00:00:23,720 --> 00:00:24,500 +ont appris à cliquer + +30 +00:00:24,500 --> 00:00:25,260 +sur des icônes. 
+ +31 +00:00:25,420 --> 00:00:26,660 +Sauf que le + +32 +00:00:26,660 --> 00:00:27,620 +smartphone ajoute le + +33 +00:00:27,620 --> 00:00:28,180 +toucher, + +34 +00:00:28,180 --> 00:00:28,680 +ce qui rend le + +35 +00:00:28,680 --> 00:00:30,220 +contact plus direct, + +36 +00:00:30,220 --> 00:00:30,640 +plus sensible. + +37 +00:00:31,040 --> 00:00:31,700 +Et puis évidemment, + +38 +00:00:31,700 --> 00:00:32,120 +il faudrait parler + +39 +00:00:32,120 --> 00:00:32,480 +aussi des + +40 +00:00:32,480 --> 00:00:33,180 +applications qui + +41 +00:00:33,180 --> 00:00:33,960 +permettent de + +42 +00:00:33,960 --> 00:00:34,800 +contourner le côté + +43 +00:00:34,800 --> 00:00:35,780 +touffu de la + +44 +00:00:35,780 --> 00:00:36,780 +navigation web pour + +45 +00:00:36,780 --> 00:00:37,680 +aller directement au + +46 +00:00:37,680 --> 00:00:37,820 +but. + +47 +00:00:37,820 --> 00:00:39,420 +Bref, tout ça, + +48 +00:00:39,420 --> 00:00:40,160 +ce sont les + +49 +00:00:40,160 --> 00:00:40,960 +conditions qui + +50 +00:00:40,960 --> 00:00:42,060 +permettent de créer + +51 +00:00:42,060 --> 00:00:42,800 +cet objet dont + +52 +00:00:42,800 --> 00:00:43,700 +Nicolas dit qu'il + +53 +00:00:43,700 --> 00:00:43,880 +est + +54 +00:00:43,880 --> 00:00:44,980 +vraisemblablement + +55 +00:00:44,980 --> 00:00:45,700 +inédit dans + +56 +00:00:45,700 --> 00:00:46,180 +l'histoire de + +57 +00:00:46,180 --> 00:00:46,580 +l'humanité. + +58 +00:00:46,600 --> 00:00:47,720 +Mais ça, + +59 +00:00:47,720 --> 00:00:48,260 +ça soulève une autre + +60 +00:00:48,260 --> 00:00:48,820 +interrogation. + +61 +00:00:49,220 --> 00:00:49,980 +Est-ce que le fait + +62 +00:00:49,980 --> 00:00:51,120 +que cet objet soit + +63 +00:00:51,120 --> 00:00:52,420 +inédit induit que + +64 +00:00:52,420 --> 00:00:53,660 +notre rapport à lui + +65 +00:00:53,660 --> 00:00:55,000 +est aussi un rapport + +66 +00:00:55,000 --> 00:00:55,460 +inédit? 
+ +67 +00:00:55,460 --> 00:00:56,240 +Je veux dire, + +68 +00:00:56,240 --> 00:00:56,580 +est-ce que le + +69 +00:00:56,580 --> 00:00:57,280 +rapport qu'on a au + +70 +00:00:57,280 --> 00:00:57,920 +smartphone est + +71 +00:00:57,920 --> 00:00:58,660 +comparable à celui + +72 +00:00:58,660 --> 00:00:59,460 +qu'on entretenait à + +73 +00:00:59,460 --> 00:00:59,960 +d'autres objets + +74 +00:00:59,960 --> 00:01:01,500 +techniques comme la + +75 +00:01:01,500 --> 00:01:02,680 +voiture ou le + +76 +00:01:02,680 --> 00:01:03,120 +téléphone? + +77 +00:01:03,360 --> 00:01:05,660 +Il n'y a pas + +78 +00:01:05,660 --> 00:01:06,420 +d'équivalent en + +79 +00:01:06,420 --> 00:01:06,660 +fait. + +80 +00:01:06,880 --> 00:01:07,540 +Et donc cette espèce + +81 +00:01:07,540 --> 00:01:08,940 +de nouveauté dans la + +82 +00:01:08,940 --> 00:01:10,240 +relation à l'objet, + +83 +00:01:10,240 --> 00:01:10,760 +c'est fascinant et + +84 +00:01:10,760 --> 00:01:11,520 +terrifiant. + +85 +00:01:11,620 --> 00:01:12,440 +Parce qu'on a + +86 +00:01:12,440 --> 00:01:13,560 +l'impression, + +87 +00:01:13,560 --> 00:01:14,400 +comme le disent les + +88 +00:01:14,400 --> 00:01:15,000 +utilisateurs et les + +89 +00:01:15,000 --> 00:01:15,220 +services, + +90 +00:01:15,220 --> 00:01:16,080 +d'être dépendants de + +91 +00:01:16,080 --> 00:01:16,480 +cet objet, + +92 +00:01:16,860 --> 00:01:17,340 +d'induire en fait + +93 +00:01:17,340 --> 00:01:18,480 +une espèce de + +94 +00:01:18,480 --> 00:01:18,600 +relation, + +95 +00:01:18,600 --> 00:01:19,920 +de médiation avec le + +96 +00:01:19,920 --> 00:01:21,780 +monde qui rend de + +97 +00:01:21,780 --> 00:01:22,240 +l'ampleur et qui + +98 +00:01:22,240 --> 00:01:22,720 +amène aussi à des + +99 +00:01:22,720 --> 00:01:23,260 +formes de rejet. + +100 +00:01:23,940 --> 00:01:24,940 +Donc, + +101 +00:01:24,940 --> 00:01:26,560 +à objet inédit, + +102 +00:01:26,560 --> 00:01:27,800 +rapport inédit. 
+ +103 +00:01:28,020 --> 00:01:29,280 +Et ce rapport, + +104 +00:01:29,280 --> 00:01:29,880 +si j'en crois + +105 +00:01:29,880 --> 00:01:30,540 +Nicolas, + +106 +00:01:30,540 --> 00:01:31,800 +serait caractérisé + +107 +00:01:31,800 --> 00:01:33,400 +par un mélange de + +108 +00:01:33,400 --> 00:01:34,680 +dépendance et de + +109 +00:01:34,680 --> 00:01:35,140 +rejet. + +110 +00:01:35,780 --> 00:01:37,140 +Bon, en vrai, + +111 +00:01:37,140 --> 00:01:38,080 +il faudrait remonter + +112 +00:01:38,080 --> 00:01:39,320 +très très finement + +113 +00:01:39,320 --> 00:01:40,240 +toute l'histoire des + +114 +00:01:40,240 --> 00:01:41,480 +objets techniques et + +115 +00:01:41,480 --> 00:01:42,320 +de leur insertion + +116 +00:01:42,320 --> 00:01:42,860 +dans nos vies + +117 +00:01:42,900 --> 00:01:43,760 +pour déterminer si + +118 +00:01:43,760 --> 00:01:44,740 +ce rapport est + +119 +00:01:44,740 --> 00:01:45,740 +totalement inédit. + +120 +00:01:46,100 --> 00:01:46,920 +Mais j'ai + +121 +00:01:46,920 --> 00:01:47,560 +l'impression comme + +122 +00:01:47,560 --> 00:01:48,700 +ça que Nicolas ne se + +123 +00:01:48,700 --> 00:01:49,340 +trompe pas vraiment. + +124 +00:01:49,880 --> 00:01:50,520 +Pour autant que je + +125 +00:01:50,520 --> 00:01:51,140 +sache, + +126 +00:01:51,140 --> 00:01:52,060 +il y a eu plein de + +127 +00:01:52,060 --> 00:01:52,940 +discussions autour + +128 +00:01:52,940 --> 00:01:54,060 +de la voiture ou + +129 +00:01:54,060 --> 00:01:54,980 +même du téléphone. + +130 +00:01:55,340 --> 00:01:56,400 +Mais la dépendance + +131 +00:01:56,400 --> 00:01:57,460 +n'était pas du même + +132 +00:01:57,460 --> 00:01:57,780 +ordre. + +133 +00:01:57,780 --> 00:01:58,780 +Donc le rejet non + +134 +00:01:58,780 --> 00:01:59,380 +plus n'était pas du + +135 +00:01:59,380 --> 00:01:59,840 +même ordre. 
+ +136 +00:01:59,980 --> 00:02:00,880 +On peut adorer sa + +137 +00:02:00,880 --> 00:02:01,460 +bagnole, + +138 +00:02:01,460 --> 00:02:02,340 +en avoir besoin pour + +139 +00:02:02,340 --> 00:02:03,020 +plein de choses. + +140 +00:02:03,280 --> 00:02:04,680 +Et bien, le soir, + +141 +00:02:04,680 --> 00:02:05,380 +quand on va se + +142 +00:02:05,380 --> 00:02:05,800 +coucher, + +143 +00:02:05,800 --> 00:02:06,360 +on la laisse. + +144 +00:02:06,980 --> 00:02:07,800 +On ne l'a pas dans + +145 +00:02:07,800 --> 00:02:08,680 +la main quand on est + +146 +00:02:08,680 --> 00:02:09,140 +au lit, + +147 +00:02:09,140 --> 00:02:09,680 +on ne l'emmène pas + +148 +00:02:09,680 --> 00:02:10,480 +au chiottes. + +149 +00:02:10,860 --> 00:02:11,480 +On pouvait être + +150 +00:02:11,480 --> 00:02:13,100 +énervé par son môme + +151 +00:02:13,100 --> 00:02:13,800 +qui occupait la + +152 +00:02:13,800 --> 00:02:14,600 +ligne de téléphone + +153 +00:02:14,600 --> 00:02:15,360 +pendant une heure + +154 +00:02:15,360 --> 00:02:15,960 +chaque soir pour + +155 +00:02:15,960 --> 00:02:16,600 +discuter avec un + +156 +00:02:16,600 --> 00:02:16,900 +copain. + +157 +00:02:17,280 --> 00:02:17,940 +Mais ça ne + +158 +00:02:17,940 --> 00:02:18,980 +ressemblait pas à ce + +159 +00:02:18,980 --> 00:02:20,120 +qu'on peut ressentir + +160 +00:02:20,120 --> 00:02:21,340 +à voir ce même môme + +161 +00:02:21,340 --> 00:02:21,880 +aujourd'hui, + +162 +00:02:22,140 --> 00:02:23,180 +continuellement avec + +163 +00:02:23,180 --> 00:02:23,940 +son smartphone dans + +164 +00:02:23,940 --> 00:02:24,360 +la main, + +165 +00:02:24,360 --> 00:02:25,060 +comme si c'était une + +166 +00:02:25,060 --> 00:02:25,820 +sorte de pacemaker + +167 +00:02:25,820 --> 00:02:26,300 +externe, + +168 +00:02:26,340 --> 00:02:27,360 +comme si le lâcher + +169 +00:02:27,360 --> 00:02:28,000 +allait entraîner sa + +170 +00:02:28,000 --> 00:02:28,840 +mort immédiate. 
+ +171 +00:02:29,040 --> 00:02:29,280 +Bon, + +172 +00:02:29,280 --> 00:02:29,880 +je dis ça pour le + +173 +00:02:29,880 --> 00:02:30,320 +môme, + +174 +00:02:30,320 --> 00:02:31,140 +mais c'est évidemment + +175 +00:02:31,140 --> 00:02:31,760 +valable pour nous + +176 +00:02:31,760 --> 00:02:31,960 +aussi. + +177 +00:02:32,340 --> 00:02:33,460 +Donc, + +178 +00:02:33,460 --> 00:02:34,240 +rapport inédit. + +179 +00:02:34,240 --> 00:02:35,480 +D'accord. + +180 +00:02:35,480 --> 00:02:36,680 +Mais pourquoi a-t-on + +181 +00:02:36,680 --> 00:02:37,260 +l'impression qu'on + +182 +00:02:37,260 --> 00:02:38,220 +n'en sortira jamais? + +183 +00:02:38,860 --> 00:02:39,780 +Est-ce qu'il faut en + +184 +00:02:39,780 --> 00:02:40,660 +remettre la faute + +185 +00:02:40,660 --> 00:02:41,720 +sur les gens qui ont + +186 +00:02:41,720 --> 00:02:42,780 +créé cet outil + +187 +00:02:42,780 --> 00:02:43,500 +merveilleux et + +188 +00:02:43,500 --> 00:02:43,860 +diabolique, + +189 +00:02:43,860 --> 00:02:44,660 +et diabolique parce + +190 +00:02:44,660 --> 00:02:45,320 +que merveilleux? + +191 +00:02:46,340 --> 00:02:47,480 +Les économistes + +192 +00:02:47,480 --> 00:02:47,820 +parlent de + +193 +00:02:47,820 --> 00:02:48,500 +dépendance du + +194 +00:02:48,500 --> 00:02:48,820 +sentier. + +195 +00:02:48,860 --> 00:02:49,700 +C'est l'idée qu'on + +196 +00:02:49,700 --> 00:02:50,780 +est sur un sentier + +197 +00:02:50,780 --> 00:02:51,900 +qui a été établi, + +198 +00:02:51,900 --> 00:02:52,720 +soit volontairement + +199 +00:02:52,720 --> 00:02:54,240 +en marchant dessus, + +200 +00:02:54,240 --> 00:02:55,500 +soit en définissant + +201 +00:02:55,500 --> 00:02:56,040 +des bornes, + +202 +00:02:56,040 --> 00:02:56,820 +en définissant une + +203 +00:02:56,820 --> 00:02:57,420 +signalétique. 
+ diff --git a/tests/expected/split_subtitles/smartphone.mp3_20.vtt b/tests/expected/split_subtitles/smartphone.mp3_20.vtt new file mode 100644 index 0000000000000000000000000000000000000000..b880a5ca949532022a422bc316c7efd245866555 --- /dev/null +++ b/tests/expected/split_subtitles/smartphone.mp3_20.vtt @@ -0,0 +1,611 @@ +WEBVTT + +00:00.380 --> 00:01.080 +C'est évident ce que + +00:01.080 --> 00:01.780 +dit Nicolas, + +00:01.780 --> 00:02.380 +mais je ne me + +00:02.380 --> 00:02.840 +l'étais jamais + +00:02.840 --> 00:03.620 +formulé comme ça. + +00:04.080 --> 00:05.000 +Ce qui fait la force + +00:05.000 --> 00:05.880 +du smartphone, + +00:05.880 --> 00:06.520 +c'est pas seulement + +00:06.520 --> 00:07.560 +l'accumulation des + +00:07.560 --> 00:07.920 +fonctions, + +00:08.320 --> 00:09.100 +mais la manière dont + +00:09.100 --> 00:10.320 +elles interagissent + +00:10.320 --> 00:10.880 +entre elles. + +00:10.960 --> 00:11.400 +Ce qui dit + +00:11.400 --> 00:11.780 +d'ailleurs sur la + +00:11.780 --> 00:12.120 +photo, + +00:12.120 --> 00:13.000 +c'est hyper convaincant. + +00:13.340 --> 00:14.340 +Alors évidemment, + +00:14.340 --> 00:15.160 +il faudrait ajouter + +00:15.160 --> 00:16.020 +les interfaces. + +00:16.220 --> 00:17.260 +L'écran tactile a + +00:17.260 --> 00:18.620 +été beaucoup très + +00:18.620 --> 00:19.360 +souvent mentionné. + +00:19.840 --> 00:20.520 +Mais bon, + +00:20.520 --> 00:20.960 +il faut dire qu'il + +00:20.960 --> 00:22.100 +profite aussi de 20 + +00:22.100 --> 00:22.920 +ans pendant lesquels + +00:22.920 --> 00:23.720 +les ordinateurs nous + +00:23.720 --> 00:24.500 +ont appris à cliquer + +00:24.500 --> 00:25.260 +sur des icônes. + +00:25.420 --> 00:26.660 +Sauf que le + +00:26.660 --> 00:27.620 +smartphone ajoute le + +00:27.620 --> 00:28.180 +toucher, + +00:28.180 --> 00:28.680 +ce qui rend le + +00:28.680 --> 00:30.220 +contact plus direct, + +00:30.220 --> 00:30.640 +plus sensible. 
+ +00:31.040 --> 00:31.700 +Et puis évidemment, + +00:31.700 --> 00:32.120 +il faudrait parler + +00:32.120 --> 00:32.480 +aussi des + +00:32.480 --> 00:33.180 +applications qui + +00:33.180 --> 00:33.960 +permettent de + +00:33.960 --> 00:34.800 +contourner le côté + +00:34.800 --> 00:35.780 +touffu de la + +00:35.780 --> 00:36.780 +navigation web pour + +00:36.780 --> 00:37.680 +aller directement au + +00:37.680 --> 00:37.820 +but. + +00:37.820 --> 00:39.420 +Bref, tout ça, + +00:39.420 --> 00:40.160 +ce sont les + +00:40.160 --> 00:40.960 +conditions qui + +00:40.960 --> 00:42.060 +permettent de créer + +00:42.060 --> 00:42.800 +cet objet dont + +00:42.800 --> 00:43.700 +Nicolas dit qu'il + +00:43.700 --> 00:43.880 +est + +00:43.880 --> 00:44.980 +vraisemblablement + +00:44.980 --> 00:45.700 +inédit dans + +00:45.700 --> 00:46.180 +l'histoire de + +00:46.180 --> 00:46.580 +l'humanité. + +00:46.600 --> 00:47.720 +Mais ça, + +00:47.720 --> 00:48.260 +ça soulève une autre + +00:48.260 --> 00:48.820 +interrogation. + +00:49.220 --> 00:49.980 +Est-ce que le fait + +00:49.980 --> 00:51.120 +que cet objet soit + +00:51.120 --> 00:52.420 +inédit induit que + +00:52.420 --> 00:53.660 +notre rapport à lui + +00:53.660 --> 00:55.000 +est aussi un rapport + +00:55.000 --> 00:55.460 +inédit? + +00:55.460 --> 00:56.240 +Je veux dire, + +00:56.240 --> 00:56.580 +est-ce que le + +00:56.580 --> 00:57.280 +rapport qu'on a au + +00:57.280 --> 00:57.920 +smartphone est + +00:57.920 --> 00:58.660 +comparable à celui + +00:58.660 --> 00:59.460 +qu'on entretenait à + +00:59.460 --> 00:59.960 +d'autres objets + +00:59.960 --> 01:01.500 +techniques comme la + +01:01.500 --> 01:02.680 +voiture ou le + +01:02.680 --> 01:03.120 +téléphone? + +01:03.360 --> 01:05.660 +Il n'y a pas + +01:05.660 --> 01:06.420 +d'équivalent en + +01:06.420 --> 01:06.660 +fait. 
+ +01:06.880 --> 01:07.540 +Et donc cette espèce + +01:07.540 --> 01:08.940 +de nouveauté dans la + +01:08.940 --> 01:10.240 +relation à l'objet, + +01:10.240 --> 01:10.760 +c'est fascinant et + +01:10.760 --> 01:11.520 +terrifiant. + +01:11.620 --> 01:12.440 +Parce qu'on a + +01:12.440 --> 01:13.560 +l'impression, + +01:13.560 --> 01:14.400 +comme le disent les + +01:14.400 --> 01:15.000 +utilisateurs et les + +01:15.000 --> 01:15.220 +services, + +01:15.220 --> 01:16.080 +d'être dépendants de + +01:16.080 --> 01:16.480 +cet objet, + +01:16.860 --> 01:17.340 +d'induire en fait + +01:17.340 --> 01:18.480 +une espèce de + +01:18.480 --> 01:18.600 +relation, + +01:18.600 --> 01:19.920 +de médiation avec le + +01:19.920 --> 01:21.780 +monde qui rend de + +01:21.780 --> 01:22.240 +l'ampleur et qui + +01:22.240 --> 01:22.720 +amène aussi à des + +01:22.720 --> 01:23.260 +formes de rejet. + +01:23.940 --> 01:24.940 +Donc, + +01:24.940 --> 01:26.560 +à objet inédit, + +01:26.560 --> 01:27.800 +rapport inédit. + +01:28.020 --> 01:29.280 +Et ce rapport, + +01:29.280 --> 01:29.880 +si j'en crois + +01:29.880 --> 01:30.540 +Nicolas, + +01:30.540 --> 01:31.800 +serait caractérisé + +01:31.800 --> 01:33.400 +par un mélange de + +01:33.400 --> 01:34.680 +dépendance et de + +01:34.680 --> 01:35.140 +rejet. + +01:35.780 --> 01:37.140 +Bon, en vrai, + +01:37.140 --> 01:38.080 +il faudrait remonter + +01:38.080 --> 01:39.320 +très très finement + +01:39.320 --> 01:40.240 +toute l'histoire des + +01:40.240 --> 01:41.480 +objets techniques et + +01:41.480 --> 01:42.320 +de leur insertion + +01:42.320 --> 01:42.860 +dans nos vies + +01:42.900 --> 01:43.760 +pour déterminer si + +01:43.760 --> 01:44.740 +ce rapport est + +01:44.740 --> 01:45.740 +totalement inédit. + +01:46.100 --> 01:46.920 +Mais j'ai + +01:46.920 --> 01:47.560 +l'impression comme + +01:47.560 --> 01:48.700 +ça que Nicolas ne se + +01:48.700 --> 01:49.340 +trompe pas vraiment. 
+ +01:49.880 --> 01:50.520 +Pour autant que je + +01:50.520 --> 01:51.140 +sache, + +01:51.140 --> 01:52.060 +il y a eu plein de + +01:52.060 --> 01:52.940 +discussions autour + +01:52.940 --> 01:54.060 +de la voiture ou + +01:54.060 --> 01:54.980 +même du téléphone. + +01:55.340 --> 01:56.400 +Mais la dépendance + +01:56.400 --> 01:57.460 +n'était pas du même + +01:57.460 --> 01:57.780 +ordre. + +01:57.780 --> 01:58.780 +Donc le rejet non + +01:58.780 --> 01:59.380 +plus n'était pas du + +01:59.380 --> 01:59.840 +même ordre. + +01:59.980 --> 02:00.880 +On peut adorer sa + +02:00.880 --> 02:01.460 +bagnole, + +02:01.460 --> 02:02.340 +en avoir besoin pour + +02:02.340 --> 02:03.020 +plein de choses. + +02:03.280 --> 02:04.680 +Et bien, le soir, + +02:04.680 --> 02:05.380 +quand on va se + +02:05.380 --> 02:05.800 +coucher, + +02:05.800 --> 02:06.360 +on la laisse. + +02:06.980 --> 02:07.800 +On ne l'a pas dans + +02:07.800 --> 02:08.680 +la main quand on est + +02:08.680 --> 02:09.140 +au lit, + +02:09.140 --> 02:09.680 +on ne l'emmène pas + +02:09.680 --> 02:10.480 +au chiottes. + +02:10.860 --> 02:11.480 +On pouvait être + +02:11.480 --> 02:13.100 +énervé par son môme + +02:13.100 --> 02:13.800 +qui occupait la + +02:13.800 --> 02:14.600 +ligne de téléphone + +02:14.600 --> 02:15.360 +pendant une heure + +02:15.360 --> 02:15.960 +chaque soir pour + +02:15.960 --> 02:16.600 +discuter avec un + +02:16.600 --> 02:16.900 +copain. 
+ +02:17.280 --> 02:17.940 +Mais ça ne + +02:17.940 --> 02:18.980 +ressemblait pas à ce + +02:18.980 --> 02:20.120 +qu'on peut ressentir + +02:20.120 --> 02:21.340 +à voir ce même môme + +02:21.340 --> 02:21.880 +aujourd'hui, + +02:22.140 --> 02:23.180 +continuellement avec + +02:23.180 --> 02:23.940 +son smartphone dans + +02:23.940 --> 02:24.360 +la main, + +02:24.360 --> 02:25.060 +comme si c'était une + +02:25.060 --> 02:25.820 +sorte de pacemaker + +02:25.820 --> 02:26.300 +externe, + +02:26.340 --> 02:27.360 +comme si le lâcher + +02:27.360 --> 02:28.000 +allait entraîner sa + +02:28.000 --> 02:28.840 +mort immédiate. + +02:29.040 --> 02:29.280 +Bon, + +02:29.280 --> 02:29.880 +je dis ça pour le + +02:29.880 --> 02:30.320 +môme, + +02:30.320 --> 02:31.140 +mais c'est évidemment + +02:31.140 --> 02:31.760 +valable pour nous + +02:31.760 --> 02:31.960 +aussi. + +02:32.340 --> 02:33.460 +Donc, + +02:33.460 --> 02:34.240 +rapport inédit. + +02:34.240 --> 02:35.480 +D'accord. + +02:35.480 --> 02:36.680 +Mais pourquoi a-t-on + +02:36.680 --> 02:37.260 +l'impression qu'on + +02:37.260 --> 02:38.220 +n'en sortira jamais? + +02:38.860 --> 02:39.780 +Est-ce qu'il faut en + +02:39.780 --> 02:40.660 +remettre la faute + +02:40.660 --> 02:41.720 +sur les gens qui ont + +02:41.720 --> 02:42.780 +créé cet outil + +02:42.780 --> 02:43.500 +merveilleux et + +02:43.500 --> 02:43.860 +diabolique, + +02:43.860 --> 02:44.660 +et diabolique parce + +02:44.660 --> 02:45.320 +que merveilleux? + +02:46.340 --> 02:47.480 +Les économistes + +02:47.480 --> 02:47.820 +parlent de + +02:47.820 --> 02:48.500 +dépendance du + +02:48.500 --> 02:48.820 +sentier. 
+ +02:48.860 --> 02:49.700 +C'est l'idée qu'on + +02:49.700 --> 02:50.780 +est sur un sentier + +02:50.780 --> 02:51.900 +qui a été établi, + +02:51.900 --> 02:52.720 +soit volontairement + +02:52.720 --> 02:54.240 +en marchant dessus, + +02:54.240 --> 02:55.500 +soit en définissant + +02:55.500 --> 02:56.040 +des bornes, + +02:56.040 --> 02:56.820 +en définissant une + +02:56.820 --> 02:57.420 +signalétique. + diff --git a/tests/expected/split_subtitles/smartphone.mp3_50.srt b/tests/expected/split_subtitles/smartphone.mp3_50.srt new file mode 100644 index 0000000000000000000000000000000000000000..5a9cbd960d9a7073f91c8d0412e7daf0f797d25c --- /dev/null +++ b/tests/expected/split_subtitles/smartphone.mp3_50.srt @@ -0,0 +1,356 @@ +1 +00:00:00,380 --> 00:00:01,780 +C'est évident ce que dit Nicolas, + +2 +00:00:01,780 --> 00:00:03,620 +mais je ne me l'étais jamais formulé comme ça. + +3 +00:00:04,080 --> 00:00:05,880 +Ce qui fait la force du smartphone, + +4 +00:00:05,880 --> 00:00:07,920 +c'est pas seulement l'accumulation des fonctions, + +5 +00:00:08,320 --> 00:00:10,580 +mais la manière dont elles interagissent entre + +6 +00:00:10,580 --> 00:00:10,880 +elles. + +7 +00:00:10,960 --> 00:00:12,120 +Ce qui dit d'ailleurs sur la photo, + +8 +00:00:12,120 --> 00:00:13,000 +c'est hyper convaincant. + +9 +00:00:13,340 --> 00:00:14,340 +Alors évidemment, + +10 +00:00:14,340 --> 00:00:16,020 +il faudrait ajouter les interfaces. + +11 +00:00:16,220 --> 00:00:18,900 +L'écran tactile a été beaucoup très souvent + +12 +00:00:18,900 --> 00:00:19,360 +mentionné. + +13 +00:00:19,840 --> 00:00:20,520 +Mais bon, + +14 +00:00:20,520 --> 00:00:22,480 +il faut dire qu'il profite aussi de 20 ans pendant + +15 +00:00:22,480 --> 00:00:24,500 +lesquels les ordinateurs nous ont appris à cliquer + +16 +00:00:24,500 --> 00:00:25,260 +sur des icônes. 
+ +17 +00:00:25,420 --> 00:00:28,180 +Sauf que le smartphone ajoute le toucher, + +18 +00:00:28,180 --> 00:00:30,640 +ce qui rend le contact plus direct, plus sensible. + +19 +00:00:31,040 --> 00:00:31,700 +Et puis évidemment, + +20 +00:00:31,700 --> 00:00:33,180 +il faudrait parler aussi des applications qui + +21 +00:00:33,180 --> 00:00:35,780 +permettent de contourner le côté touffu de la + +22 +00:00:35,780 --> 00:00:37,820 +navigation web pour aller directement au but. + +23 +00:00:37,820 --> 00:00:39,420 +Bref, tout ça, + +24 +00:00:39,420 --> 00:00:42,380 +ce sont les conditions qui permettent de créer cet + +25 +00:00:42,380 --> 00:00:44,980 +objet dont Nicolas dit qu'il est vraisemblablement + +26 +00:00:44,980 --> 00:00:46,580 +inédit dans l'histoire de l'humanité. + +27 +00:00:46,600 --> 00:00:48,820 +Mais ça, ça soulève une autre interrogation. + +28 +00:00:49,220 --> 00:00:51,800 +Est-ce que le fait que cet objet soit inédit + +29 +00:00:51,800 --> 00:00:54,700 +induit que notre rapport à lui est aussi un + +30 +00:00:54,700 --> 00:00:55,460 +rapport inédit? + +31 +00:00:55,460 --> 00:00:56,240 +Je veux dire, + +32 +00:00:56,240 --> 00:00:57,920 +est-ce que le rapport qu'on a au smartphone est + +33 +00:00:57,920 --> 00:00:59,700 +comparable à celui qu'on entretenait à d'autres + +34 +00:00:59,700 --> 00:01:02,680 +objets techniques comme la voiture ou le + +35 +00:01:02,680 --> 00:01:03,120 +téléphone? + +36 +00:01:03,360 --> 00:01:06,660 +Il n'y a pas d'équivalent en fait. + +37 +00:01:06,880 --> 00:01:09,220 +Et donc cette espèce de nouveauté dans la relation + +38 +00:01:09,220 --> 00:01:11,520 +à l'objet, c'est fascinant et terrifiant. 
+ +39 +00:01:11,620 --> 00:01:13,560 +Parce qu'on a l'impression, + +40 +00:01:13,560 --> 00:01:15,220 +comme le disent les utilisateurs et les services, + +41 +00:01:15,220 --> 00:01:16,480 +d'être dépendants de cet objet, + +42 +00:01:16,860 --> 00:01:18,600 +d'induire en fait une espèce de relation, + +43 +00:01:18,600 --> 00:01:22,020 +de médiation avec le monde qui rend de l'ampleur + +44 +00:01:22,020 --> 00:01:23,260 +et qui amène aussi à des formes de rejet. + +45 +00:01:23,940 --> 00:01:27,800 +Donc, à objet inédit, rapport inédit. + +46 +00:01:28,020 --> 00:01:30,540 +Et ce rapport, si j'en crois Nicolas, + +47 +00:01:30,540 --> 00:01:34,540 +serait caractérisé par un mélange de dépendance et + +48 +00:01:34,540 --> 00:01:35,140 +de rejet. + +49 +00:01:35,780 --> 00:01:37,140 +Bon, en vrai, + +50 +00:01:37,140 --> 00:01:39,700 +il faudrait remonter très très finement toute + +51 +00:01:39,700 --> 00:01:41,840 +l'histoire des objets techniques et de leur + +52 +00:01:41,840 --> 00:01:42,860 +insertion dans nos vies + +53 +00:01:42,900 --> 00:01:45,300 +pour déterminer si ce rapport est totalement + +54 +00:01:45,300 --> 00:01:45,740 +inédit. + +55 +00:01:46,100 --> 00:01:48,700 +Mais j'ai l'impression comme ça que Nicolas ne se + +56 +00:01:48,700 --> 00:01:49,340 +trompe pas vraiment. + +57 +00:01:49,880 --> 00:01:51,140 +Pour autant que je sache, + +58 +00:01:51,140 --> 00:01:53,520 +il y a eu plein de discussions autour de la + +59 +00:01:53,520 --> 00:01:54,980 +voiture ou même du téléphone. + +60 +00:01:55,340 --> 00:01:57,780 +Mais la dépendance n'était pas du même ordre. + +61 +00:01:57,780 --> 00:01:59,840 +Donc le rejet non plus n'était pas du même ordre. + +62 +00:01:59,980 --> 00:02:01,460 +On peut adorer sa bagnole, + +63 +00:02:01,460 --> 00:02:03,020 +en avoir besoin pour plein de choses. + +64 +00:02:03,280 --> 00:02:05,800 +Et bien, le soir, quand on va se coucher, + +65 +00:02:05,800 --> 00:02:06,360 +on la laisse. 
+ +66 +00:02:06,980 --> 00:02:09,140 +On ne l'a pas dans la main quand on est au lit, + +67 +00:02:09,140 --> 00:02:10,480 +on ne l'emmène pas au chiottes. + +68 +00:02:10,860 --> 00:02:13,760 +On pouvait être énervé par son môme qui occupait + +69 +00:02:13,760 --> 00:02:15,540 +la ligne de téléphone pendant une heure chaque + +70 +00:02:15,540 --> 00:02:16,900 +soir pour discuter avec un copain. + +71 +00:02:17,280 --> 00:02:19,480 +Mais ça ne ressemblait pas à ce qu'on peut + +72 +00:02:19,480 --> 00:02:21,880 +ressentir à voir ce même môme aujourd'hui, + +73 +00:02:22,140 --> 00:02:24,360 +continuellement avec son smartphone dans la main, + +74 +00:02:24,360 --> 00:02:26,300 +comme si c'était une sorte de pacemaker externe, + +75 +00:02:26,340 --> 00:02:28,220 +comme si le lâcher allait entraîner sa mort + +76 +00:02:28,220 --> 00:02:28,840 +immédiate. + +77 +00:02:29,040 --> 00:02:30,320 +Bon, je dis ça pour le môme, + +78 +00:02:30,320 --> 00:02:31,960 +mais c'est évidemment valable pour nous aussi. + +79 +00:02:32,340 --> 00:02:35,480 +Donc, rapport inédit. D'accord. + +80 +00:02:35,480 --> 00:02:37,440 +Mais pourquoi a-t-on l'impression qu'on n'en + +81 +00:02:37,440 --> 00:02:38,220 +sortira jamais? + +82 +00:02:38,860 --> 00:02:41,280 +Est-ce qu'il faut en remettre la faute sur les + +83 +00:02:41,280 --> 00:02:43,500 +gens qui ont créé cet outil merveilleux et + +84 +00:02:43,500 --> 00:02:45,320 +diabolique, et diabolique parce que merveilleux? + +85 +00:02:46,340 --> 00:02:48,820 +Les économistes parlent de dépendance du sentier. + +86 +00:02:48,860 --> 00:02:51,120 +C'est l'idée qu'on est sur un sentier qui a été + +87 +00:02:51,120 --> 00:02:54,240 +établi, soit volontairement en marchant dessus, + +88 +00:02:54,240 --> 00:02:56,040 +soit en définissant des bornes, + +89 +00:02:56,040 --> 00:02:57,420 +en définissant une signalétique. 
+ diff --git a/tests/expected/split_subtitles/smartphone.mp3_50.vtt b/tests/expected/split_subtitles/smartphone.mp3_50.vtt new file mode 100644 index 0000000000000000000000000000000000000000..68d9b01306781ba2fcb81166b675053824255222 --- /dev/null +++ b/tests/expected/split_subtitles/smartphone.mp3_50.vtt @@ -0,0 +1,269 @@ +WEBVTT + +00:00.380 --> 00:01.780 +C'est évident ce que dit Nicolas, + +00:01.780 --> 00:03.620 +mais je ne me l'étais jamais formulé comme ça. + +00:04.080 --> 00:05.880 +Ce qui fait la force du smartphone, + +00:05.880 --> 00:07.920 +c'est pas seulement l'accumulation des fonctions, + +00:08.320 --> 00:10.580 +mais la manière dont elles interagissent entre + +00:10.580 --> 00:10.880 +elles. + +00:10.960 --> 00:12.120 +Ce qui dit d'ailleurs sur la photo, + +00:12.120 --> 00:13.000 +c'est hyper convaincant. + +00:13.340 --> 00:14.340 +Alors évidemment, + +00:14.340 --> 00:16.020 +il faudrait ajouter les interfaces. + +00:16.220 --> 00:18.900 +L'écran tactile a été beaucoup très souvent + +00:18.900 --> 00:19.360 +mentionné. + +00:19.840 --> 00:20.520 +Mais bon, + +00:20.520 --> 00:22.480 +il faut dire qu'il profite aussi de 20 ans pendant + +00:22.480 --> 00:24.500 +lesquels les ordinateurs nous ont appris à cliquer + +00:24.500 --> 00:25.260 +sur des icônes. + +00:25.420 --> 00:28.180 +Sauf que le smartphone ajoute le toucher, + +00:28.180 --> 00:30.640 +ce qui rend le contact plus direct, plus sensible. + +00:31.040 --> 00:31.700 +Et puis évidemment, + +00:31.700 --> 00:33.180 +il faudrait parler aussi des applications qui + +00:33.180 --> 00:35.780 +permettent de contourner le côté touffu de la + +00:35.780 --> 00:37.820 +navigation web pour aller directement au but. + +00:37.820 --> 00:39.420 +Bref, tout ça, + +00:39.420 --> 00:42.380 +ce sont les conditions qui permettent de créer cet + +00:42.380 --> 00:44.980 +objet dont Nicolas dit qu'il est vraisemblablement + +00:44.980 --> 00:46.580 +inédit dans l'histoire de l'humanité. 
+ +00:46.600 --> 00:48.820 +Mais ça, ça soulève une autre interrogation. + +00:49.220 --> 00:51.800 +Est-ce que le fait que cet objet soit inédit + +00:51.800 --> 00:54.700 +induit que notre rapport à lui est aussi un + +00:54.700 --> 00:55.460 +rapport inédit? + +00:55.460 --> 00:56.240 +Je veux dire, + +00:56.240 --> 00:57.920 +est-ce que le rapport qu'on a au smartphone est + +00:57.920 --> 00:59.700 +comparable à celui qu'on entretenait à d'autres + +00:59.700 --> 01:02.680 +objets techniques comme la voiture ou le + +01:02.680 --> 01:03.120 +téléphone? + +01:03.360 --> 01:06.660 +Il n'y a pas d'équivalent en fait. + +01:06.880 --> 01:09.220 +Et donc cette espèce de nouveauté dans la relation + +01:09.220 --> 01:11.520 +à l'objet, c'est fascinant et terrifiant. + +01:11.620 --> 01:13.560 +Parce qu'on a l'impression, + +01:13.560 --> 01:15.220 +comme le disent les utilisateurs et les services, + +01:15.220 --> 01:16.480 +d'être dépendants de cet objet, + +01:16.860 --> 01:18.600 +d'induire en fait une espèce de relation, + +01:18.600 --> 01:22.020 +de médiation avec le monde qui rend de l'ampleur + +01:22.020 --> 01:23.260 +et qui amène aussi à des formes de rejet. + +01:23.940 --> 01:27.800 +Donc, à objet inédit, rapport inédit. + +01:28.020 --> 01:30.540 +Et ce rapport, si j'en crois Nicolas, + +01:30.540 --> 01:34.540 +serait caractérisé par un mélange de dépendance et + +01:34.540 --> 01:35.140 +de rejet. + +01:35.780 --> 01:37.140 +Bon, en vrai, + +01:37.140 --> 01:39.700 +il faudrait remonter très très finement toute + +01:39.700 --> 01:41.840 +l'histoire des objets techniques et de leur + +01:41.840 --> 01:42.860 +insertion dans nos vies + +01:42.900 --> 01:45.300 +pour déterminer si ce rapport est totalement + +01:45.300 --> 01:45.740 +inédit. + +01:46.100 --> 01:48.700 +Mais j'ai l'impression comme ça que Nicolas ne se + +01:48.700 --> 01:49.340 +trompe pas vraiment. 
+ +01:49.880 --> 01:51.140 +Pour autant que je sache, + +01:51.140 --> 01:53.520 +il y a eu plein de discussions autour de la + +01:53.520 --> 01:54.980 +voiture ou même du téléphone. + +01:55.340 --> 01:57.780 +Mais la dépendance n'était pas du même ordre. + +01:57.780 --> 01:59.840 +Donc le rejet non plus n'était pas du même ordre. + +01:59.980 --> 02:01.460 +On peut adorer sa bagnole, + +02:01.460 --> 02:03.020 +en avoir besoin pour plein de choses. + +02:03.280 --> 02:05.800 +Et bien, le soir, quand on va se coucher, + +02:05.800 --> 02:06.360 +on la laisse. + +02:06.980 --> 02:09.140 +On ne l'a pas dans la main quand on est au lit, + +02:09.140 --> 02:10.480 +on ne l'emmène pas au chiottes. + +02:10.860 --> 02:13.760 +On pouvait être énervé par son môme qui occupait + +02:13.760 --> 02:15.540 +la ligne de téléphone pendant une heure chaque + +02:15.540 --> 02:16.900 +soir pour discuter avec un copain. + +02:17.280 --> 02:19.480 +Mais ça ne ressemblait pas à ce qu'on peut + +02:19.480 --> 02:21.880 +ressentir à voir ce même môme aujourd'hui, + +02:22.140 --> 02:24.360 +continuellement avec son smartphone dans la main, + +02:24.360 --> 02:26.300 +comme si c'était une sorte de pacemaker externe, + +02:26.340 --> 02:28.220 +comme si le lâcher allait entraîner sa mort + +02:28.220 --> 02:28.840 +immédiate. + +02:29.040 --> 02:30.320 +Bon, je dis ça pour le môme, + +02:30.320 --> 02:31.960 +mais c'est évidemment valable pour nous aussi. + +02:32.340 --> 02:35.480 +Donc, rapport inédit. D'accord. + +02:35.480 --> 02:37.440 +Mais pourquoi a-t-on l'impression qu'on n'en + +02:37.440 --> 02:38.220 +sortira jamais? + +02:38.860 --> 02:41.280 +Est-ce qu'il faut en remettre la faute sur les + +02:41.280 --> 02:43.500 +gens qui ont créé cet outil merveilleux et + +02:43.500 --> 02:45.320 +diabolique, et diabolique parce que merveilleux? + +02:46.340 --> 02:48.820 +Les économistes parlent de dépendance du sentier. 
+ +02:48.860 --> 02:51.120 +C'est l'idée qu'on est sur un sentier qui a été + +02:51.120 --> 02:54.240 +établi, soit volontairement en marchant dessus, + +02:54.240 --> 02:56.040 +soit en définissant des bornes, + +02:56.040 --> 02:57.420 +en définissant une signalétique. + diff --git a/tests/expected/split_subtitles/smartphone.mp3_6.srt b/tests/expected/split_subtitles/smartphone.mp3_6.srt new file mode 100644 index 0000000000000000000000000000000000000000..e03246f80d846a479b5404c598c34d50b8266e96 --- /dev/null +++ b/tests/expected/split_subtitles/smartphone.mp3_6.srt @@ -0,0 +1,2024 @@ +1 +00:00:00,380 --> 00:00:00,580 +C'est + +2 +00:00:00,580 --> 00:00:00,880 +évident + +3 +00:00:00,880 --> 00:00:01,080 +ce que + +4 +00:00:01,080 --> 00:00:01,200 +dit + +5 +00:00:01,200 --> 00:00:01,780 +Nicolas, + +6 +00:00:01,780 --> 00:00:01,900 +mais + +7 +00:00:01,900 --> 00:00:02,340 +je ne + +8 +00:00:02,340 --> 00:00:02,380 +me + +9 +00:00:02,380 --> 00:00:02,580 +l'étais + +10 +00:00:02,580 --> 00:00:02,840 +jamais + +11 +00:00:02,840 --> 00:00:03,260 +formulé + +12 +00:00:03,260 --> 00:00:03,420 +comme + +13 +00:00:03,420 --> 00:00:03,620 +ça. 
+ +14 +00:00:04,080 --> 00:00:04,340 +Ce qui + +15 +00:00:04,340 --> 00:00:04,480 +fait + +16 +00:00:04,480 --> 00:00:04,660 +la + +17 +00:00:04,660 --> 00:00:05,000 +force + +18 +00:00:05,000 --> 00:00:05,200 +du + +19 +00:00:05,200 --> 00:00:05,880 +smartphone, + +20 +00:00:05,880 --> 00:00:06,120 +c'est + +21 +00:00:06,120 --> 00:00:06,260 +pas + +22 +00:00:06,260 --> 00:00:06,520 +seulement + +23 +00:00:06,520 --> 00:00:07,380 +l'accumulation + +24 +00:00:07,380 --> 00:00:07,560 +des + +25 +00:00:07,560 --> 00:00:07,920 +fonctions, + +26 +00:00:08,320 --> 00:00:08,440 +mais + +27 +00:00:08,440 --> 00:00:08,600 +la + +28 +00:00:08,600 --> 00:00:08,900 +manière + +29 +00:00:08,900 --> 00:00:09,100 +dont + +30 +00:00:09,100 --> 00:00:09,480 +elles + +31 +00:00:09,480 --> 00:00:10,320 +interagissent + +32 +00:00:10,320 --> 00:00:10,580 +entre + +33 +00:00:10,580 --> 00:00:10,880 +elles. + +34 +00:00:10,960 --> 00:00:11,220 +Ce qui + +35 +00:00:11,220 --> 00:00:11,400 +dit + +36 +00:00:11,400 --> 00:00:11,560 +d'ailleurs + +37 +00:00:11,560 --> 00:00:11,780 +sur la + +38 +00:00:11,780 --> 00:00:12,120 +photo, + +39 +00:00:12,120 --> 00:00:12,200 +c'est + +40 +00:00:12,200 --> 00:00:12,420 +hyper + +41 +00:00:12,420 --> 00:00:13,000 +convaincant. + +42 +00:00:13,340 --> 00:00:13,620 +Alors + +43 +00:00:13,620 --> 00:00:14,340 +évidemment, + +44 +00:00:14,340 --> 00:00:14,380 +il + +45 +00:00:14,380 --> 00:00:14,740 +faudrait + +46 +00:00:14,740 --> 00:00:15,160 +ajouter + +47 +00:00:15,160 --> 00:00:15,520 +les + +48 +00:00:15,520 --> 00:00:16,020 +interfaces. + +49 +00:00:16,220 --> 00:00:16,700 +L'écran + +50 +00:00:16,700 --> 00:00:17,060 +tactile + +51 +00:00:17,060 --> 00:00:17,880 +a été + +52 +00:00:17,880 --> 00:00:18,280 +beaucoup + +53 +00:00:18,280 --> 00:00:18,620 +très + +54 +00:00:18,620 --> 00:00:18,900 +souvent + +55 +00:00:18,900 --> 00:00:19,360 +mentionné. 
+ +56 +00:00:19,840 --> 00:00:20,220 +Mais + +57 +00:00:20,220 --> 00:00:20,520 +bon, + +58 +00:00:20,520 --> 00:00:20,600 +il + +59 +00:00:20,600 --> 00:00:20,700 +faut + +60 +00:00:20,700 --> 00:00:20,840 +dire + +61 +00:00:20,840 --> 00:00:20,960 +qu'il + +62 +00:00:20,960 --> 00:00:21,260 +profite + +63 +00:00:21,260 --> 00:00:21,680 +aussi + +64 +00:00:21,680 --> 00:00:22,100 +de 20 + +65 +00:00:22,100 --> 00:00:22,320 +ans + +66 +00:00:22,320 --> 00:00:22,480 +pendant + +67 +00:00:22,480 --> 00:00:22,920 +lesquels + +68 +00:00:22,920 --> 00:00:23,040 +les + +69 +00:00:23,040 --> 00:00:23,540 +ordinateurs + +70 +00:00:23,540 --> 00:00:23,720 +nous + +71 +00:00:23,720 --> 00:00:23,820 +ont + +72 +00:00:23,820 --> 00:00:24,100 +appris + +73 +00:00:24,100 --> 00:00:24,240 +à + +74 +00:00:24,240 --> 00:00:24,500 +cliquer + +75 +00:00:24,500 --> 00:00:24,660 +sur + +76 +00:00:24,660 --> 00:00:24,940 +des + +77 +00:00:24,940 --> 00:00:25,260 +icônes. + +78 +00:00:25,420 --> 00:00:25,760 +Sauf + +79 +00:00:25,760 --> 00:00:26,660 +que le + +80 +00:00:26,660 --> 00:00:27,060 +smartphone + +81 +00:00:27,060 --> 00:00:27,440 +ajoute + +82 +00:00:27,440 --> 00:00:27,620 +le + +83 +00:00:27,620 --> 00:00:28,180 +toucher, + +84 +00:00:28,180 --> 00:00:28,280 +ce qui + +85 +00:00:28,280 --> 00:00:28,480 +rend + +86 +00:00:28,480 --> 00:00:28,680 +le + +87 +00:00:28,680 --> 00:00:29,100 +contact + +88 +00:00:29,100 --> 00:00:29,460 +plus + +89 +00:00:29,460 --> 00:00:30,220 +direct, + +90 +00:00:30,220 --> 00:00:30,260 +plus + +91 +00:00:30,260 --> 00:00:30,640 +sensible. 
+ +92 +00:00:31,040 --> 00:00:31,220 +Et + +93 +00:00:31,220 --> 00:00:31,360 +puis + +94 +00:00:31,360 --> 00:00:31,700 +évidemment, + +95 +00:00:31,700 --> 00:00:31,740 +il + +96 +00:00:31,740 --> 00:00:31,940 +faudrait + +97 +00:00:31,940 --> 00:00:32,120 +parler + +98 +00:00:32,120 --> 00:00:32,340 +aussi + +99 +00:00:32,340 --> 00:00:32,480 +des + +100 +00:00:32,480 --> 00:00:32,900 +applications + +101 +00:00:32,900 --> 00:00:33,180 +qui + +102 +00:00:33,180 --> 00:00:33,740 +permettent + +103 +00:00:33,740 --> 00:00:33,960 +de + +104 +00:00:33,960 --> 00:00:34,420 +contourner + +105 +00:00:34,420 --> 00:00:34,520 +le + +106 +00:00:34,520 --> 00:00:34,800 +côté + +107 +00:00:34,800 --> 00:00:35,320 +touffu + +108 +00:00:35,320 --> 00:00:35,780 +de la + +109 +00:00:35,780 --> 00:00:36,240 +navigation + +110 +00:00:36,240 --> 00:00:36,600 +web + +111 +00:00:36,600 --> 00:00:36,780 +pour + +112 +00:00:36,780 --> 00:00:36,980 +aller + +113 +00:00:36,980 --> 00:00:37,520 +directement + +114 +00:00:37,520 --> 00:00:37,680 +au + +115 +00:00:37,680 --> 00:00:37,820 +but. 
+ +116 +00:00:37,820 --> 00:00:38,760 +Bref, + +117 +00:00:38,760 --> 00:00:38,980 +tout + +118 +00:00:38,980 --> 00:00:39,420 +ça, + +119 +00:00:39,420 --> 00:00:39,880 +ce sont + +120 +00:00:39,880 --> 00:00:40,160 +les + +121 +00:00:40,160 --> 00:00:40,680 +conditions + +122 +00:00:40,680 --> 00:00:40,960 +qui + +123 +00:00:40,960 --> 00:00:41,460 +permettent + +124 +00:00:41,460 --> 00:00:41,600 +de + +125 +00:00:41,600 --> 00:00:42,060 +créer + +126 +00:00:42,060 --> 00:00:42,380 +cet + +127 +00:00:42,380 --> 00:00:42,600 +objet + +128 +00:00:42,600 --> 00:00:42,800 +dont + +129 +00:00:42,800 --> 00:00:43,260 +Nicolas + +130 +00:00:43,260 --> 00:00:43,500 +dit + +131 +00:00:43,500 --> 00:00:43,700 +qu'il + +132 +00:00:43,700 --> 00:00:43,880 +est + +133 +00:00:43,880 --> 00:00:44,980 +vraisemblablement + +134 +00:00:44,980 --> 00:00:45,380 +inédit + +135 +00:00:45,380 --> 00:00:45,700 +dans + +136 +00:00:45,700 --> 00:00:45,980 +l'histoire + +137 +00:00:45,980 --> 00:00:46,180 +de + +138 +00:00:46,180 --> 00:00:46,580 +l'humanité. + +139 +00:00:46,600 --> 00:00:47,240 +Mais + +140 +00:00:47,240 --> 00:00:47,720 +ça, + +141 +00:00:47,720 --> 00:00:47,840 +ça soulève + +142 +00:00:47,840 --> 00:00:48,020 +une + +143 +00:00:48,020 --> 00:00:48,260 +autre + +144 +00:00:48,260 --> 00:00:48,820 +interrogation. 
+ +145 +00:00:49,220 --> 00:00:49,620 +Est-ce + +146 +00:00:49,620 --> 00:00:49,820 +que le + +147 +00:00:49,820 --> 00:00:49,980 +fait + +148 +00:00:49,980 --> 00:00:50,140 +que + +149 +00:00:50,140 --> 00:00:50,320 +cet + +150 +00:00:50,320 --> 00:00:50,660 +objet + +151 +00:00:50,660 --> 00:00:51,120 +soit + +152 +00:00:51,120 --> 00:00:51,800 +inédit + +153 +00:00:51,800 --> 00:00:52,320 +induit + +154 +00:00:52,320 --> 00:00:52,420 +que + +155 +00:00:52,420 --> 00:00:52,720 +notre + +156 +00:00:52,720 --> 00:00:53,280 +rapport + +157 +00:00:53,280 --> 00:00:53,660 +à lui + +158 +00:00:53,660 --> 00:00:54,020 +est + +159 +00:00:54,020 --> 00:00:54,540 +aussi + +160 +00:00:54,540 --> 00:00:54,700 +un + +161 +00:00:54,700 --> 00:00:55,000 +rapport + +162 +00:00:55,000 --> 00:00:55,460 +inédit? + +163 +00:00:55,460 --> 00:00:55,900 +Je + +164 +00:00:55,900 --> 00:00:56,000 +veux + +165 +00:00:56,000 --> 00:00:56,240 +dire, + +166 +00:00:56,240 --> 00:00:56,360 +est-ce + +167 +00:00:56,360 --> 00:00:56,580 +que le + +168 +00:00:56,580 --> 00:00:56,880 +rapport + +169 +00:00:56,880 --> 00:00:57,040 +qu'on + +170 +00:00:57,040 --> 00:00:57,280 +a au + +171 +00:00:57,280 --> 00:00:57,600 +smartphone + +172 +00:00:57,600 --> 00:00:57,920 +est + +173 +00:00:57,920 --> 00:00:58,240 +comparable + +174 +00:00:58,240 --> 00:00:58,480 +à + +175 +00:00:58,480 --> 00:00:58,660 +celui + +176 +00:00:58,660 --> 00:00:58,900 +qu'on + +177 +00:00:58,900 --> 00:00:59,320 +entretenait + +178 +00:00:59,320 --> 00:00:59,460 +à + +179 +00:00:59,460 --> 00:00:59,700 +d'autres + +180 +00:00:59,700 --> 00:00:59,960 +objets + +181 +00:00:59,960 --> 00:01:00,460 +techniques + +182 +00:01:00,460 --> 00:01:00,880 +comme + +183 +00:01:00,880 --> 00:01:01,500 +la + +184 +00:01:01,500 --> 00:01:02,060 +voiture + +185 +00:01:02,060 --> 00:01:02,680 +ou le + +186 +00:01:02,680 --> 00:01:03,120 +téléphone? 
+ +187 +00:01:03,360 --> 00:01:05,480 +Il n'y + +188 +00:01:05,480 --> 00:01:05,660 +a pas + +189 +00:01:05,660 --> 00:01:06,220 +d'équivalent + +190 +00:01:06,220 --> 00:01:06,420 +en + +191 +00:01:06,420 --> 00:01:06,660 +fait. + +192 +00:01:06,880 --> 00:01:06,980 +Et + +193 +00:01:06,980 --> 00:01:07,080 +donc + +194 +00:01:07,080 --> 00:01:07,280 +cette + +195 +00:01:07,280 --> 00:01:07,540 +espèce + +196 +00:01:07,540 --> 00:01:07,680 +de + +197 +00:01:07,680 --> 00:01:08,480 +nouveauté + +198 +00:01:08,480 --> 00:01:08,660 +dans + +199 +00:01:08,660 --> 00:01:08,940 +la + +200 +00:01:08,940 --> 00:01:09,220 +relation + +201 +00:01:09,220 --> 00:01:09,380 +à + +202 +00:01:09,380 --> 00:01:10,240 +l'objet, + +203 +00:01:10,240 --> 00:01:10,380 +c'est + +204 +00:01:10,380 --> 00:01:10,640 +fascinant + +205 +00:01:10,640 --> 00:01:10,760 +et + +206 +00:01:10,760 --> 00:01:11,520 +terrifiant. + +207 +00:01:11,620 --> 00:01:11,860 +Parce + +208 +00:01:11,860 --> 00:01:12,120 +qu'on + +209 +00:01:12,120 --> 00:01:12,440 +a + +210 +00:01:12,440 --> 00:01:13,560 +l'impression, + +211 +00:01:13,560 --> 00:01:13,840 +comme + +212 +00:01:13,840 --> 00:01:14,000 +le + +213 +00:01:14,000 --> 00:01:14,200 +disent + +214 +00:01:14,200 --> 00:01:14,400 +les + +215 +00:01:14,400 --> 00:01:14,840 +utilisateurs + +216 +00:01:14,840 --> 00:01:15,000 +et les + +217 +00:01:15,000 --> 00:01:15,220 +services, + +218 +00:01:15,220 --> 00:01:15,420 +d'être + +219 +00:01:15,420 --> 00:01:15,960 +dépendants + +220 +00:01:15,960 --> 00:01:16,260 +de cet + +221 +00:01:16,260 --> 00:01:16,480 +objet, + +222 +00:01:16,860 --> 00:01:17,080 +d'induire + +223 +00:01:17,080 --> 00:01:17,240 +en + +224 +00:01:17,240 --> 00:01:17,340 +fait + +225 +00:01:17,340 --> 00:01:17,520 +une + +226 +00:01:17,520 --> 00:01:17,880 +espèce + +227 +00:01:17,880 --> 00:01:18,480 +de + +228 +00:01:18,480 --> 00:01:18,600 +relation, + +229 +00:01:18,600 --> 00:01:18,940 +de + +230 +00:01:18,940 --> 00:01:19,520 
+médiation + +231 +00:01:19,520 --> 00:01:19,740 +avec + +232 +00:01:19,740 --> 00:01:19,920 +le + +233 +00:01:19,920 --> 00:01:20,640 +monde + +234 +00:01:20,640 --> 00:01:21,100 +qui + +235 +00:01:21,100 --> 00:01:21,640 +rend + +236 +00:01:21,640 --> 00:01:21,780 +de + +237 +00:01:21,780 --> 00:01:22,020 +l'ampleur + +238 +00:01:22,020 --> 00:01:22,240 +et qui + +239 +00:01:22,240 --> 00:01:22,360 +amène + +240 +00:01:22,360 --> 00:01:22,560 +aussi + +241 +00:01:22,560 --> 00:01:22,720 +à des + +242 +00:01:22,720 --> 00:01:22,900 +formes + +243 +00:01:22,900 --> 00:01:23,020 +de + +244 +00:01:23,020 --> 00:01:23,260 +rejet. + +245 +00:01:23,940 --> 00:01:24,940 +Donc, + +246 +00:01:24,940 --> 00:01:24,980 +à + +247 +00:01:24,980 --> 00:01:25,360 +objet + +248 +00:01:25,360 --> 00:01:26,560 +inédit, + +249 +00:01:26,560 --> 00:01:27,000 +rapport + +250 +00:01:27,000 --> 00:01:27,800 +inédit. + +251 +00:01:28,020 --> 00:01:28,860 +Et ce + +252 +00:01:28,860 --> 00:01:29,280 +rapport, + +253 +00:01:29,280 --> 00:01:29,560 +si + +254 +00:01:29,560 --> 00:01:29,840 +j'en + +255 +00:01:29,840 --> 00:01:29,880 +crois + +256 +00:01:29,880 --> 00:01:30,540 +Nicolas, + +257 +00:01:30,540 --> 00:01:30,940 +serait + +258 +00:01:30,940 --> 00:01:31,800 +caractérisé + +259 +00:01:31,800 --> 00:01:32,440 +par un + +260 +00:01:32,440 --> 00:01:32,980 +mélange + +261 +00:01:32,980 --> 00:01:33,400 +de + +262 +00:01:33,400 --> 00:01:34,240 +dépendance + +263 +00:01:34,240 --> 00:01:34,680 +et de + +264 +00:01:34,680 --> 00:01:35,140 +rejet. 
+ +265 +00:01:35,780 --> 00:01:36,380 +Bon, + +266 +00:01:36,380 --> 00:01:36,520 +en + +267 +00:01:36,520 --> 00:01:37,140 +vrai, + +268 +00:01:37,140 --> 00:01:37,180 +il + +269 +00:01:37,180 --> 00:01:37,580 +faudrait + +270 +00:01:37,580 --> 00:01:38,080 +remonter + +271 +00:01:38,080 --> 00:01:38,580 +très + +272 +00:01:38,580 --> 00:01:38,700 +très + +273 +00:01:38,700 --> 00:01:39,320 +finement + +274 +00:01:39,320 --> 00:01:39,700 +toute + +275 +00:01:39,700 --> 00:01:40,060 +l'histoire + +276 +00:01:40,060 --> 00:01:40,240 +des + +277 +00:01:40,240 --> 00:01:40,480 +objets + +278 +00:01:40,480 --> 00:01:41,020 +techniques + +279 +00:01:41,020 --> 00:01:41,680 +et de + +280 +00:01:41,680 --> 00:01:41,840 +leur + +281 +00:01:41,840 --> 00:01:42,320 +insertion + +282 +00:01:42,320 --> 00:01:42,480 +dans + +283 +00:01:42,480 --> 00:01:42,660 +nos + +284 +00:01:42,660 --> 00:01:42,860 +vies + +285 +00:01:42,900 --> 00:01:43,060 +pour + +286 +00:01:43,060 --> 00:01:43,660 +déterminer + +287 +00:01:43,660 --> 00:01:43,940 +si ce + +288 +00:01:43,940 --> 00:01:44,260 +rapport + +289 +00:01:44,260 --> 00:01:44,740 +est + +290 +00:01:44,740 --> 00:01:45,300 +totalement + +291 +00:01:45,300 --> 00:01:45,740 +inédit. + +292 +00:01:46,100 --> 00:01:46,360 +Mais + +293 +00:01:46,360 --> 00:01:46,920 +j'ai + +294 +00:01:46,920 --> 00:01:47,360 +l'impression + +295 +00:01:47,360 --> 00:01:47,560 +comme + +296 +00:01:47,560 --> 00:01:47,960 +ça que + +297 +00:01:47,960 --> 00:01:48,460 +Nicolas + +298 +00:01:48,460 --> 00:01:48,700 +ne se + +299 +00:01:48,700 --> 00:01:48,880 +trompe + +300 +00:01:48,880 --> 00:01:49,080 +pas + +301 +00:01:49,080 --> 00:01:49,340 +vraiment. 
+ +302 +00:01:49,880 --> 00:01:50,080 +Pour + +303 +00:01:50,080 --> 00:01:50,240 +autant + +304 +00:01:50,240 --> 00:01:50,520 +que je + +305 +00:01:50,520 --> 00:01:51,140 +sache, + +306 +00:01:51,140 --> 00:01:51,360 +il y a + +307 +00:01:51,360 --> 00:01:51,680 +eu + +308 +00:01:51,680 --> 00:01:51,880 +plein + +309 +00:01:51,880 --> 00:01:52,060 +de + +310 +00:01:52,060 --> 00:01:52,600 +discussions + +311 +00:01:52,600 --> 00:01:52,940 +autour + +312 +00:01:52,940 --> 00:01:53,520 +de la + +313 +00:01:53,520 --> 00:01:53,860 +voiture + +314 +00:01:53,860 --> 00:01:54,060 +ou + +315 +00:01:54,060 --> 00:01:54,440 +même + +316 +00:01:54,440 --> 00:01:54,600 +du + +317 +00:01:54,600 --> 00:01:54,980 +téléphone. + +318 +00:01:55,340 --> 00:01:55,720 +Mais + +319 +00:01:55,720 --> 00:01:56,020 +la + +320 +00:01:56,020 --> 00:01:56,400 +dépendance + +321 +00:01:56,400 --> 00:01:56,620 +n'était + +322 +00:01:56,620 --> 00:01:57,160 +pas du + +323 +00:01:57,160 --> 00:01:57,460 +même + +324 +00:01:57,460 --> 00:01:57,780 +ordre. + +325 +00:01:57,780 --> 00:01:57,980 +Donc + +326 +00:01:57,980 --> 00:01:58,340 +le + +327 +00:01:58,340 --> 00:01:58,620 +rejet + +328 +00:01:58,620 --> 00:01:58,780 +non + +329 +00:01:58,780 --> 00:01:58,940 +plus + +330 +00:01:58,940 --> 00:01:59,120 +n'était + +331 +00:01:59,120 --> 00:01:59,380 +pas du + +332 +00:01:59,380 --> 00:01:59,560 +même + +333 +00:01:59,560 --> 00:01:59,840 +ordre. + +334 +00:01:59,980 --> 00:02:00,180 +On + +335 +00:02:00,180 --> 00:02:00,380 +peut + +336 +00:02:00,380 --> 00:02:00,660 +adorer + +337 +00:02:00,660 --> 00:02:00,880 +sa + +338 +00:02:00,880 --> 00:02:01,460 +bagnole, + +339 +00:02:01,460 --> 00:02:01,560 +en + +340 +00:02:01,560 --> 00:02:01,740 +avoir + +341 +00:02:01,740 --> 00:02:02,100 +besoin + +342 +00:02:02,100 --> 00:02:02,340 +pour + +343 +00:02:02,340 --> 00:02:02,680 +plein + +344 +00:02:02,680 --> 00:02:02,800 +de + +345 +00:02:02,800 --> 00:02:03,020 +choses. 
+ +346 +00:02:03,280 --> 00:02:03,460 +Et + +347 +00:02:03,460 --> 00:02:03,860 +bien, + +348 +00:02:03,860 --> 00:02:03,980 +le + +349 +00:02:03,980 --> 00:02:04,680 +soir, + +350 +00:02:04,680 --> 00:02:04,900 +quand + +351 +00:02:04,900 --> 00:02:05,140 +on va + +352 +00:02:05,140 --> 00:02:05,380 +se + +353 +00:02:05,380 --> 00:02:05,800 +coucher, + +354 +00:02:05,800 --> 00:02:06,220 +on la + +355 +00:02:06,220 --> 00:02:06,360 +laisse. + +356 +00:02:06,980 --> 00:02:07,360 +On ne + +357 +00:02:07,360 --> 00:02:07,480 +l'a + +358 +00:02:07,480 --> 00:02:07,680 +pas + +359 +00:02:07,680 --> 00:02:07,800 +dans + +360 +00:02:07,800 --> 00:02:08,060 +la + +361 +00:02:08,060 --> 00:02:08,260 +main + +362 +00:02:08,260 --> 00:02:08,440 +quand + +363 +00:02:08,440 --> 00:02:08,680 +on est + +364 +00:02:08,680 --> 00:02:09,040 +au + +365 +00:02:09,040 --> 00:02:09,140 +lit, + +366 +00:02:09,140 --> 00:02:09,300 +on ne + +367 +00:02:09,300 --> 00:02:09,500 +l'emmène + +368 +00:02:09,500 --> 00:02:09,860 +pas au + +369 +00:02:09,860 --> 00:02:10,480 +chiottes. 
+ +370 +00:02:10,860 --> 00:02:11,040 +On + +371 +00:02:11,040 --> 00:02:11,280 +pouvait + +372 +00:02:11,280 --> 00:02:11,480 +être + +373 +00:02:11,480 --> 00:02:12,220 +énervé + +374 +00:02:12,220 --> 00:02:12,440 +par + +375 +00:02:12,440 --> 00:02:12,700 +son + +376 +00:02:12,700 --> 00:02:13,100 +môme + +377 +00:02:13,100 --> 00:02:13,340 +qui + +378 +00:02:13,340 --> 00:02:13,760 +occupait + +379 +00:02:13,760 --> 00:02:13,800 +la + +380 +00:02:13,800 --> 00:02:14,080 +ligne + +381 +00:02:14,080 --> 00:02:14,140 +de + +382 +00:02:14,140 --> 00:02:14,600 +téléphone + +383 +00:02:14,600 --> 00:02:14,820 +pendant + +384 +00:02:14,820 --> 00:02:15,200 +une + +385 +00:02:15,200 --> 00:02:15,360 +heure + +386 +00:02:15,360 --> 00:02:15,540 +chaque + +387 +00:02:15,540 --> 00:02:15,800 +soir + +388 +00:02:15,800 --> 00:02:15,960 +pour + +389 +00:02:15,960 --> 00:02:16,280 +discuter + +390 +00:02:16,280 --> 00:02:16,480 +avec + +391 +00:02:16,480 --> 00:02:16,600 +un + +392 +00:02:16,600 --> 00:02:16,900 +copain. 
+ +393 +00:02:17,280 --> 00:02:17,460 +Mais + +394 +00:02:17,460 --> 00:02:17,940 +ça ne + +395 +00:02:17,940 --> 00:02:18,400 +ressemblait + +396 +00:02:18,400 --> 00:02:18,940 +pas à + +397 +00:02:18,940 --> 00:02:18,980 +ce + +398 +00:02:18,980 --> 00:02:19,100 +qu'on + +399 +00:02:19,100 --> 00:02:19,480 +peut + +400 +00:02:19,480 --> 00:02:20,120 +ressentir + +401 +00:02:20,120 --> 00:02:20,460 +à voir + +402 +00:02:20,460 --> 00:02:20,680 +ce + +403 +00:02:20,680 --> 00:02:20,920 +même + +404 +00:02:20,920 --> 00:02:21,340 +môme + +405 +00:02:21,340 --> 00:02:21,880 +aujourd'hui, + +406 +00:02:22,140 --> 00:02:22,940 +continuellement + +407 +00:02:22,940 --> 00:02:23,180 +avec + +408 +00:02:23,180 --> 00:02:23,380 +son + +409 +00:02:23,380 --> 00:02:23,760 +smartphone + +410 +00:02:23,760 --> 00:02:23,940 +dans + +411 +00:02:23,940 --> 00:02:24,000 +la + +412 +00:02:24,000 --> 00:02:24,360 +main, + +413 +00:02:24,360 --> 00:02:24,520 +comme + +414 +00:02:24,520 --> 00:02:24,640 +si + +415 +00:02:24,640 --> 00:02:24,820 +c'était + +416 +00:02:24,820 --> 00:02:25,060 +une + +417 +00:02:25,060 --> 00:02:25,220 +sorte + +418 +00:02:25,220 --> 00:02:25,300 +de + +419 +00:02:25,300 --> 00:02:25,820 +pacemaker + +420 +00:02:25,820 --> 00:02:26,300 +externe, + +421 +00:02:26,340 --> 00:02:26,580 +comme + +422 +00:02:26,580 --> 00:02:26,860 +si le + +423 +00:02:26,860 --> 00:02:27,360 +lâcher + +424 +00:02:27,360 --> 00:02:27,560 +allait + +425 +00:02:27,560 --> 00:02:27,860 +entraîner + +426 +00:02:27,860 --> 00:02:28,000 +sa + +427 +00:02:28,000 --> 00:02:28,220 +mort + +428 +00:02:28,220 --> 00:02:28,840 +immédiate. 
+ +429 +00:02:29,040 --> 00:02:29,280 +Bon, + +430 +00:02:29,280 --> 00:02:29,460 +je dis + +431 +00:02:29,460 --> 00:02:29,640 +ça + +432 +00:02:29,640 --> 00:02:29,740 +pour + +433 +00:02:29,740 --> 00:02:29,880 +le + +434 +00:02:29,880 --> 00:02:30,320 +môme, + +435 +00:02:30,320 --> 00:02:30,520 +mais + +436 +00:02:30,520 --> 00:02:30,820 +c'est + +437 +00:02:30,820 --> 00:02:31,140 +évidemment + +438 +00:02:31,140 --> 00:02:31,480 +valable + +439 +00:02:31,480 --> 00:02:31,620 +pour + +440 +00:02:31,620 --> 00:02:31,760 +nous + +441 +00:02:31,760 --> 00:02:31,960 +aussi. + +442 +00:02:32,340 --> 00:02:33,460 +Donc, + +443 +00:02:33,460 --> 00:02:33,660 +rapport + +444 +00:02:33,660 --> 00:02:34,240 +inédit. + +445 +00:02:34,240 --> 00:02:35,480 +D'accord. + +446 +00:02:35,480 --> 00:02:35,820 +Mais + +447 +00:02:35,820 --> 00:02:36,320 +pourquoi + +448 +00:02:36,320 --> 00:02:36,680 +a-t-on + +449 +00:02:36,680 --> 00:02:37,060 +l'impression + +450 +00:02:37,060 --> 00:02:37,260 +qu'on + +451 +00:02:37,260 --> 00:02:37,440 +n'en + +452 +00:02:37,440 --> 00:02:37,900 +sortira + +453 +00:02:37,900 --> 00:02:38,220 +jamais? 
+ +454 +00:02:38,860 --> 00:02:39,340 +Est-ce + +455 +00:02:39,340 --> 00:02:39,460 +qu'il + +456 +00:02:39,460 --> 00:02:39,620 +faut + +457 +00:02:39,620 --> 00:02:39,780 +en + +458 +00:02:39,780 --> 00:02:40,120 +remettre + +459 +00:02:40,120 --> 00:02:40,340 +la + +460 +00:02:40,340 --> 00:02:40,660 +faute + +461 +00:02:40,660 --> 00:02:40,940 +sur + +462 +00:02:40,940 --> 00:02:41,280 +les + +463 +00:02:41,280 --> 00:02:41,440 +gens + +464 +00:02:41,440 --> 00:02:41,580 +qui + +465 +00:02:41,580 --> 00:02:41,720 +ont + +466 +00:02:41,720 --> 00:02:42,300 +créé + +467 +00:02:42,300 --> 00:02:42,460 +cet + +468 +00:02:42,460 --> 00:02:42,780 +outil + +469 +00:02:42,780 --> 00:02:43,340 +merveilleux + +470 +00:02:43,340 --> 00:02:43,500 +et + +471 +00:02:43,500 --> 00:02:43,860 +diabolique, + +472 +00:02:43,860 --> 00:02:43,920 +et + +473 +00:02:43,920 --> 00:02:44,400 +diabolique + +474 +00:02:44,400 --> 00:02:44,660 +parce + +475 +00:02:44,660 --> 00:02:44,840 +que + +476 +00:02:44,840 --> 00:02:45,320 +merveilleux? + +477 +00:02:46,340 --> 00:02:47,040 +Les + +478 +00:02:47,040 --> 00:02:47,480 +économistes + +479 +00:02:47,480 --> 00:02:47,680 +parlent + +480 +00:02:47,680 --> 00:02:47,820 +de + +481 +00:02:47,820 --> 00:02:48,360 +dépendance + +482 +00:02:48,360 --> 00:02:48,500 +du + +483 +00:02:48,500 --> 00:02:48,820 +sentier. 
+ +484 +00:02:48,860 --> 00:02:49,100 +C'est + +485 +00:02:49,100 --> 00:02:49,340 +l'idée + +486 +00:02:49,340 --> 00:02:49,700 +qu'on + +487 +00:02:49,700 --> 00:02:49,880 +est + +488 +00:02:49,880 --> 00:02:50,540 +sur un + +489 +00:02:50,540 --> 00:02:50,780 +sentier + +490 +00:02:50,780 --> 00:02:50,940 +qui a + +491 +00:02:50,940 --> 00:02:51,120 +été + +492 +00:02:51,120 --> 00:02:51,900 +établi, + +493 +00:02:51,900 --> 00:02:52,120 +soit + +494 +00:02:52,120 --> 00:02:52,720 +volontairement + +495 +00:02:52,720 --> 00:02:52,800 +en + +496 +00:02:52,800 --> 00:02:53,060 +marchant + +497 +00:02:53,060 --> 00:02:54,240 +dessus, + +498 +00:02:54,240 --> 00:02:54,920 +soit + +499 +00:02:54,920 --> 00:02:55,360 +en + +500 +00:02:55,360 --> 00:02:55,500 +définissant + +501 +00:02:55,500 --> 00:02:55,760 +des + +502 +00:02:55,760 --> 00:02:56,040 +bornes, + +503 +00:02:56,040 --> 00:02:56,080 +en + +504 +00:02:56,080 --> 00:02:56,580 +définissant + +505 +00:02:56,580 --> 00:02:56,820 +une + +506 +00:02:56,820 --> 00:02:57,420 +signalétique. + diff --git a/tests/expected/split_subtitles/smartphone.mp3_6.vtt b/tests/expected/split_subtitles/smartphone.mp3_6.vtt new file mode 100644 index 0000000000000000000000000000000000000000..61871c111372405e0b3331b5d11e0e1cbd3f450e --- /dev/null +++ b/tests/expected/split_subtitles/smartphone.mp3_6.vtt @@ -0,0 +1,1520 @@ +WEBVTT + +00:00.380 --> 00:00.580 +C'est + +00:00.580 --> 00:00.880 +évident + +00:00.880 --> 00:01.080 +ce que + +00:01.080 --> 00:01.200 +dit + +00:01.200 --> 00:01.780 +Nicolas, + +00:01.780 --> 00:01.900 +mais + +00:01.900 --> 00:02.340 +je ne + +00:02.340 --> 00:02.380 +me + +00:02.380 --> 00:02.580 +l'étais + +00:02.580 --> 00:02.840 +jamais + +00:02.840 --> 00:03.260 +formulé + +00:03.260 --> 00:03.420 +comme + +00:03.420 --> 00:03.620 +ça. 
+ +00:04.080 --> 00:04.340 +Ce qui + +00:04.340 --> 00:04.480 +fait + +00:04.480 --> 00:04.660 +la + +00:04.660 --> 00:05.000 +force + +00:05.000 --> 00:05.200 +du + +00:05.200 --> 00:05.880 +smartphone, + +00:05.880 --> 00:06.120 +c'est + +00:06.120 --> 00:06.260 +pas + +00:06.260 --> 00:06.520 +seulement + +00:06.520 --> 00:07.380 +l'accumulation + +00:07.380 --> 00:07.560 +des + +00:07.560 --> 00:07.920 +fonctions, + +00:08.320 --> 00:08.440 +mais + +00:08.440 --> 00:08.600 +la + +00:08.600 --> 00:08.900 +manière + +00:08.900 --> 00:09.100 +dont + +00:09.100 --> 00:09.480 +elles + +00:09.480 --> 00:10.320 +interagissent + +00:10.320 --> 00:10.580 +entre + +00:10.580 --> 00:10.880 +elles. + +00:10.960 --> 00:11.220 +Ce qui + +00:11.220 --> 00:11.400 +dit + +00:11.400 --> 00:11.560 +d'ailleurs + +00:11.560 --> 00:11.780 +sur la + +00:11.780 --> 00:12.120 +photo, + +00:12.120 --> 00:12.200 +c'est + +00:12.200 --> 00:12.420 +hyper + +00:12.420 --> 00:13.000 +convaincant. + +00:13.340 --> 00:13.620 +Alors + +00:13.620 --> 00:14.340 +évidemment, + +00:14.340 --> 00:14.380 +il + +00:14.380 --> 00:14.740 +faudrait + +00:14.740 --> 00:15.160 +ajouter + +00:15.160 --> 00:15.520 +les + +00:15.520 --> 00:16.020 +interfaces. + +00:16.220 --> 00:16.700 +L'écran + +00:16.700 --> 00:17.060 +tactile + +00:17.060 --> 00:17.880 +a été + +00:17.880 --> 00:18.280 +beaucoup + +00:18.280 --> 00:18.620 +très + +00:18.620 --> 00:18.900 +souvent + +00:18.900 --> 00:19.360 +mentionné. 
+ +00:19.840 --> 00:20.220 +Mais + +00:20.220 --> 00:20.520 +bon, + +00:20.520 --> 00:20.600 +il + +00:20.600 --> 00:20.700 +faut + +00:20.700 --> 00:20.840 +dire + +00:20.840 --> 00:20.960 +qu'il + +00:20.960 --> 00:21.260 +profite + +00:21.260 --> 00:21.680 +aussi + +00:21.680 --> 00:22.100 +de 20 + +00:22.100 --> 00:22.320 +ans + +00:22.320 --> 00:22.480 +pendant + +00:22.480 --> 00:22.920 +lesquels + +00:22.920 --> 00:23.040 +les + +00:23.040 --> 00:23.540 +ordinateurs + +00:23.540 --> 00:23.720 +nous + +00:23.720 --> 00:23.820 +ont + +00:23.820 --> 00:24.100 +appris + +00:24.100 --> 00:24.240 +à + +00:24.240 --> 00:24.500 +cliquer + +00:24.500 --> 00:24.660 +sur + +00:24.660 --> 00:24.940 +des + +00:24.940 --> 00:25.260 +icônes. + +00:25.420 --> 00:25.760 +Sauf + +00:25.760 --> 00:26.660 +que le + +00:26.660 --> 00:27.060 +smartphone + +00:27.060 --> 00:27.440 +ajoute + +00:27.440 --> 00:27.620 +le + +00:27.620 --> 00:28.180 +toucher, + +00:28.180 --> 00:28.280 +ce qui + +00:28.280 --> 00:28.480 +rend + +00:28.480 --> 00:28.680 +le + +00:28.680 --> 00:29.100 +contact + +00:29.100 --> 00:29.460 +plus + +00:29.460 --> 00:30.220 +direct, + +00:30.220 --> 00:30.260 +plus + +00:30.260 --> 00:30.640 +sensible. 
+ +00:31.040 --> 00:31.220 +Et + +00:31.220 --> 00:31.360 +puis + +00:31.360 --> 00:31.700 +évidemment, + +00:31.700 --> 00:31.740 +il + +00:31.740 --> 00:31.940 +faudrait + +00:31.940 --> 00:32.120 +parler + +00:32.120 --> 00:32.340 +aussi + +00:32.340 --> 00:32.480 +des + +00:32.480 --> 00:32.900 +applications + +00:32.900 --> 00:33.180 +qui + +00:33.180 --> 00:33.740 +permettent + +00:33.740 --> 00:33.960 +de + +00:33.960 --> 00:34.420 +contourner + +00:34.420 --> 00:34.520 +le + +00:34.520 --> 00:34.800 +côté + +00:34.800 --> 00:35.320 +touffu + +00:35.320 --> 00:35.780 +de la + +00:35.780 --> 00:36.240 +navigation + +00:36.240 --> 00:36.600 +web + +00:36.600 --> 00:36.780 +pour + +00:36.780 --> 00:36.980 +aller + +00:36.980 --> 00:37.520 +directement + +00:37.520 --> 00:37.680 +au + +00:37.680 --> 00:37.820 +but. + +00:37.820 --> 00:38.760 +Bref, + +00:38.760 --> 00:38.980 +tout + +00:38.980 --> 00:39.420 +ça, + +00:39.420 --> 00:39.880 +ce sont + +00:39.880 --> 00:40.160 +les + +00:40.160 --> 00:40.680 +conditions + +00:40.680 --> 00:40.960 +qui + +00:40.960 --> 00:41.460 +permettent + +00:41.460 --> 00:41.600 +de + +00:41.600 --> 00:42.060 +créer + +00:42.060 --> 00:42.380 +cet + +00:42.380 --> 00:42.600 +objet + +00:42.600 --> 00:42.800 +dont + +00:42.800 --> 00:43.260 +Nicolas + +00:43.260 --> 00:43.500 +dit + +00:43.500 --> 00:43.700 +qu'il + +00:43.700 --> 00:43.880 +est + +00:43.880 --> 00:44.980 +vraisemblablement + +00:44.980 --> 00:45.380 +inédit + +00:45.380 --> 00:45.700 +dans + +00:45.700 --> 00:45.980 +l'histoire + +00:45.980 --> 00:46.180 +de + +00:46.180 --> 00:46.580 +l'humanité. + +00:46.600 --> 00:47.240 +Mais + +00:47.240 --> 00:47.720 +ça, + +00:47.720 --> 00:47.840 +ça soulève + +00:47.840 --> 00:48.020 +une + +00:48.020 --> 00:48.260 +autre + +00:48.260 --> 00:48.820 +interrogation. 
+ +00:49.220 --> 00:49.620 +Est-ce + +00:49.620 --> 00:49.820 +que le + +00:49.820 --> 00:49.980 +fait + +00:49.980 --> 00:50.140 +que + +00:50.140 --> 00:50.320 +cet + +00:50.320 --> 00:50.660 +objet + +00:50.660 --> 00:51.120 +soit + +00:51.120 --> 00:51.800 +inédit + +00:51.800 --> 00:52.320 +induit + +00:52.320 --> 00:52.420 +que + +00:52.420 --> 00:52.720 +notre + +00:52.720 --> 00:53.280 +rapport + +00:53.280 --> 00:53.660 +à lui + +00:53.660 --> 00:54.020 +est + +00:54.020 --> 00:54.540 +aussi + +00:54.540 --> 00:54.700 +un + +00:54.700 --> 00:55.000 +rapport + +00:55.000 --> 00:55.460 +inédit? + +00:55.460 --> 00:55.900 +Je + +00:55.900 --> 00:56.000 +veux + +00:56.000 --> 00:56.240 +dire, + +00:56.240 --> 00:56.360 +est-ce + +00:56.360 --> 00:56.580 +que le + +00:56.580 --> 00:56.880 +rapport + +00:56.880 --> 00:57.040 +qu'on + +00:57.040 --> 00:57.280 +a au + +00:57.280 --> 00:57.600 +smartphone + +00:57.600 --> 00:57.920 +est + +00:57.920 --> 00:58.240 +comparable + +00:58.240 --> 00:58.480 +à + +00:58.480 --> 00:58.660 +celui + +00:58.660 --> 00:58.900 +qu'on + +00:58.900 --> 00:59.320 +entretenait + +00:59.320 --> 00:59.460 +à + +00:59.460 --> 00:59.700 +d'autres + +00:59.700 --> 00:59.960 +objets + +00:59.960 --> 01:00.460 +techniques + +01:00.460 --> 01:00.880 +comme + +01:00.880 --> 01:01.500 +la + +01:01.500 --> 01:02.060 +voiture + +01:02.060 --> 01:02.680 +ou le + +01:02.680 --> 01:03.120 +téléphone? + +01:03.360 --> 01:05.480 +Il n'y + +01:05.480 --> 01:05.660 +a pas + +01:05.660 --> 01:06.220 +d'équivalent + +01:06.220 --> 01:06.420 +en + +01:06.420 --> 01:06.660 +fait. 
+ +01:06.880 --> 01:06.980 +Et + +01:06.980 --> 01:07.080 +donc + +01:07.080 --> 01:07.280 +cette + +01:07.280 --> 01:07.540 +espèce + +01:07.540 --> 01:07.680 +de + +01:07.680 --> 01:08.480 +nouveauté + +01:08.480 --> 01:08.660 +dans + +01:08.660 --> 01:08.940 +la + +01:08.940 --> 01:09.220 +relation + +01:09.220 --> 01:09.380 +à + +01:09.380 --> 01:10.240 +l'objet, + +01:10.240 --> 01:10.380 +c'est + +01:10.380 --> 01:10.640 +fascinant + +01:10.640 --> 01:10.760 +et + +01:10.760 --> 01:11.520 +terrifiant. + +01:11.620 --> 01:11.860 +Parce + +01:11.860 --> 01:12.120 +qu'on + +01:12.120 --> 01:12.440 +a + +01:12.440 --> 01:13.560 +l'impression, + +01:13.560 --> 01:13.840 +comme + +01:13.840 --> 01:14.000 +le + +01:14.000 --> 01:14.200 +disent + +01:14.200 --> 01:14.400 +les + +01:14.400 --> 01:14.840 +utilisateurs + +01:14.840 --> 01:15.000 +et les + +01:15.000 --> 01:15.220 +services, + +01:15.220 --> 01:15.420 +d'être + +01:15.420 --> 01:15.960 +dépendants + +01:15.960 --> 01:16.260 +de cet + +01:16.260 --> 01:16.480 +objet, + +01:16.860 --> 01:17.080 +d'induire + +01:17.080 --> 01:17.240 +en + +01:17.240 --> 01:17.340 +fait + +01:17.340 --> 01:17.520 +une + +01:17.520 --> 01:17.880 +espèce + +01:17.880 --> 01:18.480 +de + +01:18.480 --> 01:18.600 +relation, + +01:18.600 --> 01:18.940 +de + +01:18.940 --> 01:19.520 +médiation + +01:19.520 --> 01:19.740 +avec + +01:19.740 --> 01:19.920 +le + +01:19.920 --> 01:20.640 +monde + +01:20.640 --> 01:21.100 +qui + +01:21.100 --> 01:21.640 +rend + +01:21.640 --> 01:21.780 +de + +01:21.780 --> 01:22.020 +l'ampleur + +01:22.020 --> 01:22.240 +et qui + +01:22.240 --> 01:22.360 +amène + +01:22.360 --> 01:22.560 +aussi + +01:22.560 --> 01:22.720 +à des + +01:22.720 --> 01:22.900 +formes + +01:22.900 --> 01:23.020 +de + +01:23.020 --> 01:23.260 +rejet. 
+ +01:23.940 --> 01:24.940 +Donc, + +01:24.940 --> 01:24.980 +à + +01:24.980 --> 01:25.360 +objet + +01:25.360 --> 01:26.560 +inédit, + +01:26.560 --> 01:27.000 +rapport + +01:27.000 --> 01:27.800 +inédit. + +01:28.020 --> 01:28.860 +Et ce + +01:28.860 --> 01:29.280 +rapport, + +01:29.280 --> 01:29.560 +si + +01:29.560 --> 01:29.840 +j'en + +01:29.840 --> 01:29.880 +crois + +01:29.880 --> 01:30.540 +Nicolas, + +01:30.540 --> 01:30.940 +serait + +01:30.940 --> 01:31.800 +caractérisé + +01:31.800 --> 01:32.440 +par un + +01:32.440 --> 01:32.980 +mélange + +01:32.980 --> 01:33.400 +de + +01:33.400 --> 01:34.240 +dépendance + +01:34.240 --> 01:34.680 +et de + +01:34.680 --> 01:35.140 +rejet. + +01:35.780 --> 01:36.380 +Bon, + +01:36.380 --> 01:36.520 +en + +01:36.520 --> 01:37.140 +vrai, + +01:37.140 --> 01:37.180 +il + +01:37.180 --> 01:37.580 +faudrait + +01:37.580 --> 01:38.080 +remonter + +01:38.080 --> 01:38.580 +très + +01:38.580 --> 01:38.700 +très + +01:38.700 --> 01:39.320 +finement + +01:39.320 --> 01:39.700 +toute + +01:39.700 --> 01:40.060 +l'histoire + +01:40.060 --> 01:40.240 +des + +01:40.240 --> 01:40.480 +objets + +01:40.480 --> 01:41.020 +techniques + +01:41.020 --> 01:41.680 +et de + +01:41.680 --> 01:41.840 +leur + +01:41.840 --> 01:42.320 +insertion + +01:42.320 --> 01:42.480 +dans + +01:42.480 --> 01:42.660 +nos + +01:42.660 --> 01:42.860 +vies + +01:42.900 --> 01:43.060 +pour + +01:43.060 --> 01:43.660 +déterminer + +01:43.660 --> 01:43.940 +si ce + +01:43.940 --> 01:44.260 +rapport + +01:44.260 --> 01:44.740 +est + +01:44.740 --> 01:45.300 +totalement + +01:45.300 --> 01:45.740 +inédit. + +01:46.100 --> 01:46.360 +Mais + +01:46.360 --> 01:46.920 +j'ai + +01:46.920 --> 01:47.360 +l'impression + +01:47.360 --> 01:47.560 +comme + +01:47.560 --> 01:47.960 +ça que + +01:47.960 --> 01:48.460 +Nicolas + +01:48.460 --> 01:48.700 +ne se + +01:48.700 --> 01:48.880 +trompe + +01:48.880 --> 01:49.080 +pas + +01:49.080 --> 01:49.340 +vraiment. 
+ +01:49.880 --> 01:50.080 +Pour + +01:50.080 --> 01:50.240 +autant + +01:50.240 --> 01:50.520 +que je + +01:50.520 --> 01:51.140 +sache, + +01:51.140 --> 01:51.360 +il y a + +01:51.360 --> 01:51.680 +eu + +01:51.680 --> 01:51.880 +plein + +01:51.880 --> 01:52.060 +de + +01:52.060 --> 01:52.600 +discussions + +01:52.600 --> 01:52.940 +autour + +01:52.940 --> 01:53.520 +de la + +01:53.520 --> 01:53.860 +voiture + +01:53.860 --> 01:54.060 +ou + +01:54.060 --> 01:54.440 +même + +01:54.440 --> 01:54.600 +du + +01:54.600 --> 01:54.980 +téléphone. + +01:55.340 --> 01:55.720 +Mais + +01:55.720 --> 01:56.020 +la + +01:56.020 --> 01:56.400 +dépendance + +01:56.400 --> 01:56.620 +n'était + +01:56.620 --> 01:57.160 +pas du + +01:57.160 --> 01:57.460 +même + +01:57.460 --> 01:57.780 +ordre. + +01:57.780 --> 01:57.980 +Donc + +01:57.980 --> 01:58.340 +le + +01:58.340 --> 01:58.620 +rejet + +01:58.620 --> 01:58.780 +non + +01:58.780 --> 01:58.940 +plus + +01:58.940 --> 01:59.120 +n'était + +01:59.120 --> 01:59.380 +pas du + +01:59.380 --> 01:59.560 +même + +01:59.560 --> 01:59.840 +ordre. + +01:59.980 --> 02:00.180 +On + +02:00.180 --> 02:00.380 +peut + +02:00.380 --> 02:00.660 +adorer + +02:00.660 --> 02:00.880 +sa + +02:00.880 --> 02:01.460 +bagnole, + +02:01.460 --> 02:01.560 +en + +02:01.560 --> 02:01.740 +avoir + +02:01.740 --> 02:02.100 +besoin + +02:02.100 --> 02:02.340 +pour + +02:02.340 --> 02:02.680 +plein + +02:02.680 --> 02:02.800 +de + +02:02.800 --> 02:03.020 +choses. + +02:03.280 --> 02:03.460 +Et + +02:03.460 --> 02:03.860 +bien, + +02:03.860 --> 02:03.980 +le + +02:03.980 --> 02:04.680 +soir, + +02:04.680 --> 02:04.900 +quand + +02:04.900 --> 02:05.140 +on va + +02:05.140 --> 02:05.380 +se + +02:05.380 --> 02:05.800 +coucher, + +02:05.800 --> 02:06.220 +on la + +02:06.220 --> 02:06.360 +laisse. 
+ +02:06.980 --> 02:07.360 +On ne + +02:07.360 --> 02:07.480 +l'a + +02:07.480 --> 02:07.680 +pas + +02:07.680 --> 02:07.800 +dans + +02:07.800 --> 02:08.060 +la + +02:08.060 --> 02:08.260 +main + +02:08.260 --> 02:08.440 +quand + +02:08.440 --> 02:08.680 +on est + +02:08.680 --> 02:09.040 +au + +02:09.040 --> 02:09.140 +lit, + +02:09.140 --> 02:09.300 +on ne + +02:09.300 --> 02:09.500 +l'emmène + +02:09.500 --> 02:09.860 +pas au + +02:09.860 --> 02:10.480 +chiottes. + +02:10.860 --> 02:11.040 +On + +02:11.040 --> 02:11.280 +pouvait + +02:11.280 --> 02:11.480 +être + +02:11.480 --> 02:12.220 +énervé + +02:12.220 --> 02:12.440 +par + +02:12.440 --> 02:12.700 +son + +02:12.700 --> 02:13.100 +môme + +02:13.100 --> 02:13.340 +qui + +02:13.340 --> 02:13.760 +occupait + +02:13.760 --> 02:13.800 +la + +02:13.800 --> 02:14.080 +ligne + +02:14.080 --> 02:14.140 +de + +02:14.140 --> 02:14.600 +téléphone + +02:14.600 --> 02:14.820 +pendant + +02:14.820 --> 02:15.200 +une + +02:15.200 --> 02:15.360 +heure + +02:15.360 --> 02:15.540 +chaque + +02:15.540 --> 02:15.800 +soir + +02:15.800 --> 02:15.960 +pour + +02:15.960 --> 02:16.280 +discuter + +02:16.280 --> 02:16.480 +avec + +02:16.480 --> 02:16.600 +un + +02:16.600 --> 02:16.900 +copain. 
+ +02:17.280 --> 02:17.460 +Mais + +02:17.460 --> 02:17.940 +ça ne + +02:17.940 --> 02:18.400 +ressemblait + +02:18.400 --> 02:18.940 +pas à + +02:18.940 --> 02:18.980 +ce + +02:18.980 --> 02:19.100 +qu'on + +02:19.100 --> 02:19.480 +peut + +02:19.480 --> 02:20.120 +ressentir + +02:20.120 --> 02:20.460 +à voir + +02:20.460 --> 02:20.680 +ce + +02:20.680 --> 02:20.920 +même + +02:20.920 --> 02:21.340 +môme + +02:21.340 --> 02:21.880 +aujourd'hui, + +02:22.140 --> 02:22.940 +continuellement + +02:22.940 --> 02:23.180 +avec + +02:23.180 --> 02:23.380 +son + +02:23.380 --> 02:23.760 +smartphone + +02:23.760 --> 02:23.940 +dans + +02:23.940 --> 02:24.000 +la + +02:24.000 --> 02:24.360 +main, + +02:24.360 --> 02:24.520 +comme + +02:24.520 --> 02:24.640 +si + +02:24.640 --> 02:24.820 +c'était + +02:24.820 --> 02:25.060 +une + +02:25.060 --> 02:25.220 +sorte + +02:25.220 --> 02:25.300 +de + +02:25.300 --> 02:25.820 +pacemaker + +02:25.820 --> 02:26.300 +externe, + +02:26.340 --> 02:26.580 +comme + +02:26.580 --> 02:26.860 +si le + +02:26.860 --> 02:27.360 +lâcher + +02:27.360 --> 02:27.560 +allait + +02:27.560 --> 02:27.860 +entraîner + +02:27.860 --> 02:28.000 +sa + +02:28.000 --> 02:28.220 +mort + +02:28.220 --> 02:28.840 +immédiate. + +02:29.040 --> 02:29.280 +Bon, + +02:29.280 --> 02:29.460 +je dis + +02:29.460 --> 02:29.640 +ça + +02:29.640 --> 02:29.740 +pour + +02:29.740 --> 02:29.880 +le + +02:29.880 --> 02:30.320 +môme, + +02:30.320 --> 02:30.520 +mais + +02:30.520 --> 02:30.820 +c'est + +02:30.820 --> 02:31.140 +évidemment + +02:31.140 --> 02:31.480 +valable + +02:31.480 --> 02:31.620 +pour + +02:31.620 --> 02:31.760 +nous + +02:31.760 --> 02:31.960 +aussi. + +02:32.340 --> 02:33.460 +Donc, + +02:33.460 --> 02:33.660 +rapport + +02:33.660 --> 02:34.240 +inédit. + +02:34.240 --> 02:35.480 +D'accord. 
+ +02:35.480 --> 02:35.820 +Mais + +02:35.820 --> 02:36.320 +pourquoi + +02:36.320 --> 02:36.680 +a-t-on + +02:36.680 --> 02:37.060 +l'impression + +02:37.060 --> 02:37.260 +qu'on + +02:37.260 --> 02:37.440 +n'en + +02:37.440 --> 02:37.900 +sortira + +02:37.900 --> 02:38.220 +jamais? + +02:38.860 --> 02:39.340 +Est-ce + +02:39.340 --> 02:39.460 +qu'il + +02:39.460 --> 02:39.620 +faut + +02:39.620 --> 02:39.780 +en + +02:39.780 --> 02:40.120 +remettre + +02:40.120 --> 02:40.340 +la + +02:40.340 --> 02:40.660 +faute + +02:40.660 --> 02:40.940 +sur + +02:40.940 --> 02:41.280 +les + +02:41.280 --> 02:41.440 +gens + +02:41.440 --> 02:41.580 +qui + +02:41.580 --> 02:41.720 +ont + +02:41.720 --> 02:42.300 +créé + +02:42.300 --> 02:42.460 +cet + +02:42.460 --> 02:42.780 +outil + +02:42.780 --> 02:43.340 +merveilleux + +02:43.340 --> 02:43.500 +et + +02:43.500 --> 02:43.860 +diabolique, + +02:43.860 --> 02:43.920 +et + +02:43.920 --> 02:44.400 +diabolique + +02:44.400 --> 02:44.660 +parce + +02:44.660 --> 02:44.840 +que + +02:44.840 --> 02:45.320 +merveilleux? + +02:46.340 --> 02:47.040 +Les + +02:47.040 --> 02:47.480 +économistes + +02:47.480 --> 02:47.680 +parlent + +02:47.680 --> 02:47.820 +de + +02:47.820 --> 02:48.360 +dépendance + +02:48.360 --> 02:48.500 +du + +02:48.500 --> 02:48.820 +sentier. 
+ +02:48.860 --> 02:49.100 +C'est + +02:49.100 --> 02:49.340 +l'idée + +02:49.340 --> 02:49.700 +qu'on + +02:49.700 --> 02:49.880 +est + +02:49.880 --> 02:50.540 +sur un + +02:50.540 --> 02:50.780 +sentier + +02:50.780 --> 02:50.940 +qui a + +02:50.940 --> 02:51.120 +été + +02:51.120 --> 02:51.900 +établi, + +02:51.900 --> 02:52.120 +soit + +02:52.120 --> 02:52.720 +volontairement + +02:52.720 --> 02:52.800 +en + +02:52.800 --> 02:53.060 +marchant + +02:53.060 --> 02:54.240 +dessus, + +02:54.240 --> 02:54.920 +soit + +02:54.920 --> 02:55.360 +en + +02:55.360 --> 02:55.500 +définissant + +02:55.500 --> 02:55.760 +des + +02:55.760 --> 02:56.040 +bornes, + +02:56.040 --> 02:56.080 +en + +02:56.080 --> 02:56.580 +définissant + +02:56.580 --> 02:56.820 +une + +02:56.820 --> 02:57.420 +signalétique. + diff --git a/tests/expected/tiny.en.cpu/nocond_bonjour_vous_allez_bien.mp3.words.json b/tests/expected/tiny.en.cpu/nocond_bonjour_vous_allez_bien.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..be3e848ef767849ee49d451215e9eb018ec0f049 --- /dev/null +++ b/tests/expected/tiny.en.cpu/nocond_bonjour_vous_allez_bien.mp3.words.json @@ -0,0 +1,204 @@ +{ + "text": " Mohoo! Let's go with it again! Mohoo! Mohoo! Mohoo! Let's go with it again!", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.44, + "end": 2.98, + "text": " Mohoo! 
Let's go with it again!", + "tokens": [ + 50363, + 337, + 1219, + 2238, + 0, + 3914, + 338, + 467, + 351, + 340, + 757, + 0, + 50513 + ], + "temperature": 0.0, + "avg_logprob": -0.8020243644714355, + "compression_ratio": 0.7894736842105263, + "no_speech_prob": 0.11818066984415054, + "confidence": 0.496, + "words": [ + { + "text": "Mohoo!", + "start": 0.44, + "end": 1.72, + "confidence": 0.271 + }, + { + "text": "Let's", + "start": 1.72, + "end": 2.04, + "confidence": 0.678 + }, + { + "text": "go", + "start": 2.04, + "end": 2.18, + "confidence": 0.914 + }, + { + "text": "with", + "start": 2.18, + "end": 2.38, + "confidence": 0.234 + }, + { + "text": "it", + "start": 2.38, + "end": 2.44, + "confidence": 0.933 + }, + { + "text": "again!", + "start": 2.44, + "end": 2.98, + "confidence": 0.996 + } + ] + }, + { + "id": 1, + "seek": 3000, + "start": 30.06, + "end": 31.25, + "text": " Mohoo!", + "tokens": [ + 337, + 1219, + 2238, + 0 + ], + "temperature": 0.0, + "avg_logprob": -0.8320662379264832, + "compression_ratio": 0.7894736842105263, + "no_speech_prob": 0.15341022610664368, + "confidence": 0.308, + "words": [ + { + "text": "Mohoo!", + "start": 30.06, + "end": 31.25, + "confidence": 0.308 + } + ] + }, + { + "id": 2, + "seek": 3100, + "start": 31.25, + "end": 32.48, + "text": " Mohoo!", + "tokens": [ + 337, + 1219, + 2238, + 0 + ], + "temperature": 0.0, + "avg_logprob": -0.767271101474762, + "compression_ratio": 0.7894736842105263, + "no_speech_prob": 0.1705959141254425, + "confidence": 0.329, + "words": [ + { + "text": "Mohoo!", + "start": 31.25, + "end": 32.48, + "confidence": 0.329 + } + ] + }, + { + "id": 3, + "seek": 3200, + "start": 32.98, + "end": 33.8, + "text": " Mohoo!", + "tokens": [ + 337, + 1219, + 2238, + 0 + ], + "temperature": 0.0, + "avg_logprob": -0.5514491200447083, + "compression_ratio": 0.7894736842105263, + "no_speech_prob": 0.1296440064907074, + "confidence": 0.338, + "words": [ + { + "text": "Mohoo!", + "start": 32.98, + "end": 33.8, + 
"confidence": 0.338 + } + ] + }, + { + "id": 4, + "seek": 3400, + "start": 34.4, + "end": 35.48, + "text": " Let's go with it again!", + "tokens": [ + 50363, + 3914, + 338, + 467, + 351, + 340, + 757, + 0, + 50463 + ], + "temperature": 0.0, + "avg_logprob": -0.3764993667602539, + "compression_ratio": 0.7419354838709677, + "no_speech_prob": 0.05320969223976135, + "confidence": 0.815, + "words": [ + { + "text": "Let's", + "start": 34.4, + "end": 34.58, + "confidence": 0.862 + }, + { + "text": "go", + "start": 34.58, + "end": 34.76, + "confidence": 0.923 + }, + { + "text": "with", + "start": 34.76, + "end": 34.96, + "confidence": 0.627 + }, + { + "text": "it", + "start": 34.96, + "end": 35.0, + "confidence": 0.686 + }, + { + "text": "again!", + "start": 35.0, + "end": 35.48, + "confidence": 0.992 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/tiny.en/accurate_bonjour_vous_allez_bien.mp3.words.json b/tests/expected/tiny.en/accurate_bonjour_vous_allez_bien.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..01ea635f651d61c29bbb25c54b17aa4d20353f18 --- /dev/null +++ b/tests/expected/tiny.en/accurate_bonjour_vous_allez_bien.mp3.words.json @@ -0,0 +1,134 @@ +{ + "text": " Mohoo, let's go with it again Mohoo, let's go with it again", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.42, + "end": 2.84, + "text": " Mohoo, let's go with it again", + "tokens": [ + 50363, + 337, + 1219, + 2238, + 11, + 1309, + 338, + 467, + 351, + 340, + 757, + 50501 + ], + "temperature": 0.0, + "avg_logprob": -0.769197390629695, + "compression_ratio": 0.7837837837837838, + "no_speech_prob": 0.11794308573007584, + "confidence": 0.485, + "words": [ + { + "text": "Mohoo,", + "start": 0.42, + "end": 1.38, + "confidence": 0.245 + }, + { + "text": "let's", + "start": 1.92, + "end": 2.2, + "confidence": 0.91 + }, + { + "text": "go", + "start": 2.2, + "end": 2.3, + "confidence": 0.908 + }, + { + "text": "with", + 
"start": 2.3, + "end": 2.46, + "confidence": 0.286 + }, + { + "text": "it", + "start": 2.46, + "end": 2.56, + "confidence": 0.939 + }, + { + "text": "again", + "start": 2.56, + "end": 2.84, + "confidence": 0.997 + } + ] + }, + { + "id": 1, + "seek": 3000, + "start": 32.86, + "end": 35.42, + "text": " Mohoo, let's go with it again", + "tokens": [ + 50363, + 337, + 1219, + 2238, + 11, + 1309, + 338, + 467, + 351, + 340, + 757, + 50627 + ], + "temperature": 0.0, + "avg_logprob": -0.41327399473923904, + "compression_ratio": 0.7837837837837838, + "no_speech_prob": 0.348534494638443, + "confidence": 0.544, + "words": [ + { + "text": "Mohoo,", + "start": 32.86, + "end": 34.02, + "confidence": 0.277 + }, + { + "text": "let's", + "start": 34.46, + "end": 34.74, + "confidence": 0.89 + }, + { + "text": "go", + "start": 34.74, + "end": 34.86, + "confidence": 0.935 + }, + { + "text": "with", + "start": 34.86, + "end": 34.98, + "confidence": 0.658 + }, + { + "text": "it", + "start": 34.98, + "end": 35.1, + "confidence": 0.79 + }, + { + "text": "again", + "start": 35.1, + "end": 35.42, + "confidence": 0.997 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/tiny.en/efficient_bonjour_vous_allez_bien.mp3.words.json b/tests/expected/tiny.en/efficient_bonjour_vous_allez_bien.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..376d482cb2205487c5b2c31e0b9020b2b0134047 --- /dev/null +++ b/tests/expected/tiny.en/efficient_bonjour_vous_allez_bien.mp3.words.json @@ -0,0 +1,136 @@ +{ + "text": " Mohoo! Let's go with it again! Mohoo! Let's go with it again!", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.42, + "end": 2.8, + "text": " Mohoo! 
Let's go with it again!", + "tokens": [ + 50363, + 337, + 1219, + 2238, + 0, + 3914, + 338, + 467, + 351, + 340, + 757, + 0, + 50513 + ], + "temperature": 0.0, + "avg_logprob": -0.8020608765738351, + "compression_ratio": 0.7894736842105263, + "no_speech_prob": 0.11798259615898132, + "confidence": 0.496, + "words": [ + { + "text": "Mohoo!", + "start": 0.42, + "end": 1.36, + "confidence": 0.271 + }, + { + "text": "Let's", + "start": 1.9, + "end": 2.18, + "confidence": 0.677 + }, + { + "text": "go", + "start": 2.18, + "end": 2.32, + "confidence": 0.914 + }, + { + "text": "with", + "start": 2.32, + "end": 2.46, + "confidence": 0.233 + }, + { + "text": "it", + "start": 2.46, + "end": 2.56, + "confidence": 0.933 + }, + { + "text": "again!", + "start": 2.56, + "end": 2.8, + "confidence": 0.996 + } + ] + }, + { + "id": 1, + "seek": 3000, + "start": 32.98, + "end": 35.44, + "text": " Mohoo! Let's go with it again!", + "tokens": [ + 50363, + 337, + 1219, + 2238, + 0, + 3914, + 338, + 467, + 351, + 340, + 757, + 0, + 50663 + ], + "temperature": 0.0, + "avg_logprob": -0.19375058582850865, + "compression_ratio": 0.7894736842105263, + "no_speech_prob": 0.45197251439094543, + "confidence": 0.952, + "words": [ + { + "text": "Mohoo!", + "start": 32.98, + "end": 34.02, + "confidence": 0.896 + }, + { + "text": "Let's", + "start": 34.44, + "end": 34.72, + "confidence": 0.96 + }, + { + "text": "go", + "start": 34.72, + "end": 34.84, + "confidence": 0.99 + }, + { + "text": "with", + "start": 34.84, + "end": 35.0, + "confidence": 0.985 + }, + { + "text": "it", + "start": 35.0, + "end": 35.1, + "confidence": 0.991 + }, + { + "text": "again!", + "start": 35.1, + "end": 35.44, + "confidence": 0.998 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/tiny.en/nocond_bonjour_vous_allez_bien.mp3.words.json b/tests/expected/tiny.en/nocond_bonjour_vous_allez_bien.mp3.words.json new file mode 100644 index 
0000000000000000000000000000000000000000..376d482cb2205487c5b2c31e0b9020b2b0134047 --- /dev/null +++ b/tests/expected/tiny.en/nocond_bonjour_vous_allez_bien.mp3.words.json @@ -0,0 +1,136 @@ +{ + "text": " Mohoo! Let's go with it again! Mohoo! Let's go with it again!", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.42, + "end": 2.8, + "text": " Mohoo! Let's go with it again!", + "tokens": [ + 50363, + 337, + 1219, + 2238, + 0, + 3914, + 338, + 467, + 351, + 340, + 757, + 0, + 50513 + ], + "temperature": 0.0, + "avg_logprob": -0.8020608765738351, + "compression_ratio": 0.7894736842105263, + "no_speech_prob": 0.11798259615898132, + "confidence": 0.496, + "words": [ + { + "text": "Mohoo!", + "start": 0.42, + "end": 1.36, + "confidence": 0.271 + }, + { + "text": "Let's", + "start": 1.9, + "end": 2.18, + "confidence": 0.677 + }, + { + "text": "go", + "start": 2.18, + "end": 2.32, + "confidence": 0.914 + }, + { + "text": "with", + "start": 2.32, + "end": 2.46, + "confidence": 0.233 + }, + { + "text": "it", + "start": 2.46, + "end": 2.56, + "confidence": 0.933 + }, + { + "text": "again!", + "start": 2.56, + "end": 2.8, + "confidence": 0.996 + } + ] + }, + { + "id": 1, + "seek": 3000, + "start": 32.98, + "end": 35.44, + "text": " Mohoo! 
Let's go with it again!", + "tokens": [ + 50363, + 337, + 1219, + 2238, + 0, + 3914, + 338, + 467, + 351, + 340, + 757, + 0, + 50663 + ], + "temperature": 0.0, + "avg_logprob": -0.19375058582850865, + "compression_ratio": 0.7894736842105263, + "no_speech_prob": 0.45197251439094543, + "confidence": 0.952, + "words": [ + { + "text": "Mohoo!", + "start": 32.98, + "end": 34.02, + "confidence": 0.896 + }, + { + "text": "Let's", + "start": 34.44, + "end": 34.72, + "confidence": 0.96 + }, + { + "text": "go", + "start": 34.72, + "end": 34.84, + "confidence": 0.99 + }, + { + "text": "with", + "start": 34.84, + "end": 35.0, + "confidence": 0.985 + }, + { + "text": "it", + "start": 35.0, + "end": 35.1, + "confidence": 0.991 + }, + { + "text": "again!", + "start": 35.1, + "end": 35.44, + "confidence": 0.998 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/tiny_auto.cpu/gaenswein15.mp3.words.json b/tests/expected/tiny_auto.cpu/gaenswein15.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..2fa1b2d83ea55923062a1564d4750ee40a08e1b9 --- /dev/null +++ b/tests/expected/tiny_auto.cpu/gaenswein15.mp3.words.json @@ -0,0 +1,318 @@ +{ + "text": " Wie wieder zu dazu ist Meshfuchs von 1962 als Meshale für die außerordentliche Form des grullischen Rätos ist dann nicht so weitergegangen wie sich Papsbelle dick das gewünscht hatte. 
Das hat er als Meshale im Rätos.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.9, + "end": 12.76, + "text": " Wie wieder zu dazu ist Meshfuchs von 1962 als Meshale für die außerordentliche Form des grullischen Rätos ist dann nicht so weitergegangen wie sich Papsbelle dick das gewünscht hatte.", + "tokens": [ + 9233, + 6216, + 2164, + 13034, + 1418, + 376, + 14935, + 69, + 37503, + 2957, + 39498, + 3907, + 376, + 14935, + 1220, + 2959, + 978, + 39428, + 765, + 7698, + 68, + 10126, + 730, + 677, + 858, + 6282, + 497, + 3628, + 329, + 1418, + 3594, + 1979, + 370, + 8988, + 432, + 47152, + 3355, + 3041, + 430, + 2382, + 65, + 4434, + 18659, + 1482, + 6906, + 3412, + 82, + 4701, + 13299, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.6555103008563702, + "compression_ratio": 1.2619047619047619, + "no_speech_prob": 0.11051499098539352, + "confidence": 0.541, + "words": [ + { + "text": "Wie", + "start": 0.9, + "end": 1.06, + "confidence": 0.478 + }, + { + "text": "wieder", + "start": 1.06, + "end": 1.32, + "confidence": 0.901 + }, + { + "text": "zu", + "start": 1.32, + "end": 1.54, + "confidence": 0.249 + }, + { + "text": "dazu", + "start": 1.54, + "end": 1.86, + "confidence": 0.131 + }, + { + "text": "ist", + "start": 1.86, + "end": 2.12, + "confidence": 0.586 + }, + { + "text": "Meshfuchs", + "start": 2.12, + "end": 2.72, + "confidence": 0.332 + }, + { + "text": "von", + "start": 2.72, + "end": 2.94, + "confidence": 0.626 + }, + { + "text": "1962", + "start": 2.94, + "end": 4.72, + "confidence": 0.208 + }, + { + "text": "als", + "start": 4.72, + "end": 5.22, + "confidence": 0.885 + }, + { + "text": "Meshale", + "start": 5.22, + "end": 5.7, + "confidence": 0.385 + }, + { + "text": "für", + "start": 5.7, + "end": 5.9, + "confidence": 0.933 + }, + { + "text": "die", + "start": 5.9, + "end": 6.08, + "confidence": 0.972 + }, + { + "text": "außerordentliche", + "start": 6.08, + "end": 6.98, + "confidence": 0.899 + }, + { + "text": "Form", + "start": 6.98, + 
"end": 7.22, + "confidence": 0.655 + }, + { + "text": "des", + "start": 7.22, + "end": 7.58, + "confidence": 0.971 + }, + { + "text": "grullischen", + "start": 7.58, + "end": 8.04, + "confidence": 0.384 + }, + { + "text": "Rätos", + "start": 8.04, + "end": 8.58, + "confidence": 0.368 + }, + { + "text": "ist", + "start": 8.58, + "end": 9.6, + "confidence": 0.549 + }, + { + "text": "dann", + "start": 9.6, + "end": 9.74, + "confidence": 0.5 + }, + { + "text": "nicht", + "start": 9.74, + "end": 9.92, + "confidence": 0.936 + }, + { + "text": "so", + "start": 9.92, + "end": 10.08, + "confidence": 0.953 + }, + { + "text": "weitergegangen", + "start": 10.08, + "end": 10.86, + "confidence": 0.74 + }, + { + "text": "wie", + "start": 10.86, + "end": 11.04, + "confidence": 0.703 + }, + { + "text": "sich", + "start": 11.04, + "end": 11.22, + "confidence": 0.939 + }, + { + "text": "Papsbelle", + "start": 11.22, + "end": 11.62, + "confidence": 0.356 + }, + { + "text": "dick", + "start": 11.62, + "end": 11.84, + "confidence": 0.319 + }, + { + "text": "das", + "start": 11.84, + "end": 12.08, + "confidence": 0.844 + }, + { + "text": "gewünscht", + "start": 12.08, + "end": 12.48, + "confidence": 0.818 + }, + { + "text": "hatte.", + "start": 12.48, + "end": 12.76, + "confidence": 0.922 + } + ] + }, + { + "id": 1, + "seek": 1300, + "start": 13.98, + "end": 15.22, + "text": " Das hat er als Meshale im Rätos.", + "tokens": [ + 50364, + 2846, + 2385, + 1189, + 3907, + 376, + 14935, + 1220, + 566, + 497, + 3628, + 329, + 13, + 51314 + ], + "temperature": 0.0, + "avg_logprob": -0.9505692799886067, + "compression_ratio": 0.8048780487804879, + "no_speech_prob": 0.044665463268756866, + "confidence": 0.381, + "words": [ + { + "text": "Das", + "start": 13.98, + "end": 14.2, + "confidence": 0.841 + }, + { + "text": "hat", + "start": 14.2, + "end": 14.32, + "confidence": 0.878 + }, + { + "text": "er", + "start": 14.32, + "end": 14.46, + "confidence": 0.606 + }, + { + "text": "als", + "start": 
14.46, + "end": 14.7, + "confidence": 0.626 + }, + { + "text": "Meshale", + "start": 14.7, + "end": 15.0, + "confidence": 0.206 + }, + { + "text": "im", + "start": 15.0, + "end": 15.04, + "confidence": 0.101 + }, + { + "text": "Rätos.", + "start": 15.04, + "end": 15.22, + "confidence": 0.464 + } + ] + } + ], + "language": "de" +} \ No newline at end of file diff --git a/tests/expected/tiny_auto.cpu/radio_short.mp3.words.json b/tests/expected/tiny_auto.cpu/radio_short.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..f2388cb2c74572247a52a55a46815f03a82a84a0 --- /dev/null +++ b/tests/expected/tiny_auto.cpu/radio_short.mp3.words.json @@ -0,0 +1,3786 @@ +{ + "text": " What are you telling me, guys? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, guys? What are you telling me, guys? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? 
What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude?", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.06, + "end": 5.92, + "text": " What are you telling me, guys?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 1074, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.4002814409209461, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5112143754959106, + "confidence": 0.343, + "words": [ + { + "text": "What", + "start": 0.06, + "end": 4.9, + "confidence": 0.041 + }, + { + "text": "are", + "start": 4.9, + "end": 5.18, + "confidence": 0.603 + }, + { + "text": "you", + "start": 5.18, + "end": 5.42, + "confidence": 0.97 + }, + { + "text": "telling", + "start": 5.42, + "end": 5.64, + "confidence": 0.354 + }, + { + "text": "me,", + "start": 5.64, + "end": 5.88, + "confidence": 0.622 + }, + { + "text": "guys?", + "start": 5.88, + "end": 5.92, + "confidence": 0.307 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 6.5, + "end": 7.5, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.4002814409209461, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5112143754959106, + "confidence": 0.677, + "words": [ + { + "text": "What", + "start": 6.5, + "end": 6.92, + "confidence": 0.885 + }, + { + "text": 
"are", + "start": 6.92, + "end": 6.96, + "confidence": 0.936 + }, + { + "text": "you", + "start": 6.96, + "end": 7.02, + "confidence": 0.993 + }, + { + "text": "telling", + "start": 7.02, + "end": 7.24, + "confidence": 0.904 + }, + { + "text": "me,", + "start": 7.24, + "end": 7.46, + "confidence": 0.984 + }, + { + "text": "dude?", + "start": 7.46, + "end": 7.5, + "confidence": 0.131 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 7.52, + "end": 8.28, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.4002814409209461, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5112143754959106, + "confidence": 0.827, + "words": [ + { + "text": "What", + "start": 7.52, + "end": 7.56, + "confidence": 0.545 + }, + { + "text": "are", + "start": 7.56, + "end": 7.6, + "confidence": 0.851 + }, + { + "text": "you", + "start": 7.6, + "end": 7.64, + "confidence": 0.994 + }, + { + "text": "telling", + "start": 7.64, + "end": 7.68, + "confidence": 0.849 + }, + { + "text": "me,", + "start": 7.68, + "end": 8.16, + "confidence": 0.951 + }, + { + "text": "dude?", + "start": 8.16, + "end": 8.28, + "confidence": 0.857 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 10.46, + "end": 12.48, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.4002814409209461, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5112143754959106, + "confidence": 0.774, + "words": [ + { + "text": "What", + "start": 10.46, + "end": 10.54, + "confidence": 0.422 + }, + { + "text": "are", + "start": 10.54, + "end": 10.6, + "confidence": 0.79 + }, + { + "text": "you", + "start": 10.6, + "end": 11.38, + "confidence": 0.994 + }, + { + "text": "telling", + "start": 11.38, + "end": 11.66, + "confidence": 0.809 + }, + { + "text": "me,", + "start": 11.66, + "end": 12.44, + 
"confidence": 0.956 + }, + { + "text": "dude?", + "start": 12.44, + "end": 12.48, + "confidence": 0.837 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 12.48, + "end": 14.46, + "text": " What are you telling me, guys?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 1074, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.4002814409209461, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5112143754959106, + "confidence": 0.762, + "words": [ + { + "text": "What", + "start": 12.48, + "end": 12.52, + "confidence": 0.504 + }, + { + "text": "are", + "start": 12.52, + "end": 12.56, + "confidence": 0.845 + }, + { + "text": "you", + "start": 12.56, + "end": 12.6, + "confidence": 0.994 + }, + { + "text": "telling", + "start": 12.6, + "end": 12.64, + "confidence": 0.844 + }, + { + "text": "me,", + "start": 12.64, + "end": 13.18, + "confidence": 0.962 + }, + { + "text": "guys?", + "start": 13.18, + "end": 14.46, + "confidence": 0.57 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 16.5, + "end": 21.3, + "text": " What are you telling me, guys?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 1074, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.4002814409209461, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5112143754959106, + "confidence": 0.936, + "words": [ + { + "text": "What", + "start": 16.5, + "end": 20.42, + "confidence": 0.887 + }, + { + "text": "are", + "start": 20.42, + "end": 20.54, + "confidence": 0.936 + }, + { + "text": "you", + "start": 20.54, + "end": 20.7, + "confidence": 0.996 + }, + { + "text": "telling", + "start": 20.7, + "end": 20.94, + "confidence": 0.934 + }, + { + "text": "me,", + "start": 20.94, + "end": 21.26, + "confidence": 0.897 + }, + { + "text": "guys?", + "start": 21.26, + "end": 21.3, + "confidence": 0.969 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 21.5, + "end": 22.78, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 
11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.4002814409209461, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5112143754959106, + "confidence": 0.953, + "words": [ + { + "text": "What", + "start": 21.5, + "end": 22.2, + "confidence": 0.951 + }, + { + "text": "are", + "start": 22.2, + "end": 22.24, + "confidence": 0.966 + }, + { + "text": "you", + "start": 22.24, + "end": 22.36, + "confidence": 0.997 + }, + { + "text": "telling", + "start": 22.36, + "end": 22.5, + "confidence": 0.862 + }, + { + "text": "me,", + "start": 22.5, + "end": 22.7, + "confidence": 0.984 + }, + { + "text": "dude?", + "start": 22.7, + "end": 22.78, + "confidence": 0.967 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 22.78, + "end": 23.02, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.4002814409209461, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5112143754959106, + "confidence": 0.856, + "words": [ + { + "text": "What", + "start": 22.78, + "end": 22.82, + "confidence": 0.54 + }, + { + "text": "are", + "start": 22.82, + "end": 22.86, + "confidence": 0.888 + }, + { + "text": "you", + "start": 22.86, + "end": 22.9, + "confidence": 0.996 + }, + { + "text": "telling", + "start": 22.9, + "end": 22.94, + "confidence": 0.882 + }, + { + "text": "me,", + "start": 22.94, + "end": 22.98, + "confidence": 0.975 + }, + { + "text": "dude?", + "start": 22.98, + "end": 23.02, + "confidence": 0.955 + } + ] + }, + { + "id": 8, + "seek": 2600, + "start": 26.02, + "end": 28.74, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.26995800480698096, + "compression_ratio": 9.136363636363637, + "no_speech_prob": 0.0041488660499453545, + "confidence": 0.523, + "words": [ + { + "text": "What", + "start": 26.02, + "end": 26.06, + 
"confidence": 0.145 + }, + { + "text": "are", + "start": 26.06, + "end": 27.16, + "confidence": 0.442 + }, + { + "text": "you", + "start": 27.16, + "end": 28.24, + "confidence": 0.944 + }, + { + "text": "telling", + "start": 28.24, + "end": 28.28, + "confidence": 0.536 + }, + { + "text": "me,", + "start": 28.28, + "end": 28.7, + "confidence": 0.926 + }, + { + "text": "dude?", + "start": 28.7, + "end": 28.74, + "confidence": 0.683 + } + ] + }, + { + "id": 9, + "seek": 2600, + "start": 30.5, + "end": 31.8, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.26995800480698096, + "compression_ratio": 9.136363636363637, + "no_speech_prob": 0.0041488660499453545, + "confidence": 0.586, + "words": [ + { + "text": "What", + "start": 30.5, + "end": 31.1, + "confidence": 0.193 + }, + { + "text": "are", + "start": 31.1, + "end": 31.14, + "confidence": 0.532 + }, + { + "text": "you", + "start": 31.14, + "end": 31.46, + "confidence": 0.966 + }, + { + "text": "telling", + "start": 31.46, + "end": 31.5, + "confidence": 0.588 + }, + { + "text": "me,", + "start": 31.5, + "end": 31.68, + "confidence": 0.895 + }, + { + "text": "dude?", + "start": 31.68, + "end": 31.8, + "confidence": 0.776 + } + ] + }, + { + "id": 10, + "seek": 2600, + "start": 31.8, + "end": 32.58, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.26995800480698096, + "compression_ratio": 9.136363636363637, + "no_speech_prob": 0.0041488660499453545, + "confidence": 0.664, + "words": [ + { + "text": "What", + "start": 31.8, + "end": 32.26, + "confidence": 0.321 + }, + { + "text": "are", + "start": 32.26, + "end": 32.3, + "confidence": 0.594 + }, + { + "text": "you", + "start": 32.3, + "end": 32.4, + "confidence": 0.972 + }, + { + "text": "telling", + "start": 32.4, + "end": 32.44, + "confidence": 
0.609 + }, + { + "text": "me,", + "start": 32.44, + "end": 32.48, + "confidence": 0.891 + }, + { + "text": "dude?", + "start": 32.48, + "end": 32.58, + "confidence": 0.85 + } + ] + }, + { + "id": 11, + "seek": 2600, + "start": 33.52, + "end": 35.4, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.26995800480698096, + "compression_ratio": 9.136363636363637, + "no_speech_prob": 0.0041488660499453545, + "confidence": 0.711, + "words": [ + { + "text": "What", + "start": 33.52, + "end": 33.56, + "confidence": 0.44 + }, + { + "text": "are", + "start": 33.56, + "end": 33.66, + "confidence": 0.602 + }, + { + "text": "you", + "start": 33.66, + "end": 33.74, + "confidence": 0.972 + }, + { + "text": "telling", + "start": 33.74, + "end": 34.84, + "confidence": 0.635 + }, + { + "text": "me,", + "start": 34.84, + "end": 35.36, + "confidence": 0.901 + }, + { + "text": "dude?", + "start": 35.36, + "end": 35.4, + "confidence": 0.88 + } + ] + }, + { + "id": 12, + "seek": 2600, + "start": 36.24, + "end": 38.1, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.26995800480698096, + "compression_ratio": 9.136363636363637, + "no_speech_prob": 0.0041488660499453545, + "confidence": 0.745, + "words": [ + { + "text": "What", + "start": 36.24, + "end": 36.34, + "confidence": 0.474 + }, + { + "text": "are", + "start": 36.34, + "end": 36.38, + "confidence": 0.621 + }, + { + "text": "you", + "start": 36.38, + "end": 37.34, + "confidence": 0.973 + }, + { + "text": "telling", + "start": 37.34, + "end": 37.42, + "confidence": 0.715 + }, + { + "text": "me,", + "start": 37.42, + "end": 38.06, + "confidence": 0.929 + }, + { + "text": "dude?", + "start": 38.06, + "end": 38.1, + "confidence": 0.9 + } + ] + }, + { + "id": 13, + "seek": 2600, + "start": 38.1, + "end": 38.62, + "text": 
" What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.26995800480698096, + "compression_ratio": 9.136363636363637, + "no_speech_prob": 0.0041488660499453545, + "confidence": 0.787, + "words": [ + { + "text": "What", + "start": 38.1, + "end": 38.14, + "confidence": 0.555 + }, + { + "text": "are", + "start": 38.14, + "end": 38.18, + "confidence": 0.674 + }, + { + "text": "you", + "start": 38.18, + "end": 38.22, + "confidence": 0.978 + }, + { + "text": "telling", + "start": 38.22, + "end": 38.26, + "confidence": 0.751 + }, + { + "text": "me,", + "start": 38.26, + "end": 38.3, + "confidence": 0.936 + }, + { + "text": "dude?", + "start": 38.3, + "end": 38.62, + "confidence": 0.921 + } + ] + }, + { + "id": 14, + "seek": 2600, + "start": 39.52, + "end": 41.92, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.26995800480698096, + "compression_ratio": 9.136363636363637, + "no_speech_prob": 0.0041488660499453545, + "confidence": 0.813, + "words": [ + { + "text": "What", + "start": 39.52, + "end": 40.78, + "confidence": 0.577 + }, + { + "text": "are", + "start": 40.78, + "end": 41.06, + "confidence": 0.737 + }, + { + "text": "you", + "start": 41.06, + "end": 41.14, + "confidence": 0.982 + }, + { + "text": "telling", + "start": 41.14, + "end": 41.18, + "confidence": 0.785 + }, + { + "text": "me,", + "start": 41.18, + "end": 41.86, + "confidence": 0.941 + }, + { + "text": "dude?", + "start": 41.86, + "end": 41.92, + "confidence": 0.936 + } + ] + }, + { + "id": 15, + "seek": 2600, + "start": 41.92, + "end": 42.86, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.26995800480698096, + "compression_ratio": 9.136363636363637, + "no_speech_prob": 0.0041488660499453545, 
+ "confidence": 0.834, + "words": [ + { + "text": "What", + "start": 41.92, + "end": 41.96, + "confidence": 0.644 + }, + { + "text": "are", + "start": 41.96, + "end": 42.0, + "confidence": 0.752 + }, + { + "text": "you", + "start": 42.0, + "end": 42.08, + "confidence": 0.985 + }, + { + "text": "telling", + "start": 42.08, + "end": 42.3, + "confidence": 0.788 + }, + { + "text": "me,", + "start": 42.3, + "end": 42.82, + "confidence": 0.953 + }, + { + "text": "dude?", + "start": 42.82, + "end": 42.86, + "confidence": 0.938 + } + ] + }, + { + "id": 16, + "seek": 2600, + "start": 44.5, + "end": 46.46, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.26995800480698096, + "compression_ratio": 9.136363636363637, + "no_speech_prob": 0.0041488660499453545, + "confidence": 0.795, + "words": [ + { + "text": "What", + "start": 44.5, + "end": 45.0, + "confidence": 0.502 + }, + { + "text": "are", + "start": 45.0, + "end": 45.64, + "confidence": 0.732 + }, + { + "text": "you", + "start": 45.64, + "end": 45.72, + "confidence": 0.985 + }, + { + "text": "telling", + "start": 45.72, + "end": 45.92, + "confidence": 0.789 + }, + { + "text": "me,", + "start": 45.92, + "end": 46.38, + "confidence": 0.952 + }, + { + "text": "dude?", + "start": 46.38, + "end": 46.46, + "confidence": 0.932 + } + ] + }, + { + "id": 17, + "seek": 2600, + "start": 46.46, + "end": 46.94, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.26995800480698096, + "compression_ratio": 9.136363636363637, + "no_speech_prob": 0.0041488660499453545, + "confidence": 0.789, + "words": [ + { + "text": "What", + "start": 46.46, + "end": 46.5, + "confidence": 0.481 + }, + { + "text": "are", + "start": 46.5, + "end": 46.54, + "confidence": 0.742 + }, + { + "text": "you", + "start": 46.54, + "end": 46.58, + 
"confidence": 0.987 + }, + { + "text": "telling", + "start": 46.58, + "end": 46.62, + "confidence": 0.779 + }, + { + "text": "me,", + "start": 46.62, + "end": 46.66, + "confidence": 0.948 + }, + { + "text": "dude?", + "start": 46.66, + "end": 46.94, + "confidence": 0.932 + } + ] + }, + { + "id": 18, + "seek": 2600, + "start": 47.52, + "end": 48.46, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.26995800480698096, + "compression_ratio": 9.136363636363637, + "no_speech_prob": 0.0041488660499453545, + "confidence": 0.792, + "words": [ + { + "text": "What", + "start": 47.52, + "end": 47.56, + "confidence": 0.479 + }, + { + "text": "are", + "start": 47.56, + "end": 48.04, + "confidence": 0.746 + }, + { + "text": "you", + "start": 48.04, + "end": 48.1, + "confidence": 0.987 + }, + { + "text": "telling", + "start": 48.1, + "end": 48.28, + "confidence": 0.785 + }, + { + "text": "me,", + "start": 48.28, + "end": 48.42, + "confidence": 0.95 + }, + { + "text": "dude?", + "start": 48.42, + "end": 48.46, + "confidence": 0.936 + } + ] + }, + { + "id": 19, + "seek": 2600, + "start": 49.52, + "end": 51.38, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.26995800480698096, + "compression_ratio": 9.136363636363637, + "no_speech_prob": 0.0041488660499453545, + "confidence": 0.838, + "words": [ + { + "text": "What", + "start": 49.52, + "end": 50.24, + "confidence": 0.651 + }, + { + "text": "are", + "start": 50.24, + "end": 50.48, + "confidence": 0.757 + }, + { + "text": "you", + "start": 50.48, + "end": 50.56, + "confidence": 0.989 + }, + { + "text": "telling", + "start": 50.56, + "end": 50.6, + "confidence": 0.793 + }, + { + "text": "me,", + "start": 50.6, + "end": 51.32, + "confidence": 0.95 + }, + { + "text": "dude?", + "start": 51.32, + "end": 51.38, + 
"confidence": 0.943 + } + ] + }, + { + "id": 20, + "seek": 2600, + "start": 51.98, + "end": 54.06, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.26995800480698096, + "compression_ratio": 9.136363636363637, + "no_speech_prob": 0.0041488660499453545, + "confidence": 0.78, + "words": [ + { + "text": "What", + "start": 51.98, + "end": 52.14, + "confidence": 0.43 + }, + { + "text": "are", + "start": 52.14, + "end": 52.62, + "confidence": 0.737 + }, + { + "text": "you", + "start": 52.62, + "end": 52.72, + "confidence": 0.988 + }, + { + "text": "telling", + "start": 52.72, + "end": 52.88, + "confidence": 0.81 + }, + { + "text": "me,", + "start": 52.88, + "end": 54.02, + "confidence": 0.943 + }, + { + "text": "dude?", + "start": 54.02, + "end": 54.06, + "confidence": 0.944 + } + ] + }, + { + "id": 21, + "seek": 5400, + "start": 54.06, + "end": 55.0, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.7, + "words": [ + { + "text": "What", + "start": 54.06, + "end": 54.1, + "confidence": 0.506 + }, + { + "text": "are", + "start": 54.1, + "end": 54.2, + "confidence": 0.588 + }, + { + "text": "you", + "start": 54.2, + "end": 54.46, + "confidence": 0.876 + }, + { + "text": "telling", + "start": 54.46, + "end": 54.5, + "confidence": 0.747 + }, + { + "text": "me,", + "start": 54.5, + "end": 54.96, + "confidence": 0.71 + }, + { + "text": "dude?", + "start": 54.96, + "end": 55.0, + "confidence": 0.848 + } + ] + }, + { + "id": 22, + "seek": 5400, + "start": 56.5, + "end": 58.0, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": 
-0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.739, + "words": [ + { + "text": "What", + "start": 56.5, + "end": 56.82, + "confidence": 0.512 + }, + { + "text": "are", + "start": 56.82, + "end": 56.92, + "confidence": 0.568 + }, + { + "text": "you", + "start": 56.92, + "end": 57.12, + "confidence": 0.964 + }, + { + "text": "telling", + "start": 57.12, + "end": 57.16, + "confidence": 0.808 + }, + { + "text": "me,", + "start": 57.16, + "end": 57.96, + "confidence": 0.803 + }, + { + "text": "dude?", + "start": 57.96, + "end": 58.0, + "confidence": 0.897 + } + ] + }, + { + "id": 23, + "seek": 5400, + "start": 58.0, + "end": 58.9, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.741, + "words": [ + { + "text": "What", + "start": 58.0, + "end": 58.64, + "confidence": 0.48 + }, + { + "text": "are", + "start": 58.64, + "end": 58.68, + "confidence": 0.584 + }, + { + "text": "you", + "start": 58.68, + "end": 58.78, + "confidence": 0.954 + }, + { + "text": "telling", + "start": 58.78, + "end": 58.82, + "confidence": 0.816 + }, + { + "text": "me,", + "start": 58.82, + "end": 58.86, + "confidence": 0.837 + }, + { + "text": "dude?", + "start": 58.86, + "end": 58.9, + "confidence": 0.908 + } + ] + }, + { + "id": 24, + "seek": 5400, + "start": 59.52, + "end": 60.5, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.783, + "words": [ + { + "text": "What", + "start": 59.52, + "end": 60.02, + "confidence": 0.58 + }, + { + "text": "are", + "start": 60.02, + 
"end": 60.18, + "confidence": 0.641 + }, + { + "text": "you", + "start": 60.18, + "end": 60.22, + "confidence": 0.967 + }, + { + "text": "telling", + "start": 60.22, + "end": 60.26, + "confidence": 0.826 + }, + { + "text": "me,", + "start": 60.26, + "end": 60.38, + "confidence": 0.853 + }, + { + "text": "dude?", + "start": 60.38, + "end": 60.5, + "confidence": 0.911 + } + ] + }, + { + "id": 25, + "seek": 5400, + "start": 61.52, + "end": 62.82, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.803, + "words": [ + { + "text": "What", + "start": 61.52, + "end": 62.34, + "confidence": 0.613 + }, + { + "text": "are", + "start": 62.34, + "end": 62.38, + "confidence": 0.66 + }, + { + "text": "you", + "start": 62.38, + "end": 62.42, + "confidence": 0.967 + }, + { + "text": "telling", + "start": 62.42, + "end": 62.74, + "confidence": 0.835 + }, + { + "text": "me,", + "start": 62.74, + "end": 62.78, + "confidence": 0.892 + }, + { + "text": "dude?", + "start": 62.78, + "end": 62.82, + "confidence": 0.918 + } + ] + }, + { + "id": 26, + "seek": 5400, + "start": 63.52, + "end": 65.59, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.835, + "words": [ + { + "text": "What", + "start": 63.52, + "end": 64.12, + "confidence": 0.731 + }, + { + "text": "are", + "start": 64.12, + "end": 64.26, + "confidence": 0.666 + }, + { + "text": "you", + "start": 64.26, + "end": 64.3, + "confidence": 0.971 + }, + { + "text": "telling", + "start": 64.3, + "end": 64.6, + "confidence": 0.864 + }, + { + "text": "me,", + "start": 64.6, + "end": 65.02, 
+ "confidence": 0.902 + }, + { + "text": "dude?", + "start": 65.02, + "end": 65.59, + "confidence": 0.92 + } + ] + }, + { + "id": 27, + "seek": 5400, + "start": 65.59, + "end": 66.94, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.825, + "words": [ + { + "text": "What", + "start": 65.59, + "end": 65.84, + "confidence": 0.635 + }, + { + "text": "are", + "start": 65.84, + "end": 65.88, + "confidence": 0.691 + }, + { + "text": "you", + "start": 65.88, + "end": 65.92, + "confidence": 0.975 + }, + { + "text": "telling", + "start": 65.92, + "end": 65.96, + "confidence": 0.866 + }, + { + "text": "me,", + "start": 65.96, + "end": 66.86, + "confidence": 0.918 + }, + { + "text": "dude?", + "start": 66.86, + "end": 66.94, + "confidence": 0.927 + } + ] + }, + { + "id": 28, + "seek": 5400, + "start": 67.52, + "end": 69.18, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.821, + "words": [ + { + "text": "What", + "start": 67.52, + "end": 67.76, + "confidence": 0.572 + }, + { + "text": "are", + "start": 67.76, + "end": 67.9, + "confidence": 0.721 + }, + { + "text": "you", + "start": 67.9, + "end": 67.98, + "confidence": 0.979 + }, + { + "text": "telling", + "start": 67.98, + "end": 68.06, + "confidence": 0.88 + }, + { + "text": "me,", + "start": 68.06, + "end": 68.74, + "confidence": 0.923 + }, + { + "text": "dude?", + "start": 68.74, + "end": 69.18, + "confidence": 0.931 + } + ] + }, + { + "id": 29, + "seek": 5400, + "start": 69.86, + "end": 72.14, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 
291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.831, + "words": [ + { + "text": "What", + "start": 69.86, + "end": 69.9, + "confidence": 0.61 + }, + { + "text": "are", + "start": 69.9, + "end": 70.14, + "confidence": 0.724 + }, + { + "text": "you", + "start": 70.14, + "end": 70.54, + "confidence": 0.98 + }, + { + "text": "telling", + "start": 70.54, + "end": 71.0, + "confidence": 0.876 + }, + { + "text": "me,", + "start": 71.0, + "end": 71.9, + "confidence": 0.928 + }, + { + "text": "dude?", + "start": 71.9, + "end": 72.14, + "confidence": 0.932 + } + ] + }, + { + "id": 30, + "seek": 5400, + "start": 72.14, + "end": 72.76, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.813, + "words": [ + { + "text": "What", + "start": 72.14, + "end": 72.4, + "confidence": 0.516 + }, + { + "text": "are", + "start": 72.4, + "end": 72.44, + "confidence": 0.736 + }, + { + "text": "you", + "start": 72.44, + "end": 72.52, + "confidence": 0.982 + }, + { + "text": "telling", + "start": 72.52, + "end": 72.6, + "confidence": 0.88 + }, + { + "text": "me,", + "start": 72.6, + "end": 72.72, + "confidence": 0.943 + }, + { + "text": "dude?", + "start": 72.72, + "end": 72.76, + "confidence": 0.937 + } + ] + }, + { + "id": 31, + "seek": 5400, + "start": 74.08, + "end": 76.34, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.85, + "words": [ + { + "text": "What", + "start": 74.08, + "end": 
74.32, + "confidence": 0.651 + }, + { + "text": "are", + "start": 74.32, + "end": 74.36, + "confidence": 0.755 + }, + { + "text": "you", + "start": 74.36, + "end": 74.94, + "confidence": 0.984 + }, + { + "text": "telling", + "start": 74.94, + "end": 75.56, + "confidence": 0.88 + }, + { + "text": "me,", + "start": 75.56, + "end": 76.3, + "confidence": 0.943 + }, + { + "text": "dude?", + "start": 76.3, + "end": 76.34, + "confidence": 0.937 + } + ] + }, + { + "id": 32, + "seek": 5400, + "start": 76.34, + "end": 77.98, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.839, + "words": [ + { + "text": "What", + "start": 76.34, + "end": 77.38, + "confidence": 0.607 + }, + { + "text": "are", + "start": 77.38, + "end": 77.42, + "confidence": 0.759 + }, + { + "text": "you", + "start": 77.42, + "end": 77.46, + "confidence": 0.985 + }, + { + "text": "telling", + "start": 77.46, + "end": 77.54, + "confidence": 0.879 + }, + { + "text": "me,", + "start": 77.54, + "end": 77.94, + "confidence": 0.938 + }, + { + "text": "dude?", + "start": 77.94, + "end": 77.98, + "confidence": 0.936 + } + ] + }, + { + "id": 33, + "seek": 5400, + "start": 77.98, + "end": 79.08, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.85, + "words": [ + { + "text": "What", + "start": 77.98, + "end": 78.02, + "confidence": 0.636 + }, + { + "text": "are", + "start": 78.02, + "end": 78.06, + "confidence": 0.75 + }, + { + "text": "you", + "start": 78.06, + "end": 78.92, + "confidence": 0.985 + }, + { + "text": "telling", + "start": 78.92, + "end": 78.96, + 
"confidence": 0.894 + }, + { + "text": "me,", + "start": 78.96, + "end": 79.04, + "confidence": 0.954 + }, + { + "text": "dude?", + "start": 79.04, + "end": 79.08, + "confidence": 0.939 + } + ] + }, + { + "id": 34, + "seek": 5400, + "start": 80.22, + "end": 81.66, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.17690415449545416, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0238738264888525, + "confidence": 0.813, + "words": [ + { + "text": "What", + "start": 80.22, + "end": 80.26, + "confidence": 0.484 + }, + { + "text": "are", + "start": 80.26, + "end": 80.6, + "confidence": 0.745 + }, + { + "text": "you", + "start": 80.6, + "end": 80.68, + "confidence": 0.985 + }, + { + "text": "telling", + "start": 80.68, + "end": 80.96, + "confidence": 0.899 + }, + { + "text": "me,", + "start": 80.96, + "end": 81.62, + "confidence": 0.96 + }, + { + "text": "dude?", + "start": 81.62, + "end": 81.66, + "confidence": 0.942 + } + ] + }, + { + "id": 35, + "seek": 8200, + "start": 82.02, + "end": 83.08, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.772, + "words": [ + { + "text": "What", + "start": 82.02, + "end": 82.06, + "confidence": 0.701 + }, + { + "text": "are", + "start": 82.06, + "end": 82.28, + "confidence": 0.488 + }, + { + "text": "you", + "start": 82.28, + "end": 82.38, + "confidence": 0.934 + }, + { + "text": "telling", + "start": 82.38, + "end": 82.42, + "confidence": 0.755 + }, + { + "text": "me,", + "start": 82.42, + "end": 82.86, + "confidence": 0.92 + }, + { + "text": "dude?", + "start": 82.86, + "end": 83.08, + "confidence": 0.956 + } + ] + }, + { + "id": 36, + "seek": 8200, + "start": 84.34, + "end": 
85.3, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.786, + "words": [ + { + "text": "What", + "start": 84.34, + "end": 84.38, + "confidence": 0.529 + }, + { + "text": "are", + "start": 84.38, + "end": 84.64, + "confidence": 0.637 + }, + { + "text": "you", + "start": 84.64, + "end": 85.12, + "confidence": 0.969 + }, + { + "text": "telling", + "start": 85.12, + "end": 85.16, + "confidence": 0.815 + }, + { + "text": "me,", + "start": 85.16, + "end": 85.26, + "confidence": 0.932 + }, + { + "text": "dude?", + "start": 85.26, + "end": 85.3, + "confidence": 0.952 + } + ] + }, + { + "id": 37, + "seek": 8200, + "start": 85.52, + "end": 87.94, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.836, + "words": [ + { + "text": "What", + "start": 85.52, + "end": 86.82, + "confidence": 0.676 + }, + { + "text": "are", + "start": 86.82, + "end": 86.86, + "confidence": 0.703 + }, + { + "text": "you", + "start": 86.86, + "end": 86.98, + "confidence": 0.97 + }, + { + "text": "telling", + "start": 86.98, + "end": 87.02, + "confidence": 0.826 + }, + { + "text": "me,", + "start": 87.02, + "end": 87.9, + "confidence": 0.942 + }, + { + "text": "dude?", + "start": 87.9, + "end": 87.94, + "confidence": 0.955 + } + ] + }, + { + "id": 38, + "seek": 8200, + "start": 87.94, + "end": 89.56, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 
0.0009642249788157642, + "confidence": 0.849, + "words": [ + { + "text": "What", + "start": 87.94, + "end": 88.34, + "confidence": 0.7 + }, + { + "text": "are", + "start": 88.34, + "end": 89.16, + "confidence": 0.717 + }, + { + "text": "you", + "start": 89.16, + "end": 89.44, + "confidence": 0.976 + }, + { + "text": "telling", + "start": 89.44, + "end": 89.48, + "confidence": 0.84 + }, + { + "text": "me,", + "start": 89.48, + "end": 89.52, + "confidence": 0.954 + }, + { + "text": "dude?", + "start": 89.52, + "end": 89.56, + "confidence": 0.954 + } + ] + }, + { + "id": 39, + "seek": 8200, + "start": 89.94, + "end": 90.9, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.859, + "words": [ + { + "text": "What", + "start": 89.94, + "end": 90.1, + "confidence": 0.747 + }, + { + "text": "are", + "start": 90.1, + "end": 90.14, + "confidence": 0.718 + }, + { + "text": "you", + "start": 90.14, + "end": 90.28, + "confidence": 0.976 + }, + { + "text": "telling", + "start": 90.28, + "end": 90.5, + "confidence": 0.847 + }, + { + "text": "me,", + "start": 90.5, + "end": 90.86, + "confidence": 0.944 + }, + { + "text": "dude?", + "start": 90.86, + "end": 90.9, + "confidence": 0.959 + } + ] + }, + { + "id": 40, + "seek": 8200, + "start": 91.52, + "end": 92.2, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.851, + "words": [ + { + "text": "What", + "start": 91.52, + "end": 91.6, + "confidence": 0.723 + }, + { + "text": "are", + "start": 91.6, + "end": 91.64, + "confidence": 0.718 + }, + { + "text": "you", + "start": 91.64, + 
"end": 91.68, + "confidence": 0.978 + }, + { + "text": "telling", + "start": 91.68, + "end": 91.72, + "confidence": 0.83 + }, + { + "text": "me,", + "start": 91.72, + "end": 92.16, + "confidence": 0.944 + }, + { + "text": "dude?", + "start": 92.16, + "end": 92.2, + "confidence": 0.958 + } + ] + }, + { + "id": 41, + "seek": 8200, + "start": 93.52, + "end": 94.66, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.862, + "words": [ + { + "text": "What", + "start": 93.52, + "end": 94.04, + "confidence": 0.743 + }, + { + "text": "are", + "start": 94.04, + "end": 94.08, + "confidence": 0.73 + }, + { + "text": "you", + "start": 94.08, + "end": 94.24, + "confidence": 0.981 + }, + { + "text": "telling", + "start": 94.24, + "end": 94.28, + "confidence": 0.848 + }, + { + "text": "me,", + "start": 94.28, + "end": 94.36, + "confidence": 0.949 + }, + { + "text": "dude?", + "start": 94.36, + "end": 94.66, + "confidence": 0.962 + } + ] + }, + { + "id": 42, + "seek": 8200, + "start": 95.52, + "end": 98.44, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.867, + "words": [ + { + "text": "What", + "start": 95.52, + "end": 97.08, + "confidence": 0.72 + }, + { + "text": "are", + "start": 97.08, + "end": 97.16, + "confidence": 0.762 + }, + { + "text": "you", + "start": 97.16, + "end": 97.28, + "confidence": 0.984 + }, + { + "text": "telling", + "start": 97.28, + "end": 97.72, + "confidence": 0.862 + }, + { + "text": "me,", + "start": 97.72, + "end": 98.4, + "confidence": 0.951 + }, + { + "text": "dude?", + "start": 98.4, + "end": 
98.44, + "confidence": 0.962 + } + ] + }, + { + "id": 43, + "seek": 8200, + "start": 98.44, + "end": 98.9, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.885, + "words": [ + { + "text": "What", + "start": 98.44, + "end": 98.48, + "confidence": 0.77 + }, + { + "text": "are", + "start": 98.48, + "end": 98.52, + "confidence": 0.781 + }, + { + "text": "you", + "start": 98.52, + "end": 98.78, + "confidence": 0.986 + }, + { + "text": "telling", + "start": 98.78, + "end": 98.82, + "confidence": 0.867 + }, + { + "text": "me,", + "start": 98.82, + "end": 98.86, + "confidence": 0.965 + }, + { + "text": "dude?", + "start": 98.86, + "end": 98.9, + "confidence": 0.965 + } + ] + }, + { + "id": 44, + "seek": 8200, + "start": 99.52, + "end": 101.8, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.887, + "words": [ + { + "text": "What", + "start": 99.52, + "end": 101.06, + "confidence": 0.741 + }, + { + "text": "are", + "start": 101.06, + "end": 101.1, + "confidence": 0.806 + }, + { + "text": "you", + "start": 101.1, + "end": 101.14, + "confidence": 0.989 + }, + { + "text": "telling", + "start": 101.14, + "end": 101.44, + "confidence": 0.88 + }, + { + "text": "me,", + "start": 101.44, + "end": 101.76, + "confidence": 0.972 + }, + { + "text": "dude?", + "start": 101.76, + "end": 101.8, + "confidence": 0.966 + } + ] + }, + { + "id": 45, + "seek": 8200, + "start": 101.8, + "end": 103.02, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, 
+ "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.881, + "words": [ + { + "text": "What", + "start": 101.8, + "end": 102.82, + "confidence": 0.731 + }, + { + "text": "are", + "start": 102.82, + "end": 102.86, + "confidence": 0.796 + }, + { + "text": "you", + "start": 102.86, + "end": 102.9, + "confidence": 0.988 + }, + { + "text": "telling", + "start": 102.9, + "end": 102.94, + "confidence": 0.868 + }, + { + "text": "me,", + "start": 102.94, + "end": 102.98, + "confidence": 0.971 + }, + { + "text": "dude?", + "start": 102.98, + "end": 103.02, + "confidence": 0.966 + } + ] + }, + { + "id": 46, + "seek": 8200, + "start": 104.5, + "end": 105.34, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.888, + "words": [ + { + "text": "What", + "start": 104.5, + "end": 104.82, + "confidence": 0.737 + }, + { + "text": "are", + "start": 104.82, + "end": 105.16, + "confidence": 0.806 + }, + { + "text": "you", + "start": 105.16, + "end": 105.2, + "confidence": 0.99 + }, + { + "text": "telling", + "start": 105.2, + "end": 105.24, + "confidence": 0.885 + }, + { + "text": "me,", + "start": 105.24, + "end": 105.3, + "confidence": 0.974 + }, + { + "text": "dude?", + "start": 105.3, + "end": 105.34, + "confidence": 0.967 + } + ] + }, + { + "id": 47, + "seek": 8200, + "start": 106.5, + "end": 108.02, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.874, + "words": [ + { + "text": "What", + "start": 106.5, + "end": 106.92, + "confidence": 
0.697 + }, + { + "text": "are", + "start": 106.92, + "end": 107.1, + "confidence": 0.786 + }, + { + "text": "you", + "start": 107.1, + "end": 107.14, + "confidence": 0.99 + }, + { + "text": "telling", + "start": 107.14, + "end": 107.18, + "confidence": 0.876 + }, + { + "text": "me,", + "start": 107.18, + "end": 107.76, + "confidence": 0.971 + }, + { + "text": "dude?", + "start": 107.76, + "end": 108.02, + "confidence": 0.968 + } + ] + }, + { + "id": 48, + "seek": 8200, + "start": 108.02, + "end": 109.54, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1252204868155466, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0009642249788157642, + "confidence": 0.82, + "words": [ + { + "text": "What", + "start": 108.02, + "end": 108.06, + "confidence": 0.48 + }, + { + "text": "are", + "start": 108.06, + "end": 108.2, + "confidence": 0.773 + }, + { + "text": "you", + "start": 108.2, + "end": 108.24, + "confidence": 0.99 + }, + { + "text": "telling", + "start": 108.24, + "end": 108.34, + "confidence": 0.878 + }, + { + "text": "me,", + "start": 108.34, + "end": 109.48, + "confidence": 0.97 + }, + { + "text": "dude?", + "start": 109.48, + "end": 109.54, + "confidence": 0.97 + } + ] + }, + { + "id": 49, + "seek": 11000, + "start": 110.02, + "end": 110.56, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.719, + "words": [ + { + "text": "What", + "start": 110.02, + "end": 110.16, + "confidence": 0.419 + }, + { + "text": "are", + "start": 110.16, + "end": 110.2, + "confidence": 0.523 + }, + { + "text": "you", + "start": 110.2, + "end": 110.24, + "confidence": 0.913 + }, + { + "text": "telling", + "start": 110.24, + "end": 
110.48, + "confidence": 0.79 + }, + { + "text": "me,", + "start": 110.48, + "end": 110.52, + "confidence": 0.904 + }, + { + "text": "dude?", + "start": 110.52, + "end": 110.56, + "confidence": 0.964 + } + ] + }, + { + "id": 50, + "seek": 11000, + "start": 112.14, + "end": 113.52, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.835, + "words": [ + { + "text": "What", + "start": 112.14, + "end": 112.3, + "confidence": 0.644 + }, + { + "text": "are", + "start": 112.3, + "end": 113.22, + "confidence": 0.734 + }, + { + "text": "you", + "start": 113.22, + "end": 113.4, + "confidence": 0.978 + }, + { + "text": "telling", + "start": 113.4, + "end": 113.44, + "confidence": 0.821 + }, + { + "text": "me,", + "start": 113.44, + "end": 113.48, + "confidence": 0.933 + }, + { + "text": "dude?", + "start": 113.48, + "end": 113.52, + "confidence": 0.962 + } + ] + }, + { + "id": 51, + "seek": 11000, + "start": 113.9, + "end": 114.9, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.832, + "words": [ + { + "text": "What", + "start": 113.9, + "end": 114.06, + "confidence": 0.615 + }, + { + "text": "are", + "start": 114.06, + "end": 114.16, + "confidence": 0.733 + }, + { + "text": "you", + "start": 114.16, + "end": 114.36, + "confidence": 0.978 + }, + { + "text": "telling", + "start": 114.36, + "end": 114.4, + "confidence": 0.832 + }, + { + "text": "me,", + "start": 114.4, + "end": 114.86, + "confidence": 0.934 + }, + { + "text": "dude?", + "start": 114.86, + "end": 114.9, + "confidence": 0.967 + } + ] + }, + { + "id": 52, + 
"seek": 11000, + "start": 115.52, + "end": 117.54, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.86, + "words": [ + { + "text": "What", + "start": 115.52, + "end": 116.54, + "confidence": 0.67 + }, + { + "text": "are", + "start": 116.54, + "end": 116.58, + "confidence": 0.788 + }, + { + "text": "you", + "start": 116.58, + "end": 116.64, + "confidence": 0.984 + }, + { + "text": "telling", + "start": 116.64, + "end": 116.68, + "confidence": 0.852 + }, + { + "text": "me,", + "start": 116.68, + "end": 117.44, + "confidence": 0.947 + }, + { + "text": "dude?", + "start": 117.44, + "end": 117.54, + "confidence": 0.966 + } + ] + }, + { + "id": 53, + "seek": 11000, + "start": 118.18, + "end": 120.14, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.88, + "words": [ + { + "text": "What", + "start": 118.18, + "end": 118.36, + "confidence": 0.758 + }, + { + "text": "are", + "start": 118.36, + "end": 118.4, + "confidence": 0.789 + }, + { + "text": "you", + "start": 118.4, + "end": 118.74, + "confidence": 0.985 + }, + { + "text": "telling", + "start": 118.74, + "end": 118.82, + "confidence": 0.858 + }, + { + "text": "me,", + "start": 118.82, + "end": 120.1, + "confidence": 0.948 + }, + { + "text": "dude?", + "start": 120.1, + "end": 120.14, + "confidence": 0.968 + } + ] + }, + { + "id": 54, + "seek": 11000, + "start": 120.14, + "end": 120.38, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, 
+ "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.884, + "words": [ + { + "text": "What", + "start": 120.14, + "end": 120.18, + "confidence": 0.757 + }, + { + "text": "are", + "start": 120.18, + "end": 120.22, + "confidence": 0.808 + }, + { + "text": "you", + "start": 120.22, + "end": 120.26, + "confidence": 0.986 + }, + { + "text": "telling", + "start": 120.26, + "end": 120.3, + "confidence": 0.857 + }, + { + "text": "me,", + "start": 120.3, + "end": 120.34, + "confidence": 0.954 + }, + { + "text": "dude?", + "start": 120.34, + "end": 120.38, + "confidence": 0.966 + } + ] + }, + { + "id": 55, + "seek": 11000, + "start": 121.52, + "end": 123.36, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.874, + "words": [ + { + "text": "What", + "start": 121.52, + "end": 122.72, + "confidence": 0.718 + }, + { + "text": "are", + "start": 122.72, + "end": 123.2, + "confidence": 0.788 + }, + { + "text": "you", + "start": 123.2, + "end": 123.24, + "confidence": 0.986 + }, + { + "text": "telling", + "start": 123.24, + "end": 123.28, + "confidence": 0.865 + }, + { + "text": "me,", + "start": 123.28, + "end": 123.32, + "confidence": 0.952 + }, + { + "text": "dude?", + "start": 123.32, + "end": 123.36, + "confidence": 0.968 + } + ] + }, + { + "id": 56, + "seek": 11000, + "start": 123.52, + "end": 125.14, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.882, + "words": [ + { + "text": "What", + "start": 123.52, + "end": 124.56, + "confidence": 0.718 + }, + { + "text": "are", 
+ "start": 124.56, + "end": 124.74, + "confidence": 0.821 + }, + { + "text": "you", + "start": 124.74, + "end": 125.02, + "confidence": 0.989 + }, + { + "text": "telling", + "start": 125.02, + "end": 125.06, + "confidence": 0.878 + }, + { + "text": "me,", + "start": 125.06, + "end": 125.1, + "confidence": 0.952 + }, + { + "text": "dude?", + "start": 125.1, + "end": 125.14, + "confidence": 0.968 + } + ] + }, + { + "id": 57, + "seek": 11000, + "start": 125.52, + "end": 127.58, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.894, + "words": [ + { + "text": "What", + "start": 125.52, + "end": 126.58, + "confidence": 0.747 + }, + { + "text": "are", + "start": 126.58, + "end": 126.9, + "confidence": 0.837 + }, + { + "text": "you", + "start": 126.9, + "end": 127.06, + "confidence": 0.991 + }, + { + "text": "telling", + "start": 127.06, + "end": 127.14, + "confidence": 0.883 + }, + { + "text": "me,", + "start": 127.14, + "end": 127.54, + "confidence": 0.962 + }, + { + "text": "dude?", + "start": 127.54, + "end": 127.58, + "confidence": 0.969 + } + ] + }, + { + "id": 58, + "seek": 11000, + "start": 127.58, + "end": 129.28, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.895, + "words": [ + { + "text": "What", + "start": 127.58, + "end": 128.98, + "confidence": 0.719 + }, + { + "text": "are", + "start": 128.98, + "end": 129.02, + "confidence": 0.859 + }, + { + "text": "you", + "start": 129.02, + "end": 129.06, + "confidence": 0.992 + }, + { + "text": "telling", + "start": 129.06, + "end": 129.1, + "confidence": 
0.896 + }, + { + "text": "me,", + "start": 129.1, + "end": 129.14, + "confidence": 0.965 + }, + { + "text": "dude?", + "start": 129.14, + "end": 129.28, + "confidence": 0.971 + } + ] + }, + { + "id": 59, + "seek": 11000, + "start": 129.72, + "end": 131.68, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.89, + "words": [ + { + "text": "What", + "start": 129.72, + "end": 131.02, + "confidence": 0.703 + }, + { + "text": "are", + "start": 131.02, + "end": 131.46, + "confidence": 0.855 + }, + { + "text": "you", + "start": 131.46, + "end": 131.5, + "confidence": 0.992 + }, + { + "text": "telling", + "start": 131.5, + "end": 131.54, + "confidence": 0.89 + }, + { + "text": "me,", + "start": 131.54, + "end": 131.64, + "confidence": 0.964 + }, + { + "text": "dude?", + "start": 131.64, + "end": 131.68, + "confidence": 0.97 + } + ] + }, + { + "id": 60, + "seek": 11000, + "start": 131.68, + "end": 133.0, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.903, + "words": [ + { + "text": "What", + "start": 131.68, + "end": 132.36, + "confidence": 0.746 + }, + { + "text": "are", + "start": 132.36, + "end": 132.4, + "confidence": 0.86 + }, + { + "text": "you", + "start": 132.4, + "end": 132.44, + "confidence": 0.993 + }, + { + "text": "telling", + "start": 132.44, + "end": 132.48, + "confidence": 0.903 + }, + { + "text": "me,", + "start": 132.48, + "end": 132.52, + "confidence": 0.969 + }, + { + "text": "dude?", + "start": 132.52, + "end": 133.0, + "confidence": 0.972 + } + ] + }, + { + "id": 61, + "seek": 11000, + "start": 
133.72, + "end": 135.54, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.891, + "words": [ + { + "text": "What", + "start": 133.72, + "end": 134.08, + "confidence": 0.692 + }, + { + "text": "are", + "start": 134.08, + "end": 134.14, + "confidence": 0.859 + }, + { + "text": "you", + "start": 134.14, + "end": 134.38, + "confidence": 0.993 + }, + { + "text": "telling", + "start": 134.38, + "end": 134.42, + "confidence": 0.9 + }, + { + "text": "me,", + "start": 134.42, + "end": 135.5, + "confidence": 0.971 + }, + { + "text": "dude?", + "start": 135.5, + "end": 135.54, + "confidence": 0.974 + } + ] + }, + { + "id": 62, + "seek": 11000, + "start": 135.54, + "end": 136.88, + "text": " What are you telling me, dude?", + "tokens": [ + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.1162933564521897, + "compression_ratio": 9.840909090909092, + "no_speech_prob": 0.0005178684368729591, + "confidence": 0.855, + "words": [ + { + "text": "What", + "start": 135.54, + "end": 135.58, + "confidence": 0.534 + }, + { + "text": "are", + "start": 135.58, + "end": 135.62, + "confidence": 0.856 + }, + { + "text": "you", + "start": 135.62, + "end": 135.66, + "confidence": 0.993 + }, + { + "text": "telling", + "start": 135.66, + "end": 135.7, + "confidence": 0.906 + }, + { + "text": "me,", + "start": 135.7, + "end": 135.74, + "confidence": 0.972 + }, + { + "text": "dude?", + "start": 135.74, + "end": 136.88, + "confidence": 0.975 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/tiny_auto.cpu/smartphone.mp3.words.json b/tests/expected/tiny_auto.cpu/smartphone.mp3.words.json new file mode 100644 index 
0000000000000000000000000000000000000000..f90643caf0519b49ba0cbc0381cd8d8ced1f218b --- /dev/null +++ b/tests/expected/tiny_auto.cpu/smartphone.mp3.words.json @@ -0,0 +1,5038 @@ +{ + "text": " C'est évidence que dit Nicolas. Mais je me l'étais jamais formulé comme ça. Ce qui fait la force du smartphone, c'est pas seulement la cumulation des fonctions, mais la manière dans quelques interagues entraîne. Et il est d'ailleurs, c'est la photo c'est hyper convaincant. Alors évidemment, il faudrait ajouter les interfaces les grand-attêtes qu'il a été beaucoup très souvent ementionné. Mais bon, il faudrait que les profites aussi de 20 ans pendant l'été, les ordinateurs nous ont appris à piquer sur des icônes. C'est ce que le smartphone ajoute le toucher, qui rend le contact plus direct, plus sensible. Et puis, évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté tout flu de la navigation web pour aller directement en but. Bref, tout ça, ce sont les conditions qui permettent de créer cette objet, en Nicolas, dit qu'il est très fondablement inédit dans l'histoire de l'humanité. Mais ça s'assoulait d'une autre interrogation. Est-ce que le fait que cette objet soit inédit un d'huies que notre rapport a lui est aussi un rapport inédit? Je veux dire, est-ce que le rapport qu'on a au sein de foi n'est comparable à celui qu'on entretenait à d'autres objectes techniques comme la voiture ou le téléphone? Il n'y a pas d'équivalent. On s'est espécie de nous voter dans la relation à l'objet. C'est facilement éterréciant parce qu'on a impression de, comme le 10, les utilisateurs et les efforts, elles aident dépendant de cette objet d'un lieu, en fait, une espèce de relation de médiation avec le monde qui rendent un peu avec la même sédiforme de le jeu. Donc, à objets inédits, rapport inédits. Et, ce rapport, si j'en prends Nicolas, frère caractérisée par un mélange de dépenses et de rojets. 
Bon, en vrai, il faudrait remonter très très finement toute l'histoire des objectes techniques et de leur infération dans le vie pour déterminer si ce rapport est totalement inédit. Mais j'ai l'impression comme ça que Nicolas se trompe pas vraiment. Pour autant, je sache. Il y a eu plein de discussions autour de la voiture ou même du téléphone. Mais la dépense n'était pas du même mort, donc le rejet n'en plus n'était pas du même mort. On peut adorer sa bagnure, en avoir besoin pour plein de choses. Et là, le soir, quand on va se coucher, on la laisse. On l'a pas dans la main, quand on est collis, quand on n'en mène pas au chiot. On pouvait être émervé par son mome qui occupeait la ligne de téléphone pendant une heure chaque soir pour discuter avec un copain. Mais ça ne ressemble pas à ce qu'on peut ressentir à voir même mome aujourd'hui continuuellement avec son smartphone dans la main, comme c'était une sorte de estimateur extère de l'intempis de lâcher à l'éantrénée, ça m'a eu immédiate. Bon, je dis ça pour le mome, mais évidemment, va là pour nos aussi. Donc, rapport immédiate d'accord. Mais pourquoi, à ton impression qu'on en sortira jamais? Et puis, il faut en remettre la faute sur les gens qui ont créé cette route merveilleux et diabolique, qui a dit à bollique par coeur, merveilleux. Les économistes parlent de dépendance du santé. 
Ces vidéos, en fait, on est un santé qui a été établie, c'est un soit mon termine, soit définissant des beurs, on définisse un signalétique.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.42, + "end": 1.38, + "text": " C'est évidence que dit Nicolas.", + "tokens": [ + 383, + 6, + 377, + 20090, + 2778, + 631, + 6176, + 38268, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.625, + "words": [ + { + "text": "C'est", + "start": 0.42, + "end": 0.66, + "confidence": 0.849 + }, + { + "text": "évidence", + "start": 0.66, + "end": 0.94, + "confidence": 0.368 + }, + { + "text": "que", + "start": 0.94, + "end": 1.06, + "confidence": 0.883 + }, + { + "text": "dit", + "start": 1.06, + "end": 1.16, + "confidence": 0.344 + }, + { + "text": "Nicolas.", + "start": 1.16, + "end": 1.38, + "confidence": 0.921 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 1.66, + "end": 3.62, + "text": " Mais je me l'étais jamais formulé comme ça.", + "tokens": [ + 6313, + 1506, + 385, + 287, + 6, + 22824, + 14540, + 1254, + 425, + 526, + 5173, + 2788, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.745, + "words": [ + { + "text": "Mais", + "start": 1.66, + "end": 1.9, + "confidence": 0.956 + }, + { + "text": "je", + "start": 1.9, + "end": 2.18, + "confidence": 0.629 + }, + { + "text": "me", + "start": 2.18, + "end": 2.3, + "confidence": 0.943 + }, + { + "text": "l'étais", + "start": 2.3, + "end": 2.54, + "confidence": 0.708 + }, + { + "text": "jamais", + "start": 2.54, + "end": 2.78, + "confidence": 0.962 + }, + { + "text": "formulé", + "start": 2.78, + "end": 3.2, + "confidence": 0.541 + }, + { + "text": "comme", + "start": 3.2, + "end": 3.34, + "confidence": 0.975 + }, + { + "text": "ça.", + "start": 3.34, + "end": 3.62, + "confidence": 0.979 
+ } + ] + }, + { + "id": 2, + "seek": 0, + "start": 4.14, + "end": 8.82, + "text": " Ce qui fait la force du smartphone, c'est pas seulement la cumulation des fonctions, mais la manière", + "tokens": [ + 8257, + 1956, + 3887, + 635, + 3464, + 1581, + 13307, + 11, + 269, + 6, + 377, + 1736, + 27772, + 635, + 12713, + 2776, + 730, + 17290, + 3916, + 11, + 2420, + 635, + 22267 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.762, + "words": [ + { + "text": "Ce", + "start": 4.14, + "end": 4.22, + "confidence": 0.39 + }, + { + "text": "qui", + "start": 4.22, + "end": 4.34, + "confidence": 0.934 + }, + { + "text": "fait", + "start": 4.34, + "end": 4.46, + "confidence": 0.735 + }, + { + "text": "la", + "start": 4.46, + "end": 4.56, + "confidence": 0.988 + }, + { + "text": "force", + "start": 4.56, + "end": 4.96, + "confidence": 0.933 + }, + { + "text": "du", + "start": 4.96, + "end": 5.16, + "confidence": 0.936 + }, + { + "text": "smartphone,", + "start": 5.16, + "end": 5.74, + "confidence": 0.909 + }, + { + "text": "c'est", + "start": 5.74, + "end": 6.12, + "confidence": 0.871 + }, + { + "text": "pas", + "start": 6.12, + "end": 6.2, + "confidence": 0.982 + }, + { + "text": "seulement", + "start": 6.2, + "end": 6.52, + "confidence": 0.991 + }, + { + "text": "la", + "start": 6.52, + "end": 6.76, + "confidence": 0.627 + }, + { + "text": "cumulation", + "start": 6.76, + "end": 7.18, + "confidence": 0.679 + }, + { + "text": "des", + "start": 7.18, + "end": 7.54, + "confidence": 0.752 + }, + { + "text": "fonctions,", + "start": 7.54, + "end": 8.1, + "confidence": 0.826 + }, + { + "text": "mais", + "start": 8.1, + "end": 8.42, + "confidence": 0.511 + }, + { + "text": "la", + "start": 8.42, + "end": 8.58, + "confidence": 0.717 + }, + { + "text": "manière", + "start": 8.58, + "end": 8.82, + "confidence": 0.457 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 8.9, 
+ "end": 10.84, + "text": " dans quelques interagues entraîne.", + "tokens": [ + 2680, + 16597, + 728, + 559, + 1247, + 22284, + 24741, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.3, + "words": [ + { + "text": "dans", + "start": 8.9, + "end": 9.04, + "confidence": 0.329 + }, + { + "text": "quelques", + "start": 9.04, + "end": 9.26, + "confidence": 0.282 + }, + { + "text": "interagues", + "start": 9.26, + "end": 10.18, + "confidence": 0.238 + }, + { + "text": "entraîne.", + "start": 10.18, + "end": 10.84, + "confidence": 0.421 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 11.04, + "end": 12.92, + "text": " Et il est d'ailleurs, c'est la photo c'est hyper convaincant.", + "tokens": [ + 3790, + 1930, + 871, + 274, + 6, + 19400, + 11, + 269, + 6, + 377, + 635, + 5052, + 269, + 6, + 377, + 9848, + 3754, + 491, + 66, + 394, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.625, + "words": [ + { + "text": "Et", + "start": 11.04, + "end": 11.1, + "confidence": 0.394 + }, + { + "text": "il", + "start": 11.1, + "end": 11.24, + "confidence": 0.128 + }, + { + "text": "est", + "start": 11.24, + "end": 11.38, + "confidence": 0.237 + }, + { + "text": "d'ailleurs,", + "start": 11.38, + "end": 11.62, + "confidence": 0.904 + }, + { + "text": "c'est", + "start": 11.62, + "end": 11.78, + "confidence": 0.879 + }, + { + "text": "la", + "start": 11.78, + "end": 11.82, + "confidence": 0.968 + }, + { + "text": "photo", + "start": 11.82, + "end": 11.92, + "confidence": 0.811 + }, + { + "text": "c'est", + "start": 11.92, + "end": 12.18, + "confidence": 0.792 + }, + { + "text": "hyper", + "start": 12.18, + "end": 12.38, + "confidence": 0.939 + }, + { + "text": "convaincant.", + "start": 12.38, + "end": 12.92, + "confidence": 0.494 + } + ] + }, + 
{ + "id": 5, + "seek": 0, + "start": 13.26, + "end": 18.03, + "text": " Alors évidemment, il faudrait ajouter les interfaces les grand-attêtes qu'il a été beaucoup", + "tokens": [ + 9946, + 24724, + 11, + 1930, + 38694, + 8645, + 17680, + 23985, + 1512, + 28416, + 1512, + 2697, + 12, + 1591, + 38262, + 421, + 6, + 388, + 257, + 8862, + 8796 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.553, + "words": [ + { + "text": "Alors", + "start": 13.26, + "end": 13.48, + "confidence": 0.904 + }, + { + "text": "évidemment,", + "start": 13.48, + "end": 13.92, + "confidence": 0.785 + }, + { + "text": "il", + "start": 13.92, + "end": 14.4, + "confidence": 0.961 + }, + { + "text": "faudrait", + "start": 14.4, + "end": 14.76, + "confidence": 0.85 + }, + { + "text": "ajouter", + "start": 14.76, + "end": 15.38, + "confidence": 0.874 + }, + { + "text": "les", + "start": 15.38, + "end": 15.62, + "confidence": 0.933 + }, + { + "text": "interfaces", + "start": 15.62, + "end": 15.86, + "confidence": 0.359 + }, + { + "text": "les", + "start": 15.86, + "end": 16.5, + "confidence": 0.414 + }, + { + "text": "grand-attêtes", + "start": 16.5, + "end": 16.94, + "confidence": 0.178 + }, + { + "text": "qu'il", + "start": 16.94, + "end": 17.18, + "confidence": 0.632 + }, + { + "text": "a", + "start": 17.18, + "end": 17.24, + "confidence": 0.969 + }, + { + "text": "été", + "start": 17.24, + "end": 17.5, + "confidence": 0.957 + }, + { + "text": "beaucoup", + "start": 17.5, + "end": 18.03, + "confidence": 0.572 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 18.03, + "end": 19.26, + "text": " très souvent ementionné.", + "tokens": [ + 5732, + 20847, + 846, + 1251, + 15055, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.541, + "words": [ + { + "text": "très", 
+ "start": 18.03, + "end": 18.66, + "confidence": 0.959 + }, + { + "text": "souvent", + "start": 18.66, + "end": 18.82, + "confidence": 0.994 + }, + { + "text": "ementionné.", + "start": 18.82, + "end": 19.26, + "confidence": 0.365 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 19.86, + "end": 23.54, + "text": " Mais bon, il faudrait que les profites aussi de 20 ans pendant l'été, les ordinateurs", + "tokens": [ + 6313, + 4428, + 11, + 1930, + 38694, + 8645, + 631, + 1512, + 1740, + 3324, + 6212, + 368, + 945, + 1567, + 17338, + 287, + 6, + 21210, + 11, + 1512, + 4792, + 13923, + 2156 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.648, + "words": [ + { + "text": "Mais", + "start": 19.86, + "end": 20.22, + "confidence": 0.975 + }, + { + "text": "bon,", + "start": 20.22, + "end": 20.56, + "confidence": 0.479 + }, + { + "text": "il", + "start": 20.56, + "end": 20.6, + "confidence": 0.978 + }, + { + "text": "faudrait", + "start": 20.6, + "end": 20.74, + "confidence": 0.753 + }, + { + "text": "que", + "start": 20.74, + "end": 20.92, + "confidence": 0.378 + }, + { + "text": "les", + "start": 20.92, + "end": 20.98, + "confidence": 0.182 + }, + { + "text": "profites", + "start": 20.98, + "end": 21.26, + "confidence": 0.622 + }, + { + "text": "aussi", + "start": 21.26, + "end": 21.7, + "confidence": 0.502 + }, + { + "text": "de", + "start": 21.7, + "end": 21.84, + "confidence": 0.468 + }, + { + "text": "20", + "start": 21.84, + "end": 22.08, + "confidence": 0.924 + }, + { + "text": "ans", + "start": 22.08, + "end": 22.28, + "confidence": 0.937 + }, + { + "text": "pendant", + "start": 22.28, + "end": 22.46, + "confidence": 0.903 + }, + { + "text": "l'été,", + "start": 22.46, + "end": 22.96, + "confidence": 0.499 + }, + { + "text": "les", + "start": 22.96, + "end": 23.04, + "confidence": 0.861 + }, + { + "text": "ordinateurs", + "start": 23.04, + "end": 
23.54, + "confidence": 0.934 + } + ] + }, + { + "id": 8, + "seek": 0, + "start": 23.58, + "end": 25.26, + "text": " nous ont appris à piquer sur des icônes.", + "tokens": [ + 4666, + 6592, + 724, + 5714, + 1531, + 280, + 23909, + 1022, + 730, + 4376, + 2851, + 4081, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.663, + "words": [ + { + "text": "nous", + "start": 23.58, + "end": 23.74, + "confidence": 0.833 + }, + { + "text": "ont", + "start": 23.74, + "end": 23.86, + "confidence": 0.976 + }, + { + "text": "appris", + "start": 23.86, + "end": 24.06, + "confidence": 0.947 + }, + { + "text": "à", + "start": 24.06, + "end": 24.24, + "confidence": 0.24 + }, + { + "text": "piquer", + "start": 24.24, + "end": 24.42, + "confidence": 0.45 + }, + { + "text": "sur", + "start": 24.42, + "end": 24.68, + "confidence": 0.749 + }, + { + "text": "des", + "start": 24.68, + "end": 24.8, + "confidence": 0.96 + }, + { + "text": "icônes.", + "start": 24.8, + "end": 25.26, + "confidence": 0.656 + } + ] + }, + { + "id": 9, + "seek": 2556, + "start": 25.58, + "end": 30.56, + "text": " C'est ce que le smartphone ajoute le toucher, qui rend le contact plus direct, plus sensible.", + "tokens": [ + 383, + 6, + 377, + 1769, + 631, + 476, + 13307, + 17680, + 14040, + 476, + 2557, + 260, + 11, + 1956, + 6125, + 476, + 3385, + 1804, + 2047, + 11, + 1804, + 25380, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.643, + "words": [ + { + "text": "C'est", + "start": 25.58, + "end": 25.66, + "confidence": 0.449 + }, + { + "text": "ce", + "start": 25.66, + "end": 25.72, + "confidence": 0.491 + }, + { + "text": "que", + "start": 25.72, + "end": 25.94, + "confidence": 0.935 + }, + { + "text": "le", + "start": 25.94, + "end": 26.6, + "confidence": 0.377 + }, + { 
+ "text": "smartphone", + "start": 26.6, + "end": 26.86, + "confidence": 0.977 + }, + { + "text": "ajoute", + "start": 26.86, + "end": 27.42, + "confidence": 0.813 + }, + { + "text": "le", + "start": 27.42, + "end": 27.64, + "confidence": 0.957 + }, + { + "text": "toucher,", + "start": 27.64, + "end": 28.06, + "confidence": 0.73 + }, + { + "text": "qui", + "start": 28.06, + "end": 28.18, + "confidence": 0.209 + }, + { + "text": "rend", + "start": 28.18, + "end": 28.34, + "confidence": 0.877 + }, + { + "text": "le", + "start": 28.34, + "end": 28.68, + "confidence": 0.991 + }, + { + "text": "contact", + "start": 28.68, + "end": 28.96, + "confidence": 0.854 + }, + { + "text": "plus", + "start": 28.96, + "end": 29.48, + "confidence": 0.865 + }, + { + "text": "direct,", + "start": 29.48, + "end": 29.96, + "confidence": 0.692 + }, + { + "text": "plus", + "start": 29.96, + "end": 30.24, + "confidence": 0.928 + }, + { + "text": "sensible.", + "start": 30.24, + "end": 30.56, + "confidence": 0.332 + } + ] + }, + { + "id": 10, + "seek": 2556, + "start": 31.04, + "end": 34.34, + "text": " Et puis, évidemment, il faudrait parler aussi des applications qui permettent de contourner", + "tokens": [ + 3790, + 9093, + 11, + 24724, + 11, + 1930, + 38694, + 8645, + 16421, + 6212, + 730, + 5821, + 1956, + 21540, + 317, + 368, + 21234, + 1193 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.827, + "words": [ + { + "text": "Et", + "start": 31.04, + "end": 31.24, + "confidence": 0.914 + }, + { + "text": "puis,", + "start": 31.24, + "end": 31.34, + "confidence": 0.713 + }, + { + "text": "évidemment,", + "start": 31.34, + "end": 31.64, + "confidence": 0.378 + }, + { + "text": "il", + "start": 31.64, + "end": 31.78, + "confidence": 0.935 + }, + { + "text": "faudrait", + "start": 31.78, + "end": 31.88, + "confidence": 0.99 + }, + { + "text": "parler", + "start": 31.88, + 
"end": 32.12, + "confidence": 0.882 + }, + { + "text": "aussi", + "start": 32.12, + "end": 32.3, + "confidence": 0.889 + }, + { + "text": "des", + "start": 32.3, + "end": 32.44, + "confidence": 0.927 + }, + { + "text": "applications", + "start": 32.44, + "end": 32.78, + "confidence": 0.839 + }, + { + "text": "qui", + "start": 32.78, + "end": 33.16, + "confidence": 0.652 + }, + { + "text": "permettent", + "start": 33.16, + "end": 33.68, + "confidence": 0.951 + }, + { + "text": "de", + "start": 33.68, + "end": 33.9, + "confidence": 0.952 + }, + { + "text": "contourner", + "start": 33.9, + "end": 34.34, + "confidence": 0.787 + } + ] + }, + { + "id": 11, + "seek": 2556, + "start": 34.34, + "end": 37.72, + "text": " le côté tout flu de la navigation web pour aller directement en but.", + "tokens": [ + 476, + 18437, + 3486, + 5029, + 368, + 635, + 17346, + 3670, + 2016, + 8722, + 37297, + 465, + 457, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.766, + "words": [ + { + "text": "le", + "start": 34.34, + "end": 34.52, + "confidence": 0.989 + }, + { + "text": "côté", + "start": 34.52, + "end": 34.72, + "confidence": 0.983 + }, + { + "text": "tout", + "start": 34.72, + "end": 34.96, + "confidence": 0.954 + }, + { + "text": "flu", + "start": 34.96, + "end": 35.2, + "confidence": 0.548 + }, + { + "text": "de", + "start": 35.2, + "end": 35.42, + "confidence": 0.248 + }, + { + "text": "la", + "start": 35.42, + "end": 35.72, + "confidence": 0.902 + }, + { + "text": "navigation", + "start": 35.72, + "end": 36.04, + "confidence": 0.913 + }, + { + "text": "web", + "start": 36.04, + "end": 36.64, + "confidence": 0.93 + }, + { + "text": "pour", + "start": 36.64, + "end": 36.76, + "confidence": 0.746 + }, + { + "text": "aller", + "start": 36.76, + "end": 36.94, + "confidence": 0.992 + }, + { + "text": "directement", + "start": 36.94, + "end": 37.46, + 
"confidence": 0.986 + }, + { + "text": "en", + "start": 37.46, + "end": 37.68, + "confidence": 0.644 + }, + { + "text": "but.", + "start": 37.68, + "end": 37.72, + "confidence": 0.689 + } + ] + }, + { + "id": 12, + "seek": 2556, + "start": 37.72, + "end": 43.06, + "text": " Bref, tout ça, ce sont les conditions qui permettent de créer cette objet, en Nicolas,", + "tokens": [ + 49957, + 11, + 3486, + 2788, + 11, + 1769, + 4900, + 1512, + 4487, + 1956, + 21540, + 317, + 368, + 32062, + 5550, + 14964, + 11, + 465, + 38268, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.725, + "words": [ + { + "text": "Bref,", + "start": 37.72, + "end": 38.9, + "confidence": 0.967 + }, + { + "text": "tout", + "start": 38.9, + "end": 38.98, + "confidence": 0.786 + }, + { + "text": "ça,", + "start": 38.98, + "end": 39.36, + "confidence": 0.985 + }, + { + "text": "ce", + "start": 39.36, + "end": 39.68, + "confidence": 0.929 + }, + { + "text": "sont", + "start": 39.68, + "end": 39.84, + "confidence": 0.963 + }, + { + "text": "les", + "start": 39.84, + "end": 40.06, + "confidence": 0.976 + }, + { + "text": "conditions", + "start": 40.06, + "end": 40.54, + "confidence": 0.96 + }, + { + "text": "qui", + "start": 40.54, + "end": 40.9, + "confidence": 0.995 + }, + { + "text": "permettent", + "start": 40.9, + "end": 41.54, + "confidence": 0.986 + }, + { + "text": "de", + "start": 41.54, + "end": 41.58, + "confidence": 0.989 + }, + { + "text": "créer", + "start": 41.58, + "end": 41.92, + "confidence": 0.918 + }, + { + "text": "cette", + "start": 41.92, + "end": 42.3, + "confidence": 0.518 + }, + { + "text": "objet,", + "start": 42.3, + "end": 42.7, + "confidence": 0.213 + }, + { + "text": "en", + "start": 42.7, + "end": 42.8, + "confidence": 0.191 + }, + { + "text": "Nicolas,", + "start": 42.8, + "end": 43.06, + "confidence": 0.508 + } + ] + }, + { + "id": 13, + "seek": 
2556, + "start": 43.1, + "end": 46.48, + "text": " dit qu'il est très fondablement inédit dans l'histoire de l'humanité.", + "tokens": [ + 6176, + 421, + 6, + 388, + 871, + 5732, + 9557, + 712, + 518, + 294, + 7811, + 270, + 2680, + 287, + 6, + 29093, + 368, + 287, + 6, + 18796, + 5066, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.768, + "words": [ + { + "text": "dit", + "start": 43.1, + "end": 43.52, + "confidence": 0.551 + }, + { + "text": "qu'il", + "start": 43.52, + "end": 43.68, + "confidence": 0.969 + }, + { + "text": "est", + "start": 43.68, + "end": 43.8, + "confidence": 0.851 + }, + { + "text": "très", + "start": 43.8, + "end": 43.94, + "confidence": 0.368 + }, + { + "text": "fondablement", + "start": 43.94, + "end": 44.68, + "confidence": 0.593 + }, + { + "text": "inédit", + "start": 44.68, + "end": 45.52, + "confidence": 0.66 + }, + { + "text": "dans", + "start": 45.52, + "end": 45.74, + "confidence": 0.778 + }, + { + "text": "l'histoire", + "start": 45.74, + "end": 45.98, + "confidence": 0.825 + }, + { + "text": "de", + "start": 45.98, + "end": 46.08, + "confidence": 0.978 + }, + { + "text": "l'humanité.", + "start": 46.08, + "end": 46.48, + "confidence": 0.991 + } + ] + }, + { + "id": 14, + "seek": 2556, + "start": 47.06, + "end": 48.76, + "text": " Mais ça s'assoulait d'une autre interrogation.", + "tokens": [ + 6313, + 2788, + 262, + 6, + 640, + 263, + 35235, + 274, + 6, + 2613, + 15081, + 24871, + 399, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.697, + "words": [ + { + "text": "Mais", + "start": 47.06, + "end": 47.16, + "confidence": 0.939 + }, + { + "text": "ça", + "start": 47.16, + "end": 47.46, + "confidence": 0.854 + }, + { + "text": "s'assoulait", + "start": 47.46, + "end": 47.78, + 
"confidence": 0.591 + }, + { + "text": "d'une", + "start": 47.78, + "end": 48.02, + "confidence": 0.619 + }, + { + "text": "autre", + "start": 48.02, + "end": 48.14, + "confidence": 0.966 + }, + { + "text": "interrogation.", + "start": 48.14, + "end": 48.76, + "confidence": 0.833 + } + ] + }, + { + "id": 15, + "seek": 2556, + "start": 49.26, + "end": 54.34, + "text": " Est-ce que le fait que cette objet soit inédit un d'huies que notre rapport a lui est aussi", + "tokens": [ + 4410, + 12, + 384, + 631, + 476, + 3887, + 631, + 5550, + 14964, + 12703, + 294, + 7811, + 270, + 517, + 274, + 6, + 12086, + 530, + 631, + 10349, + 18018, + 257, + 8783, + 871, + 6212 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.697, + "words": [ + { + "text": "Est-ce", + "start": 49.26, + "end": 49.7, + "confidence": 0.804 + }, + { + "text": "que", + "start": 49.7, + "end": 49.74, + "confidence": 0.989 + }, + { + "text": "le", + "start": 49.74, + "end": 49.78, + "confidence": 0.882 + }, + { + "text": "fait", + "start": 49.78, + "end": 49.96, + "confidence": 0.962 + }, + { + "text": "que", + "start": 49.96, + "end": 50.08, + "confidence": 0.935 + }, + { + "text": "cette", + "start": 50.08, + "end": 50.24, + "confidence": 0.95 + }, + { + "text": "objet", + "start": 50.24, + "end": 50.5, + "confidence": 0.968 + }, + { + "text": "soit", + "start": 50.5, + "end": 50.96, + "confidence": 0.991 + }, + { + "text": "inédit", + "start": 50.96, + "end": 51.78, + "confidence": 0.91 + }, + { + "text": "un", + "start": 51.78, + "end": 52.06, + "confidence": 0.442 + }, + { + "text": "d'huies", + "start": 52.06, + "end": 52.32, + "confidence": 0.298 + }, + { + "text": "que", + "start": 52.32, + "end": 52.4, + "confidence": 0.966 + }, + { + "text": "notre", + "start": 52.4, + "end": 52.58, + "confidence": 0.992 + }, + { + "text": "rapport", + "start": 52.58, + "end": 53.1, + 
"confidence": 0.714 + }, + { + "text": "a", + "start": 53.1, + "end": 53.44, + "confidence": 0.522 + }, + { + "text": "lui", + "start": 53.44, + "end": 53.62, + "confidence": 0.659 + }, + { + "text": "est", + "start": 53.62, + "end": 54.0, + "confidence": 0.643 + }, + { + "text": "aussi", + "start": 54.0, + "end": 54.34, + "confidence": 0.808 + } + ] + }, + { + "id": 16, + "seek": 2556, + "start": 54.34, + "end": 55.32, + "text": " un rapport inédit?", + "tokens": [ + 517, + 18018, + 294, + 7811, + 270, + 2506 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.849, + "words": [ + { + "text": "un", + "start": 54.34, + "end": 54.72, + "confidence": 0.506 + }, + { + "text": "rapport", + "start": 54.72, + "end": 54.94, + "confidence": 0.999 + }, + { + "text": "inédit?", + "start": 54.94, + "end": 55.32, + "confidence": 0.956 + } + ] + }, + { + "id": 17, + "seek": 5548, + "start": 55.6, + "end": 58.6, + "text": " Je veux dire, est-ce que le rapport qu'on a au sein de foi n'est comparable à celui", + "tokens": [ + 2588, + 16389, + 1264, + 11, + 871, + 12, + 384, + 631, + 476, + 18018, + 421, + 6, + 266, + 257, + 1609, + 6195, + 368, + 6901, + 297, + 6, + 377, + 6311, + 712, + 1531, + 22829 + ], + "temperature": 0.0, + "avg_logprob": -0.6093524909167556, + "compression_ratio": 1.6292834890965733, + "no_speech_prob": 6.752492481609806e-05, + "confidence": 0.626, + "words": [ + { + "text": "Je", + "start": 55.6, + "end": 55.84, + "confidence": 0.335 + }, + { + "text": "veux", + "start": 55.84, + "end": 55.94, + "confidence": 0.432 + }, + { + "text": "dire,", + "start": 55.94, + "end": 56.16, + "confidence": 0.992 + }, + { + "text": "est-ce", + "start": 56.16, + "end": 56.34, + "confidence": 0.951 + }, + { + "text": "que", + "start": 56.34, + "end": 56.4, + "confidence": 0.973 + }, + { + "text": "le", + "start": 56.4, + "end": 56.54, + "confidence": 0.987 + 
}, + { + "text": "rapport", + "start": 56.54, + "end": 56.76, + "confidence": 0.999 + }, + { + "text": "qu'on", + "start": 56.76, + "end": 57.1, + "confidence": 0.906 + }, + { + "text": "a", + "start": 57.1, + "end": 57.14, + "confidence": 0.974 + }, + { + "text": "au", + "start": 57.14, + "end": 57.26, + "confidence": 0.319 + }, + { + "text": "sein", + "start": 57.26, + "end": 57.36, + "confidence": 0.244 + }, + { + "text": "de", + "start": 57.36, + "end": 57.44, + "confidence": 0.177 + }, + { + "text": "foi", + "start": 57.44, + "end": 57.54, + "confidence": 0.163 + }, + { + "text": "n'est", + "start": 57.54, + "end": 57.78, + "confidence": 0.777 + }, + { + "text": "comparable", + "start": 57.78, + "end": 58.32, + "confidence": 0.606 + }, + { + "text": "à", + "start": 58.32, + "end": 58.46, + "confidence": 0.482 + }, + { + "text": "celui", + "start": 58.46, + "end": 58.6, + "confidence": 0.831 + } + ] + }, + { + "id": 18, + "seek": 5548, + "start": 58.72, + "end": 62.81, + "text": " qu'on entretenait à d'autres objectes techniques comme la voiture ou le téléphone?", + "tokens": [ + 421, + 6, + 266, + 3962, + 1147, + 1001, + 1531, + 274, + 6, + 16752, + 2657, + 279, + 7512, + 5173, + 635, + 38859, + 2820, + 476, + 47159, + 2506 + ], + "temperature": 0.0, + "avg_logprob": -0.6093524909167556, + "compression_ratio": 1.6292834890965733, + "no_speech_prob": 6.752492481609806e-05, + "confidence": 0.781, + "words": [ + { + "text": "qu'on", + "start": 58.72, + "end": 58.88, + "confidence": 0.941 + }, + { + "text": "entretenait", + "start": 58.88, + "end": 59.38, + "confidence": 0.657 + }, + { + "text": "à", + "start": 59.38, + "end": 59.44, + "confidence": 0.973 + }, + { + "text": "d'autres", + "start": 59.44, + "end": 59.64, + "confidence": 0.909 + }, + { + "text": "objectes", + "start": 59.64, + "end": 60.02, + "confidence": 0.528 + }, + { + "text": "techniques", + "start": 60.02, + "end": 60.38, + "confidence": 0.547 + }, + { + "text": "comme", + "start": 60.38, + 
"end": 60.86, + "confidence": 0.662 + }, + { + "text": "la", + "start": 60.86, + "end": 61.46, + "confidence": 0.898 + }, + { + "text": "voiture", + "start": 61.46, + "end": 61.8, + "confidence": 0.954 + }, + { + "text": "ou", + "start": 61.8, + "end": 62.32, + "confidence": 0.74 + }, + { + "text": "le", + "start": 62.32, + "end": 62.6, + "confidence": 0.847 + }, + { + "text": "téléphone?", + "start": 62.6, + "end": 62.81, + "confidence": 0.979 + } + ] + }, + { + "id": 19, + "seek": 5548, + "start": 62.81, + "end": 66.07, + "text": " Il n'y a pas d'équivalent.", + "tokens": [ + 4416, + 297, + 6, + 88, + 257, + 1736, + 274, + 6, + 20183, + 3576, + 317, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.6093524909167556, + "compression_ratio": 1.6292834890965733, + "no_speech_prob": 6.752492481609806e-05, + "confidence": 0.882, + "words": [ + { + "text": "Il", + "start": 62.81, + "end": 65.42, + "confidence": 0.866 + }, + { + "text": "n'y", + "start": 65.42, + "end": 65.52, + "confidence": 0.865 + }, + { + "text": "a", + "start": 65.52, + "end": 65.56, + "confidence": 0.965 + }, + { + "text": "pas", + "start": 65.56, + "end": 65.62, + "confidence": 0.998 + }, + { + "text": "d'équivalent.", + "start": 65.62, + "end": 66.07, + "confidence": 0.858 + } + ] + }, + { + "id": 20, + "seek": 5548, + "start": 66.07, + "end": 69.74, + "text": " On s'est espécie de nous voter dans la relation à l'objet.", + "tokens": [ + 1282, + 262, + 6, + 377, + 7089, + 526, + 4260, + 368, + 4666, + 21722, + 2680, + 635, + 9721, + 1531, + 287, + 6, + 996, + 7108, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.6093524909167556, + "compression_ratio": 1.6292834890965733, + "no_speech_prob": 6.752492481609806e-05, + "confidence": 0.552, + "words": [ + { + "text": "On", + "start": 66.07, + "end": 67.0, + "confidence": 0.29 + }, + { + "text": "s'est", + "start": 67.0, + "end": 67.26, + "confidence": 0.562 + }, + { + "text": "espécie", + "start": 67.26, + "end": 67.46, + "confidence": 0.233 + 
}, + { + "text": "de", + "start": 67.46, + "end": 67.58, + "confidence": 0.963 + }, + { + "text": "nous", + "start": 67.58, + "end": 67.8, + "confidence": 0.631 + }, + { + "text": "voter", + "start": 67.8, + "end": 68.24, + "confidence": 0.241 + }, + { + "text": "dans", + "start": 68.24, + "end": 68.68, + "confidence": 0.853 + }, + { + "text": "la", + "start": 68.68, + "end": 68.88, + "confidence": 0.61 + }, + { + "text": "relation", + "start": 68.88, + "end": 69.22, + "confidence": 0.932 + }, + { + "text": "à", + "start": 69.22, + "end": 69.34, + "confidence": 0.792 + }, + { + "text": "l'objet.", + "start": 69.34, + "end": 69.74, + "confidence": 0.889 + } + ] + }, + { + "id": 21, + "seek": 5548, + "start": 70.18, + "end": 74.78, + "text": " C'est facilement éterréciant parce qu'on a impression de, comme le 10, les utilisateurs", + "tokens": [ + 383, + 6, + 377, + 23670, + 518, + 1136, + 391, + 10521, + 537, + 394, + 6992, + 421, + 6, + 266, + 257, + 9995, + 368, + 11, + 5173, + 476, + 1266, + 11, + 1512, + 33643, + 25929 + ], + "temperature": 0.0, + "avg_logprob": -0.6093524909167556, + "compression_ratio": 1.6292834890965733, + "no_speech_prob": 6.752492481609806e-05, + "confidence": 0.491, + "words": [ + { + "text": "C'est", + "start": 70.18, + "end": 70.34, + "confidence": 0.907 + }, + { + "text": "facilement", + "start": 70.34, + "end": 70.72, + "confidence": 0.527 + }, + { + "text": "éterréciant", + "start": 70.72, + "end": 71.56, + "confidence": 0.277 + }, + { + "text": "parce", + "start": 71.56, + "end": 71.82, + "confidence": 0.298 + }, + { + "text": "qu'on", + "start": 71.82, + "end": 72.3, + "confidence": 0.775 + }, + { + "text": "a", + "start": 72.3, + "end": 72.4, + "confidence": 0.563 + }, + { + "text": "impression", + "start": 72.4, + "end": 72.74, + "confidence": 0.148 + }, + { + "text": "de,", + "start": 72.74, + "end": 73.36, + "confidence": 0.211 + }, + { + "text": "comme", + "start": 73.36, + "end": 73.8, + "confidence": 0.843 + }, + { + "text": 
"le", + "start": 73.8, + "end": 73.96, + "confidence": 0.9 + }, + { + "text": "10,", + "start": 73.96, + "end": 74.28, + "confidence": 0.395 + }, + { + "text": "les", + "start": 74.28, + "end": 74.32, + "confidence": 0.581 + }, + { + "text": "utilisateurs", + "start": 74.32, + "end": 74.78, + "confidence": 0.745 + } + ] + }, + { + "id": 22, + "seek": 5548, + "start": 74.8, + "end": 77.93, + "text": " et les efforts, elles aident dépendant de cette objet d'un lieu, en fait, une espèce de", + "tokens": [ + 1030, + 1512, + 6484, + 11, + 23576, + 257, + 1078, + 45768, + 394, + 368, + 5550, + 14964, + 274, + 6, + 409, + 26036, + 11, + 465, + 3887, + 11, + 2251, + 7089, + 30236, + 368 + ], + "temperature": 0.0, + "avg_logprob": -0.6093524909167556, + "compression_ratio": 1.6292834890965733, + "no_speech_prob": 6.752492481609806e-05, + "confidence": 0.464, + "words": [ + { + "text": "et", + "start": 74.8, + "end": 74.92, + "confidence": 0.147 + }, + { + "text": "les", + "start": 74.92, + "end": 74.96, + "confidence": 0.242 + }, + { + "text": "efforts,", + "start": 74.96, + "end": 75.22, + "confidence": 0.108 + }, + { + "text": "elles", + "start": 75.22, + "end": 75.32, + "confidence": 0.108 + }, + { + "text": "aident", + "start": 75.32, + "end": 75.44, + "confidence": 0.289 + }, + { + "text": "dépendant", + "start": 75.44, + "end": 76.0, + "confidence": 0.576 + }, + { + "text": "de", + "start": 76.0, + "end": 76.16, + "confidence": 0.301 + }, + { + "text": "cette", + "start": 76.16, + "end": 76.2, + "confidence": 0.519 + }, + { + "text": "objet", + "start": 76.2, + "end": 76.48, + "confidence": 0.961 + }, + { + "text": "d'un", + "start": 76.48, + "end": 76.86, + "confidence": 0.883 + }, + { + "text": "lieu,", + "start": 76.86, + "end": 77.06, + "confidence": 0.333 + }, + { + "text": "en", + "start": 77.06, + "end": 77.18, + "confidence": 0.837 + }, + { + "text": "fait,", + "start": 77.18, + "end": 77.42, + "confidence": 0.958 + }, + { + "text": "une", + "start": 77.42, + 
"end": 77.46, + "confidence": 0.57 + }, + { + "text": "espèce", + "start": 77.46, + "end": 77.74, + "confidence": 0.961 + }, + { + "text": "de", + "start": 77.74, + "end": 77.93, + "confidence": 0.601 + } + ] + }, + { + "id": 23, + "seek": 5548, + "start": 77.93, + "end": 82.98, + "text": " relation de médiation avec le monde qui rendent un peu avec la même sédiforme de", + "tokens": [ + 9721, + 368, + 42436, + 399, + 4163, + 476, + 10431, + 1956, + 6125, + 317, + 517, + 5604, + 4163, + 635, + 5698, + 262, + 7811, + 8629, + 68, + 368 + ], + "temperature": 0.0, + "avg_logprob": -0.6093524909167556, + "compression_ratio": 1.6292834890965733, + "no_speech_prob": 6.752492481609806e-05, + "confidence": 0.556, + "words": [ + { + "text": "relation", + "start": 77.93, + "end": 78.32, + "confidence": 0.839 + }, + { + "text": "de", + "start": 78.32, + "end": 78.7, + "confidence": 0.723 + }, + { + "text": "médiation", + "start": 78.7, + "end": 79.4, + "confidence": 0.872 + }, + { + "text": "avec", + "start": 79.4, + "end": 79.74, + "confidence": 0.964 + }, + { + "text": "le", + "start": 79.74, + "end": 79.88, + "confidence": 0.985 + }, + { + "text": "monde", + "start": 79.88, + "end": 80.24, + "confidence": 0.909 + }, + { + "text": "qui", + "start": 80.24, + "end": 81.02, + "confidence": 0.871 + }, + { + "text": "rendent", + "start": 81.02, + "end": 81.74, + "confidence": 0.623 + }, + { + "text": "un", + "start": 81.74, + "end": 81.84, + "confidence": 0.224 + }, + { + "text": "peu", + "start": 81.84, + "end": 81.88, + "confidence": 0.223 + }, + { + "text": "avec", + "start": 81.88, + "end": 82.1, + "confidence": 0.863 + }, + { + "text": "la", + "start": 82.1, + "end": 82.24, + "confidence": 0.565 + }, + { + "text": "même", + "start": 82.24, + "end": 82.32, + "confidence": 0.384 + }, + { + "text": "sédiforme", + "start": 82.32, + "end": 82.86, + "confidence": 0.29 + }, + { + "text": "de", + "start": 82.86, + "end": 82.98, + "confidence": 0.899 + } + ] + }, + { + "id": 24, + 
"seek": 8298, + "start": 83.0, + "end": 87.66, + "text": " le jeu. Donc, à objets inédits, rapport inédits.", + "tokens": [ + 476, + 16748, + 13, + 7477, + 11, + 1531, + 1111, + 25349, + 294, + 7811, + 1208, + 11, + 18018, + 294, + 7811, + 1208, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.4346100035167876, + "compression_ratio": 1.5907335907335907, + "no_speech_prob": 3.121094050584361e-05, + "confidence": 0.614, + "words": [ + { + "text": "le", + "start": 83.0, + "end": 83.12, + "confidence": 0.128 + }, + { + "text": "jeu.", + "start": 83.12, + "end": 83.64, + "confidence": 0.505 + }, + { + "text": "Donc,", + "start": 83.64, + "end": 84.48, + "confidence": 0.513 + }, + { + "text": "à", + "start": 84.48, + "end": 84.86, + "confidence": 0.634 + }, + { + "text": "objets", + "start": 84.86, + "end": 85.36, + "confidence": 0.547 + }, + { + "text": "inédits,", + "start": 85.36, + "end": 86.24, + "confidence": 0.73 + }, + { + "text": "rapport", + "start": 86.24, + "end": 86.62, + "confidence": 0.928 + }, + { + "text": "inédits.", + "start": 86.62, + "end": 87.66, + "confidence": 0.921 + } + ] + }, + { + "id": 25, + "seek": 8298, + "start": 88.08, + "end": 93.78, + "text": " Et, ce rapport, si j'en prends Nicolas, frère caractérisée par un mélange de dépenses", + "tokens": [ + 3790, + 11, + 1769, + 18018, + 11, + 1511, + 361, + 6, + 268, + 46750, + 38268, + 11, + 431, + 4212, + 1032, + 578, + 4198, + 50027, + 971, + 517, + 41953, + 933, + 368, + 27998, + 9085 + ], + "temperature": 0.0, + "avg_logprob": -0.4346100035167876, + "compression_ratio": 1.5907335907335907, + "no_speech_prob": 3.121094050584361e-05, + "confidence": 0.624, + "words": [ + { + "text": "Et,", + "start": 88.08, + "end": 88.3, + "confidence": 0.87 + }, + { + "text": "ce", + "start": 88.3, + "end": 88.8, + "confidence": 0.462 + }, + { + "text": "rapport,", + "start": 88.8, + "end": 89.28, + "confidence": 0.997 + }, + { + "text": "si", + "start": 89.28, + "end": 89.56, + "confidence": 0.913 + }, + 
{ + "text": "j'en", + "start": 89.56, + "end": 89.74, + "confidence": 0.771 + }, + { + "text": "prends", + "start": 89.74, + "end": 89.84, + "confidence": 0.313 + }, + { + "text": "Nicolas,", + "start": 89.84, + "end": 90.54, + "confidence": 0.358 + }, + { + "text": "frère", + "start": 90.54, + "end": 91.06, + "confidence": 0.405 + }, + { + "text": "caractérisée", + "start": 91.06, + "end": 91.7, + "confidence": 0.567 + }, + { + "text": "par", + "start": 91.7, + "end": 92.12, + "confidence": 0.868 + }, + { + "text": "un", + "start": 92.12, + "end": 92.32, + "confidence": 0.989 + }, + { + "text": "mélange", + "start": 92.32, + "end": 92.96, + "confidence": 0.932 + }, + { + "text": "de", + "start": 92.96, + "end": 93.24, + "confidence": 0.812 + }, + { + "text": "dépenses", + "start": 93.24, + "end": 93.78, + "confidence": 0.404 + } + ] + }, + { + "id": 26, + "seek": 8298, + "start": 94.36, + "end": 94.98, + "text": " et de rojets.", + "tokens": [ + 1030, + 368, + 744, + 73, + 1385, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.4346100035167876, + "compression_ratio": 1.5907335907335907, + "no_speech_prob": 3.121094050584361e-05, + "confidence": 0.583, + "words": [ + { + "text": "et", + "start": 94.36, + "end": 94.52, + "confidence": 0.986 + }, + { + "text": "de", + "start": 94.52, + "end": 94.56, + "confidence": 0.996 + }, + { + "text": "rojets.", + "start": 94.56, + "end": 94.98, + "confidence": 0.41 + } + ] + }, + { + "id": 27, + "seek": 8298, + "start": 95.8, + "end": 100.4, + "text": " Bon, en vrai, il faudrait remonter très très finement toute l'histoire des objectes", + "tokens": [ + 7368, + 11, + 465, + 17815, + 11, + 1930, + 38694, + 8645, + 890, + 41806, + 5732, + 5732, + 962, + 1712, + 14953, + 287, + 6, + 29093, + 730, + 2657, + 279 + ], + "temperature": 0.0, + "avg_logprob": -0.4346100035167876, + "compression_ratio": 1.5907335907335907, + "no_speech_prob": 3.121094050584361e-05, + "confidence": 0.749, + "words": [ + { + "text": "Bon,", + "start": 
95.8, + "end": 96.06, + "confidence": 0.71 + }, + { + "text": "en", + "start": 96.06, + "end": 96.54, + "confidence": 0.92 + }, + { + "text": "vrai,", + "start": 96.54, + "end": 96.88, + "confidence": 0.994 + }, + { + "text": "il", + "start": 96.88, + "end": 97.1, + "confidence": 0.985 + }, + { + "text": "faudrait", + "start": 97.1, + "end": 97.5, + "confidence": 0.932 + }, + { + "text": "remonter", + "start": 97.5, + "end": 98.02, + "confidence": 0.574 + }, + { + "text": "très", + "start": 98.02, + "end": 98.46, + "confidence": 0.984 + }, + { + "text": "très", + "start": 98.46, + "end": 98.76, + "confidence": 0.597 + }, + { + "text": "finement", + "start": 98.76, + "end": 99.36, + "confidence": 0.476 + }, + { + "text": "toute", + "start": 99.36, + "end": 99.68, + "confidence": 0.367 + }, + { + "text": "l'histoire", + "start": 99.68, + "end": 100.02, + "confidence": 0.909 + }, + { + "text": "des", + "start": 100.02, + "end": 100.2, + "confidence": 0.949 + }, + { + "text": "objectes", + "start": 100.2, + "end": 100.4, + "confidence": 0.805 + } + ] + }, + { + "id": 28, + "seek": 8298, + "start": 100.4, + "end": 105.14, + "text": " techniques et de leur infération dans le vie pour déterminer si ce rapport est totalement", + "tokens": [ + 7512, + 1030, + 368, + 9580, + 1536, + 526, + 2405, + 2680, + 476, + 4941, + 2016, + 2795, + 29725, + 260, + 1511, + 1769, + 18018, + 871, + 45203 + ], + "temperature": 0.0, + "avg_logprob": -0.4346100035167876, + "compression_ratio": 1.5907335907335907, + "no_speech_prob": 3.121094050584361e-05, + "confidence": 0.665, + "words": [ + { + "text": "techniques", + "start": 100.4, + "end": 101.02, + "confidence": 0.954 + }, + { + "text": "et", + "start": 101.02, + "end": 101.52, + "confidence": 0.967 + }, + { + "text": "de", + "start": 101.52, + "end": 101.64, + "confidence": 0.973 + }, + { + "text": "leur", + "start": 101.64, + "end": 101.78, + "confidence": 0.811 + }, + { + "text": "infération", + "start": 101.78, + "end": 102.22, + 
"confidence": 0.227 + }, + { + "text": "dans", + "start": 102.22, + "end": 102.46, + "confidence": 0.518 + }, + { + "text": "le", + "start": 102.46, + "end": 102.54, + "confidence": 0.511 + }, + { + "text": "vie", + "start": 102.54, + "end": 102.78, + "confidence": 0.612 + }, + { + "text": "pour", + "start": 102.78, + "end": 103.06, + "confidence": 0.933 + }, + { + "text": "déterminer", + "start": 103.06, + "end": 103.64, + "confidence": 0.954 + }, + { + "text": "si", + "start": 103.64, + "end": 103.74, + "confidence": 0.488 + }, + { + "text": "ce", + "start": 103.74, + "end": 103.86, + "confidence": 0.98 + }, + { + "text": "rapport", + "start": 103.86, + "end": 104.1, + "confidence": 0.997 + }, + { + "text": "est", + "start": 104.1, + "end": 104.88, + "confidence": 0.942 + }, + { + "text": "totalement", + "start": 104.88, + "end": 105.14, + "confidence": 0.854 + } + ] + }, + { + "id": 29, + "seek": 8298, + "start": 105.26, + "end": 105.78, + "text": " inédit.", + "tokens": [ + 294, + 7811, + 270, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.4346100035167876, + "compression_ratio": 1.5907335907335907, + "no_speech_prob": 3.121094050584361e-05, + "confidence": 0.943, + "words": [ + { + "text": "inédit.", + "start": 105.26, + "end": 105.78, + "confidence": 0.943 + } + ] + }, + { + "id": 30, + "seek": 8298, + "start": 106.14, + "end": 109.36, + "text": " Mais j'ai l'impression comme ça que Nicolas se trompe pas vraiment.", + "tokens": [ + 6313, + 361, + 6, + 1301, + 287, + 6, + 36107, + 5173, + 2788, + 631, + 38268, + 369, + 504, + 298, + 494, + 1736, + 8322, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.4346100035167876, + "compression_ratio": 1.5907335907335907, + "no_speech_prob": 3.121094050584361e-05, + "confidence": 0.798, + "words": [ + { + "text": "Mais", + "start": 106.14, + "end": 106.34, + "confidence": 0.872 + }, + { + "text": "j'ai", + "start": 106.34, + "end": 106.9, + "confidence": 0.924 + }, + { + "text": "l'impression", + "start": 106.9, 
+ "end": 107.28, + "confidence": 0.967 + }, + { + "text": "comme", + "start": 107.28, + "end": 107.5, + "confidence": 0.709 + }, + { + "text": "ça", + "start": 107.5, + "end": 107.68, + "confidence": 0.955 + }, + { + "text": "que", + "start": 107.68, + "end": 107.96, + "confidence": 0.91 + }, + { + "text": "Nicolas", + "start": 107.96, + "end": 108.36, + "confidence": 0.986 + }, + { + "text": "se", + "start": 108.36, + "end": 108.66, + "confidence": 0.569 + }, + { + "text": "trompe", + "start": 108.66, + "end": 109.0, + "confidence": 0.532 + }, + { + "text": "pas", + "start": 109.0, + "end": 109.1, + "confidence": 0.715 + }, + { + "text": "vraiment.", + "start": 109.1, + "end": 109.36, + "confidence": 0.923 + } + ] + }, + { + "id": 31, + "seek": 10970, + "start": 109.72, + "end": 114.26, + "text": " Pour autant, je sache. Il y a eu plein de discussions autour de la voiture ou même", + "tokens": [ + 8732, + 34081, + 11, + 1506, + 262, + 6000, + 13, + 4416, + 288, + 257, + 2228, + 21088, + 368, + 11088, + 30249, + 368, + 635, + 38859, + 2820, + 5698 + ], + "temperature": 0.0, + "avg_logprob": -0.394765736144266, + "compression_ratio": 1.7249190938511327, + "no_speech_prob": 9.048193533089943e-06, + "confidence": 0.829, + "words": [ + { + "text": "Pour", + "start": 109.72, + "end": 110.08, + "confidence": 0.92 + }, + { + "text": "autant,", + "start": 110.08, + "end": 110.36, + "confidence": 0.986 + }, + { + "text": "je", + "start": 110.36, + "end": 110.4, + "confidence": 0.894 + }, + { + "text": "sache.", + "start": 110.4, + "end": 110.86, + "confidence": 0.531 + }, + { + "text": "Il", + "start": 110.86, + "end": 111.16, + "confidence": 0.851 + }, + { + "text": "y", + "start": 111.16, + "end": 111.22, + "confidence": 0.872 + }, + { + "text": "a", + "start": 111.22, + "end": 111.28, + "confidence": 0.939 + }, + { + "text": "eu", + "start": 111.28, + "end": 111.38, + "confidence": 0.891 + }, + { + "text": "plein", + "start": 111.38, + "end": 111.76, + "confidence": 
0.833 + }, + { + "text": "de", + "start": 111.76, + "end": 112.04, + "confidence": 0.94 + }, + { + "text": "discussions", + "start": 112.04, + "end": 112.46, + "confidence": 0.711 + }, + { + "text": "autour", + "start": 112.46, + "end": 112.96, + "confidence": 0.97 + }, + { + "text": "de", + "start": 112.96, + "end": 113.46, + "confidence": 0.952 + }, + { + "text": "la", + "start": 113.46, + "end": 113.5, + "confidence": 0.955 + }, + { + "text": "voiture", + "start": 113.5, + "end": 113.8, + "confidence": 0.983 + }, + { + "text": "ou", + "start": 113.8, + "end": 114.02, + "confidence": 0.523 + }, + { + "text": "même", + "start": 114.02, + "end": 114.26, + "confidence": 0.963 + } + ] + }, + { + "id": 32, + "seek": 10970, + "start": 114.42, + "end": 118.76, + "text": " du téléphone. Mais la dépense n'était pas du même mort, donc le rejet n'en", + "tokens": [ + 1581, + 47159, + 13, + 6313, + 635, + 27998, + 1288, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 6599, + 11, + 5926, + 476, + 319, + 7108, + 297, + 6, + 268 + ], + "temperature": 0.0, + "avg_logprob": -0.394765736144266, + "compression_ratio": 1.7249190938511327, + "no_speech_prob": 9.048193533089943e-06, + "confidence": 0.732, + "words": [ + { + "text": "du", + "start": 114.42, + "end": 114.6, + "confidence": 0.677 + }, + { + "text": "téléphone.", + "start": 114.6, + "end": 115.16, + "confidence": 0.983 + }, + { + "text": "Mais", + "start": 115.16, + "end": 115.72, + "confidence": 0.649 + }, + { + "text": "la", + "start": 115.72, + "end": 116.0, + "confidence": 0.809 + }, + { + "text": "dépense", + "start": 116.0, + "end": 116.38, + "confidence": 0.766 + }, + { + "text": "n'était", + "start": 116.38, + "end": 116.62, + "confidence": 0.952 + }, + { + "text": "pas", + "start": 116.62, + "end": 117.02, + "confidence": 0.994 + }, + { + "text": "du", + "start": 117.02, + "end": 117.16, + "confidence": 0.98 + }, + { + "text": "même", + "start": 117.16, + "end": 117.32, + "confidence": 0.944 + }, + { + "text": 
"mort,", + "start": 117.32, + "end": 117.66, + "confidence": 0.519 + }, + { + "text": "donc", + "start": 117.66, + "end": 117.78, + "confidence": 0.871 + }, + { + "text": "le", + "start": 117.78, + "end": 118.32, + "confidence": 0.932 + }, + { + "text": "rejet", + "start": 118.32, + "end": 118.62, + "confidence": 0.524 + }, + { + "text": "n'en", + "start": 118.62, + "end": 118.76, + "confidence": 0.472 + } + ] + }, + { + "id": 33, + "seek": 10970, + "start": 118.76, + "end": 119.66, + "text": " plus n'était pas du même mort.", + "tokens": [ + 1804, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 6599, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.394765736144266, + "compression_ratio": 1.7249190938511327, + "no_speech_prob": 9.048193533089943e-06, + "confidence": 0.853, + "words": [ + { + "text": "plus", + "start": 118.76, + "end": 118.9, + "confidence": 0.313 + }, + { + "text": "n'était", + "start": 118.9, + "end": 119.1, + "confidence": 0.973 + }, + { + "text": "pas", + "start": 119.1, + "end": 119.26, + "confidence": 0.998 + }, + { + "text": "du", + "start": 119.26, + "end": 119.36, + "confidence": 0.988 + }, + { + "text": "même", + "start": 119.36, + "end": 119.5, + "confidence": 0.999 + }, + { + "text": "mort.", + "start": 119.5, + "end": 119.66, + "confidence": 0.984 + } + ] + }, + { + "id": 34, + "seek": 10970, + "start": 120.06, + "end": 122.94, + "text": " On peut adorer sa bagnure, en avoir besoin pour plein de choses.", + "tokens": [ + 1282, + 5977, + 614, + 17618, + 601, + 3411, + 77, + 540, + 11, + 465, + 10853, + 19207, + 2016, + 21088, + 368, + 14488, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.394765736144266, + "compression_ratio": 1.7249190938511327, + "no_speech_prob": 9.048193533089943e-06, + "confidence": 0.753, + "words": [ + { + "text": "On", + "start": 120.06, + "end": 120.22, + "confidence": 0.969 + }, + { + "text": "peut", + "start": 120.22, + "end": 120.32, + "confidence": 0.988 + }, + { + "text": "adorer", + "start": 120.32, 
+ "end": 120.64, + "confidence": 0.854 + }, + { + "text": "sa", + "start": 120.64, + "end": 120.86, + "confidence": 0.918 + }, + { + "text": "bagnure,", + "start": 120.86, + "end": 121.38, + "confidence": 0.368 + }, + { + "text": "en", + "start": 121.38, + "end": 121.52, + "confidence": 0.56 + }, + { + "text": "avoir", + "start": 121.52, + "end": 121.66, + "confidence": 0.969 + }, + { + "text": "besoin", + "start": 121.66, + "end": 122.08, + "confidence": 0.997 + }, + { + "text": "pour", + "start": 122.08, + "end": 122.34, + "confidence": 0.92 + }, + { + "text": "plein", + "start": 122.34, + "end": 122.64, + "confidence": 0.9 + }, + { + "text": "de", + "start": 122.64, + "end": 122.78, + "confidence": 0.993 + }, + { + "text": "choses.", + "start": 122.78, + "end": 122.94, + "confidence": 0.994 + } + ] + }, + { + "id": 35, + "seek": 10970, + "start": 123.36, + "end": 126.38, + "text": " Et là, le soir, quand on va se coucher, on la laisse.", + "tokens": [ + 3790, + 3684, + 11, + 476, + 27105, + 11, + 6932, + 322, + 2773, + 369, + 1384, + 6759, + 11, + 322, + 635, + 30969, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.394765736144266, + "compression_ratio": 1.7249190938511327, + "no_speech_prob": 9.048193533089943e-06, + "confidence": 0.826, + "words": [ + { + "text": "Et", + "start": 123.36, + "end": 123.48, + "confidence": 0.557 + }, + { + "text": "là,", + "start": 123.48, + "end": 123.66, + "confidence": 0.527 + }, + { + "text": "le", + "start": 123.66, + "end": 124.02, + "confidence": 0.986 + }, + { + "text": "soir,", + "start": 124.02, + "end": 124.5, + "confidence": 0.966 + }, + { + "text": "quand", + "start": 124.5, + "end": 124.9, + "confidence": 0.774 + }, + { + "text": "on", + "start": 124.9, + "end": 125.02, + "confidence": 0.986 + }, + { + "text": "va", + "start": 125.02, + "end": 125.1, + "confidence": 0.968 + }, + { + "text": "se", + "start": 125.1, + "end": 125.2, + "confidence": 0.862 + }, + { + "text": "coucher,", + "start": 125.2, + "end": 
125.64, + "confidence": 0.791 + }, + { + "text": "on", + "start": 125.64, + "end": 126.04, + "confidence": 0.974 + }, + { + "text": "la", + "start": 126.04, + "end": 126.12, + "confidence": 0.783 + }, + { + "text": "laisse.", + "start": 126.12, + "end": 126.38, + "confidence": 0.981 + } + ] + }, + { + "id": 36, + "seek": 10970, + "start": 127.06, + "end": 130.0, + "text": " On l'a pas dans la main, quand on est collis, quand on n'en mène pas au chiot.", + "tokens": [ + 1282, + 287, + 6, + 64, + 1736, + 2680, + 635, + 2135, + 11, + 6932, + 322, + 871, + 1263, + 271, + 11, + 6932, + 322, + 297, + 6, + 268, + 275, + 18832, + 1736, + 1609, + 417, + 6471, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.394765736144266, + "compression_ratio": 1.7249190938511327, + "no_speech_prob": 9.048193533089943e-06, + "confidence": 0.66, + "words": [ + { + "text": "On", + "start": 127.06, + "end": 127.3, + "confidence": 0.742 + }, + { + "text": "l'a", + "start": 127.3, + "end": 127.46, + "confidence": 0.714 + }, + { + "text": "pas", + "start": 127.46, + "end": 127.64, + "confidence": 0.99 + }, + { + "text": "dans", + "start": 127.64, + "end": 127.82, + "confidence": 0.967 + }, + { + "text": "la", + "start": 127.82, + "end": 127.94, + "confidence": 0.956 + }, + { + "text": "main,", + "start": 127.94, + "end": 128.3, + "confidence": 0.978 + }, + { + "text": "quand", + "start": 128.3, + "end": 128.44, + "confidence": 0.967 + }, + { + "text": "on", + "start": 128.44, + "end": 128.6, + "confidence": 0.993 + }, + { + "text": "est", + "start": 128.6, + "end": 128.66, + "confidence": 0.602 + }, + { + "text": "collis,", + "start": 128.66, + "end": 129.2, + "confidence": 0.346 + }, + { + "text": "quand", + "start": 129.2, + "end": 129.24, + "confidence": 0.524 + }, + { + "text": "on", + "start": 129.24, + "end": 129.32, + "confidence": 0.991 + }, + { + "text": "n'en", + "start": 129.32, + "end": 129.38, + "confidence": 0.537 + }, + { + "text": "mène", + "start": 129.38, + "end": 129.56, + 
"confidence": 0.434 + }, + { + "text": "pas", + "start": 129.56, + "end": 129.72, + "confidence": 0.998 + }, + { + "text": "au", + "start": 129.72, + "end": 129.82, + "confidence": 0.811 + }, + { + "text": "chiot.", + "start": 129.82, + "end": 130.0, + "confidence": 0.473 + } + ] + }, + { + "id": 37, + "seek": 10970, + "start": 130.88, + "end": 135.1, + "text": " On pouvait être émervé par son mome qui occupeait la ligne de téléphone pendant une", + "tokens": [ + 1282, + 45913, + 7418, + 1136, + 936, + 15797, + 971, + 1872, + 275, + 423, + 1956, + 2678, + 84, + 494, + 1001, + 635, + 34207, + 368, + 47159, + 17338, + 2251 + ], + "temperature": 0.0, + "avg_logprob": -0.394765736144266, + "compression_ratio": 1.7249190938511327, + "no_speech_prob": 9.048193533089943e-06, + "confidence": 0.672, + "words": [ + { + "text": "On", + "start": 130.88, + "end": 131.0, + "confidence": 0.991 + }, + { + "text": "pouvait", + "start": 131.0, + "end": 131.24, + "confidence": 0.532 + }, + { + "text": "être", + "start": 131.24, + "end": 131.7, + "confidence": 0.545 + }, + { + "text": "émervé", + "start": 131.7, + "end": 132.2, + "confidence": 0.725 + }, + { + "text": "par", + "start": 132.2, + "end": 132.42, + "confidence": 0.82 + }, + { + "text": "son", + "start": 132.42, + "end": 132.68, + "confidence": 0.448 + }, + { + "text": "mome", + "start": 132.68, + "end": 133.04, + "confidence": 0.261 + }, + { + "text": "qui", + "start": 133.04, + "end": 133.26, + "confidence": 0.901 + }, + { + "text": "occupeait", + "start": 133.26, + "end": 133.72, + "confidence": 0.641 + }, + { + "text": "la", + "start": 133.72, + "end": 133.8, + "confidence": 0.8 + }, + { + "text": "ligne", + "start": 133.8, + "end": 134.0, + "confidence": 0.976 + }, + { + "text": "de", + "start": 134.0, + "end": 134.14, + "confidence": 0.964 + }, + { + "text": "téléphone", + "start": 134.14, + "end": 134.44, + "confidence": 0.977 + }, + { + "text": "pendant", + "start": 134.44, + "end": 134.8, + "confidence": 0.92 + }, 
+ { + "text": "une", + "start": 134.8, + "end": 135.1, + "confidence": 0.838 + } + ] + }, + { + "id": 38, + "seek": 10970, + "start": 135.1, + "end": 136.84, + "text": " heure chaque soir pour discuter avec un copain.", + "tokens": [ + 30027, + 18920, + 27105, + 2016, + 2983, + 20314, + 4163, + 517, + 2971, + 491, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.394765736144266, + "compression_ratio": 1.7249190938511327, + "no_speech_prob": 9.048193533089943e-06, + "confidence": 0.922, + "words": [ + { + "text": "heure", + "start": 135.1, + "end": 135.36, + "confidence": 0.701 + }, + { + "text": "chaque", + "start": 135.36, + "end": 135.52, + "confidence": 0.957 + }, + { + "text": "soir", + "start": 135.52, + "end": 135.76, + "confidence": 0.997 + }, + { + "text": "pour", + "start": 135.76, + "end": 135.94, + "confidence": 0.983 + }, + { + "text": "discuter", + "start": 135.94, + "end": 136.26, + "confidence": 0.882 + }, + { + "text": "avec", + "start": 136.26, + "end": 136.44, + "confidence": 0.993 + }, + { + "text": "un", + "start": 136.44, + "end": 136.6, + "confidence": 0.976 + }, + { + "text": "copain.", + "start": 136.6, + "end": 136.84, + "confidence": 0.948 + } + ] + }, + { + "id": 39, + "seek": 13702, + "start": 137.26, + "end": 141.8, + "text": " Mais ça ne ressemble pas à ce qu'on peut ressentir à voir même mome aujourd'hui", + "tokens": [ + 6313, + 2788, + 408, + 725, + 37227, + 1736, + 1531, + 1769, + 421, + 6, + 266, + 5977, + 24689, + 317, + 347, + 1531, + 10695, + 5698, + 275, + 423, + 14023, + 6, + 10556 + ], + "temperature": 0.0, + "avg_logprob": -0.5734858703613281, + "compression_ratio": 1.5562700964630225, + "no_speech_prob": 3.1260256037057843e-06, + "confidence": 0.716, + "words": [ + { + "text": "Mais", + "start": 137.26, + "end": 137.52, + "confidence": 0.967 + }, + { + "text": "ça", + "start": 137.52, + "end": 137.6, + "confidence": 0.859 + }, + { + "text": "ne", + "start": 137.6, + "end": 137.68, + "confidence": 0.681 + }, + { + 
"text": "ressemble", + "start": 137.68, + "end": 138.14, + "confidence": 0.743 + }, + { + "text": "pas", + "start": 138.14, + "end": 138.66, + "confidence": 0.524 + }, + { + "text": "à", + "start": 138.66, + "end": 138.94, + "confidence": 0.978 + }, + { + "text": "ce", + "start": 138.94, + "end": 139.02, + "confidence": 0.605 + }, + { + "text": "qu'on", + "start": 139.02, + "end": 139.16, + "confidence": 0.961 + }, + { + "text": "peut", + "start": 139.16, + "end": 139.3, + "confidence": 0.586 + }, + { + "text": "ressentir", + "start": 139.3, + "end": 140.04, + "confidence": 0.898 + }, + { + "text": "à", + "start": 140.04, + "end": 140.24, + "confidence": 0.352 + }, + { + "text": "voir", + "start": 140.24, + "end": 140.48, + "confidence": 0.918 + }, + { + "text": "même", + "start": 140.48, + "end": 140.86, + "confidence": 0.324 + }, + { + "text": "mome", + "start": 140.86, + "end": 141.18, + "confidence": 0.374 + }, + { + "text": "aujourd'hui", + "start": 141.18, + "end": 141.8, + "confidence": 0.949 + } + ] + }, + { + "id": 40, + "seek": 13702, + "start": 141.92, + "end": 145.76, + "text": " continuuellement avec son smartphone dans la main, comme c'était une sorte de estimateur", + "tokens": [ + 2993, + 31816, + 4163, + 1872, + 13307, + 2680, + 635, + 2135, + 11, + 5173, + 269, + 6, + 9743, + 2251, + 25559, + 368, + 8017, + 15540 + ], + "temperature": 0.0, + "avg_logprob": -0.5734858703613281, + "compression_ratio": 1.5562700964630225, + "no_speech_prob": 3.1260256037057843e-06, + "confidence": 0.584, + "words": [ + { + "text": "continuuellement", + "start": 141.92, + "end": 142.8, + "confidence": 0.327 + }, + { + "text": "avec", + "start": 142.8, + "end": 143.18, + "confidence": 0.903 + }, + { + "text": "son", + "start": 143.18, + "end": 143.36, + "confidence": 0.866 + }, + { + "text": "smartphone", + "start": 143.36, + "end": 143.64, + "confidence": 0.467 + }, + { + "text": "dans", + "start": 143.64, + "end": 143.92, + "confidence": 0.554 + }, + { + "text": 
"la", + "start": 143.92, + "end": 144.0, + "confidence": 0.972 + }, + { + "text": "main,", + "start": 144.0, + "end": 144.26, + "confidence": 0.997 + }, + { + "text": "comme", + "start": 144.26, + "end": 144.52, + "confidence": 0.95 + }, + { + "text": "c'était", + "start": 144.52, + "end": 144.78, + "confidence": 0.574 + }, + { + "text": "une", + "start": 144.78, + "end": 144.94, + "confidence": 0.971 + }, + { + "text": "sorte", + "start": 144.94, + "end": 145.1, + "confidence": 0.642 + }, + { + "text": "de", + "start": 145.1, + "end": 145.18, + "confidence": 0.268 + }, + { + "text": "estimateur", + "start": 145.18, + "end": 145.76, + "confidence": 0.415 + } + ] + }, + { + "id": 41, + "seek": 13702, + "start": 145.94, + "end": 148.88, + "text": " extère de l'intempis de lâcher à l'éantrénée, ça m'a eu immédiate.", + "tokens": [ + 1279, + 4212, + 368, + 287, + 6, + 686, + 15970, + 271, + 368, + 48835, + 6759, + 1531, + 287, + 6, + 526, + 394, + 81, + 3516, + 3856, + 11, + 2788, + 275, + 6, + 64, + 2228, + 3397, + 526, + 4504, + 473, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5734858703613281, + "compression_ratio": 1.5562700964630225, + "no_speech_prob": 3.1260256037057843e-06, + "confidence": 0.401, + "words": [ + { + "text": "extère", + "start": 145.94, + "end": 146.38, + "confidence": 0.357 + }, + { + "text": "de", + "start": 146.38, + "end": 146.46, + "confidence": 0.253 + }, + { + "text": "l'intempis", + "start": 146.46, + "end": 146.7, + "confidence": 0.153 + }, + { + "text": "de", + "start": 146.7, + "end": 146.9, + "confidence": 0.858 + }, + { + "text": "lâcher", + "start": 146.9, + "end": 147.32, + "confidence": 0.824 + }, + { + "text": "à", + "start": 147.32, + "end": 147.44, + "confidence": 0.494 + }, + { + "text": "l'éantrénée,", + "start": 147.44, + "end": 147.94, + "confidence": 0.476 + }, + { + "text": "ça", + "start": 147.94, + "end": 148.0, + "confidence": 0.772 + }, + { + "text": "m'a", + "start": 148.0, + "end": 148.26, + "confidence": 
0.532 + }, + { + "text": "eu", + "start": 148.26, + "end": 148.42, + "confidence": 0.181 + }, + { + "text": "immédiate.", + "start": 148.42, + "end": 148.88, + "confidence": 0.537 + } + ] + }, + { + "id": 42, + "seek": 13702, + "start": 149.08, + "end": 152.02, + "text": " Bon, je dis ça pour le mome, mais évidemment, va là pour nos aussi.", + "tokens": [ + 7368, + 11, + 1506, + 717, + 2788, + 2016, + 476, + 275, + 423, + 11, + 2420, + 24724, + 11, + 2773, + 3684, + 2016, + 3269, + 6212, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5734858703613281, + "compression_ratio": 1.5562700964630225, + "no_speech_prob": 3.1260256037057843e-06, + "confidence": 0.529, + "words": [ + { + "text": "Bon,", + "start": 149.08, + "end": 149.3, + "confidence": 0.285 + }, + { + "text": "je", + "start": 149.3, + "end": 149.34, + "confidence": 0.425 + }, + { + "text": "dis", + "start": 149.34, + "end": 149.42, + "confidence": 0.316 + }, + { + "text": "ça", + "start": 149.42, + "end": 149.62, + "confidence": 0.953 + }, + { + "text": "pour", + "start": 149.62, + "end": 149.72, + "confidence": 0.971 + }, + { + "text": "le", + "start": 149.72, + "end": 149.82, + "confidence": 0.993 + }, + { + "text": "mome,", + "start": 149.82, + "end": 150.24, + "confidence": 0.575 + }, + { + "text": "mais", + "start": 150.24, + "end": 150.44, + "confidence": 0.713 + }, + { + "text": "évidemment,", + "start": 150.44, + "end": 151.1, + "confidence": 0.775 + }, + { + "text": "va", + "start": 151.1, + "end": 151.28, + "confidence": 0.381 + }, + { + "text": "là", + "start": 151.28, + "end": 151.42, + "confidence": 0.633 + }, + { + "text": "pour", + "start": 151.42, + "end": 151.62, + "confidence": 0.398 + }, + { + "text": "nos", + "start": 151.62, + "end": 151.7, + "confidence": 0.807 + }, + { + "text": "aussi.", + "start": 151.7, + "end": 152.02, + "confidence": 0.145 + } + ] + }, + { + "id": 43, + "seek": 13702, + "start": 152.66, + "end": 154.4, + "text": " Donc, rapport immédiate d'accord.", + 
"tokens": [ + 7477, + 11, + 18018, + 3397, + 526, + 4504, + 473, + 274, + 6, + 19947, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.5734858703613281, + "compression_ratio": 1.5562700964630225, + "no_speech_prob": 3.1260256037057843e-06, + "confidence": 0.851, + "words": [ + { + "text": "Donc,", + "start": 152.66, + "end": 153.0, + "confidence": 0.985 + }, + { + "text": "rapport", + "start": 153.0, + "end": 153.42, + "confidence": 0.977 + }, + { + "text": "immédiate", + "start": 153.42, + "end": 154.26, + "confidence": 0.825 + }, + { + "text": "d'accord.", + "start": 154.26, + "end": 154.4, + "confidence": 0.806 + } + ] + }, + { + "id": 44, + "seek": 13702, + "start": 154.4, + "end": 157.95, + "text": " Mais pourquoi, à ton impression qu'on en sortira jamais?", + "tokens": [ + 6313, + 19934, + 11, + 1531, + 2952, + 9995, + 421, + 6, + 266, + 465, + 26906, + 64, + 14540, + 30 + ], + "temperature": 0.0, + "avg_logprob": -0.5734858703613281, + "compression_ratio": 1.5562700964630225, + "no_speech_prob": 3.1260256037057843e-06, + "confidence": 0.791, + "words": [ + { + "text": "Mais", + "start": 154.4, + "end": 155.9, + "confidence": 0.984 + }, + { + "text": "pourquoi,", + "start": 155.9, + "end": 156.36, + "confidence": 0.959 + }, + { + "text": "à", + "start": 156.36, + "end": 156.48, + "confidence": 0.821 + }, + { + "text": "ton", + "start": 156.48, + "end": 156.66, + "confidence": 0.952 + }, + { + "text": "impression", + "start": 156.66, + "end": 156.96, + "confidence": 0.932 + }, + { + "text": "qu'on", + "start": 156.96, + "end": 157.28, + "confidence": 0.906 + }, + { + "text": "en", + "start": 157.28, + "end": 157.34, + "confidence": 0.789 + }, + { + "text": "sortira", + "start": 157.34, + "end": 157.84, + "confidence": 0.543 + }, + { + "text": "jamais?", + "start": 157.84, + "end": 157.95, + "confidence": 0.503 + } + ] + }, + { + "id": 45, + "seek": 13702, + "start": 157.95, + "end": 162.37, + "text": " Et puis, il faut en remettre la faute sur les gens qui 
ont créé cette", + "tokens": [ + 3790, + 9093, + 11, + 1930, + 8487, + 465, + 890, + 40681, + 635, + 2050, + 1169, + 1022, + 1512, + 10668, + 1956, + 6592, + 15609, + 526, + 5550 + ], + "temperature": 0.0, + "avg_logprob": -0.5734858703613281, + "compression_ratio": 1.5562700964630225, + "no_speech_prob": 3.1260256037057843e-06, + "confidence": 0.82, + "words": [ + { + "text": "Et", + "start": 157.95, + "end": 159.26, + "confidence": 0.838 + }, + { + "text": "puis,", + "start": 159.26, + "end": 159.58, + "confidence": 0.481 + }, + { + "text": "il", + "start": 159.58, + "end": 159.62, + "confidence": 0.665 + }, + { + "text": "faut", + "start": 159.62, + "end": 159.66, + "confidence": 0.942 + }, + { + "text": "en", + "start": 159.66, + "end": 159.72, + "confidence": 0.925 + }, + { + "text": "remettre", + "start": 159.72, + "end": 160.1, + "confidence": 0.985 + }, + { + "text": "la", + "start": 160.1, + "end": 160.28, + "confidence": 0.702 + }, + { + "text": "faute", + "start": 160.28, + "end": 160.62, + "confidence": 0.55 + }, + { + "text": "sur", + "start": 160.62, + "end": 160.9, + "confidence": 0.954 + }, + { + "text": "les", + "start": 160.9, + "end": 161.22, + "confidence": 0.81 + }, + { + "text": "gens", + "start": 161.22, + "end": 161.42, + "confidence": 0.985 + }, + { + "text": "qui", + "start": 161.42, + "end": 161.58, + "confidence": 0.981 + }, + { + "text": "ont", + "start": 161.58, + "end": 161.62, + "confidence": 0.955 + }, + { + "text": "créé", + "start": 161.62, + "end": 162.3, + "confidence": 0.957 + }, + { + "text": "cette", + "start": 162.3, + "end": 162.37, + "confidence": 0.9 + } + ] + }, + { + "id": 46, + "seek": 16228, + "start": 162.37, + "end": 165.3, + "text": " route merveilleux et diabolique, qui a dit à bollique par coeur, merveilleux.", + "tokens": [ + 7955, + 3551, + 303, + 3409, + 2449, + 1030, + 33227, + 401, + 1925, + 11, + 1956, + 257, + 6176, + 1531, + 748, + 285, + 1925, + 971, + 45781, + 11, + 3551, + 303, + 3409, + 2449, + 13 + 
], + "temperature": 0.0, + "avg_logprob": -0.7643054464588994, + "compression_ratio": 1.544041450777202, + "no_speech_prob": 3.89045562769752e-05, + "confidence": 0.397, + "words": [ + { + "text": "route", + "start": 162.37, + "end": 162.6, + "confidence": 0.076 + }, + { + "text": "merveilleux", + "start": 162.6, + "end": 163.3, + "confidence": 0.635 + }, + { + "text": "et", + "start": 163.3, + "end": 163.42, + "confidence": 0.837 + }, + { + "text": "diabolique,", + "start": 163.42, + "end": 163.82, + "confidence": 0.328 + }, + { + "text": "qui", + "start": 163.82, + "end": 163.9, + "confidence": 0.365 + }, + { + "text": "a", + "start": 163.9, + "end": 163.98, + "confidence": 0.054 + }, + { + "text": "dit", + "start": 163.98, + "end": 164.06, + "confidence": 0.085 + }, + { + "text": "à", + "start": 164.06, + "end": 164.1, + "confidence": 0.433 + }, + { + "text": "bollique", + "start": 164.1, + "end": 164.3, + "confidence": 0.297 + }, + { + "text": "par", + "start": 164.3, + "end": 164.52, + "confidence": 0.634 + }, + { + "text": "coeur,", + "start": 164.52, + "end": 164.78, + "confidence": 0.361 + }, + { + "text": "merveilleux.", + "start": 164.78, + "end": 165.3, + "confidence": 0.982 + } + ] + }, + { + "id": 47, + "seek": 16228, + "start": 167.36, + "end": 168.7, + "text": " Les économistes parlent de dépendance du santé.", + "tokens": [ + 6965, + 31171, + 22368, + 13734, + 317, + 368, + 45768, + 719, + 1581, + 30068, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.7643054464588994, + "compression_ratio": 1.544041450777202, + "no_speech_prob": 3.89045562769752e-05, + "confidence": 0.748, + "words": [ + { + "text": "Les", + "start": 167.36, + "end": 167.4, + "confidence": 0.513 + }, + { + "text": "économistes", + "start": 167.4, + "end": 167.5, + "confidence": 0.937 + }, + { + "text": "parlent", + "start": 167.5, + "end": 167.78, + "confidence": 0.806 + }, + { + "text": "de", + "start": 167.78, + "end": 167.82, + "confidence": 0.855 + }, + { + "text": 
"dépendance", + "start": 167.82, + "end": 168.32, + "confidence": 0.758 + }, + { + "text": "du", + "start": 168.32, + "end": 168.5, + "confidence": 0.979 + }, + { + "text": "santé.", + "start": 168.5, + "end": 168.7, + "confidence": 0.389 + } + ] + }, + { + "id": 48, + "seek": 16228, + "start": 168.84, + "end": 172.66, + "text": " Ces vidéos, en fait, on est un santé qui a été établie, c'est un soit mon termine,", + "tokens": [ + 28414, + 25417, + 11, + 465, + 3887, + 11, + 322, + 871, + 517, + 30068, + 1956, + 257, + 8862, + 4823, + 455, + 6302, + 11, + 269, + 6, + 377, + 517, + 12703, + 1108, + 1433, + 533, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.7643054464588994, + "compression_ratio": 1.544041450777202, + "no_speech_prob": 3.89045562769752e-05, + "confidence": 0.487, + "words": [ + { + "text": "Ces", + "start": 168.84, + "end": 169.14, + "confidence": 0.531 + }, + { + "text": "vidéos,", + "start": 169.14, + "end": 169.5, + "confidence": 0.725 + }, + { + "text": "en", + "start": 169.5, + "end": 169.62, + "confidence": 0.757 + }, + { + "text": "fait,", + "start": 169.62, + "end": 169.68, + "confidence": 0.974 + }, + { + "text": "on", + "start": 169.68, + "end": 169.74, + "confidence": 0.306 + }, + { + "text": "est", + "start": 169.74, + "end": 169.86, + "confidence": 0.82 + }, + { + "text": "un", + "start": 169.86, + "end": 170.16, + "confidence": 0.245 + }, + { + "text": "santé", + "start": 170.16, + "end": 170.72, + "confidence": 0.943 + }, + { + "text": "qui", + "start": 170.72, + "end": 170.88, + "confidence": 0.839 + }, + { + "text": "a", + "start": 170.88, + "end": 170.96, + "confidence": 0.533 + }, + { + "text": "été", + "start": 170.96, + "end": 171.08, + "confidence": 0.992 + }, + { + "text": "établie,", + "start": 171.08, + "end": 171.6, + "confidence": 0.316 + }, + { + "text": "c'est", + "start": 171.6, + "end": 171.74, + "confidence": 0.55 + }, + { + "text": "un", + "start": 171.74, + "end": 171.88, + "confidence": 0.651 + }, + { + "text": 
"soit", + "start": 171.88, + "end": 172.12, + "confidence": 0.284 + }, + { + "text": "mon", + "start": 172.12, + "end": 172.28, + "confidence": 0.225 + }, + { + "text": "termine,", + "start": 172.28, + "end": 172.66, + "confidence": 0.288 + } + ] + }, + { + "id": 49, + "seek": 17312, + "start": 173.14, + "end": 177.42, + "text": " soit définissant des beurs, on définisse un signalétique.", + "tokens": [ + 50364, + 12703, + 40763, + 29492, + 730, + 312, + 2156, + 11, + 322, + 40763, + 7746, + 517, + 6358, + 42379, + 13, + 51436 + ], + "temperature": 0.0, + "avg_logprob": -0.9428024291992188, + "compression_ratio": 1.0169491525423728, + "no_speech_prob": 6.687085260637105e-05, + "confidence": 0.403, + "words": [ + { + "text": "soit", + "start": 173.14, + "end": 174.04, + "confidence": 0.127 + }, + { + "text": "définissant", + "start": 174.04, + "end": 175.5, + "confidence": 0.58 + }, + { + "text": "des", + "start": 175.5, + "end": 175.68, + "confidence": 0.813 + }, + { + "text": "beurs,", + "start": 175.68, + "end": 175.96, + "confidence": 0.378 + }, + { + "text": "on", + "start": 175.96, + "end": 176.04, + "confidence": 0.165 + }, + { + "text": "définisse", + "start": 176.04, + "end": 176.42, + "confidence": 0.536 + }, + { + "text": "un", + "start": 176.42, + "end": 176.6, + "confidence": 0.266 + }, + { + "text": "signalétique.", + "start": 176.6, + "end": 177.42, + "confidence": 0.544 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/tiny_auto/accurate_japanese.mp3.words.json b/tests/expected/tiny_auto/accurate_japanese.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..27f2495a3cd420fdfd8bf8b3d500d19111e8c5b2 --- /dev/null +++ b/tests/expected/tiny_auto/accurate_japanese.mp3.words.json @@ -0,0 +1,1642 @@ +{ + "text": "いきます ニュースタブでのサイトメイション機が 実際と違う検に関するご質問いただいております同じ度メインでデレクトリーごとに 別再度として管理上をしているサイトバリマスのサプテレクトリーごとにわけたサイトは それぞれ パブリシャーセンターに登録していくぐるニュース上ではサイトとして認識され パブリコンアイコンはサイトボトフのものが 
正しく標準されますしかしGoogle検査結果のニュースタブでは パブリコンサイトメイショー機ともに 正しくない以上時にいいなりますこちらいたしても ではありません パブリコンはサブテレクトリーごとに設定した", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.08, + "end": 6.62, + "text": "いきます ニュースタブでのサイトメイション機が 実際と違う検に関するご質問いただいております", + "tokens": [ + 50364, + 47348, + 15096, + 233, + 26167, + 3384, + 9550, + 12144, + 28889, + 2474, + 2972, + 23607, + 8040, + 7588, + 21647, + 8040, + 43891, + 4824, + 17543, + 5142, + 220, + 33197, + 34837, + 3193, + 49806, + 38739, + 250, + 4108, + 5196, + 95, + 22570, + 9991, + 43450, + 11361, + 32418, + 18549, + 6117, + 19420, + 50704 + ], + "temperature": 0.0, + "avg_logprob": -0.4374900633288968, + "compression_ratio": 1.6122448979591837, + "no_speech_prob": 0.19216813147068024, + "confidence": 0.559, + "words": [ + { + "text": "いきます", + "start": 0.08, + "end": 0.6, + "confidence": 0.256 + }, + { + "text": " ニ", + "start": 0.6, + "end": 1.32, + "confidence": 0.215 + }, + { + "text": "ュ", + "start": 1.32, + "end": 1.38, + "confidence": 0.98 + }, + { + "text": "ー", + "start": 1.38, + "end": 1.5, + "confidence": 0.942 + }, + { + "text": "ス", + "start": 1.5, + "end": 1.52, + "confidence": 0.963 + }, + { + "text": "タ", + "start": 1.52, + "end": 1.68, + "confidence": 0.876 + }, + { + "text": "ブ", + "start": 1.68, + "end": 1.8, + "confidence": 0.795 + }, + { + "text": "で", + "start": 1.8, + "end": 2.02, + "confidence": 0.946 + }, + { + "text": "の", + "start": 2.02, + "end": 2.28, + "confidence": 0.852 + }, + { + "text": "サ", + "start": 2.28, + "end": 2.56, + "confidence": 0.014 + }, + { + "text": "イ", + "start": 2.56, + "end": 2.66, + "confidence": 0.983 + }, + { + "text": "ト", + "start": 2.66, + "end": 2.78, + "confidence": 0.991 + }, + { + "text": "メ", + "start": 2.78, + "end": 2.94, + "confidence": 0.395 + }, + { + "text": "イ", + "start": 2.94, + "end": 3.0, + "confidence": 0.241 + }, + { + "text": "ショ", + "start": 3.0, + "end": 3.14, + "confidence": 0.895 + }, + { + "text": "ン", + "start": 3.14, + "end": 3.2, + "confidence": 
0.981 + }, + { + "text": "機", + "start": 3.2, + "end": 3.34, + "confidence": 0.381 + }, + { + "text": "が", + "start": 3.34, + "end": 3.5, + "confidence": 0.646 + }, + { + "text": " ", + "start": 3.5, + "end": 3.64, + "confidence": 0.231 + }, + { + "text": "実", + "start": 3.64, + "end": 3.76, + "confidence": 0.766 + }, + { + "text": "際", + "start": 3.76, + "end": 3.9, + "confidence": 0.981 + }, + { + "text": "と", + "start": 3.9, + "end": 4.08, + "confidence": 0.921 + }, + { + "text": "違う", + "start": 4.08, + "end": 4.38, + "confidence": 0.904 + }, + { + "text": "検", + "start": 4.38, + "end": 4.64, + "confidence": 0.313 + }, + { + "text": "に", + "start": 4.64, + "end": 4.74, + "confidence": 0.195 + }, + { + "text": "関", + "start": 4.74, + "end": 4.94, + "confidence": 0.497 + }, + { + "text": "する", + "start": 4.94, + "end": 5.12, + "confidence": 0.99 + }, + { + "text": "ご", + "start": 5.12, + "end": 5.3, + "confidence": 0.648 + }, + { + "text": "質", + "start": 5.3, + "end": 5.46, + "confidence": 0.869 + }, + { + "text": "問", + "start": 5.46, + "end": 5.62, + "confidence": 0.993 + }, + { + "text": "いただ", + "start": 5.62, + "end": 5.92, + "confidence": 0.94 + }, + { + "text": "いて", + "start": 5.92, + "end": 6.14, + "confidence": 0.981 + }, + { + "text": "お", + "start": 6.14, + "end": 6.26, + "confidence": 0.827 + }, + { + "text": "ります", + "start": 6.26, + "end": 6.62, + "confidence": 0.973 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 6.94, + "end": 13.58, + "text": "同じ度メインでデレクトリーごとに 別再度として管理上をしているサイトバリマスの", + "tokens": [ + 50704, + 13089, + 9257, + 13127, + 21647, + 8040, + 4824, + 2474, + 31327, + 16680, + 10825, + 7588, + 12376, + 3384, + 9991, + 3193, + 4108, + 220, + 16158, + 8623, + 13127, + 3193, + 8822, + 23131, + 13876, + 5708, + 5998, + 8822, + 22979, + 23607, + 8040, + 7588, + 18593, + 12376, + 13258, + 9550, + 2972, + 51044 + ], + "temperature": 0.0, + "avg_logprob": -0.4374900633288968, + "compression_ratio": 1.6122448979591837, + "no_speech_prob": 
0.19216813147068024, + "confidence": 0.525, + "words": [ + { + "text": "同", + "start": 6.94, + "end": 7.28, + "confidence": 0.469 + }, + { + "text": "じ", + "start": 7.28, + "end": 7.44, + "confidence": 0.218 + }, + { + "text": "度", + "start": 7.44, + "end": 7.62, + "confidence": 0.139 + }, + { + "text": "メ", + "start": 7.62, + "end": 7.74, + "confidence": 0.209 + }, + { + "text": "イ", + "start": 7.74, + "end": 7.82, + "confidence": 0.904 + }, + { + "text": "ン", + "start": 7.82, + "end": 7.84, + "confidence": 0.911 + }, + { + "text": "で", + "start": 7.84, + "end": 8.0, + "confidence": 0.979 + }, + { + "text": "デ", + "start": 8.0, + "end": 8.14, + "confidence": 0.37 + }, + { + "text": "レ", + "start": 8.14, + "end": 8.28, + "confidence": 0.674 + }, + { + "text": "ク", + "start": 8.28, + "end": 8.36, + "confidence": 0.974 + }, + { + "text": "ト", + "start": 8.36, + "end": 8.5, + "confidence": 0.979 + }, + { + "text": "リ", + "start": 8.5, + "end": 8.62, + "confidence": 0.96 + }, + { + "text": "ー", + "start": 8.62, + "end": 8.72, + "confidence": 0.701 + }, + { + "text": "ご", + "start": 8.72, + "end": 8.86, + "confidence": 0.267 + }, + { + "text": "と", + "start": 8.86, + "end": 9.0, + "confidence": 0.993 + }, + { + "text": "に", + "start": 9.0, + "end": 9.14, + "confidence": 0.967 + }, + { + "text": " ", + "start": 9.14, + "end": 9.34, + "confidence": 0.12 + }, + { + "text": "別", + "start": 9.34, + "end": 9.4, + "confidence": 0.527 + }, + { + "text": "再", + "start": 9.4, + "end": 9.58, + "confidence": 0.341 + }, + { + "text": "度", + "start": 9.58, + "end": 9.74, + "confidence": 0.323 + }, + { + "text": "と", + "start": 9.74, + "end": 9.92, + "confidence": 0.891 + }, + { + "text": "して", + "start": 9.92, + "end": 10.4, + "confidence": 0.998 + }, + { + "text": "管", + "start": 10.4, + "end": 10.86, + "confidence": 0.418 + }, + { + "text": "理", + "start": 10.86, + "end": 11.08, + "confidence": 1.0 + }, + { + "text": "上", + "start": 11.08, + "end": 11.36, + "confidence": 0.472 + }, 
+ { + "text": "を", + "start": 11.36, + "end": 11.56, + "confidence": 0.987 + }, + { + "text": "して", + "start": 11.56, + "end": 11.84, + "confidence": 0.931 + }, + { + "text": "いる", + "start": 11.84, + "end": 12.16, + "confidence": 0.939 + }, + { + "text": "サ", + "start": 12.16, + "end": 12.52, + "confidence": 0.069 + }, + { + "text": "イ", + "start": 12.52, + "end": 12.58, + "confidence": 0.985 + }, + { + "text": "ト", + "start": 12.58, + "end": 12.7, + "confidence": 0.976 + }, + { + "text": "バ", + "start": 12.7, + "end": 12.82, + "confidence": 0.297 + }, + { + "text": "リ", + "start": 12.82, + "end": 12.96, + "confidence": 0.362 + }, + { + "text": "マ", + "start": 12.96, + "end": 13.12, + "confidence": 0.19 + }, + { + "text": "ス", + "start": 13.12, + "end": 13.22, + "confidence": 0.859 + }, + { + "text": "の", + "start": 13.22, + "end": 13.58, + "confidence": 0.701 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 13.68, + "end": 20.74, + "text": "サプテレクトリーごとにわけたサイトは それぞれ パブリシャーセンターに登録していくぐるニュース上では", + "tokens": [ + 51044, + 23607, + 20953, + 22985, + 16680, + 10825, + 7588, + 12376, + 3384, + 9991, + 3193, + 4108, + 9206, + 7625, + 3368, + 23607, + 8040, + 7588, + 3065, + 47765, + 31563, + 4132, + 15096, + 239, + 28889, + 12376, + 11054, + 17233, + 3384, + 31223, + 4824, + 30736, + 4108, + 46246, + 8822, + 49394, + 35849, + 4895, + 34737, + 26167, + 3384, + 9550, + 5708, + 16719, + 51408 + ], + "temperature": 0.0, + "avg_logprob": -0.4374900633288968, + "compression_ratio": 1.6122448979591837, + "no_speech_prob": 0.19216813147068024, + "confidence": 0.622, + "words": [ + { + "text": "サ", + "start": 13.68, + "end": 14.22, + "confidence": 0.397 + }, + { + "text": "プ", + "start": 14.22, + "end": 14.32, + "confidence": 0.532 + }, + { + "text": "テ", + "start": 14.32, + "end": 14.42, + "confidence": 0.637 + }, + { + "text": "レ", + "start": 14.42, + "end": 14.58, + "confidence": 0.825 + }, + { + "text": "ク", + "start": 14.58, + "end": 14.66, + "confidence": 0.474 + }, + { + 
"text": "ト", + "start": 14.66, + "end": 14.78, + "confidence": 0.973 + }, + { + "text": "リ", + "start": 14.78, + "end": 14.92, + "confidence": 0.873 + }, + { + "text": "ー", + "start": 14.92, + "end": 15.04, + "confidence": 0.917 + }, + { + "text": "ご", + "start": 15.04, + "end": 15.1, + "confidence": 0.016 + }, + { + "text": "と", + "start": 15.1, + "end": 15.38, + "confidence": 0.991 + }, + { + "text": "に", + "start": 15.38, + "end": 15.86, + "confidence": 0.493 + }, + { + "text": "わ", + "start": 15.86, + "end": 16.02, + "confidence": 0.756 + }, + { + "text": "け", + "start": 16.02, + "end": 16.14, + "confidence": 0.988 + }, + { + "text": "た", + "start": 16.14, + "end": 16.28, + "confidence": 0.951 + }, + { + "text": "サ", + "start": 16.28, + "end": 16.42, + "confidence": 0.802 + }, + { + "text": "イ", + "start": 16.42, + "end": 16.5, + "confidence": 0.981 + }, + { + "text": "ト", + "start": 16.5, + "end": 16.6, + "confidence": 0.904 + }, + { + "text": "は", + "start": 16.6, + "end": 17.08, + "confidence": 0.976 + }, + { + "text": " それ", + "start": 17.08, + "end": 17.44, + "confidence": 0.128 + }, + { + "text": "ぞ", + "start": 17.44, + "end": 17.6, + "confidence": 0.85 + }, + { + "text": "れ", + "start": 17.6, + "end": 17.72, + "confidence": 0.995 + }, + { + "text": " パ", + "start": 17.72, + "end": 17.86, + "confidence": 0.177 + }, + { + "text": "ブ", + "start": 17.86, + "end": 17.94, + "confidence": 0.941 + }, + { + "text": "リ", + "start": 17.94, + "end": 18.04, + "confidence": 0.991 + }, + { + "text": "シ", + "start": 18.04, + "end": 18.18, + "confidence": 0.755 + }, + { + "text": "ャ", + "start": 18.18, + "end": 18.26, + "confidence": 0.825 + }, + { + "text": "ー", + "start": 18.26, + "end": 18.42, + "confidence": 0.765 + }, + { + "text": "セ", + "start": 18.42, + "end": 18.44, + "confidence": 0.588 + }, + { + "text": "ン", + "start": 18.44, + "end": 18.52, + "confidence": 0.832 + }, + { + "text": "ター", + "start": 18.52, + "end": 18.6, + "confidence": 0.998 + }, + { + 
"text": "に", + "start": 18.6, + "end": 18.76, + "confidence": 0.858 + }, + { + "text": "登録", + "start": 18.76, + "end": 19.12, + "confidence": 0.902 + }, + { + "text": "して", + "start": 19.12, + "end": 19.38, + "confidence": 0.766 + }, + { + "text": "いく", + "start": 19.38, + "end": 19.62, + "confidence": 0.877 + }, + { + "text": "ぐ", + "start": 19.62, + "end": 19.78, + "confidence": 0.221 + }, + { + "text": "る", + "start": 19.78, + "end": 19.9, + "confidence": 0.85 + }, + { + "text": "ニ", + "start": 19.9, + "end": 20.02, + "confidence": 0.68 + }, + { + "text": "ュ", + "start": 20.02, + "end": 20.14, + "confidence": 0.996 + }, + { + "text": "ー", + "start": 20.14, + "end": 20.16, + "confidence": 0.989 + }, + { + "text": "ス", + "start": 20.16, + "end": 20.22, + "confidence": 0.982 + }, + { + "text": "上", + "start": 20.22, + "end": 20.4, + "confidence": 0.236 + }, + { + "text": "では", + "start": 20.4, + "end": 20.74, + "confidence": 0.763 + } + ] + }, + { + "id": 3, + "seek": 2088, + "start": 20.9, + "end": 27.0, + "text": "サイトとして認識され パブリコンアイコンはサイトボトフのものが 正しく標準されます", + "tokens": [ + 50364, + 23607, + 8040, + 7588, + 3193, + 8822, + 22041, + 43143, + 6722, + 4132, + 15096, + 239, + 28889, + 12376, + 18066, + 4824, + 12817, + 8040, + 18066, + 4824, + 3065, + 23607, + 8040, + 7588, + 37626, + 7588, + 17320, + 2972, + 44726, + 5142, + 220, + 15789, + 26568, + 46299, + 28220, + 6722, + 4132, + 5368, + 50684 + ], + "temperature": 0.0, + "avg_logprob": -0.3597618494278345, + "compression_ratio": 1.6075949367088607, + "no_speech_prob": 0.0655483603477478, + "confidence": 0.378, + "words": [ + { + "text": "サ", + "start": 20.9, + "end": 21.26, + "confidence": 0.002 + }, + { + "text": "イ", + "start": 21.26, + "end": 21.34, + "confidence": 0.939 + }, + { + "text": "ト", + "start": 21.34, + "end": 21.44, + "confidence": 0.379 + }, + { + "text": "と", + "start": 21.44, + "end": 21.58, + "confidence": 0.831 + }, + { + "text": "して", + "start": 21.58, + "end": 21.8, + "confidence": 0.981 + 
}, + { + "text": "認", + "start": 21.8, + "end": 22.08, + "confidence": 0.179 + }, + { + "text": "識", + "start": 22.08, + "end": 22.2, + "confidence": 0.986 + }, + { + "text": "さ", + "start": 22.2, + "end": 22.34, + "confidence": 0.982 + }, + { + "text": "れ", + "start": 22.34, + "end": 22.64, + "confidence": 0.948 + }, + { + "text": " パ", + "start": 22.64, + "end": 22.86, + "confidence": 0.141 + }, + { + "text": "ブ", + "start": 22.86, + "end": 23.0, + "confidence": 0.051 + }, + { + "text": "リ", + "start": 23.0, + "end": 23.18, + "confidence": 0.007 + }, + { + "text": "コ", + "start": 23.18, + "end": 23.22, + "confidence": 0.817 + }, + { + "text": "ン", + "start": 23.22, + "end": 23.3, + "confidence": 0.887 + }, + { + "text": "ア", + "start": 23.3, + "end": 23.46, + "confidence": 0.107 + }, + { + "text": "イ", + "start": 23.46, + "end": 23.52, + "confidence": 0.957 + }, + { + "text": "コ", + "start": 23.52, + "end": 23.64, + "confidence": 0.308 + }, + { + "text": "ン", + "start": 23.64, + "end": 23.86, + "confidence": 0.893 + }, + { + "text": "は", + "start": 23.86, + "end": 23.88, + "confidence": 0.834 + }, + { + "text": "サ", + "start": 23.88, + "end": 24.04, + "confidence": 0.642 + }, + { + "text": "イ", + "start": 24.04, + "end": 24.16, + "confidence": 0.997 + }, + { + "text": "ト", + "start": 24.16, + "end": 24.24, + "confidence": 0.989 + }, + { + "text": "ボ", + "start": 24.24, + "end": 24.38, + "confidence": 0.588 + }, + { + "text": "ト", + "start": 24.38, + "end": 24.52, + "confidence": 0.861 + }, + { + "text": "フ", + "start": 24.52, + "end": 24.66, + "confidence": 0.037 + }, + { + "text": "の", + "start": 24.66, + "end": 24.72, + "confidence": 0.66 + }, + { + "text": "もの", + "start": 24.72, + "end": 24.9, + "confidence": 0.501 + }, + { + "text": "が", + "start": 24.9, + "end": 25.08, + "confidence": 0.338 + }, + { + "text": " ", + "start": 25.08, + "end": 25.32, + "confidence": 0.283 + }, + { + "text": "正", + "start": 25.32, + "end": 25.4, + "confidence": 0.631 + }, + { + 
"text": "しく", + "start": 25.4, + "end": 25.64, + "confidence": 0.979 + }, + { + "text": "標", + "start": 25.64, + "end": 25.92, + "confidence": 0.106 + }, + { + "text": "準", + "start": 25.92, + "end": 26.1, + "confidence": 0.809 + }, + { + "text": "さ", + "start": 26.1, + "end": 26.28, + "confidence": 0.979 + }, + { + "text": "れ", + "start": 26.28, + "end": 26.56, + "confidence": 0.983 + }, + { + "text": "ます", + "start": 26.56, + "end": 27.0, + "confidence": 0.893 + } + ] + }, + { + "id": 4, + "seek": 2088, + "start": 27.28, + "end": 33.82, + "text": "しかしGoogle検査結果のニュースタブでは パブリコンサイトメイショー機ともに 正しくない以上時にいいなります", + "tokens": [ + 50684, + 32156, + 2849, + 12104, + 3127, + 38739, + 250, + 17238, + 119, + 35181, + 2972, + 34737, + 26167, + 3384, + 9550, + 12144, + 28889, + 16719, + 15096, + 239, + 28889, + 12376, + 18066, + 4824, + 23607, + 8040, + 7588, + 21647, + 8040, + 43891, + 3384, + 17543, + 3193, + 4801, + 4108, + 220, + 15789, + 26568, + 9311, + 29497, + 6611, + 4108, + 13806, + 3203, + 19420, + 51024 + ], + "temperature": 0.0, + "avg_logprob": -0.3597618494278345, + "compression_ratio": 1.6075949367088607, + "no_speech_prob": 0.0655483603477478, + "confidence": 0.448, + "words": [ + { + "text": "しか", + "start": 27.28, + "end": 27.58, + "confidence": 0.909 + }, + { + "text": "し", + "start": 27.58, + "end": 27.72, + "confidence": 0.989 + }, + { + "text": "Go", + "start": 27.72, + "end": 27.88, + "confidence": 0.036 + }, + { + "text": "ogle", + "start": 27.88, + "end": 28.04, + "confidence": 0.994 + }, + { + "text": "検", + "start": 28.04, + "end": 28.32, + "confidence": 0.262 + }, + { + "text": "査", + "start": 28.32, + "end": 28.46, + "confidence": 0.681 + }, + { + "text": "結果", + "start": 28.46, + "end": 28.72, + "confidence": 0.979 + }, + { + "text": "の", + "start": 28.72, + "end": 28.92, + "confidence": 0.977 + }, + { + "text": "ニ", + "start": 28.92, + "end": 29.06, + "confidence": 0.781 + }, + { + "text": "ュ", + "start": 29.06, + "end": 29.14, + "confidence": 
0.995 + }, + { + "text": "ー", + "start": 29.14, + "end": 29.2, + "confidence": 0.984 + }, + { + "text": "ス", + "start": 29.2, + "end": 29.24, + "confidence": 0.969 + }, + { + "text": "タ", + "start": 29.24, + "end": 29.34, + "confidence": 0.833 + }, + { + "text": "ブ", + "start": 29.34, + "end": 29.44, + "confidence": 0.69 + }, + { + "text": "では", + "start": 29.44, + "end": 29.66, + "confidence": 0.961 + }, + { + "text": " パ", + "start": 29.66, + "end": 30.02, + "confidence": 0.081 + }, + { + "text": "ブ", + "start": 30.02, + "end": 30.14, + "confidence": 0.038 + }, + { + "text": "リ", + "start": 30.14, + "end": 30.34, + "confidence": 0.009 + }, + { + "text": "コ", + "start": 30.34, + "end": 30.36, + "confidence": 0.889 + }, + { + "text": "ン", + "start": 30.36, + "end": 30.42, + "confidence": 0.942 + }, + { + "text": "サ", + "start": 30.42, + "end": 30.54, + "confidence": 0.443 + }, + { + "text": "イ", + "start": 30.54, + "end": 30.62, + "confidence": 0.976 + }, + { + "text": "ト", + "start": 30.62, + "end": 30.72, + "confidence": 0.97 + }, + { + "text": "メ", + "start": 30.72, + "end": 30.86, + "confidence": 0.297 + }, + { + "text": "イ", + "start": 30.86, + "end": 30.92, + "confidence": 0.629 + }, + { + "text": "ショ", + "start": 30.92, + "end": 31.02, + "confidence": 0.426 + }, + { + "text": "ー", + "start": 31.02, + "end": 31.08, + "confidence": 0.624 + }, + { + "text": "機", + "start": 31.08, + "end": 31.24, + "confidence": 0.147 + }, + { + "text": "と", + "start": 31.24, + "end": 31.42, + "confidence": 0.385 + }, + { + "text": "も", + "start": 31.42, + "end": 31.56, + "confidence": 0.863 + }, + { + "text": "に", + "start": 31.56, + "end": 31.66, + "confidence": 0.935 + }, + { + "text": " ", + "start": 31.66, + "end": 31.8, + "confidence": 0.196 + }, + { + "text": "正", + "start": 31.8, + "end": 31.96, + "confidence": 0.443 + }, + { + "text": "しく", + "start": 31.96, + "end": 32.18, + "confidence": 0.998 + }, + { + "text": "ない", + "start": 32.18, + "end": 32.4, + "confidence": 
0.982 + }, + { + "text": "以上", + "start": 32.4, + "end": 32.72, + "confidence": 0.129 + }, + { + "text": "時", + "start": 32.72, + "end": 32.96, + "confidence": 0.107 + }, + { + "text": "に", + "start": 32.96, + "end": 33.14, + "confidence": 0.962 + }, + { + "text": "いい", + "start": 33.14, + "end": 33.36, + "confidence": 0.751 + }, + { + "text": "な", + "start": 33.36, + "end": 33.48, + "confidence": 0.844 + }, + { + "text": "ります", + "start": 33.48, + "end": 33.82, + "confidence": 0.929 + } + ] + }, + { + "id": 5, + "seek": 2088, + "start": 34.06, + "end": 40.36, + "text": "こちらいたしても ではありません パブリコンはサブテレクトリーごとに設定した", + "tokens": [ + 51024, + 34395, + 17679, + 8822, + 4801, + 220, + 16719, + 14498, + 30250, + 15096, + 239, + 28889, + 12376, + 18066, + 4824, + 3065, + 23607, + 28889, + 22985, + 16680, + 10825, + 7588, + 12376, + 3384, + 9991, + 3193, + 4108, + 39035, + 12088, + 8533, + 51404 + ], + "temperature": 0.0, + "avg_logprob": -0.3597618494278345, + "compression_ratio": 1.6075949367088607, + "no_speech_prob": 0.0655483603477478, + "confidence": 0.402, + "words": [ + { + "text": "こちら", + "start": 34.06, + "end": 34.54, + "confidence": 0.398 + }, + { + "text": "いた", + "start": 34.54, + "end": 34.82, + "confidence": 0.252 + }, + { + "text": "して", + "start": 34.82, + "end": 35.04, + "confidence": 0.14 + }, + { + "text": "も", + "start": 35.04, + "end": 35.18, + "confidence": 0.408 + }, + { + "text": " ", + "start": 35.18, + "end": 35.38, + "confidence": 0.072 + }, + { + "text": "では", + "start": 35.38, + "end": 35.46, + "confidence": 0.462 + }, + { + "text": "あり", + "start": 35.46, + "end": 35.7, + "confidence": 0.961 + }, + { + "text": "ません", + "start": 35.7, + "end": 36.14, + "confidence": 0.999 + }, + { + "text": " パ", + "start": 36.14, + "end": 36.9, + "confidence": 0.213 + }, + { + "text": "ブ", + "start": 36.9, + "end": 37.02, + "confidence": 0.011 + }, + { + "text": "リ", + "start": 37.02, + "end": 37.16, + "confidence": 0.207 + }, + { + "text": "コ", + "start": 
37.16, + "end": 37.18, + "confidence": 0.943 + }, + { + "text": "ン", + "start": 37.18, + "end": 37.84, + "confidence": 0.826 + }, + { + "text": "は", + "start": 37.84, + "end": 37.86, + "confidence": 0.951 + }, + { + "text": "サ", + "start": 37.86, + "end": 38.22, + "confidence": 0.248 + }, + { + "text": "ブ", + "start": 38.22, + "end": 38.32, + "confidence": 0.981 + }, + { + "text": "テ", + "start": 38.32, + "end": 38.42, + "confidence": 0.332 + }, + { + "text": "レ", + "start": 38.42, + "end": 38.56, + "confidence": 0.469 + }, + { + "text": "ク", + "start": 38.56, + "end": 38.66, + "confidence": 0.756 + }, + { + "text": "ト", + "start": 38.66, + "end": 38.78, + "confidence": 0.749 + }, + { + "text": "リ", + "start": 38.78, + "end": 38.92, + "confidence": 0.988 + }, + { + "text": "ー", + "start": 38.92, + "end": 38.96, + "confidence": 0.508 + }, + { + "text": "ご", + "start": 38.96, + "end": 39.08, + "confidence": 0.046 + }, + { + "text": "と", + "start": 39.08, + "end": 39.24, + "confidence": 0.975 + }, + { + "text": "に", + "start": 39.24, + "end": 39.58, + "confidence": 0.975 + }, + { + "text": "設", + "start": 39.58, + "end": 39.84, + "confidence": 0.567 + }, + { + "text": "定", + "start": 39.84, + "end": 40.04, + "confidence": 0.997 + }, + { + "text": "した", + "start": 40.04, + "end": 40.36, + "confidence": 0.909 + } + ] + } + ], + "language": "ja" +} \ No newline at end of file diff --git a/tests/expected/tiny_auto/bonjour.wav.words.json b/tests/expected/tiny_auto/bonjour.wav.words.json new file mode 100644 index 0000000000000000000000000000000000000000..909a1200e3fa63c503656008c29c6e0b49d27dd8 --- /dev/null +++ b/tests/expected/tiny_auto/bonjour.wav.words.json @@ -0,0 +1,32 @@ +{ + "text": " Bonjour !", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.14, + "end": 0.96, + "text": " Bonjour !", + "tokens": [ + 50364, + 25431, + 2298, + 50414 + ], + "temperature": 0.0, + "avg_logprob": -0.6979154109954834, + "compression_ratio": 0.5294117647058824, + 
"no_speech_prob": 0.019094161689281464, + "confidence": 0.829, + "words": [ + { + "text": "Bonjour !", + "start": 0.14, + "end": 0.96, + "confidence": 0.829 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/tiny_auto/bonjour_vous_allez_bien.mp3.words.json b/tests/expected/tiny_auto/bonjour_vous_allez_bien.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..94ad28d337dc6fddb6fdaf73d921885655a0767e --- /dev/null +++ b/tests/expected/tiny_auto/bonjour_vous_allez_bien.mp3.words.json @@ -0,0 +1,154 @@ +{ + "text": " Боже улыл! Эскому зарегиан! Боже улыл! Эскому зарегиан!", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.44, + "end": 1.88, + "text": " Боже улыл!", + "tokens": [ + 50364, + 5697, + 9292, + 1595, + 30975, + 693, + 0, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.9446654068796259, + "compression_ratio": 0.8166666666666667, + "no_speech_prob": 0.04302893579006195, + "confidence": 0.317, + "words": [ + { + "text": "Боже", + "start": 0.44, + "end": 0.86, + "confidence": 0.423 + }, + { + "text": "улыл!", + "start": 0.86, + "end": 1.88, + "confidence": 0.261 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 1.88, + "end": 3.14, + "text": " Эскому зарегиан!", + "tokens": [ + 50464, + 5381, + 4218, + 4161, + 17821, + 4953, + 435, + 1416, + 0, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.9446654068796259, + "compression_ratio": 0.8166666666666667, + "no_speech_prob": 0.04302893579006195, + "confidence": 0.395, + "words": [ + { + "text": "Эскому", + "start": 1.88, + "end": 2.3, + "confidence": 0.286 + }, + { + "text": "зарегиан!", + "start": 2.3, + "end": 3.14, + "confidence": 0.505 + } + ] + }, + { + "id": 2, + "seek": 3000, + "start": 32.98, + "end": 34.02, + "text": " Боже улыл!", + "tokens": [ + 50364, + 5697, + 9292, + 1595, + 30975, + 693, + 0, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.3079313729938708, + "compression_ratio": 
0.8166666666666667, + "no_speech_prob": 0.2232234627008438, + "confidence": 0.577, + "words": [ + { + "text": "Боже", + "start": 32.98, + "end": 33.36, + "confidence": 0.446 + }, + { + "text": "улыл!", + "start": 33.36, + "end": 34.02, + "confidence": 0.686 + } + ] + }, + { + "id": 3, + "seek": 3000, + "start": 34.42, + "end": 35.72, + "text": " Эскому зарегиан!", + "tokens": [ + 50564, + 5381, + 4218, + 4161, + 17821, + 4953, + 435, + 1416, + 0, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.3079313729938708, + "compression_ratio": 0.8166666666666667, + "no_speech_prob": 0.2232234627008438, + "confidence": 0.849, + "words": [ + { + "text": "Эскому", + "start": 34.42, + "end": 34.84, + "confidence": 0.691 + }, + { + "text": "зарегиан!", + "start": 34.84, + "end": 35.72, + "confidence": 0.992 + } + ] + } + ], + "language": "ru" +} \ No newline at end of file diff --git a/tests/expected/tiny_auto/empty.mp3.words.json b/tests/expected/tiny_auto/empty.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..b0b537f1b7823415bf5f37956f5821918c868d52 --- /dev/null +++ b/tests/expected/tiny_auto/empty.mp3.words.json @@ -0,0 +1,5 @@ +{ + "text": "", + "segments": [], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/tiny_auto/gaenswein15.mp3.words.json b/tests/expected/tiny_auto/gaenswein15.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..fdb40535ff632c8344a45af8471ae026c5287d99 --- /dev/null +++ b/tests/expected/tiny_auto/gaenswein15.mp3.words.json @@ -0,0 +1,337 @@ +{ + "text": " Wie wieder zu dazu ist Meshfuchs von 1962 als Meshale für die außerordentliche Form des grullischen Rätus ist dann nicht so weitergegangen wie sich Papstbälle dick das gewünscht hatte. 
Das hat er als Meshale im Rätus", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.92, + "end": 8.04, + "text": " Wie wieder zu dazu ist Meshfuchs von 1962 als Meshale für die außerordentliche Form des grullischen", + "tokens": [ + 50364, + 9233, + 6216, + 2164, + 13034, + 1418, + 376, + 14935, + 69, + 37503, + 2957, + 39498, + 3907, + 376, + 14935, + 1220, + 2959, + 978, + 39428, + 765, + 7698, + 68, + 10126, + 730, + 677, + 858, + 6282, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.6859013052547679, + "compression_ratio": 1.3293413173652695, + "no_speech_prob": 0.04553895816206932, + "confidence": 0.503, + "words": [ + { + "text": "Wie", + "start": 0.92, + "end": 1.1, + "confidence": 0.459 + }, + { + "text": "wieder", + "start": 1.1, + "end": 1.4, + "confidence": 0.891 + }, + { + "text": "zu", + "start": 1.4, + "end": 1.6, + "confidence": 0.283 + }, + { + "text": "dazu", + "start": 1.6, + "end": 1.92, + "confidence": 0.173 + }, + { + "text": "ist", + "start": 1.92, + "end": 2.14, + "confidence": 0.649 + }, + { + "text": "Meshfuchs", + "start": 2.14, + "end": 2.74, + "confidence": 0.332 + }, + { + "text": "von", + "start": 2.74, + "end": 3.22, + "confidence": 0.626 + }, + { + "text": "1962", + "start": 3.22, + "end": 4.84, + "confidence": 0.182 + }, + { + "text": "als", + "start": 4.84, + "end": 5.24, + "confidence": 0.883 + }, + { + "text": "Meshale", + "start": 5.24, + "end": 5.78, + "confidence": 0.389 + }, + { + "text": "für", + "start": 5.78, + "end": 5.92, + "confidence": 0.935 + }, + { + "text": "die", + "start": 5.92, + "end": 6.08, + "confidence": 0.973 + }, + { + "text": "außerordentliche", + "start": 6.08, + "end": 7.04, + "confidence": 0.898 + }, + { + "text": "Form", + "start": 7.04, + "end": 7.4, + "confidence": 0.64 + }, + { + "text": "des", + "start": 7.4, + "end": 7.62, + "confidence": 0.974 + }, + { + "text": "grullischen", + "start": 7.62, + "end": 8.04, + "confidence": 0.371 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 
9.42, + "end": 12.78, + "text": " Rätus ist dann nicht so weitergegangen wie sich Papstbälle dick das gewünscht hatte.", + "tokens": [ + 50764, + 497, + 3628, + 301, + 1418, + 3594, + 1979, + 370, + 8988, + 432, + 47152, + 3355, + 3041, + 15919, + 372, + 65, + 31447, + 18659, + 1482, + 6906, + 3412, + 82, + 4701, + 13299, + 13, + 51014 + ], + "temperature": 0.0, + "avg_logprob": -0.6859013052547679, + "compression_ratio": 1.3293413173652695, + "no_speech_prob": 0.04553895816206932, + "confidence": 0.63, + "words": [ + { + "text": "Rätus", + "start": 9.42, + "end": 9.44, + "confidence": 0.381 + }, + { + "text": "ist", + "start": 9.44, + "end": 9.62, + "confidence": 0.897 + }, + { + "text": "dann", + "start": 9.62, + "end": 9.78, + "confidence": 0.586 + }, + { + "text": "nicht", + "start": 9.78, + "end": 9.96, + "confidence": 0.979 + }, + { + "text": "so", + "start": 9.96, + "end": 10.12, + "confidence": 0.991 + }, + { + "text": "weitergegangen", + "start": 10.12, + "end": 10.88, + "confidence": 0.703 + }, + { + "text": "wie", + "start": 10.88, + "end": 11.04, + "confidence": 0.77 + }, + { + "text": "sich", + "start": 11.04, + "end": 11.24, + "confidence": 0.968 + }, + { + "text": "Papstbälle", + "start": 11.24, + "end": 11.72, + "confidence": 0.453 + }, + { + "text": "dick", + "start": 11.72, + "end": 11.9, + "confidence": 0.249 + }, + { + "text": "das", + "start": 11.9, + "end": 12.08, + "confidence": 0.833 + }, + { + "text": "gewünscht", + "start": 12.08, + "end": 12.58, + "confidence": 0.801 + }, + { + "text": "hatte.", + "start": 12.58, + "end": 12.78, + "confidence": 0.906 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 14.0, + "end": 15.28, + "text": " Das hat er als Meshale im Rätus", + "tokens": [ + 51014, + 2846, + 2385, + 1189, + 3907, + 376, + 14935, + 1220, + 566, + 497, + 3628, + 301, + 51164 + ], + "temperature": 0.0, + "avg_logprob": -0.6859013052547679, + "compression_ratio": 1.3293413173652695, + "no_speech_prob": 0.04553895816206932, + 
"confidence": 0.409, + "words": [ + { + "text": "Das", + "start": 14.0, + "end": 14.2, + "confidence": 0.938 + }, + { + "text": "hat", + "start": 14.2, + "end": 14.4, + "confidence": 0.955 + }, + { + "text": "er", + "start": 14.4, + "end": 14.56, + "confidence": 0.736 + }, + { + "text": "als", + "start": 14.56, + "end": 14.7, + "confidence": 0.83 + }, + { + "text": "Meshale", + "start": 14.7, + "end": 15.1, + "confidence": 0.188 + }, + { + "text": "im", + "start": 15.1, + "end": 15.12, + "confidence": 0.05 + }, + { + "text": "Rätus", + "start": 15.12, + "end": 15.28, + "confidence": 0.665 + } + ] + } + ], + "language": "de" +} \ No newline at end of file diff --git a/tests/expected/tiny_auto/gloria.mp3.words.json b/tests/expected/tiny_auto/gloria.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..0deccb915476de8c29df813f0282806fbc7a4a32 --- /dev/null +++ b/tests/expected/tiny_auto/gloria.mp3.words.json @@ -0,0 +1,576 @@ +{ + "text": " Hello. You're my girlfriend. How are you? I'm okay. Why will be? I said she could stay with us, but she feels better. Oh, she can. There's more to be for long. What if you can stay as long as you want? My girlfriend. Really, mixture. 
I agree.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 1.34, + "end": 1.78, + "text": " Hello.", + "tokens": [ + 50364, + 2425, + 13, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.5520876790141012, + "compression_ratio": 1.4319526627218935, + "no_speech_prob": 0.11146047711372375, + "confidence": 0.407, + "words": [ + { + "text": "Hello.", + "start": 1.34, + "end": 1.78, + "confidence": 0.407 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 1.96, + "end": 4.26, + "text": " You're my girlfriend.", + "tokens": [ + 50464, + 509, + 434, + 452, + 10369, + 13, + 50614 + ], + "temperature": 0.0, + "avg_logprob": -0.5520876790141012, + "compression_ratio": 1.4319526627218935, + "no_speech_prob": 0.11146047711372375, + "confidence": 0.175, + "words": [ + { + "text": "You're", + "start": 1.96, + "end": 3.12, + "confidence": 0.192 + }, + { + "text": "my", + "start": 3.12, + "end": 3.2, + "confidence": 0.206 + }, + { + "text": "girlfriend.", + "start": 3.2, + "end": 4.26, + "confidence": 0.123 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 5.52, + "end": 6.54, + "text": " How are you?", + "tokens": [ + 50614, + 1012, + 366, + 291, + 30, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -0.5520876790141012, + "compression_ratio": 1.4319526627218935, + "no_speech_prob": 0.11146047711372375, + "confidence": 0.627, + "words": [ + { + "text": "How", + "start": 5.52, + "end": 5.8, + "confidence": 0.49 + }, + { + "text": "are", + "start": 5.8, + "end": 6.16, + "confidence": 0.524 + }, + { + "text": "you?", + "start": 6.16, + "end": 6.54, + "confidence": 0.959 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 6.76, + "end": 7.8, + "text": " I'm okay.", + "tokens": [ + 50714, + 286, + 478, + 1392, + 13, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.5520876790141012, + "compression_ratio": 1.4319526627218935, + "no_speech_prob": 0.11146047711372375, + "confidence": 0.836, + "words": [ + { + "text": "I'm", + "start": 6.76, + "end": 7.1, + 
"confidence": 0.838 + }, + { + "text": "okay.", + "start": 7.1, + "end": 7.8, + "confidence": 0.83 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 8.38, + "end": 9.14, + "text": " Why will be?", + "tokens": [ + 50764, + 1545, + 486, + 312, + 30, + 50814 + ], + "temperature": 0.0, + "avg_logprob": -0.5520876790141012, + "compression_ratio": 1.4319526627218935, + "no_speech_prob": 0.11146047711372375, + "confidence": 0.382, + "words": [ + { + "text": "Why", + "start": 8.38, + "end": 8.66, + "confidence": 0.343 + }, + { + "text": "will", + "start": 8.66, + "end": 8.96, + "confidence": 0.523 + }, + { + "text": "be?", + "start": 8.96, + "end": 9.14, + "confidence": 0.311 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 9.4, + "end": 11.52, + "text": " I said she could stay with us, but she feels better.", + "tokens": [ + 50814, + 286, + 848, + 750, + 727, + 1754, + 365, + 505, + 11, + 457, + 750, + 3417, + 1101, + 13, + 50964 + ], + "temperature": 0.0, + "avg_logprob": -0.5520876790141012, + "compression_ratio": 1.4319526627218935, + "no_speech_prob": 0.11146047711372375, + "confidence": 0.691, + "words": [ + { + "text": "I", + "start": 9.4, + "end": 9.52, + "confidence": 0.285 + }, + { + "text": "said", + "start": 9.52, + "end": 9.66, + "confidence": 0.621 + }, + { + "text": "she", + "start": 9.66, + "end": 9.82, + "confidence": 0.849 + }, + { + "text": "could", + "start": 9.82, + "end": 9.98, + "confidence": 0.912 + }, + { + "text": "stay", + "start": 9.98, + "end": 10.14, + "confidence": 0.992 + }, + { + "text": "with", + "start": 10.14, + "end": 10.26, + "confidence": 0.986 + }, + { + "text": "us,", + "start": 10.26, + "end": 10.44, + "confidence": 0.782 + }, + { + "text": "but", + "start": 10.64, + "end": 10.72, + "confidence": 0.225 + }, + { + "text": "she", + "start": 10.72, + "end": 10.94, + "confidence": 0.971 + }, + { + "text": "feels", + "start": 10.94, + "end": 11.24, + "confidence": 0.762 + }, + { + "text": "better.", + "start": 11.24, + "end": 
11.52, + "confidence": 0.987 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 12.18, + "end": 13.44, + "text": " Oh, she can.", + "tokens": [ + 50964, + 876, + 11, + 750, + 393, + 13, + 51064 + ], + "temperature": 0.0, + "avg_logprob": -0.5520876790141012, + "compression_ratio": 1.4319526627218935, + "no_speech_prob": 0.11146047711372375, + "confidence": 0.287, + "words": [ + { + "text": "Oh,", + "start": 12.18, + "end": 12.6, + "confidence": 0.181 + }, + { + "text": "she", + "start": 12.8, + "end": 12.96, + "confidence": 0.686 + }, + { + "text": "can.", + "start": 12.96, + "end": 13.44, + "confidence": 0.191 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 13.5, + "end": 15.2, + "text": " There's more to be for long.", + "tokens": [ + 51064, + 821, + 311, + 544, + 281, + 312, + 337, + 938, + 13, + 51114 + ], + "temperature": 0.0, + "avg_logprob": -0.5520876790141012, + "compression_ratio": 1.4319526627218935, + "no_speech_prob": 0.11146047711372375, + "confidence": 0.682, + "words": [ + { + "text": "There's", + "start": 13.5, + "end": 14.44, + "confidence": 0.518 + }, + { + "text": "more", + "start": 14.44, + "end": 14.56, + "confidence": 0.814 + }, + { + "text": "to", + "start": 14.56, + "end": 14.74, + "confidence": 0.414 + }, + { + "text": "be", + "start": 14.74, + "end": 14.8, + "confidence": 0.949 + }, + { + "text": "for", + "start": 14.8, + "end": 15.0, + "confidence": 0.987 + }, + { + "text": "long.", + "start": 15.0, + "end": 15.2, + "confidence": 0.814 + } + ] + }, + { + "id": 8, + "seek": 0, + "start": 15.38, + "end": 16.86, + "text": " What if you can stay as long as you want?", + "tokens": [ + 51114, + 708, + 498, + 291, + 393, + 1754, + 382, + 938, + 382, + 291, + 528, + 30, + 51214 + ], + "temperature": 0.0, + "avg_logprob": -0.5520876790141012, + "compression_ratio": 1.4319526627218935, + "no_speech_prob": 0.11146047711372375, + "confidence": 0.771, + "words": [ + { + "text": "What", + "start": 15.38, + "end": 15.54, + "confidence": 0.387 + }, 
+ { + "text": "if", + "start": 15.54, + "end": 15.62, + "confidence": 0.918 + }, + { + "text": "you", + "start": 15.62, + "end": 15.7, + "confidence": 0.97 + }, + { + "text": "can", + "start": 15.7, + "end": 15.98, + "confidence": 0.946 + }, + { + "text": "stay", + "start": 15.98, + "end": 16.12, + "confidence": 0.958 + }, + { + "text": "as", + "start": 16.12, + "end": 16.24, + "confidence": 0.308 + }, + { + "text": "long", + "start": 16.24, + "end": 16.36, + "confidence": 0.993 + }, + { + "text": "as", + "start": 16.36, + "end": 16.54, + "confidence": 0.992 + }, + { + "text": "you", + "start": 16.54, + "end": 16.62, + "confidence": 0.959 + }, + { + "text": "want?", + "start": 16.62, + "end": 16.86, + "confidence": 0.82 + } + ] + }, + { + "id": 9, + "seek": 0, + "start": 16.86, + "end": 17.74, + "text": " My girlfriend.", + "tokens": [ + 51214, + 1222, + 10369, + 13, + 51264 + ], + "temperature": 0.0, + "avg_logprob": -0.5520876790141012, + "compression_ratio": 1.4319526627218935, + "no_speech_prob": 0.11146047711372375, + "confidence": 0.141, + "words": [ + { + "text": "My", + "start": 16.86, + "end": 17.34, + "confidence": 0.125 + }, + { + "text": "girlfriend.", + "start": 17.34, + "end": 17.74, + "confidence": 0.159 + } + ] + }, + { + "id": 10, + "seek": 0, + "start": 17.98, + "end": 18.9, + "text": " Really, mixture.", + "tokens": [ + 51264, + 4083, + 11, + 9925, + 13, + 51314 + ], + "temperature": 0.0, + "avg_logprob": -0.5520876790141012, + "compression_ratio": 1.4319526627218935, + "no_speech_prob": 0.11146047711372375, + "confidence": 0.439, + "words": [ + { + "text": "Really,", + "start": 17.98, + "end": 18.34, + "confidence": 0.704 + }, + { + "text": "mixture.", + "start": 18.7, + "end": 18.9, + "confidence": 0.273 + } + ] + }, + { + "id": 11, + "seek": 0, + "start": 19.72, + "end": 20.22, + "text": " I agree.", + "tokens": [ + 51314, + 286, + 3986, + 13, + 51364 + ], + "temperature": 0.0, + "avg_logprob": -0.5520876790141012, + "compression_ratio": 
1.4319526627218935, + "no_speech_prob": 0.11146047711372375, + "confidence": 0.16, + "words": [ + { + "text": "I", + "start": 19.72, + "end": 20.06, + "confidence": 0.098 + }, + { + "text": "agree.", + "start": 20.06, + "end": 20.22, + "confidence": 0.259 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/tiny_auto/japanese.mp3.words.json b/tests/expected/tiny_auto/japanese.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..1855ddbe74669dde3fd8b940a16b96afc31e27fd --- /dev/null +++ b/tests/expected/tiny_auto/japanese.mp3.words.json @@ -0,0 +1,1518 @@ +{ + "text": "いきます 入室タブでの最図免証記が実際と違う県に関するご質問いただいております同じ度面でデレクトリーごとに 別再度として管理上をしている際と針ます サプテレクトリーごとにわけたサイトはそれぞれ パブリシャーセンターに登録していくグルニュース上では 別再度として認識され パンコン愛行は最図ごと物物は正しく表示されますしかし グルニュースタブでは バブコン最図免証記ともに正しくない以上 時にいいなります こちらいたしても ではありませんパブコンはサプテレクトリーごとに 設定した", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.08, + "end": 6.62, + "text": "いきます 入室タブでの最図免証記が実際と違う県に関するご質問いただいております", + "tokens": [ + 50364, + 47348, + 220, + 14028, + 2415, + 97, + 12144, + 28889, + 2474, + 2972, + 8661, + 3919, + 111, + 2347, + 235, + 5396, + 120, + 16958, + 5142, + 33197, + 34837, + 3193, + 49806, + 2862, + 234, + 4108, + 5196, + 95, + 22570, + 9991, + 43450, + 11361, + 32418, + 18549, + 6117, + 19420, + 50712 + ], + "temperature": 0.0, + "avg_logprob": -0.568177216972401, + "compression_ratio": 1.582191780821918, + "no_speech_prob": 0.189762145280838, + "confidence": 0.551, + "words": [ + { + "text": "いきます", + "start": 0.08, + "end": 0.6, + "confidence": 0.153 + }, + { + "text": " ", + "start": 0.6, + "end": 1.18, + "confidence": 0.305 + }, + { + "text": "入", + "start": 1.18, + "end": 1.38, + "confidence": 0.435 + }, + { + "text": "室", + "start": 1.38, + "end": 1.52, + "confidence": 0.501 + }, + { + "text": "タ", + "start": 1.52, + "end": 1.68, + "confidence": 0.201 + }, + { + "text": "ブ", + "start": 1.68, + "end": 1.78, + "confidence": 0.837 + }, + { + "text": 
"で", + "start": 1.78, + "end": 1.98, + "confidence": 0.947 + }, + { + "text": "の", + "start": 1.98, + "end": 2.22, + "confidence": 0.907 + }, + { + "text": "最", + "start": 2.22, + "end": 2.64, + "confidence": 0.277 + }, + { + "text": "図", + "start": 2.64, + "end": 2.78, + "confidence": 0.349 + }, + { + "text": "免", + "start": 2.78, + "end": 2.98, + "confidence": 0.647 + }, + { + "text": "証", + "start": 2.98, + "end": 3.18, + "confidence": 0.282 + }, + { + "text": "記", + "start": 3.18, + "end": 3.32, + "confidence": 0.299 + }, + { + "text": "が", + "start": 3.32, + "end": 3.5, + "confidence": 0.976 + }, + { + "text": "実", + "start": 3.5, + "end": 3.74, + "confidence": 0.343 + }, + { + "text": "際", + "start": 3.74, + "end": 3.9, + "confidence": 0.975 + }, + { + "text": "と", + "start": 3.9, + "end": 4.08, + "confidence": 0.91 + }, + { + "text": "違う", + "start": 4.08, + "end": 4.38, + "confidence": 0.803 + }, + { + "text": "県", + "start": 4.38, + "end": 4.6, + "confidence": 0.405 + }, + { + "text": "に", + "start": 4.6, + "end": 4.74, + "confidence": 0.988 + }, + { + "text": "関", + "start": 4.74, + "end": 4.94, + "confidence": 0.543 + }, + { + "text": "する", + "start": 4.94, + "end": 5.12, + "confidence": 0.947 + }, + { + "text": "ご", + "start": 5.12, + "end": 5.3, + "confidence": 0.499 + }, + { + "text": "質", + "start": 5.3, + "end": 5.46, + "confidence": 0.943 + }, + { + "text": "問", + "start": 5.46, + "end": 5.6, + "confidence": 0.99 + }, + { + "text": "いただ", + "start": 5.6, + "end": 5.92, + "confidence": 0.922 + }, + { + "text": "いて", + "start": 5.92, + "end": 6.16, + "confidence": 0.982 + }, + { + "text": "お", + "start": 6.16, + "end": 6.26, + "confidence": 0.79 + }, + { + "text": "ります", + "start": 6.26, + "end": 6.62, + "confidence": 0.975 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 6.92, + "end": 12.9, + "text": "同じ度面でデレクトリーごとに 別再度として管理上をしている際と針", + "tokens": [ + 50712, + 13089, + 9257, + 13127, + 8833, + 2474, + 31327, + 16680, + 10825, + 7588, + 12376, + 
3384, + 9991, + 3193, + 4108, + 220, + 16158, + 8623, + 13127, + 3193, + 8822, + 23131, + 13876, + 5708, + 5998, + 8822, + 22979, + 34837, + 3193, + 5873, + 251, + 51010 + ], + "temperature": 0.0, + "avg_logprob": -0.568177216972401, + "compression_ratio": 1.582191780821918, + "no_speech_prob": 0.189762145280838, + "confidence": 0.524, + "words": [ + { + "text": "同", + "start": 6.92, + "end": 7.3, + "confidence": 0.83 + }, + { + "text": "じ", + "start": 7.3, + "end": 7.42, + "confidence": 0.227 + }, + { + "text": "度", + "start": 7.42, + "end": 7.6, + "confidence": 0.138 + }, + { + "text": "面", + "start": 7.6, + "end": 7.76, + "confidence": 0.296 + }, + { + "text": "で", + "start": 7.76, + "end": 8.0, + "confidence": 0.567 + }, + { + "text": "デ", + "start": 8.0, + "end": 8.16, + "confidence": 0.51 + }, + { + "text": "レ", + "start": 8.16, + "end": 8.28, + "confidence": 0.481 + }, + { + "text": "ク", + "start": 8.28, + "end": 8.36, + "confidence": 0.98 + }, + { + "text": "ト", + "start": 8.36, + "end": 8.5, + "confidence": 0.984 + }, + { + "text": "リ", + "start": 8.5, + "end": 8.62, + "confidence": 0.967 + }, + { + "text": "ー", + "start": 8.62, + "end": 8.7, + "confidence": 0.644 + }, + { + "text": "ご", + "start": 8.7, + "end": 8.84, + "confidence": 0.227 + }, + { + "text": "と", + "start": 8.84, + "end": 8.98, + "confidence": 0.988 + }, + { + "text": "に", + "start": 8.98, + "end": 9.14, + "confidence": 0.988 + }, + { + "text": " ", + "start": 9.14, + "end": 9.38, + "confidence": 0.261 + }, + { + "text": "別", + "start": 9.38, + "end": 9.42, + "confidence": 0.405 + }, + { + "text": "再", + "start": 9.42, + "end": 9.58, + "confidence": 0.427 + }, + { + "text": "度", + "start": 9.58, + "end": 9.74, + "confidence": 0.327 + }, + { + "text": "と", + "start": 9.74, + "end": 9.9, + "confidence": 0.886 + }, + { + "text": "して", + "start": 9.9, + "end": 10.3, + "confidence": 0.998 + }, + { + "text": "管", + "start": 10.3, + "end": 10.88, + "confidence": 0.821 + }, + { + "text": "理", + 
"start": 10.88, + "end": 11.06, + "confidence": 1.0 + }, + { + "text": "上", + "start": 11.06, + "end": 11.38, + "confidence": 0.466 + }, + { + "text": "を", + "start": 11.38, + "end": 11.54, + "confidence": 0.966 + }, + { + "text": "して", + "start": 11.54, + "end": 11.84, + "confidence": 0.916 + }, + { + "text": "いる", + "start": 11.84, + "end": 12.16, + "confidence": 0.979 + }, + { + "text": "際", + "start": 12.16, + "end": 12.54, + "confidence": 0.122 + }, + { + "text": "と", + "start": 12.54, + "end": 12.7, + "confidence": 0.712 + }, + { + "text": "針", + "start": 12.7, + "end": 12.9, + "confidence": 0.264 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 12.9, + "end": 19.55, + "text": "ます サプテレクトリーごとにわけたサイトはそれぞれ パブリシャーセンターに登録していく", + "tokens": [ + 51010, + 5368, + 220, + 23607, + 20953, + 22985, + 16680, + 10825, + 7588, + 12376, + 3384, + 9991, + 3193, + 4108, + 9206, + 7625, + 3368, + 23607, + 8040, + 7588, + 3065, + 13873, + 31563, + 4132, + 15096, + 239, + 28889, + 12376, + 11054, + 17233, + 3384, + 31223, + 4824, + 30736, + 4108, + 46246, + 8822, + 49394, + 51340 + ], + "temperature": 0.0, + "avg_logprob": -0.568177216972401, + "compression_ratio": 1.582191780821918, + "no_speech_prob": 0.189762145280838, + "confidence": 0.715, + "words": [ + { + "text": "ます", + "start": 12.9, + "end": 13.22, + "confidence": 0.344 + }, + { + "text": " ", + "start": 13.22, + "end": 14.1, + "confidence": 0.34 + }, + { + "text": "サ", + "start": 14.1, + "end": 14.22, + "confidence": 0.555 + }, + { + "text": "プ", + "start": 14.22, + "end": 14.32, + "confidence": 0.786 + }, + { + "text": "テ", + "start": 14.32, + "end": 14.42, + "confidence": 0.622 + }, + { + "text": "レ", + "start": 14.42, + "end": 14.58, + "confidence": 0.865 + }, + { + "text": "ク", + "start": 14.58, + "end": 14.68, + "confidence": 0.996 + }, + { + "text": "ト", + "start": 14.68, + "end": 14.78, + "confidence": 0.999 + }, + { + "text": "リ", + "start": 14.78, + "end": 14.92, + "confidence": 0.996 + }, + { + "text": 
"ー", + "start": 14.92, + "end": 15.04, + "confidence": 0.996 + }, + { + "text": "ご", + "start": 15.04, + "end": 15.1, + "confidence": 0.965 + }, + { + "text": "と", + "start": 15.1, + "end": 15.38, + "confidence": 0.998 + }, + { + "text": "に", + "start": 15.38, + "end": 15.84, + "confidence": 0.625 + }, + { + "text": "わ", + "start": 15.84, + "end": 16.02, + "confidence": 0.739 + }, + { + "text": "け", + "start": 16.02, + "end": 16.14, + "confidence": 0.992 + }, + { + "text": "た", + "start": 16.14, + "end": 16.28, + "confidence": 0.885 + }, + { + "text": "サ", + "start": 16.28, + "end": 16.42, + "confidence": 0.304 + }, + { + "text": "イ", + "start": 16.42, + "end": 16.5, + "confidence": 0.976 + }, + { + "text": "ト", + "start": 16.5, + "end": 16.6, + "confidence": 0.932 + }, + { + "text": "は", + "start": 16.6, + "end": 17.08, + "confidence": 0.894 + }, + { + "text": "それ", + "start": 17.08, + "end": 17.46, + "confidence": 0.474 + }, + { + "text": "ぞ", + "start": 17.46, + "end": 17.64, + "confidence": 0.919 + }, + { + "text": "れ", + "start": 17.64, + "end": 17.72, + "confidence": 0.998 + }, + { + "text": " パ", + "start": 17.72, + "end": 17.86, + "confidence": 0.506 + }, + { + "text": "ブ", + "start": 17.86, + "end": 17.94, + "confidence": 0.961 + }, + { + "text": "リ", + "start": 17.94, + "end": 18.06, + "confidence": 0.988 + }, + { + "text": "シ", + "start": 18.06, + "end": 18.18, + "confidence": 0.542 + }, + { + "text": "ャ", + "start": 18.18, + "end": 18.26, + "confidence": 0.911 + }, + { + "text": "ー", + "start": 18.26, + "end": 18.42, + "confidence": 0.546 + }, + { + "text": "セ", + "start": 18.42, + "end": 18.44, + "confidence": 0.367 + }, + { + "text": "ン", + "start": 18.44, + "end": 18.48, + "confidence": 0.915 + }, + { + "text": "ター", + "start": 18.48, + "end": 18.6, + "confidence": 0.996 + }, + { + "text": "に", + "start": 18.6, + "end": 18.76, + "confidence": 0.707 + }, + { + "text": "登録", + "start": 18.76, + "end": 19.12, + "confidence": 0.668 + }, + { + "text": 
"して", + "start": 19.12, + "end": 19.38, + "confidence": 0.438 + }, + { + "text": "いく", + "start": 19.38, + "end": 19.55, + "confidence": 0.806 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 19.55, + "end": 27.08, + "text": "グルニュース上では 別再度として認識され パンコン愛行は最図ごと物物は正しく表示されます", + "tokens": [ + 51340, + 23839, + 9405, + 34737, + 26167, + 3384, + 9550, + 5708, + 16719, + 220, + 16158, + 8623, + 13127, + 3193, + 8822, + 22041, + 43143, + 6722, + 4132, + 220, + 23268, + 1047, + 111, + 18066, + 4824, + 15157, + 8082, + 3065, + 8661, + 3919, + 111, + 9991, + 3193, + 23516, + 23516, + 3065, + 15789, + 26568, + 40053, + 6722, + 4132, + 5368, + 51727 + ], + "temperature": 0.0, + "avg_logprob": -0.568177216972401, + "compression_ratio": 1.582191780821918, + "no_speech_prob": 0.189762145280838, + "confidence": 0.554, + "words": [ + { + "text": "グ", + "start": 19.55, + "end": 19.78, + "confidence": 0.701 + }, + { + "text": "ル", + "start": 19.78, + "end": 19.88, + "confidence": 0.976 + }, + { + "text": "ニ", + "start": 19.88, + "end": 20.02, + "confidence": 0.899 + }, + { + "text": "ュ", + "start": 20.02, + "end": 20.14, + "confidence": 0.996 + }, + { + "text": "ー", + "start": 20.14, + "end": 20.18, + "confidence": 0.994 + }, + { + "text": "ス", + "start": 20.18, + "end": 20.22, + "confidence": 0.958 + }, + { + "text": "上", + "start": 20.22, + "end": 20.42, + "confidence": 0.313 + }, + { + "text": "では", + "start": 20.42, + "end": 20.82, + "confidence": 0.783 + }, + { + "text": " ", + "start": 20.82, + "end": 21.1, + "confidence": 0.595 + }, + { + "text": "別", + "start": 21.1, + "end": 21.16, + "confidence": 0.294 + }, + { + "text": "再", + "start": 21.16, + "end": 21.34, + "confidence": 0.84 + }, + { + "text": "度", + "start": 21.34, + "end": 21.46, + "confidence": 1.0 + }, + { + "text": "と", + "start": 21.46, + "end": 21.64, + "confidence": 0.992 + }, + { + "text": "して", + "start": 21.64, + "end": 21.82, + "confidence": 0.994 + }, + { + "text": "認", + "start": 21.82, + "end": 22.04, + 
"confidence": 0.497 + }, + { + "text": "識", + "start": 22.04, + "end": 22.2, + "confidence": 0.99 + }, + { + "text": "さ", + "start": 22.2, + "end": 22.34, + "confidence": 0.994 + }, + { + "text": "れ", + "start": 22.34, + "end": 22.7, + "confidence": 0.958 + }, + { + "text": " ", + "start": 22.7, + "end": 22.76, + "confidence": 0.204 + }, + { + "text": "パ", + "start": 22.76, + "end": 22.9, + "confidence": 0.039 + }, + { + "text": "ン", + "start": 22.9, + "end": 23.0, + "confidence": 0.332 + }, + { + "text": "コ", + "start": 23.0, + "end": 23.24, + "confidence": 0.918 + }, + { + "text": "ン", + "start": 23.24, + "end": 23.4, + "confidence": 0.807 + }, + { + "text": "愛", + "start": 23.4, + "end": 23.48, + "confidence": 0.239 + }, + { + "text": "行", + "start": 23.48, + "end": 23.64, + "confidence": 0.134 + }, + { + "text": "は", + "start": 23.64, + "end": 23.86, + "confidence": 0.959 + }, + { + "text": "最", + "start": 23.86, + "end": 24.1, + "confidence": 0.729 + }, + { + "text": "図", + "start": 24.1, + "end": 24.3, + "confidence": 0.948 + }, + { + "text": "ご", + "start": 24.3, + "end": 24.42, + "confidence": 0.154 + }, + { + "text": "と", + "start": 24.42, + "end": 24.52, + "confidence": 0.969 + }, + { + "text": "物", + "start": 24.52, + "end": 24.76, + "confidence": 0.238 + }, + { + "text": "物", + "start": 24.76, + "end": 24.92, + "confidence": 0.197 + }, + { + "text": "は", + "start": 24.92, + "end": 25.08, + "confidence": 0.566 + }, + { + "text": "正", + "start": 25.08, + "end": 25.36, + "confidence": 0.454 + }, + { + "text": "しく", + "start": 25.36, + "end": 25.64, + "confidence": 0.955 + }, + { + "text": "表示", + "start": 25.64, + "end": 26.0, + "confidence": 0.414 + }, + { + "text": "さ", + "start": 26.0, + "end": 26.28, + "confidence": 0.712 + }, + { + "text": "れ", + "start": 26.28, + "end": 26.62, + "confidence": 0.976 + }, + { + "text": "ます", + "start": 26.62, + "end": 27.08, + "confidence": 0.474 + } + ] + }, + { + "id": 4, + "seek": 2726, + "start": 27.36, + "end": 
32.39, + "text": "しかし グルニュースタブでは バブコン最図免証記ともに正しくない", + "tokens": [ + 50364, + 32156, + 2849, + 220, + 23839, + 9405, + 34737, + 26167, + 3384, + 9550, + 12144, + 28889, + 16719, + 15096, + 238, + 28889, + 18066, + 4824, + 8661, + 3919, + 111, + 2347, + 235, + 5396, + 120, + 16958, + 3193, + 4801, + 4108, + 15789, + 26568, + 9311, + 50620 + ], + "temperature": 0.0, + "avg_logprob": -0.4461708068847656, + "compression_ratio": 1.375, + "no_speech_prob": 0.3626025915145874, + "confidence": 0.806, + "words": [ + { + "text": "しか", + "start": 27.36, + "end": 27.58, + "confidence": 0.959 + }, + { + "text": "し", + "start": 27.58, + "end": 27.74, + "confidence": 0.992 + }, + { + "text": " ", + "start": 27.74, + "end": 27.84, + "confidence": 0.249 + }, + { + "text": "グ", + "start": 27.84, + "end": 27.92, + "confidence": 0.445 + }, + { + "text": "ル", + "start": 27.92, + "end": 28.04, + "confidence": 0.995 + }, + { + "text": "ニ", + "start": 28.04, + "end": 28.24, + "confidence": 0.884 + }, + { + "text": "ュ", + "start": 28.24, + "end": 28.7, + "confidence": 0.995 + }, + { + "text": "ー", + "start": 28.7, + "end": 29.18, + "confidence": 0.999 + }, + { + "text": "ス", + "start": 29.18, + "end": 29.2, + "confidence": 0.97 + }, + { + "text": "タ", + "start": 29.2, + "end": 29.34, + "confidence": 0.992 + }, + { + "text": "ブ", + "start": 29.34, + "end": 29.44, + "confidence": 0.996 + }, + { + "text": "では", + "start": 29.44, + "end": 29.72, + "confidence": 0.979 + }, + { + "text": " バ", + "start": 29.72, + "end": 30.06, + "confidence": 0.503 + }, + { + "text": "ブ", + "start": 30.06, + "end": 30.16, + "confidence": 0.631 + }, + { + "text": "コ", + "start": 30.16, + "end": 30.34, + "confidence": 0.631 + }, + { + "text": "ン", + "start": 30.34, + "end": 30.42, + "confidence": 0.981 + }, + { + "text": "最", + "start": 30.42, + "end": 30.6, + "confidence": 0.659 + }, + { + "text": "図", + "start": 30.6, + "end": 30.88, + "confidence": 0.957 + }, + { + "text": "免", + "start": 30.88, + "end": 30.94, 
+ "confidence": 0.977 + }, + { + "text": "証", + "start": 30.94, + "end": 31.12, + "confidence": 0.997 + }, + { + "text": "記", + "start": 31.12, + "end": 31.26, + "confidence": 0.994 + }, + { + "text": "と", + "start": 31.26, + "end": 31.42, + "confidence": 0.722 + }, + { + "text": "も", + "start": 31.42, + "end": 31.54, + "confidence": 0.839 + }, + { + "text": "に", + "start": 31.54, + "end": 31.66, + "confidence": 0.945 + }, + { + "text": "正", + "start": 31.66, + "end": 31.96, + "confidence": 0.481 + }, + { + "text": "しく", + "start": 31.96, + "end": 32.18, + "confidence": 0.999 + }, + { + "text": "ない", + "start": 32.18, + "end": 32.39, + "confidence": 0.938 + } + ] + }, + { + "id": 5, + "seek": 2726, + "start": 32.39, + "end": 36.34, + "text": "以上 時にいいなります こちらいたしても ではありません", + "tokens": [ + 50620, + 29497, + 220, + 6611, + 4108, + 13806, + 3203, + 19420, + 14384, + 28567, + 17679, + 8822, + 4801, + 220, + 16719, + 14498, + 30250, + 50838 + ], + "temperature": 0.0, + "avg_logprob": -0.4461708068847656, + "compression_ratio": 1.375, + "no_speech_prob": 0.3626025915145874, + "confidence": 0.456, + "words": [ + { + "text": "以上", + "start": 32.39, + "end": 32.74, + "confidence": 0.31 + }, + { + "text": " ", + "start": 32.74, + "end": 33.04, + "confidence": 0.128 + }, + { + "text": "時", + "start": 33.04, + "end": 33.1, + "confidence": 0.355 + }, + { + "text": "に", + "start": 33.1, + "end": 33.12, + "confidence": 0.791 + }, + { + "text": "いい", + "start": 33.12, + "end": 33.36, + "confidence": 0.621 + }, + { + "text": "な", + "start": 33.36, + "end": 33.48, + "confidence": 0.793 + }, + { + "text": "ります", + "start": 33.48, + "end": 34.02, + "confidence": 0.896 + }, + { + "text": " こ", + "start": 34.02, + "end": 34.38, + "confidence": 0.328 + }, + { + "text": "ちら", + "start": 34.38, + "end": 34.56, + "confidence": 0.684 + }, + { + "text": "いた", + "start": 34.56, + "end": 34.84, + "confidence": 0.571 + }, + { + "text": "して", + "start": 34.84, + "end": 35.04, + "confidence": 
0.437 + }, + { + "text": "も", + "start": 35.04, + "end": 35.18, + "confidence": 0.422 + }, + { + "text": " ", + "start": 35.18, + "end": 35.38, + "confidence": 0.244 + }, + { + "text": "では", + "start": 35.38, + "end": 35.46, + "confidence": 0.139 + }, + { + "text": "あり", + "start": 35.46, + "end": 35.72, + "confidence": 0.903 + }, + { + "text": "ません", + "start": 35.72, + "end": 36.34, + "confidence": 0.997 + } + ] + }, + { + "id": 6, + "seek": 2726, + "start": 36.42, + "end": 40.4, + "text": "パブコンはサプテレクトリーごとに 設定した", + "tokens": [ + 50838, + 23268, + 28889, + 18066, + 4824, + 3065, + 23607, + 20953, + 22985, + 16680, + 10825, + 7588, + 12376, + 3384, + 9991, + 3193, + 4108, + 220, + 39035, + 12088, + 8533, + 51042 + ], + "temperature": 0.0, + "avg_logprob": -0.4461708068847656, + "compression_ratio": 1.375, + "no_speech_prob": 0.3626025915145874, + "confidence": 0.849, + "words": [ + { + "text": "パ", + "start": 36.42, + "end": 36.92, + "confidence": 0.482 + }, + { + "text": "ブ", + "start": 36.92, + "end": 37.04, + "confidence": 0.648 + }, + { + "text": "コ", + "start": 37.04, + "end": 37.2, + "confidence": 0.689 + }, + { + "text": "ン", + "start": 37.2, + "end": 37.72, + "confidence": 0.994 + }, + { + "text": "は", + "start": 37.72, + "end": 37.74, + "confidence": 0.975 + }, + { + "text": "サ", + "start": 37.74, + "end": 38.22, + "confidence": 0.668 + }, + { + "text": "プ", + "start": 38.22, + "end": 38.32, + "confidence": 0.572 + }, + { + "text": "テ", + "start": 38.32, + "end": 38.42, + "confidence": 0.984 + }, + { + "text": "レ", + "start": 38.42, + "end": 38.56, + "confidence": 0.997 + }, + { + "text": "ク", + "start": 38.56, + "end": 38.68, + "confidence": 0.997 + }, + { + "text": "ト", + "start": 38.68, + "end": 38.78, + "confidence": 0.998 + }, + { + "text": "リ", + "start": 38.78, + "end": 38.92, + "confidence": 0.999 + }, + { + "text": "ー", + "start": 38.92, + "end": 38.96, + "confidence": 0.987 + }, + { + "text": "ご", + "start": 38.96, + "end": 39.08, + 
"confidence": 0.959 + }, + { + "text": "と", + "start": 39.08, + "end": 39.26, + "confidence": 0.999 + }, + { + "text": "に", + "start": 39.26, + "end": 39.58, + "confidence": 0.974 + }, + { + "text": " ", + "start": 39.58, + "end": 39.66, + "confidence": 0.663 + }, + { + "text": "設", + "start": 39.66, + "end": 39.82, + "confidence": 0.911 + }, + { + "text": "定", + "start": 39.82, + "end": 40.02, + "confidence": 0.999 + }, + { + "text": "した", + "start": 40.02, + "end": 40.4, + "confidence": 0.873 + } + ] + } + ], + "language": "ja" +} \ No newline at end of file diff --git a/tests/expected/tiny_auto/laugh1.mp3.words.json b/tests/expected/tiny_auto/laugh1.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..a7a3bd6207355e5dadbc844de6ba41ef56377339 --- /dev/null +++ b/tests/expected/tiny_auto/laugh1.mp3.words.json @@ -0,0 +1,70 @@ +{ + "text": " You can't do it, you can't.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.0, + "end": 1.7, + "text": " You can't do it, you can't.", + "tokens": [ + 50364, + 509, + 393, + 380, + 360, + 309, + 11, + 291, + 393, + 380, + 13, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -1.3003899500920222, + "compression_ratio": 0.9310344827586207, + "no_speech_prob": 0.4180056154727936, + "confidence": 0.277, + "words": [ + { + "text": "You", + "start": 0.0, + "end": 0.56, + "confidence": 0.081 + }, + { + "text": "can't", + "start": 0.56, + "end": 0.82, + "confidence": 0.302 + }, + { + "text": "do", + "start": 0.82, + "end": 1.04, + "confidence": 0.118 + }, + { + "text": "it,", + "start": 1.04, + "end": 1.2, + "confidence": 0.374 + }, + { + "text": "you", + "start": 1.26, + "end": 1.5, + "confidence": 0.141 + }, + { + "text": "can't.", + "start": 1.5, + "end": 1.7, + "confidence": 0.868 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/tiny_auto/laugh2.mp3.words.json b/tests/expected/tiny_auto/laugh2.mp3.words.json new file mode 100644 index 
0000000000000000000000000000000000000000..81c0b4040bd9be940f1e926d688912d248276441 --- /dev/null +++ b/tests/expected/tiny_auto/laugh2.mp3.words.json @@ -0,0 +1,32 @@ +{ + "text": " Haha.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.18, + "end": 0.56, + "text": " Haha.", + "tokens": [ + 50364, + 19131, + 13, + 50401 + ], + "temperature": 0.0, + "avg_logprob": -1.3164030075073243, + "compression_ratio": 0.38461538461538464, + "no_speech_prob": 0.5721203088760376, + "confidence": 0.079, + "words": [ + { + "text": "Haha.", + "start": 0.18, + "end": 0.56, + "confidence": 0.079 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/tiny_auto/punctuations.mp3.words.json b/tests/expected/tiny_auto/punctuations.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..3da035b29fdb645b73a077037954dce5f1348e2a --- /dev/null +++ b/tests/expected/tiny_auto/punctuations.mp3.words.json @@ -0,0 +1,71 @@ +{ + "text": " Dima, est ce que l'on vole ?", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.42, + "end": 2.58, + "text": " Dima, est ce que l'on vole ?", + "tokens": [ + 50364, + 413, + 4775, + 11, + 871, + 1769, + 631, + 287, + 6, + 266, + 49877, + 2506, + 50494 + ], + "temperature": 0.0, + "avg_logprob": -0.8432754789079938, + "compression_ratio": 0.7777777777777778, + "no_speech_prob": 0.0010773586109280586, + "confidence": 0.456, + "words": [ + { + "text": "Dima,", + "start": 0.42, + "end": 0.84, + "confidence": 0.247 + }, + { + "text": "est", + "start": 1.2, + "end": 1.4, + "confidence": 0.93 + }, + { + "text": "ce", + "start": 1.4, + "end": 1.56, + "confidence": 0.587 + }, + { + "text": "que", + "start": 1.56, + "end": 1.68, + "confidence": 0.916 + }, + { + "text": "l'on", + "start": 1.68, + "end": 2.02, + "confidence": 0.481 + }, + { + "text": "vole ?", + "start": 2.02, + "end": 2.58, + "confidence": 0.249 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git 
a/tests/expected/tiny_auto/radio_short.mp3.words.json b/tests/expected/tiny_auto/radio_short.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..7a6e6e9fd0b0f095180c783f10f296a7391f71f4 --- /dev/null +++ b/tests/expected/tiny_auto/radio_short.mp3.words.json @@ -0,0 +1,2833 @@ +{ + "text": " What are you telling me, guys? What are you telling me, dude? What are you telling me, dude? What are you telling me, dude? What are you telling me, guys? What are you telling me, guys? What are you telling me, dude? What are you telling me, dude? The most important thing is not to be alert. It's what you do. Yes, man. The reference of the book. Good evening, good evening. All of you, this is your BFFM TV. It's called the BFFM story with the actuality. There are 60 minutes of the report of the analysis of the actions you need. As you can see, it's a bit of a report of the reports of the reports. So, what is it, guys? So, we are ready to do it. The report is ready to be told by the leaders of the CFT's CFT's CFT's CFT. 
The report of the CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's C The CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's C", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 5.0, + "end": 6.12, + "text": " What are you telling me, guys?", + "tokens": [ + 50364, + 708, + 366, + 291, + 3585, + 385, + 11, + 1074, + 30, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.39899626010801736, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5096546411514282, + "confidence": 0.342, + "words": [ + { + "text": "What", + "start": 5.0, + "end": 5.22, + "confidence": 0.041 + }, + { + "text": "are", + "start": 5.22, + "end": 5.3, + "confidence": 0.602 + }, + { + "text": "you", + "start": 5.3, + "end": 5.46, + "confidence": 0.97 + }, + { + "text": "telling", + "start": 5.46, + "end": 5.7, + "confidence": 0.355 + }, + { + "text": "me,", + "start": 5.7, + "end": 5.94, + "confidence": 0.612 + }, + { + "text": "guys?", + "start": 6.02, + "end": 6.12, + "confidence": 0.304 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 6.76, + "end": 7.57, + "text": " What are you telling me, dude?", + "tokens": [ + 50664, + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30, + 50764 + ], + "temperature": 0.0, + 
"avg_logprob": -0.39899626010801736, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5096546411514282, + "confidence": 0.677, + "words": [ + { + "text": "What", + "start": 6.76, + "end": 6.92, + "confidence": 0.884 + }, + { + "text": "are", + "start": 6.92, + "end": 7.06, + "confidence": 0.936 + }, + { + "text": "you", + "start": 7.06, + "end": 7.18, + "confidence": 0.993 + }, + { + "text": "telling", + "start": 7.18, + "end": 7.3, + "confidence": 0.906 + }, + { + "text": "me,", + "start": 7.3, + "end": 7.44, + "confidence": 0.984 + }, + { + "text": "dude?", + "start": 7.48, + "end": 7.57, + "confidence": 0.132 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 7.57, + "end": 9.36, + "text": " What are you telling me, dude?", + "tokens": [ + 50764, + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.39899626010801736, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5096546411514282, + "confidence": 0.826, + "words": [ + { + "text": "What", + "start": 7.57, + "end": 9.12, + "confidence": 0.544 + }, + { + "text": "are", + "start": 9.12, + "end": 9.28, + "confidence": 0.852 + }, + { + "text": "you", + "start": 9.28, + "end": 9.3, + "confidence": 0.994 + }, + { + "text": "telling", + "start": 9.3, + "end": 9.32, + "confidence": 0.849 + }, + { + "text": "me,", + "start": 9.32, + "end": 9.34, + "confidence": 0.951 + }, + { + "text": "dude?", + "start": 9.34, + "end": 9.36, + "confidence": 0.855 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 11.22, + "end": 12.5, + "text": " What are you telling me, dude?", + "tokens": [ + 50864, + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30, + 50964 + ], + "temperature": 0.0, + "avg_logprob": -0.39899626010801736, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5096546411514282, + "confidence": 0.774, + "words": [ + { + "text": "What", + "start": 11.22, + "end": 11.44, + "confidence": 0.423 + }, + { + "text": "are", + 
"start": 11.44, + "end": 11.56, + "confidence": 0.791 + }, + { + "text": "you", + "start": 11.56, + "end": 11.68, + "confidence": 0.994 + }, + { + "text": "telling", + "start": 11.68, + "end": 11.7, + "confidence": 0.809 + }, + { + "text": "me,", + "start": 11.7, + "end": 12.48, + "confidence": 0.956 + }, + { + "text": "dude?", + "start": 12.48, + "end": 12.5, + "confidence": 0.837 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 12.5, + "end": 13.54, + "text": " What are you telling me, guys?", + "tokens": [ + 50964, + 708, + 366, + 291, + 3585, + 385, + 11, + 1074, + 30, + 51164 + ], + "temperature": 0.0, + "avg_logprob": -0.39899626010801736, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5096546411514282, + "confidence": 0.76, + "words": [ + { + "text": "What", + "start": 12.5, + "end": 12.52, + "confidence": 0.504 + }, + { + "text": "are", + "start": 12.52, + "end": 12.54, + "confidence": 0.845 + }, + { + "text": "you", + "start": 12.54, + "end": 12.64, + "confidence": 0.994 + }, + { + "text": "telling", + "start": 12.64, + "end": 12.66, + "confidence": 0.844 + }, + { + "text": "me,", + "start": 12.66, + "end": 13.04, + "confidence": 0.962 + }, + { + "text": "guys?", + "start": 13.52, + "end": 13.54, + "confidence": 0.563 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 20.32, + "end": 21.4, + "text": " What are you telling me, guys?", + "tokens": [ + 51164, + 708, + 366, + 291, + 3585, + 385, + 11, + 1074, + 30, + 51414 + ], + "temperature": 0.0, + "avg_logprob": -0.39899626010801736, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5096546411514282, + "confidence": 0.935, + "words": [ + { + "text": "What", + "start": 20.32, + "end": 20.72, + "confidence": 0.885 + }, + { + "text": "are", + "start": 20.72, + "end": 20.78, + "confidence": 0.936 + }, + { + "text": "you", + "start": 20.78, + "end": 20.8, + "confidence": 0.996 + }, + { + "text": "telling", + "start": 20.8, + "end": 21.04, + "confidence": 0.935 + }, + { + 
"text": "me,", + "start": 21.04, + "end": 21.3, + "confidence": 0.896 + }, + { + "text": "guys?", + "start": 21.38, + "end": 21.4, + "confidence": 0.969 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 22.08, + "end": 22.96, + "text": " What are you telling me, dude?", + "tokens": [ + 51414, + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30, + 51514 + ], + "temperature": 0.0, + "avg_logprob": -0.39899626010801736, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5096546411514282, + "confidence": 0.953, + "words": [ + { + "text": "What", + "start": 22.08, + "end": 22.24, + "confidence": 0.95 + }, + { + "text": "are", + "start": 22.24, + "end": 22.36, + "confidence": 0.965 + }, + { + "text": "you", + "start": 22.36, + "end": 22.44, + "confidence": 0.997 + }, + { + "text": "telling", + "start": 22.44, + "end": 22.6, + "confidence": 0.864 + }, + { + "text": "me,", + "start": 22.6, + "end": 22.78, + "confidence": 0.984 + }, + { + "text": "dude?", + "start": 22.94, + "end": 22.96, + "confidence": 0.967 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 22.96, + "end": 23.08, + "text": " What are you telling me, dude?", + "tokens": [ + 51514, + 708, + 366, + 291, + 3585, + 385, + 11, + 6449, + 30, + 51664 + ], + "temperature": 0.0, + "avg_logprob": -0.39899626010801736, + "compression_ratio": 4.660377358490566, + "no_speech_prob": 0.5096546411514282, + "confidence": 0.856, + "words": [ + { + "text": "What", + "start": 22.96, + "end": 22.98, + "confidence": 0.541 + }, + { + "text": "are", + "start": 22.98, + "end": 23.0, + "confidence": 0.889 + }, + { + "text": "you", + "start": 23.0, + "end": 23.02, + "confidence": 0.996 + }, + { + "text": "telling", + "start": 23.02, + "end": 23.04, + "confidence": 0.882 + }, + { + "text": "me,", + "start": 23.04, + "end": 23.06, + "confidence": 0.975 + }, + { + "text": "dude?", + "start": 23.06, + "end": 23.08, + "confidence": 0.955 + } + ] + }, + { + "id": 8, + "seek": 2600, + "start": 26.54, + "end": 32.48, + 
"text": " The most important thing is not to be alert.", + "tokens": [ + 50414, + 440, + 881, + 1021, + 551, + 307, + 406, + 281, + 312, + 9615, + 13, + 50664 + ], + "temperature": 0.0, + "avg_logprob": -0.9193238558834547, + "compression_ratio": 1.3594771241830066, + "no_speech_prob": 0.09051438421010971, + "confidence": 0.276, + "words": [ + { + "text": "The", + "start": 26.54, + "end": 31.02, + "confidence": 0.182 + }, + { + "text": "most", + "start": 31.02, + "end": 31.18, + "confidence": 0.518 + }, + { + "text": "important", + "start": 31.18, + "end": 31.5, + "confidence": 0.995 + }, + { + "text": "thing", + "start": 31.5, + "end": 31.68, + "confidence": 0.217 + }, + { + "text": "is", + "start": 31.68, + "end": 31.9, + "confidence": 0.507 + }, + { + "text": "not", + "start": 31.9, + "end": 32.22, + "confidence": 0.284 + }, + { + "text": "to", + "start": 32.22, + "end": 32.4, + "confidence": 0.343 + }, + { + "text": "be", + "start": 32.4, + "end": 32.42, + "confidence": 0.247 + }, + { + "text": "alert.", + "start": 32.42, + "end": 32.48, + "confidence": 0.038 + } + ] + }, + { + "id": 9, + "seek": 2600, + "start": 33.42, + "end": 34.06, + "text": " It's what you do.", + "tokens": [ + 50664, + 467, + 311, + 437, + 291, + 360, + 13, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.9193238558834547, + "compression_ratio": 1.3594771241830066, + "no_speech_prob": 0.09051438421010971, + "confidence": 0.502, + "words": [ + { + "text": "It's", + "start": 33.42, + "end": 33.64, + "confidence": 0.307 + }, + { + "text": "what", + "start": 33.64, + "end": 33.72, + "confidence": 0.616 + }, + { + "text": "you", + "start": 33.72, + "end": 33.88, + "confidence": 0.898 + }, + { + "text": "do.", + "start": 33.88, + "end": 34.06, + "confidence": 0.614 + } + ] + }, + { + "id": 10, + "seek": 2600, + "start": 34.86, + "end": 35.5, + "text": " Yes, man.", + "tokens": [ + 50764, + 1079, + 11, + 587, + 13, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.9193238558834547, + 
"compression_ratio": 1.3594771241830066, + "no_speech_prob": 0.09051438421010971, + "confidence": 0.088, + "words": [ + { + "text": "Yes,", + "start": 34.86, + "end": 34.88, + "confidence": 0.065 + }, + { + "text": "man.", + "start": 35.48, + "end": 35.5, + "confidence": 0.12 + } + ] + }, + { + "id": 11, + "seek": 2600, + "start": 37.14, + "end": 38.64, + "text": " The reference of the book.", + "tokens": [ + 50864, + 440, + 6408, + 295, + 264, + 1446, + 13, + 51014 + ], + "temperature": 0.0, + "avg_logprob": -0.9193238558834547, + "compression_ratio": 1.3594771241830066, + "no_speech_prob": 0.09051438421010971, + "confidence": 0.268, + "words": [ + { + "text": "The", + "start": 37.14, + "end": 37.28, + "confidence": 0.385 + }, + { + "text": "reference", + "start": 37.28, + "end": 37.74, + "confidence": 0.543 + }, + { + "text": "of", + "start": 37.74, + "end": 37.92, + "confidence": 0.781 + }, + { + "text": "the", + "start": 37.92, + "end": 38.2, + "confidence": 0.37 + }, + { + "text": "book.", + "start": 38.2, + "end": 38.64, + "confidence": 0.023 + } + ] + }, + { + "id": 12, + "seek": 2600, + "start": 44.94, + "end": 45.65, + "text": " Good evening, good evening.", + "tokens": [ + 51264, + 2205, + 5634, + 11, + 665, + 5634, + 13, + 51364 + ], + "temperature": 0.0, + "avg_logprob": -0.9193238558834547, + "compression_ratio": 1.3594771241830066, + "no_speech_prob": 0.09051438421010971, + "confidence": 0.398, + "words": [ + { + "text": "Good", + "start": 44.94, + "end": 45.16, + "confidence": 0.134 + }, + { + "text": "evening,", + "start": 45.16, + "end": 45.4, + "confidence": 0.782 + }, + { + "text": "good", + "start": 45.46, + "end": 45.58, + "confidence": 0.328 + }, + { + "text": "evening.", + "start": 45.58, + "end": 45.65, + "confidence": 0.733 + } + ] + }, + { + "id": 13, + "seek": 2600, + "start": 45.65, + "end": 47.76, + "text": " All of you, this is your BFFM TV.", + "tokens": [ + 51364, + 1057, + 295, + 291, + 11, + 341, + 307, + 428, + 363, + 6345, + 44, 
+ 3558, + 13, + 51464 + ], + "temperature": 0.0, + "avg_logprob": -0.9193238558834547, + "compression_ratio": 1.3594771241830066, + "no_speech_prob": 0.09051438421010971, + "confidence": 0.327, + "words": [ + { + "text": "All", + "start": 45.65, + "end": 46.28, + "confidence": 0.146 + }, + { + "text": "of", + "start": 46.28, + "end": 46.3, + "confidence": 0.355 + }, + { + "text": "you,", + "start": 46.3, + "end": 46.5, + "confidence": 0.953 + }, + { + "text": "this", + "start": 46.54, + "end": 46.68, + "confidence": 0.226 + }, + { + "text": "is", + "start": 46.68, + "end": 46.82, + "confidence": 0.254 + }, + { + "text": "your", + "start": 46.82, + "end": 47.08, + "confidence": 0.184 + }, + { + "text": "BFFM", + "start": 47.08, + "end": 47.62, + "confidence": 0.326 + }, + { + "text": "TV.", + "start": 47.62, + "end": 47.76, + "confidence": 0.775 + } + ] + }, + { + "id": 14, + "seek": 2600, + "start": 47.76, + "end": 51.42, + "text": " It's called the BFFM story with the actuality.", + "tokens": [ + 51464, + 467, + 311, + 1219, + 264, + 363, + 6345, + 44, + 1657, + 365, + 264, + 3539, + 507, + 13, + 51664 + ], + "temperature": 0.0, + "avg_logprob": -0.9193238558834547, + "compression_ratio": 1.3594771241830066, + "no_speech_prob": 0.09051438421010971, + "confidence": 0.385, + "words": [ + { + "text": "It's", + "start": 47.76, + "end": 48.22, + "confidence": 0.351 + }, + { + "text": "called", + "start": 48.22, + "end": 48.38, + "confidence": 0.215 + }, + { + "text": "the", + "start": 48.38, + "end": 48.72, + "confidence": 0.207 + }, + { + "text": "BFFM", + "start": 48.72, + "end": 49.8, + "confidence": 0.849 + }, + { + "text": "story", + "start": 49.8, + "end": 50.06, + "confidence": 0.467 + }, + { + "text": "with", + "start": 50.06, + "end": 50.22, + "confidence": 0.368 + }, + { + "text": "the", + "start": 50.22, + "end": 50.66, + "confidence": 0.179 + }, + { + "text": "actuality.", + "start": 50.66, + "end": 51.42, + "confidence": 0.322 + } + ] + }, + { + "id": 15, 
+ "seek": 5200, + "start": 52.0, + "end": 56.02, + "text": " There are 60 minutes of the report of the analysis of the actions you need.", + "tokens": [ + 50364, + 821, + 366, + 4060, + 2077, + 295, + 264, + 2275, + 295, + 264, + 5215, + 295, + 264, + 5909, + 291, + 643, + 13, + 50564 + ], + "temperature": 0.0, + "avg_logprob": -0.49296905517578127, + "compression_ratio": 3.419753086419753, + "no_speech_prob": 0.2597074508666992, + "confidence": 0.299, + "words": [ + { + "text": "There", + "start": 52.0, + "end": 52.16, + "confidence": 0.088 + }, + { + "text": "are", + "start": 52.16, + "end": 52.3, + "confidence": 0.683 + }, + { + "text": "60", + "start": 52.3, + "end": 52.7, + "confidence": 0.201 + }, + { + "text": "minutes", + "start": 52.7, + "end": 53.12, + "confidence": 0.804 + }, + { + "text": "of", + "start": 53.12, + "end": 53.74, + "confidence": 0.217 + }, + { + "text": "the", + "start": 53.74, + "end": 53.86, + "confidence": 0.16 + }, + { + "text": "report", + "start": 53.86, + "end": 54.16, + "confidence": 0.072 + }, + { + "text": "of", + "start": 54.16, + "end": 54.4, + "confidence": 0.56 + }, + { + "text": "the", + "start": 54.4, + "end": 54.52, + "confidence": 0.584 + }, + { + "text": "analysis", + "start": 54.52, + "end": 54.7, + "confidence": 0.128 + }, + { + "text": "of", + "start": 54.7, + "end": 54.82, + "confidence": 0.837 + }, + { + "text": "the", + "start": 54.82, + "end": 55.22, + "confidence": 0.549 + }, + { + "text": "actions", + "start": 55.22, + "end": 55.24, + "confidence": 0.321 + }, + { + "text": "you", + "start": 55.24, + "end": 55.66, + "confidence": 0.557 + }, + { + "text": "need.", + "start": 55.66, + "end": 56.02, + "confidence": 0.163 + } + ] + }, + { + "id": 16, + "seek": 5200, + "start": 56.7, + "end": 59.8, + "text": " As you can see, it's a bit of a report of the reports of the reports.", + "tokens": [ + 50564, + 1018, + 291, + 393, + 536, + 11, + 309, + 311, + 257, + 857, + 295, + 257, + 2275, + 295, + 264, + 7122, + 295, + 
264, + 7122, + 13, + 50764 + ], + "temperature": 0.0, + "avg_logprob": -0.49296905517578127, + "compression_ratio": 3.419753086419753, + "no_speech_prob": 0.2597074508666992, + "confidence": 0.287, + "words": [ + { + "text": "As", + "start": 56.7, + "end": 56.9, + "confidence": 0.289 + }, + { + "text": "you", + "start": 56.9, + "end": 56.96, + "confidence": 0.158 + }, + { + "text": "can", + "start": 56.96, + "end": 57.34, + "confidence": 0.266 + }, + { + "text": "see,", + "start": 57.34, + "end": 57.36, + "confidence": 0.922 + }, + { + "text": "it's", + "start": 57.64, + "end": 57.66, + "confidence": 0.309 + }, + { + "text": "a", + "start": 57.66, + "end": 57.68, + "confidence": 0.456 + }, + { + "text": "bit", + "start": 57.68, + "end": 57.7, + "confidence": 0.491 + }, + { + "text": "of", + "start": 57.7, + "end": 57.96, + "confidence": 0.106 + }, + { + "text": "a", + "start": 57.96, + "end": 58.1, + "confidence": 0.441 + }, + { + "text": "report", + "start": 58.1, + "end": 59.06, + "confidence": 0.058 + }, + { + "text": "of", + "start": 59.06, + "end": 59.32, + "confidence": 0.53 + }, + { + "text": "the", + "start": 59.32, + "end": 59.46, + "confidence": 0.706 + }, + { + "text": "reports", + "start": 59.46, + "end": 59.72, + "confidence": 0.095 + }, + { + "text": "of", + "start": 59.72, + "end": 59.76, + "confidence": 0.291 + }, + { + "text": "the", + "start": 59.76, + "end": 59.78, + "confidence": 0.746 + }, + { + "text": "reports.", + "start": 59.78, + "end": 59.8, + "confidence": 0.123 + } + ] + }, + { + "id": 17, + "seek": 5200, + "start": 59.8, + "end": 60.62, + "text": " So, what is it, guys?", + "tokens": [ + 50764, + 407, + 11, + 437, + 307, + 309, + 11, + 1074, + 30, + 50814 + ], + "temperature": 0.0, + "avg_logprob": -0.49296905517578127, + "compression_ratio": 3.419753086419753, + "no_speech_prob": 0.2597074508666992, + "confidence": 0.234, + "words": [ + { + "text": "So,", + "start": 59.8, + "end": 60.02, + "confidence": 0.153 + }, + { + "text": 
"what", + "start": 60.18, + "end": 60.2, + "confidence": 0.266 + }, + { + "text": "is", + "start": 60.2, + "end": 60.42, + "confidence": 0.34 + }, + { + "text": "it,", + "start": 60.42, + "end": 60.58, + "confidence": 0.189 + }, + { + "text": "guys?", + "start": 60.58, + "end": 60.62, + "confidence": 0.267 + } + ] + }, + { + "id": 18, + "seek": 5200, + "start": 60.62, + "end": 62.98, + "text": " So, we are ready to do it.", + "tokens": [ + 50814, + 407, + 11, + 321, + 366, + 1919, + 281, + 360, + 309, + 13, + 50914 + ], + "temperature": 0.0, + "avg_logprob": -0.49296905517578127, + "compression_ratio": 3.419753086419753, + "no_speech_prob": 0.2597074508666992, + "confidence": 0.393, + "words": [ + { + "text": "So,", + "start": 60.62, + "end": 62.06, + "confidence": 0.172 + }, + { + "text": "we", + "start": 62.08, + "end": 62.26, + "confidence": 0.36 + }, + { + "text": "are", + "start": 62.26, + "end": 62.44, + "confidence": 0.246 + }, + { + "text": "ready", + "start": 62.44, + "end": 62.68, + "confidence": 0.477 + }, + { + "text": "to", + "start": 62.68, + "end": 62.7, + "confidence": 0.886 + }, + { + "text": "do", + "start": 62.7, + "end": 62.92, + "confidence": 0.589 + }, + { + "text": "it.", + "start": 62.92, + "end": 62.98, + "confidence": 0.381 + } + ] + }, + { + "id": 19, + "seek": 5200, + "start": 63.0, + "end": 66.94, + "text": " The report is ready to be told by the leaders of the CFT's CFT's CFT's CFT.", + "tokens": [ + 50914, + 440, + 2275, + 307, + 1919, + 281, + 312, + 1907, + 538, + 264, + 3523, + 295, + 264, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 13, + 51114 + ], + "temperature": 0.0, + "avg_logprob": -0.49296905517578127, + "compression_ratio": 3.419753086419753, + "no_speech_prob": 0.2597074508666992, + "confidence": 0.35, + "words": [ + { + "text": "The", + "start": 63.0, + "end": 63.44, + "confidence": 0.495 + }, + { + "text": "report", + "start": 63.44, + "end": 63.46, + "confidence": 0.387 + }, + 
{ + "text": "is", + "start": 63.46, + "end": 63.62, + "confidence": 0.522 + }, + { + "text": "ready", + "start": 63.62, + "end": 63.88, + "confidence": 0.055 + }, + { + "text": "to", + "start": 63.88, + "end": 64.26, + "confidence": 0.458 + }, + { + "text": "be", + "start": 64.26, + "end": 64.68, + "confidence": 0.209 + }, + { + "text": "told", + "start": 64.68, + "end": 64.7, + "confidence": 0.047 + }, + { + "text": "by", + "start": 64.7, + "end": 65.04, + "confidence": 0.645 + }, + { + "text": "the", + "start": 65.04, + "end": 65.08, + "confidence": 0.891 + }, + { + "text": "leaders", + "start": 65.08, + "end": 65.46, + "confidence": 0.569 + }, + { + "text": "of", + "start": 65.46, + "end": 65.94, + "confidence": 0.835 + }, + { + "text": "the", + "start": 65.94, + "end": 66.02, + "confidence": 0.738 + }, + { + "text": "CFT's", + "start": 66.02, + "end": 66.36, + "confidence": 0.098 + }, + { + "text": "CFT's", + "start": 66.36, + "end": 66.62, + "confidence": 0.406 + }, + { + "text": "CFT's", + "start": 66.62, + "end": 66.64, + "confidence": 0.512 + }, + { + "text": "CFT.", + "start": 66.64, + "end": 66.94, + "confidence": 0.876 + } + ] + }, + { + "id": 20, + "seek": 6700, + "start": 67.0, + "end": 97.0, + "text": " The report of the CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's C", + "tokens": [ + 50364, + 440, + 2275, + 295, + 264, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 
383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383 + ], + "temperature": 0.0, + "avg_logprob": -0.09771330794945961, + "compression_ratio": 13.264705882352942, + "no_speech_prob": 0.7763262987136841, + "confidence": 0.908, + "words": [ + { + "text": "The", + "start": 67.0, + "end": 67.84, + "confidence": 0.342 + }, + { + "text": "report", + "start": 67.84, + "end": 67.86, + "confidence": 0.626 + }, + { + "text": "of", + "start": 67.86, + "end": 68.16, + "confidence": 0.375 + }, + { + "text": "the", + "start": 68.16, + "end": 68.36, + "confidence": 0.865 + }, + { + "text": "CFT's", + "start": 68.36, + "end": 68.68, + "confidence": 0.859 
+ }, + { + "text": "CFT's", + "start": 68.68, + "end": 68.82, + "confidence": 0.845 + }, + { + "text": "CFT's", + "start": 68.82, + "end": 68.94, + "confidence": 0.816 + }, + { + "text": "CFT's", + "start": 68.94, + "end": 69.04, + "confidence": 0.814 + }, + { + "text": "CFT's", + "start": 69.04, + "end": 69.24, + "confidence": 0.828 + }, + { + "text": "CFT's", + "start": 69.24, + "end": 69.64, + "confidence": 0.852 + }, + { + "text": "CFT's", + "start": 69.64, + "end": 69.88, + "confidence": 0.881 + }, + { + "text": "CFT's", + "start": 69.88, + "end": 70.34, + "confidence": 0.888 + }, + { + "text": "CFT's", + "start": 70.34, + "end": 70.92, + "confidence": 0.899 + }, + { + "text": "CFT's", + "start": 70.92, + "end": 71.62, + "confidence": 0.9 + }, + { + "text": "CFT's", + "start": 71.62, + "end": 73.26, + "confidence": 0.897 + }, + { + "text": "CFT's", + "start": 73.26, + "end": 74.0, + "confidence": 0.9 + }, + { + "text": "CFT's", + "start": 74.0, + "end": 74.4, + "confidence": 0.9 + }, + { + "text": "CFT's", + "start": 74.4, + "end": 74.96, + "confidence": 0.902 + }, + { + "text": "CFT's", + "start": 74.96, + "end": 76.3, + "confidence": 0.904 + }, + { + "text": "CFT's", + "start": 76.3, + "end": 76.32, + "confidence": 0.902 + }, + { + "text": "CFT's", + "start": 76.32, + "end": 76.38, + "confidence": 0.9 + }, + { + "text": "CFT's", + "start": 76.38, + "end": 76.5, + "confidence": 0.904 + }, + { + "text": "CFT's", + "start": 76.5, + "end": 77.4, + "confidence": 0.901 + }, + { + "text": "CFT's", + "start": 77.4, + "end": 77.42, + "confidence": 0.901 + }, + { + "text": "CFT's", + "start": 77.42, + "end": 78.0, + "confidence": 0.899 + }, + { + "text": "CFT's", + "start": 78.0, + "end": 78.02, + "confidence": 0.899 + }, + { + "text": "CFT's", + "start": 78.02, + "end": 78.06, + "confidence": 0.902 + }, + { + "text": "CFT's", + "start": 78.06, + "end": 78.08, + "confidence": 0.903 + }, + { + "text": "CFT's", + "start": 78.08, + "end": 78.1, + "confidence": 0.902 + }, 
+ { + "text": "CFT's", + "start": 78.1, + "end": 78.98, + "confidence": 0.904 + }, + { + "text": "CFT's", + "start": 78.98, + "end": 79.24, + "confidence": 0.903 + }, + { + "text": "CFT's", + "start": 79.24, + "end": 81.7, + "confidence": 0.906 + }, + { + "text": "CFT's", + "start": 81.7, + "end": 81.72, + "confidence": 0.906 + }, + { + "text": "CFT's", + "start": 81.72, + "end": 81.94, + "confidence": 0.91 + }, + { + "text": "CFT's", + "start": 81.94, + "end": 82.32, + "confidence": 0.916 + }, + { + "text": "CFT's", + "start": 82.32, + "end": 82.34, + "confidence": 0.916 + }, + { + "text": "CFT's", + "start": 82.34, + "end": 82.36, + "confidence": 0.916 + }, + { + "text": "CFT's", + "start": 82.36, + "end": 82.44, + "confidence": 0.919 + }, + { + "text": "CFT's", + "start": 82.44, + "end": 82.54, + "confidence": 0.918 + }, + { + "text": "CFT's", + "start": 82.54, + "end": 82.56, + "confidence": 0.923 + }, + { + "text": "CFT's", + "start": 82.56, + "end": 82.58, + "confidence": 0.922 + }, + { + "text": "CFT's", + "start": 82.58, + "end": 82.6, + "confidence": 0.922 + }, + { + "text": "CFT's", + "start": 82.6, + "end": 82.62, + "confidence": 0.927 + }, + { + "text": "CFT's", + "start": 82.62, + "end": 82.68, + "confidence": 0.928 + }, + { + "text": "CFT's", + "start": 82.68, + "end": 83.12, + "confidence": 0.931 + }, + { + "text": "CFT's", + "start": 83.12, + "end": 83.24, + "confidence": 0.933 + }, + { + "text": "CFT's", + "start": 83.24, + "end": 83.54, + "confidence": 0.93 + }, + { + "text": "CFT's", + "start": 83.54, + "end": 83.56, + "confidence": 0.935 + }, + { + "text": "CFT's", + "start": 83.56, + "end": 83.58, + "confidence": 0.932 + }, + { + "text": "CFT's", + "start": 83.58, + "end": 83.7, + "confidence": 0.936 + }, + { + "text": "CFT's", + "start": 83.7, + "end": 83.72, + "confidence": 0.938 + }, + { + "text": "CFT's", + "start": 83.72, + "end": 83.74, + "confidence": 0.94 + }, + { + "text": "CFT's", + "start": 83.74, + "end": 83.76, + "confidence": 
0.942 + }, + { + "text": "CFT's", + "start": 83.76, + "end": 83.78, + "confidence": 0.939 + }, + { + "text": "CFT's", + "start": 83.78, + "end": 84.02, + "confidence": 0.947 + }, + { + "text": "CFT's", + "start": 84.02, + "end": 84.04, + "confidence": 0.945 + }, + { + "text": "CFT's", + "start": 84.04, + "end": 84.28, + "confidence": 0.947 + }, + { + "text": "CFT's", + "start": 84.28, + "end": 84.4, + "confidence": 0.948 + }, + { + "text": "CFT's", + "start": 84.4, + "end": 84.54, + "confidence": 0.952 + }, + { + "text": "CFT's", + "start": 84.54, + "end": 84.56, + "confidence": 0.954 + }, + { + "text": "CFT's", + "start": 84.56, + "end": 84.58, + "confidence": 0.956 + }, + { + "text": "CFT's", + "start": 84.58, + "end": 85.02, + "confidence": 0.954 + }, + { + "text": "CFT's", + "start": 85.02, + "end": 85.98, + "confidence": 0.954 + }, + { + "text": "CFT's", + "start": 85.98, + "end": 86.62, + "confidence": 0.954 + }, + { + "text": "CFT's", + "start": 86.62, + "end": 86.8, + "confidence": 0.952 + }, + { + "text": "CFT's", + "start": 86.8, + "end": 87.28, + "confidence": 0.95 + }, + { + "text": "CFT's", + "start": 87.28, + "end": 87.58, + "confidence": 0.952 + }, + { + "text": "CFT's", + "start": 87.58, + "end": 87.66, + "confidence": 0.951 + }, + { + "text": "CFT's", + "start": 87.66, + "end": 87.68, + "confidence": 0.952 + }, + { + "text": "CFT's", + "start": 87.68, + "end": 87.7, + "confidence": 0.954 + }, + { + "text": "CFT's", + "start": 87.7, + "end": 88.84, + "confidence": 0.949 + }, + { + "text": "CFT's", + "start": 88.84, + "end": 88.86, + "confidence": 0.948 + }, + { + "text": "CFT's", + "start": 88.86, + "end": 89.2, + "confidence": 0.95 + }, + { + "text": "CFT's", + "start": 89.2, + "end": 90.42, + "confidence": 0.954 + }, + { + "text": "CFT's", + "start": 90.42, + "end": 90.82, + "confidence": 0.951 + }, + { + "text": "CFT's", + "start": 90.82, + "end": 96.98, + "confidence": 0.95 + }, + { + "text": "C", + "start": 96.98, + "end": 97.0, + "confidence": 
0.974 + } + ] + }, + { + "id": 21, + "seek": 9700, + "start": 97.0, + "end": 127.0, + "text": " The CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's CFT's C", + "tokens": [ + 50364, + 440, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 
383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383, + 25469, + 311, + 383 + ], + "temperature": 0.0, + "avg_logprob": -0.04616457678277396, + "compression_ratio": 21.095238095238095, + "no_speech_prob": 0.1808893084526062, + "confidence": 0.955, + "words": [ + { + "text": "The", + "start": 97.0, + "end": 97.94, + "confidence": 0.245 + }, + { + "text": "CFT's", + "start": 97.94, + "end": 97.96, + "confidence": 0.471 + }, + { + "text": "CFT's", + "start": 97.96, + "end": 99.74, + "confidence": 0.915 + }, + { + "text": "CFT's", + "start": 99.74, + "end": 102.98, + "confidence": 0.915 + }, + { + "text": "CFT's", + "start": 102.98, + "end": 104.02, + "confidence": 0.918 + }, + { + "text": "CFT's", + "start": 104.02, + "end": 104.12, + "confidence": 0.93 + }, + { + "text": "CFT's", + "start": 104.12, + "end": 104.28, + "confidence": 0.94 + }, + { + "text": "CFT's", + "start": 104.28, + "end": 104.3, + "confidence": 0.95 + }, + { + "text": "CFT's", + "start": 104.3, + "end": 104.82, + "confidence": 0.96 + }, + { + "text": "CFT's", + "start": 104.82, + "end": 105.08, + "confidence": 0.965 + }, + { + "text": "CFT's", + "start": 105.08, + "end": 105.94, + "confidence": 0.967 + }, + { + "text": "CFT's", + "start": 105.94, + "end": 106.88, + "confidence": 0.969 + }, + { + "text": "CFT's", + "start": 106.88, + "end": 108.96, + "confidence": 0.97 + }, + { + "text": "CFT's", + "start": 108.96, + "end": 108.98, + "confidence": 0.971 + }, + { + "text": "CFT's", + "start": 108.98, + "end": 109.06, + "confidence": 0.971 + }, + { + "text": "CFT's", + "start": 109.06, + "end": 109.7, + "confidence": 0.971 + }, + { + "text": "CFT's", + "start": 109.7, + "end": 109.72, + "confidence": 0.971 + }, + { + "text": "CFT's", + "start": 109.72, + "end": 110.12, + "confidence": 0.97 + }, + { + "text": "CFT's", + 
"start": 110.12, + "end": 110.44, + "confidence": 0.971 + }, + { + "text": "CFT's", + "start": 110.44, + "end": 110.82, + "confidence": 0.973 + }, + { + "text": "CFT's", + "start": 110.82, + "end": 110.84, + "confidence": 0.973 + }, + { + "text": "CFT's", + "start": 110.84, + "end": 110.86, + "confidence": 0.974 + }, + { + "text": "CFT's", + "start": 110.86, + "end": 110.88, + "confidence": 0.973 + }, + { + "text": "CFT's", + "start": 110.88, + "end": 110.9, + "confidence": 0.972 + }, + { + "text": "CFT's", + "start": 110.9, + "end": 111.54, + "confidence": 0.972 + }, + { + "text": "CFT's", + "start": 111.54, + "end": 111.76, + "confidence": 0.971 + }, + { + "text": "CFT's", + "start": 111.76, + "end": 111.78, + "confidence": 0.971 + }, + { + "text": "CFT's", + "start": 111.78, + "end": 111.8, + "confidence": 0.971 + }, + { + "text": "CFT's", + "start": 111.8, + "end": 111.82, + "confidence": 0.972 + }, + { + "text": "CFT's", + "start": 111.82, + "end": 111.84, + "confidence": 0.973 + }, + { + "text": "CFT's", + "start": 111.84, + "end": 111.86, + "confidence": 0.973 + }, + { + "text": "CFT's", + "start": 111.86, + "end": 111.88, + "confidence": 0.973 + }, + { + "text": "CFT's", + "start": 111.88, + "end": 111.9, + "confidence": 0.974 + }, + { + "text": "CFT's", + "start": 111.9, + "end": 111.92, + "confidence": 0.974 + }, + { + "text": "CFT's", + "start": 111.92, + "end": 111.94, + "confidence": 0.974 + }, + { + "text": "CFT's", + "start": 111.94, + "end": 111.96, + "confidence": 0.974 + }, + { + "text": "CFT's", + "start": 111.96, + "end": 111.98, + "confidence": 0.974 + }, + { + "text": "CFT's", + "start": 111.98, + "end": 112.0, + "confidence": 0.976 + }, + { + "text": "CFT's", + "start": 112.0, + "end": 112.02, + "confidence": 0.975 + }, + { + "text": "CFT's", + "start": 112.02, + "end": 112.04, + "confidence": 0.975 + }, + { + "text": "CFT's", + "start": 112.04, + "end": 112.58, + "confidence": 0.977 + }, + { + "text": "CFT's", + "start": 112.58, + "end": 
112.76, + "confidence": 0.976 + }, + { + "text": "CFT's", + "start": 112.76, + "end": 112.78, + "confidence": 0.978 + }, + { + "text": "CFT's", + "start": 112.78, + "end": 112.8, + "confidence": 0.978 + }, + { + "text": "CFT's", + "start": 112.8, + "end": 112.82, + "confidence": 0.977 + }, + { + "text": "CFT's", + "start": 112.82, + "end": 112.84, + "confidence": 0.979 + }, + { + "text": "CFT's", + "start": 112.84, + "end": 112.86, + "confidence": 0.978 + }, + { + "text": "CFT's", + "start": 112.86, + "end": 112.88, + "confidence": 0.978 + }, + { + "text": "CFT's", + "start": 112.88, + "end": 113.2, + "confidence": 0.979 + }, + { + "text": "CFT's", + "start": 113.2, + "end": 113.22, + "confidence": 0.978 + }, + { + "text": "CFT's", + "start": 113.22, + "end": 113.24, + "confidence": 0.978 + }, + { + "text": "CFT's", + "start": 113.24, + "end": 113.26, + "confidence": 0.977 + }, + { + "text": "CFT's", + "start": 113.26, + "end": 114.64, + "confidence": 0.979 + }, + { + "text": "CFT's", + "start": 114.64, + "end": 114.66, + "confidence": 0.977 + }, + { + "text": "CFT's", + "start": 114.66, + "end": 115.04, + "confidence": 0.977 + }, + { + "text": "CFT's", + "start": 115.04, + "end": 115.86, + "confidence": 0.975 + }, + { + "text": "CFT's", + "start": 115.86, + "end": 116.02, + "confidence": 0.976 + }, + { + "text": "CFT's", + "start": 116.02, + "end": 118.04, + "confidence": 0.976 + }, + { + "text": "CFT's", + "start": 118.04, + "end": 118.24, + "confidence": 0.976 + }, + { + "text": "CFT's", + "start": 118.24, + "end": 120.92, + "confidence": 0.975 + }, + { + "text": "CFT's", + "start": 120.92, + "end": 120.94, + "confidence": 0.974 + }, + { + "text": "CFT's", + "start": 120.94, + "end": 121.06, + "confidence": 0.976 + }, + { + "text": "CFT's", + "start": 121.06, + "end": 121.08, + "confidence": 0.975 + }, + { + "text": "CFT's", + "start": 121.08, + "end": 121.1, + "confidence": 0.973 + }, + { + "text": "CFT's", + "start": 121.1, + "end": 122.26, + "confidence": 
0.975 + }, + { + "text": "CFT's", + "start": 122.26, + "end": 122.28, + "confidence": 0.974 + }, + { + "text": "CFT's", + "start": 122.28, + "end": 122.3, + "confidence": 0.974 + }, + { + "text": "CFT's", + "start": 122.3, + "end": 122.32, + "confidence": 0.976 + }, + { + "text": "CFT's", + "start": 122.32, + "end": 122.34, + "confidence": 0.975 + }, + { + "text": "CFT's", + "start": 122.34, + "end": 122.36, + "confidence": 0.975 + }, + { + "text": "CFT's", + "start": 122.36, + "end": 122.38, + "confidence": 0.975 + }, + { + "text": "CFT's", + "start": 122.38, + "end": 122.4, + "confidence": 0.975 + }, + { + "text": "CFT's", + "start": 122.4, + "end": 122.56, + "confidence": 0.973 + }, + { + "text": "CFT's", + "start": 122.56, + "end": 126.98, + "confidence": 0.972 + }, + { + "text": "C", + "start": 126.98, + "end": 127.0, + "confidence": 0.992 + } + ] + } + ], + "language": "en" +} \ No newline at end of file diff --git a/tests/expected/tiny_auto/smartphone.mp3.words.json b/tests/expected/tiny_auto/smartphone.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..debc2f3712ed6a2e320e59b7cae8f5cd7feb2aa1 --- /dev/null +++ b/tests/expected/tiny_auto/smartphone.mp3.words.json @@ -0,0 +1,5091 @@ +{ + "text": " C'est évidence que dit Nicolas, mais je me l'étais jamais formulé comme ça. Ce qui fait la force du smartphone, c'est pas seulement la cumulation des fonctions, mais la manière dans quelques interagues entraîne. Et il est d'ailleurs, c'est la photo c'est hyper convaincant. Alors évidemment, il faudrait ajouter les interfaces les grand-attachilles à été beaucoup très souvent mentionnées. Mais bon, il faudrait que les profites aussi de 20 ans pendant l'été, les ordinateurs nous ont appris à piquer sur des icônes, sauf que, alors le smartphone ajoute le toucher, qui rend le contact plus direct, plus sensible. 
Et puis, évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté tout flu de la navigation web pour aller directement en but. Bref, tout ça, ce sont les conditions qui permettent de créer cette objet, en Nicolas, dit qu'il est très symbolablement inédit dans l'histoire de l'humanité. Mais ça s'assoulait d'une autre interrogation. Est-ce que le fait que cette objet soit inédit un d'huits que notre rapport a lui est aussi un rapport inédit ? Est-ce que le rapport qu'on a au sein de foi n'est comparable à celui qu'on entretenait à d'autres objectes techniques comme la voiture ou le téléphone ? Il n'y a pas d'équivalent. On s'est espèrent de nous voter dans la relation à l'objet. C'est facilement éterricion. Parce que la passion de l'utilisateur et ses affices a dépendance, cette objet d'un lieu en fait, une espèce de relation de médiation avec le monde qui rendent encore avec la maille de celles formes de rogeur. Donc, à objets inédits, rapport inédits. Et, ce rapport, si j'en prends Nicolas, frère caractérisée par un mélange de dépendance et de rogeur. Bon, en vrai, il faudrait remonter très, très filmant tout l'histoire des objectes techniques et de leur infertion dans nos vieux pour déterminer si ce rapport est totalement inédit. Mais j'ai l'impression comme ça que Nicolas se trompe pas vraiment. Pour autant, je sache. Il y a eu plein de discussions autour de la voiture ou même du téléphone. Mais, la dépense n'était pas du même mort, donc le rejet n'en plus n'était pas du même mort. On peut adorer sa bagnure. On a par besoin pour plein de choses. Et là, le soir, quand on va se coucher, on la laisse. On la pade en la main quand on est colis, qu'on n'a même pas au chiot. On pouvait être émervé par son mome qui occupeait la ligne de téléphone pendant une heure chaque soir pour discuter avec un copain. Mais ça ne ressemble pas à ce qu'on peut ressentir à voir même mome aujourd'hui. 
Continuellement avec son smartphone dans la main, comme c'était une sorte de estimateur extère de tomber de lâcher à l'éantrénée, ça m'a eu immédiate. Bon, je dis ça pour le mome, mais évidemment, va là, bon aussi. Donc, rapport immédiate d'accord. Mais pourquoi, à ton impression qu'on en sortira, j'amé? Et puis, il faut en remettre la faute sur les gens qui ont créé cette critique merveilleux et diabolique et diabolique par que merveilleux. Les économistes parlent de dépendance du santé. Ces vidéos, en fait, on est un santé qui a été étabis, un soit mon termine, en marchand dessus, soit des finissants débordes, des finissants, une signalétique.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.42, + "end": 3.66, + "text": " C'est évidence que dit Nicolas, mais je me l'étais jamais formulé comme ça.", + "tokens": [ + 50364, + 383, + 6, + 377, + 20090, + 2778, + 631, + 6176, + 38268, + 11, + 2420, + 1506, + 385, + 287, + 6, + 22824, + 14540, + 49990, + 526, + 5173, + 2788, + 13, + 50545 + ], + "temperature": 0.0, + "avg_logprob": -0.538998505563447, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14073368906974792, + "confidence": 0.709, + "words": [ + { + "text": "C'est", + "start": 0.42, + "end": 0.68, + "confidence": 0.849 + }, + { + "text": "évidence", + "start": 0.68, + "end": 0.94, + "confidence": 0.368 + }, + { + "text": "que", + "start": 0.94, + "end": 1.08, + "confidence": 0.883 + }, + { + "text": "dit", + "start": 1.08, + "end": 1.2, + "confidence": 0.342 + }, + { + "text": "Nicolas,", + "start": 1.2, + "end": 1.44, + "confidence": 0.921 + }, + { + "text": "mais", + "start": 1.88, + "end": 2.14, + "confidence": 0.914 + }, + { + "text": "je", + "start": 2.14, + "end": 2.26, + "confidence": 0.778 + }, + { + "text": "me", + "start": 2.26, + "end": 2.34, + "confidence": 0.954 + }, + { + "text": "l'étais", + "start": 2.34, + "end": 2.58, + "confidence": 0.72 + }, + { + "text": "jamais", + "start": 2.58, + "end": 2.86, + "confidence": 
0.946 + }, + { + "text": "formulé", + "start": 2.86, + "end": 3.26, + "confidence": 0.53 + }, + { + "text": "comme", + "start": 3.26, + "end": 3.46, + "confidence": 0.968 + }, + { + "text": "ça.", + "start": 3.46, + "end": 3.66, + "confidence": 0.96 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 4.14, + "end": 8.9, + "text": " Ce qui fait la force du smartphone, c'est pas seulement la cumulation des fonctions, mais la manière", + "tokens": [ + 50545, + 8257, + 1956, + 3887, + 635, + 3464, + 1581, + 13307, + 11, + 269, + 6, + 377, + 1736, + 27772, + 635, + 12713, + 2776, + 730, + 17290, + 3916, + 11, + 2420, + 635, + 22267, + 50806 + ], + "temperature": 0.0, + "avg_logprob": -0.538998505563447, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14073368906974792, + "confidence": 0.789, + "words": [ + { + "text": "Ce", + "start": 4.14, + "end": 4.26, + "confidence": 0.392 + }, + { + "text": "qui", + "start": 4.26, + "end": 4.38, + "confidence": 0.94 + }, + { + "text": "fait", + "start": 4.38, + "end": 4.56, + "confidence": 0.732 + }, + { + "text": "la", + "start": 4.56, + "end": 4.72, + "confidence": 0.988 + }, + { + "text": "force", + "start": 4.72, + "end": 5.02, + "confidence": 0.93 + }, + { + "text": "du", + "start": 5.02, + "end": 5.2, + "confidence": 0.938 + }, + { + "text": "smartphone,", + "start": 5.2, + "end": 5.58, + "confidence": 0.909 + }, + { + "text": "c'est", + "start": 5.9, + "end": 6.2, + "confidence": 0.947 + }, + { + "text": "pas", + "start": 6.2, + "end": 6.26, + "confidence": 0.983 + }, + { + "text": "seulement", + "start": 6.26, + "end": 6.6, + "confidence": 0.993 + }, + { + "text": "la", + "start": 6.6, + "end": 6.8, + "confidence": 0.633 + }, + { + "text": "cumulation", + "start": 6.8, + "end": 7.34, + "confidence": 0.689 + }, + { + "text": "des", + "start": 7.34, + "end": 7.56, + "confidence": 0.792 + }, + { + "text": "fonctions,", + "start": 7.56, + "end": 8.14, + "confidence": 0.834 + }, + { + "text": "mais", + "start": 
8.38, + "end": 8.5, + "confidence": 0.668 + }, + { + "text": "la", + "start": 8.5, + "end": 8.62, + "confidence": 0.719 + }, + { + "text": "manière", + "start": 8.62, + "end": 8.9, + "confidence": 0.498 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 8.9, + "end": 10.98, + "text": " dans quelques interagues entraîne.", + "tokens": [ + 50806, + 2680, + 16597, + 728, + 559, + 1247, + 22284, + 24741, + 13, + 50906 + ], + "temperature": 0.0, + "avg_logprob": -0.538998505563447, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14073368906974792, + "confidence": 0.303, + "words": [ + { + "text": "dans", + "start": 8.9, + "end": 9.06, + "confidence": 0.284 + }, + { + "text": "quelques", + "start": 9.06, + "end": 9.28, + "confidence": 0.281 + }, + { + "text": "interagues", + "start": 9.28, + "end": 10.38, + "confidence": 0.246 + }, + { + "text": "entraîne.", + "start": 10.38, + "end": 10.98, + "confidence": 0.446 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 11.0, + "end": 12.96, + "text": " Et il est d'ailleurs, c'est la photo c'est hyper convaincant.", + "tokens": [ + 50906, + 3790, + 1930, + 871, + 274, + 6, + 19400, + 11, + 269, + 6, + 377, + 635, + 5052, + 269, + 6, + 377, + 9848, + 3754, + 491, + 66, + 394, + 13, + 51006 + ], + "temperature": 0.0, + "avg_logprob": -0.538998505563447, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14073368906974792, + "confidence": 0.63, + "words": [ + { + "text": "Et", + "start": 11.0, + "end": 11.12, + "confidence": 0.36 + }, + { + "text": "il", + "start": 11.12, + "end": 11.28, + "confidence": 0.144 + }, + { + "text": "est", + "start": 11.28, + "end": 11.38, + "confidence": 0.241 + }, + { + "text": "d'ailleurs,", + "start": 11.38, + "end": 11.6, + "confidence": 0.904 + }, + { + "text": "c'est", + "start": 11.7, + "end": 11.78, + "confidence": 0.886 + }, + { + "text": "la", + "start": 11.78, + "end": 11.8, + "confidence": 0.969 + }, + { + "text": "photo", + "start": 11.8, + "end": 12.02, + 
"confidence": 0.809 + }, + { + "text": "c'est", + "start": 12.02, + "end": 12.26, + "confidence": 0.785 + }, + { + "text": "hyper", + "start": 12.26, + "end": 12.46, + "confidence": 0.938 + }, + { + "text": "convaincant.", + "start": 12.46, + "end": 12.96, + "confidence": 0.51 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 13.3, + "end": 18.8, + "text": " Alors évidemment, il faudrait ajouter les interfaces les grand-attachilles à été beaucoup très souvent", + "tokens": [ + 51006, + 9946, + 24724, + 11, + 1930, + 38694, + 8645, + 17680, + 23985, + 1512, + 28416, + 1512, + 2697, + 12, + 1591, + 608, + 14835, + 1531, + 8862, + 8796, + 5732, + 20847, + 51306 + ], + "temperature": 0.0, + "avg_logprob": -0.538998505563447, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14073368906974792, + "confidence": 0.532, + "words": [ + { + "text": "Alors", + "start": 13.3, + "end": 13.56, + "confidence": 0.894 + }, + { + "text": "évidemment,", + "start": 13.56, + "end": 13.82, + "confidence": 0.778 + }, + { + "text": "il", + "start": 14.38, + "end": 14.4, + "confidence": 0.964 + }, + { + "text": "faudrait", + "start": 14.4, + "end": 14.76, + "confidence": 0.856 + }, + { + "text": "ajouter", + "start": 14.76, + "end": 15.38, + "confidence": 0.883 + }, + { + "text": "les", + "start": 15.38, + "end": 15.6, + "confidence": 0.934 + }, + { + "text": "interfaces", + "start": 15.6, + "end": 16.0, + "confidence": 0.38 + }, + { + "text": "les", + "start": 16.0, + "end": 16.5, + "confidence": 0.463 + }, + { + "text": "grand-attachilles", + "start": 16.5, + "end": 17.16, + "confidence": 0.213 + }, + { + "text": "à", + "start": 17.16, + "end": 17.32, + "confidence": 0.406 + }, + { + "text": "été", + "start": 17.32, + "end": 17.72, + "confidence": 0.894 + }, + { + "text": "beaucoup", + "start": 17.72, + "end": 18.28, + "confidence": 0.797 + }, + { + "text": "très", + "start": 18.28, + "end": 18.64, + "confidence": 0.493 + }, + { + "text": "souvent", + "start": 18.64, + 
"end": 18.8, + "confidence": 0.822 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 18.8, + "end": 19.84, + "text": " mentionnées.", + "tokens": [ + 51306, + 2152, + 77, + 6836, + 13, + 51356 + ], + "temperature": 0.0, + "avg_logprob": -0.538998505563447, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14073368906974792, + "confidence": 0.626, + "words": [ + { + "text": "mentionnées.", + "start": 18.8, + "end": 19.84, + "confidence": 0.626 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 20.02, + "end": 23.58, + "text": " Mais bon, il faudrait que les profites aussi de 20 ans pendant l'été, les ordinateurs", + "tokens": [ + 51356, + 6313, + 4428, + 11, + 1930, + 38694, + 8645, + 631, + 1512, + 1740, + 3324, + 6212, + 368, + 945, + 1567, + 17338, + 287, + 6, + 21210, + 11, + 1512, + 4792, + 13923, + 2156, + 51543 + ], + "temperature": 0.0, + "avg_logprob": -0.538998505563447, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14073368906974792, + "confidence": 0.658, + "words": [ + { + "text": "Mais", + "start": 20.02, + "end": 20.26, + "confidence": 0.98 + }, + { + "text": "bon,", + "start": 20.26, + "end": 20.46, + "confidence": 0.568 + }, + { + "text": "il", + "start": 20.6, + "end": 20.68, + "confidence": 0.975 + }, + { + "text": "faudrait", + "start": 20.68, + "end": 20.78, + "confidence": 0.771 + }, + { + "text": "que", + "start": 20.78, + "end": 20.92, + "confidence": 0.388 + }, + { + "text": "les", + "start": 20.92, + "end": 20.96, + "confidence": 0.22 + }, + { + "text": "profites", + "start": 20.96, + "end": 21.36, + "confidence": 0.57 + }, + { + "text": "aussi", + "start": 21.36, + "end": 21.72, + "confidence": 0.52 + }, + { + "text": "de", + "start": 21.72, + "end": 21.92, + "confidence": 0.492 + }, + { + "text": "20", + "start": 21.92, + "end": 22.14, + "confidence": 0.915 + }, + { + "text": "ans", + "start": 22.14, + "end": 22.32, + "confidence": 0.942 + }, + { + "text": "pendant", + "start": 22.32, + "end": 22.52, + 
"confidence": 0.916 + }, + { + "text": "l'été,", + "start": 22.52, + "end": 22.86, + "confidence": 0.48 + }, + { + "text": "les", + "start": 22.98, + "end": 23.1, + "confidence": 0.903 + }, + { + "text": "ordinateurs", + "start": 23.1, + "end": 23.58, + "confidence": 0.946 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 23.58, + "end": 28.07, + "text": " nous ont appris à piquer sur des icônes, sauf que, alors le smartphone ajoute le toucher,", + "tokens": [ + 51543, + 4666, + 6592, + 724, + 5714, + 1531, + 280, + 23909, + 1022, + 730, + 4376, + 2851, + 4081, + 11, + 601, + 2947, + 631, + 11, + 11246, + 476, + 13307, + 17680, + 14040, + 476, + 2557, + 260, + 11, + 51766 + ], + "temperature": 0.0, + "avg_logprob": -0.538998505563447, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14073368906974792, + "confidence": 0.656, + "words": [ + { + "text": "nous", + "start": 23.58, + "end": 23.78, + "confidence": 0.767 + }, + { + "text": "ont", + "start": 23.78, + "end": 23.9, + "confidence": 0.98 + }, + { + "text": "appris", + "start": 23.9, + "end": 24.12, + "confidence": 0.952 + }, + { + "text": "à", + "start": 24.12, + "end": 24.26, + "confidence": 0.34 + }, + { + "text": "piquer", + "start": 24.26, + "end": 24.54, + "confidence": 0.449 + }, + { + "text": "sur", + "start": 24.54, + "end": 24.72, + "confidence": 0.816 + }, + { + "text": "des", + "start": 24.72, + "end": 24.9, + "confidence": 0.96 + }, + { + "text": "icônes,", + "start": 24.9, + "end": 25.56, + "confidence": 0.599 + }, + { + "text": "sauf", + "start": 25.64, + "end": 25.8, + "confidence": 0.522 + }, + { + "text": "que,", + "start": 25.8, + "end": 26.36, + "confidence": 0.915 + }, + { + "text": "alors", + "start": 26.36, + "end": 26.58, + "confidence": 0.396 + }, + { + "text": "le", + "start": 26.58, + "end": 26.72, + "confidence": 0.824 + }, + { + "text": "smartphone", + "start": 26.72, + "end": 27.0, + "confidence": 0.977 + }, + { + "text": "ajoute", + "start": 27.0, + "end": 27.5, + 
"confidence": 0.674 + }, + { + "text": "le", + "start": 27.5, + "end": 27.62, + "confidence": 0.552 + }, + { + "text": "toucher,", + "start": 27.62, + "end": 28.07, + "confidence": 0.631 + } + ] + }, + { + "id": 8, + "seek": 2804, + "start": 28.07, + "end": 30.6, + "text": " qui rend le contact plus direct, plus sensible.", + "tokens": [ + 50364, + 1956, + 6125, + 476, + 3385, + 1804, + 2047, + 11, + 1804, + 25380, + 13, + 50496 + ], + "temperature": 0.0, + "avg_logprob": -0.4256511350878242, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.12921307981014252, + "confidence": 0.742, + "words": [ + { + "text": "qui", + "start": 28.07, + "end": 28.26, + "confidence": 0.305 + }, + { + "text": "rend", + "start": 28.26, + "end": 28.5, + "confidence": 0.763 + }, + { + "text": "le", + "start": 28.5, + "end": 28.72, + "confidence": 0.988 + }, + { + "text": "contact", + "start": 28.72, + "end": 29.06, + "confidence": 0.817 + }, + { + "text": "plus", + "start": 29.06, + "end": 29.48, + "confidence": 0.84 + }, + { + "text": "direct,", + "start": 29.48, + "end": 30.02, + "confidence": 0.944 + }, + { + "text": "plus", + "start": 30.18, + "end": 30.24, + "confidence": 0.992 + }, + { + "text": "sensible.", + "start": 30.24, + "end": 30.6, + "confidence": 0.618 + } + ] + }, + { + "id": 9, + "seek": 2804, + "start": 31.1, + "end": 34.76, + "text": " Et puis, évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté", + "tokens": [ + 50496, + 3790, + 9093, + 11, + 24724, + 11, + 1930, + 38694, + 8645, + 16421, + 6212, + 730, + 5821, + 1956, + 21540, + 317, + 368, + 21234, + 1193, + 476, + 18437, + 50698 + ], + "temperature": 0.0, + "avg_logprob": -0.4256511350878242, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.12921307981014252, + "confidence": 0.777, + "words": [ + { + "text": "Et", + "start": 31.1, + "end": 31.24, + "confidence": 0.949 + }, + { + "text": "puis,", + "start": 31.24, + "end": 31.36, + "confidence": 
0.721 + }, + { + "text": "évidemment,", + "start": 31.42, + "end": 31.62, + "confidence": 0.213 + }, + { + "text": "il", + "start": 31.7, + "end": 31.76, + "confidence": 0.959 + }, + { + "text": "faudrait", + "start": 31.76, + "end": 31.94, + "confidence": 0.994 + }, + { + "text": "parler", + "start": 31.94, + "end": 32.14, + "confidence": 0.847 + }, + { + "text": "aussi", + "start": 32.14, + "end": 32.36, + "confidence": 0.925 + }, + { + "text": "des", + "start": 32.36, + "end": 32.46, + "confidence": 0.921 + }, + { + "text": "applications", + "start": 32.46, + "end": 32.88, + "confidence": 0.857 + }, + { + "text": "qui", + "start": 32.88, + "end": 33.2, + "confidence": 0.663 + }, + { + "text": "permettent", + "start": 33.2, + "end": 33.8, + "confidence": 0.952 + }, + { + "text": "de", + "start": 33.8, + "end": 33.96, + "confidence": 0.951 + }, + { + "text": "contourner", + "start": 33.96, + "end": 34.4, + "confidence": 0.721 + }, + { + "text": "le", + "start": 34.4, + "end": 34.52, + "confidence": 0.654 + }, + { + "text": "côté", + "start": 34.52, + "end": 34.76, + "confidence": 0.641 + } + ] + }, + { + "id": 10, + "seek": 2804, + "start": 34.8, + "end": 37.86, + "text": " tout flu de la navigation web pour aller directement en but.", + "tokens": [ + 50698, + 3486, + 5029, + 368, + 635, + 17346, + 3670, + 2016, + 8722, + 37297, + 465, + 457, + 13, + 50860 + ], + "temperature": 0.0, + "avg_logprob": -0.4256511350878242, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.12921307981014252, + "confidence": 0.735, + "words": [ + { + "text": "tout", + "start": 34.8, + "end": 35.04, + "confidence": 0.945 + }, + { + "text": "flu", + "start": 35.04, + "end": 35.32, + "confidence": 0.484 + }, + { + "text": "de", + "start": 35.32, + "end": 35.64, + "confidence": 0.35 + }, + { + "text": "la", + "start": 35.64, + "end": 35.78, + "confidence": 0.921 + }, + { + "text": "navigation", + "start": 35.78, + "end": 36.24, + "confidence": 0.903 + }, + { + "text": "web", 
+ "start": 36.24, + "end": 36.64, + "confidence": 0.913 + }, + { + "text": "pour", + "start": 36.64, + "end": 36.84, + "confidence": 0.57 + }, + { + "text": "aller", + "start": 36.84, + "end": 37.06, + "confidence": 0.991 + }, + { + "text": "directement", + "start": 37.06, + "end": 37.48, + "confidence": 0.981 + }, + { + "text": "en", + "start": 37.48, + "end": 37.7, + "confidence": 0.654 + }, + { + "text": "but.", + "start": 37.7, + "end": 37.86, + "confidence": 0.768 + } + ] + }, + { + "id": 11, + "seek": 2804, + "start": 38.78, + "end": 43.12, + "text": " Bref, tout ça, ce sont les conditions qui permettent de créer cette objet, en Nicolas,", + "tokens": [ + 50860, + 49957, + 11, + 3486, + 2788, + 11, + 1769, + 4900, + 1512, + 4487, + 1956, + 21540, + 317, + 368, + 32062, + 5550, + 14964, + 11, + 465, + 38268, + 11, + 51121 + ], + "temperature": 0.0, + "avg_logprob": -0.4256511350878242, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.12921307981014252, + "confidence": 0.737, + "words": [ + { + "text": "Bref,", + "start": 38.78, + "end": 38.8, + "confidence": 0.972 + }, + { + "text": "tout", + "start": 38.84, + "end": 39.04, + "confidence": 0.817 + }, + { + "text": "ça,", + "start": 39.04, + "end": 39.46, + "confidence": 0.984 + }, + { + "text": "ce", + "start": 39.54, + "end": 39.76, + "confidence": 0.905 + }, + { + "text": "sont", + "start": 39.76, + "end": 39.96, + "confidence": 0.98 + }, + { + "text": "les", + "start": 39.96, + "end": 40.1, + "confidence": 0.982 + }, + { + "text": "conditions", + "start": 40.1, + "end": 40.64, + "confidence": 0.956 + }, + { + "text": "qui", + "start": 40.64, + "end": 40.96, + "confidence": 0.995 + }, + { + "text": "permettent", + "start": 40.96, + "end": 41.58, + "confidence": 0.994 + }, + { + "text": "de", + "start": 41.58, + "end": 41.64, + "confidence": 0.993 + }, + { + "text": "créer", + "start": 41.64, + "end": 42.08, + "confidence": 0.91 + }, + { + "text": "cette", + "start": 42.08, + "end": 42.34, + 
"confidence": 0.473 + }, + { + "text": "objet,", + "start": 42.34, + "end": 42.64, + "confidence": 0.235 + }, + { + "text": "en", + "start": 42.7, + "end": 42.84, + "confidence": 0.221 + }, + { + "text": "Nicolas,", + "start": 42.84, + "end": 43.12, + "confidence": 0.537 + } + ] + }, + { + "id": 12, + "seek": 2804, + "start": 43.12, + "end": 46.58, + "text": " dit qu'il est très symbolablement inédit dans l'histoire de l'humanité.", + "tokens": [ + 51121, + 6176, + 421, + 6, + 388, + 871, + 5732, + 5986, + 712, + 518, + 294, + 7811, + 270, + 2680, + 287, + 6, + 29093, + 368, + 287, + 6, + 18796, + 5066, + 13, + 51290 + ], + "temperature": 0.0, + "avg_logprob": -0.4256511350878242, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.12921307981014252, + "confidence": 0.717, + "words": [ + { + "text": "dit", + "start": 43.12, + "end": 43.56, + "confidence": 0.696 + }, + { + "text": "qu'il", + "start": 43.56, + "end": 43.76, + "confidence": 0.958 + }, + { + "text": "est", + "start": 43.76, + "end": 43.9, + "confidence": 0.849 + }, + { + "text": "très", + "start": 43.9, + "end": 44.08, + "confidence": 0.443 + }, + { + "text": "symbolablement", + "start": 44.08, + "end": 44.86, + "confidence": 0.321 + }, + { + "text": "inédit", + "start": 44.86, + "end": 45.54, + "confidence": 0.655 + }, + { + "text": "dans", + "start": 45.54, + "end": 45.74, + "confidence": 0.817 + }, + { + "text": "l'histoire", + "start": 45.74, + "end": 46.02, + "confidence": 0.824 + }, + { + "text": "de", + "start": 46.02, + "end": 46.14, + "confidence": 0.983 + }, + { + "text": "l'humanité.", + "start": 46.14, + "end": 46.58, + "confidence": 0.99 + } + ] + }, + { + "id": 13, + "seek": 2804, + "start": 47.06, + "end": 48.76, + "text": " Mais ça s'assoulait d'une autre interrogation.", + "tokens": [ + 51290, + 6313, + 2788, + 262, + 6, + 640, + 263, + 35235, + 274, + 6, + 2613, + 15081, + 24871, + 399, + 13, + 51402 + ], + "temperature": 0.0, + "avg_logprob": -0.4256511350878242, + 
"compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.12921307981014252, + "confidence": 0.717, + "words": [ + { + "text": "Mais", + "start": 47.06, + "end": 47.24, + "confidence": 0.928 + }, + { + "text": "ça", + "start": 47.24, + "end": 47.48, + "confidence": 0.75 + }, + { + "text": "s'assoulait", + "start": 47.48, + "end": 47.84, + "confidence": 0.616 + }, + { + "text": "d'une", + "start": 47.84, + "end": 48.08, + "confidence": 0.674 + }, + { + "text": "autre", + "start": 48.08, + "end": 48.18, + "confidence": 0.964 + }, + { + "text": "interrogation.", + "start": 48.18, + "end": 48.76, + "confidence": 0.855 + } + ] + }, + { + "id": 14, + "seek": 2804, + "start": 49.42, + "end": 54.93, + "text": " Est-ce que le fait que cette objet soit inédit un d'huits que notre rapport a lui est aussi un rapport", + "tokens": [ + 51402, + 4410, + 12, + 384, + 631, + 476, + 3887, + 631, + 5550, + 14964, + 12703, + 294, + 7811, + 270, + 517, + 274, + 6, + 12086, + 1208, + 631, + 10349, + 18018, + 257, + 8783, + 871, + 6212, + 517, + 18018, + 51710 + ], + "temperature": 0.0, + "avg_logprob": -0.4256511350878242, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.12921307981014252, + "confidence": 0.704, + "words": [ + { + "text": "Est-ce", + "start": 49.42, + "end": 49.7, + "confidence": 0.919 + }, + { + "text": "que", + "start": 49.7, + "end": 49.74, + "confidence": 0.99 + }, + { + "text": "le", + "start": 49.74, + "end": 49.82, + "confidence": 0.871 + }, + { + "text": "fait", + "start": 49.82, + "end": 50.02, + "confidence": 0.945 + }, + { + "text": "que", + "start": 50.02, + "end": 50.16, + "confidence": 0.923 + }, + { + "text": "cette", + "start": 50.16, + "end": 50.3, + "confidence": 0.943 + }, + { + "text": "objet", + "start": 50.3, + "end": 50.64, + "confidence": 0.963 + }, + { + "text": "soit", + "start": 50.64, + "end": 51.1, + "confidence": 0.99 + }, + { + "text": "inédit", + "start": 51.1, + "end": 51.82, + "confidence": 0.928 + }, + { + "text": 
"un", + "start": 51.82, + "end": 52.08, + "confidence": 0.511 + }, + { + "text": "d'huits", + "start": 52.08, + "end": 52.34, + "confidence": 0.246 + }, + { + "text": "que", + "start": 52.34, + "end": 52.44, + "confidence": 0.974 + }, + { + "text": "notre", + "start": 52.44, + "end": 52.66, + "confidence": 0.992 + }, + { + "text": "rapport", + "start": 52.66, + "end": 53.24, + "confidence": 0.779 + }, + { + "text": "a", + "start": 53.24, + "end": 53.5, + "confidence": 0.57 + }, + { + "text": "lui", + "start": 53.5, + "end": 53.7, + "confidence": 0.701 + }, + { + "text": "est", + "start": 53.7, + "end": 54.04, + "confidence": 0.879 + }, + { + "text": "aussi", + "start": 54.04, + "end": 54.52, + "confidence": 0.815 + }, + { + "text": "un", + "start": 54.52, + "end": 54.7, + "confidence": 0.48 + }, + { + "text": "rapport", + "start": 54.7, + "end": 54.93, + "confidence": 0.952 + } + ] + }, + { + "id": 15, + "seek": 2804, + "start": 54.93, + "end": 55.88, + "text": " inédit ?", + "tokens": [ + 51710, + 294, + 7811, + 270, + 2506, + 51760 + ], + "temperature": 0.0, + "avg_logprob": -0.4256511350878242, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.12921307981014252, + "confidence": 0.86, + "words": [ + { + "text": "inédit ?", + "start": 54.93, + "end": 55.88, + "confidence": 0.86 + } + ] + }, + { + "id": 16, + "seek": 5596, + "start": 55.96, + "end": 59.36, + "text": " Est-ce que le rapport qu'on a au sein de foi n'est comparable à celui qu'on entretenait", + "tokens": [ + 50364, + 4410, + 12, + 384, + 631, + 476, + 18018, + 421, + 6, + 266, + 257, + 1609, + 6195, + 368, + 6901, + 297, + 6, + 377, + 25323, + 1531, + 22829, + 421, + 6, + 266, + 3962, + 1147, + 1001, + 50530 + ], + "temperature": 0.0, + "avg_logprob": -0.6293430873325893, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948836743831635, + "confidence": 0.62, + "words": [ + { + "text": "Est-ce", + "start": 55.96, + "end": 56.38, + "confidence": 0.553 + }, + { + "text": 
"que", + "start": 56.38, + "end": 56.44, + "confidence": 0.892 + }, + { + "text": "le", + "start": 56.44, + "end": 56.58, + "confidence": 0.982 + }, + { + "text": "rapport", + "start": 56.58, + "end": 56.9, + "confidence": 0.999 + }, + { + "text": "qu'on", + "start": 56.9, + "end": 57.14, + "confidence": 0.902 + }, + { + "text": "a", + "start": 57.14, + "end": 57.2, + "confidence": 0.959 + }, + { + "text": "au", + "start": 57.2, + "end": 57.28, + "confidence": 0.273 + }, + { + "text": "sein", + "start": 57.28, + "end": 57.42, + "confidence": 0.17 + }, + { + "text": "de", + "start": 57.42, + "end": 57.56, + "confidence": 0.229 + }, + { + "text": "foi", + "start": 57.56, + "end": 57.58, + "confidence": 0.121 + }, + { + "text": "n'est", + "start": 57.58, + "end": 57.78, + "confidence": 0.79 + }, + { + "text": "comparable", + "start": 57.78, + "end": 58.28, + "confidence": 0.547 + }, + { + "text": "à", + "start": 58.28, + "end": 58.48, + "confidence": 0.934 + }, + { + "text": "celui", + "start": 58.48, + "end": 58.7, + "confidence": 0.851 + }, + { + "text": "qu'on", + "start": 58.7, + "end": 58.94, + "confidence": 0.926 + }, + { + "text": "entretenait", + "start": 58.94, + "end": 59.36, + "confidence": 0.56 + } + ] + }, + { + "id": 17, + "seek": 5596, + "start": 59.36, + "end": 63.14, + "text": " à d'autres objectes techniques comme la voiture ou le téléphone ?", + "tokens": [ + 50530, + 1531, + 274, + 6, + 16752, + 2657, + 279, + 7512, + 5173, + 635, + 38859, + 2820, + 476, + 47159, + 2506, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -0.6293430873325893, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948836743831635, + "confidence": 0.756, + "words": [ + { + "text": "à", + "start": 59.36, + "end": 59.44, + "confidence": 0.951 + }, + { + "text": "d'autres", + "start": 59.44, + "end": 59.68, + "confidence": 0.857 + }, + { + "text": "objectes", + "start": 59.68, + "end": 60.02, + "confidence": 0.507 + }, + { + "text": "techniques", + "start": 
60.02, + "end": 60.44, + "confidence": 0.462 + }, + { + "text": "comme", + "start": 60.44, + "end": 61.04, + "confidence": 0.639 + }, + { + "text": "la", + "start": 61.04, + "end": 61.52, + "confidence": 0.905 + }, + { + "text": "voiture", + "start": 61.52, + "end": 61.86, + "confidence": 0.946 + }, + { + "text": "ou", + "start": 61.86, + "end": 62.46, + "confidence": 0.755 + }, + { + "text": "le", + "start": 62.46, + "end": 62.68, + "confidence": 0.913 + }, + { + "text": "téléphone ?", + "start": 62.68, + "end": 63.14, + "confidence": 0.983 + } + ] + }, + { + "id": 18, + "seek": 5596, + "start": 65.4, + "end": 66.16, + "text": " Il n'y a pas d'équivalent.", + "tokens": [ + 50714, + 4416, + 297, + 6, + 88, + 257, + 1736, + 274, + 6, + 20183, + 3576, + 317, + 13, + 50872 + ], + "temperature": 0.0, + "avg_logprob": -0.6293430873325893, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948836743831635, + "confidence": 0.881, + "words": [ + { + "text": "Il", + "start": 65.4, + "end": 65.5, + "confidence": 0.901 + }, + { + "text": "n'y", + "start": 65.5, + "end": 65.54, + "confidence": 0.86 + }, + { + "text": "a", + "start": 65.54, + "end": 65.56, + "confidence": 0.962 + }, + { + "text": "pas", + "start": 65.56, + "end": 65.68, + "confidence": 0.998 + }, + { + "text": "d'équivalent.", + "start": 65.68, + "end": 66.16, + "confidence": 0.852 + } + ] + }, + { + "id": 19, + "seek": 5596, + "start": 66.16, + "end": 69.92, + "text": " On s'est espèrent de nous voter dans la relation à l'objet.", + "tokens": [ + 50872, + 1282, + 262, + 6, + 377, + 7089, + 1462, + 1753, + 368, + 4666, + 21722, + 2680, + 635, + 9721, + 1531, + 287, + 6, + 996, + 7108, + 13, + 51058 + ], + "temperature": 0.0, + "avg_logprob": -0.6293430873325893, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948836743831635, + "confidence": 0.501, + "words": [ + { + "text": "On", + "start": 66.16, + "end": 67.08, + "confidence": 0.302 + }, + { + "text": "s'est", + "start": 
67.08, + "end": 67.3, + "confidence": 0.563 + }, + { + "text": "espèrent", + "start": 67.3, + "end": 67.62, + "confidence": 0.134 + }, + { + "text": "de", + "start": 67.62, + "end": 67.66, + "confidence": 0.99 + }, + { + "text": "nous", + "start": 67.66, + "end": 67.84, + "confidence": 0.763 + }, + { + "text": "voter", + "start": 67.84, + "end": 68.36, + "confidence": 0.181 + }, + { + "text": "dans", + "start": 68.36, + "end": 68.86, + "confidence": 0.82 + }, + { + "text": "la", + "start": 68.86, + "end": 68.96, + "confidence": 0.617 + }, + { + "text": "relation", + "start": 68.96, + "end": 69.24, + "confidence": 0.949 + }, + { + "text": "à", + "start": 69.24, + "end": 69.42, + "confidence": 0.75 + }, + { + "text": "l'objet.", + "start": 69.42, + "end": 69.92, + "confidence": 0.887 + } + ] + }, + { + "id": 20, + "seek": 5596, + "start": 70.2, + "end": 71.22, + "text": " C'est facilement éterricion.", + "tokens": [ + 51058, + 383, + 6, + 377, + 23670, + 518, + 1136, + 391, + 1341, + 313, + 13, + 51122 + ], + "temperature": 0.0, + "avg_logprob": -0.6293430873325893, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948836743831635, + "confidence": 0.441, + "words": [ + { + "text": "C'est", + "start": 70.2, + "end": 70.34, + "confidence": 0.895 + }, + { + "text": "facilement", + "start": 70.34, + "end": 70.7, + "confidence": 0.562 + }, + { + "text": "éterricion.", + "start": 70.7, + "end": 71.22, + "confidence": 0.23 + } + ] + }, + { + "id": 21, + "seek": 5596, + "start": 71.64, + "end": 76.97, + "text": " Parce que la passion de l'utilisateur et ses affices a dépendance, cette objet d'un lieu", + "tokens": [ + 51122, + 20429, + 631, + 635, + 5418, + 368, + 287, + 6, + 20835, + 271, + 15540, + 1030, + 5385, + 2096, + 1473, + 257, + 45768, + 719, + 11, + 5550, + 14964, + 274, + 6, + 409, + 26036, + 51416 + ], + "temperature": 0.0, + "avg_logprob": -0.6293430873325893, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948836743831635, + 
"confidence": 0.457, + "words": [ + { + "text": "Parce", + "start": 71.64, + "end": 71.94, + "confidence": 0.679 + }, + { + "text": "que", + "start": 71.94, + "end": 72.32, + "confidence": 0.566 + }, + { + "text": "la", + "start": 72.32, + "end": 72.56, + "confidence": 0.336 + }, + { + "text": "passion", + "start": 72.56, + "end": 72.9, + "confidence": 0.267 + }, + { + "text": "de", + "start": 72.9, + "end": 73.24, + "confidence": 0.365 + }, + { + "text": "l'utilisateur", + "start": 73.24, + "end": 74.82, + "confidence": 0.499 + }, + { + "text": "et", + "start": 74.82, + "end": 74.92, + "confidence": 0.45 + }, + { + "text": "ses", + "start": 74.92, + "end": 75.04, + "confidence": 0.095 + }, + { + "text": "affices", + "start": 75.04, + "end": 75.24, + "confidence": 0.303 + }, + { + "text": "a", + "start": 75.24, + "end": 75.38, + "confidence": 0.59 + }, + { + "text": "dépendance,", + "start": 75.38, + "end": 76.06, + "confidence": 0.559 + }, + { + "text": "cette", + "start": 76.32, + "end": 76.34, + "confidence": 0.401 + }, + { + "text": "objet", + "start": 76.34, + "end": 76.54, + "confidence": 0.924 + }, + { + "text": "d'un", + "start": 76.54, + "end": 76.92, + "confidence": 0.783 + }, + { + "text": "lieu", + "start": 76.92, + "end": 76.97, + "confidence": 0.307 + } + ] + }, + { + "id": 22, + "seek": 5596, + "start": 76.97, + "end": 82.15, + "text": " en fait, une espèce de relation de médiation avec le monde qui rendent encore avec", + "tokens": [ + 51416, + 465, + 3887, + 11, + 2251, + 7089, + 30236, + 368, + 9721, + 368, + 42436, + 399, + 4163, + 476, + 10431, + 1956, + 6125, + 317, + 10122, + 4163, + 51671 + ], + "temperature": 0.0, + "avg_logprob": -0.6293430873325893, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948836743831635, + "confidence": 0.746, + "words": [ + { + "text": "en", + "start": 76.97, + "end": 77.2, + "confidence": 0.767 + }, + { + "text": "fait,", + "start": 77.2, + "end": 77.32, + "confidence": 0.944 + }, + { + 
"text": "une", + "start": 77.46, + "end": 77.48, + "confidence": 0.696 + }, + { + "text": "espèce", + "start": 77.48, + "end": 77.9, + "confidence": 0.97 + }, + { + "text": "de", + "start": 77.9, + "end": 78.08, + "confidence": 0.994 + }, + { + "text": "relation", + "start": 78.08, + "end": 78.5, + "confidence": 0.826 + }, + { + "text": "de", + "start": 78.5, + "end": 78.94, + "confidence": 0.72 + }, + { + "text": "médiation", + "start": 78.94, + "end": 79.5, + "confidence": 0.881 + }, + { + "text": "avec", + "start": 79.5, + "end": 79.74, + "confidence": 0.968 + }, + { + "text": "le", + "start": 79.74, + "end": 79.94, + "confidence": 0.986 + }, + { + "text": "monde", + "start": 79.94, + "end": 80.44, + "confidence": 0.906 + }, + { + "text": "qui", + "start": 80.44, + "end": 81.14, + "confidence": 0.825 + }, + { + "text": "rendent", + "start": 81.14, + "end": 81.8, + "confidence": 0.584 + }, + { + "text": "encore", + "start": 81.8, + "end": 81.98, + "confidence": 0.198 + }, + { + "text": "avec", + "start": 81.98, + "end": 82.15, + "confidence": 0.486 + } + ] + }, + { + "id": 23, + "seek": 8210, + "start": 82.15, + "end": 83.44, + "text": " la maille de celles formes de rogeur.", + "tokens": [ + 50364, + 635, + 463, + 3409, + 368, + 2815, + 279, + 1254, + 279, + 368, + 744, + 432, + 374, + 13, + 50440 + ], + "temperature": 0.0, + "avg_logprob": -0.45905232269491925, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066797912120819, + "confidence": 0.465, + "words": [ + { + "text": "la", + "start": 82.15, + "end": 82.22, + "confidence": 0.648 + }, + { + "text": "maille", + "start": 82.22, + "end": 82.4, + "confidence": 0.136 + }, + { + "text": "de", + "start": 82.4, + "end": 82.48, + "confidence": 0.585 + }, + { + "text": "celles", + "start": 82.48, + "end": 82.74, + "confidence": 0.561 + }, + { + "text": "formes", + "start": 82.74, + "end": 82.96, + "confidence": 0.78 + }, + { + "text": "de", + "start": 82.96, + "end": 83.0, + "confidence": 0.936 + 
}, + { + "text": "rogeur.", + "start": 83.0, + "end": 83.44, + "confidence": 0.433 + } + ] + }, + { + "id": 24, + "seek": 8210, + "start": 83.98, + "end": 87.86, + "text": " Donc, à objets inédits, rapport inédits.", + "tokens": [ + 50440, + 7477, + 11, + 1531, + 1111, + 25349, + 294, + 7811, + 1208, + 11, + 18018, + 294, + 7811, + 1208, + 13, + 50640 + ], + "temperature": 0.0, + "avg_logprob": -0.45905232269491925, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066797912120819, + "confidence": 0.745, + "words": [ + { + "text": "Donc,", + "start": 83.98, + "end": 84.46, + "confidence": 0.833 + }, + { + "text": "à", + "start": 84.56, + "end": 84.96, + "confidence": 0.696 + }, + { + "text": "objets", + "start": 84.96, + "end": 85.44, + "confidence": 0.526 + }, + { + "text": "inédits,", + "start": 85.44, + "end": 86.24, + "confidence": 0.701 + }, + { + "text": "rapport", + "start": 86.32, + "end": 86.92, + "confidence": 0.944 + }, + { + "text": "inédits.", + "start": 86.92, + "end": 87.86, + "confidence": 0.908 + } + ] + }, + { + "id": 25, + "seek": 8210, + "start": 88.1, + "end": 94.2, + "text": " Et, ce rapport, si j'en prends Nicolas, frère caractérisée par un mélange de dépendance", + "tokens": [ + 50640, + 3790, + 11, + 1769, + 18018, + 11, + 1511, + 361, + 6, + 268, + 46750, + 38268, + 11, + 431, + 4212, + 1032, + 578, + 4198, + 50027, + 971, + 517, + 41953, + 933, + 368, + 45768, + 719, + 50972 + ], + "temperature": 0.0, + "avg_logprob": -0.45905232269491925, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066797912120819, + "confidence": 0.669, + "words": [ + { + "text": "Et,", + "start": 88.1, + "end": 88.3, + "confidence": 0.865 + }, + { + "text": "ce", + "start": 88.58, + "end": 88.84, + "confidence": 0.277 + }, + { + "text": "rapport,", + "start": 88.84, + "end": 89.28, + "confidence": 0.998 + }, + { + "text": "si", + "start": 89.38, + "end": 89.56, + "confidence": 0.933 + }, + { + "text": "j'en", + "start": 89.56, + 
"end": 89.74, + "confidence": 0.78 + }, + { + "text": "prends", + "start": 89.74, + "end": 89.92, + "confidence": 0.307 + }, + { + "text": "Nicolas,", + "start": 89.92, + "end": 90.22, + "confidence": 0.65 + }, + { + "text": "frère", + "start": 90.78, + "end": 91.1, + "confidence": 0.431 + }, + { + "text": "caractérisée", + "start": 91.1, + "end": 91.8, + "confidence": 0.595 + }, + { + "text": "par", + "start": 91.8, + "end": 92.14, + "confidence": 0.907 + }, + { + "text": "un", + "start": 92.14, + "end": 92.34, + "confidence": 0.989 + }, + { + "text": "mélange", + "start": 92.34, + "end": 92.98, + "confidence": 0.92 + }, + { + "text": "de", + "start": 92.98, + "end": 93.24, + "confidence": 0.842 + }, + { + "text": "dépendance", + "start": 93.24, + "end": 94.2, + "confidence": 0.675 + } + ] + }, + { + "id": 26, + "seek": 8210, + "start": 94.36, + "end": 95.08, + "text": " et de rogeur.", + "tokens": [ + 50972, + 1030, + 368, + 744, + 432, + 374, + 13, + 51022 + ], + "temperature": 0.0, + "avg_logprob": -0.45905232269491925, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066797912120819, + "confidence": 0.851, + "words": [ + { + "text": "et", + "start": 94.36, + "end": 94.52, + "confidence": 0.97 + }, + { + "text": "de", + "start": 94.52, + "end": 94.64, + "confidence": 0.996 + }, + { + "text": "rogeur.", + "start": 94.64, + "end": 95.08, + "confidence": 0.773 + } + ] + }, + { + "id": 27, + "seek": 8210, + "start": 96.3, + "end": 100.48, + "text": " Bon, en vrai, il faudrait remonter très, très filmant tout l'histoire des objectes", + "tokens": [ + 51022, + 7368, + 11, + 465, + 17815, + 11, + 1930, + 38694, + 8645, + 890, + 41806, + 5732, + 11, + 5732, + 2007, + 394, + 3486, + 287, + 6, + 29093, + 730, + 2657, + 279, + 51286 + ], + "temperature": 0.0, + "avg_logprob": -0.45905232269491925, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066797912120819, + "confidence": 0.752, + "words": [ + { + "text": "Bon,", + "start": 96.3, + 
"end": 96.32, + "confidence": 0.6 + }, + { + "text": "en", + "start": 96.36, + "end": 96.58, + "confidence": 0.918 + }, + { + "text": "vrai,", + "start": 96.58, + "end": 96.92, + "confidence": 0.992 + }, + { + "text": "il", + "start": 97.14, + "end": 97.18, + "confidence": 0.987 + }, + { + "text": "faudrait", + "start": 97.18, + "end": 97.56, + "confidence": 0.933 + }, + { + "text": "remonter", + "start": 97.56, + "end": 98.08, + "confidence": 0.61 + }, + { + "text": "très,", + "start": 98.08, + "end": 98.56, + "confidence": 0.975 + }, + { + "text": "très", + "start": 98.56, + "end": 98.86, + "confidence": 0.986 + }, + { + "text": "filmant", + "start": 98.86, + "end": 99.42, + "confidence": 0.409 + }, + { + "text": "tout", + "start": 99.42, + "end": 99.68, + "confidence": 0.348 + }, + { + "text": "l'histoire", + "start": 99.68, + "end": 100.08, + "confidence": 0.876 + }, + { + "text": "des", + "start": 100.08, + "end": 100.22, + "confidence": 0.957 + }, + { + "text": "objectes", + "start": 100.22, + "end": 100.48, + "confidence": 0.842 + } + ] + }, + { + "id": 28, + "seek": 8210, + "start": 100.48, + "end": 105.24, + "text": " techniques et de leur infertion dans nos vieux pour déterminer si ce rapport est totalement", + "tokens": [ + 51286, + 7512, + 1030, + 368, + 9580, + 1536, + 911, + 313, + 2680, + 3269, + 4941, + 2449, + 2016, + 2795, + 29725, + 260, + 1511, + 1769, + 18018, + 871, + 45203, + 51530 + ], + "temperature": 0.0, + "avg_logprob": -0.45905232269491925, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066797912120819, + "confidence": 0.614, + "words": [ + { + "text": "techniques", + "start": 100.48, + "end": 101.0, + "confidence": 0.952 + }, + { + "text": "et", + "start": 101.0, + "end": 101.58, + "confidence": 0.943 + }, + { + "text": "de", + "start": 101.58, + "end": 101.72, + "confidence": 0.975 + }, + { + "text": "leur", + "start": 101.72, + "end": 101.8, + "confidence": 0.803 + }, + { + "text": "infertion", + "start": 101.8, + 
"end": 102.34, + "confidence": 0.382 + }, + { + "text": "dans", + "start": 102.34, + "end": 102.5, + "confidence": 0.297 + }, + { + "text": "nos", + "start": 102.5, + "end": 102.68, + "confidence": 0.419 + }, + { + "text": "vieux", + "start": 102.68, + "end": 103.08, + "confidence": 0.386 + }, + { + "text": "pour", + "start": 103.08, + "end": 103.1, + "confidence": 0.281 + }, + { + "text": "déterminer", + "start": 103.1, + "end": 103.64, + "confidence": 0.976 + }, + { + "text": "si", + "start": 103.64, + "end": 103.8, + "confidence": 0.367 + }, + { + "text": "ce", + "start": 103.8, + "end": 103.92, + "confidence": 0.983 + }, + { + "text": "rapport", + "start": 103.92, + "end": 104.22, + "confidence": 0.998 + }, + { + "text": "est", + "start": 104.22, + "end": 104.88, + "confidence": 0.942 + }, + { + "text": "totalement", + "start": 104.88, + "end": 105.24, + "confidence": 0.907 + } + ] + }, + { + "id": 29, + "seek": 8210, + "start": 105.24, + "end": 106.1, + "text": " inédit.", + "tokens": [ + 51530, + 294, + 7811, + 270, + 13, + 51580 + ], + "temperature": 0.0, + "avg_logprob": -0.45905232269491925, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066797912120819, + "confidence": 0.939, + "words": [ + { + "text": "inédit.", + "start": 105.24, + "end": 106.1, + "confidence": 0.939 + } + ] + }, + { + "id": 30, + "seek": 8210, + "start": 106.16, + "end": 109.44, + "text": " Mais j'ai l'impression comme ça que Nicolas se trompe pas vraiment.", + "tokens": [ + 51580, + 6313, + 361, + 6, + 1301, + 287, + 6, + 36107, + 5173, + 2788, + 631, + 38268, + 369, + 504, + 298, + 494, + 1736, + 8322, + 13, + 51738 + ], + "temperature": 0.0, + "avg_logprob": -0.45905232269491925, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066797912120819, + "confidence": 0.811, + "words": [ + { + "text": "Mais", + "start": 106.16, + "end": 106.5, + "confidence": 0.721 + }, + { + "text": "j'ai", + "start": 106.5, + "end": 106.92, + "confidence": 0.91 + }, + { 
+ "text": "l'impression", + "start": 106.92, + "end": 107.34, + "confidence": 0.956 + }, + { + "text": "comme", + "start": 107.34, + "end": 107.58, + "confidence": 0.7 + }, + { + "text": "ça", + "start": 107.58, + "end": 107.74, + "confidence": 0.953 + }, + { + "text": "que", + "start": 107.74, + "end": 108.06, + "confidence": 0.941 + }, + { + "text": "Nicolas", + "start": 108.06, + "end": 108.46, + "confidence": 0.994 + }, + { + "text": "se", + "start": 108.46, + "end": 108.68, + "confidence": 0.716 + }, + { + "text": "trompe", + "start": 108.68, + "end": 109.02, + "confidence": 0.54 + }, + { + "text": "pas", + "start": 109.02, + "end": 109.16, + "confidence": 0.886 + }, + { + "text": "vraiment.", + "start": 109.16, + "end": 109.44, + "confidence": 0.963 + } + ] + }, + { + "id": 31, + "seek": 8210, + "start": 109.92, + "end": 110.86, + "text": " Pour autant, je sache.", + "tokens": [ + 51738, + 8732, + 34081, + 11, + 1506, + 262, + 6000, + 13, + 51808 + ], + "temperature": 0.0, + "avg_logprob": -0.45905232269491925, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066797912120819, + "confidence": 0.6, + "words": [ + { + "text": "Pour", + "start": 109.92, + "end": 110.1, + "confidence": 0.49 + }, + { + "text": "autant,", + "start": 110.1, + "end": 110.28, + "confidence": 0.75 + }, + { + "text": "je", + "start": 110.42, + "end": 110.48, + "confidence": 0.931 + }, + { + "text": "sache.", + "start": 110.48, + "end": 110.86, + "confidence": 0.477 + } + ] + }, + { + "id": 32, + "seek": 11098, + "start": 111.04, + "end": 115.0, + "text": " Il y a eu plein de discussions autour de la voiture ou même du téléphone.", + "tokens": [ + 50364, + 4416, + 288, + 257, + 2228, + 21088, + 368, + 11088, + 30249, + 368, + 635, + 38859, + 2820, + 5698, + 1581, + 47159, + 13, + 50568 + ], + "temperature": 0.0, + "avg_logprob": -0.44321310444242634, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.15390530228614807, + "confidence": 0.821, + "words": [ + { 
+ "text": "Il", + "start": 111.04, + "end": 111.2, + "confidence": 0.905 + }, + { + "text": "y", + "start": 111.2, + "end": 111.3, + "confidence": 0.934 + }, + { + "text": "a", + "start": 111.3, + "end": 111.56, + "confidence": 0.893 + }, + { + "text": "eu", + "start": 111.56, + "end": 111.6, + "confidence": 0.839 + }, + { + "text": "plein", + "start": 111.6, + "end": 111.88, + "confidence": 0.776 + }, + { + "text": "de", + "start": 111.88, + "end": 112.12, + "confidence": 0.944 + }, + { + "text": "discussions", + "start": 112.12, + "end": 112.6, + "confidence": 0.681 + }, + { + "text": "autour", + "start": 112.6, + "end": 113.04, + "confidence": 0.978 + }, + { + "text": "de", + "start": 113.04, + "end": 113.48, + "confidence": 0.975 + }, + { + "text": "la", + "start": 113.48, + "end": 113.56, + "confidence": 0.966 + }, + { + "text": "voiture", + "start": 113.56, + "end": 113.88, + "confidence": 0.983 + }, + { + "text": "ou", + "start": 113.88, + "end": 114.14, + "confidence": 0.66 + }, + { + "text": "même", + "start": 114.14, + "end": 114.34, + "confidence": 0.99 + }, + { + "text": "du", + "start": 114.34, + "end": 114.64, + "confidence": 0.283 + }, + { + "text": "téléphone.", + "start": 114.64, + "end": 115.0, + "confidence": 0.986 + } + ] + }, + { + "id": 33, + "seek": 11098, + "start": 115.52, + "end": 119.51, + "text": " Mais, la dépense n'était pas du même mort, donc le rejet n'en plus n'était pas du même", + "tokens": [ + 50568, + 6313, + 11, + 635, + 27998, + 1288, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 6599, + 11, + 5926, + 476, + 319, + 7108, + 297, + 6, + 268, + 1804, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 50790 + ], + "temperature": 0.0, + "avg_logprob": -0.44321310444242634, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.15390530228614807, + "confidence": 0.701, + "words": [ + { + "text": "Mais,", + "start": 115.52, + "end": 115.82, + "confidence": 0.983 + }, + { + "text": "la", + "start": 115.9, + "end": 116.0, + 
"confidence": 0.961 + }, + { + "text": "dépense", + "start": 116.0, + "end": 116.42, + "confidence": 0.616 + }, + { + "text": "n'était", + "start": 116.42, + "end": 116.82, + "confidence": 0.955 + }, + { + "text": "pas", + "start": 116.82, + "end": 117.04, + "confidence": 0.994 + }, + { + "text": "du", + "start": 117.04, + "end": 117.2, + "confidence": 0.958 + }, + { + "text": "même", + "start": 117.2, + "end": 117.38, + "confidence": 0.929 + }, + { + "text": "mort,", + "start": 117.38, + "end": 117.58, + "confidence": 0.686 + }, + { + "text": "donc", + "start": 117.7, + "end": 118.02, + "confidence": 0.932 + }, + { + "text": "le", + "start": 118.02, + "end": 118.34, + "confidence": 0.95 + }, + { + "text": "rejet", + "start": 118.34, + "end": 118.64, + "confidence": 0.57 + }, + { + "text": "n'en", + "start": 118.64, + "end": 118.82, + "confidence": 0.429 + }, + { + "text": "plus", + "start": 118.82, + "end": 118.96, + "confidence": 0.055 + }, + { + "text": "n'était", + "start": 118.96, + "end": 119.16, + "confidence": 0.973 + }, + { + "text": "pas", + "start": 119.16, + "end": 119.32, + "confidence": 0.993 + }, + { + "text": "du", + "start": 119.32, + "end": 119.42, + "confidence": 0.823 + }, + { + "text": "même", + "start": 119.42, + "end": 119.51, + "confidence": 0.79 + } + ] + }, + { + "id": 34, + "seek": 11098, + "start": 119.51, + "end": 119.72, + "text": " mort.", + "tokens": [ + 50790, + 6599, + 13, + 50840 + ], + "temperature": 0.0, + "avg_logprob": -0.44321310444242634, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.15390530228614807, + "confidence": 0.639, + "words": [ + { + "text": "mort.", + "start": 119.51, + "end": 119.72, + "confidence": 0.639 + } + ] + }, + { + "id": 35, + "seek": 11098, + "start": 120.04, + "end": 121.24, + "text": " On peut adorer sa bagnure.", + "tokens": [ + 50840, + 1282, + 5977, + 614, + 17618, + 601, + 3411, + 77, + 540, + 13, + 50890 + ], + "temperature": 0.0, + "avg_logprob": -0.44321310444242634, + 
"compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.15390530228614807, + "confidence": 0.615, + "words": [ + { + "text": "On", + "start": 120.04, + "end": 120.22, + "confidence": 0.981 + }, + { + "text": "peut", + "start": 120.22, + "end": 120.36, + "confidence": 0.988 + }, + { + "text": "adorer", + "start": 120.36, + "end": 120.7, + "confidence": 0.859 + }, + { + "text": "sa", + "start": 120.7, + "end": 120.88, + "confidence": 0.918 + }, + { + "text": "bagnure.", + "start": 120.88, + "end": 121.24, + "confidence": 0.314 + } + ] + }, + { + "id": 36, + "seek": 11098, + "start": 121.38, + "end": 123.06, + "text": " On a par besoin pour plein de choses.", + "tokens": [ + 50890, + 1282, + 257, + 971, + 19207, + 2016, + 21088, + 368, + 14488, + 13, + 50972 + ], + "temperature": 0.0, + "avg_logprob": -0.44321310444242634, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.15390530228614807, + "confidence": 0.722, + "words": [ + { + "text": "On", + "start": 121.38, + "end": 121.56, + "confidence": 0.897 + }, + { + "text": "a", + "start": 121.56, + "end": 121.64, + "confidence": 0.656 + }, + { + "text": "par", + "start": 121.64, + "end": 121.8, + "confidence": 0.161 + }, + { + "text": "besoin", + "start": 121.8, + "end": 122.12, + "confidence": 0.947 + }, + { + "text": "pour", + "start": 122.12, + "end": 122.5, + "confidence": 0.976 + }, + { + "text": "plein", + "start": 122.5, + "end": 122.72, + "confidence": 0.857 + }, + { + "text": "de", + "start": 122.72, + "end": 122.8, + "confidence": 0.993 + }, + { + "text": "choses.", + "start": 122.8, + "end": 123.06, + "confidence": 0.989 + } + ] + }, + { + "id": 37, + "seek": 11098, + "start": 123.36, + "end": 126.46, + "text": " Et là, le soir, quand on va se coucher, on la laisse.", + "tokens": [ + 50972, + 3790, + 3684, + 11, + 476, + 27105, + 11, + 6932, + 322, + 2773, + 369, + 1384, + 6759, + 11, + 322, + 635, + 30969, + 13, + 51142 + ], + "temperature": 0.0, + "avg_logprob": -0.44321310444242634, + 
"compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.15390530228614807, + "confidence": 0.835, + "words": [ + { + "text": "Et", + "start": 123.36, + "end": 123.5, + "confidence": 0.606 + }, + { + "text": "là,", + "start": 123.5, + "end": 123.76, + "confidence": 0.498 + }, + { + "text": "le", + "start": 123.82, + "end": 124.06, + "confidence": 0.986 + }, + { + "text": "soir,", + "start": 124.06, + "end": 124.42, + "confidence": 0.971 + }, + { + "text": "quand", + "start": 124.84, + "end": 124.96, + "confidence": 0.787 + }, + { + "text": "on", + "start": 124.96, + "end": 125.06, + "confidence": 0.988 + }, + { + "text": "va", + "start": 125.06, + "end": 125.18, + "confidence": 0.966 + }, + { + "text": "se", + "start": 125.18, + "end": 125.26, + "confidence": 0.869 + }, + { + "text": "coucher,", + "start": 125.26, + "end": 125.6, + "confidence": 0.804 + }, + { + "text": "on", + "start": 125.82, + "end": 126.06, + "confidence": 0.98 + }, + { + "text": "la", + "start": 126.06, + "end": 126.2, + "confidence": 0.811 + }, + { + "text": "laisse.", + "start": 126.2, + "end": 126.46, + "confidence": 0.985 + } + ] + }, + { + "id": 38, + "seek": 11098, + "start": 127.06, + "end": 130.1, + "text": " On la pade en la main quand on est colis, qu'on n'a même pas au chiot.", + "tokens": [ + 51142, + 1282, + 635, + 280, + 762, + 465, + 635, + 2135, + 6932, + 322, + 871, + 1173, + 271, + 11, + 421, + 6, + 266, + 297, + 6, + 64, + 5698, + 1736, + 1609, + 417, + 6471, + 13, + 51334 + ], + "temperature": 0.0, + "avg_logprob": -0.44321310444242634, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.15390530228614807, + "confidence": 0.542, + "words": [ + { + "text": "On", + "start": 127.06, + "end": 127.32, + "confidence": 0.605 + }, + { + "text": "la", + "start": 127.32, + "end": 127.5, + "confidence": 0.35 + }, + { + "text": "pade", + "start": 127.5, + "end": 127.72, + "confidence": 0.18 + }, + { + "text": "en", + "start": 127.72, + "end": 127.88, + "confidence": 
0.775 + }, + { + "text": "la", + "start": 127.88, + "end": 128.04, + "confidence": 0.795 + }, + { + "text": "main", + "start": 128.04, + "end": 128.3, + "confidence": 0.971 + }, + { + "text": "quand", + "start": 128.3, + "end": 128.46, + "confidence": 0.483 + }, + { + "text": "on", + "start": 128.46, + "end": 128.62, + "confidence": 0.994 + }, + { + "text": "est", + "start": 128.62, + "end": 128.74, + "confidence": 0.714 + }, + { + "text": "colis,", + "start": 128.74, + "end": 129.12, + "confidence": 0.501 + }, + { + "text": "qu'on", + "start": 129.22, + "end": 129.32, + "confidence": 0.777 + }, + { + "text": "n'a", + "start": 129.32, + "end": 129.42, + "confidence": 0.439 + }, + { + "text": "même", + "start": 129.42, + "end": 129.56, + "confidence": 0.324 + }, + { + "text": "pas", + "start": 129.56, + "end": 129.74, + "confidence": 1.0 + }, + { + "text": "au", + "start": 129.74, + "end": 129.88, + "confidence": 0.64 + }, + { + "text": "chiot.", + "start": 129.88, + "end": 130.1, + "confidence": 0.514 + } + ] + }, + { + "id": 39, + "seek": 11098, + "start": 130.94, + "end": 135.32, + "text": " On pouvait être émervé par son mome qui occupeait la ligne de téléphone pendant une heure", + "tokens": [ + 51334, + 1282, + 45913, + 7418, + 1136, + 936, + 15797, + 971, + 1872, + 275, + 423, + 1956, + 2678, + 84, + 494, + 1001, + 635, + 34207, + 368, + 47159, + 17338, + 2251, + 30027, + 51574 + ], + "temperature": 0.0, + "avg_logprob": -0.44321310444242634, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.15390530228614807, + "confidence": 0.656, + "words": [ + { + "text": "On", + "start": 130.94, + "end": 131.04, + "confidence": 0.99 + }, + { + "text": "pouvait", + "start": 131.04, + "end": 131.28, + "confidence": 0.588 + }, + { + "text": "être", + "start": 131.28, + "end": 131.6, + "confidence": 0.446 + }, + { + "text": "émervé", + "start": 131.6, + "end": 132.24, + "confidence": 0.725 + }, + { + "text": "par", + "start": 132.24, + "end": 132.5, + 
"confidence": 0.848 + }, + { + "text": "son", + "start": 132.5, + "end": 132.7, + "confidence": 0.445 + }, + { + "text": "mome", + "start": 132.7, + "end": 133.08, + "confidence": 0.255 + }, + { + "text": "qui", + "start": 133.08, + "end": 133.3, + "confidence": 0.911 + }, + { + "text": "occupeait", + "start": 133.3, + "end": 133.74, + "confidence": 0.642 + }, + { + "text": "la", + "start": 133.74, + "end": 133.86, + "confidence": 0.808 + }, + { + "text": "ligne", + "start": 133.86, + "end": 134.02, + "confidence": 0.975 + }, + { + "text": "de", + "start": 134.02, + "end": 134.24, + "confidence": 0.928 + }, + { + "text": "téléphone", + "start": 134.24, + "end": 134.56, + "confidence": 0.978 + }, + { + "text": "pendant", + "start": 134.56, + "end": 134.92, + "confidence": 0.901 + }, + { + "text": "une", + "start": 134.92, + "end": 135.18, + "confidence": 0.811 + }, + { + "text": "heure", + "start": 135.18, + "end": 135.32, + "confidence": 0.477 + } + ] + }, + { + "id": 40, + "seek": 11098, + "start": 135.32, + "end": 137.04, + "text": " chaque soir pour discuter avec un copain.", + "tokens": [ + 51574, + 18920, + 27105, + 2016, + 2983, + 20314, + 4163, + 517, + 2971, + 491, + 13, + 51664 + ], + "temperature": 0.0, + "avg_logprob": -0.44321310444242634, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.15390530228614807, + "confidence": 0.955, + "words": [ + { + "text": "chaque", + "start": 135.32, + "end": 135.6, + "confidence": 0.99 + }, + { + "text": "soir", + "start": 135.6, + "end": 135.8, + "confidence": 0.997 + }, + { + "text": "pour", + "start": 135.8, + "end": 136.0, + "confidence": 0.981 + }, + { + "text": "discuter", + "start": 136.0, + "end": 136.34, + "confidence": 0.882 + }, + { + "text": "avec", + "start": 136.34, + "end": 136.5, + "confidence": 0.993 + }, + { + "text": "un", + "start": 136.5, + "end": 136.62, + "confidence": 0.967 + }, + { + "text": "copain.", + "start": 136.62, + "end": 137.04, + "confidence": 0.953 + } + ] + }, + { + 
"id": 41, + "seek": 13698, + "start": 137.32, + "end": 141.84, + "text": " Mais ça ne ressemble pas à ce qu'on peut ressentir à voir même mome aujourd'hui.", + "tokens": [ + 50376, + 6313, + 2788, + 408, + 725, + 37227, + 1736, + 1531, + 1769, + 421, + 6, + 266, + 5977, + 24689, + 317, + 347, + 1531, + 10695, + 5698, + 275, + 423, + 14023, + 6, + 10556, + 13, + 50606 + ], + "temperature": 0.0, + "avg_logprob": -0.533746434354234, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.1096271350979805, + "confidence": 0.735, + "words": [ + { + "text": "Mais", + "start": 137.32, + "end": 137.52, + "confidence": 0.984 + }, + { + "text": "ça", + "start": 137.52, + "end": 137.7, + "confidence": 0.877 + }, + { + "text": "ne", + "start": 137.7, + "end": 137.8, + "confidence": 0.672 + }, + { + "text": "ressemble", + "start": 137.8, + "end": 138.22, + "confidence": 0.731 + }, + { + "text": "pas", + "start": 138.22, + "end": 138.76, + "confidence": 0.526 + }, + { + "text": "à", + "start": 138.76, + "end": 138.94, + "confidence": 0.98 + }, + { + "text": "ce", + "start": 138.94, + "end": 139.08, + "confidence": 0.578 + }, + { + "text": "qu'on", + "start": 139.08, + "end": 139.22, + "confidence": 0.96 + }, + { + "text": "peut", + "start": 139.22, + "end": 139.38, + "confidence": 0.547 + }, + { + "text": "ressentir", + "start": 139.38, + "end": 140.06, + "confidence": 0.888 + }, + { + "text": "à", + "start": 140.06, + "end": 140.26, + "confidence": 0.437 + }, + { + "text": "voir", + "start": 140.26, + "end": 140.54, + "confidence": 0.952 + }, + { + "text": "même", + "start": 140.54, + "end": 140.96, + "confidence": 0.297 + }, + { + "text": "mome", + "start": 140.96, + "end": 141.22, + "confidence": 0.496 + }, + { + "text": "aujourd'hui.", + "start": 141.22, + "end": 141.84, + "confidence": 0.961 + } + ] + }, + { + "id": 42, + "seek": 13698, + "start": 141.96, + "end": 145.86, + "text": " Continuellement avec son smartphone dans la main, comme c'était une sorte de 
estimateur", + "tokens": [ + 50606, + 14674, + 31816, + 4163, + 1872, + 13307, + 2680, + 635, + 2135, + 11, + 5173, + 269, + 6, + 9743, + 2251, + 25559, + 368, + 8017, + 15540, + 50810 + ], + "temperature": 0.0, + "avg_logprob": -0.533746434354234, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.1096271350979805, + "confidence": 0.663, + "words": [ + { + "text": "Continuellement", + "start": 141.96, + "end": 142.84, + "confidence": 0.844 + }, + { + "text": "avec", + "start": 142.84, + "end": 143.24, + "confidence": 0.646 + }, + { + "text": "son", + "start": 143.24, + "end": 143.42, + "confidence": 0.864 + }, + { + "text": "smartphone", + "start": 143.42, + "end": 143.72, + "confidence": 0.447 + }, + { + "text": "dans", + "start": 143.72, + "end": 143.92, + "confidence": 0.587 + }, + { + "text": "la", + "start": 143.92, + "end": 144.0, + "confidence": 0.974 + }, + { + "text": "main,", + "start": 144.0, + "end": 144.3, + "confidence": 0.995 + }, + { + "text": "comme", + "start": 144.42, + "end": 144.56, + "confidence": 0.895 + }, + { + "text": "c'était", + "start": 144.56, + "end": 144.84, + "confidence": 0.596 + }, + { + "text": "une", + "start": 144.84, + "end": 144.96, + "confidence": 0.972 + }, + { + "text": "sorte", + "start": 144.96, + "end": 145.1, + "confidence": 0.658 + }, + { + "text": "de", + "start": 145.1, + "end": 145.4, + "confidence": 0.312 + }, + { + "text": "estimateur", + "start": 145.4, + "end": 145.86, + "confidence": 0.489 + } + ] + }, + { + "id": 43, + "seek": 13698, + "start": 145.96, + "end": 149.0, + "text": " extère de tomber de lâcher à l'éantrénée, ça m'a eu immédiate.", + "tokens": [ + 50810, + 1279, + 4212, + 368, + 2916, + 607, + 368, + 48835, + 6759, + 1531, + 287, + 6, + 526, + 394, + 81, + 3516, + 3856, + 11, + 2788, + 275, + 6, + 64, + 2228, + 3397, + 526, + 4504, + 473, + 13, + 50954 + ], + "temperature": 0.0, + "avg_logprob": -0.533746434354234, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 
0.1096271350979805, + "confidence": 0.416, + "words": [ + { + "text": "extère", + "start": 145.96, + "end": 146.4, + "confidence": 0.371 + }, + { + "text": "de", + "start": 146.4, + "end": 146.54, + "confidence": 0.254 + }, + { + "text": "tomber", + "start": 146.54, + "end": 146.7, + "confidence": 0.102 + }, + { + "text": "de", + "start": 146.7, + "end": 146.9, + "confidence": 0.454 + }, + { + "text": "lâcher", + "start": 146.9, + "end": 147.34, + "confidence": 0.75 + }, + { + "text": "à", + "start": 147.34, + "end": 147.46, + "confidence": 0.444 + }, + { + "text": "l'éantrénée,", + "start": 147.46, + "end": 147.9, + "confidence": 0.491 + }, + { + "text": "ça", + "start": 148.0, + "end": 148.02, + "confidence": 0.799 + }, + { + "text": "m'a", + "start": 148.02, + "end": 148.26, + "confidence": 0.531 + }, + { + "text": "eu", + "start": 148.26, + "end": 148.44, + "confidence": 0.136 + }, + { + "text": "immédiate.", + "start": 148.44, + "end": 149.0, + "confidence": 0.505 + } + ] + }, + { + "id": 44, + "seek": 13698, + "start": 149.08, + "end": 151.98, + "text": " Bon, je dis ça pour le mome, mais évidemment, va là, bon aussi.", + "tokens": [ + 50954, + 7368, + 11, + 1506, + 717, + 2788, + 2016, + 476, + 275, + 423, + 11, + 2420, + 24724, + 11, + 2773, + 3684, + 11, + 4428, + 6212, + 13, + 51126 + ], + "temperature": 0.0, + "avg_logprob": -0.533746434354234, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.1096271350979805, + "confidence": 0.564, + "words": [ + { + "text": "Bon,", + "start": 149.08, + "end": 149.28, + "confidence": 0.375 + }, + { + "text": "je", + "start": 149.34, + "end": 149.4, + "confidence": 0.471 + }, + { + "text": "dis", + "start": 149.4, + "end": 149.46, + "confidence": 0.324 + }, + { + "text": "ça", + "start": 149.46, + "end": 149.64, + "confidence": 0.952 + }, + { + "text": "pour", + "start": 149.64, + "end": 149.8, + "confidence": 0.963 + }, + { + "text": "le", + "start": 149.8, + "end": 149.92, + "confidence": 0.992 + }, + { 
+ "text": "mome,", + "start": 149.92, + "end": 150.12, + "confidence": 0.619 + }, + { + "text": "mais", + "start": 150.46, + "end": 150.52, + "confidence": 0.694 + }, + { + "text": "évidemment,", + "start": 150.52, + "end": 151.12, + "confidence": 0.776 + }, + { + "text": "va", + "start": 151.26, + "end": 151.36, + "confidence": 0.388 + }, + { + "text": "là,", + "start": 151.36, + "end": 151.46, + "confidence": 0.59 + }, + { + "text": "bon", + "start": 151.6, + "end": 151.68, + "confidence": 0.825 + }, + { + "text": "aussi.", + "start": 151.68, + "end": 151.98, + "confidence": 0.162 + } + ] + }, + { + "id": 45, + "seek": 13698, + "start": 152.64, + "end": 154.64, + "text": " Donc, rapport immédiate d'accord.", + "tokens": [ + 51126, + 7477, + 11, + 18018, + 3397, + 526, + 4504, + 473, + 274, + 6, + 19947, + 13, + 51248 + ], + "temperature": 0.0, + "avg_logprob": -0.533746434354234, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.1096271350979805, + "confidence": 0.861, + "words": [ + { + "text": "Donc,", + "start": 152.64, + "end": 153.08, + "confidence": 0.982 + }, + { + "text": "rapport", + "start": 153.16, + "end": 153.58, + "confidence": 0.979 + }, + { + "text": "immédiate", + "start": 153.58, + "end": 154.28, + "confidence": 0.841 + }, + { + "text": "d'accord.", + "start": 154.28, + "end": 154.64, + "confidence": 0.816 + } + ] + }, + { + "id": 46, + "seek": 13698, + "start": 155.66, + "end": 158.36, + "text": " Mais pourquoi, à ton impression qu'on en sortira, j'amé?", + "tokens": [ + 51248, + 6313, + 19934, + 11, + 1531, + 2952, + 9995, + 421, + 6, + 266, + 465, + 26906, + 64, + 11, + 361, + 6, + 335, + 526, + 30, + 51440 + ], + "temperature": 0.0, + "avg_logprob": -0.533746434354234, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.1096271350979805, + "confidence": 0.709, + "words": [ + { + "text": "Mais", + "start": 155.66, + "end": 155.88, + "confidence": 0.984 + }, + { + "text": "pourquoi,", + "start": 155.88, + "end": 
156.28, + "confidence": 0.897 + }, + { + "text": "à", + "start": 156.38, + "end": 156.58, + "confidence": 0.837 + }, + { + "text": "ton", + "start": 156.58, + "end": 156.7, + "confidence": 0.953 + }, + { + "text": "impression", + "start": 156.7, + "end": 157.06, + "confidence": 0.938 + }, + { + "text": "qu'on", + "start": 157.06, + "end": 157.28, + "confidence": 0.89 + }, + { + "text": "en", + "start": 157.28, + "end": 157.4, + "confidence": 0.801 + }, + { + "text": "sortira,", + "start": 157.4, + "end": 157.88, + "confidence": 0.576 + }, + { + "text": "j'amé?", + "start": 157.96, + "end": 158.36, + "confidence": 0.464 + } + ] + }, + { + "id": 47, + "seek": 13698, + "start": 159.16, + "end": 163.32, + "text": " Et puis, il faut en remettre la faute sur les gens qui ont créé cette critique merveilleux", + "tokens": [ + 51440, + 3790, + 9093, + 11, + 1930, + 8487, + 465, + 890, + 40681, + 635, + 2050, + 1169, + 1022, + 1512, + 10668, + 1956, + 6592, + 15609, + 526, + 5550, + 25673, + 3551, + 303, + 3409, + 2449, + 51678 + ], + "temperature": 0.0, + "avg_logprob": -0.533746434354234, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.1096271350979805, + "confidence": 0.714, + "words": [ + { + "text": "Et", + "start": 159.16, + "end": 159.32, + "confidence": 0.832 + }, + { + "text": "puis,", + "start": 159.32, + "end": 159.46, + "confidence": 0.494 + }, + { + "text": "il", + "start": 159.62, + "end": 159.64, + "confidence": 0.594 + }, + { + "text": "faut", + "start": 159.64, + "end": 159.66, + "confidence": 0.951 + }, + { + "text": "en", + "start": 159.66, + "end": 159.78, + "confidence": 0.937 + }, + { + "text": "remettre", + "start": 159.78, + "end": 160.14, + "confidence": 0.984 + }, + { + "text": "la", + "start": 160.14, + "end": 160.42, + "confidence": 0.602 + }, + { + "text": "faute", + "start": 160.42, + "end": 160.68, + "confidence": 0.59 + }, + { + "text": "sur", + "start": 160.68, + "end": 160.98, + "confidence": 0.966 + }, + { + "text": "les", 
+ "start": 160.98, + "end": 161.24, + "confidence": 0.828 + }, + { + "text": "gens", + "start": 161.24, + "end": 161.46, + "confidence": 0.986 + }, + { + "text": "qui", + "start": 161.46, + "end": 161.6, + "confidence": 0.98 + }, + { + "text": "ont", + "start": 161.6, + "end": 161.68, + "confidence": 0.95 + }, + { + "text": "créé", + "start": 161.68, + "end": 162.3, + "confidence": 0.95 + }, + { + "text": "cette", + "start": 162.3, + "end": 162.48, + "confidence": 0.908 + }, + { + "text": "critique", + "start": 162.48, + "end": 162.72, + "confidence": 0.032 + }, + { + "text": "merveilleux", + "start": 162.72, + "end": 163.32, + "confidence": 0.839 + } + ] + }, + { + "id": 48, + "seek": 13698, + "start": 163.32, + "end": 165.34, + "text": " et diabolique et diabolique par que merveilleux.", + "tokens": [ + 51678, + 1030, + 1026, + 14923, + 1925, + 1030, + 1026, + 14923, + 1925, + 971, + 631, + 3551, + 303, + 3409, + 2449, + 13, + 51778 + ], + "temperature": 0.0, + "avg_logprob": -0.533746434354234, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.1096271350979805, + "confidence": 0.708, + "words": [ + { + "text": "et", + "start": 163.32, + "end": 163.44, + "confidence": 0.945 + }, + { + "text": "diabolique", + "start": 163.44, + "end": 163.86, + "confidence": 0.573 + }, + { + "text": "et", + "start": 163.86, + "end": 163.94, + "confidence": 0.238 + }, + { + "text": "diabolique", + "start": 163.94, + "end": 164.38, + "confidence": 0.892 + }, + { + "text": "par", + "start": 164.38, + "end": 164.62, + "confidence": 0.544 + }, + { + "text": "que", + "start": 164.62, + "end": 164.82, + "confidence": 0.529 + }, + { + "text": "merveilleux.", + "start": 164.82, + "end": 165.34, + "confidence": 0.981 + } + ] + }, + { + "id": 49, + "seek": 16526, + "start": 166.9, + "end": 168.8, + "text": " Les économistes parlent de dépendance du santé.", + "tokens": [ + 50410, + 6965, + 31171, + 22368, + 13734, + 317, + 368, + 45768, + 719, + 1581, + 30068, + 13, + 50542 + 
], + "temperature": 0.0, + "avg_logprob": -0.6644251346588135, + "compression_ratio": 1.4761904761904763, + "no_speech_prob": 0.1915542334318161, + "confidence": 0.765, + "words": [ + { + "text": "Les", + "start": 166.9, + "end": 167.06, + "confidence": 0.699 + }, + { + "text": "économistes", + "start": 167.06, + "end": 167.52, + "confidence": 0.917 + }, + { + "text": "parlent", + "start": 167.52, + "end": 167.78, + "confidence": 0.793 + }, + { + "text": "de", + "start": 167.78, + "end": 167.84, + "confidence": 0.813 + }, + { + "text": "dépendance", + "start": 167.84, + "end": 168.36, + "confidence": 0.698 + }, + { + "text": "du", + "start": 168.36, + "end": 168.5, + "confidence": 0.963 + }, + { + "text": "santé.", + "start": 168.5, + "end": 168.8, + "confidence": 0.489 + } + ] + }, + { + "id": 50, + "seek": 16526, + "start": 168.82, + "end": 173.42, + "text": " Ces vidéos, en fait, on est un santé qui a été étabis, un soit mon termine, en marchand dessus,", + "tokens": [ + 50542, + 28414, + 25417, + 11, + 465, + 3887, + 11, + 322, + 871, + 517, + 30068, + 1956, + 257, + 8862, + 4823, + 455, + 271, + 11, + 517, + 12703, + 1108, + 1433, + 533, + 11, + 465, + 8368, + 474, + 30677, + 11, + 50784 + ], + "temperature": 0.0, + "avg_logprob": -0.6644251346588135, + "compression_ratio": 1.4761904761904763, + "no_speech_prob": 0.1915542334318161, + "confidence": 0.474, + "words": [ + { + "text": "Ces", + "start": 168.82, + "end": 169.14, + "confidence": 0.443 + }, + { + "text": "vidéos,", + "start": 169.14, + "end": 169.4, + "confidence": 0.455 + }, + { + "text": "en", + "start": 169.44, + "end": 169.64, + "confidence": 0.744 + }, + { + "text": "fait,", + "start": 169.64, + "end": 169.66, + "confidence": 0.976 + }, + { + "text": "on", + "start": 169.72, + "end": 169.74, + "confidence": 0.303 + }, + { + "text": "est", + "start": 169.74, + "end": 169.9, + "confidence": 0.814 + }, + { + "text": "un", + "start": 169.9, + "end": 170.48, + "confidence": 0.367 + }, + { + "text": 
"santé", + "start": 170.48, + "end": 170.8, + "confidence": 0.951 + }, + { + "text": "qui", + "start": 170.8, + "end": 170.92, + "confidence": 0.835 + }, + { + "text": "a", + "start": 170.92, + "end": 171.02, + "confidence": 0.819 + }, + { + "text": "été", + "start": 171.02, + "end": 171.12, + "confidence": 0.994 + }, + { + "text": "étabis,", + "start": 171.12, + "end": 171.5, + "confidence": 0.343 + }, + { + "text": "un", + "start": 171.76, + "end": 171.9, + "confidence": 0.236 + }, + { + "text": "soit", + "start": 171.9, + "end": 172.16, + "confidence": 0.31 + }, + { + "text": "mon", + "start": 172.16, + "end": 172.36, + "confidence": 0.205 + }, + { + "text": "termine,", + "start": 172.36, + "end": 172.72, + "confidence": 0.4 + }, + { + "text": "en", + "start": 172.8, + "end": 172.82, + "confidence": 0.309 + }, + { + "text": "marchand", + "start": 172.82, + "end": 173.14, + "confidence": 0.816 + }, + { + "text": "dessus,", + "start": 173.14, + "end": 173.42, + "confidence": 0.201 + } + ] + }, + { + "id": 51, + "seek": 16526, + "start": 173.86, + "end": 177.42, + "text": " soit des finissants débordes, des finissants, une signalétique.", + "tokens": [ + 50784, + 12703, + 730, + 962, + 891, + 1719, + 36529, + 765, + 279, + 11, + 730, + 962, + 891, + 1719, + 11, + 2251, + 6358, + 42379, + 13, + 50974 + ], + "temperature": 0.0, + "avg_logprob": -0.6644251346588135, + "compression_ratio": 1.4761904761904763, + "no_speech_prob": 0.1915542334318161, + "confidence": 0.582, + "words": [ + { + "text": "soit", + "start": 173.86, + "end": 174.4, + "confidence": 0.993 + }, + { + "text": "des", + "start": 174.4, + "end": 175.2, + "confidence": 0.759 + }, + { + "text": "finissants", + "start": 175.2, + "end": 175.58, + "confidence": 0.436 + }, + { + "text": "débordes,", + "start": 175.58, + "end": 175.98, + "confidence": 0.467 + }, + { + "text": "des", + "start": 176.14, + "end": 176.18, + "confidence": 0.222 + }, + { + "text": "finissants,", + "start": 176.18, + "end": 176.62, 
+ "confidence": 0.961 + }, + { + "text": "une", + "start": 176.66, + "end": 176.84, + "confidence": 0.75 + }, + { + "text": "signalétique.", + "start": 176.84, + "end": 177.42, + "confidence": 0.566 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/tiny_fr.cpu/bonjour_vous_allez_bien.mp3.words.json b/tests/expected/tiny_fr.cpu/bonjour_vous_allez_bien.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..adc9890af4a61ebbb4b7beeef62b7b6307ee0338 --- /dev/null +++ b/tests/expected/tiny_fr.cpu/bonjour_vous_allez_bien.mp3.words.json @@ -0,0 +1,190 @@ +{ + "text": " Bonjour! Est-ce que vous allez bien? Bonjour! Bonjour! Est-ce que vous allez bien?", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.46, + "end": 0.68, + "text": " Bonjour!", + "tokens": [ + 25431, + 2298 + ], + "temperature": 0.0, + "avg_logprob": -0.7739177703857422, + "compression_ratio": 0.8181818181818182, + "no_speech_prob": 0.04250079020857811, + "confidence": 0.69, + "words": [ + { + "text": "Bonjour!", + "start": 0.46, + "end": 0.68, + "confidence": 0.69 + } + ] + }, + { + "id": 1, + "seek": 148, + "start": 1.86, + "end": 2.8, + "text": " Est-ce que vous allez bien?", + "tokens": [ + 50364, + 4410, + 12, + 384, + 631, + 2630, + 18146, + 3610, + 2506, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.4815776131369851, + "compression_ratio": 0.7714285714285715, + "no_speech_prob": 0.04176269844174385, + "confidence": 0.632, + "words": [ + { + "text": "Est-ce", + "start": 1.86, + "end": 2.14, + "confidence": 0.548 + }, + { + "text": "que", + "start": 2.14, + "end": 2.18, + "confidence": 0.848 + }, + { + "text": "vous", + "start": 2.18, + "end": 2.34, + "confidence": 0.993 + }, + { + "text": "allez", + "start": 2.34, + "end": 2.48, + "confidence": 0.298 + }, + { + "text": "bien?", + "start": 2.48, + "end": 2.8, + "confidence": 0.979 + } + ] + }, + { + "id": 2, + "seek": 3148, + "start": 32.98, + "end": 33.16, + 
"text": " Bonjour!", + "tokens": [ + 25431, + 2298 + ], + "temperature": 0.0, + "avg_logprob": -0.34033950169881183, + "compression_ratio": 0.8181818181818182, + "no_speech_prob": 0.5495142936706543, + "confidence": 0.919, + "words": [ + { + "text": "Bonjour!", + "start": 32.98, + "end": 33.16, + "confidence": 0.919 + } + ] + }, + { + "id": 3, + "seek": 3348, + "start": 34.4, + "end": 34.61, + "text": " Bonjour!", + "tokens": [ + 25431, + 2298 + ], + "temperature": 0.0, + "avg_logprob": -0.23644089698791504, + "compression_ratio": 0.8181818181818182, + "no_speech_prob": 0.40533149242401123, + "confidence": 0.331, + "words": [ + { + "text": "Bonjour!", + "start": 34.4, + "end": 34.61, + "confidence": 0.331 + } + ] + }, + { + "id": 4, + "seek": 3448, + "start": 34.61, + "end": 35.34, + "text": " Est-ce que vous allez bien?", + "tokens": [ + 50364, + 4410, + 12, + 384, + 631, + 2630, + 18146, + 3610, + 2506, + 50464 + ], + "temperature": 0.0, + "avg_logprob": -0.17279924045909534, + "compression_ratio": 0.7714285714285715, + "no_speech_prob": 0.5867945551872253, + "confidence": 0.871, + "words": [ + { + "text": "Est-ce", + "start": 34.61, + "end": 34.66, + "confidence": 0.741 + }, + { + "text": "que", + "start": 34.66, + "end": 34.72, + "confidence": 0.979 + }, + { + "text": "vous", + "start": 34.72, + "end": 34.86, + "confidence": 0.999 + }, + { + "text": "allez", + "start": 34.86, + "end": 35.0, + "confidence": 0.959 + }, + { + "text": "bien?", + "start": 35.0, + "end": 35.34, + "confidence": 1.0 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/tiny_fr.cpu/radio_short.mp3.words.json b/tests/expected/tiny_fr.cpu/radio_short.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..b1910cc5b3224406e946a7c50d9507d3dfe850fe --- /dev/null +++ b/tests/expected/tiny_fr.cpu/radio_short.mp3.words.json @@ -0,0 +1,104 @@ +{ + "text": "............", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 
0.08, + "end": 0.76, + "text": "...", + "tokens": [ + 50364, + 1097, + 50614 + ], + "temperature": 0.0, + "avg_logprob": -1.4265364408493042, + "compression_ratio": 0.2727272727272727, + "no_speech_prob": 0.5112143754959106, + "confidence": 0.224, + "words": [ + { + "text": "...", + "start": 0.08, + "end": 0.76, + "confidence": 0.224 + } + ] + }, + { + "id": 1, + "seek": 6000, + "start": 60.02, + "end": 69.4, + "text": "...", + "tokens": [ + 50364, + 1097, + 51714 + ], + "temperature": 0.0, + "avg_logprob": -0.973953366279602, + "compression_ratio": 0.2727272727272727, + "no_speech_prob": 0.8780310153961182, + "confidence": 0.466, + "words": [ + { + "text": "...", + "start": 60.02, + "end": 69.4, + "confidence": 0.466 + } + ] + }, + { + "id": 2, + "seek": 9000, + "start": 90.02, + "end": 90.24, + "text": "...", + "tokens": [ + 1097 + ], + "temperature": 0.0, + "avg_logprob": -0.8283956527709961, + "compression_ratio": 0.2727272727272727, + "no_speech_prob": 0.641629695892334, + "confidence": 0.842, + "words": [ + { + "text": "...", + "start": 90.02, + "end": 90.24, + "confidence": 0.842 + } + ] + }, + { + "id": 3, + "seek": 10500, + "start": 105.04, + "end": 117.76, + "text": "...", + "tokens": [ + 50364, + 1097, + 51714 + ], + "temperature": 0.0, + "avg_logprob": -0.728730320930481, + "compression_ratio": 0.2727272727272727, + "no_speech_prob": 0.25682491064071655, + "confidence": 0.764, + "words": [ + { + "text": "...", + "start": 105.04, + "end": 117.76, + "confidence": 0.764 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/tiny_fr.cpu/smartphone.mp3.words.json b/tests/expected/tiny_fr.cpu/smartphone.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..f90643caf0519b49ba0cbc0381cd8d8ced1f218b --- /dev/null +++ b/tests/expected/tiny_fr.cpu/smartphone.mp3.words.json @@ -0,0 +1,5038 @@ +{ + "text": " C'est évidence que dit Nicolas. Mais je me l'étais jamais formulé comme ça. 
Ce qui fait la force du smartphone, c'est pas seulement la cumulation des fonctions, mais la manière dans quelques interagues entraîne. Et il est d'ailleurs, c'est la photo c'est hyper convaincant. Alors évidemment, il faudrait ajouter les interfaces les grand-attêtes qu'il a été beaucoup très souvent ementionné. Mais bon, il faudrait que les profites aussi de 20 ans pendant l'été, les ordinateurs nous ont appris à piquer sur des icônes. C'est ce que le smartphone ajoute le toucher, qui rend le contact plus direct, plus sensible. Et puis, évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté tout flu de la navigation web pour aller directement en but. Bref, tout ça, ce sont les conditions qui permettent de créer cette objet, en Nicolas, dit qu'il est très fondablement inédit dans l'histoire de l'humanité. Mais ça s'assoulait d'une autre interrogation. Est-ce que le fait que cette objet soit inédit un d'huies que notre rapport a lui est aussi un rapport inédit? Je veux dire, est-ce que le rapport qu'on a au sein de foi n'est comparable à celui qu'on entretenait à d'autres objectes techniques comme la voiture ou le téléphone? Il n'y a pas d'équivalent. On s'est espécie de nous voter dans la relation à l'objet. C'est facilement éterréciant parce qu'on a impression de, comme le 10, les utilisateurs et les efforts, elles aident dépendant de cette objet d'un lieu, en fait, une espèce de relation de médiation avec le monde qui rendent un peu avec la même sédiforme de le jeu. Donc, à objets inédits, rapport inédits. Et, ce rapport, si j'en prends Nicolas, frère caractérisée par un mélange de dépenses et de rojets. Bon, en vrai, il faudrait remonter très très finement toute l'histoire des objectes techniques et de leur infération dans le vie pour déterminer si ce rapport est totalement inédit. Mais j'ai l'impression comme ça que Nicolas se trompe pas vraiment. Pour autant, je sache. 
Il y a eu plein de discussions autour de la voiture ou même du téléphone. Mais la dépense n'était pas du même mort, donc le rejet n'en plus n'était pas du même mort. On peut adorer sa bagnure, en avoir besoin pour plein de choses. Et là, le soir, quand on va se coucher, on la laisse. On l'a pas dans la main, quand on est collis, quand on n'en mène pas au chiot. On pouvait être émervé par son mome qui occupeait la ligne de téléphone pendant une heure chaque soir pour discuter avec un copain. Mais ça ne ressemble pas à ce qu'on peut ressentir à voir même mome aujourd'hui continuuellement avec son smartphone dans la main, comme c'était une sorte de estimateur extère de l'intempis de lâcher à l'éantrénée, ça m'a eu immédiate. Bon, je dis ça pour le mome, mais évidemment, va là pour nos aussi. Donc, rapport immédiate d'accord. Mais pourquoi, à ton impression qu'on en sortira jamais? Et puis, il faut en remettre la faute sur les gens qui ont créé cette route merveilleux et diabolique, qui a dit à bollique par coeur, merveilleux. Les économistes parlent de dépendance du santé. 
Ces vidéos, en fait, on est un santé qui a été établie, c'est un soit mon termine, soit définissant des beurs, on définisse un signalétique.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.42, + "end": 1.38, + "text": " C'est évidence que dit Nicolas.", + "tokens": [ + 383, + 6, + 377, + 20090, + 2778, + 631, + 6176, + 38268, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.625, + "words": [ + { + "text": "C'est", + "start": 0.42, + "end": 0.66, + "confidence": 0.849 + }, + { + "text": "évidence", + "start": 0.66, + "end": 0.94, + "confidence": 0.368 + }, + { + "text": "que", + "start": 0.94, + "end": 1.06, + "confidence": 0.883 + }, + { + "text": "dit", + "start": 1.06, + "end": 1.16, + "confidence": 0.344 + }, + { + "text": "Nicolas.", + "start": 1.16, + "end": 1.38, + "confidence": 0.921 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 1.66, + "end": 3.62, + "text": " Mais je me l'étais jamais formulé comme ça.", + "tokens": [ + 6313, + 1506, + 385, + 287, + 6, + 22824, + 14540, + 1254, + 425, + 526, + 5173, + 2788, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.745, + "words": [ + { + "text": "Mais", + "start": 1.66, + "end": 1.9, + "confidence": 0.956 + }, + { + "text": "je", + "start": 1.9, + "end": 2.18, + "confidence": 0.629 + }, + { + "text": "me", + "start": 2.18, + "end": 2.3, + "confidence": 0.943 + }, + { + "text": "l'étais", + "start": 2.3, + "end": 2.54, + "confidence": 0.708 + }, + { + "text": "jamais", + "start": 2.54, + "end": 2.78, + "confidence": 0.962 + }, + { + "text": "formulé", + "start": 2.78, + "end": 3.2, + "confidence": 0.541 + }, + { + "text": "comme", + "start": 3.2, + "end": 3.34, + "confidence": 0.975 + }, + { + "text": "ça.", + "start": 3.34, + "end": 3.62, + "confidence": 0.979 
+ } + ] + }, + { + "id": 2, + "seek": 0, + "start": 4.14, + "end": 8.82, + "text": " Ce qui fait la force du smartphone, c'est pas seulement la cumulation des fonctions, mais la manière", + "tokens": [ + 8257, + 1956, + 3887, + 635, + 3464, + 1581, + 13307, + 11, + 269, + 6, + 377, + 1736, + 27772, + 635, + 12713, + 2776, + 730, + 17290, + 3916, + 11, + 2420, + 635, + 22267 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.762, + "words": [ + { + "text": "Ce", + "start": 4.14, + "end": 4.22, + "confidence": 0.39 + }, + { + "text": "qui", + "start": 4.22, + "end": 4.34, + "confidence": 0.934 + }, + { + "text": "fait", + "start": 4.34, + "end": 4.46, + "confidence": 0.735 + }, + { + "text": "la", + "start": 4.46, + "end": 4.56, + "confidence": 0.988 + }, + { + "text": "force", + "start": 4.56, + "end": 4.96, + "confidence": 0.933 + }, + { + "text": "du", + "start": 4.96, + "end": 5.16, + "confidence": 0.936 + }, + { + "text": "smartphone,", + "start": 5.16, + "end": 5.74, + "confidence": 0.909 + }, + { + "text": "c'est", + "start": 5.74, + "end": 6.12, + "confidence": 0.871 + }, + { + "text": "pas", + "start": 6.12, + "end": 6.2, + "confidence": 0.982 + }, + { + "text": "seulement", + "start": 6.2, + "end": 6.52, + "confidence": 0.991 + }, + { + "text": "la", + "start": 6.52, + "end": 6.76, + "confidence": 0.627 + }, + { + "text": "cumulation", + "start": 6.76, + "end": 7.18, + "confidence": 0.679 + }, + { + "text": "des", + "start": 7.18, + "end": 7.54, + "confidence": 0.752 + }, + { + "text": "fonctions,", + "start": 7.54, + "end": 8.1, + "confidence": 0.826 + }, + { + "text": "mais", + "start": 8.1, + "end": 8.42, + "confidence": 0.511 + }, + { + "text": "la", + "start": 8.42, + "end": 8.58, + "confidence": 0.717 + }, + { + "text": "manière", + "start": 8.58, + "end": 8.82, + "confidence": 0.457 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 8.9, 
+ "end": 10.84, + "text": " dans quelques interagues entraîne.", + "tokens": [ + 2680, + 16597, + 728, + 559, + 1247, + 22284, + 24741, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.3, + "words": [ + { + "text": "dans", + "start": 8.9, + "end": 9.04, + "confidence": 0.329 + }, + { + "text": "quelques", + "start": 9.04, + "end": 9.26, + "confidence": 0.282 + }, + { + "text": "interagues", + "start": 9.26, + "end": 10.18, + "confidence": 0.238 + }, + { + "text": "entraîne.", + "start": 10.18, + "end": 10.84, + "confidence": 0.421 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 11.04, + "end": 12.92, + "text": " Et il est d'ailleurs, c'est la photo c'est hyper convaincant.", + "tokens": [ + 3790, + 1930, + 871, + 274, + 6, + 19400, + 11, + 269, + 6, + 377, + 635, + 5052, + 269, + 6, + 377, + 9848, + 3754, + 491, + 66, + 394, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.625, + "words": [ + { + "text": "Et", + "start": 11.04, + "end": 11.1, + "confidence": 0.394 + }, + { + "text": "il", + "start": 11.1, + "end": 11.24, + "confidence": 0.128 + }, + { + "text": "est", + "start": 11.24, + "end": 11.38, + "confidence": 0.237 + }, + { + "text": "d'ailleurs,", + "start": 11.38, + "end": 11.62, + "confidence": 0.904 + }, + { + "text": "c'est", + "start": 11.62, + "end": 11.78, + "confidence": 0.879 + }, + { + "text": "la", + "start": 11.78, + "end": 11.82, + "confidence": 0.968 + }, + { + "text": "photo", + "start": 11.82, + "end": 11.92, + "confidence": 0.811 + }, + { + "text": "c'est", + "start": 11.92, + "end": 12.18, + "confidence": 0.792 + }, + { + "text": "hyper", + "start": 12.18, + "end": 12.38, + "confidence": 0.939 + }, + { + "text": "convaincant.", + "start": 12.38, + "end": 12.92, + "confidence": 0.494 + } + ] + }, + 
{ + "id": 5, + "seek": 0, + "start": 13.26, + "end": 18.03, + "text": " Alors évidemment, il faudrait ajouter les interfaces les grand-attêtes qu'il a été beaucoup", + "tokens": [ + 9946, + 24724, + 11, + 1930, + 38694, + 8645, + 17680, + 23985, + 1512, + 28416, + 1512, + 2697, + 12, + 1591, + 38262, + 421, + 6, + 388, + 257, + 8862, + 8796 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.553, + "words": [ + { + "text": "Alors", + "start": 13.26, + "end": 13.48, + "confidence": 0.904 + }, + { + "text": "évidemment,", + "start": 13.48, + "end": 13.92, + "confidence": 0.785 + }, + { + "text": "il", + "start": 13.92, + "end": 14.4, + "confidence": 0.961 + }, + { + "text": "faudrait", + "start": 14.4, + "end": 14.76, + "confidence": 0.85 + }, + { + "text": "ajouter", + "start": 14.76, + "end": 15.38, + "confidence": 0.874 + }, + { + "text": "les", + "start": 15.38, + "end": 15.62, + "confidence": 0.933 + }, + { + "text": "interfaces", + "start": 15.62, + "end": 15.86, + "confidence": 0.359 + }, + { + "text": "les", + "start": 15.86, + "end": 16.5, + "confidence": 0.414 + }, + { + "text": "grand-attêtes", + "start": 16.5, + "end": 16.94, + "confidence": 0.178 + }, + { + "text": "qu'il", + "start": 16.94, + "end": 17.18, + "confidence": 0.632 + }, + { + "text": "a", + "start": 17.18, + "end": 17.24, + "confidence": 0.969 + }, + { + "text": "été", + "start": 17.24, + "end": 17.5, + "confidence": 0.957 + }, + { + "text": "beaucoup", + "start": 17.5, + "end": 18.03, + "confidence": 0.572 + } + ] + }, + { + "id": 6, + "seek": 0, + "start": 18.03, + "end": 19.26, + "text": " très souvent ementionné.", + "tokens": [ + 5732, + 20847, + 846, + 1251, + 15055, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.541, + "words": [ + { + "text": "très", 
+ "start": 18.03, + "end": 18.66, + "confidence": 0.959 + }, + { + "text": "souvent", + "start": 18.66, + "end": 18.82, + "confidence": 0.994 + }, + { + "text": "ementionné.", + "start": 18.82, + "end": 19.26, + "confidence": 0.365 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 19.86, + "end": 23.54, + "text": " Mais bon, il faudrait que les profites aussi de 20 ans pendant l'été, les ordinateurs", + "tokens": [ + 6313, + 4428, + 11, + 1930, + 38694, + 8645, + 631, + 1512, + 1740, + 3324, + 6212, + 368, + 945, + 1567, + 17338, + 287, + 6, + 21210, + 11, + 1512, + 4792, + 13923, + 2156 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.648, + "words": [ + { + "text": "Mais", + "start": 19.86, + "end": 20.22, + "confidence": 0.975 + }, + { + "text": "bon,", + "start": 20.22, + "end": 20.56, + "confidence": 0.479 + }, + { + "text": "il", + "start": 20.56, + "end": 20.6, + "confidence": 0.978 + }, + { + "text": "faudrait", + "start": 20.6, + "end": 20.74, + "confidence": 0.753 + }, + { + "text": "que", + "start": 20.74, + "end": 20.92, + "confidence": 0.378 + }, + { + "text": "les", + "start": 20.92, + "end": 20.98, + "confidence": 0.182 + }, + { + "text": "profites", + "start": 20.98, + "end": 21.26, + "confidence": 0.622 + }, + { + "text": "aussi", + "start": 21.26, + "end": 21.7, + "confidence": 0.502 + }, + { + "text": "de", + "start": 21.7, + "end": 21.84, + "confidence": 0.468 + }, + { + "text": "20", + "start": 21.84, + "end": 22.08, + "confidence": 0.924 + }, + { + "text": "ans", + "start": 22.08, + "end": 22.28, + "confidence": 0.937 + }, + { + "text": "pendant", + "start": 22.28, + "end": 22.46, + "confidence": 0.903 + }, + { + "text": "l'été,", + "start": 22.46, + "end": 22.96, + "confidence": 0.499 + }, + { + "text": "les", + "start": 22.96, + "end": 23.04, + "confidence": 0.861 + }, + { + "text": "ordinateurs", + "start": 23.04, + "end": 
23.54, + "confidence": 0.934 + } + ] + }, + { + "id": 8, + "seek": 0, + "start": 23.58, + "end": 25.26, + "text": " nous ont appris à piquer sur des icônes.", + "tokens": [ + 4666, + 6592, + 724, + 5714, + 1531, + 280, + 23909, + 1022, + 730, + 4376, + 2851, + 4081, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.566579490710216, + "compression_ratio": 1.605421686746988, + "no_speech_prob": 0.14170263707637787, + "confidence": 0.663, + "words": [ + { + "text": "nous", + "start": 23.58, + "end": 23.74, + "confidence": 0.833 + }, + { + "text": "ont", + "start": 23.74, + "end": 23.86, + "confidence": 0.976 + }, + { + "text": "appris", + "start": 23.86, + "end": 24.06, + "confidence": 0.947 + }, + { + "text": "à", + "start": 24.06, + "end": 24.24, + "confidence": 0.24 + }, + { + "text": "piquer", + "start": 24.24, + "end": 24.42, + "confidence": 0.45 + }, + { + "text": "sur", + "start": 24.42, + "end": 24.68, + "confidence": 0.749 + }, + { + "text": "des", + "start": 24.68, + "end": 24.8, + "confidence": 0.96 + }, + { + "text": "icônes.", + "start": 24.8, + "end": 25.26, + "confidence": 0.656 + } + ] + }, + { + "id": 9, + "seek": 2556, + "start": 25.58, + "end": 30.56, + "text": " C'est ce que le smartphone ajoute le toucher, qui rend le contact plus direct, plus sensible.", + "tokens": [ + 383, + 6, + 377, + 1769, + 631, + 476, + 13307, + 17680, + 14040, + 476, + 2557, + 260, + 11, + 1956, + 6125, + 476, + 3385, + 1804, + 2047, + 11, + 1804, + 25380, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.643, + "words": [ + { + "text": "C'est", + "start": 25.58, + "end": 25.66, + "confidence": 0.449 + }, + { + "text": "ce", + "start": 25.66, + "end": 25.72, + "confidence": 0.491 + }, + { + "text": "que", + "start": 25.72, + "end": 25.94, + "confidence": 0.935 + }, + { + "text": "le", + "start": 25.94, + "end": 26.6, + "confidence": 0.377 + }, + { 
+ "text": "smartphone", + "start": 26.6, + "end": 26.86, + "confidence": 0.977 + }, + { + "text": "ajoute", + "start": 26.86, + "end": 27.42, + "confidence": 0.813 + }, + { + "text": "le", + "start": 27.42, + "end": 27.64, + "confidence": 0.957 + }, + { + "text": "toucher,", + "start": 27.64, + "end": 28.06, + "confidence": 0.73 + }, + { + "text": "qui", + "start": 28.06, + "end": 28.18, + "confidence": 0.209 + }, + { + "text": "rend", + "start": 28.18, + "end": 28.34, + "confidence": 0.877 + }, + { + "text": "le", + "start": 28.34, + "end": 28.68, + "confidence": 0.991 + }, + { + "text": "contact", + "start": 28.68, + "end": 28.96, + "confidence": 0.854 + }, + { + "text": "plus", + "start": 28.96, + "end": 29.48, + "confidence": 0.865 + }, + { + "text": "direct,", + "start": 29.48, + "end": 29.96, + "confidence": 0.692 + }, + { + "text": "plus", + "start": 29.96, + "end": 30.24, + "confidence": 0.928 + }, + { + "text": "sensible.", + "start": 30.24, + "end": 30.56, + "confidence": 0.332 + } + ] + }, + { + "id": 10, + "seek": 2556, + "start": 31.04, + "end": 34.34, + "text": " Et puis, évidemment, il faudrait parler aussi des applications qui permettent de contourner", + "tokens": [ + 3790, + 9093, + 11, + 24724, + 11, + 1930, + 38694, + 8645, + 16421, + 6212, + 730, + 5821, + 1956, + 21540, + 317, + 368, + 21234, + 1193 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.827, + "words": [ + { + "text": "Et", + "start": 31.04, + "end": 31.24, + "confidence": 0.914 + }, + { + "text": "puis,", + "start": 31.24, + "end": 31.34, + "confidence": 0.713 + }, + { + "text": "évidemment,", + "start": 31.34, + "end": 31.64, + "confidence": 0.378 + }, + { + "text": "il", + "start": 31.64, + "end": 31.78, + "confidence": 0.935 + }, + { + "text": "faudrait", + "start": 31.78, + "end": 31.88, + "confidence": 0.99 + }, + { + "text": "parler", + "start": 31.88, + 
"end": 32.12, + "confidence": 0.882 + }, + { + "text": "aussi", + "start": 32.12, + "end": 32.3, + "confidence": 0.889 + }, + { + "text": "des", + "start": 32.3, + "end": 32.44, + "confidence": 0.927 + }, + { + "text": "applications", + "start": 32.44, + "end": 32.78, + "confidence": 0.839 + }, + { + "text": "qui", + "start": 32.78, + "end": 33.16, + "confidence": 0.652 + }, + { + "text": "permettent", + "start": 33.16, + "end": 33.68, + "confidence": 0.951 + }, + { + "text": "de", + "start": 33.68, + "end": 33.9, + "confidence": 0.952 + }, + { + "text": "contourner", + "start": 33.9, + "end": 34.34, + "confidence": 0.787 + } + ] + }, + { + "id": 11, + "seek": 2556, + "start": 34.34, + "end": 37.72, + "text": " le côté tout flu de la navigation web pour aller directement en but.", + "tokens": [ + 476, + 18437, + 3486, + 5029, + 368, + 635, + 17346, + 3670, + 2016, + 8722, + 37297, + 465, + 457, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.766, + "words": [ + { + "text": "le", + "start": 34.34, + "end": 34.52, + "confidence": 0.989 + }, + { + "text": "côté", + "start": 34.52, + "end": 34.72, + "confidence": 0.983 + }, + { + "text": "tout", + "start": 34.72, + "end": 34.96, + "confidence": 0.954 + }, + { + "text": "flu", + "start": 34.96, + "end": 35.2, + "confidence": 0.548 + }, + { + "text": "de", + "start": 35.2, + "end": 35.42, + "confidence": 0.248 + }, + { + "text": "la", + "start": 35.42, + "end": 35.72, + "confidence": 0.902 + }, + { + "text": "navigation", + "start": 35.72, + "end": 36.04, + "confidence": 0.913 + }, + { + "text": "web", + "start": 36.04, + "end": 36.64, + "confidence": 0.93 + }, + { + "text": "pour", + "start": 36.64, + "end": 36.76, + "confidence": 0.746 + }, + { + "text": "aller", + "start": 36.76, + "end": 36.94, + "confidence": 0.992 + }, + { + "text": "directement", + "start": 36.94, + "end": 37.46, + 
"confidence": 0.986 + }, + { + "text": "en", + "start": 37.46, + "end": 37.68, + "confidence": 0.644 + }, + { + "text": "but.", + "start": 37.68, + "end": 37.72, + "confidence": 0.689 + } + ] + }, + { + "id": 12, + "seek": 2556, + "start": 37.72, + "end": 43.06, + "text": " Bref, tout ça, ce sont les conditions qui permettent de créer cette objet, en Nicolas,", + "tokens": [ + 49957, + 11, + 3486, + 2788, + 11, + 1769, + 4900, + 1512, + 4487, + 1956, + 21540, + 317, + 368, + 32062, + 5550, + 14964, + 11, + 465, + 38268, + 11 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.725, + "words": [ + { + "text": "Bref,", + "start": 37.72, + "end": 38.9, + "confidence": 0.967 + }, + { + "text": "tout", + "start": 38.9, + "end": 38.98, + "confidence": 0.786 + }, + { + "text": "ça,", + "start": 38.98, + "end": 39.36, + "confidence": 0.985 + }, + { + "text": "ce", + "start": 39.36, + "end": 39.68, + "confidence": 0.929 + }, + { + "text": "sont", + "start": 39.68, + "end": 39.84, + "confidence": 0.963 + }, + { + "text": "les", + "start": 39.84, + "end": 40.06, + "confidence": 0.976 + }, + { + "text": "conditions", + "start": 40.06, + "end": 40.54, + "confidence": 0.96 + }, + { + "text": "qui", + "start": 40.54, + "end": 40.9, + "confidence": 0.995 + }, + { + "text": "permettent", + "start": 40.9, + "end": 41.54, + "confidence": 0.986 + }, + { + "text": "de", + "start": 41.54, + "end": 41.58, + "confidence": 0.989 + }, + { + "text": "créer", + "start": 41.58, + "end": 41.92, + "confidence": 0.918 + }, + { + "text": "cette", + "start": 41.92, + "end": 42.3, + "confidence": 0.518 + }, + { + "text": "objet,", + "start": 42.3, + "end": 42.7, + "confidence": 0.213 + }, + { + "text": "en", + "start": 42.7, + "end": 42.8, + "confidence": 0.191 + }, + { + "text": "Nicolas,", + "start": 42.8, + "end": 43.06, + "confidence": 0.508 + } + ] + }, + { + "id": 13, + "seek": 
2556, + "start": 43.1, + "end": 46.48, + "text": " dit qu'il est très fondablement inédit dans l'histoire de l'humanité.", + "tokens": [ + 6176, + 421, + 6, + 388, + 871, + 5732, + 9557, + 712, + 518, + 294, + 7811, + 270, + 2680, + 287, + 6, + 29093, + 368, + 287, + 6, + 18796, + 5066, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.768, + "words": [ + { + "text": "dit", + "start": 43.1, + "end": 43.52, + "confidence": 0.551 + }, + { + "text": "qu'il", + "start": 43.52, + "end": 43.68, + "confidence": 0.969 + }, + { + "text": "est", + "start": 43.68, + "end": 43.8, + "confidence": 0.851 + }, + { + "text": "très", + "start": 43.8, + "end": 43.94, + "confidence": 0.368 + }, + { + "text": "fondablement", + "start": 43.94, + "end": 44.68, + "confidence": 0.593 + }, + { + "text": "inédit", + "start": 44.68, + "end": 45.52, + "confidence": 0.66 + }, + { + "text": "dans", + "start": 45.52, + "end": 45.74, + "confidence": 0.778 + }, + { + "text": "l'histoire", + "start": 45.74, + "end": 45.98, + "confidence": 0.825 + }, + { + "text": "de", + "start": 45.98, + "end": 46.08, + "confidence": 0.978 + }, + { + "text": "l'humanité.", + "start": 46.08, + "end": 46.48, + "confidence": 0.991 + } + ] + }, + { + "id": 14, + "seek": 2556, + "start": 47.06, + "end": 48.76, + "text": " Mais ça s'assoulait d'une autre interrogation.", + "tokens": [ + 6313, + 2788, + 262, + 6, + 640, + 263, + 35235, + 274, + 6, + 2613, + 15081, + 24871, + 399, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.697, + "words": [ + { + "text": "Mais", + "start": 47.06, + "end": 47.16, + "confidence": 0.939 + }, + { + "text": "ça", + "start": 47.16, + "end": 47.46, + "confidence": 0.854 + }, + { + "text": "s'assoulait", + "start": 47.46, + "end": 47.78, + 
"confidence": 0.591 + }, + { + "text": "d'une", + "start": 47.78, + "end": 48.02, + "confidence": 0.619 + }, + { + "text": "autre", + "start": 48.02, + "end": 48.14, + "confidence": 0.966 + }, + { + "text": "interrogation.", + "start": 48.14, + "end": 48.76, + "confidence": 0.833 + } + ] + }, + { + "id": 15, + "seek": 2556, + "start": 49.26, + "end": 54.34, + "text": " Est-ce que le fait que cette objet soit inédit un d'huies que notre rapport a lui est aussi", + "tokens": [ + 4410, + 12, + 384, + 631, + 476, + 3887, + 631, + 5550, + 14964, + 12703, + 294, + 7811, + 270, + 517, + 274, + 6, + 12086, + 530, + 631, + 10349, + 18018, + 257, + 8783, + 871, + 6212 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.697, + "words": [ + { + "text": "Est-ce", + "start": 49.26, + "end": 49.7, + "confidence": 0.804 + }, + { + "text": "que", + "start": 49.7, + "end": 49.74, + "confidence": 0.989 + }, + { + "text": "le", + "start": 49.74, + "end": 49.78, + "confidence": 0.882 + }, + { + "text": "fait", + "start": 49.78, + "end": 49.96, + "confidence": 0.962 + }, + { + "text": "que", + "start": 49.96, + "end": 50.08, + "confidence": 0.935 + }, + { + "text": "cette", + "start": 50.08, + "end": 50.24, + "confidence": 0.95 + }, + { + "text": "objet", + "start": 50.24, + "end": 50.5, + "confidence": 0.968 + }, + { + "text": "soit", + "start": 50.5, + "end": 50.96, + "confidence": 0.991 + }, + { + "text": "inédit", + "start": 50.96, + "end": 51.78, + "confidence": 0.91 + }, + { + "text": "un", + "start": 51.78, + "end": 52.06, + "confidence": 0.442 + }, + { + "text": "d'huies", + "start": 52.06, + "end": 52.32, + "confidence": 0.298 + }, + { + "text": "que", + "start": 52.32, + "end": 52.4, + "confidence": 0.966 + }, + { + "text": "notre", + "start": 52.4, + "end": 52.58, + "confidence": 0.992 + }, + { + "text": "rapport", + "start": 52.58, + "end": 53.1, + 
"confidence": 0.714 + }, + { + "text": "a", + "start": 53.1, + "end": 53.44, + "confidence": 0.522 + }, + { + "text": "lui", + "start": 53.44, + "end": 53.62, + "confidence": 0.659 + }, + { + "text": "est", + "start": 53.62, + "end": 54.0, + "confidence": 0.643 + }, + { + "text": "aussi", + "start": 54.0, + "end": 54.34, + "confidence": 0.808 + } + ] + }, + { + "id": 16, + "seek": 2556, + "start": 54.34, + "end": 55.32, + "text": " un rapport inédit?", + "tokens": [ + 517, + 18018, + 294, + 7811, + 270, + 2506 + ], + "temperature": 0.0, + "avg_logprob": -0.4095459938049316, + "compression_ratio": 1.7522658610271904, + "no_speech_prob": 0.0006256799097172916, + "confidence": 0.849, + "words": [ + { + "text": "un", + "start": 54.34, + "end": 54.72, + "confidence": 0.506 + }, + { + "text": "rapport", + "start": 54.72, + "end": 54.94, + "confidence": 0.999 + }, + { + "text": "inédit?", + "start": 54.94, + "end": 55.32, + "confidence": 0.956 + } + ] + }, + { + "id": 17, + "seek": 5548, + "start": 55.6, + "end": 58.6, + "text": " Je veux dire, est-ce que le rapport qu'on a au sein de foi n'est comparable à celui", + "tokens": [ + 2588, + 16389, + 1264, + 11, + 871, + 12, + 384, + 631, + 476, + 18018, + 421, + 6, + 266, + 257, + 1609, + 6195, + 368, + 6901, + 297, + 6, + 377, + 6311, + 712, + 1531, + 22829 + ], + "temperature": 0.0, + "avg_logprob": -0.6093524909167556, + "compression_ratio": 1.6292834890965733, + "no_speech_prob": 6.752492481609806e-05, + "confidence": 0.626, + "words": [ + { + "text": "Je", + "start": 55.6, + "end": 55.84, + "confidence": 0.335 + }, + { + "text": "veux", + "start": 55.84, + "end": 55.94, + "confidence": 0.432 + }, + { + "text": "dire,", + "start": 55.94, + "end": 56.16, + "confidence": 0.992 + }, + { + "text": "est-ce", + "start": 56.16, + "end": 56.34, + "confidence": 0.951 + }, + { + "text": "que", + "start": 56.34, + "end": 56.4, + "confidence": 0.973 + }, + { + "text": "le", + "start": 56.4, + "end": 56.54, + "confidence": 0.987 + 
}, + { + "text": "rapport", + "start": 56.54, + "end": 56.76, + "confidence": 0.999 + }, + { + "text": "qu'on", + "start": 56.76, + "end": 57.1, + "confidence": 0.906 + }, + { + "text": "a", + "start": 57.1, + "end": 57.14, + "confidence": 0.974 + }, + { + "text": "au", + "start": 57.14, + "end": 57.26, + "confidence": 0.319 + }, + { + "text": "sein", + "start": 57.26, + "end": 57.36, + "confidence": 0.244 + }, + { + "text": "de", + "start": 57.36, + "end": 57.44, + "confidence": 0.177 + }, + { + "text": "foi", + "start": 57.44, + "end": 57.54, + "confidence": 0.163 + }, + { + "text": "n'est", + "start": 57.54, + "end": 57.78, + "confidence": 0.777 + }, + { + "text": "comparable", + "start": 57.78, + "end": 58.32, + "confidence": 0.606 + }, + { + "text": "à", + "start": 58.32, + "end": 58.46, + "confidence": 0.482 + }, + { + "text": "celui", + "start": 58.46, + "end": 58.6, + "confidence": 0.831 + } + ] + }, + { + "id": 18, + "seek": 5548, + "start": 58.72, + "end": 62.81, + "text": " qu'on entretenait à d'autres objectes techniques comme la voiture ou le téléphone?", + "tokens": [ + 421, + 6, + 266, + 3962, + 1147, + 1001, + 1531, + 274, + 6, + 16752, + 2657, + 279, + 7512, + 5173, + 635, + 38859, + 2820, + 476, + 47159, + 2506 + ], + "temperature": 0.0, + "avg_logprob": -0.6093524909167556, + "compression_ratio": 1.6292834890965733, + "no_speech_prob": 6.752492481609806e-05, + "confidence": 0.781, + "words": [ + { + "text": "qu'on", + "start": 58.72, + "end": 58.88, + "confidence": 0.941 + }, + { + "text": "entretenait", + "start": 58.88, + "end": 59.38, + "confidence": 0.657 + }, + { + "text": "à", + "start": 59.38, + "end": 59.44, + "confidence": 0.973 + }, + { + "text": "d'autres", + "start": 59.44, + "end": 59.64, + "confidence": 0.909 + }, + { + "text": "objectes", + "start": 59.64, + "end": 60.02, + "confidence": 0.528 + }, + { + "text": "techniques", + "start": 60.02, + "end": 60.38, + "confidence": 0.547 + }, + { + "text": "comme", + "start": 60.38, + 
"end": 60.86, + "confidence": 0.662 + }, + { + "text": "la", + "start": 60.86, + "end": 61.46, + "confidence": 0.898 + }, + { + "text": "voiture", + "start": 61.46, + "end": 61.8, + "confidence": 0.954 + }, + { + "text": "ou", + "start": 61.8, + "end": 62.32, + "confidence": 0.74 + }, + { + "text": "le", + "start": 62.32, + "end": 62.6, + "confidence": 0.847 + }, + { + "text": "téléphone?", + "start": 62.6, + "end": 62.81, + "confidence": 0.979 + } + ] + }, + { + "id": 19, + "seek": 5548, + "start": 62.81, + "end": 66.07, + "text": " Il n'y a pas d'équivalent.", + "tokens": [ + 4416, + 297, + 6, + 88, + 257, + 1736, + 274, + 6, + 20183, + 3576, + 317, + 13 + ], + "temperature": 0.0, + "avg_logprob": -0.6093524909167556, + "compression_ratio": 1.6292834890965733, + "no_speech_prob": 6.752492481609806e-05, + "confidence": 0.882, + "words": [ + { + "text": "Il", + "start": 62.81, + "end": 65.42, + "confidence": 0.866 + }, + { + "text": "n'y", + "start": 65.42, + "end": 65.52, + "confidence": 0.778 + }, + { + "text": "très", + "start": 22.44, + "end": 22.46, + "confidence": 0.77 + }, + { + "text": "très", + "start": 22.46, + "end": 22.48, + "confidence": 0.768 + }, + { + "text": "très", + "start": 22.48, + "end": 22.5, + "confidence": 0.768 + }, + { + "text": "très", + "start": 22.5, + "end": 22.52, + "confidence": 0.775 + }, + { + "text": "très", + "start": 22.52, + "end": 22.54, + "confidence": 0.763 + }, + { + "text": "très", + "start": 22.54, + "end": 22.56, + "confidence": 0.768 + }, + { + "text": "très", + "start": 22.56, + "end": 22.58, + "confidence": 0.759 + }, + { + "text": "très", + "start": 22.58, + "end": 22.6, + "confidence": 0.771 + }, + { + "text": "très", + "start": 22.6, + "end": 22.62, + "confidence": 0.764 + }, + { + "text": "très", + "start": 22.62, + "end": 22.64, + "confidence": 0.762 + }, + { + "text": "très", + "start": 22.64, + "end": 22.66, + "confidence": 0.772 + }, + { + "text": "très", + "start": 22.66, + "end": 22.68, + "confidence": 
0.774 + }, + { + "text": "très", + "start": 22.68, + "end": 22.7, + "confidence": 0.764 + }, + { + "text": "très", + "start": 22.7, + "end": 22.72, + "confidence": 0.763 + }, + { + "text": "très", + "start": 22.72, + "end": 22.74, + "confidence": 0.755 + }, + { + "text": "très", + "start": 22.74, + "end": 22.76, + "confidence": 0.766 + }, + { + "text": "très", + "start": 22.76, + "end": 22.78, + "confidence": 0.766 + }, + { + "text": "très", + "start": 22.78, + "end": 22.8, + "confidence": 0.759 + }, + { + "text": "très", + "start": 22.8, + "end": 22.82, + "confidence": 0.768 + }, + { + "text": "très", + "start": 22.82, + "end": 23.04, + "confidence": 0.762 + }, + { + "text": "très", + "start": 23.04, + "end": 23.06, + "confidence": 0.764 + }, + { + "text": "très", + "start": 23.06, + "end": 23.08, + "confidence": 0.775 + }, + { + "text": "très", + "start": 23.08, + "end": 23.1, + "confidence": 0.785 + }, + { + "text": "très", + "start": 23.1, + "end": 23.5, + "confidence": 0.783 + }, + { + "text": "très", + "start": 23.5, + "end": 23.52, + "confidence": 0.792 + }, + { + "text": "très", + "start": 23.52, + "end": 23.54, + "confidence": 0.822 + }, + { + "text": "très", + "start": 23.54, + "end": 26.7, + "confidence": 0.791 + }, + { + "text": "très", + "start": 26.7, + "end": 29.98, + "confidence": 0.775 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/tiny_fr/laugh1.mp3.words.json b/tests/expected/tiny_fr/laugh1.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..8b8adba30908587d54afc1e12cad3560d62aaff0 --- /dev/null +++ b/tests/expected/tiny_fr/laugh1.mp3.words.json @@ -0,0 +1,81 @@ +{ + "text": " Je vais vous donner un peu de temps.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.18, + "end": 1.72, + "text": " Je vais vous donner un peu de temps.", + "tokens": [ + 50364, + 2588, + 9369, + 2630, + 20882, + 517, + 5604, + 368, + 8827, + 13, + 50464 + ], + "temperature": 0.0, + 
"avg_logprob": -1.8615148862202961, + "compression_ratio": 0.8181818181818182, + "no_speech_prob": 0.4180056154727936, + "confidence": 0.103, + "words": [ + { + "text": "Je", + "start": 0.18, + "end": 0.82, + "confidence": 0.066 + }, + { + "text": "vais", + "start": 0.82, + "end": 1.04, + "confidence": 0.128 + }, + { + "text": "vous", + "start": 1.04, + "end": 1.06, + "confidence": 0.075 + }, + { + "text": "donner", + "start": 1.06, + "end": 1.26, + "confidence": 0.048 + }, + { + "text": "un", + "start": 1.26, + "end": 1.66, + "confidence": 0.144 + }, + { + "text": "peu", + "start": 1.66, + "end": 1.68, + "confidence": 0.091 + }, + { + "text": "de", + "start": 1.68, + "end": 1.7, + "confidence": 0.625 + }, + { + "text": "temps.", + "start": 1.7, + "end": 1.72, + "confidence": 0.053 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/tiny_fr/laugh2.mp3.words.json b/tests/expected/tiny_fr/laugh2.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..3d2a43a49f3deb3765f81b932183eff4596f2c1f --- /dev/null +++ b/tests/expected/tiny_fr/laugh2.mp3.words.json @@ -0,0 +1,31 @@ +{ + "text": " ...", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.18, + "end": 0.66, + "text": " ...", + "tokens": [ + 50364, + 1097, + 50414 + ], + "temperature": 0.0, + "avg_logprob": -1.2527990341186523, + "compression_ratio": 0.2727272727272727, + "no_speech_prob": 0.5721203088760376, + "confidence": 0.088, + "words": [ + { + "text": "...", + "start": 0.18, + "end": 0.66, + "confidence": 0.088 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/tiny_fr/punctuations.mp3.words.json b/tests/expected/tiny_fr/punctuations.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..3da035b29fdb645b73a077037954dce5f1348e2a --- /dev/null +++ b/tests/expected/tiny_fr/punctuations.mp3.words.json @@ -0,0 +1,71 @@ +{ + "text": " Dima, est ce que l'on vole ?", + 
"segments": [ + { + "id": 0, + "seek": 0, + "start": 0.42, + "end": 2.58, + "text": " Dima, est ce que l'on vole ?", + "tokens": [ + 50364, + 413, + 4775, + 11, + 871, + 1769, + 631, + 287, + 6, + 266, + 49877, + 2506, + 50494 + ], + "temperature": 0.0, + "avg_logprob": -0.8432754789079938, + "compression_ratio": 0.7777777777777778, + "no_speech_prob": 0.0010773586109280586, + "confidence": 0.456, + "words": [ + { + "text": "Dima,", + "start": 0.42, + "end": 0.84, + "confidence": 0.247 + }, + { + "text": "est", + "start": 1.2, + "end": 1.4, + "confidence": 0.93 + }, + { + "text": "ce", + "start": 1.4, + "end": 1.56, + "confidence": 0.587 + }, + { + "text": "que", + "start": 1.56, + "end": 1.68, + "confidence": 0.916 + }, + { + "text": "l'on", + "start": 1.68, + "end": 2.02, + "confidence": 0.481 + }, + { + "text": "vole ?", + "start": 2.02, + "end": 2.58, + "confidence": 0.249 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/tiny_fr/radio_short.mp3.words.json b/tests/expected/tiny_fr/radio_short.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..b1664d3b54c4b3f4d433384b72c236208cb7c154 --- /dev/null +++ b/tests/expected/tiny_fr/radio_short.mp3.words.json @@ -0,0 +1,106 @@ +{ + "text": " ... ... ... 
...", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.44, + "end": 1.46, + "text": " ...", + "tokens": [ + 50364, + 1097, + 50614 + ], + "temperature": 0.0, + "avg_logprob": -1.4212262630462646, + "compression_ratio": 0.2727272727272727, + "no_speech_prob": 0.5096546411514282, + "confidence": 0.224, + "words": [ + { + "text": "...", + "start": 0.44, + "end": 1.46, + "confidence": 0.224 + } + ] + }, + { + "id": 1, + "seek": 6000, + "start": 60.0, + "end": 69.42, + "text": " ...", + "tokens": [ + 50364, + 1097, + 51714 + ], + "temperature": 0.0, + "avg_logprob": -0.9656867980957031, + "compression_ratio": 0.2727272727272727, + "no_speech_prob": 0.8783581256866455, + "confidence": 0.464, + "words": [ + { + "text": "...", + "start": 60.0, + "end": 69.42, + "confidence": 0.464 + } + ] + }, + { + "id": 2, + "seek": 9000, + "start": 105.48, + "end": 105.5, + "text": " ...", + "tokens": [ + 50364, + 1097, + 51114 + ], + "temperature": 0.0, + "avg_logprob": -0.8222726821899414, + "compression_ratio": 0.2727272727272727, + "no_speech_prob": 0.640861988067627, + "confidence": 0.842, + "words": [ + { + "text": "...", + "start": 105.48, + "end": 105.5, + "confidence": 0.842 + } + ] + }, + { + "id": 3, + "seek": 10500, + "start": 106.84, + "end": 108.62, + "text": " ...", + "tokens": [ + 50414, + 1097, + 50864 + ], + "temperature": 0.0, + "avg_logprob": -0.9236552715301514, + "compression_ratio": 0.2727272727272727, + "no_speech_prob": 0.8327455520629883, + "confidence": 0.47, + "words": [ + { + "text": "...", + "start": 106.84, + "end": 108.62, + "confidence": 0.47 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/tiny_fr/smartphone.mp3.words.json b/tests/expected/tiny_fr/smartphone.mp3.words.json new file mode 100644 index 0000000000000000000000000000000000000000..debc2f3712ed6a2e320e59b7cae8f5cd7feb2aa1 --- /dev/null +++ b/tests/expected/tiny_fr/smartphone.mp3.words.json @@ -0,0 +1,5091 @@ +{ + "text": " C'est évidence que 
dit Nicolas, mais je me l'étais jamais formulé comme ça. Ce qui fait la force du smartphone, c'est pas seulement la cumulation des fonctions, mais la manière dans quelques interagues entraîne. Et il est d'ailleurs, c'est la photo c'est hyper convaincant. Alors évidemment, il faudrait ajouter les interfaces les grand-attachilles à été beaucoup très souvent mentionnées. Mais bon, il faudrait que les profites aussi de 20 ans pendant l'été, les ordinateurs nous ont appris à piquer sur des icônes, sauf que, alors le smartphone ajoute le toucher, qui rend le contact plus direct, plus sensible. Et puis, évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté tout flu de la navigation web pour aller directement en but. Bref, tout ça, ce sont les conditions qui permettent de créer cette objet, en Nicolas, dit qu'il est très symbolablement inédit dans l'histoire de l'humanité. Mais ça s'assoulait d'une autre interrogation. Est-ce que le fait que cette objet soit inédit un d'huits que notre rapport a lui est aussi un rapport inédit ? Est-ce que le rapport qu'on a au sein de foi n'est comparable à celui qu'on entretenait à d'autres objectes techniques comme la voiture ou le téléphone ? Il n'y a pas d'équivalent. On s'est espèrent de nous voter dans la relation à l'objet. C'est facilement éterricion. Parce que la passion de l'utilisateur et ses affices a dépendance, cette objet d'un lieu en fait, une espèce de relation de médiation avec le monde qui rendent encore avec la maille de celles formes de rogeur. Donc, à objets inédits, rapport inédits. Et, ce rapport, si j'en prends Nicolas, frère caractérisée par un mélange de dépendance et de rogeur. Bon, en vrai, il faudrait remonter très, très filmant tout l'histoire des objectes techniques et de leur infertion dans nos vieux pour déterminer si ce rapport est totalement inédit. Mais j'ai l'impression comme ça que Nicolas se trompe pas vraiment. Pour autant, je sache. 
Il y a eu plein de discussions autour de la voiture ou même du téléphone. Mais, la dépense n'était pas du même mort, donc le rejet n'en plus n'était pas du même mort. On peut adorer sa bagnure. On a par besoin pour plein de choses. Et là, le soir, quand on va se coucher, on la laisse. On la pade en la main quand on est colis, qu'on n'a même pas au chiot. On pouvait être émervé par son mome qui occupeait la ligne de téléphone pendant une heure chaque soir pour discuter avec un copain. Mais ça ne ressemble pas à ce qu'on peut ressentir à voir même mome aujourd'hui. Continuellement avec son smartphone dans la main, comme c'était une sorte de estimateur extère de tomber de lâcher à l'éantrénée, ça m'a eu immédiate. Bon, je dis ça pour le mome, mais évidemment, va là, bon aussi. Donc, rapport immédiate d'accord. Mais pourquoi, à ton impression qu'on en sortira, j'amé? Et puis, il faut en remettre la faute sur les gens qui ont créé cette critique merveilleux et diabolique et diabolique par que merveilleux. Les économistes parlent de dépendance du santé. 
Ces vidéos, en fait, on est un santé qui a été étabis, un soit mon termine, en marchand dessus, soit des finissants débordes, des finissants, une signalétique.", + "segments": [ + { + "id": 0, + "seek": 0, + "start": 0.42, + "end": 3.66, + "text": " C'est évidence que dit Nicolas, mais je me l'étais jamais formulé comme ça.", + "tokens": [ + 50364, + 383, + 6, + 377, + 20090, + 2778, + 631, + 6176, + 38268, + 11, + 2420, + 1506, + 385, + 287, + 6, + 22824, + 14540, + 49990, + 526, + 5173, + 2788, + 13, + 50545 + ], + "temperature": 0.0, + "avg_logprob": -0.538998505563447, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14073368906974792, + "confidence": 0.709, + "words": [ + { + "text": "C'est", + "start": 0.42, + "end": 0.68, + "confidence": 0.849 + }, + { + "text": "évidence", + "start": 0.68, + "end": 0.94, + "confidence": 0.368 + }, + { + "text": "que", + "start": 0.94, + "end": 1.08, + "confidence": 0.883 + }, + { + "text": "dit", + "start": 1.08, + "end": 1.2, + "confidence": 0.342 + }, + { + "text": "Nicolas,", + "start": 1.2, + "end": 1.44, + "confidence": 0.921 + }, + { + "text": "mais", + "start": 1.88, + "end": 2.14, + "confidence": 0.914 + }, + { + "text": "je", + "start": 2.14, + "end": 2.26, + "confidence": 0.778 + }, + { + "text": "me", + "start": 2.26, + "end": 2.34, + "confidence": 0.954 + }, + { + "text": "l'étais", + "start": 2.34, + "end": 2.58, + "confidence": 0.72 + }, + { + "text": "jamais", + "start": 2.58, + "end": 2.86, + "confidence": 0.946 + }, + { + "text": "formulé", + "start": 2.86, + "end": 3.26, + "confidence": 0.53 + }, + { + "text": "comme", + "start": 3.26, + "end": 3.46, + "confidence": 0.968 + }, + { + "text": "ça.", + "start": 3.46, + "end": 3.66, + "confidence": 0.96 + } + ] + }, + { + "id": 1, + "seek": 0, + "start": 4.14, + "end": 8.9, + "text": " Ce qui fait la force du smartphone, c'est pas seulement la cumulation des fonctions, mais la manière", + "tokens": [ + 50545, + 8257, + 1956, + 3887, + 635, + 
3464, + 1581, + 13307, + 11, + 269, + 6, + 377, + 1736, + 27772, + 635, + 12713, + 2776, + 730, + 17290, + 3916, + 11, + 2420, + 635, + 22267, + 50806 + ], + "temperature": 0.0, + "avg_logprob": -0.538998505563447, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14073368906974792, + "confidence": 0.789, + "words": [ + { + "text": "Ce", + "start": 4.14, + "end": 4.26, + "confidence": 0.392 + }, + { + "text": "qui", + "start": 4.26, + "end": 4.38, + "confidence": 0.94 + }, + { + "text": "fait", + "start": 4.38, + "end": 4.56, + "confidence": 0.732 + }, + { + "text": "la", + "start": 4.56, + "end": 4.72, + "confidence": 0.988 + }, + { + "text": "force", + "start": 4.72, + "end": 5.02, + "confidence": 0.93 + }, + { + "text": "du", + "start": 5.02, + "end": 5.2, + "confidence": 0.938 + }, + { + "text": "smartphone,", + "start": 5.2, + "end": 5.58, + "confidence": 0.909 + }, + { + "text": "c'est", + "start": 5.9, + "end": 6.2, + "confidence": 0.947 + }, + { + "text": "pas", + "start": 6.2, + "end": 6.26, + "confidence": 0.983 + }, + { + "text": "seulement", + "start": 6.26, + "end": 6.6, + "confidence": 0.993 + }, + { + "text": "la", + "start": 6.6, + "end": 6.8, + "confidence": 0.633 + }, + { + "text": "cumulation", + "start": 6.8, + "end": 7.34, + "confidence": 0.689 + }, + { + "text": "des", + "start": 7.34, + "end": 7.56, + "confidence": 0.792 + }, + { + "text": "fonctions,", + "start": 7.56, + "end": 8.14, + "confidence": 0.834 + }, + { + "text": "mais", + "start": 8.38, + "end": 8.5, + "confidence": 0.668 + }, + { + "text": "la", + "start": 8.5, + "end": 8.62, + "confidence": 0.719 + }, + { + "text": "manière", + "start": 8.62, + "end": 8.9, + "confidence": 0.498 + } + ] + }, + { + "id": 2, + "seek": 0, + "start": 8.9, + "end": 10.98, + "text": " dans quelques interagues entraîne.", + "tokens": [ + 50806, + 2680, + 16597, + 728, + 559, + 1247, + 22284, + 24741, + 13, + 50906 + ], + "temperature": 0.0, + "avg_logprob": -0.538998505563447, + 
"compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14073368906974792, + "confidence": 0.303, + "words": [ + { + "text": "dans", + "start": 8.9, + "end": 9.06, + "confidence": 0.284 + }, + { + "text": "quelques", + "start": 9.06, + "end": 9.28, + "confidence": 0.281 + }, + { + "text": "interagues", + "start": 9.28, + "end": 10.38, + "confidence": 0.246 + }, + { + "text": "entraîne.", + "start": 10.38, + "end": 10.98, + "confidence": 0.446 + } + ] + }, + { + "id": 3, + "seek": 0, + "start": 11.0, + "end": 12.96, + "text": " Et il est d'ailleurs, c'est la photo c'est hyper convaincant.", + "tokens": [ + 50906, + 3790, + 1930, + 871, + 274, + 6, + 19400, + 11, + 269, + 6, + 377, + 635, + 5052, + 269, + 6, + 377, + 9848, + 3754, + 491, + 66, + 394, + 13, + 51006 + ], + "temperature": 0.0, + "avg_logprob": -0.538998505563447, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14073368906974792, + "confidence": 0.63, + "words": [ + { + "text": "Et", + "start": 11.0, + "end": 11.12, + "confidence": 0.36 + }, + { + "text": "il", + "start": 11.12, + "end": 11.28, + "confidence": 0.144 + }, + { + "text": "est", + "start": 11.28, + "end": 11.38, + "confidence": 0.241 + }, + { + "text": "d'ailleurs,", + "start": 11.38, + "end": 11.6, + "confidence": 0.904 + }, + { + "text": "c'est", + "start": 11.7, + "end": 11.78, + "confidence": 0.886 + }, + { + "text": "la", + "start": 11.78, + "end": 11.8, + "confidence": 0.969 + }, + { + "text": "photo", + "start": 11.8, + "end": 12.02, + "confidence": 0.809 + }, + { + "text": "c'est", + "start": 12.02, + "end": 12.26, + "confidence": 0.785 + }, + { + "text": "hyper", + "start": 12.26, + "end": 12.46, + "confidence": 0.938 + }, + { + "text": "convaincant.", + "start": 12.46, + "end": 12.96, + "confidence": 0.51 + } + ] + }, + { + "id": 4, + "seek": 0, + "start": 13.3, + "end": 18.8, + "text": " Alors évidemment, il faudrait ajouter les interfaces les grand-attachilles à été beaucoup très souvent", + "tokens": [ + 
51006, + 9946, + 24724, + 11, + 1930, + 38694, + 8645, + 17680, + 23985, + 1512, + 28416, + 1512, + 2697, + 12, + 1591, + 608, + 14835, + 1531, + 8862, + 8796, + 5732, + 20847, + 51306 + ], + "temperature": 0.0, + "avg_logprob": -0.538998505563447, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14073368906974792, + "confidence": 0.532, + "words": [ + { + "text": "Alors", + "start": 13.3, + "end": 13.56, + "confidence": 0.894 + }, + { + "text": "évidemment,", + "start": 13.56, + "end": 13.82, + "confidence": 0.778 + }, + { + "text": "il", + "start": 14.38, + "end": 14.4, + "confidence": 0.964 + }, + { + "text": "faudrait", + "start": 14.4, + "end": 14.76, + "confidence": 0.856 + }, + { + "text": "ajouter", + "start": 14.76, + "end": 15.38, + "confidence": 0.883 + }, + { + "text": "les", + "start": 15.38, + "end": 15.6, + "confidence": 0.934 + }, + { + "text": "interfaces", + "start": 15.6, + "end": 16.0, + "confidence": 0.38 + }, + { + "text": "les", + "start": 16.0, + "end": 16.5, + "confidence": 0.463 + }, + { + "text": "grand-attachilles", + "start": 16.5, + "end": 17.16, + "confidence": 0.213 + }, + { + "text": "à", + "start": 17.16, + "end": 17.32, + "confidence": 0.406 + }, + { + "text": "été", + "start": 17.32, + "end": 17.72, + "confidence": 0.894 + }, + { + "text": "beaucoup", + "start": 17.72, + "end": 18.28, + "confidence": 0.797 + }, + { + "text": "très", + "start": 18.28, + "end": 18.64, + "confidence": 0.493 + }, + { + "text": "souvent", + "start": 18.64, + "end": 18.8, + "confidence": 0.822 + } + ] + }, + { + "id": 5, + "seek": 0, + "start": 18.8, + "end": 19.84, + "text": " mentionnées.", + "tokens": [ + 51306, + 2152, + 77, + 6836, + 13, + 51356 + ], + "temperature": 0.0, + "avg_logprob": -0.538998505563447, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14073368906974792, + "confidence": 0.626, + "words": [ + { + "text": "mentionnées.", + "start": 18.8, + "end": 19.84, + "confidence": 0.626 + } + ] + }, + { + 
"id": 6, + "seek": 0, + "start": 20.02, + "end": 23.58, + "text": " Mais bon, il faudrait que les profites aussi de 20 ans pendant l'été, les ordinateurs", + "tokens": [ + 51356, + 6313, + 4428, + 11, + 1930, + 38694, + 8645, + 631, + 1512, + 1740, + 3324, + 6212, + 368, + 945, + 1567, + 17338, + 287, + 6, + 21210, + 11, + 1512, + 4792, + 13923, + 2156, + 51543 + ], + "temperature": 0.0, + "avg_logprob": -0.538998505563447, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14073368906974792, + "confidence": 0.658, + "words": [ + { + "text": "Mais", + "start": 20.02, + "end": 20.26, + "confidence": 0.98 + }, + { + "text": "bon,", + "start": 20.26, + "end": 20.46, + "confidence": 0.568 + }, + { + "text": "il", + "start": 20.6, + "end": 20.68, + "confidence": 0.975 + }, + { + "text": "faudrait", + "start": 20.68, + "end": 20.78, + "confidence": 0.771 + }, + { + "text": "que", + "start": 20.78, + "end": 20.92, + "confidence": 0.388 + }, + { + "text": "les", + "start": 20.92, + "end": 20.96, + "confidence": 0.22 + }, + { + "text": "profites", + "start": 20.96, + "end": 21.36, + "confidence": 0.57 + }, + { + "text": "aussi", + "start": 21.36, + "end": 21.72, + "confidence": 0.52 + }, + { + "text": "de", + "start": 21.72, + "end": 21.92, + "confidence": 0.492 + }, + { + "text": "20", + "start": 21.92, + "end": 22.14, + "confidence": 0.915 + }, + { + "text": "ans", + "start": 22.14, + "end": 22.32, + "confidence": 0.942 + }, + { + "text": "pendant", + "start": 22.32, + "end": 22.52, + "confidence": 0.916 + }, + { + "text": "l'été,", + "start": 22.52, + "end": 22.86, + "confidence": 0.48 + }, + { + "text": "les", + "start": 22.98, + "end": 23.1, + "confidence": 0.903 + }, + { + "text": "ordinateurs", + "start": 23.1, + "end": 23.58, + "confidence": 0.946 + } + ] + }, + { + "id": 7, + "seek": 0, + "start": 23.58, + "end": 28.07, + "text": " nous ont appris à piquer sur des icônes, sauf que, alors le smartphone ajoute le toucher,", + "tokens": [ + 51543, + 
4666, + 6592, + 724, + 5714, + 1531, + 280, + 23909, + 1022, + 730, + 4376, + 2851, + 4081, + 11, + 601, + 2947, + 631, + 11, + 11246, + 476, + 13307, + 17680, + 14040, + 476, + 2557, + 260, + 11, + 51766 + ], + "temperature": 0.0, + "avg_logprob": -0.538998505563447, + "compression_ratio": 1.6505681818181819, + "no_speech_prob": 0.14073368906974792, + "confidence": 0.656, + "words": [ + { + "text": "nous", + "start": 23.58, + "end": 23.78, + "confidence": 0.767 + }, + { + "text": "ont", + "start": 23.78, + "end": 23.9, + "confidence": 0.98 + }, + { + "text": "appris", + "start": 23.9, + "end": 24.12, + "confidence": 0.952 + }, + { + "text": "à", + "start": 24.12, + "end": 24.26, + "confidence": 0.34 + }, + { + "text": "piquer", + "start": 24.26, + "end": 24.54, + "confidence": 0.449 + }, + { + "text": "sur", + "start": 24.54, + "end": 24.72, + "confidence": 0.816 + }, + { + "text": "des", + "start": 24.72, + "end": 24.9, + "confidence": 0.96 + }, + { + "text": "icônes,", + "start": 24.9, + "end": 25.56, + "confidence": 0.599 + }, + { + "text": "sauf", + "start": 25.64, + "end": 25.8, + "confidence": 0.522 + }, + { + "text": "que,", + "start": 25.8, + "end": 26.36, + "confidence": 0.915 + }, + { + "text": "alors", + "start": 26.36, + "end": 26.58, + "confidence": 0.396 + }, + { + "text": "le", + "start": 26.58, + "end": 26.72, + "confidence": 0.824 + }, + { + "text": "smartphone", + "start": 26.72, + "end": 27.0, + "confidence": 0.977 + }, + { + "text": "ajoute", + "start": 27.0, + "end": 27.5, + "confidence": 0.674 + }, + { + "text": "le", + "start": 27.5, + "end": 27.62, + "confidence": 0.552 + }, + { + "text": "toucher,", + "start": 27.62, + "end": 28.07, + "confidence": 0.631 + } + ] + }, + { + "id": 8, + "seek": 2804, + "start": 28.07, + "end": 30.6, + "text": " qui rend le contact plus direct, plus sensible.", + "tokens": [ + 50364, + 1956, + 6125, + 476, + 3385, + 1804, + 2047, + 11, + 1804, + 25380, + 13, + 50496 + ], + "temperature": 0.0, + "avg_logprob": 
-0.4256511350878242, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.12921307981014252, + "confidence": 0.742, + "words": [ + { + "text": "qui", + "start": 28.07, + "end": 28.26, + "confidence": 0.305 + }, + { + "text": "rend", + "start": 28.26, + "end": 28.5, + "confidence": 0.763 + }, + { + "text": "le", + "start": 28.5, + "end": 28.72, + "confidence": 0.988 + }, + { + "text": "contact", + "start": 28.72, + "end": 29.06, + "confidence": 0.817 + }, + { + "text": "plus", + "start": 29.06, + "end": 29.48, + "confidence": 0.84 + }, + { + "text": "direct,", + "start": 29.48, + "end": 30.02, + "confidence": 0.944 + }, + { + "text": "plus", + "start": 30.18, + "end": 30.24, + "confidence": 0.992 + }, + { + "text": "sensible.", + "start": 30.24, + "end": 30.6, + "confidence": 0.618 + } + ] + }, + { + "id": 9, + "seek": 2804, + "start": 31.1, + "end": 34.76, + "text": " Et puis, évidemment, il faudrait parler aussi des applications qui permettent de contourner le côté", + "tokens": [ + 50496, + 3790, + 9093, + 11, + 24724, + 11, + 1930, + 38694, + 8645, + 16421, + 6212, + 730, + 5821, + 1956, + 21540, + 317, + 368, + 21234, + 1193, + 476, + 18437, + 50698 + ], + "temperature": 0.0, + "avg_logprob": -0.4256511350878242, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.12921307981014252, + "confidence": 0.777, + "words": [ + { + "text": "Et", + "start": 31.1, + "end": 31.24, + "confidence": 0.949 + }, + { + "text": "puis,", + "start": 31.24, + "end": 31.36, + "confidence": 0.721 + }, + { + "text": "évidemment,", + "start": 31.42, + "end": 31.62, + "confidence": 0.213 + }, + { + "text": "il", + "start": 31.7, + "end": 31.76, + "confidence": 0.959 + }, + { + "text": "faudrait", + "start": 31.76, + "end": 31.94, + "confidence": 0.994 + }, + { + "text": "parler", + "start": 31.94, + "end": 32.14, + "confidence": 0.847 + }, + { + "text": "aussi", + "start": 32.14, + "end": 32.36, + "confidence": 0.925 + }, + { + "text": "des", + "start": 32.36, + 
"end": 32.46, + "confidence": 0.921 + }, + { + "text": "applications", + "start": 32.46, + "end": 32.88, + "confidence": 0.857 + }, + { + "text": "qui", + "start": 32.88, + "end": 33.2, + "confidence": 0.663 + }, + { + "text": "permettent", + "start": 33.2, + "end": 33.8, + "confidence": 0.952 + }, + { + "text": "de", + "start": 33.8, + "end": 33.96, + "confidence": 0.951 + }, + { + "text": "contourner", + "start": 33.96, + "end": 34.4, + "confidence": 0.721 + }, + { + "text": "le", + "start": 34.4, + "end": 34.52, + "confidence": 0.654 + }, + { + "text": "côté", + "start": 34.52, + "end": 34.76, + "confidence": 0.641 + } + ] + }, + { + "id": 10, + "seek": 2804, + "start": 34.8, + "end": 37.86, + "text": " tout flu de la navigation web pour aller directement en but.", + "tokens": [ + 50698, + 3486, + 5029, + 368, + 635, + 17346, + 3670, + 2016, + 8722, + 37297, + 465, + 457, + 13, + 50860 + ], + "temperature": 0.0, + "avg_logprob": -0.4256511350878242, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.12921307981014252, + "confidence": 0.735, + "words": [ + { + "text": "tout", + "start": 34.8, + "end": 35.04, + "confidence": 0.945 + }, + { + "text": "flu", + "start": 35.04, + "end": 35.32, + "confidence": 0.484 + }, + { + "text": "de", + "start": 35.32, + "end": 35.64, + "confidence": 0.35 + }, + { + "text": "la", + "start": 35.64, + "end": 35.78, + "confidence": 0.921 + }, + { + "text": "navigation", + "start": 35.78, + "end": 36.24, + "confidence": 0.903 + }, + { + "text": "web", + "start": 36.24, + "end": 36.64, + "confidence": 0.913 + }, + { + "text": "pour", + "start": 36.64, + "end": 36.84, + "confidence": 0.57 + }, + { + "text": "aller", + "start": 36.84, + "end": 37.06, + "confidence": 0.991 + }, + { + "text": "directement", + "start": 37.06, + "end": 37.48, + "confidence": 0.981 + }, + { + "text": "en", + "start": 37.48, + "end": 37.7, + "confidence": 0.654 + }, + { + "text": "but.", + "start": 37.7, + "end": 37.86, + "confidence": 0.768 + } 
+ ] + }, + { + "id": 11, + "seek": 2804, + "start": 38.78, + "end": 43.12, + "text": " Bref, tout ça, ce sont les conditions qui permettent de créer cette objet, en Nicolas,", + "tokens": [ + 50860, + 49957, + 11, + 3486, + 2788, + 11, + 1769, + 4900, + 1512, + 4487, + 1956, + 21540, + 317, + 368, + 32062, + 5550, + 14964, + 11, + 465, + 38268, + 11, + 51121 + ], + "temperature": 0.0, + "avg_logprob": -0.4256511350878242, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.12921307981014252, + "confidence": 0.737, + "words": [ + { + "text": "Bref,", + "start": 38.78, + "end": 38.8, + "confidence": 0.972 + }, + { + "text": "tout", + "start": 38.84, + "end": 39.04, + "confidence": 0.817 + }, + { + "text": "ça,", + "start": 39.04, + "end": 39.46, + "confidence": 0.984 + }, + { + "text": "ce", + "start": 39.54, + "end": 39.76, + "confidence": 0.905 + }, + { + "text": "sont", + "start": 39.76, + "end": 39.96, + "confidence": 0.98 + }, + { + "text": "les", + "start": 39.96, + "end": 40.1, + "confidence": 0.982 + }, + { + "text": "conditions", + "start": 40.1, + "end": 40.64, + "confidence": 0.956 + }, + { + "text": "qui", + "start": 40.64, + "end": 40.96, + "confidence": 0.995 + }, + { + "text": "permettent", + "start": 40.96, + "end": 41.58, + "confidence": 0.994 + }, + { + "text": "de", + "start": 41.58, + "end": 41.64, + "confidence": 0.993 + }, + { + "text": "créer", + "start": 41.64, + "end": 42.08, + "confidence": 0.91 + }, + { + "text": "cette", + "start": 42.08, + "end": 42.34, + "confidence": 0.473 + }, + { + "text": "objet,", + "start": 42.34, + "end": 42.64, + "confidence": 0.235 + }, + { + "text": "en", + "start": 42.7, + "end": 42.84, + "confidence": 0.221 + }, + { + "text": "Nicolas,", + "start": 42.84, + "end": 43.12, + "confidence": 0.537 + } + ] + }, + { + "id": 12, + "seek": 2804, + "start": 43.12, + "end": 46.58, + "text": " dit qu'il est très symbolablement inédit dans l'histoire de l'humanité.", + "tokens": [ + 51121, + 6176, + 421, + 6, 
+ 388, + 871, + 5732, + 5986, + 712, + 518, + 294, + 7811, + 270, + 2680, + 287, + 6, + 29093, + 368, + 287, + 6, + 18796, + 5066, + 13, + 51290 + ], + "temperature": 0.0, + "avg_logprob": -0.4256511350878242, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.12921307981014252, + "confidence": 0.717, + "words": [ + { + "text": "dit", + "start": 43.12, + "end": 43.56, + "confidence": 0.696 + }, + { + "text": "qu'il", + "start": 43.56, + "end": 43.76, + "confidence": 0.958 + }, + { + "text": "est", + "start": 43.76, + "end": 43.9, + "confidence": 0.849 + }, + { + "text": "très", + "start": 43.9, + "end": 44.08, + "confidence": 0.443 + }, + { + "text": "symbolablement", + "start": 44.08, + "end": 44.86, + "confidence": 0.321 + }, + { + "text": "inédit", + "start": 44.86, + "end": 45.54, + "confidence": 0.655 + }, + { + "text": "dans", + "start": 45.54, + "end": 45.74, + "confidence": 0.817 + }, + { + "text": "l'histoire", + "start": 45.74, + "end": 46.02, + "confidence": 0.824 + }, + { + "text": "de", + "start": 46.02, + "end": 46.14, + "confidence": 0.983 + }, + { + "text": "l'humanité.", + "start": 46.14, + "end": 46.58, + "confidence": 0.99 + } + ] + }, + { + "id": 13, + "seek": 2804, + "start": 47.06, + "end": 48.76, + "text": " Mais ça s'assoulait d'une autre interrogation.", + "tokens": [ + 51290, + 6313, + 2788, + 262, + 6, + 640, + 263, + 35235, + 274, + 6, + 2613, + 15081, + 24871, + 399, + 13, + 51402 + ], + "temperature": 0.0, + "avg_logprob": -0.4256511350878242, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.12921307981014252, + "confidence": 0.717, + "words": [ + { + "text": "Mais", + "start": 47.06, + "end": 47.24, + "confidence": 0.928 + }, + { + "text": "ça", + "start": 47.24, + "end": 47.48, + "confidence": 0.75 + }, + { + "text": "s'assoulait", + "start": 47.48, + "end": 47.84, + "confidence": 0.616 + }, + { + "text": "d'une", + "start": 47.84, + "end": 48.08, + "confidence": 0.674 + }, + { + "text": "autre", + 
"start": 48.08, + "end": 48.18, + "confidence": 0.964 + }, + { + "text": "interrogation.", + "start": 48.18, + "end": 48.76, + "confidence": 0.855 + } + ] + }, + { + "id": 14, + "seek": 2804, + "start": 49.42, + "end": 54.93, + "text": " Est-ce que le fait que cette objet soit inédit un d'huits que notre rapport a lui est aussi un rapport", + "tokens": [ + 51402, + 4410, + 12, + 384, + 631, + 476, + 3887, + 631, + 5550, + 14964, + 12703, + 294, + 7811, + 270, + 517, + 274, + 6, + 12086, + 1208, + 631, + 10349, + 18018, + 257, + 8783, + 871, + 6212, + 517, + 18018, + 51710 + ], + "temperature": 0.0, + "avg_logprob": -0.4256511350878242, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.12921307981014252, + "confidence": 0.704, + "words": [ + { + "text": "Est-ce", + "start": 49.42, + "end": 49.7, + "confidence": 0.919 + }, + { + "text": "que", + "start": 49.7, + "end": 49.74, + "confidence": 0.99 + }, + { + "text": "le", + "start": 49.74, + "end": 49.82, + "confidence": 0.871 + }, + { + "text": "fait", + "start": 49.82, + "end": 50.02, + "confidence": 0.945 + }, + { + "text": "que", + "start": 50.02, + "end": 50.16, + "confidence": 0.923 + }, + { + "text": "cette", + "start": 50.16, + "end": 50.3, + "confidence": 0.943 + }, + { + "text": "objet", + "start": 50.3, + "end": 50.64, + "confidence": 0.963 + }, + { + "text": "soit", + "start": 50.64, + "end": 51.1, + "confidence": 0.99 + }, + { + "text": "inédit", + "start": 51.1, + "end": 51.82, + "confidence": 0.928 + }, + { + "text": "un", + "start": 51.82, + "end": 52.08, + "confidence": 0.511 + }, + { + "text": "d'huits", + "start": 52.08, + "end": 52.34, + "confidence": 0.246 + }, + { + "text": "que", + "start": 52.34, + "end": 52.44, + "confidence": 0.974 + }, + { + "text": "notre", + "start": 52.44, + "end": 52.66, + "confidence": 0.992 + }, + { + "text": "rapport", + "start": 52.66, + "end": 53.24, + "confidence": 0.779 + }, + { + "text": "a", + "start": 53.24, + "end": 53.5, + "confidence": 0.57 + 
}, + { + "text": "lui", + "start": 53.5, + "end": 53.7, + "confidence": 0.701 + }, + { + "text": "est", + "start": 53.7, + "end": 54.04, + "confidence": 0.879 + }, + { + "text": "aussi", + "start": 54.04, + "end": 54.52, + "confidence": 0.815 + }, + { + "text": "un", + "start": 54.52, + "end": 54.7, + "confidence": 0.48 + }, + { + "text": "rapport", + "start": 54.7, + "end": 54.93, + "confidence": 0.952 + } + ] + }, + { + "id": 15, + "seek": 2804, + "start": 54.93, + "end": 55.88, + "text": " inédit ?", + "tokens": [ + 51710, + 294, + 7811, + 270, + 2506, + 51760 + ], + "temperature": 0.0, + "avg_logprob": -0.4256511350878242, + "compression_ratio": 1.7156549520766773, + "no_speech_prob": 0.12921307981014252, + "confidence": 0.86, + "words": [ + { + "text": "inédit ?", + "start": 54.93, + "end": 55.88, + "confidence": 0.86 + } + ] + }, + { + "id": 16, + "seek": 5596, + "start": 55.96, + "end": 59.36, + "text": " Est-ce que le rapport qu'on a au sein de foi n'est comparable à celui qu'on entretenait", + "tokens": [ + 50364, + 4410, + 12, + 384, + 631, + 476, + 18018, + 421, + 6, + 266, + 257, + 1609, + 6195, + 368, + 6901, + 297, + 6, + 377, + 25323, + 1531, + 22829, + 421, + 6, + 266, + 3962, + 1147, + 1001, + 50530 + ], + "temperature": 0.0, + "avg_logprob": -0.6293430873325893, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948836743831635, + "confidence": 0.62, + "words": [ + { + "text": "Est-ce", + "start": 55.96, + "end": 56.38, + "confidence": 0.553 + }, + { + "text": "que", + "start": 56.38, + "end": 56.44, + "confidence": 0.892 + }, + { + "text": "le", + "start": 56.44, + "end": 56.58, + "confidence": 0.982 + }, + { + "text": "rapport", + "start": 56.58, + "end": 56.9, + "confidence": 0.999 + }, + { + "text": "qu'on", + "start": 56.9, + "end": 57.14, + "confidence": 0.902 + }, + { + "text": "a", + "start": 57.14, + "end": 57.2, + "confidence": 0.959 + }, + { + "text": "au", + "start": 57.2, + "end": 57.28, + "confidence": 0.273 + }, + { 
+ "text": "sein", + "start": 57.28, + "end": 57.42, + "confidence": 0.17 + }, + { + "text": "de", + "start": 57.42, + "end": 57.56, + "confidence": 0.229 + }, + { + "text": "foi", + "start": 57.56, + "end": 57.58, + "confidence": 0.121 + }, + { + "text": "n'est", + "start": 57.58, + "end": 57.78, + "confidence": 0.79 + }, + { + "text": "comparable", + "start": 57.78, + "end": 58.28, + "confidence": 0.547 + }, + { + "text": "à", + "start": 58.28, + "end": 58.48, + "confidence": 0.934 + }, + { + "text": "celui", + "start": 58.48, + "end": 58.7, + "confidence": 0.851 + }, + { + "text": "qu'on", + "start": 58.7, + "end": 58.94, + "confidence": 0.926 + }, + { + "text": "entretenait", + "start": 58.94, + "end": 59.36, + "confidence": 0.56 + } + ] + }, + { + "id": 17, + "seek": 5596, + "start": 59.36, + "end": 63.14, + "text": " à d'autres objectes techniques comme la voiture ou le téléphone ?", + "tokens": [ + 50530, + 1531, + 274, + 6, + 16752, + 2657, + 279, + 7512, + 5173, + 635, + 38859, + 2820, + 476, + 47159, + 2506, + 50714 + ], + "temperature": 0.0, + "avg_logprob": -0.6293430873325893, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948836743831635, + "confidence": 0.756, + "words": [ + { + "text": "à", + "start": 59.36, + "end": 59.44, + "confidence": 0.951 + }, + { + "text": "d'autres", + "start": 59.44, + "end": 59.68, + "confidence": 0.857 + }, + { + "text": "objectes", + "start": 59.68, + "end": 60.02, + "confidence": 0.507 + }, + { + "text": "techniques", + "start": 60.02, + "end": 60.44, + "confidence": 0.462 + }, + { + "text": "comme", + "start": 60.44, + "end": 61.04, + "confidence": 0.639 + }, + { + "text": "la", + "start": 61.04, + "end": 61.52, + "confidence": 0.905 + }, + { + "text": "voiture", + "start": 61.52, + "end": 61.86, + "confidence": 0.946 + }, + { + "text": "ou", + "start": 61.86, + "end": 62.46, + "confidence": 0.755 + }, + { + "text": "le", + "start": 62.46, + "end": 62.68, + "confidence": 0.913 + }, + { + "text": 
"téléphone ?", + "start": 62.68, + "end": 63.14, + "confidence": 0.983 + } + ] + }, + { + "id": 18, + "seek": 5596, + "start": 65.4, + "end": 66.16, + "text": " Il n'y a pas d'équivalent.", + "tokens": [ + 50714, + 4416, + 297, + 6, + 88, + 257, + 1736, + 274, + 6, + 20183, + 3576, + 317, + 13, + 50872 + ], + "temperature": 0.0, + "avg_logprob": -0.6293430873325893, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948836743831635, + "confidence": 0.881, + "words": [ + { + "text": "Il", + "start": 65.4, + "end": 65.5, + "confidence": 0.901 + }, + { + "text": "n'y", + "start": 65.5, + "end": 65.54, + "confidence": 0.86 + }, + { + "text": "a", + "start": 65.54, + "end": 65.56, + "confidence": 0.962 + }, + { + "text": "pas", + "start": 65.56, + "end": 65.68, + "confidence": 0.998 + }, + { + "text": "d'équivalent.", + "start": 65.68, + "end": 66.16, + "confidence": 0.852 + } + ] + }, + { + "id": 19, + "seek": 5596, + "start": 66.16, + "end": 69.92, + "text": " On s'est espèrent de nous voter dans la relation à l'objet.", + "tokens": [ + 50872, + 1282, + 262, + 6, + 377, + 7089, + 1462, + 1753, + 368, + 4666, + 21722, + 2680, + 635, + 9721, + 1531, + 287, + 6, + 996, + 7108, + 13, + 51058 + ], + "temperature": 0.0, + "avg_logprob": -0.6293430873325893, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948836743831635, + "confidence": 0.501, + "words": [ + { + "text": "On", + "start": 66.16, + "end": 67.08, + "confidence": 0.302 + }, + { + "text": "s'est", + "start": 67.08, + "end": 67.3, + "confidence": 0.563 + }, + { + "text": "espèrent", + "start": 67.3, + "end": 67.62, + "confidence": 0.134 + }, + { + "text": "de", + "start": 67.62, + "end": 67.66, + "confidence": 0.99 + }, + { + "text": "nous", + "start": 67.66, + "end": 67.84, + "confidence": 0.763 + }, + { + "text": "voter", + "start": 67.84, + "end": 68.36, + "confidence": 0.181 + }, + { + "text": "dans", + "start": 68.36, + "end": 68.86, + "confidence": 0.82 + }, + { + "text": 
"la", + "start": 68.86, + "end": 68.96, + "confidence": 0.617 + }, + { + "text": "relation", + "start": 68.96, + "end": 69.24, + "confidence": 0.949 + }, + { + "text": "à", + "start": 69.24, + "end": 69.42, + "confidence": 0.75 + }, + { + "text": "l'objet.", + "start": 69.42, + "end": 69.92, + "confidence": 0.887 + } + ] + }, + { + "id": 20, + "seek": 5596, + "start": 70.2, + "end": 71.22, + "text": " C'est facilement éterricion.", + "tokens": [ + 51058, + 383, + 6, + 377, + 23670, + 518, + 1136, + 391, + 1341, + 313, + 13, + 51122 + ], + "temperature": 0.0, + "avg_logprob": -0.6293430873325893, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948836743831635, + "confidence": 0.441, + "words": [ + { + "text": "C'est", + "start": 70.2, + "end": 70.34, + "confidence": 0.895 + }, + { + "text": "facilement", + "start": 70.34, + "end": 70.7, + "confidence": 0.562 + }, + { + "text": "éterricion.", + "start": 70.7, + "end": 71.22, + "confidence": 0.23 + } + ] + }, + { + "id": 21, + "seek": 5596, + "start": 71.64, + "end": 76.97, + "text": " Parce que la passion de l'utilisateur et ses affices a dépendance, cette objet d'un lieu", + "tokens": [ + 51122, + 20429, + 631, + 635, + 5418, + 368, + 287, + 6, + 20835, + 271, + 15540, + 1030, + 5385, + 2096, + 1473, + 257, + 45768, + 719, + 11, + 5550, + 14964, + 274, + 6, + 409, + 26036, + 51416 + ], + "temperature": 0.0, + "avg_logprob": -0.6293430873325893, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948836743831635, + "confidence": 0.457, + "words": [ + { + "text": "Parce", + "start": 71.64, + "end": 71.94, + "confidence": 0.679 + }, + { + "text": "que", + "start": 71.94, + "end": 72.32, + "confidence": 0.566 + }, + { + "text": "la", + "start": 72.32, + "end": 72.56, + "confidence": 0.336 + }, + { + "text": "passion", + "start": 72.56, + "end": 72.9, + "confidence": 0.267 + }, + { + "text": "de", + "start": 72.9, + "end": 73.24, + "confidence": 0.365 + }, + { + "text": "l'utilisateur", 
+ "start": 73.24, + "end": 74.82, + "confidence": 0.499 + }, + { + "text": "et", + "start": 74.82, + "end": 74.92, + "confidence": 0.45 + }, + { + "text": "ses", + "start": 74.92, + "end": 75.04, + "confidence": 0.095 + }, + { + "text": "affices", + "start": 75.04, + "end": 75.24, + "confidence": 0.303 + }, + { + "text": "a", + "start": 75.24, + "end": 75.38, + "confidence": 0.59 + }, + { + "text": "dépendance,", + "start": 75.38, + "end": 76.06, + "confidence": 0.559 + }, + { + "text": "cette", + "start": 76.32, + "end": 76.34, + "confidence": 0.401 + }, + { + "text": "objet", + "start": 76.34, + "end": 76.54, + "confidence": 0.924 + }, + { + "text": "d'un", + "start": 76.54, + "end": 76.92, + "confidence": 0.783 + }, + { + "text": "lieu", + "start": 76.92, + "end": 76.97, + "confidence": 0.307 + } + ] + }, + { + "id": 22, + "seek": 5596, + "start": 76.97, + "end": 82.15, + "text": " en fait, une espèce de relation de médiation avec le monde qui rendent encore avec", + "tokens": [ + 51416, + 465, + 3887, + 11, + 2251, + 7089, + 30236, + 368, + 9721, + 368, + 42436, + 399, + 4163, + 476, + 10431, + 1956, + 6125, + 317, + 10122, + 4163, + 51671 + ], + "temperature": 0.0, + "avg_logprob": -0.6293430873325893, + "compression_ratio": 1.5749128919860627, + "no_speech_prob": 0.21948836743831635, + "confidence": 0.746, + "words": [ + { + "text": "en", + "start": 76.97, + "end": 77.2, + "confidence": 0.767 + }, + { + "text": "fait,", + "start": 77.2, + "end": 77.32, + "confidence": 0.944 + }, + { + "text": "une", + "start": 77.46, + "end": 77.48, + "confidence": 0.696 + }, + { + "text": "espèce", + "start": 77.48, + "end": 77.9, + "confidence": 0.97 + }, + { + "text": "de", + "start": 77.9, + "end": 78.08, + "confidence": 0.994 + }, + { + "text": "relation", + "start": 78.08, + "end": 78.5, + "confidence": 0.826 + }, + { + "text": "de", + "start": 78.5, + "end": 78.94, + "confidence": 0.72 + }, + { + "text": "médiation", + "start": 78.94, + "end": 79.5, + "confidence": 
0.881 + }, + { + "text": "avec", + "start": 79.5, + "end": 79.74, + "confidence": 0.968 + }, + { + "text": "le", + "start": 79.74, + "end": 79.94, + "confidence": 0.986 + }, + { + "text": "monde", + "start": 79.94, + "end": 80.44, + "confidence": 0.906 + }, + { + "text": "qui", + "start": 80.44, + "end": 81.14, + "confidence": 0.825 + }, + { + "text": "rendent", + "start": 81.14, + "end": 81.8, + "confidence": 0.584 + }, + { + "text": "encore", + "start": 81.8, + "end": 81.98, + "confidence": 0.198 + }, + { + "text": "avec", + "start": 81.98, + "end": 82.15, + "confidence": 0.486 + } + ] + }, + { + "id": 23, + "seek": 8210, + "start": 82.15, + "end": 83.44, + "text": " la maille de celles formes de rogeur.", + "tokens": [ + 50364, + 635, + 463, + 3409, + 368, + 2815, + 279, + 1254, + 279, + 368, + 744, + 432, + 374, + 13, + 50440 + ], + "temperature": 0.0, + "avg_logprob": -0.45905232269491925, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066797912120819, + "confidence": 0.465, + "words": [ + { + "text": "la", + "start": 82.15, + "end": 82.22, + "confidence": 0.648 + }, + { + "text": "maille", + "start": 82.22, + "end": 82.4, + "confidence": 0.136 + }, + { + "text": "de", + "start": 82.4, + "end": 82.48, + "confidence": 0.585 + }, + { + "text": "celles", + "start": 82.48, + "end": 82.74, + "confidence": 0.561 + }, + { + "text": "formes", + "start": 82.74, + "end": 82.96, + "confidence": 0.78 + }, + { + "text": "de", + "start": 82.96, + "end": 83.0, + "confidence": 0.936 + }, + { + "text": "rogeur.", + "start": 83.0, + "end": 83.44, + "confidence": 0.433 + } + ] + }, + { + "id": 24, + "seek": 8210, + "start": 83.98, + "end": 87.86, + "text": " Donc, à objets inédits, rapport inédits.", + "tokens": [ + 50440, + 7477, + 11, + 1531, + 1111, + 25349, + 294, + 7811, + 1208, + 11, + 18018, + 294, + 7811, + 1208, + 13, + 50640 + ], + "temperature": 0.0, + "avg_logprob": -0.45905232269491925, + "compression_ratio": 1.6159169550173011, + 
"no_speech_prob": 0.2066797912120819, + "confidence": 0.745, + "words": [ + { + "text": "Donc,", + "start": 83.98, + "end": 84.46, + "confidence": 0.833 + }, + { + "text": "à", + "start": 84.56, + "end": 84.96, + "confidence": 0.696 + }, + { + "text": "objets", + "start": 84.96, + "end": 85.44, + "confidence": 0.526 + }, + { + "text": "inédits,", + "start": 85.44, + "end": 86.24, + "confidence": 0.701 + }, + { + "text": "rapport", + "start": 86.32, + "end": 86.92, + "confidence": 0.944 + }, + { + "text": "inédits.", + "start": 86.92, + "end": 87.86, + "confidence": 0.908 + } + ] + }, + { + "id": 25, + "seek": 8210, + "start": 88.1, + "end": 94.2, + "text": " Et, ce rapport, si j'en prends Nicolas, frère caractérisée par un mélange de dépendance", + "tokens": [ + 50640, + 3790, + 11, + 1769, + 18018, + 11, + 1511, + 361, + 6, + 268, + 46750, + 38268, + 11, + 431, + 4212, + 1032, + 578, + 4198, + 50027, + 971, + 517, + 41953, + 933, + 368, + 45768, + 719, + 50972 + ], + "temperature": 0.0, + "avg_logprob": -0.45905232269491925, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066797912120819, + "confidence": 0.669, + "words": [ + { + "text": "Et,", + "start": 88.1, + "end": 88.3, + "confidence": 0.865 + }, + { + "text": "ce", + "start": 88.58, + "end": 88.84, + "confidence": 0.277 + }, + { + "text": "rapport,", + "start": 88.84, + "end": 89.28, + "confidence": 0.998 + }, + { + "text": "si", + "start": 89.38, + "end": 89.56, + "confidence": 0.933 + }, + { + "text": "j'en", + "start": 89.56, + "end": 89.74, + "confidence": 0.78 + }, + { + "text": "prends", + "start": 89.74, + "end": 89.92, + "confidence": 0.307 + }, + { + "text": "Nicolas,", + "start": 89.92, + "end": 90.22, + "confidence": 0.65 + }, + { + "text": "frère", + "start": 90.78, + "end": 91.1, + "confidence": 0.431 + }, + { + "text": "caractérisée", + "start": 91.1, + "end": 91.8, + "confidence": 0.595 + }, + { + "text": "par", + "start": 91.8, + "end": 92.14, + "confidence": 0.907 + }, + { 
+ "text": "un", + "start": 92.14, + "end": 92.34, + "confidence": 0.989 + }, + { + "text": "mélange", + "start": 92.34, + "end": 92.98, + "confidence": 0.92 + }, + { + "text": "de", + "start": 92.98, + "end": 93.24, + "confidence": 0.842 + }, + { + "text": "dépendance", + "start": 93.24, + "end": 94.2, + "confidence": 0.675 + } + ] + }, + { + "id": 26, + "seek": 8210, + "start": 94.36, + "end": 95.08, + "text": " et de rogeur.", + "tokens": [ + 50972, + 1030, + 368, + 744, + 432, + 374, + 13, + 51022 + ], + "temperature": 0.0, + "avg_logprob": -0.45905232269491925, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066797912120819, + "confidence": 0.851, + "words": [ + { + "text": "et", + "start": 94.36, + "end": 94.52, + "confidence": 0.97 + }, + { + "text": "de", + "start": 94.52, + "end": 94.64, + "confidence": 0.996 + }, + { + "text": "rogeur.", + "start": 94.64, + "end": 95.08, + "confidence": 0.773 + } + ] + }, + { + "id": 27, + "seek": 8210, + "start": 96.3, + "end": 100.48, + "text": " Bon, en vrai, il faudrait remonter très, très filmant tout l'histoire des objectes", + "tokens": [ + 51022, + 7368, + 11, + 465, + 17815, + 11, + 1930, + 38694, + 8645, + 890, + 41806, + 5732, + 11, + 5732, + 2007, + 394, + 3486, + 287, + 6, + 29093, + 730, + 2657, + 279, + 51286 + ], + "temperature": 0.0, + "avg_logprob": -0.45905232269491925, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066797912120819, + "confidence": 0.752, + "words": [ + { + "text": "Bon,", + "start": 96.3, + "end": 96.32, + "confidence": 0.6 + }, + { + "text": "en", + "start": 96.36, + "end": 96.58, + "confidence": 0.918 + }, + { + "text": "vrai,", + "start": 96.58, + "end": 96.92, + "confidence": 0.992 + }, + { + "text": "il", + "start": 97.14, + "end": 97.18, + "confidence": 0.987 + }, + { + "text": "faudrait", + "start": 97.18, + "end": 97.56, + "confidence": 0.933 + }, + { + "text": "remonter", + "start": 97.56, + "end": 98.08, + "confidence": 0.61 + }, + { + 
"text": "très,", + "start": 98.08, + "end": 98.56, + "confidence": 0.975 + }, + { + "text": "très", + "start": 98.56, + "end": 98.86, + "confidence": 0.986 + }, + { + "text": "filmant", + "start": 98.86, + "end": 99.42, + "confidence": 0.409 + }, + { + "text": "tout", + "start": 99.42, + "end": 99.68, + "confidence": 0.348 + }, + { + "text": "l'histoire", + "start": 99.68, + "end": 100.08, + "confidence": 0.876 + }, + { + "text": "des", + "start": 100.08, + "end": 100.22, + "confidence": 0.957 + }, + { + "text": "objectes", + "start": 100.22, + "end": 100.48, + "confidence": 0.842 + } + ] + }, + { + "id": 28, + "seek": 8210, + "start": 100.48, + "end": 105.24, + "text": " techniques et de leur infertion dans nos vieux pour déterminer si ce rapport est totalement", + "tokens": [ + 51286, + 7512, + 1030, + 368, + 9580, + 1536, + 911, + 313, + 2680, + 3269, + 4941, + 2449, + 2016, + 2795, + 29725, + 260, + 1511, + 1769, + 18018, + 871, + 45203, + 51530 + ], + "temperature": 0.0, + "avg_logprob": -0.45905232269491925, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066797912120819, + "confidence": 0.614, + "words": [ + { + "text": "techniques", + "start": 100.48, + "end": 101.0, + "confidence": 0.952 + }, + { + "text": "et", + "start": 101.0, + "end": 101.58, + "confidence": 0.943 + }, + { + "text": "de", + "start": 101.58, + "end": 101.72, + "confidence": 0.975 + }, + { + "text": "leur", + "start": 101.72, + "end": 101.8, + "confidence": 0.803 + }, + { + "text": "infertion", + "start": 101.8, + "end": 102.34, + "confidence": 0.382 + }, + { + "text": "dans", + "start": 102.34, + "end": 102.5, + "confidence": 0.297 + }, + { + "text": "nos", + "start": 102.5, + "end": 102.68, + "confidence": 0.419 + }, + { + "text": "vieux", + "start": 102.68, + "end": 103.08, + "confidence": 0.386 + }, + { + "text": "pour", + "start": 103.08, + "end": 103.1, + "confidence": 0.281 + }, + { + "text": "déterminer", + "start": 103.1, + "end": 103.64, + "confidence": 0.976 
+ }, + { + "text": "si", + "start": 103.64, + "end": 103.8, + "confidence": 0.367 + }, + { + "text": "ce", + "start": 103.8, + "end": 103.92, + "confidence": 0.983 + }, + { + "text": "rapport", + "start": 103.92, + "end": 104.22, + "confidence": 0.998 + }, + { + "text": "est", + "start": 104.22, + "end": 104.88, + "confidence": 0.942 + }, + { + "text": "totalement", + "start": 104.88, + "end": 105.24, + "confidence": 0.907 + } + ] + }, + { + "id": 29, + "seek": 8210, + "start": 105.24, + "end": 106.1, + "text": " inédit.", + "tokens": [ + 51530, + 294, + 7811, + 270, + 13, + 51580 + ], + "temperature": 0.0, + "avg_logprob": -0.45905232269491925, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066797912120819, + "confidence": 0.939, + "words": [ + { + "text": "inédit.", + "start": 105.24, + "end": 106.1, + "confidence": 0.939 + } + ] + }, + { + "id": 30, + "seek": 8210, + "start": 106.16, + "end": 109.44, + "text": " Mais j'ai l'impression comme ça que Nicolas se trompe pas vraiment.", + "tokens": [ + 51580, + 6313, + 361, + 6, + 1301, + 287, + 6, + 36107, + 5173, + 2788, + 631, + 38268, + 369, + 504, + 298, + 494, + 1736, + 8322, + 13, + 51738 + ], + "temperature": 0.0, + "avg_logprob": -0.45905232269491925, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066797912120819, + "confidence": 0.811, + "words": [ + { + "text": "Mais", + "start": 106.16, + "end": 106.5, + "confidence": 0.721 + }, + { + "text": "j'ai", + "start": 106.5, + "end": 106.92, + "confidence": 0.91 + }, + { + "text": "l'impression", + "start": 106.92, + "end": 107.34, + "confidence": 0.956 + }, + { + "text": "comme", + "start": 107.34, + "end": 107.58, + "confidence": 0.7 + }, + { + "text": "ça", + "start": 107.58, + "end": 107.74, + "confidence": 0.953 + }, + { + "text": "que", + "start": 107.74, + "end": 108.06, + "confidence": 0.941 + }, + { + "text": "Nicolas", + "start": 108.06, + "end": 108.46, + "confidence": 0.994 + }, + { + "text": "se", + "start": 
108.46, + "end": 108.68, + "confidence": 0.716 + }, + { + "text": "trompe", + "start": 108.68, + "end": 109.02, + "confidence": 0.54 + }, + { + "text": "pas", + "start": 109.02, + "end": 109.16, + "confidence": 0.886 + }, + { + "text": "vraiment.", + "start": 109.16, + "end": 109.44, + "confidence": 0.963 + } + ] + }, + { + "id": 31, + "seek": 8210, + "start": 109.92, + "end": 110.86, + "text": " Pour autant, je sache.", + "tokens": [ + 51738, + 8732, + 34081, + 11, + 1506, + 262, + 6000, + 13, + 51808 + ], + "temperature": 0.0, + "avg_logprob": -0.45905232269491925, + "compression_ratio": 1.6159169550173011, + "no_speech_prob": 0.2066797912120819, + "confidence": 0.6, + "words": [ + { + "text": "Pour", + "start": 109.92, + "end": 110.1, + "confidence": 0.49 + }, + { + "text": "autant,", + "start": 110.1, + "end": 110.28, + "confidence": 0.75 + }, + { + "text": "je", + "start": 110.42, + "end": 110.48, + "confidence": 0.931 + }, + { + "text": "sache.", + "start": 110.48, + "end": 110.86, + "confidence": 0.477 + } + ] + }, + { + "id": 32, + "seek": 11098, + "start": 111.04, + "end": 115.0, + "text": " Il y a eu plein de discussions autour de la voiture ou même du téléphone.", + "tokens": [ + 50364, + 4416, + 288, + 257, + 2228, + 21088, + 368, + 11088, + 30249, + 368, + 635, + 38859, + 2820, + 5698, + 1581, + 47159, + 13, + 50568 + ], + "temperature": 0.0, + "avg_logprob": -0.44321310444242634, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.15390530228614807, + "confidence": 0.821, + "words": [ + { + "text": "Il", + "start": 111.04, + "end": 111.2, + "confidence": 0.905 + }, + { + "text": "y", + "start": 111.2, + "end": 111.3, + "confidence": 0.934 + }, + { + "text": "a", + "start": 111.3, + "end": 111.56, + "confidence": 0.893 + }, + { + "text": "eu", + "start": 111.56, + "end": 111.6, + "confidence": 0.839 + }, + { + "text": "plein", + "start": 111.6, + "end": 111.88, + "confidence": 0.776 + }, + { + "text": "de", + "start": 111.88, + "end": 
112.12, + "confidence": 0.944 + }, + { + "text": "discussions", + "start": 112.12, + "end": 112.6, + "confidence": 0.681 + }, + { + "text": "autour", + "start": 112.6, + "end": 113.04, + "confidence": 0.978 + }, + { + "text": "de", + "start": 113.04, + "end": 113.48, + "confidence": 0.975 + }, + { + "text": "la", + "start": 113.48, + "end": 113.56, + "confidence": 0.966 + }, + { + "text": "voiture", + "start": 113.56, + "end": 113.88, + "confidence": 0.983 + }, + { + "text": "ou", + "start": 113.88, + "end": 114.14, + "confidence": 0.66 + }, + { + "text": "même", + "start": 114.14, + "end": 114.34, + "confidence": 0.99 + }, + { + "text": "du", + "start": 114.34, + "end": 114.64, + "confidence": 0.283 + }, + { + "text": "téléphone.", + "start": 114.64, + "end": 115.0, + "confidence": 0.986 + } + ] + }, + { + "id": 33, + "seek": 11098, + "start": 115.52, + "end": 119.51, + "text": " Mais, la dépense n'était pas du même mort, donc le rejet n'en plus n'était pas du même", + "tokens": [ + 50568, + 6313, + 11, + 635, + 27998, + 1288, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 6599, + 11, + 5926, + 476, + 319, + 7108, + 297, + 6, + 268, + 1804, + 297, + 6, + 9743, + 1736, + 1581, + 5698, + 50790 + ], + "temperature": 0.0, + "avg_logprob": -0.44321310444242634, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.15390530228614807, + "confidence": 0.701, + "words": [ + { + "text": "Mais,", + "start": 115.52, + "end": 115.82, + "confidence": 0.983 + }, + { + "text": "la", + "start": 115.9, + "end": 116.0, + "confidence": 0.961 + }, + { + "text": "dépense", + "start": 116.0, + "end": 116.42, + "confidence": 0.616 + }, + { + "text": "n'était", + "start": 116.42, + "end": 116.82, + "confidence": 0.955 + }, + { + "text": "pas", + "start": 116.82, + "end": 117.04, + "confidence": 0.994 + }, + { + "text": "du", + "start": 117.04, + "end": 117.2, + "confidence": 0.958 + }, + { + "text": "même", + "start": 117.2, + "end": 117.38, + "confidence": 0.929 + }, + { + 
"text": "mort,", + "start": 117.38, + "end": 117.58, + "confidence": 0.686 + }, + { + "text": "donc", + "start": 117.7, + "end": 118.02, + "confidence": 0.932 + }, + { + "text": "le", + "start": 118.02, + "end": 118.34, + "confidence": 0.95 + }, + { + "text": "rejet", + "start": 118.34, + "end": 118.64, + "confidence": 0.57 + }, + { + "text": "n'en", + "start": 118.64, + "end": 118.82, + "confidence": 0.429 + }, + { + "text": "plus", + "start": 118.82, + "end": 118.96, + "confidence": 0.055 + }, + { + "text": "n'était", + "start": 118.96, + "end": 119.16, + "confidence": 0.973 + }, + { + "text": "pas", + "start": 119.16, + "end": 119.32, + "confidence": 0.993 + }, + { + "text": "du", + "start": 119.32, + "end": 119.42, + "confidence": 0.823 + }, + { + "text": "même", + "start": 119.42, + "end": 119.51, + "confidence": 0.79 + } + ] + }, + { + "id": 34, + "seek": 11098, + "start": 119.51, + "end": 119.72, + "text": " mort.", + "tokens": [ + 50790, + 6599, + 13, + 50840 + ], + "temperature": 0.0, + "avg_logprob": -0.44321310444242634, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.15390530228614807, + "confidence": 0.639, + "words": [ + { + "text": "mort.", + "start": 119.51, + "end": 119.72, + "confidence": 0.639 + } + ] + }, + { + "id": 35, + "seek": 11098, + "start": 120.04, + "end": 121.24, + "text": " On peut adorer sa bagnure.", + "tokens": [ + 50840, + 1282, + 5977, + 614, + 17618, + 601, + 3411, + 77, + 540, + 13, + 50890 + ], + "temperature": 0.0, + "avg_logprob": -0.44321310444242634, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.15390530228614807, + "confidence": 0.615, + "words": [ + { + "text": "On", + "start": 120.04, + "end": 120.22, + "confidence": 0.981 + }, + { + "text": "peut", + "start": 120.22, + "end": 120.36, + "confidence": 0.988 + }, + { + "text": "adorer", + "start": 120.36, + "end": 120.7, + "confidence": 0.859 + }, + { + "text": "sa", + "start": 120.7, + "end": 120.88, + "confidence": 0.918 + }, + { + 
"text": "bagnure.", + "start": 120.88, + "end": 121.24, + "confidence": 0.314 + } + ] + }, + { + "id": 36, + "seek": 11098, + "start": 121.38, + "end": 123.06, + "text": " On a par besoin pour plein de choses.", + "tokens": [ + 50890, + 1282, + 257, + 971, + 19207, + 2016, + 21088, + 368, + 14488, + 13, + 50972 + ], + "temperature": 0.0, + "avg_logprob": -0.44321310444242634, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.15390530228614807, + "confidence": 0.722, + "words": [ + { + "text": "On", + "start": 121.38, + "end": 121.56, + "confidence": 0.897 + }, + { + "text": "a", + "start": 121.56, + "end": 121.64, + "confidence": 0.656 + }, + { + "text": "par", + "start": 121.64, + "end": 121.8, + "confidence": 0.161 + }, + { + "text": "besoin", + "start": 121.8, + "end": 122.12, + "confidence": 0.947 + }, + { + "text": "pour", + "start": 122.12, + "end": 122.5, + "confidence": 0.976 + }, + { + "text": "plein", + "start": 122.5, + "end": 122.72, + "confidence": 0.857 + }, + { + "text": "de", + "start": 122.72, + "end": 122.8, + "confidence": 0.993 + }, + { + "text": "choses.", + "start": 122.8, + "end": 123.06, + "confidence": 0.989 + } + ] + }, + { + "id": 37, + "seek": 11098, + "start": 123.36, + "end": 126.46, + "text": " Et là, le soir, quand on va se coucher, on la laisse.", + "tokens": [ + 50972, + 3790, + 3684, + 11, + 476, + 27105, + 11, + 6932, + 322, + 2773, + 369, + 1384, + 6759, + 11, + 322, + 635, + 30969, + 13, + 51142 + ], + "temperature": 0.0, + "avg_logprob": -0.44321310444242634, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.15390530228614807, + "confidence": 0.835, + "words": [ + { + "text": "Et", + "start": 123.36, + "end": 123.5, + "confidence": 0.606 + }, + { + "text": "là,", + "start": 123.5, + "end": 123.76, + "confidence": 0.498 + }, + { + "text": "le", + "start": 123.82, + "end": 124.06, + "confidence": 0.986 + }, + { + "text": "soir,", + "start": 124.06, + "end": 124.42, + "confidence": 0.971 + }, + { + 
"text": "quand", + "start": 124.84, + "end": 124.96, + "confidence": 0.787 + }, + { + "text": "on", + "start": 124.96, + "end": 125.06, + "confidence": 0.988 + }, + { + "text": "va", + "start": 125.06, + "end": 125.18, + "confidence": 0.966 + }, + { + "text": "se", + "start": 125.18, + "end": 125.26, + "confidence": 0.869 + }, + { + "text": "coucher,", + "start": 125.26, + "end": 125.6, + "confidence": 0.804 + }, + { + "text": "on", + "start": 125.82, + "end": 126.06, + "confidence": 0.98 + }, + { + "text": "la", + "start": 126.06, + "end": 126.2, + "confidence": 0.811 + }, + { + "text": "laisse.", + "start": 126.2, + "end": 126.46, + "confidence": 0.985 + } + ] + }, + { + "id": 38, + "seek": 11098, + "start": 127.06, + "end": 130.1, + "text": " On la pade en la main quand on est colis, qu'on n'a même pas au chiot.", + "tokens": [ + 51142, + 1282, + 635, + 280, + 762, + 465, + 635, + 2135, + 6932, + 322, + 871, + 1173, + 271, + 11, + 421, + 6, + 266, + 297, + 6, + 64, + 5698, + 1736, + 1609, + 417, + 6471, + 13, + 51334 + ], + "temperature": 0.0, + "avg_logprob": -0.44321310444242634, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.15390530228614807, + "confidence": 0.542, + "words": [ + { + "text": "On", + "start": 127.06, + "end": 127.32, + "confidence": 0.605 + }, + { + "text": "la", + "start": 127.32, + "end": 127.5, + "confidence": 0.35 + }, + { + "text": "pade", + "start": 127.5, + "end": 127.72, + "confidence": 0.18 + }, + { + "text": "en", + "start": 127.72, + "end": 127.88, + "confidence": 0.775 + }, + { + "text": "la", + "start": 127.88, + "end": 128.04, + "confidence": 0.795 + }, + { + "text": "main", + "start": 128.04, + "end": 128.3, + "confidence": 0.971 + }, + { + "text": "quand", + "start": 128.3, + "end": 128.46, + "confidence": 0.483 + }, + { + "text": "on", + "start": 128.46, + "end": 128.62, + "confidence": 0.994 + }, + { + "text": "est", + "start": 128.62, + "end": 128.74, + "confidence": 0.714 + }, + { + "text": "colis,", + 
"start": 128.74, + "end": 129.12, + "confidence": 0.501 + }, + { + "text": "qu'on", + "start": 129.22, + "end": 129.32, + "confidence": 0.777 + }, + { + "text": "n'a", + "start": 129.32, + "end": 129.42, + "confidence": 0.439 + }, + { + "text": "même", + "start": 129.42, + "end": 129.56, + "confidence": 0.324 + }, + { + "text": "pas", + "start": 129.56, + "end": 129.74, + "confidence": 1.0 + }, + { + "text": "au", + "start": 129.74, + "end": 129.88, + "confidence": 0.64 + }, + { + "text": "chiot.", + "start": 129.88, + "end": 130.1, + "confidence": 0.514 + } + ] + }, + { + "id": 39, + "seek": 11098, + "start": 130.94, + "end": 135.32, + "text": " On pouvait être émervé par son mome qui occupeait la ligne de téléphone pendant une heure", + "tokens": [ + 51334, + 1282, + 45913, + 7418, + 1136, + 936, + 15797, + 971, + 1872, + 275, + 423, + 1956, + 2678, + 84, + 494, + 1001, + 635, + 34207, + 368, + 47159, + 17338, + 2251, + 30027, + 51574 + ], + "temperature": 0.0, + "avg_logprob": -0.44321310444242634, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.15390530228614807, + "confidence": 0.656, + "words": [ + { + "text": "On", + "start": 130.94, + "end": 131.04, + "confidence": 0.99 + }, + { + "text": "pouvait", + "start": 131.04, + "end": 131.28, + "confidence": 0.588 + }, + { + "text": "être", + "start": 131.28, + "end": 131.6, + "confidence": 0.446 + }, + { + "text": "émervé", + "start": 131.6, + "end": 132.24, + "confidence": 0.725 + }, + { + "text": "par", + "start": 132.24, + "end": 132.5, + "confidence": 0.848 + }, + { + "text": "son", + "start": 132.5, + "end": 132.7, + "confidence": 0.445 + }, + { + "text": "mome", + "start": 132.7, + "end": 133.08, + "confidence": 0.255 + }, + { + "text": "qui", + "start": 133.08, + "end": 133.3, + "confidence": 0.911 + }, + { + "text": "occupeait", + "start": 133.3, + "end": 133.74, + "confidence": 0.642 + }, + { + "text": "la", + "start": 133.74, + "end": 133.86, + "confidence": 0.808 + }, + { + "text": 
"ligne", + "start": 133.86, + "end": 134.02, + "confidence": 0.975 + }, + { + "text": "de", + "start": 134.02, + "end": 134.24, + "confidence": 0.928 + }, + { + "text": "téléphone", + "start": 134.24, + "end": 134.56, + "confidence": 0.978 + }, + { + "text": "pendant", + "start": 134.56, + "end": 134.92, + "confidence": 0.901 + }, + { + "text": "une", + "start": 134.92, + "end": 135.18, + "confidence": 0.811 + }, + { + "text": "heure", + "start": 135.18, + "end": 135.32, + "confidence": 0.477 + } + ] + }, + { + "id": 40, + "seek": 11098, + "start": 135.32, + "end": 137.04, + "text": " chaque soir pour discuter avec un copain.", + "tokens": [ + 51574, + 18920, + 27105, + 2016, + 2983, + 20314, + 4163, + 517, + 2971, + 491, + 13, + 51664 + ], + "temperature": 0.0, + "avg_logprob": -0.44321310444242634, + "compression_ratio": 1.7050847457627119, + "no_speech_prob": 0.15390530228614807, + "confidence": 0.955, + "words": [ + { + "text": "chaque", + "start": 135.32, + "end": 135.6, + "confidence": 0.99 + }, + { + "text": "soir", + "start": 135.6, + "end": 135.8, + "confidence": 0.997 + }, + { + "text": "pour", + "start": 135.8, + "end": 136.0, + "confidence": 0.981 + }, + { + "text": "discuter", + "start": 136.0, + "end": 136.34, + "confidence": 0.882 + }, + { + "text": "avec", + "start": 136.34, + "end": 136.5, + "confidence": 0.993 + }, + { + "text": "un", + "start": 136.5, + "end": 136.62, + "confidence": 0.967 + }, + { + "text": "copain.", + "start": 136.62, + "end": 137.04, + "confidence": 0.953 + } + ] + }, + { + "id": 41, + "seek": 13698, + "start": 137.32, + "end": 141.84, + "text": " Mais ça ne ressemble pas à ce qu'on peut ressentir à voir même mome aujourd'hui.", + "tokens": [ + 50376, + 6313, + 2788, + 408, + 725, + 37227, + 1736, + 1531, + 1769, + 421, + 6, + 266, + 5977, + 24689, + 317, + 347, + 1531, + 10695, + 5698, + 275, + 423, + 14023, + 6, + 10556, + 13, + 50606 + ], + "temperature": 0.0, + "avg_logprob": -0.533746434354234, + "compression_ratio": 
1.6231454005934718, + "no_speech_prob": 0.1096271350979805, + "confidence": 0.735, + "words": [ + { + "text": "Mais", + "start": 137.32, + "end": 137.52, + "confidence": 0.984 + }, + { + "text": "ça", + "start": 137.52, + "end": 137.7, + "confidence": 0.877 + }, + { + "text": "ne", + "start": 137.7, + "end": 137.8, + "confidence": 0.672 + }, + { + "text": "ressemble", + "start": 137.8, + "end": 138.22, + "confidence": 0.731 + }, + { + "text": "pas", + "start": 138.22, + "end": 138.76, + "confidence": 0.526 + }, + { + "text": "à", + "start": 138.76, + "end": 138.94, + "confidence": 0.98 + }, + { + "text": "ce", + "start": 138.94, + "end": 139.08, + "confidence": 0.578 + }, + { + "text": "qu'on", + "start": 139.08, + "end": 139.22, + "confidence": 0.96 + }, + { + "text": "peut", + "start": 139.22, + "end": 139.38, + "confidence": 0.547 + }, + { + "text": "ressentir", + "start": 139.38, + "end": 140.06, + "confidence": 0.888 + }, + { + "text": "à", + "start": 140.06, + "end": 140.26, + "confidence": 0.437 + }, + { + "text": "voir", + "start": 140.26, + "end": 140.54, + "confidence": 0.952 + }, + { + "text": "même", + "start": 140.54, + "end": 140.96, + "confidence": 0.297 + }, + { + "text": "mome", + "start": 140.96, + "end": 141.22, + "confidence": 0.496 + }, + { + "text": "aujourd'hui.", + "start": 141.22, + "end": 141.84, + "confidence": 0.961 + } + ] + }, + { + "id": 42, + "seek": 13698, + "start": 141.96, + "end": 145.86, + "text": " Continuellement avec son smartphone dans la main, comme c'était une sorte de estimateur", + "tokens": [ + 50606, + 14674, + 31816, + 4163, + 1872, + 13307, + 2680, + 635, + 2135, + 11, + 5173, + 269, + 6, + 9743, + 2251, + 25559, + 368, + 8017, + 15540, + 50810 + ], + "temperature": 0.0, + "avg_logprob": -0.533746434354234, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.1096271350979805, + "confidence": 0.663, + "words": [ + { + "text": "Continuellement", + "start": 141.96, + "end": 142.84, + "confidence": 0.844 + 
}, + { + "text": "avec", + "start": 142.84, + "end": 143.24, + "confidence": 0.646 + }, + { + "text": "son", + "start": 143.24, + "end": 143.42, + "confidence": 0.864 + }, + { + "text": "smartphone", + "start": 143.42, + "end": 143.72, + "confidence": 0.447 + }, + { + "text": "dans", + "start": 143.72, + "end": 143.92, + "confidence": 0.587 + }, + { + "text": "la", + "start": 143.92, + "end": 144.0, + "confidence": 0.974 + }, + { + "text": "main,", + "start": 144.0, + "end": 144.3, + "confidence": 0.995 + }, + { + "text": "comme", + "start": 144.42, + "end": 144.56, + "confidence": 0.895 + }, + { + "text": "c'était", + "start": 144.56, + "end": 144.84, + "confidence": 0.596 + }, + { + "text": "une", + "start": 144.84, + "end": 144.96, + "confidence": 0.972 + }, + { + "text": "sorte", + "start": 144.96, + "end": 145.1, + "confidence": 0.658 + }, + { + "text": "de", + "start": 145.1, + "end": 145.4, + "confidence": 0.312 + }, + { + "text": "estimateur", + "start": 145.4, + "end": 145.86, + "confidence": 0.489 + } + ] + }, + { + "id": 43, + "seek": 13698, + "start": 145.96, + "end": 149.0, + "text": " extère de tomber de lâcher à l'éantrénée, ça m'a eu immédiate.", + "tokens": [ + 50810, + 1279, + 4212, + 368, + 2916, + 607, + 368, + 48835, + 6759, + 1531, + 287, + 6, + 526, + 394, + 81, + 3516, + 3856, + 11, + 2788, + 275, + 6, + 64, + 2228, + 3397, + 526, + 4504, + 473, + 13, + 50954 + ], + "temperature": 0.0, + "avg_logprob": -0.533746434354234, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.1096271350979805, + "confidence": 0.416, + "words": [ + { + "text": "extère", + "start": 145.96, + "end": 146.4, + "confidence": 0.371 + }, + { + "text": "de", + "start": 146.4, + "end": 146.54, + "confidence": 0.254 + }, + { + "text": "tomber", + "start": 146.54, + "end": 146.7, + "confidence": 0.102 + }, + { + "text": "de", + "start": 146.7, + "end": 146.9, + "confidence": 0.454 + }, + { + "text": "lâcher", + "start": 146.9, + "end": 147.34, + "confidence": 
0.75 + }, + { + "text": "à", + "start": 147.34, + "end": 147.46, + "confidence": 0.444 + }, + { + "text": "l'éantrénée,", + "start": 147.46, + "end": 147.9, + "confidence": 0.491 + }, + { + "text": "ça", + "start": 148.0, + "end": 148.02, + "confidence": 0.799 + }, + { + "text": "m'a", + "start": 148.02, + "end": 148.26, + "confidence": 0.531 + }, + { + "text": "eu", + "start": 148.26, + "end": 148.44, + "confidence": 0.136 + }, + { + "text": "immédiate.", + "start": 148.44, + "end": 149.0, + "confidence": 0.505 + } + ] + }, + { + "id": 44, + "seek": 13698, + "start": 149.08, + "end": 151.98, + "text": " Bon, je dis ça pour le mome, mais évidemment, va là, bon aussi.", + "tokens": [ + 50954, + 7368, + 11, + 1506, + 717, + 2788, + 2016, + 476, + 275, + 423, + 11, + 2420, + 24724, + 11, + 2773, + 3684, + 11, + 4428, + 6212, + 13, + 51126 + ], + "temperature": 0.0, + "avg_logprob": -0.533746434354234, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.1096271350979805, + "confidence": 0.564, + "words": [ + { + "text": "Bon,", + "start": 149.08, + "end": 149.28, + "confidence": 0.375 + }, + { + "text": "je", + "start": 149.34, + "end": 149.4, + "confidence": 0.471 + }, + { + "text": "dis", + "start": 149.4, + "end": 149.46, + "confidence": 0.324 + }, + { + "text": "ça", + "start": 149.46, + "end": 149.64, + "confidence": 0.952 + }, + { + "text": "pour", + "start": 149.64, + "end": 149.8, + "confidence": 0.963 + }, + { + "text": "le", + "start": 149.8, + "end": 149.92, + "confidence": 0.992 + }, + { + "text": "mome,", + "start": 149.92, + "end": 150.12, + "confidence": 0.619 + }, + { + "text": "mais", + "start": 150.46, + "end": 150.52, + "confidence": 0.694 + }, + { + "text": "évidemment,", + "start": 150.52, + "end": 151.12, + "confidence": 0.776 + }, + { + "text": "va", + "start": 151.26, + "end": 151.36, + "confidence": 0.388 + }, + { + "text": "là,", + "start": 151.36, + "end": 151.46, + "confidence": 0.59 + }, + { + "text": "bon", + "start": 151.6, + 
"end": 151.68, + "confidence": 0.825 + }, + { + "text": "aussi.", + "start": 151.68, + "end": 151.98, + "confidence": 0.162 + } + ] + }, + { + "id": 45, + "seek": 13698, + "start": 152.64, + "end": 154.64, + "text": " Donc, rapport immédiate d'accord.", + "tokens": [ + 51126, + 7477, + 11, + 18018, + 3397, + 526, + 4504, + 473, + 274, + 6, + 19947, + 13, + 51248 + ], + "temperature": 0.0, + "avg_logprob": -0.533746434354234, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.1096271350979805, + "confidence": 0.861, + "words": [ + { + "text": "Donc,", + "start": 152.64, + "end": 153.08, + "confidence": 0.982 + }, + { + "text": "rapport", + "start": 153.16, + "end": 153.58, + "confidence": 0.979 + }, + { + "text": "immédiate", + "start": 153.58, + "end": 154.28, + "confidence": 0.841 + }, + { + "text": "d'accord.", + "start": 154.28, + "end": 154.64, + "confidence": 0.816 + } + ] + }, + { + "id": 46, + "seek": 13698, + "start": 155.66, + "end": 158.36, + "text": " Mais pourquoi, à ton impression qu'on en sortira, j'amé?", + "tokens": [ + 51248, + 6313, + 19934, + 11, + 1531, + 2952, + 9995, + 421, + 6, + 266, + 465, + 26906, + 64, + 11, + 361, + 6, + 335, + 526, + 30, + 51440 + ], + "temperature": 0.0, + "avg_logprob": -0.533746434354234, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.1096271350979805, + "confidence": 0.709, + "words": [ + { + "text": "Mais", + "start": 155.66, + "end": 155.88, + "confidence": 0.984 + }, + { + "text": "pourquoi,", + "start": 155.88, + "end": 156.28, + "confidence": 0.897 + }, + { + "text": "à", + "start": 156.38, + "end": 156.58, + "confidence": 0.837 + }, + { + "text": "ton", + "start": 156.58, + "end": 156.7, + "confidence": 0.953 + }, + { + "text": "impression", + "start": 156.7, + "end": 157.06, + "confidence": 0.938 + }, + { + "text": "qu'on", + "start": 157.06, + "end": 157.28, + "confidence": 0.89 + }, + { + "text": "en", + "start": 157.28, + "end": 157.4, + "confidence": 0.801 + }, + { + 
"text": "sortira,", + "start": 157.4, + "end": 157.88, + "confidence": 0.576 + }, + { + "text": "j'amé?", + "start": 157.96, + "end": 158.36, + "confidence": 0.464 + } + ] + }, + { + "id": 47, + "seek": 13698, + "start": 159.16, + "end": 163.32, + "text": " Et puis, il faut en remettre la faute sur les gens qui ont créé cette critique merveilleux", + "tokens": [ + 51440, + 3790, + 9093, + 11, + 1930, + 8487, + 465, + 890, + 40681, + 635, + 2050, + 1169, + 1022, + 1512, + 10668, + 1956, + 6592, + 15609, + 526, + 5550, + 25673, + 3551, + 303, + 3409, + 2449, + 51678 + ], + "temperature": 0.0, + "avg_logprob": -0.533746434354234, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.1096271350979805, + "confidence": 0.714, + "words": [ + { + "text": "Et", + "start": 159.16, + "end": 159.32, + "confidence": 0.832 + }, + { + "text": "puis,", + "start": 159.32, + "end": 159.46, + "confidence": 0.494 + }, + { + "text": "il", + "start": 159.62, + "end": 159.64, + "confidence": 0.594 + }, + { + "text": "faut", + "start": 159.64, + "end": 159.66, + "confidence": 0.951 + }, + { + "text": "en", + "start": 159.66, + "end": 159.78, + "confidence": 0.937 + }, + { + "text": "remettre", + "start": 159.78, + "end": 160.14, + "confidence": 0.984 + }, + { + "text": "la", + "start": 160.14, + "end": 160.42, + "confidence": 0.602 + }, + { + "text": "faute", + "start": 160.42, + "end": 160.68, + "confidence": 0.59 + }, + { + "text": "sur", + "start": 160.68, + "end": 160.98, + "confidence": 0.966 + }, + { + "text": "les", + "start": 160.98, + "end": 161.24, + "confidence": 0.828 + }, + { + "text": "gens", + "start": 161.24, + "end": 161.46, + "confidence": 0.986 + }, + { + "text": "qui", + "start": 161.46, + "end": 161.6, + "confidence": 0.98 + }, + { + "text": "ont", + "start": 161.6, + "end": 161.68, + "confidence": 0.95 + }, + { + "text": "créé", + "start": 161.68, + "end": 162.3, + "confidence": 0.95 + }, + { + "text": "cette", + "start": 162.3, + "end": 162.48, + 
"confidence": 0.908 + }, + { + "text": "critique", + "start": 162.48, + "end": 162.72, + "confidence": 0.032 + }, + { + "text": "merveilleux", + "start": 162.72, + "end": 163.32, + "confidence": 0.839 + } + ] + }, + { + "id": 48, + "seek": 13698, + "start": 163.32, + "end": 165.34, + "text": " et diabolique et diabolique par que merveilleux.", + "tokens": [ + 51678, + 1030, + 1026, + 14923, + 1925, + 1030, + 1026, + 14923, + 1925, + 971, + 631, + 3551, + 303, + 3409, + 2449, + 13, + 51778 + ], + "temperature": 0.0, + "avg_logprob": -0.533746434354234, + "compression_ratio": 1.6231454005934718, + "no_speech_prob": 0.1096271350979805, + "confidence": 0.708, + "words": [ + { + "text": "et", + "start": 163.32, + "end": 163.44, + "confidence": 0.945 + }, + { + "text": "diabolique", + "start": 163.44, + "end": 163.86, + "confidence": 0.573 + }, + { + "text": "et", + "start": 163.86, + "end": 163.94, + "confidence": 0.238 + }, + { + "text": "diabolique", + "start": 163.94, + "end": 164.38, + "confidence": 0.892 + }, + { + "text": "par", + "start": 164.38, + "end": 164.62, + "confidence": 0.544 + }, + { + "text": "que", + "start": 164.62, + "end": 164.82, + "confidence": 0.529 + }, + { + "text": "merveilleux.", + "start": 164.82, + "end": 165.34, + "confidence": 0.981 + } + ] + }, + { + "id": 49, + "seek": 16526, + "start": 166.9, + "end": 168.8, + "text": " Les économistes parlent de dépendance du santé.", + "tokens": [ + 50410, + 6965, + 31171, + 22368, + 13734, + 317, + 368, + 45768, + 719, + 1581, + 30068, + 13, + 50542 + ], + "temperature": 0.0, + "avg_logprob": -0.6644251346588135, + "compression_ratio": 1.4761904761904763, + "no_speech_prob": 0.1915542334318161, + "confidence": 0.765, + "words": [ + { + "text": "Les", + "start": 166.9, + "end": 167.06, + "confidence": 0.699 + }, + { + "text": "économistes", + "start": 167.06, + "end": 167.52, + "confidence": 0.917 + }, + { + "text": "parlent", + "start": 167.52, + "end": 167.78, + "confidence": 0.793 + }, + { + 
"text": "de", + "start": 167.78, + "end": 167.84, + "confidence": 0.813 + }, + { + "text": "dépendance", + "start": 167.84, + "end": 168.36, + "confidence": 0.698 + }, + { + "text": "du", + "start": 168.36, + "end": 168.5, + "confidence": 0.963 + }, + { + "text": "santé.", + "start": 168.5, + "end": 168.8, + "confidence": 0.489 + } + ] + }, + { + "id": 50, + "seek": 16526, + "start": 168.82, + "end": 173.42, + "text": " Ces vidéos, en fait, on est un santé qui a été étabis, un soit mon termine, en marchand dessus,", + "tokens": [ + 50542, + 28414, + 25417, + 11, + 465, + 3887, + 11, + 322, + 871, + 517, + 30068, + 1956, + 257, + 8862, + 4823, + 455, + 271, + 11, + 517, + 12703, + 1108, + 1433, + 533, + 11, + 465, + 8368, + 474, + 30677, + 11, + 50784 + ], + "temperature": 0.0, + "avg_logprob": -0.6644251346588135, + "compression_ratio": 1.4761904761904763, + "no_speech_prob": 0.1915542334318161, + "confidence": 0.474, + "words": [ + { + "text": "Ces", + "start": 168.82, + "end": 169.14, + "confidence": 0.443 + }, + { + "text": "vidéos,", + "start": 169.14, + "end": 169.4, + "confidence": 0.455 + }, + { + "text": "en", + "start": 169.44, + "end": 169.64, + "confidence": 0.744 + }, + { + "text": "fait,", + "start": 169.64, + "end": 169.66, + "confidence": 0.976 + }, + { + "text": "on", + "start": 169.72, + "end": 169.74, + "confidence": 0.303 + }, + { + "text": "est", + "start": 169.74, + "end": 169.9, + "confidence": 0.814 + }, + { + "text": "un", + "start": 169.9, + "end": 170.48, + "confidence": 0.367 + }, + { + "text": "santé", + "start": 170.48, + "end": 170.8, + "confidence": 0.951 + }, + { + "text": "qui", + "start": 170.8, + "end": 170.92, + "confidence": 0.835 + }, + { + "text": "a", + "start": 170.92, + "end": 171.02, + "confidence": 0.819 + }, + { + "text": "été", + "start": 171.02, + "end": 171.12, + "confidence": 0.994 + }, + { + "text": "étabis,", + "start": 171.12, + "end": 171.5, + "confidence": 0.343 + }, + { + "text": "un", + "start": 171.76, + 
"end": 171.9, + "confidence": 0.236 + }, + { + "text": "soit", + "start": 171.9, + "end": 172.16, + "confidence": 0.31 + }, + { + "text": "mon", + "start": 172.16, + "end": 172.36, + "confidence": 0.205 + }, + { + "text": "termine,", + "start": 172.36, + "end": 172.72, + "confidence": 0.4 + }, + { + "text": "en", + "start": 172.8, + "end": 172.82, + "confidence": 0.309 + }, + { + "text": "marchand", + "start": 172.82, + "end": 173.14, + "confidence": 0.816 + }, + { + "text": "dessus,", + "start": 173.14, + "end": 173.42, + "confidence": 0.201 + } + ] + }, + { + "id": 51, + "seek": 16526, + "start": 173.86, + "end": 177.42, + "text": " soit des finissants débordes, des finissants, une signalétique.", + "tokens": [ + 50784, + 12703, + 730, + 962, + 891, + 1719, + 36529, + 765, + 279, + 11, + 730, + 962, + 891, + 1719, + 11, + 2251, + 6358, + 42379, + 13, + 50974 + ], + "temperature": 0.0, + "avg_logprob": -0.6644251346588135, + "compression_ratio": 1.4761904761904763, + "no_speech_prob": 0.1915542334318161, + "confidence": 0.582, + "words": [ + { + "text": "soit", + "start": 173.86, + "end": 174.4, + "confidence": 0.993 + }, + { + "text": "des", + "start": 174.4, + "end": 175.2, + "confidence": 0.759 + }, + { + "text": "finissants", + "start": 175.2, + "end": 175.58, + "confidence": 0.436 + }, + { + "text": "débordes,", + "start": 175.58, + "end": 175.98, + "confidence": 0.467 + }, + { + "text": "des", + "start": 176.14, + "end": 176.18, + "confidence": 0.222 + }, + { + "text": "finissants,", + "start": 176.18, + "end": 176.62, + "confidence": 0.961 + }, + { + "text": "une", + "start": 176.66, + "end": 176.84, + "confidence": 0.75 + }, + { + "text": "signalétique.", + "start": 176.84, + "end": 177.42, + "confidence": 0.566 + } + ] + } + ], + "language": "fr" +} \ No newline at end of file diff --git a/tests/expected/verbose.cpu/accurate.auto_bonjour_vous_allez_bien.mp3.stdout b/tests/expected/verbose.cpu/accurate.auto_bonjour_vous_allez_bien.mp3.stdout new file mode 
100644 index 0000000000000000000000000000000000000000..532d5eaf637389f5cfdbecc58823d2ccd29cfacb --- /dev/null +++ b/tests/expected/verbose.cpu/accurate.auto_bonjour_vous_allez_bien.mp3.stdout @@ -0,0 +1,11 @@ +[00:00.460 --> 00:00.780] Боже +[00:00.780 --> 00:01.980] улыл! +[00:02.020 --> 00:02.260] Таков +[00:02.260 --> 00:02.400] уже +[00:02.400 --> 00:02.460] на +[00:02.460 --> 00:02.800] меня! +[00:32.980 --> 00:33.280] Боже +[00:33.280 --> 00:33.800] улыл! +[00:34.400 --> 00:34.800] Эскому +[00:34.800 --> 00:34.960] за +[00:34.960 --> 00:35.260] меня! diff --git a/tests/expected/verbose.cpu/accurate.fr_bonjour_vous_allez_bien.mp3.stdout b/tests/expected/verbose.cpu/accurate.fr_bonjour_vous_allez_bien.mp3.stdout new file mode 100644 index 0000000000000000000000000000000000000000..bbe59141c7bb85cc4567ff0ac51949b691aea753 --- /dev/null +++ b/tests/expected/verbose.cpu/accurate.fr_bonjour_vous_allez_bien.mp3.stdout @@ -0,0 +1,13 @@ +[00:00.460 --> 00:00.680] Bonjour! +[00:01.860 --> 00:02.140] Est-ce +[00:02.140 --> 00:02.180] que +[00:02.180 --> 00:02.340] vous +[00:02.340 --> 00:02.480] allez +[00:02.480 --> 00:02.800] bien? +[00:32.980 --> 00:33.160] Bonjour! +[00:34.400 --> 00:34.610] Bonjour! +[00:34.610 --> 00:34.660] Est-ce +[00:34.660 --> 00:34.720] que +[00:34.720 --> 00:34.860] vous +[00:34.860 --> 00:35.000] allez +[00:35.000 --> 00:35.340] bien? diff --git a/tests/expected/verbose.cpu/efficient.auto_bonjour_vous_allez_bien.mp3.stdout b/tests/expected/verbose.cpu/efficient.auto_bonjour_vous_allez_bien.mp3.stdout new file mode 100644 index 0000000000000000000000000000000000000000..532d5eaf637389f5cfdbecc58823d2ccd29cfacb --- /dev/null +++ b/tests/expected/verbose.cpu/efficient.auto_bonjour_vous_allez_bien.mp3.stdout @@ -0,0 +1,11 @@ +[00:00.460 --> 00:00.780] Боже +[00:00.780 --> 00:01.980] улыл! +[00:02.020 --> 00:02.260] Таков +[00:02.260 --> 00:02.400] уже +[00:02.400 --> 00:02.460] на +[00:02.460 --> 00:02.800] меня! 
+[00:32.980 --> 00:33.280] Боже +[00:33.280 --> 00:33.800] улыл! +[00:34.400 --> 00:34.800] Эскому +[00:34.800 --> 00:34.960] за +[00:34.960 --> 00:35.260] меня! diff --git a/tests/expected/verbose.cpu/efficient.fr_bonjour_vous_allez_bien.mp3.stdout b/tests/expected/verbose.cpu/efficient.fr_bonjour_vous_allez_bien.mp3.stdout new file mode 100644 index 0000000000000000000000000000000000000000..bbe59141c7bb85cc4567ff0ac51949b691aea753 --- /dev/null +++ b/tests/expected/verbose.cpu/efficient.fr_bonjour_vous_allez_bien.mp3.stdout @@ -0,0 +1,13 @@ +[00:00.460 --> 00:00.680] Bonjour! +[00:01.860 --> 00:02.140] Est-ce +[00:02.140 --> 00:02.180] que +[00:02.180 --> 00:02.340] vous +[00:02.340 --> 00:02.480] allez +[00:02.480 --> 00:02.800] bien? +[00:32.980 --> 00:33.160] Bonjour! +[00:34.400 --> 00:34.610] Bonjour! +[00:34.610 --> 00:34.660] Est-ce +[00:34.660 --> 00:34.720] que +[00:34.720 --> 00:34.860] vous +[00:34.860 --> 00:35.000] allez +[00:35.000 --> 00:35.340] bien? diff --git a/tests/expected/verbose.cpu/hf_bonjour.wav.stdout b/tests/expected/verbose.cpu/hf_bonjour.wav.stdout new file mode 100644 index 0000000000000000000000000000000000000000..99635535fd87302067c97ce13d42f2988dacdd83 --- /dev/null +++ b/tests/expected/verbose.cpu/hf_bonjour.wav.stdout @@ -0,0 +1,3 @@ +Detecting language using up to the first 30 seconds. Use `--language` to specify the language +Detected language: French +[00:00.120 --> 00:00.640] Bonjour. diff --git a/tests/expected/verbose/accurate.auto_bonjour_vous_allez_bien.mp3.stdout b/tests/expected/verbose/accurate.auto_bonjour_vous_allez_bien.mp3.stdout new file mode 100644 index 0000000000000000000000000000000000000000..6bfee4b97e33192bc46f69994bddcd817c220226 --- /dev/null +++ b/tests/expected/verbose/accurate.auto_bonjour_vous_allez_bien.mp3.stdout @@ -0,0 +1,10 @@ +Detecting language using up to the first 30 seconds. 
Use `--language` to specify the language +Detected language: Russian +[00:00.440 --> 00:00.860] Боже +[00:00.860 --> 00:01.880] улыл! +[00:01.880 --> 00:02.300] Эскому +[00:02.300 --> 00:03.140] зарегиан! +[00:32.980 --> 00:33.360] Боже +[00:33.360 --> 00:34.020] улыл! +[00:34.420 --> 00:34.840] Эскому +[00:34.840 --> 00:35.720] зарегиан! diff --git a/tests/expected/verbose/accurate.fr_bonjour_vous_allez_bien.mp3.stdout b/tests/expected/verbose/accurate.fr_bonjour_vous_allez_bien.mp3.stdout new file mode 100644 index 0000000000000000000000000000000000000000..ec0d882ef119c14b9da62584ee57728eab7691fd --- /dev/null +++ b/tests/expected/verbose/accurate.fr_bonjour_vous_allez_bien.mp3.stdout @@ -0,0 +1,11 @@ +[00:00.440 --> 00:01.440] Bonjour ! +[00:01.880 --> 00:02.200] Est-ce +[00:02.200 --> 00:02.240] que +[00:02.240 --> 00:02.360] vous +[00:02.360 --> 00:02.560] allez +[00:02.560 --> 00:03.120] bien ? +[00:32.980 --> 00:33.480] Bonjour ! +[00:34.420 --> 00:34.760] Esque +[00:34.760 --> 00:34.900] vous +[00:34.900 --> 00:35.080] allez +[00:35.080 --> 00:35.480] bien ! diff --git a/tests/expected/verbose/efficient.auto_bonjour_vous_allez_bien.mp3.stdout b/tests/expected/verbose/efficient.auto_bonjour_vous_allez_bien.mp3.stdout new file mode 100644 index 0000000000000000000000000000000000000000..6bfee4b97e33192bc46f69994bddcd817c220226 --- /dev/null +++ b/tests/expected/verbose/efficient.auto_bonjour_vous_allez_bien.mp3.stdout @@ -0,0 +1,10 @@ +Detecting language using up to the first 30 seconds. Use `--language` to specify the language +Detected language: Russian +[00:00.440 --> 00:00.860] Боже +[00:00.860 --> 00:01.880] улыл! +[00:01.880 --> 00:02.300] Эскому +[00:02.300 --> 00:03.140] зарегиан! +[00:32.980 --> 00:33.360] Боже +[00:33.360 --> 00:34.020] улыл! +[00:34.420 --> 00:34.840] Эскому +[00:34.840 --> 00:35.720] зарегиан! 
diff --git a/tests/expected/verbose/efficient.fr_bonjour_vous_allez_bien.mp3.stdout b/tests/expected/verbose/efficient.fr_bonjour_vous_allez_bien.mp3.stdout new file mode 100644 index 0000000000000000000000000000000000000000..ec0d882ef119c14b9da62584ee57728eab7691fd --- /dev/null +++ b/tests/expected/verbose/efficient.fr_bonjour_vous_allez_bien.mp3.stdout @@ -0,0 +1,11 @@ +[00:00.440 --> 00:01.440] Bonjour ! +[00:01.880 --> 00:02.200] Est-ce +[00:02.200 --> 00:02.240] que +[00:02.240 --> 00:02.360] vous +[00:02.360 --> 00:02.560] allez +[00:02.560 --> 00:03.120] bien ? +[00:32.980 --> 00:33.480] Bonjour ! +[00:34.420 --> 00:34.760] Esque +[00:34.760 --> 00:34.900] vous +[00:34.900 --> 00:35.080] allez +[00:35.080 --> 00:35.480] bien ! diff --git a/tests/expected/verbose/hf_bonjour.wav.stdout b/tests/expected/verbose/hf_bonjour.wav.stdout new file mode 100644 index 0000000000000000000000000000000000000000..99635535fd87302067c97ce13d42f2988dacdd83 --- /dev/null +++ b/tests/expected/verbose/hf_bonjour.wav.stdout @@ -0,0 +1,3 @@ +Detecting language using up to the first 30 seconds. Use `--language` to specify the language +Detected language: French +[00:00.120 --> 00:00.640] Bonjour. diff --git a/tests/expected/verbose/vad_words.wav.stdout b/tests/expected/verbose/vad_words.wav.stdout new file mode 100644 index 0000000000000000000000000000000000000000..f585abe377f6c53c6c58ea0cd1ff93a5271d2f64 --- /dev/null +++ b/tests/expected/verbose/vad_words.wav.stdout @@ -0,0 +1,8 @@ +[00:00.140 --> 00:01.320] Settlement. +[00:03.020 --> 00:03.600] Kentucky. +[00:05.170 --> 00:06.130] Causing. +[00:08.040 --> 00:08.940] Damage. +[00:10.890 --> 00:11.510] President. +[00:13.730 --> 00:14.790] Expansion. +[00:16.980 --> 00:17.600] Hospital. +[00:20.410 --> 00:21.430] Devastated. 
diff --git a/tests/json_schema.json b/tests/json_schema.json new file mode 100644 index 0000000000000000000000000000000000000000..63dfcc6e7ac6247c85d515442d144d744109fc12 --- /dev/null +++ b/tests/json_schema.json @@ -0,0 +1,41 @@ +{ + "type": "object", + "properties": { + "text": {"type": "string"}, + "segments": { + "type": "array", + "items": { + "type": "object", + "properties": { + "id": {"type": "integer", "minimum":0}, + "start": {"type": "number", "minimum":0}, + "end": {"type": "number", "minimum":0}, + "text": {"type": "string"}, + "tokens": { + "type": "array", + "items": {"type": "integer", "minimum": 0, "maximum": 51864} + }, + "temperature": {"type": "number", "minimum":0, "maximum":1}, + "avg_logprob": {"type": "number", "maximum":0}, + "compression_ratio": {"type": "number", "minimum":0}, + "no_speech_prob": {"type": "number", "minimum":0, "maximum":1}, + "confidence": {"type": "number", "minimum":0, "maximum":1}, + "words": { + "type": "array", + "items": { + "type": "object", + "properties": { + "text": {"type": "string"}, + "start": {"type": "number", "minimum":0}, + "end": {"type": "number", "minimum":0}, + "confidence": {"type": "number", "minimum":0, "maximum":1} + } + } + } + } + }, + "minItems": 0, + "uniqueItems": true + } + } +} diff --git a/tests/run_tests.py b/tests/run_tests.py new file mode 100644 index 0000000000000000000000000000000000000000..4a9da66d22f2f52093f7c07796d7f939abcccd85 --- /dev/null +++ b/tests/run_tests.py @@ -0,0 +1,45 @@ +import sys +import unittest + +from test_transcribe import * +import test_transcribe + +if __name__ == '__main__': + + # Handle several ways of generating expected outputs + if "--long" in sys.argv: + test_transcribe.SKIP_LONG_TEST_IF_CPU = False + sys.argv.remove("--long") + if "--generate" in sys.argv: + test_transcribe.FAIL_IF_REFERENCE_NOT_FOUND = False + sys.argv.remove("--generate") + if "--generate_device" in sys.argv: + test_transcribe.GENERATE_DEVICE_DEPENDENT = True + 
test_transcribe.FAIL_IF_REFERENCE_NOT_FOUND = False + sys.argv.remove("--generate_device") + if "--generate_new" in sys.argv: + test_transcribe.GENERATE_NEW_ONLY = True + test_transcribe.FAIL_IF_REFERENCE_NOT_FOUND = False + sys.argv.remove("--generate_new") + if "--generate_all" in sys.argv: + test_transcribe.GENERATE_ALL = True + test_transcribe.FAIL_IF_REFERENCE_NOT_FOUND = False + sys.argv.remove("--generate_all") + + # Pass options to whisper_timestamped CLI + args = sys.argv[1:] + for i, arg in enumerate(args): + if arg not in [ + "-h", "--help", + "-v", "--verbose", + "--locals", + "-q", "--quiet", + "-f", "--failfast", + "-c", "--catch", + "-b", "--buffer", + "-k", + ] and (i==0 or args[i-1] not in ["-k"]) and (arg.startswith("-") or (i>0 and args[i-1].startswith("-"))): + test_transcribe.CMD_OPTIONS.append(arg) + sys.argv.remove(arg) + + unittest.main() diff --git a/tests/test_transcribe.py b/tests/test_transcribe.py new file mode 100644 index 0000000000000000000000000000000000000000..45e5b874590f3e9b87b7a6ddb0410a4889913b77 --- /dev/null +++ b/tests/test_transcribe.py @@ -0,0 +1,853 @@ +__author__ = "Jérôme Louradour" +__credits__ = ["Jérôme Louradour"] +__license__ = "GPLv3" + +import unittest +import sys +import os +import subprocess +import shutil +import tempfile +import json +import torch +import jsonschema + +FAIL_IF_REFERENCE_NOT_FOUND = True +GENERATE_NEW_ONLY = False +GENERATE_ALL = False +GENERATE_DEVICE_DEPENDENT = False +SKIP_LONG_TEST_IF_CPU = True +CMD_OPTIONS = [] + + +class TestHelper(unittest.TestCase): + + def skipLongTests(self): + return SKIP_LONG_TEST_IF_CPU and not torch.cuda.is_available() + + def setUp(self): + self.maxDiff = None + self.createdReferences = [] + + def tearDown(self): + if GENERATE_ALL or GENERATE_NEW_ONLY or not FAIL_IF_REFERENCE_NOT_FOUND or GENERATE_DEVICE_DEPENDENT: + if len(self.createdReferences) > 0: + print("WARNING: Created references: " + + ", 
".join(self.createdReferences).replace(self.get_data_path()+"/", "")) + else: + self.assertEqual(self.createdReferences, [], "Created references: " + + ", ".join(self.createdReferences).replace(self.get_data_path()+"/", "")) + + def get_main_path(self, fn=None, check=False): + return self._get_path("whisper_timestamped", fn, check=check) + + def get_output_path(self, fn=None): + if fn == None: + return tempfile.gettempdir() + return os.path.join(tempfile.gettempdir(), fn + self._extra_cmd_options()) + + def get_expected_path(self, fn=None, check=False): + return self._get_path("tests/expected" + self._extra_cmd_options(), fn, check=check) + + def _extra_cmd_options(self): + s = "".join([f.replace("-","").strip() for f in CMD_OPTIONS]) + if s: + return "." + s + return "" + + def get_data_files(self, files=None, excluded_by_default=["apollo11.mp3", "music.mp4", "arabic.mp3", "japanese.mp3", "empty.wav", "words.wav"]): + if files == None: + files = os.listdir(self.get_data_path()) + files = [f for f in files if f not in excluded_by_default and not f.endswith("json")] + files = sorted(files) + return [self.get_data_path(fn) for fn in files] + + def get_generated_files(self, input_filename, output_path, extensions): + for ext in extensions: + yield os.path.join(output_path, os.path.basename(input_filename) + "." 
+ ext.lstrip(".")) + + def main_script(self, pyscript = "transcribe.py", exename = "whisper_timestamped"): + main_script = self.get_main_path(pyscript, check=False) + if not os.path.exists(main_script): + main_script = exename + return main_script + + def assertRun(self, cmd): + if isinstance(cmd, str): + return self.assertRun(cmd.split()) + curdir = os.getcwd() + os.chdir(tempfile.gettempdir()) + if cmd[0].endswith(".py"): + cmd = [sys.executable] + cmd + print("Running:", " ".join(cmd)) + p = subprocess.Popen(cmd, + # Otherwise ".local" path might be missing + env=dict( + os.environ, PYTHONPATH=os.pathsep.join(sys.path)), + stdout=subprocess.PIPE, stderr=subprocess.PIPE + ) + os.chdir(curdir) + (stdout, stderr) = p.communicate() + self.assertEqual(p.returncode, 0, msg=stderr.decode("utf-8")) + return (stdout.decode("utf-8"), stderr.decode("utf-8")) + + def assertNonRegression(self, content, reference, string_is_file=True): + """ + Check that a file/folder is the same as a reference file/folder. 
+ """ + if isinstance(content, dict): + # Make a temporary file + with tempfile.NamedTemporaryFile(mode="w", suffix=".json", encoding="utf8", delete=False) as f: + json.dump(content, f, indent=2, ensure_ascii=False) + content = f.name + res = self.assertNonRegression(f.name, reference) + os.remove(f.name) + return res + elif not isinstance(content, str): + raise ValueError(f"Invalid content type: {type(content)}") + + if not string_is_file: + with tempfile.NamedTemporaryFile(mode="w", suffix=".txt", encoding="utf8", delete=False) as f: + f.write(content) + content = f.name + res = self.assertNonRegression(f.name, reference) + os.remove(f.name) + return res + + self.assertTrue(os.path.exists(content), f"Missing file: {content}") + is_file = os.path.isfile(reference) if os.path.exists(reference) else os.path.isfile(content) + + reference = self.get_expected_path( + reference, check=FAIL_IF_REFERENCE_NOT_FOUND) + if not os.path.exists(reference) or ((GENERATE_ALL or GENERATE_DEVICE_DEPENDENT) and reference not in self.createdReferences): + dirname = os.path.dirname(reference) + if not os.path.isdir(dirname): + os.makedirs(dirname) + if is_file: + shutil.copyfile(content, reference) + else: + shutil.copytree(content, reference) + self.createdReferences.append(reference) + + if is_file: + self.assertTrue(os.path.isfile(content)) + self._check_file_non_regression(content, reference) + else: + self.assertTrue(os.path.isdir(content)) + for root, dirs, files in os.walk(content): + for f in files: + f_ref = os.path.join(reference, f) + self.assertTrue(os.path.isfile(f_ref), + f"Additional file: {f}") + self._check_file_non_regression( + os.path.join(root, f), f_ref) + for root, dirs, files in os.walk(reference): + for f in files: + f = os.path.join(content, f) + self.assertTrue(os.path.isfile(f), f"Missing file: {f}") + + def get_data_path(self, fn=None, check=True): + return self._get_path("tests/data", fn, check) + + def _get_path(self, prefix, fn=None, check=True): + path 
= os.path.join( + os.path.dirname(os.path.dirname(__file__)), + prefix + ) + if fn: + path = os.path.join(path, fn) + if check: + self.assertTrue(os.path.exists(path), f"Cannot find {path}") + return path + + def _check_file_non_regression(self, file, reference): + if file.endswith(".json"): + with open(file) as f: + content = json.load(f) + with open(reference) as f: + reference_content = json.load(f) + if "language" in content and "language" in reference_content: + content["language"] = self.norm_language(content["language"]) + reference_content["language"] = self.norm_language(reference_content["language"]) + self.assertClose(content, reference_content, + msg=f"File {file} does not match reference {reference}") + return + with open(file) as f: + content = f.readlines() + with open(reference) as f: + reference_content = f.readlines() + self.assertEqual(content, reference_content, + msg=f"File {file} does not match reference {reference}") + + def assertClose(self, obj1, obj2, msg=None): + return self.assertEqual(self.loose(obj1), self.loose(obj2), msg=msg) + + def loose(self, obj): + # Return an approximative value of an object + if isinstance(obj, list): + return [self.loose(a) for a in obj] + if isinstance(obj, float): + f = round(obj, 1) + return 0.0 if f == -0.0 else f + if isinstance(obj, dict): + return {k: self.loose(v) for k, v in obj.items()} + if isinstance(obj, tuple): + return tuple(self.loose(list(obj))) + if isinstance(obj, set): + return self.loose(list(obj), "set") + return obj + + def get_audio_duration(self, audio_file): + # Get the duration in sec *without introducing additional dependencies* + import whisper + return len(whisper.load_audio(audio_file)) / whisper.audio.SAMPLE_RATE + + def get_device_str(self): + import torch + return "cpu" if not torch.cuda.is_available() else "cuda" + + def norm_language(self, language): + # Cheap custom stuff to avoid importing everything + return { + "japanese": "ja", + }.get(language.lower(), language) + + 
+class TestHelperCli(TestHelper): + + json_schema = None + + def _test_cli_(self, opts, name, files=None, extensions=["words.json"], prefix=None, one_per_call=True, device_specific=None): + """ + Test command line + opts: list of options + name: name of the test + files: list of files to process + extensions: list of extensions to check, or None to test the stdout + prefix: prefix to add to the reference files + one_per_call: if True, each file is processed separately, otherwise all files are processed by a single process + """ + + opts = opts + CMD_OPTIONS + + output_dir = self.get_output_path(name) + + input_filenames = self.get_data_files(files) + + for i, input_filename in enumerate(input_filenames): + + # Butterfly effect: Results are different depending on the device for long files + duration = self.get_audio_duration(input_filename) + if device_specific is None: + device_dependent = duration > 60 or (duration > 30 and "tiny_fr" in name) or ("empty" in input_filename and "medium_auto" in name) + else: + device_dependent = device_specific + name_ = name + if device_dependent and self.get_device_str() != "cuda": + name_ += f".{self.get_device_str()}" + + def ref_name(output_filename): + return name_ + "/" + (f"{prefix}_" if prefix else "") + os.path.basename(output_filename) + generic_name = ref_name(input_filename + ".*") + + if GENERATE_DEVICE_DEPENDENT and not device_dependent: + print("Skipping non-regression test", generic_name) + continue + + if GENERATE_NEW_ONLY and min([os.path.exists(self.get_expected_path(ref_name(output_filename))) + for output_filename in self.get_generated_files(input_filename, output_dir, extensions=extensions)] + ): + print("Skipping non-regression test", generic_name) + continue + + print("Running non-regression test", generic_name) + + if one_per_call or i == 0: + if one_per_call: + (stdout, stderr) = self.assertRun([self.main_script(), input_filename, "--output_dir", output_dir, *opts]) + else: + (stdout, stderr) = 
self.assertRun([self.main_script(), *input_filenames, "--output_dir", output_dir, *opts]) + print(stdout) + print(stderr) + + output_json = self.get_generated_files(input_filename, output_dir, extensions=["words.json"]).__next__() + if os.path.isfile(output_json): + self.check_json(output_json) + + if extensions is None: + output_filename = list(self.get_generated_files(input_filename, output_dir, extensions=["stdout"]))[0] + self.assertNonRegression(stdout, ref_name(output_filename), string_is_file=False) + else: + for output_filename in self.get_generated_files(input_filename, output_dir, extensions=extensions): + self.assertNonRegression(output_filename, ref_name(output_filename)) + + + shutil.rmtree(output_dir, ignore_errors=True) + + def check_json(self, json_file): + with open(json_file) as f: + content = json.load(f) + + if self.json_schema is None: + schema_file = os.path.join(os.path.dirname(__file__), "json_schema.json") + self.assertTrue(os.path.isfile(schema_file), msg=f"Schema file {schema_file} not found") + self.json_schema = json.load(open(schema_file)) + + jsonschema.validate(instance=content, schema=self.json_schema) + + + +class TestTranscribeTiny(TestHelperCli): + + def test_cli_tiny_auto(self): + self._test_cli_( + ["--model", "tiny"], + "tiny_auto", + ) + + def test_cli_tiny_fr(self): + self._test_cli_( + ["--model", "tiny", "--language", "fr"], + "tiny_fr", + ) + + +class TestTranscribeMedium(TestHelperCli): + + def test_cli_medium_auto(self): + self._test_cli_( + ["--model", "medium"], + "medium_auto", + ) + + def test_cli_medium_fr(self): + self._test_cli_( + ["--model", "medium", "--language", "fr"], + "medium_fr", + ) + + +class TestTranscribeNaive(TestHelperCli): + + def test_naive(self): + + self._test_cli_( + ["--model", "small", "--language", "en", "--efficient", "--naive"], + "naive", + files=["apollo11.mp3"], + prefix="naive", + ) + + self._test_cli_( + ["--model", "small", "--language", "en", "--accurate"], + "naive", + 
files=["apollo11.mp3"], + prefix="accurate", + ) + + def test_stucked_segments(self): + self._test_cli_( + ["--model", "tiny"], + "corner_cases", + files=["apollo11.mp3"], + prefix="accurate.tiny", + ) + + +class TestTranscribeCornerCases(TestHelperCli): + + def test_stucked_lm(self): + if self.skipLongTests(): + return + + self._test_cli_( + ["--model", "small", "--language", "en", "--efficient"], + "corner_cases", + files=["apollo11.mp3"], + prefix="stucked_lm", + ) + + def test_punctuation_only(self): + + # When there is only a punctuation detected in a segment, it could cause issue #24 + self._test_cli_( + ["--model", "medium.en", "--efficient", "--punctuations", "False"], + "corner_cases", + files=["empty.wav"], + prefix="issue24", + ) + + def test_temperature(self): + + self._test_cli_( + ["--model", "small", "--language", "English", + "--condition", "False", "--temperature", "0.1", "--efficient"], + "corner_cases", + files=["apollo11.mp3"], + prefix="random.nocond", + ) + + if self.skipLongTests(): + return + + self._test_cli_( + ["--model", "small", "--language", "en", "--temperature", "0.2", "--efficient"], + "corner_cases", + files=["apollo11.mp3"], + prefix="random", + ) + + def test_not_conditioned(self): + + if not os.path.exists(self.get_data_path("music.mp4", check=False)): + return + if self.skipLongTests(): + return + + self._test_cli_( + ["--model", "medium", "--language", "en", "--condition", "False", "--efficient"], + "corner_cases", + files=["music.mp4"], + prefix="nocond", + ) + + self._test_cli_( + ["--model", "medium", "--language", "en", + "--condition", "False", "--temperature", "0.4", "--efficient"], + "corner_cases", + files=["music.mp4"], + prefix="nocond.random", + ) + + def test_large(self): + if self.skipLongTests(): + return + + self._test_cli_( + ["--model", "large-v2", "--language", "en", + "--condition", "False", "--temperature", "0.4", "--efficient"], + "corner_cases", + files=["apollo11.mp3"], + prefix="large", + ) + + if 
os.path.exists(self.get_data_path("arabic.mp3", check=False)): + self._test_cli_( + ["--model", "large-v2", "--language", "Arabic", "--efficient"], + "corner_cases", + files=["arabic.mp3"] + ) + + def test_gloria(self): + + for model in ["medium", "large-v2"]: + for dec in ["efficient", "accurate"]: + self._test_cli_( + ["--model", model, "--language", "en", "--" + dec], + "corner_cases", + files=["gloria.mp3"], + prefix=model + "." + dec, + ) + +class TestTranscribeMonolingual(TestHelperCli): + + def test_monolingual_tiny(self): + + files = ["bonjour_vous_allez_bien.mp3"] + + self._test_cli_( + ["--model", "tiny.en", "--efficient"], + "tiny.en", + files=files, + prefix="efficient", + ) + + self._test_cli_( + ["--model", "tiny.en", "--accurate"], + "tiny.en", + files=files, + prefix="accurate", + ) + + self._test_cli_( + ["--model", "tiny.en", "--condition", "False", "--efficient"], + "tiny.en", + files=files, + prefix="nocond", + ) + + def test_monolingual_small(self): + + self._test_cli_( + ["--model", "small.en", "--condition", "True", "--efficient"], + "small.en", + files=["arabic.mp3"], + device_specific=True, + ) + + +class TestTranscribeWithVad(TestHelperCli): + + def test_vad(self): + self._test_cli_( + ["--model", "large", "--accurate", "--language", "en", "--vad", "True", "--verbose", "True"], + "verbose", + files=["words.wav"], + prefix="vad", + extensions=None, + ) + + +class TestTranscribeUnspacedLanguage(TestHelperCli): + + def test_japanese(self): + + self._test_cli_( + ["--model", "tiny", "--efficient"], + "tiny_auto", + files=["japanese.mp3"], + device_specific=True, + ) + + self._test_cli_( + ["--model", "tiny", "--language", "Japanese", "--efficient"], + "tiny_auto", + files=["japanese.mp3"], + device_specific=True, + ) + + self._test_cli_( + ["--model", "tiny", "--accurate"], + "tiny_auto", + files=["japanese.mp3"], + prefix="accurate", + device_specific=True, + ) + + self._test_cli_( + ["--model", "tiny", "--language", "Japanese", 
"--accurate"], + "tiny_auto", + files=["japanese.mp3"], + prefix="accurate", + device_specific=True, + ) + +class TestTranscribeFormats(TestHelperCli): + + def test_cli_outputs(self): + files = ["punctuations.mp3", "bonjour.wav"] + extensions = ["txt", "srt", "vtt", "words.srt", "words.vtt", + "words.json", "csv", "words.csv", "tsv", "words.tsv"] + opts = ["--model", "medium", "--language", "fr"] + + # An audio / model combination that produces coma + self._test_cli_( + opts, + "punctuations_yes", + files=files, + extensions=extensions, + one_per_call=False, + ) + self._test_cli_( + opts + ["--punctuations", "False"], + "punctuations_no", + files=files, + extensions=extensions, + one_per_call=False, + ) + + def test_verbose(self): + + files = ["bonjour_vous_allez_bien.mp3"] + opts = ["--model", "tiny", "--verbose", "True"] + + self._test_cli_( + ["--efficient", *opts], + "verbose", files=files, extensions=None, + prefix="efficient.auto", + device_specific=True, + ) + + self._test_cli_( + ["--language", "fr", "--efficient", *opts], + "verbose", files=files, extensions=None, + prefix="efficient.fr", + device_specific=True, + ) + + self._test_cli_( + opts, + "verbose", files=files, extensions=None, + prefix="accurate.auto", + device_specific=True, + ) + + self._test_cli_( + ["--language", "fr", *opts], + "verbose", files=files, extensions=None, + prefix="accurate.fr", + device_specific=True, + ) + +class TestMakeSubtitles(TestHelper): + + def test_make_subtitles(self): + + main_script = self.main_script("make_subtitles.py", "whisper_timestamped_make_subtitles") + + inputs = [ + self.get_data_path("smartphone.mp3.words.json"), + self.get_data_path("no_punctuations.mp3.words.json", check=True), + self.get_data_path("yes_punctuations.mp3.words.json", check=True), + ] + + for i, input in enumerate(inputs): + filename = os.path.basename(input).replace(".words.json", "") + for len in 6, 20, 50: + output_dir = self.get_output_path() + self.assertRun([main_script, + input if 
i > 0 else self.get_data_path(), output_dir, + "--max_length", str(len), + ]) + for format in "vtt", "srt",: + output_file = os.path.join(output_dir, f"{filename}.{format}") + self.assertTrue(os.path.isfile(output_file), msg=f"File {output_file} not found") + expected_file = f"split_subtitles/{filename.split('_')[-1]}_{len}.{format}" + self.assertNonRegression(output_file, expected_file) + os.remove(output_file) + self.assertRun([main_script, + input, output_file, + "--max_length", str(len), + ]) + self.assertTrue(os.path.isfile(output_file), msg=f"File {output_file} not found") + self.assertNonRegression(output_file, expected_file) + +class TestHuggingFaceModel(TestHelperCli): + + def test_hugging_face_model(self): + + self._test_cli_( + ["--model", "qanastek/whisper-tiny-french-cased", "--verbose", "True"], + "verbose", files=["bonjour.wav"], extensions=None, + prefix="hf", + device_specific=True, + ) + +# "ZZZ" to run this test at last (because it will fill the CUDA with some memory) +class TestZZZPythonImport(TestHelper): + + def test_python_import(self): + + try: + import whisper_timestamped + except ModuleNotFoundError: + sys.path.append(os.path.realpath( + os.path.dirname(os.path.dirname(__file__)))) + import whisper_timestamped + + # Test version + version = whisper_timestamped.__version__ + self.assertTrue(isinstance(version, str)) + + (stdout, sterr) = self.assertRun([self.main_script(), "-v"]) + self.assertEqual(stdout.strip(), version) + + model = whisper_timestamped.load_model("tiny") + + # Check processing of different files + for filename in "bonjour.wav", "laugh1.mp3", "laugh2.mp3": + res = whisper_timestamped.transcribe( + model, self.get_data_path(filename)) + if self._can_generate_reference(): + self.assertNonRegression(res, f"tiny_auto/{filename}.words.json") + + for filename in "bonjour.wav", "laugh1.mp3", "laugh2.mp3": + res = whisper_timestamped.transcribe( + model, self.get_data_path(filename), language="fr") + if 
self._can_generate_reference(): + self.assertNonRegression(res, f"tiny_fr/{filename}.words.json") + + def _can_generate_reference(self): + return not GENERATE_DEVICE_DEPENDENT or self.get_device_str() != "cpu" + + def test_split_tokens(self): + + import whisper + whisperversion = whisper.__version__ + + import whisper_timestamped as whisper + from whisper_timestamped.transcribe import split_tokens_on_spaces + + tokenizer = whisper.tokenizer.get_tokenizer(True, language=None) + + # 220 means space + tokens = [50364, 220, 6455, 11, 2232, 11, 286, 2041, 11, 2232, 11, 8660, + 291, 808, 493, 220, 365, 11, 220, 445, 718, 505, 458, 13, 220, 50714] + + self.assertEqual( + split_tokens_on_spaces(tokens, tokenizer), + (['<|0.00|>', 'So,', 'uh,', 'I', 'guess,', 'uh,', 'wherever', 'you', 'come', 'up', 'with,', 'just', 'let', 'us', 'know.', '<|7.00|>'], + [['<|0.00|>'], + [' ', 'So', ','], + [' uh', ','], + [' I'], + [' guess', ','], + [' uh', ','], + [' wherever'], + [' you'], + [' come'], + [' up'], + [' ', ' with', ','], + [' ', ' just'], + [' let'], + [' us'], + [' know', '.', ' '], + ['<|7.00|>']], + [[50364], + [220, 6455, 11], + [2232, 11], + [286], + [2041, 11], + [2232, 11], + [8660], + [291], + [808], + [493], + [220, 365, 11], + [220, 445], + [718], + [505], + [458, 13, 220], + [50714] + ]) + ) + + tokens = [50366, 314, 6, 11771, 17134, 11, 4666, 11, 1022, 220, 875, 2557, 68, 11, 6992, 631, 269, 6, 377, 220, 409, 7282, 1956, 871, 566, 2707, 394, 1956, 256, 622, 8208, 631, 8208, 871, 517, 7282, 1956, 5977, 7418, 371, 1004, 306, 580, 11, 5977, 12, 9498, 9505, 84, 6, 50416] + self.assertEqual( + split_tokens_on_spaces(tokens, tokenizer), + ( + ['<|0.04|>', "T'façon,", 'nous,', 'sur', 'la', 'touche,', 'parce', 'que', "c'est", 'un', 'sport', 'qui', 'est', 'important', 'qui', 'tue', 'deux', 'que', 'deux', 'est', 'un', 'sport', 'qui', 'peut', 'être', 'violent,', 'peut-être', "qu'", '<|1.04|>'], + [['<|0.04|>'], + [' T', "'", 'fa', 'çon', ','], + [' nous', ','], + [' sur'], 
+ [' ', 'la'], + [' touch', 'e', ','], + [' parce'], + [' que'], + [' c', "'", 'est'], + [' ', 'un'], + [' sport'], + [' qui'], + [' est'], + [' im', 'port', 'ant'], + [' qui'], + [' t', 'ue'], + [' deux'], + [' que'], + [' deux'], + [' est'], + [' un'], + [' sport'], + [' qui'], + [' peut'], + [' être'], + [' v', 'io', 'le', 'nt', ','], + [' peut', '-', 'être'], + [' q', 'u', "'"], + ['<|1.04|>']], + [[50366], + [314, 6, 11771, 17134, 11], + [4666, 11], + [1022], + [220, 875], + [2557, 68, 11], + [6992], + [631], + [269, 6, 377], + [220, 409], + [7282], + [1956], + [871], + [566, 2707, 394], + [1956], + [256, 622], + [8208], + [631], + [8208], + [871], + [517], + [7282], + [1956], + [5977], + [7418], + [371, 1004, 306, 580, 11], + [5977, 12, 9498], + [9505, 84, 6], + [50416]] + ) + ) + + tokens = [50364, 220, 220, 6455, 11, 220, 220, 2232, 220, 220, 11, 220, 50714] + self.assertEqual( + split_tokens_on_spaces(tokens, tokenizer), + (['<|0.00|>', 'So,', 'uh', ',', '<|7.00|>'], + [['<|0.00|>'], + [' ', ' ', 'So', ','], + [' ', ' ', ' uh'], + [' ', ' ', ',', ' '], + ['<|7.00|>']], + [[50364], [220, 220, 6455, 11], [220, 220, 2232], [220, 220, 11, 220], [50714]] + ) + ) + + # Careful with the double spaces at the end... 
+ tokens = [50364, 220, 220, 6455, 11, 220, 220, 2232, 220, 220, 11, 220, 220, 50714] + self.assertEqual( + split_tokens_on_spaces(tokens, tokenizer), + (['<|0.00|>', 'So,', 'uh', ',', '', '<|7.00|>'], + [['<|0.00|>'], + [' ', ' ', 'So', ','], + [' ', ' ', ' uh'], + [' ', ' ', ','], + [' ', ' '], + ['<|7.00|>']], + [[50364], [220, 220, 6455, 11], [220, 220, 2232], [220, 220, 11], [220, 220], [50714]] + ) + ) + + # Tokens that could be removed + tokens = [50364, 6024, 95, 8848, 7649, 8717, 38251, 11703, 3224, 51864] + self.assertEqual( + split_tokens_on_spaces(tokens, tokenizer), + (['<|0.00|>', 'الآذان', 'نسمّه', '<|30.00|>'], + [['<|0.00|>'], ['', ' الآ', 'ذ', 'ان'], [' ن', 'سم', 'ّ', 'ه'], ['<|30.00|>']], + [[50364], [6024, 95, 8848, 7649], [8717, 38251, 11703, 3224], [51864]] + ) + ) + + # issue #61 + # Special tokens that are not timestamps + tokens = [50414, 805, 12, 17, 50299, 11, 568, 12, 18, 12, 21, 11, 502, 12, 17, 12, 51464] + # 50299 is "<|te|>" and appears as "" + te = "" + self.assertEqual( + split_tokens_on_spaces(tokens, tokenizer), + (['<|1.00|>', f'3-2{te},', '2-3-6,', '1-2-', '<|22.00|>'], + [['<|1.00|>'], [' 3', '-', '2', f'{te}', ','], [' 2', '-', '3', '-','6', ','], [' 1', '-', '2', '-'], ['<|22.00|>']], + [[50414], [805, 12, 17, 50299, 11], [568, 12, 18, 12, 21, 11], [502, 12, 17, 12], [51464]]) + ) + + tokenizer = whisper.tokenizer.get_tokenizer(False, language="en") + + # Just a punctuation character + tokens = [50363, 764, 51813] + + _dot = "." if whisperversion < "20230314" else " ." 
+ self.assertEqual( + split_tokens_on_spaces(tokens, tokenizer), + (['<|0.00|>', ".", '<|29.00|>'], + [['<|0.00|>'], [_dot], ['<|29.00|>']], + [[50363], [764], [51813]] + ) + ) diff --git a/whisper_timestamped/__init__.py b/whisper_timestamped/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..24fd61eeea212883b618d1fb974d40ab24f07ec0 --- /dev/null +++ b/whisper_timestamped/__init__.py @@ -0,0 +1,10 @@ +from whisper import available_models, _download, _MODELS # defined in __init__.py +from whisper import audio, decoding, model, normalizers, tokenizer, utils +from whisper.audio import load_audio, log_mel_spectrogram, pad_or_trim +from whisper.decoding import DecodingOptions, DecodingResult, decode, detect_language +from whisper.model import Whisper, ModelDimensions + +from .transcribe import transcribe_timestamped +from .transcribe import transcribe_timestamped as transcribe +from .transcribe import load_model +from .transcribe import __version__ \ No newline at end of file diff --git a/whisper_timestamped/make_subtitles.py b/whisper_timestamped/make_subtitles.py new file mode 100755 index 0000000000000000000000000000000000000000..bfc49ceda111efd3ac8065b082276aed26e79977 --- /dev/null +++ b/whisper_timestamped/make_subtitles.py @@ -0,0 +1,157 @@ +#!/usr/bin/env python3 + +import json +import string + +_punctuation = "".join(c for c in string.punctuation if c not in ["-", "'"]) + "。,!?:”、…" + +def split_long_segments(segments, max_length, use_space = True): + new_segments = [] + for segment in segments: + text = segment["text"] + if len(text) <= max_length: + new_segments.append(segment) + else: + meta_words = segment["words"] + # Note: we do this in case punctuation were removed from words + if use_space: + # Split text around spaces and punctuations (keeping punctuations) + words = text.split() + else: + words = [w["text"] for w in meta_words] + if len(words) != len(meta_words): + new_words = [w["text"] for w in meta_words] + 
print(f"WARNING: {' '.join(words)} != {' '.join(new_words)}") + words = new_words + current_text = "" + current_start = segment["start"] + current_best_idx = None + current_best_end = None + current_best_next_start = None + for i, (word, meta) in enumerate(zip(words, meta_words)): + current_text_before = current_text + if current_text and use_space: + current_text += " " + current_text += word + + if len(current_text) > max_length and len(current_text_before): + start = current_start + if current_best_idx is not None: + text = current_text[:current_best_idx] + end = current_best_end + current_text = current_text[current_best_idx+1:] + current_start = current_best_next_start + else: + text = current_text_before + end = meta_words[i-1]["end"] + current_text = word + current_start = meta["start"] + + current_best_idx = None + current_best_end = None + current_best_next_start = None + + new_segments.append({"text": text, "start": start, "end": end}) + + # Try to cut after punctuation + if current_text and current_text[-1] in _punctuation: + current_best_idx = len(current_text) + current_best_end = meta["end"] + current_best_next_start = meta_words[i+1]["start"] if i+1 < len(meta_words) else None + + if len(current_text): + new_segments.append({"text": current_text, "start": current_start, "end": segment["end"]}) + + return new_segments + +def format_timestamp(seconds: float, always_include_hours: bool = False, decimal_marker: str = '.'): + assert seconds >= 0, "non-negative timestamp expected" + milliseconds = round(seconds * 1000.0) + + hours = milliseconds // 3_600_000 + milliseconds -= hours * 3_600_000 + + minutes = milliseconds // 60_000 + milliseconds -= minutes * 60_000 + + seconds = milliseconds // 1_000 + milliseconds -= seconds * 1_000 + + hours_marker = f"{hours:02d}:" if always_include_hours or hours > 0 else "" + return f"{hours_marker}{minutes:02d}:{seconds:02d}{decimal_marker}{milliseconds:03d}" + +def write_vtt(result, file): + print("WEBVTT\n", 
file=file) + for segment in result: + print( + f"{format_timestamp(segment['start'])} --> {format_timestamp(segment['end'])}\n" + f"{segment['text'].strip().replace('-->', '->')}\n", + file=file, + flush=True, + ) + +def write_srt(result, file): + for i, segment in enumerate(result, start=1): + # write srt lines + print( + f"{i}\n" + f"{format_timestamp(segment['start'], always_include_hours=True, decimal_marker=',')} --> " + f"{format_timestamp(segment['end'], always_include_hours=True, decimal_marker=',')}\n" + f"{segment['text'].strip().replace('-->', '->')}\n", + file=file, + flush=True, + ) + +def cli(): + + import os + import argparse + + supported_formats = ["srt", "vtt"] + + parser = argparse.ArgumentParser( + description='Convert .word.json transcription files (output of whisper_timestamped) to srt or vtt, being able to cut long segments', + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument('input', type=str, help='Input json file, or input folder') + parser.add_argument('output', type=str, help='Output srt or vtt file, or output folder') + parser.add_argument('--max_length', default=200, help='Maximum length of a segment in characters', type=int) + parser.add_argument('--format', type=str, default="all", help='Output format (if the output is a folder, i.e. not a file with an explicit extension)', choices= supported_formats + ["all"]) + args = parser.parse_args() + + if os.path.isdir(args.input) or not max([args.output.endswith(e) for e in supported_formats]): + input_files = [f for f in os.listdir(args.input) if f.endswith(".words.json")] if os.path.isdir(args.input) else [os.path.basename(args.input)] + extensions = [args.format] if args.format != "all" else ["srt", "vtt"] + output_files = [[os.path.join(args.output, f[:-11] + "." 
+ e) for e in extensions] for f in input_files] + if os.path.isdir(args.input): + input_files = [os.path.join(args.input, f) for f in input_files] + else: + input_files = [args.input] + if not os.path.isdir(args.output): + os.makedirs(args.output) + else: + input_files = [args.input] + output_files = [[args.output]] + if not os.path.isdir(os.path.dirname(args.output)): + os.makedirs(os.path.dirname(args.output)) + + for fn, outputs in zip(input_files, output_files): + with open(fn, "r", encoding="utf-8") as f: + transcript = json.load(f) + segments = transcript["segments"] + if args.max_length: + language = transcript["language"] + use_space = language not in ["zh", "ja", "th", "lo", "my"] + segments = split_long_segments(segments, args.max_length, use_space=use_space) + for output in outputs: + if output.endswith(".srt"): + with open(output, "w", encoding="utf-8") as f: + write_srt(segments, file=f) + elif output.endswith(".vtt"): + with open(output, "w", encoding="utf-8") as f: + write_vtt(segments, file=f) + else: + raise RuntimeError(f"Unknown output format for {output}") + +if __name__ == "__main__": + cli() \ No newline at end of file diff --git a/whisper_timestamped/transcribe.py b/whisper_timestamped/transcribe.py new file mode 100755 index 0000000000000000000000000000000000000000..d80369e4846b0d5eaf9e51c28b9a4cd8912463e1 --- /dev/null +++ b/whisper_timestamped/transcribe.py @@ -0,0 +1,2375 @@ +#!/usr/bin/env python3 + +__author__ = "Jérôme Louradour" +__credits__ = ["Jérôme Louradour"] +__license__ = "GPLv3" +__version__ = "1.12.20" + +# Set some environment variables +import os +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1' # Remove warning "This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)..." 
def transcribe_timestamped(
    # Main Whisper options
    model,
    audio,
    language=None,
    task="transcribe",

    # Additional options for word alignment
    remove_punctuation_from_words=False,
    compute_word_confidence=True,
    include_punctuation_in_confidence=False,
    refine_whisper_precision=0.5,
    min_word_duration=0.02, # Was 0.04 before 1.11
    plot_word_alignment=False,
    word_alignement_most_top_layers=None, # Was 6 before 1.9
    remove_empty_words=False,

    # Reproducibility
    seed=1234,

    vad=False,
    detect_disfluencies=False,
    trust_whisper_timestamps=TRUST_WHISPER_TIMESTAMP_BY_DEFAULT,
    naive_approach=False,

    # Other Whisper options
    temperature=0.0 if USE_EFFICIENT_BY_DEFAULT else (0.0, 0.2, 0.4, 0.6, 0.8, 1.0),
    best_of=None,
    beam_size=None,
    patience=None,
    length_penalty=None,
    compression_ratio_threshold=2.4,
    logprob_threshold=-1.0,
    no_speech_threshold=0.6,
    fp16=None,
    condition_on_previous_text=True,
    initial_prompt=None,
    suppress_tokens="-1",
    sample_len=None,
    verbose=False,
):
    """
    Transcribe an audio file using Whisper, and attach word-level timestamps to each segment.

    Parameters
    ----------
    model: Whisper
        The Whisper model instance.

    audio: Union[str, np.ndarray, torch.Tensor]
        The path to the audio file to open, or the audio waveform in 16kHz.

    language: str
        The language to use for the transcription. If None, the language is detected automatically.

    task: str
        The task to perform: either "transcribe" or "translate".

    remove_punctuation_from_words: bool
        If False, words will be glued with the next punctuation mark (if any).
        If True, there will be no punctuation mark in the `words[:]["text"]` list.
        It only affects these strings; this has no influence on the computation of the word confidence,
        whatever the value of `include_punctuation_in_confidence` is.

    include_punctuation_in_confidence: bool
        Whether to include proba of punctuation in the computation of the (previous) word confidence.

    compute_word_confidence: bool
        Whether to compute word confidence.
        If True, a finer confidence for each segment will be computed as well.

    vad: bool
        Whether to perform voice activity detection (VAD) on the audio file, to remove silent parts before transcribing with Whisper model.
        This should decrease hallucinations from the Whisper model.

    detect_disfluencies: bool
        Whether to detect disfluencies (i.e. hesitations, filler words, repetitions, corrections, etc.) that Whisper model might have omitted in the transcription.
        This should make the word timestamp prediction more accurate.
        And probable disfluencies will be marked as special words "[*]".

    trust_whisper_timestamps: bool
        Whether to rely on Whisper's timestamps to get an approximate first estimate of segment positions (up to refine_whisper_precision).

    refine_whisper_precision: float
        How much can we refine Whisper segment positions, in seconds. Must be a non-negative multiple of 0.02
        (the check tolerates floating-point rounding, so values such as 0.14 are accepted).

    min_word_duration: float
        Minimum duration of a word, in seconds. If a word is shorter than this, timestamps will be adjusted.
        It is only enforced when `trust_whisper_timestamps` is True.

    plot_word_alignment: bool
        Whether to plot the word alignment for each segment. matplotlib must be installed to use this option.

    word_alignement_most_top_layers: int
        Number of top-most decoder layers whose cross-attention weights are used for the alignment.
        If None, the alignment heads returned by `get_alignment_heads(model)` are used instead.
        Must be strictly positive when given.

    remove_empty_words: bool
        Whether to remove words with no duration occurring at the end of segments (probable Whisper hallucinations).

    seed: int
        Random seed to use for temperature sampling, for the sake of reproducibility.
        Choose None for unpredictable randomness.

    naive_approach: bool
        Force the naive approach that consists in decoding twice the audio file, once to get the transcription and once with the decoded tokens to get the alignment.
        Note that this approach is used anyway when beam_size is not None, when the temperature is a list with more than one element,
        and when temperature > 0 with best_of > 1.

    temperature: float
        Temperature for sampling. A list/tuple of temperatures enables temperature fallback
        (a one-element list/tuple is treated as a single temperature).

    best_of, beam_size, patience, length_penalty, sample_len:
        Decoding options that are forwarded unchanged to Whisper.

    compression_ratio_threshold: float
        If the gzip compression ratio is above this value, treat as failed.

    logprob_threshold: float
        If the average log probability over sampled tokens is below this value, treat as failed.

    no_speech_threshold: float
        If the no_speech probability is higher than this value AND the average log probability
        over sampled tokens is below `logprob_threshold`, consider the segment as silent.

    fp16: bool
        Whether to run Whisper in half precision. If None, it is enabled unless the model is on CPU.

    condition_on_previous_text: bool
        if True, the previous output of the model is provided as a prompt for the next window;
        disabling may make the text inconsistent across windows, but the model becomes less prone to
        getting stuck in a failure loop, such as repetition looping or timestamps going out of sync.

    initial_prompt: str
        Optional text to provide as a prompt for the first window.

    suppress_tokens: str
        Comma-separated list of token ids to suppress during sampling;
        '-1' will suppress most special characters except common punctuations.

    verbose: bool
        Whether to display the text being decoded to the console. If True, displays all the details,
        If False, displays minimal details. If None, does not display anything

    Returns
    -------
    A dictionary containing the resulting text ("text") and segment-level details ("segments"), and
    the spoken language ("language"), which is detected when `decode_options["language"]` is None.
    Each segment for which words were aligned additionally gets a "words" list.
    """

    if seed is not None:
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)

    # Check input options
    # The ratio is compared to its rounded value with a tolerance (and not with "=="),
    # because exact multiples of 0.02 are not always exact in floating point
    # (e.g. 0.14 / 0.02 gives 7.000000000000001).
    refine_whisper_precision_ratio = refine_whisper_precision / AUDIO_TIME_PER_TOKEN
    refine_whisper_precision_nframes = round(refine_whisper_precision_ratio)
    assert refine_whisper_precision >= 0 and abs(refine_whisper_precision_ratio - refine_whisper_precision_nframes) < 1e-6, \
        f"refine_whisper_precision must be a positive multiple of {AUDIO_TIME_PER_TOKEN}"
    assert min_word_duration >= 0, "min_word_duration must be a positive number"
    assert word_alignement_most_top_layers is None or word_alignement_most_top_layers > 0, \
        "word_alignement_most_top_layers must be a strictly positive number"

    # Decoding strategies that may explore several hypotheses require the naive (two-pass) approach
    if isinstance(temperature, (list, tuple)) and len(temperature) == 1:
        temperature = temperature[0]
    if isinstance(temperature, (list, tuple)):
        # temperature fallback
        naive_approach = True
    elif temperature > 0 and best_of is not None and best_of > 1:
        naive_approach = True
    if beam_size is not None:
        # beam-search
        naive_approach = True

    # Input options
    if fp16 is None:
        fp16 = model.device != torch.device("cpu")

    # Safety check
    input_stride = N_FRAMES // model.dims.n_audio_ctx
    time_precision = input_stride * HOP_LENGTH / SAMPLE_RATE
    assert time_precision == AUDIO_TIME_PER_TOKEN

    alignment_options = dict(
        remove_punctuation_from_words=remove_punctuation_from_words,
        compute_word_confidence=compute_word_confidence,
        include_punctuation_in_confidence=include_punctuation_in_confidence,
        detect_disfluencies=detect_disfluencies,
        refine_whisper_precision_nframes=refine_whisper_precision_nframes,
        plot_word_alignment=plot_word_alignment,
        word_alignement_most_top_layers=word_alignement_most_top_layers,
        alignment_heads=get_alignment_heads(model) if word_alignement_most_top_layers is None else None,
    )
    whisper_options = dict(
        language=language,
        task=task,
        fp16=fp16,
        temperature=temperature,
        best_of=best_of,
        beam_size=beam_size,
        patience=patience,
        length_penalty=length_penalty,
        condition_on_previous_text=condition_on_previous_text,
        initial_prompt=initial_prompt,
        suppress_tokens=suppress_tokens,
        sample_len=sample_len,
        # With VAD, timestamps printed on the fly would refer to the audio without silences:
        # words are printed below instead, once the timestamps have been converted back
        verbose=verbose if (not vad or verbose is not True) else False,
    )
    other_options = dict(
        no_speech_threshold=no_speech_threshold,
        logprob_threshold=logprob_threshold,
        compression_ratio_threshold=compression_ratio_threshold,
    )

    if vad:
        audio = get_audio_tensor(audio)
        audio, convert_timestamps = remove_non_speech(audio, plot=plot_word_alignment)

    global num_alignment_for_plot
    num_alignment_for_plot = 0

    if naive_approach:
        (transcription, words) = _transcribe_timestamped_naive(model, audio,
                                                               min_word_duration=0.0, # Was 0.04 before 1.11
                                                               trust_whisper_timestamps=trust_whisper_timestamps,
                                                               **alignment_options, **whisper_options, **other_options)
    else:
        (transcription, words) = _transcribe_timestamped_efficient(model, audio,
                                                                   trust_whisper_timestamps=trust_whisper_timestamps,
                                                                   **alignment_options, **whisper_options, **other_options)
    if remove_empty_words:
        # Remove words with empty duration happening at the end of segments, to remove some hallucinations
        transcription, words = remove_last_null_duration_words(transcription, words, recompute_text=True)

    # Refine word positions
    ensure_increasing_positions(words, min_duration=min_word_duration if trust_whisper_timestamps else 0)

    # Combine words and segments
    whisper_segments = transcription["segments"]
    for word in words:
        if verbose and not naive_approach and not vad:
            print_timestamped(word)
        # Remove internal fields that are not part of the returned result
        word.pop("tokens")
        word.pop("tokens_indices")
        if "avg_logprob_reliable" in word:
            word.pop("avg_logprob_reliable")
        idx_segment = word.pop("idx_segment")
        assert idx_segment < len(whisper_segments), f"Fatal error: Got unexpected segment index {idx_segment} >= {len(whisper_segments)}"
        segment = whisper_segments[idx_segment]
        if "words" in segment:
            segment["words"].append(word)
        else:
            segment["words"] = [word]
            # First word of the segment: it gives the refined start of the segment
            if refine_whisper_precision:
                segment["start"] = word["start"]
        # The last word seen so far gives the refined end of the segment
        if refine_whisper_precision:
            segment["end"] = word["end"]

    if vad:
        # Recompute timestamps to match the original audio
        for segment in whisper_segments:
            for word in segment.get("words", []):
                word["start"], word["end"] = convert_timestamps(word["start"], word["end"])
                if verbose:
                    print_timestamped(word)
            if refine_whisper_precision and len(segment.get("words", [])):
                segment["start"] = segment["words"][0]["start"]
                segment["end"] = segment["words"][-1]["end"]
            else:
                segment["start"], segment["end"] = convert_timestamps(segment["start"], segment["end"])

    return transcription
whisper_options["logprob_threshold"] + verbose = whisper_options["verbose"] + # Note: "on-the-fly" verbose is not implementable in the current state (we don't know the absolute position of the current chunk). See issue #18 + verbose_bugged = False + whisper_options["verbose"] = None if whisper_options["verbose"] is True else whisper_options["verbose"] # We will print intermediate results ourselves + + logit_filters = get_logit_filters(model, whisper_options) + language = whisper_options["language"] + tokenizer = whisper.tokenizer.get_tokenizer(model.is_multilingual, task=whisper_options["task"], language=language) + + max_sample_len = sample_len or model.dims.n_text_ctx // 2 + n_ctx = model.dims.n_text_ctx + + debug = logger.getEffectiveLevel() >= logging.DEBUG + + word_alignement_most_top_layers = float("inf") if word_alignement_most_top_layers is None else word_alignement_most_top_layers + + # The main outcome + timestamped_word_segments = [] # list of timestamped word segments that have been collected so far + # Main variables to be accumulated + segment_tokens = [[]] # list of lists of token indices that have been collected so far (one list per segment) + segment_attweights = [[] for _ in range(min(word_alignement_most_top_layers, len(model.decoder.blocks)))] + # attention weights on the last segments + segment_avglogprobs = [] # average log probability for each segment (actually of the corresponding chunk, as computed by whisper) + segment_logprobs = [] # token log probabilities for each segment + # Variables related to options that can skip some segments + sot_index = None # index of the SOT token in the current set of processed tokens + no_speech_prob = None # no speech probability for the current 30 sec chunk + chunk_logprobs = [] # log probabilities for the current 30 sec chunk + chunk_tokens = [] # tokens for the current 30 sec chunk (list of Torch tensors) + chunk_tokens_nosot = [] # tokens for the current 30 sec chunk, without the SOT tokens (list of 
indices) + last_chunk_token = None # last token of the current chunk, that may be needed for corner cases + last_token_fallback = None # last token to use as a fallback if the model gets stuck + has_started = False # whether we have started decoding + mfcc = None # MFCC features for the current 30 sec chunk + new_mfcc = None # + num_inference_steps = 0 # number of inference steps performed so far (for debugging only) + + def is_sot(curr_tokens): + return curr_tokens is None or len(curr_tokens) > 1 or curr_tokens[0] == tokenizer.sot + + def has_reached_decoding_limit(): + n = len(chunk_tokens_nosot) + 1 + m = n + (len(chunk_tokens[0]) if len(chunk_tokens) > 0 else 0) + return n + 1 >= max_sample_len or m > n_ctx + + def reset(add_segment, keep_last_token=True): + """ Reset the list of tokens for the current speech segment, and corresponding cross-attention weights """ + nonlocal segment_tokens, segment_attweights + if add_segment: + if keep_last_token: + segment_tokens.append([segment_tokens[-1][-1]]) + segment_attweights = [w[-1:] for w in segment_attweights] + else: + segment_tokens.append([]) + segment_attweights = [[] for w in segment_attweights] + segment_tokens[-2].pop(0) + elif len(segment_tokens[-1]) > 0: + if debug: + logger.debug(f"Reset last segment: {tokenizer.decode_with_timestamps(segment_tokens[-1])}") + segment_tokens[-1] = [] + segment_attweights = [[] for w in segment_attweights] + + saw_consecutive_timestamps = False + def must_flush_segment(curr_tokens): + """ Return whether or not the previously collected tokens must be used to add a new speech segment """ + nonlocal segment_tokens, saw_consecutive_timestamps, chunk_tokens_nosot + + if not is_sot(curr_tokens): + is_timestamp = curr_tokens[0] >= tokenizer.timestamp_begin + is_previous_timestamp = segment_tokens[-1][-1] >= tokenizer.timestamp_begin if len(segment_tokens[-1]) > 0 else False + consecutive_timestamps = is_timestamp and is_previous_timestamp + if consecutive_timestamps: + 
saw_consecutive_timestamps = True + return consecutive_timestamps + else: # Several tokens as a prompt or must flush last segments + + must_flush = len(segment_tokens[-1]) > 1 and not saw_consecutive_timestamps + if not must_flush and WHIPSER_GE_20230306: # If the last token is a timestamp, the last segment is used + if last_chunk_token is None: + must_flush = (len(segment_tokens[-1]) > 2 and segment_tokens[-1][-1] >= tokenizer.timestamp_begin) + else: + must_flush = (last_chunk_token >= tokenizer.timestamp_begin) + if not must_flush and trust_whisper_timestamps: + # Discard the end of the last transcription + reset(False) + saw_consecutive_timestamps = False + return must_flush + + index_begin_30sec_chunck = 0 + def get_index_begin_30sec_chunck(curr_tokens): + nonlocal index_begin_30sec_chunck, has_started + + if is_sot(curr_tokens) and has_started: + if trust_whisper_timestamps: + res = index_begin_30sec_chunck + index_begin_30sec_chunck = len(segment_tokens)-1 + else: + res = len(segment_tokens)-1 + return res + + def align_last_segment(curr_tokens=None): + nonlocal segment_tokens, segment_attweights, timestamped_word_segments, has_started, no_speech_prob, chunk_tokens, chunk_tokens_nosot, chunk_logprobs, mfcc, new_mfcc, logit_filters, index_begin_30sec_chunck, last_token_fallback, num_inference_steps + + if debug and trust_whisper_timestamps: + logger.debug(f"Add segment {len(timestamped_word_segments)+1} at step {num_inference_steps}:\n\t{tokenizer.decode_with_timestamps(segment_tokens[-1])}") + + tokens = segment_tokens[-1][1:] + + # When the decoding hit the max limit (number of tokens) -- usually when the language model gets stuck -- + # then we have to recover the last token from what is send to the decoder + unfinished_decoding = has_reached_decoding_limit() + last_is_not_timestamp = len(tokens) and tokens[-1] < tokenizer.timestamp_begin + last_token_reliable = True + + if unfinished_decoding: + logger.debug(f"WARNING: decoding hit the max limit for 
segment {segment_tokens[-1]} (It usually happens when the language model gets stuck)") + # The last token chosen is in the prompt for the new chunk + if curr_tokens is not None and curr_tokens[0] == tokenizer.sot_prev: + index_sot = (curr_tokens == tokenizer.sot).nonzero(as_tuple=True) + assert len(index_sot) == 1 + index_sot = index_sot[0].item() + assert index_sot > 0 + last_token_fallback = curr_tokens[index_sot-1].item() + logger.debug(f" Guessed last token from the prompt for the new chunk: {last_token_fallback}") + # Fallback for the last segment, or without prompt: Assume greedy decoding + else: + last_token_fallback = torch.argmax(chunk_logprobs[-1]).item() if last_chunk_token is None else last_chunk_token + last_token_reliable = (temperature == 0) + logger.debug(f" Guess last token using probas (assuming greedy decoding): {last_token_fallback}") + if debug: + logger.debug(f"WARNING: also add last token: {tokenizer.decode_with_timestamps([last_token_fallback])}") + + tokens.append(last_token_fallback) + segment_tokens[-1].append(last_token_fallback) + attention_weights = [torch.cat(w, dim=-2) for w in segment_attweights] + last_logprobs = chunk_logprobs[-1] + elif last_is_not_timestamp: # was emitted early, without a timestamp before + logger.debug(f"WARNING: end timestamp not produced. 
Adding <|endoftext|>") + tokens.append(tokenizer.eot) + segment_tokens[-1].append(tokenizer.eot) + attention_weights = [torch.cat(w, dim=-2) for w in segment_attweights] + last_logprobs = chunk_logprobs[-1] + else: + attention_weights = [torch.cat(w[:-1], dim=-2) for w in segment_attweights] + last_logprobs = chunk_logprobs[-2] + + # Check prediction of last token + end_token = tokens[-1] + if end_token >= tokenizer.timestamp_begin: + start_token = tokens[0] + assert start_token >= tokenizer.timestamp_begin + # If Whisper prediction of the end is obviously wrong, we predict it again (constrained) + if end_token <= start_token: + new_end_token = last_logprobs[start_token+1:].argmax() + start_token + 1 + tokens[-1] = new_end_token.item() + if debug: + logger.debug(f"Re-estimated end token {tokenizer.decode_with_timestamps([new_end_token])} (was {tokenizer.decode_with_timestamps([end_token])}) to be after start token {tokenizer.decode_with_timestamps([start_token])}") + + if len(tokens) <= 1: + # Corner case: nothing in between timestamps + ws = [] + else: + ws = perform_word_alignment( + tokens, + attention_weights, + tokenizer, + use_space=should_use_space(language), + alignment_heads=alignment_heads, + remove_punctuation_from_words=remove_punctuation_from_words, + refine_whisper_precision_nframes=refine_whisper_precision_nframes, + detect_disfluencies=detect_disfluencies, + unfinished_decoding=unfinished_decoding, + mfcc=mfcc, + plot=plot_word_alignment, + debug=debug, + ) + + add_segment = len(ws) > 0 + if add_segment: + timestamped_word_segments.append(ws) + else: + logger.debug(f"Not added!") + reset(add_segment, not is_sot(curr_tokens)) + + return add_segment, unfinished_decoding, last_token_reliable + + def may_flush_segment(curr_tokens = None): + """ Add a speech segment with the new tokens if necessary. 
+ May also remove the last collected segments if filtered out by Whisper (no_speech_prob <= no_speech_threshold) + """ + nonlocal segment_tokens, segment_attweights, timestamped_word_segments, segment_logprobs, has_started, no_speech_prob, chunk_tokens, chunk_tokens_nosot, chunk_logprobs, mfcc, new_mfcc, logit_filters, index_begin_30sec_chunck, last_token_fallback, num_inference_steps, last_chunk_token + + # Check if a new segment should be added + unfinished_decoding = False + last_token_reliable = True + + if must_flush_segment(curr_tokens) and trust_whisper_timestamps: + _, unfinished_decoding, last_token_reliable = align_last_segment(curr_tokens) + + i_start = get_index_begin_30sec_chunck(curr_tokens) + + # All segments from previous 30sec chunck have been collected + if i_start is not None: + + if not trust_whisper_timestamps: + + tokens = torch.Tensor(segment_tokens[-1]).int() + idx_task = torch.where(tokens==tokenizer.sot_sequence[-1])[0][0].item() # index of <|transcribe|> + + is_special = tokens.ge(tokenizer.eot) + # Remove prompt + is_special[:idx_task] = True + # Keep begin timestamp + is_special[idx_task:idx_task+2] = False + + is_timestamp = tokens.ge(tokenizer.timestamp_begin) + consecutive = torch.where(is_timestamp[1:] & is_timestamp[:-1])[0] + if (WHIPSER_GE_20230306 or has_reached_decoding_limit()) and ( + (is_timestamp[-1] and not is_timestamp[-2]) if last_chunk_token is None else + last_chunk_token >= tokenizer.timestamp_begin and not is_timestamp[-2] + ): + consecutive = torch.cat([consecutive, torch.Tensor([len(tokens)-1]).int()]) + last_is_timestamp = True + if len(consecutive): + # Remove last tokens + is_special[consecutive[-1]+1:] = True + # Keep end timestamp + is_special[consecutive[-1]] = False + elif is_timestamp[-1]: + # Keep end timestamp + is_special[-1] = False + else: + last_is_timestamp = False + + if use_timestamps_for_alignment and len(consecutive): + # Keep all timestamps + is_special[idx_task+2:consecutive[-1]] = False + + # 
Do remove what has to be removed + is_next_achar = ~torch.cat([is_special[1:], torch.Tensor([False]).bool()]) + for i, weights in enumerate(segment_attweights): + assert len(weights) == len(tokens), f"{len(weights)} attention weights != {len(tokens)}" + # We must remove attention weights used to predict timestamp tokens + segment_attweights[i] = [w for s, w in zip(is_next_achar, weights) if s] + tokens_filtered = tokens[~is_special] + assert len(segment_attweights[0]) == len(tokens_filtered), f"{len(segment_attweights[0])} attention weights != {len(tokens_filtered)} " + + # Replace first and last timestamp + orig_start, orig_end = tokens_filtered[1].item(), tokens_filtered[-1].item() + tokens_filtered[1] = tokenizer.timestamp_begin # <|0.00|> + if last_is_timestamp: + tokens_filtered[-1] = tokenizer.timestamp_begin + N_FRAMES // 2 # <|30.00|> + segment_tokens[-1] = tokens_filtered.tolist() + + # Do alignement + added, unfinished_decoding, last_token_reliable = align_last_segment() + + # Re-split into segments (if necessary) + if added: + if len(consecutive) > 1: + segments_timestamped_concat = timestamped_word_segments[-1] + new_segments_timestamped = [] + new_segment_tokens = [] + start = idx_task+1 + i_word = 0 + for i, end in enumerate(consecutive): + end = end.item() + new_segment_tokens.append(tokens[start:end+1].tolist()) + if debug: + logger.debug(f"Add segment {len(timestamped_word_segments)+i}:\n\t{tokenizer.decode_with_timestamps(new_segment_tokens[-1])}") + total_length = end - start - 1 + start = end+1 + length = 0 + new_segments_timestamped.append([]) + while length < total_length: + if not use_timestamps_for_alignment and i_word == len(segments_timestamped_concat): + # This can happen in the case of "..." + assert total_length == 1 and i == len(consecutive)-1, "Unexpected situation!" 
+ break + assert i_word < len(segments_timestamped_concat), f"i_word={i_word} < len(segments_timestamped_concat)={len(segments_timestamped_concat)}" + word = segments_timestamped_concat[i_word] + new_segments_timestamped[-1].append(word) + length += len(word["tokens_indices"]) + i_word += 1 + # This can be non zero, when a punctuation (alone in a segment) is glued to the previous segment + if use_timestamps_for_alignment: + assert length == total_length, f"length={length} != total_length={total_length}" + elif length > total_length: + delta = length - total_length + word = new_segments_timestamped[-1][-1] + word_tokindices = word["tokens_indices"] + word_tokens = word["tokens"] + word["tokens_indices"] = word_tokindices[:-delta] + word["tokens"] = word_tokens[:-delta] + word["word"] = "".join(word_tokens[:-delta]) + i_word -= 1 + t = segments_timestamped_concat[i_word]["end"] + segments_timestamped_concat[i_word] = dict( + text="".join(word_tokens[-delta:]), + start=t, end=t, # Word without timestamp + tokens=word_tokens[-delta:], + tokens_indices=word_tokindices[-delta:], + ) + + assert i_word == len(segments_timestamped_concat) + + segment_tokens = segment_tokens[:-2] + new_segment_tokens + [segment_tokens[-1]] + timestamped_word_segments = timestamped_word_segments[:-1] + new_segments_timestamped + + else: + + # Recover start and end token + segment = segment_tokens[-2] + tokenizer.decode_with_timestamps([orig_start,orig_end]) + segment[0] = orig_start + if last_is_timestamp: + segment[-1] = orig_end + + if debug: + logger.debug(f"Add segment {len(timestamped_word_segments)}:\n\t{tokenizer.decode_with_timestamps(segment)}") + + if unfinished_decoding: + timestamped_word_segments[-1][-1]["avg_logprob_reliable"] = last_token_reliable + + reset(False) + + mfcc = new_mfcc + + n_segments = len(segment_tokens)-1 + + # Get word confidence and/or check if previous segments shoud have been skipped + should_skip = False + if compute_word_confidence or no_speech_threshold 
is not None: + + # no voice activity check + should_skip = (no_speech_prob > no_speech_threshold) if (no_speech_threshold is not None) else False + if compute_word_confidence or (should_skip and logprob_threshold is not None): + n = len(chunk_logprobs) + if n == len(chunk_tokens_nosot): + chunk_tokens_nosot = chunk_tokens_nosot[1:] + if unfinished_decoding: + assert last_token_fallback is not None + last_tokens = [last_token_fallback] + timestamped_word_segments[-1][-1]["avg_logprob_reliable"] = last_token_reliable + n += 1 + elif has_reached_decoding_limit(): + # there were segments in the 30sec chunck, and then the LM got stuck + last_tokens = [torch.argmax(chunk_logprobs[-1]).item()] + timestamped_word_segments[-1][-1]["avg_logprob_reliable"] = (temperature == 0) + else: + last_tokens = [tokenizer.eot] + chunck_indices = chunk_tokens_nosot + last_tokens + assert len(chunk_logprobs) == len(chunck_indices), f"{len(chunk_logprobs)} != {len(chunck_indices)}" + logprobs = torch.cat([logprob[i].unsqueeze(0) for (logprob, i) in zip(chunk_logprobs, chunck_indices)]) + assert min([p.isfinite().item() for p in logprobs]), \ + f"Got infinite logprob among ({len(logprobs)}) {[(i, tokenizer.decode_with_timestamps([i]), v.item()) for (i,v) in zip(chunck_indices, logprobs)]}" + sum_logprob = sum(logprobs) + avg_logprob = sum_logprob/n + # don't skip if the logprob is high enough, whatever the no_speech_prob is + if logprob_threshold is not None and avg_logprob > logprob_threshold: + should_skip = False + + if should_skip: + logger.debug(f"Skipping last {n_segments-i_start} segments (no_speech_prob {no_speech_prob} > {no_speech_threshold} and avg_logprob {avg_logprob} < {logprob_threshold})") + index_begin_30sec_chunck -= n_segments-i_start + segment_tokens = segment_tokens[:i_start] + [segment_tokens[-1]] + timestamped_word_segments = timestamped_word_segments[:i_start] + elif compute_word_confidence: + avg_logprob = avg_logprob.item() + i_token_end = -1 + for i in 
range(i_start, n_segments): + tokens = segment_tokens[i] + i_token_start = i_token_end + 1 + i_token_end = i_token_start + len(tokens) + assert chunck_indices[i_token_start:i_token_end] == tokens, f"Inconsistent token list {tokenizer.decode_with_timestamps(chunck_indices[i_token_start:i_token_end])} != {tokenizer.decode_with_timestamps(tokens)}" + i_token_start += 1 # skip sos (start time) + if not unfinished_decoding or i != n_segments-1: + i_token_end -= 1 # skip eos (end time) + segment_logprobs.append(logprobs[i_token_start:i_token_end]) + segment_avglogprobs.append(avg_logprob) + else: + for i in range(i_start, n_segments): + segment_logprobs.append(None) + segment_avglogprobs.append(None) + + else: + for i in range(i_start, n_segments): + segment_logprobs.append(None) + segment_avglogprobs.append(None) + + if verbose_bugged and not should_skip: + for segment in timestamped_word_segments[i_start:]: + for word in segment: + print_timestamped(word) + + # Reset counters + chunk_tokens = [] + chunk_tokens_nosot = [] + chunk_logprobs = [] + no_speech_prob = None + + def hook_attention_weights(layer, ins, outs, index): + nonlocal segment_attweights + # In old version of whisper, output is a single tensor + assert isinstance(outs, tuple) and len(outs) == 2, "whisper seems to be outdated, please update it (pip install --upgrade --no-deps --force-reinstall git+https://github.com/openai/whisper.git)" + if not has_started: + return + w = outs[-1] + # Only the last attention weights is useful + if w.shape[-2] > 1: + w = w[:, :, -1:, :] + segment_attweights[index].append(w.cpu()) + + def hook_mfcc(layer, ins, outs): + nonlocal new_mfcc, mfcc + new_mfcc = ins[0] + if mfcc is None: + mfcc = new_mfcc + + def hook_input_tokens(layer, ins, outs): + nonlocal segment_tokens, sot_index, chunk_tokens, chunk_tokens_nosot, logit_filters, has_started, language, num_inference_steps + num_inference_steps += 1 + + curr_tokens = ins[0] + assert curr_tokens.shape[0] == 1, "Batch decoding 
is not supported" + curr_tokens = curr_tokens.squeeze(0) + + if is_sot(curr_tokens): + chunk_prompt = curr_tokens.tolist() + if language is None: + if len(curr_tokens) > 1: + language = tokenizer.decode(curr_tokens[-2:-1]) + language = language[2:-2] # remove trailing "<|" and "|>" + whisper_options["language"] = language + + if verbose and not whisper_options["verbose"] and len(curr_tokens) > 1: + # Reproduce whisper verbose (2/2) + print(f"Detected language: {whisper.tokenizer.LANGUAGES[language].title()}") + sys.stdout.flush() + + logit_filters = get_logit_filters(model, whisper_options, prompt = chunk_prompt[1:-len(tokenizer.sot_sequence)]) + + may_flush_segment(curr_tokens) + + # Get the index of the <|startoftranscript|> tokens (to get proba of silence later) + if is_sot(curr_tokens): + has_started = len(curr_tokens) > 1 or not model.is_multilingual + if no_speech_threshold is not None: + sot_index = curr_tokens.tolist().index(tokenizer.sot) + else: + sot_index = None + + # Keep the last token only + if has_started: + segment_tokens[-1].append(curr_tokens[-1].item()) + + # Accumulate tokens + if has_started: + chunk_tokens.append(curr_tokens) + if not is_sot(curr_tokens): + chunk_tokens_nosot.append(curr_tokens[-1].item()) + else: + if verbose and not whisper_options["verbose"]: + # Reproduce whisper verbose (1/2) + print("Detecting language using up to the first 30 seconds. 
Use `--language` to specify the language") + + embedding_weights = None + def hook_output_logits(layer, ins, outs): + nonlocal no_speech_prob, chunk_logprobs, segment_tokens, chunk_tokens, chunk_tokens_nosot, last_chunk_token, embedding_weights, has_started + + if embedding_weights is None: + embedding_weights = torch.transpose(model.decoder.token_embedding.weight, 0, 1).to(outs[0].dtype) + + # Get the probability of silence + if sot_index is not None: + logits = (outs[0][sot_index,:] @ embedding_weights).float() + logits = logits.softmax(dim=-1) + no_speech_prob = logits[tokenizer.no_speech].item() + + # Get the log-probabilities of tokens (we don't know yet which one will be chosen) + if has_started: + logits = (outs[0][-1:,:] @ embedding_weights).float() + tokens = torch.cat(chunk_tokens).unsqueeze(0) + for logit_filter in logit_filters: + logit_filter.apply(logits, tokens) + logits = F.log_softmax(logits.squeeze(0), dim=-1) + chunk_logprobs.append(logits) + + if WHIPSER_GE_20230306 and has_reached_decoding_limit(): + last_chunk_token = torch.argmax(logits).item() + else: + last_chunk_token = None + + try: + + # Add hooks to the model, to get tokens and attention weights on the fly + all_hooks = [] + all_hooks.append(model.encoder.conv1.register_forward_hook(hook_mfcc)) + all_hooks.append(model.decoder.token_embedding.register_forward_hook(hook_input_tokens)) + nblocks = len(model.decoder.blocks) + j = 0 + for i, block in enumerate(model.decoder.blocks): + if i < nblocks - word_alignement_most_top_layers: + continue + all_hooks.append( + block.cross_attn.register_forward_hook( + lambda layer, ins, outs, index=j: hook_attention_weights(layer, ins, outs, index)) + ) + j += 1 + if compute_word_confidence or no_speech_threshold is not None: + all_hooks.append(model.decoder.ln.register_forward_hook(hook_output_logits)) + + transcription = model.transcribe(audio, **whisper_options) + + finally: + + # Remove hooks + for hook in all_hooks: + hook.remove() + + # Finalize 
(collect last segment) + may_flush_segment() + segment_tokens.pop(-1) + + token_special_idx = min(tokenizer.sot, tokenizer.eot) + def filter_tokens(tokens): + while len(tokens) and tokens[0] >= token_special_idx: + tokens = tokens[1:] + while len(tokens) and tokens[-1] >= token_special_idx: + tokens = tokens[:-1] + return tokens + + assert len(segment_tokens) == len(timestamped_word_segments), f"Inconsistent number of segments: tokens ({len(segment_tokens)}) != timestamped_word_segments ({len(timestamped_word_segments)})" + assert len(segment_avglogprobs) == len(segment_tokens), f"Inconsistent number of segments: avg logprobs ({len(segment_avglogprobs)}) != tokens ({len(segment_tokens)})" + assert len(segment_logprobs) == len(segment_tokens), f"Inconsistent number of segments: logprobs ({len(segment_logprobs)}) != tokens ({len(segment_tokens)})" + + whisper_segments = transcription["segments"] + l1 = len(whisper_segments) + l2 = len(timestamped_word_segments) + if l1 != l2 and l1 != 0: + logger.warning(f"Inconsistent number of segments: whisper_segments ({l1}) != timestamped_word_segments ({l2})") + assert l1 == l2 or l1 == 0, f"Inconsistent number of segments: whisper_segments ({l1}) != timestamped_word_segments ({l2})" + + logger.debug("Compile results") + words = [] + for i, (segment, timestamped_words, token, avglogprob, logprobs) in enumerate(zip(whisper_segments, timestamped_word_segments, segment_tokens, segment_avglogprobs, segment_logprobs)): + timestamped_tokens = filter_tokens(token) + whisper_tokens = filter_tokens(segment["tokens"]) + if timestamped_tokens != whisper_tokens: + if len(timestamped_tokens) == len(whisper_tokens) + 1: + logger.warning(f"An additional token was added on segment {i}") + elif WHIPSER_GE_20230306 and len(whisper_tokens) == 0: + logger.warning(f"Whisper has empty segment {i}") + assert segment["end"] == segment["start"], f"Fatal Error: Got empty segment {i} with non-zero duration" + segment["tokens"] = timestamped_tokens + 
segment["text"] = tokenizer.decode(timestamped_tokens) + else: + assert len(timestamped_tokens) < len(whisper_tokens) and timestamped_tokens == whisper_tokens[:len(timestamped_tokens)], \ + f"Fatal Error: Got inconsistent text for segment {i}:\n({len(timestamped_tokens)})\n{tokenizer.decode_with_timestamps(timestamped_tokens)}\n{timestamped_tokens}\n!=\n({len(whisper_tokens)})\n{tokenizer.decode_with_timestamps(whisper_tokens)}\n{whisper_tokens[:len(timestamped_tokens)]}" + segment["tokens"] = token if WHIPSER_GE_20230306 else timestamped_tokens # tokens include special timestamp tokens since 20230306 + segment["text"] = tokenizer.decode(segment["tokens"]) + logger.warning(f"Text had to be shortned on segment {i}:\n{tokenizer.decode(timestamped_tokens)}\n!=\n{tokenizer.decode(whisper_tokens)}") + timestamped_words[-1]["avg_logprob_reliable"] = False + + offset = segment["seek"] * HOP_LENGTH / SAMPLE_RATE + for timestamped_word in timestamped_words: + timestamped_word["start"] += offset + timestamped_word["end"] += offset + timestamped_word["idx_segment"] = i + + if compute_word_confidence: + if "avg_logprob_reliable" not in timestamped_words[-1] or timestamped_words[-1]["avg_logprob_reliable"]: + # assert abs(segment["avg_logprob"] - avglogprob) < 1e-2, f"Fatal Error: Got inconsistent logprob for segment {i}: {segment['avg_logprob']} != {avglogprob}" + if abs(segment["avg_logprob"] - avglogprob) >= 1e-2: + logger.warning(f"Recomputed different logprob for segment {i}: {avglogprob} != {segment['avg_logprob']}") + if include_punctuation_in_confidence: + segment["confidence"] = round_confidence(logprobs.mean().exp().item()) + else: + logprobs_nopunc = [] + i_end = 0 + for timestamped_word in timestamped_words: + i_start = i_end + tokens = timestamped_word["tokens"] + i_end += len(tokens) + + assert i_end <= len(logprobs), f"Fatal Error: Got out-of-bound index for segment {i}: {i_end} > {len(logprobs)}" + if include_punctuation_in_confidence: + word_logprobs = 
logprobs[i_start:i_end] + else: + while len(tokens) > 1 and len(tokens[-1]) and tokens[-1][-1] in _punctuation: # Note: look at the last character of token, to take into account "...", "!!", etc. + tokens = tokens[:-1] + word_logprobs = logprobs[i_start:i_start + len(tokens)] + logprobs_nopunc.append(word_logprobs) + + timestamped_word["confidence"] = round_confidence(word_logprobs.mean().exp().item() if len(word_logprobs) else 0.0) + + if i_end not in [len(logprobs), len(logprobs)-1]: + logger.warning(f"Got inconsistent length for segment {i} ({len(logprobs)} != {i_end}). Some words have been ignored.") + if not include_punctuation_in_confidence: + logprobs_nopunc = torch.cat(logprobs_nopunc) + segment["confidence"] = round_confidence(logprobs_nopunc.mean().exp().item()) + + words.extend(timestamped_words) + + return transcription, words + +def _transcribe_timestamped_naive( + model, + audio, + remove_punctuation_from_words, + compute_word_confidence, + include_punctuation_in_confidence, + refine_whisper_precision_nframes, + alignment_heads, + plot_word_alignment, + word_alignement_most_top_layers, + detect_disfluencies, + trust_whisper_timestamps, + min_word_duration, + **whisper_options, +): + verbose = whisper_options["verbose"] + whisper_options["verbose"] = None if whisper_options["verbose"] is True else whisper_options["verbose"] # We will print intermediate results ourselves + language = whisper_options["language"] + refine_whisper_precision_sec = refine_whisper_precision_nframes * AUDIO_TIME_PER_TOKEN + + word_alignement_most_top_layers = float("inf") if word_alignement_most_top_layers is None else word_alignement_most_top_layers + + audio = get_audio_tensor(audio) + audio_duration = audio.shape[-1] / SAMPLE_RATE + + if verbose and language is None and not whisper_options["verbose"]: + # Reproduce whisper verbose (1/2) + print("Detecting language using up to the first 30 seconds. 
Use `--language` to specify the language") + + transcription = model.transcribe(audio, **whisper_options) + + if verbose and language is None and not whisper_options["verbose"]: + # Reproduce whisper verbose (2/2) + print(f"Detected language: {whisper.tokenizer.LANGUAGES[transcription['language']].title()}") + sys.stdout.flush() + + language = norm_language(transcription["language"]) + + tokenizer = whisper.tokenizer.get_tokenizer(model.is_multilingual, task=whisper_options["task"], language=language) + use_space = should_use_space(language) + + attention_weights = [[] for _ in range(min(word_alignement_most_top_layers,len(model.decoder.blocks)))] + + try: + + all_hooks = [] + + # Hook the model + nblocks = len(model.decoder.blocks) + j = 0 + for i, block in enumerate(model.decoder.blocks): + if i < nblocks - word_alignement_most_top_layers: + continue + all_hooks.append( + block.cross_attn.register_forward_hook( + lambda layer, ins, outs, index=j: attention_weights.__setitem__(index, outs[-1]) + ) + ) + j += 1 + + + # When not relying on Whisper timestamps + current_tokens = [] + token_to_idx_segment = [] + + words = [] + previous_end = 0 + whisper_segments = transcription["segments"] + for i_segment, segment in enumerate(whisper_segments): + + # Note: this could also be a fix to issue #61 where a "<|te|>" token was predicted + # segment["tokens"] = [t for t in segment["tokens"] if t < tokenizer.eot or t >= tokenizer.timestamp_begin] + + start = end = tokens = None + if trust_whisper_timestamps: + + start = segment["start"] + end = segment["end"] + if end < start: + # Whisper is wrong on the prediction of segment end + end = min(audio_duration, start + SEGMENT_DURATION) + + start_margin_min = start - refine_whisper_precision_sec + start_margin_max = start + refine_whisper_precision_sec + if start >= audio_duration - min_word_duration or (previous_end >= start_margin_min and previous_end <= start_margin_max): + # Make start as accurate as possible (as the decoding 
will start with timestamp <|0|>) + start = previous_end + else: + # Fallback + start = start_margin_min + + if start > audio_duration - min_word_duration: + # Skip last segment if too short + logger.warning(f"Skipping segment outside of audio duration {audio_duration} (original: {segment['start']}-{segment['end']}, new: {start}-XXX)") + continue + + end_margin_min = end - refine_whisper_precision_sec + end_margin_max = end + refine_whisper_precision_sec + if i_segment < len(whisper_segments) - 1: + # Try to enforce: + # end + min_word_duration <= next start + refine_whisper_precision_sec + end_margin_max2 = whisper_segments[i_segment + 1]["start"] + refine_whisper_precision_sec - min_word_duration + if end_margin_max2 >= end_margin_min: + end_margin_max = min(end_margin_max2, end_margin_max) + end = min(audio_duration, end_margin_max) + + if end < start + min_word_duration: + logger.warning(f"Got super short segment (original from whisper: {segment['start']}-{segment['end']}, new: {start, end})") + end = min(audio_duration, start + min_word_duration) + if end <= start: + logger.warning(f"Skipping this short segment occuring too close to the end of the audio") + continue + + tokens = segment["tokens"] + + else: + + seek = segment["seek"] + new_tokens = segment["tokens"] + if not len(new_tokens): + continue + # Add timestamps that will be needed after + if new_tokens[0] < tokenizer.timestamp_begin: + relative_start = segment["start"] - (seek * HOP_LENGTH / SAMPLE_RATE) + start_token = round(relative_start * SAMPLE_RATE / AUDIO_SAMPLES_PER_TOKEN) + tokenizer.timestamp_begin + new_tokens = [start_token] + new_tokens + if new_tokens[-1] < tokenizer.timestamp_begin: + relative_end = segment["end"] - (seek * HOP_LENGTH / SAMPLE_RATE) + end_token = round(relative_end * SAMPLE_RATE / AUDIO_SAMPLES_PER_TOKEN) + tokenizer.timestamp_begin + new_tokens = new_tokens + [end_token] + + current_tokens.extend(new_tokens) + token_to_idx_segment.extend([i_segment] * len(new_tokens)) + 
+ next_seek = whisper_segments[i_segment+1]["seek"] if i_segment < len(whisper_segments) - 1 else None + if seek != next_seek: + start = float(seek * HOP_LENGTH / SAMPLE_RATE) + assert start < audio_duration, f"Got start {start} which is outside of audio duration {audio_duration}" + end = min(start + SEGMENT_DURATION, audio_duration) + tokens = current_tokens + + if tokens is None or not len(tokens): + continue + + start_sample = min(round(start * SAMPLE_RATE), audio.shape[-1]) + end_sample = min(round(end * SAMPLE_RATE), audio.shape[-1]) + + # Extract features on the audio segment + sub_audio = audio_minimum_padding(audio[start_sample:end_sample]) + + mfcc = whisper.log_mel_spectrogram(sub_audio).to(model.device) + mfcc = whisper.pad_or_trim(mfcc, N_FRAMES) + mfcc = mfcc.unsqueeze(0) + + segment_tokens_check = [] + if tokens[0] >= tokenizer.timestamp_begin: + segment_tokens_check.append(tokens[0]) + while tokens[0] >= tokenizer.timestamp_begin: + tokens = tokens[1:] + assert len(tokens), "Got transcription with only timestamps!" 
+ last_token_check = None + while tokens[-1] >= tokenizer.timestamp_begin: + last_token_check = tokens[-1] + tokens = tokens[:-1] + + tokens = [ + *tokenizer.sot_sequence, + tokenizer.timestamp_begin, + ] + tokens + + i_start = len(tokenizer.sot_sequence) + + with torch.no_grad(): + logprobs = model(mfcc, torch.Tensor(tokens).int().to(model.device).unsqueeze(0)) + logprobs = F.log_softmax(logprobs, dim=-1) + + end_token = tokenizer.timestamp_begin + round(min(N_FRAMES * HOP_LENGTH, end_sample - start_sample) // AUDIO_SAMPLES_PER_TOKEN) + tokens = tokens[i_start:] + [end_token] + attention_weights = [w[:, :, i_start-1:, :] for w in attention_weights] + + ws = perform_word_alignment( + tokens, + attention_weights, + tokenizer, + use_space=use_space, + alignment_heads=alignment_heads, + remove_punctuation_from_words=remove_punctuation_from_words, + refine_whisper_precision_nframes=refine_whisper_precision_nframes, + detect_disfluencies=detect_disfluencies, + mfcc=mfcc, + plot=plot_word_alignment, + ) + + segment_logprobs = [] + i_token = 1 + + for word in ws: + + word["start"] = round(word["start"] + start, 2) + word["end"] = round(word["end"] + start, 2) + + if trust_whisper_timestamps: + word.update({"idx_segment": i_segment}) + else: + assert i_token < len(tokens) + assert not len(word["tokens_indices"]) or word["tokens_indices"][0] == tokens[i_token] + word.update({"idx_segment": token_to_idx_segment[i_token]}) + i_token += len(word["tokens"]) + while i_token < len(tokens) and tokens[i_token] >= tokenizer.timestamp_begin: + i_token += 1 + + tok_indices = word["tokens_indices"] + segment_tokens_check.extend(tok_indices) + + if compute_word_confidence: + tok = word["tokens"] + i_end = i_start + len(tok) + if include_punctuation_in_confidence: + while len(tok) > 1 and len(tok[-1]) and tok[-1][-1] in _punctuation: # Note: look at the last character of token, to take into account "...", "!!", etc. 
def get_audio_tensor(audio, device="cpu"):
    """
    Return `audio` as a torch tensor moved to `device`.

    audio: path to an audio file (loaded with `whisper.load_audio`),
        a numpy array, or a torch tensor
    device: target passed to `Tensor.to`
    """
    if isinstance(audio, str):
        audio = whisper.load_audio(audio)
    if isinstance(audio, np.ndarray):
        audio = torch.Tensor(audio)
    else:
        assert isinstance(audio, torch.Tensor), f"Got unexpected audio of type {type(audio)}"
    return audio.to(device)


def audio_minimum_padding(audio):
    """
    Pad very short audio so that it has at least 201 samples.

    Audio longer than 200 samples is returned untouched (same object).
    """
    # NOTE(review): 201 is presumably the smallest length accepted by the
    # feature extraction that follows -- confirm against whisper's STFT settings.
    if audio.shape[-1] <= 200:
        return whisper.pad_or_trim(audio, 201)
    return audio


def should_use_space(language):
    """Tell whether words of `language` are separated by spaces."""
    return norm_language(language) not in ["zh", "ja", "th", "lo", "my"]


def norm_language(language):
    """
    Normalize a language name or code through `whisper.tokenizer.TO_LANGUAGE_CODE`.

    `None` falls back to "en"; a value missing from the table is returned unchanged.
    """
    if language is None:
        return "en"
    return whisper.tokenizer.TO_LANGUAGE_CODE.get(language.lower(), language)


def print_timestamped(w):
    """
    Print one timestamped word as "[start --> end] text" on standard output.

    w: dictionary with keys "start", "end" and "text"
    """
    line = f"[{format_timestamp(w['start'])} --> {format_timestamp(w['end'])}] {w['text']}\n"
    stream = sys.stdout
    raw = getattr(stream, "buffer", None)
    if raw is None:
        # stdout was replaced by a text-only stream (io.StringIO, some notebook /
        # web front-ends): there is no binary layer, so write the text directly
        # instead of failing with AttributeError.
        stream.write(line)
    else:
        # Writing encoded bytes with errors="replace" (rather than `print(line)`)
        # avoids UnicodeEncodeError on characters that cannot be encoded.
        raw.write(line.encode(sys.getdefaultencoding(), errors="replace"))
    stream.flush()


def get_logit_filters(model, whisper_options, prompt = None):
    """
    Build the logit filters that Whisper's decoding task would use for the given options.

    model: Whisper model
    whisper_options: dictionary of transcription options (options that are not
        decoding options are dropped, see `get_decoding_options`)
    prompt: prompt to use; when None, the "initial_prompt" option (if any) is used
    """
    decoding_options = get_decoding_options(whisper_options)
    if "initial_prompt" in decoding_options:
        # "initial_prompt" is always removed; it only serves as a default for `prompt`
        prompt0 = decoding_options.pop("initial_prompt")
        if prompt is None:
            prompt = prompt0
    if prompt is not None:
        decoding_options["prompt"] = prompt
    decoding_options = whisper.DecodingOptions(
        without_timestamps=False,
        max_initial_timestamp=1.0,
        prefix=None,
        suppress_blank=True,
        **decoding_options
    )

    # This performs some checks on the options
    decoding_task = whisper.decoding.DecodingTask(model, decoding_options)
    return decoding_task.logit_filters


# Options that are removed by get_decoding_options
_NON_DECODING_OPTIONS = (
    "no_speech_threshold",
    "logprob_threshold",
    "compression_ratio_threshold",
    "condition_on_previous_text",
    "verbose",
)


def get_decoding_options(whisper_options):
    """
    Return a new dictionary with the entries of `whisper_options`,
    minus the options listed in `_NON_DECODING_OPTIONS`.

    The input dictionary is not modified.
    """
    return {
        k: v
        for (k, v) in whisper_options.items()
        if k not in _NON_DECODING_OPTIONS
    }
+ + tokens: list of tokens (integers) + attention_weights: list of attention weights (torch tensors) + tokenizer: tokenizer used to tokenize the text + use_space: whether to use spaces to split the tokens into words (should be true for all languages except Japanese, Chinese, ...) + mfcc: MFCC features (used to identify padded region, and for plotting) + refine_whisper_precision_nframes: precision time + remove_punctuation_from_words: whether to remove punctuation from words + include_punctuation_in_timing: whether to include punctuation in the timing of (previous) words + unfinished_decoding: whether the decoding is unfinished (e.g. because the model is stuck) + alignment_heads: list of attention heads to use for alignment + medfilt_width: width of the median filter used to smooth the attention weights + qk_scale: scale factor applied to the attention weights + plot: whether to plot the word alignment + debug: whether to print debug information + """ + + assert len(tokens) > 1, f"Got unexpected sequence of tokens of length {len(tokens)} {tokenizer.decode_with_timestamps(tokens)}" + start_token = tokens[0] - tokenizer.timestamp_begin + end_token = tokens[-1] - tokenizer.timestamp_begin + + # Check start / end tokens + if start_token < 0: + raise RuntimeError(f"Missing start token in: {tokenizer.decode_with_timestamps(tokens)}") + if len(tokens) == 1 or end_token < 0: + # This can happens when Whisper is stucked as a Language Model + if debug: + logger.debug(f"Missing end token in {tokenizer.decode_with_timestamps(tokens)}") + end_token = N_FRAMES // 2 + if end_token == start_token and refine_whisper_precision_nframes == 0: + if debug: + logger.debug(f"Got empty segment in {tokenizer.decode_with_timestamps(tokens)}") + return [] + + # Let a minimal duration given the number of tokens (see https://github.com/linto-ai/whisper-timestamped/issues/67) + end_token = min(N_FRAMES // 2, max(end_token, start_token + len(tokens))) + + # Put some margin around the segment + if 
refine_whisper_precision_nframes > 0: + start_token = max(start_token - refine_whisper_precision_nframes, 0) + end_token = min(end_token + refine_whisper_precision_nframes, N_FRAMES // 2) + + if end_token <= start_token: + raise RuntimeError(f"Got segment with null or negative duration {tokenizer.decode_with_timestamps(tokens)}: {start_token} {end_token}") + + start_time = start_token * AUDIO_TIME_PER_TOKEN + # end_time = end_token * AUDIO_TIME_PER_TOKEN + + split_tokens = split_tokens_on_spaces if use_space else split_tokens_on_unicode + words, word_tokens, word_tokens_indices = split_tokens(tokens, tokenizer, remove_punctuation_from_words=remove_punctuation_from_words) + + # If the last token is a punctuation that comes after a word + # group this final punctuation with the final timestamp + # This is to avoid assigning the final punctuation to a big silence or a noise/music background coming after + num_punctuations_per_tokens = [ + 0 if len(w) == 1 or w[-1] not in _punctuation else 1 + for w in word_tokens + ] + if include_punctuation_in_timing: + num_punctuations_per_tokens[:-2]=[0]*(len(num_punctuations_per_tokens)-2) + + for i, w in enumerate(attention_weights): + assert w.shape[-2] == len(tokens), f"Attention weights have wrong shape: {w.shape[-2]} (expected {len(tokens)})." 
+ weights = torch.cat(attention_weights) # layers * heads * tokens * frames + + num_tokens = weights.shape[-2] + num_frames = end_token - start_token + if num_tokens > num_frames: + logger.warning(f"Too much text ({num_tokens} tokens) for the given number of frames ({num_frames}) in: {tokenizer.decode_with_timestamps(tokens)}\nThe end of the text will be removed.") + return perform_word_alignment( + tokens[:num_frames-1] + [tokens[-1]], + [torch.cat([w[:, :, :num_frames-1, :], w[:, :, -1:, :]], dim=-2) + for w in attention_weights], + tokenizer, + use_space=use_space, + refine_whisper_precision_nframes=refine_whisper_precision_nframes, + medfilt_width=medfilt_width, + qk_scale=qk_scale, + alignment_heads=alignment_heads, + mfcc=mfcc, + plot=plot, + remove_punctuation_from_words=remove_punctuation_from_words, + detect_disfluencies=detect_disfluencies, + subwords_can_be_empty=subwords_can_be_empty, + unfinished_decoding=True, + debug=debug, + ) + + assert end_token <= weights.shape[-1] + assert len(tokens) == num_tokens + + weights = weights[..., start_token: end_token].cpu() # layers * heads * tokens * frames + + if alignment_heads is None: + weights = weights.reshape(-1, *weights.shape[-2:]) # N * tokens * frames + else: + weights = torch.stack([weights[l][h] for l, h in alignment_heads.indices().T]) + weights = median_filter(weights, (1, 1, medfilt_width)) + weights = torch.tensor(weights * qk_scale).softmax(dim=-1) + weights = weights.mean(axis=(0)) # average over layers and heads # tokens * frames + weights = weights / weights.norm(dim=-2, keepdim=True) # This was before the mean before 1.9 + weights = -weights.double().numpy() + worse_weight = 0 + + # Get the limit of audio duration + max_duration = None + if mfcc is not None: + max_duration = find_start_padding(mfcc) + if max_duration is not None: + max_duration = max_duration // 2 + + # Enforce the max duration + if max_duration: + if start_token >= max_duration: + logger.warning(f"Got start time outside of 
audio boundary") + else: + weights[:-1, max_duration:] = worse_weight + + # Encourage to start early + weights[0, 0] = weights.min() + # weights[0, refine_whisper_precision_nframes*2:] = worse_weight + + if subwords_can_be_empty: + step_pattern = dtw.stepPattern.symmetric1 + else: + # Similar as "symmetric1" but without the possibility to have the same timestamp for two tokens + step_pattern = dtw.stepPattern.StepPattern(dtw.stepPattern._c( + 1, 1, 1, -1, + 1, 0, 0, 1, + 2, 0, 1, -1, + 2, 0, 0, 1, + )) + alignment = dtw.dtw(weights, step_pattern=step_pattern) + + global num_alignment_for_plot + num_alignment_for_plot += 1 + + if plot: + import matplotlib.pyplot as plt + import matplotlib.ticker as ticker + + plot_mfcc = 1 if mfcc is not None else 0 + plot_disfluencies = 1 if detect_disfluencies else 0 + nplots = (1 + plot_mfcc + plot_disfluencies) + + plt.subplots(nplots, 1, figsize=(16, 9), gridspec_kw={'height_ratios': [3] + [1] * (nplots - 1)}) + plt.subplot(nplots, 1, 1, frameon=False) + + plt.imshow(-weights, aspect="auto") + plt.plot(alignment.index2s, alignment.index1s, color="red") + + xticks = np.arange(0, weights.shape[1], 1 / AUDIO_TIME_PER_TOKEN) + xticklabels = [round_timestamp(x) for x in xticks * AUDIO_TIME_PER_TOKEN + start_time] + + ylims = plt.gca().get_ylim() + + ax = plt.gca() + ax.tick_params('both', length=0, width=0, which='minor', pad=6) + + ax.yaxis.set_ticks_position("left") + ax.yaxis.set_label_position("left") + ax.invert_yaxis() + ax.set_ylim(ylims) + + major_ticks = [-0.5] + minor_ticks = [] + current_y = 0 + + for word, word_token in zip(words, word_tokens): + minor_ticks.append(current_y + len(word_token) / 2 - 0.5) + current_y += len(word_token) + major_ticks.append(current_y - 0.5) + + words_with_subwords = ["|".join(s).strip() for (w, s) in zip(words, word_tokens)] + + ax.yaxis.set_minor_locator(ticker.FixedLocator(minor_ticks)) + ax.yaxis.set_minor_formatter( + ticker.FixedFormatter(words_with_subwords)) + 
ax.set_yticks(major_ticks) + ax.yaxis.set_major_formatter(ticker.NullFormatter()) + for y in major_ticks: + plt.axhline(y, color="black", linestyle="dashed") + + plt.ylabel("Words") + + if plot_mfcc: + plt.xticks(xticks) + plt.setp(plt.gca().get_xticklabels(), visible=False) + + xticks *= 2 + + plt.subplot(nplots, 1, 2, frameon=False) + plt.imshow(mfcc[0, :, start_token * 2: end_token * 2].cpu(), aspect="auto", origin="lower") + plt.yticks([]) + plt.ylabel("MFCC") + + plt.xticks(xticks, xticklabels) + plt.xlabel("Time (s)") + + jumps = np.diff(alignment.index1s) + jumps = np.pad(jumps, (1, 0), constant_values=1) + jumps = jumps.astype(bool) + jumps = alignment.index2s[jumps] + jumps = np.pad(jumps, (0, 1), constant_values=alignment.index2s[-1]) + + jumps_start = jumps + disfluences = {} + if detect_disfluencies: + jumps_start = copy.copy(jumps) + + for (i_token, (tok, begin, end)) in enumerate(zip(tokens, jumps[:-1], jumps[1:])): + + # Find local maxima in the portion of attention weights + attention_weights = -weights[i_token, begin:end] + peaks, properties = find_peaks(attention_weights, + width=3, + prominence=0.02, + ) + # If more than + if len(peaks) > 1: + if "left_ips" in properties: + left = [round(x) for x in properties["left_ips"]] + else: + left = properties["left_bases"] + + new_begin = left[-1] + begin + + jumps_start[i_token] = new_begin + + if new_begin != begin: + is_punctuation = tokenizer.decode_with_timestamps([tok]) in _punctuation + if not is_punctuation: + disfluences[i_token] = (begin, jumps_start[i_token]) + else: + disfluences[i_token+1] = (begin, end) + + if plot: + plt.subplot(nplots, 1, 2 + plot_mfcc, frameon=False) + plt.plot(range(begin,end), attention_weights) + plt.xlim(0, end) + + for i, p in enumerate(peaks): + color = 'red' if (len(peaks)>1 and i1 else 'green' + barxxy(begin+properties["left_bases"], begin+properties["right_bases"], properties.get("prominences",[1]*len(properties["left_bases"])), alpha=0.5, + # put a line with a 
custom color + linewidth=1, edgecolor=color + ) + if "left_ips" in properties: + for left in properties["left_ips"]: + plt.vlines(begin+left, 0, 0.5, color='green', linestyle=':') + for right in properties["right_ips"]: + plt.vlines(begin+right, 0, 0.5, color='red', linestyle=':') + + + # display the word-level timestamps in a table + word_boundaries = np.cumsum([len(t) for t in word_tokens]) + word_boundaries = np.pad(word_boundaries, (1, 0)) + begin_times = jumps_start[word_boundaries[:-1]] + end_times = jumps[word_boundaries[1:] - num_punctuations_per_tokens] + + begin_times = begin_times * AUDIO_TIME_PER_TOKEN + end_times = end_times * AUDIO_TIME_PER_TOKEN + + if detect_disfluencies: + to_be_added = [] + i_start = 0 + for i_word, toks in enumerate(word_tokens[:-1]): + i_end = i_start + len(toks) + if i_start in disfluences and i_word > 0: + begin, end = disfluences[i_start] + begin *= AUDIO_TIME_PER_TOKEN + end *= AUDIO_TIME_PER_TOKEN + to_be_added.append((i_word, begin, end)) + i_start = i_end + # Add from the end to avoid messing up the indices + for (i_word, begin, end) in to_be_added[-1::-1]: + words.insert(i_word, DISFLUENCY_MARK) + word_tokens.insert(i_word, []) + word_tokens_indices.insert(i_word, []) + begin_times = np.insert(begin_times, i_word, begin) + end_times = np.insert(end_times, i_word, end) + + # Ignore start / end tokens + if not refine_whisper_precision_nframes: + begin_times[1] = begin_times[0] + if not refine_whisper_precision_nframes: + end_times[-2] = end_times[-1] + if unfinished_decoding: + words = words[1:] + word_tokens = word_tokens[1:] + word_tokens_indices = word_tokens_indices[1:] + begin_times = begin_times[1:] + end_times = end_times[1:] + else: + words = words[1:-1] + word_tokens = word_tokens[1:-1] + word_tokens_indices = word_tokens_indices[1:-1] + begin_times = begin_times[1:-1] + end_times = end_times[1:-1] + + if plot: + ymin = 1 + + plt.subplot(nplots, 1, 1) + for i, (w, ws, begin, end) in enumerate(zip(words, 
def find_start_padding(mfcc):
    """
    Return start of padding given the mfcc, or None if there is no padding.

    The last frame `mfcc[0, :, -1]` is considered padding only when all its values
    are zero. In that case, the returned value is the index following the last frame
    that differs from it (0 when every frame equals the last one).

    mfcc: tensor indexed as [0, feature, frame]
    """
    last_frame = mfcc[0, :, -1]
    if not bool((last_frame == 0).all()):
        return None
    # Frames that differ from the padding frame in at least one feature.
    # Frame 0 is included, so that padding starting at index 1 is reported as 1.
    differs = (mfcc[0] != last_frame.unsqueeze(-1)).any(dim=0)
    non_padding = torch.nonzero(differs)
    if non_padding.numel() == 0:
        # Everything is padding
        return 0
    return int(non_padding[-1].item()) + 1
+ +def round_confidence(x): + return round(x, 3) + +def round_timestamp(x): + return round(x, 2) + +_punctuation = "".join(c for c in string.punctuation if c not in ["-", "'"]) + "。,!?:”、…" + +def split_tokens_on_unicode(tokens: list, tokenizer, remove_punctuation_from_words=False, isolate_punctuations=False): + words = [] + word_tokens = [] + word_tokens_indices = [] + current_tokens = [] + + for token in tokens: + current_tokens.append(token) + decoded = tokenizer.decode_with_timestamps([t for t in current_tokens if t < tokenizer.eot or t >= tokenizer.timestamp_begin]) + if "\ufffd" not in decoded: + empty_tokens = [""] * (len(current_tokens)-1) + punctuation = not isolate_punctuations and (decoded.strip() and decoded.strip() in _punctuation) + previous_special = len(word_tokens_indices) > 0 and (word_tokens_indices[-1][-1] >= tokenizer.timestamp_begin) + if punctuation and not previous_special: + if len(words) == 0: + words = [""] + word_tokens = [[]] + if not remove_punctuation_from_words: + words[-1] += decoded + word_tokens[-1].extend(empty_tokens + [decoded]) + word_tokens_indices[-1].extend(current_tokens) + else: + words.append(decoded) + word_tokens.append(empty_tokens + [decoded]) + word_tokens_indices.append(current_tokens) + current_tokens = [] + + return words, word_tokens, word_tokens_indices + + +def split_tokens_on_spaces(tokens: torch.Tensor, tokenizer, remove_punctuation_from_words=False): + subwords, subword_tokens_list, subword_tokens_indices_list = split_tokens_on_unicode(tokens, tokenizer, remove_punctuation_from_words=remove_punctuation_from_words) + words = [] + word_tokens = [] + word_tokens_indices = [] + + for i, (subword, subword_tokens, subword_tokens_indices) in enumerate(zip(subwords, subword_tokens_list, subword_tokens_indices_list)): + special = (subword_tokens_indices[0] >= tokenizer.timestamp_begin) + previous_special = (i > 0) and (subword_tokens_indices_list[i-1][0] >= tokenizer.timestamp_begin) + next_special = (i < 
def get_vad_segments(audio,
    output_sample=False,
    min_speech_duration=0.1,
    min_silence_duration=0.1,
    dilatation=0.5,
    ):
    """
    Detect speech segments in an audio signal with Silero VAD

    The VAD model is loaded on first call (from the local torch hub cache
    if it is there, from github otherwise) and kept in a module-level global.

    parameters:
        audio: torch.Tensor
            audio data *in 16kHz*
        output_sample: bool
            if True, return start and end in samples instead of seconds
        min_speech_duration: float
            minimum duration (in sec) of a speech segment
        min_silence_duration: float
            minimum duration (in sec) of a silence segment
        dilatation: float
            how much (in sec) to enlarge each speech segment detected by the VAD

    returns:
        list of dictionaries with keys "start" and "end"
    """
    global silero_vad_model, silero_get_speech_ts

    if silero_vad_model is None:
        import onnxruntime
        onnxruntime.set_default_logger_severity(3) # Remove warning "Removing initializer 'XXX'. It is not used by any node and should be removed from the model."
        cached_repo = os.path.expanduser("~/.cache/torch/hub/snakers4_silero-vad_master")
        if os.path.exists(cached_repo):
            repo_or_dir, source = cached_repo, "local"
        else:
            repo_or_dir, source = "snakers4/silero-vad", "github"
        silero_vad_model, utils = torch.hub.load(repo_or_dir=repo_or_dir, model="silero_vad", onnx=True, source=source)
        silero_get_speech_ts = utils[0]

    # Cheap normalization of the volume
    audio = audio / max(0.1, audio.abs().max())

    # Positions are requested in samples (conversion to seconds is done at the end)
    segments = silero_get_speech_ts(
        audio, silero_vad_model,
        min_speech_duration_ms=round(min_speech_duration * 1000),
        min_silence_duration_ms=round(min_silence_duration * 1000),
        return_seconds=False,
    )

    if dilatation > 0:
        # Enlarge each segment on both sides, and merge the ones that now touch or overlap
        margin = round(dilatation * SAMPLE_RATE)
        merged = []
        for seg in segments:
            enlarged = {
                "start": max(0, seg["start"] - margin),
                "end": min(len(audio), seg["end"] + margin)
            }
            if merged and merged[-1]["end"] >= enlarged["start"]:
                merged[-1]["end"] = enlarged["end"]
            else:
                merged.append(enlarged)
        segments = merged

    if output_sample:
        for seg in segments:
            seg["start"] = round(seg["start"])
            seg["end"] = round(seg["end"])
    else:
        ratio = 1 / SAMPLE_RATE
        if ratio != 1:
            for seg in segments:
                seg["start"] *= ratio
                seg["end"] *= ratio
    return segments
def do_convert_timestamps(segments, t, t2 = None):
    """
    Convert timestamp from audio without non-speech segments to original audio (with non-speech segments)

    parameters:
        segments: list of tuple (start, end) giving the position, in the original audio,
            of each segment that was kept and glued to make the shortened audio
        t: timestamp to convert
        t2: second timestamp to convert (optional), when the two timestamps should be in the same segment

    returns:
        the converted timestamp rounded to 2 decimals if t2 is None,
        otherwise the list of the two converted timestamps (rounded to 2 decimals)
    """
    assert len(segments)
    removed = 0        # Duration removed from the original audio before the current segment
    glued_end = 0      # End of the current segment in the shortened audio
    last_end = 0       # End of the previous segment in the original audio
    candidates = []
    for seg_start, seg_end in segments:
        glued_end = glued_end + (seg_end - seg_start)
        removed += seg_start - last_end
        last_end = seg_end
        first_inside = t <= glued_end
        second_inside = first_inside if t2 is None else t2 <= glued_end
        if not (first_inside or second_inside):
            continue
        # Shift back to the original time axis, and clip to the segment boundaries
        converted = max(seg_start, min(seg_end, removed + t))
        converted2 = None if t2 is None else max(seg_start, min(seg_end, removed + t2))
        candidates.append([converted, converted2])
        if first_inside and second_inside:
            break
    if not candidates:
        # Timestamp(s) after the last segment: just apply the last offset
        candidates.append([removed + t, None if t2 is None else removed + t2])

    if len(candidates) > 1:
        # Minimize difference between durations
        target = abs(t2 - t)
        best = min(candidates, key=lambda c: abs(target - abs(c[1] - c[0])))
    else:
        best = candidates[0]
    if t2 is None:
        return round(best[0], 2)
    return [round(x, 2) for x in best]
def ensure_increasing_positions(segments, min_duration=0):
    """
    Ensure that "start" and "end" come in increasing order

    The segments are modified in place: when a segment starts before the end
    of the previous one, the boundary is moved to the middle of the overlap
    (or the segment is pushed after the previous one if that middle comes too
    early). Each segment is also made to last at least `min_duration`.
    All positions are finally rounded.

    parameters:
        segments: list of dictionaries with keys "start" and "end"
        min_duration: minimum duration of a segment

    returns:
        the same list of segments
    """
    moved_previous_end = False
    cursor = 0
    for idx, current in enumerate(segments):
        if current["start"] < cursor:
            assert idx > 0
            previous = segments[idx-1]
            middle = round_timestamp((cursor + current["start"]) / 2)
            if middle < previous["start"] + min_duration:
                # Not enough room to shorten the previous segment: start right after it
                current["start"] = cursor
            else:
                previous["end"] = middle
                moved_previous_end = True
                current["start"] = middle
        if current["end"] <= current["start"] + min_duration:
            current["end"] = current["start"] + min_duration
        cursor = current["end"]
    if moved_previous_end:
        # Some segments were shortened: check everything again from the start
        return ensure_increasing_positions(segments, min_duration)

    cursor = 0
    for current in segments:
        current["start"] = round_timestamp(current["start"])
        current["end"] = round_timestamp(current["end"])
        assert current["start"] >= cursor, f"Got segment {current} coming before the previous finishes ({cursor} > {current['start']})"
        assert current["end"] >= current["start"], f"Got segment {current} with end < start"
        cursor = current["end"]

    return segments
def get_alignment_heads(model):
    """
    Get the cross-attention heads to use for word alignment

    If the model has an attribute "alignment_heads", it is returned as is.
    Otherwise, the model name is guessed from the number of parameters, and
    the corresponding heads are decoded from the table _ALIGNMENT_HEADS.

    parameters:
        model: Whisper model

    returns:
        model.alignment_heads if available, otherwise the result of _get_alignment_heads

    raises:
        KeyError if the number of parameters does not correspond to a known model
    """
    if hasattr(model, "alignment_heads"): # Since version 20230306
        return model.alignment_heads

    model_name = _PARAMETERS_TO_MODEL_NAME[_get_number_of_parameters(model)]
    if model_name == "large":
        # The number of parameters maps to a single "large" entry:
        # choose between v1 and v2 from the sign of the very first weight
        first_weight_positive = next(model.parameters())[0,0,0] > 0
        model_name = "large-v1" if first_weight_positive else "large-v2"

    dims = model.dims
    return _get_alignment_heads(model_name, dims.n_text_layer, dims.n_text_head)
def load_model(
    name: str,
    device: Optional[Union[str, torch.device]] = None,
    download_root: str = None,
    in_memory: bool = False,
):
    """
    Load a Whisper model, from a Whisper model name, a local file, or a HuggingFace model

    parameters:
        name: str
            one of whisper.available_models(), or the path to a local file
            (".pt" files are loaded with whisper.load_model, ".ckpt" and ".bin" files
            are loaded as state dictionaries), or anything else, which is
            searched with the cache utilities of the transformers library
        device: str or torch.device
            device on which to put the model
        download_root: str
            folder where to download / look for model files
        in_memory: bool
            passed to whisper.load_model (not used for the other kinds of model)

    returns:
        the Whisper model

    raises:
        ImportError if the transformers library is needed but not installed
        RuntimeError if the model file could not be found
    """
    extension = os.path.splitext(name)[-1] if os.path.isfile(name) else None

    if name in whisper.available_models() or extension == ".pt":
        return whisper.load_model(name, device=device, download_root=download_root, in_memory=in_memory)

    # Otherwise, assume transformers
    if extension in [".ckpt", ".bin"]:
        model_path = name
    else:
        # Search for the cached file (download if necessary)
        try:
            import transformers  # noqa: F401 (only to check that the library is installed)
        except ImportError as err:
            raise ImportError(f"If you are trying to download a HuggingFace model with {name}, please install first the transformers library") from err
        from transformers.utils import cached_file

        try:
            model_path = cached_file(name, "pytorch_model.bin", cache_dir=download_root, use_auth_token=None, revision=None)
        except Exception as e:
            not_found = RuntimeError(f"Original error: {e}\nCould not find model {name} from HuggingFace nor local folders.")
            if not isinstance(e, OSError):
                raise not_found from e
            # The file "pytorch_model.bin" was not found: try the other possible file name.
            # Only Exception is caught, so that KeyboardInterrupt / SystemExit are not swallowed
            try:
                model_path = cached_file(name, "whisper.ckpt", cache_dir=download_root, use_auth_token=None, revision=None)
            except Exception:
                raise not_found from e

    # Load HF Model
    # NOTE(review): torch.load unpickles the file, which can execute arbitrary code: only load trusted checkpoints
    hf_state_dict = torch.load(model_path, map_location="cpu")
    # Rename layers
    for key in list(hf_state_dict.keys()):
        new_key = hf_to_whisper_states(key)
        hf_state_dict[new_key] = hf_state_dict.pop(key)

    # Remove useless key (Speechbrain)
    if "_mel_filters" in hf_state_dict:
        hf_state_dict.pop("_mel_filters")

    # Init Whisper Model and replace model weights
    dims = whisper.model.ModelDimensions(**states_to_dim(hf_state_dict))
    whisper_model = whisper.model.Whisper(dims)
    whisper_model.load_state_dict(hf_state_dict)
    del hf_state_dict
    if hasattr(whisper_model, "alignment_heads"):
        del whisper_model.alignment_heads # Will be recomputed later
    whisper_model = whisper_model.to(device)
    return whisper_model
in k])), # 4 / 6 / 12 / 24 / 32 + "n_text_ctx": state_dict["decoder.positional_embedding"].shape[0], # 448 + "n_text_state": n_text_state, # 384 / 512 / 768 / 1024 / 1280 + "n_text_head": n_text_state // 64, # 6 / 8 / 12 / 16 / 20 + "n_text_layer": len(set([".".join(k.split(".")[:3]) for k in state_dict.keys() if "decoder.blocks." in k])), # 4 / 6 / 12 / 24 / 32 + } + +def cli(): + + import os + import sys + import argparse + import json + + from whisper.utils import str2bool, optional_float, optional_int + + try: + # Old whisper version # Before https://github.com/openai/whisper/commit/da600abd2b296a5450770b872c3765d0a5a5c769 + from whisper.utils import write_txt, write_srt, write_vtt + write_tsv = lambda transcript, file: write_csv(transcript, file, sep="\t", header=True, text_first=False, format_timestamps=lambda x: round(1000 * x)) + + except ImportError: + # New whisper version + from whisper.utils import get_writer + + def do_write(transcript, file, output_format): + writer = get_writer(output_format, os.path.curdir) + try: + return writer.write_result({"segments": transcript}, file) + except TypeError: + # Version > 20230314 + return writer.write_result({"segments": list(transcript)}, file, { + "highlight_words": False, + "max_line_width": None, + "max_line_count": None, + }) + def get_do_write(output_format): + return lambda transcript, file: do_write(transcript, file, output_format) + + write_txt = get_do_write("txt") + write_srt = get_do_write("srt") + write_vtt = get_do_write("vtt") + write_tsv = get_do_write("tsv") + + parser = argparse.ArgumentParser( + description='Transcribe a single audio with whisper and compute word timestamps', + formatter_class=argparse.ArgumentDefaultsHelpFormatter + ) + parser.add_argument('-v', '--version', help="show version and exit", action='version', version=f'{__version__}') + parser.add_argument('--versions', help="show versions (of whisper-timestamped and whisper) and exit", action='version', + version=f'{__version__} 
-- Whisper {whisper.__version__} in {os.path.realpath(os.path.dirname(whisper.__file__))}') + + parser.add_argument('audio', help="audio file(s) to transcribe", nargs='+') + parser.add_argument('--model', help=f"name of the Whisper model to use. Examples: {', '.join(whisper.available_models())}", default="small") + parser.add_argument("--model_dir", default=None, help="the path to save model files; uses ~/.cache/whisper by default", type=str) + parser.add_argument("--device", default="cuda:0" if torch.cuda.is_available() else "cpu", help="device to use for PyTorch inference") + parser.add_argument("--output_dir", "-o", default=None, help="directory to save the outputs", type=str) + valid_formats = ["txt", "vtt", "srt", "tsv", "csv", "json"] + def str2output_formats(string): + if string == "all": + return valid_formats + formats = string.split(",") + for format in formats: + if format not in valid_formats: + raise ValueError(f"Expected one of {valid_formats}, got {format}") + return formats + parser.add_argument("--output_format", "-f", default="all", help=f"Format(s) of the output file(s). Possible formats are: {', '.join(valid_formats)}. Several formats can be specified by using commas (ex: \"json,vtt,srt\"). 
By default (\"all\"), all available formats will be produced", type=str2output_formats) + + parser.add_argument("--task", default="transcribe", help="whether to perform X->X speech recognition ('transcribe') or X->English translation ('translate')", choices=["transcribe", "translate"], type=str) + parser.add_argument('--language', help=f"language spoken in the audio, specify None to perform language detection.", choices=sorted(whisper.tokenizer.LANGUAGES.keys()) + sorted([k.title() for k in whisper.tokenizer.TO_LANGUAGE_CODE.keys()]), default=None) + # f"{', '.join(sorted(k+'('+v+')' for k,v in whisper.tokenizer.LANGUAGES.items()))} + + parser.add_argument('--vad', default=False, help="whether to run Voice Activity Detection (VAD) to remove non-speech segment before applying Whisper model (removes hallucinations)", type=str2bool) + parser.add_argument('--detect_disfluencies', default=False, help="whether to try to detect disfluencies, marking them as special words [*]", type=str2bool) + parser.add_argument('--recompute_all_timestamps', default=not TRUST_WHISPER_TIMESTAMP_BY_DEFAULT, help="Do not rely at all on Whisper timestamps (Experimental option: did not bring any improvement, but could be useful in cases where Whipser segment timestamp are wrong by more than 0.5 seconds)", type=str2bool) + parser.add_argument("--punctuations_with_words", default=True, help="whether to include punctuations in the words", type=str2bool) + + parser.add_argument("--temperature", default=0.0, help="temperature to use for sampling", type=float) + parser.add_argument("--best_of", type=optional_int, default=None if USE_EFFICIENT_BY_DEFAULT else 5, help="number of candidates when sampling with non-zero temperature") + parser.add_argument("--beam_size", type=optional_int, default=None if USE_EFFICIENT_BY_DEFAULT else 5, help="number of beams in beam search, only applicable when temperature is zero") + parser.add_argument("--patience", type=float, default=None, help="optional patience 
value to use in beam decoding, as in https://arxiv.org/abs/2204.05424, the default (1.0) is equivalent to conventional beam search") + parser.add_argument("--length_penalty", type=float, default=None, help="optional token length penalty coefficient (alpha) as in https://arxiv.org/abs/1609.08144, uses simple length normalization by default") + + parser.add_argument("--suppress_tokens", default="-1", help="comma-separated list of token ids to suppress during sampling; '-1' will suppress most special characters except common punctuations", type=str) + parser.add_argument("--initial_prompt", default=None, help="optional text to provide as a prompt for the first window.", type=str) + parser.add_argument("--condition_on_previous_text", default=True, help="if True, provide the previous output of the model as a prompt for the next window; disabling may make the text inconsistent across windows, but the model becomes less prone to getting stuck in a failure loop", type=str2bool) + parser.add_argument("--fp16", default=None, help="whether to perform inference in fp16; Automatic by default (True if GPU available, False otherwise)", type=str2bool) + + parser.add_argument("--temperature_increment_on_fallback", default=0.0 if USE_EFFICIENT_BY_DEFAULT else 0.2, help="temperature to increase when falling back when the decoding fails to meet either of the thresholds below", type=optional_float) + parser.add_argument("--compression_ratio_threshold", default=2.4, help="if the gzip compression ratio is higher than this value, treat the decoding as failed", type=optional_float) + parser.add_argument("--logprob_threshold", default=-1.0, help="if the average log probability is lower than this value, treat the decoding as failed", type=optional_float) + parser.add_argument("--no_speech_threshold", default=0.6, help="if the probability of the <|nospeech|> token is higher than this value AND the decoding has failed due to `logprob_threshold`, consider the segment as silence", 
type=optional_float) + parser.add_argument("--threads", default=0, help="number of threads used by torch for CPU inference; supercedes MKL_NUM_THREADS/OMP_NUM_THREADS", type=optional_int) + + parser.add_argument("--compute_confidence", default=True, help="whether to compute confidence scores for words", type=str2bool) + parser.add_argument("--verbose", type=str2bool, default=False, help="whether to print out the progress and debug messages of Whisper") + parser.add_argument('--plot', help="plot word alignments (save the figures if an --output_dir is specified, otherwhise just show figures that have to be closed to continue)", default=False, action="store_true") + parser.add_argument('--debug', help="print some debug information about word alignement", default=False, action="store_true") + + class ActionSetAccurate(argparse.Action): + def __init__(self, option_strings, dest, nargs=None, **kwargs): + assert nargs is None + super().__init__(option_strings, dest, nargs=0, **kwargs) + def __call__(self, parser, namespace, values, option_string=None): + setattr(namespace, "best_of", 5) + setattr(namespace, "beam_size", 5) + setattr(namespace, "temperature_increment_on_fallback", 0.2) + parser.add_argument('--accurate', help="Shortcut to use the same default option as in Whisper (best_of=5, beam_search=5, temperature_increment_on_fallback=0.2)", action=ActionSetAccurate) + + class ActionSetEfficient(argparse.Action): + def __init__(self, option_strings, dest, nargs=None, **kwargs): + assert nargs is None + super().__init__(option_strings, dest, nargs=0, **kwargs) + def __call__(self, parser, namespace, values, option_string=None): + setattr(namespace, "best_of", None) + setattr(namespace, "beam_size", None) + setattr(namespace, "temperature_increment_on_fallback", None) + parser.add_argument('--efficient', help="Shortcut to disable beam size and options that requires to sample several times, for an efficient decoding", action=ActionSetEfficient) + + 
parser.add_argument('--naive', help="use naive approach, doing inference twice (once to get the transcription, once to get word timestamps and confidence scores).", default=False, action="store_true") + + args = parser.parse_args().__dict__ + args.pop("accurate") + args.pop("efficient") + + temperature = args.pop("temperature") + temperature_increment_on_fallback = args.pop("temperature_increment_on_fallback") + if temperature_increment_on_fallback: + temperature = tuple(np.arange(temperature, 1.0 + 1e-6, temperature_increment_on_fallback)) + else: + temperature = [temperature] + + threads = args.pop("threads") + if threads: + torch.set_num_threads(threads) + + audio_files = args.pop("audio") + + model = args.pop("model") + device = args.pop("device") + model_dir = args.pop("model_dir") + + if device.lower().startswith("cuda"): + force_cudnn_initialization(device) + + output_format = args.pop("output_format") + + model = load_model(model, device=device, download_root=model_dir) + + plot_word_alignment = args.pop("plot") + + debug = args.pop("debug") + logging.basicConfig() + if debug: + logger.setLevel(logging.DEBUG) + # This supposes to plug a logger with name "WHISPER" into Whisper source code (no harm if it's not set) + logging.getLogger("WHISPER").setLevel(logging.DEBUG) + + output_dir = args.pop("output_dir") + if output_dir and not os.path.isdir(output_dir): + os.makedirs(output_dir) + + args["naive_approach"] = args.pop("naive") + args["remove_punctuation_from_words"] = not args.pop("punctuations_with_words") + args["compute_word_confidence"] = args.pop("compute_confidence") + args["trust_whisper_timestamps"] = not args.pop("recompute_all_timestamps") + + # Quick early check + for audio_path in audio_files: + assert os.path.isfile(audio_path), f"File {audio_path} does not exist" + + for audio_path in audio_files: + + outname = os.path.join(output_dir, os.path.basename(audio_path)) if output_dir else None + + result = transcribe_timestamped( + model, 
def filtered_keys(result, keys = (
    "text",
    "segments", "words",
    "language",
    "start",
    "end",
    "confidence"
)):
    """
    Make a lighter copy of a transcription result, for display

    Dictionaries are filtered recursively to only keep the given keys,
    lists are processed element by element, and floats are rounded to
    2 decimals. Any other value is returned as is.

    parameters:
        result: dictionary, list, float or any other value
        keys: collection of the dictionary keys to keep
            (the default is a tuple, to avoid a mutable default argument)

    returns:
        the filtered copy (the input is not modified)
    """
    if isinstance(result, dict):
        return {k: filtered_keys(v, keys) for k, v in result.items() if k in keys}
    if isinstance(result, list):
        return [filtered_keys(v, keys) for v in result]
    if isinstance(result, float):
        return round(result, 2)
    return result