Update README.md
Browse files
README.md
CHANGED
@@ -113,6 +113,55 @@ asr_model.transcribe_file(
|
|
113 |
)
|
114 |
```
|
115 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
### Inference on GPU
|
117 |
To perform inference on the GPU, add `run_opts={"device":"cuda"}` when calling the `from_hparams` method.
|
118 |
|
|
|
113 |
)
|
114 |
```
|
115 |
|
116 |
<details>
<summary>Command-line tool to transcribe a file or a live stream</summary>
118 |
+
|
119 |
+
**Decoding from a live stream using ffmpeg (BBC Radio 4):** `python3 asr.py http://as-hls-ww-live.akamaized.net/pool_904/live/ww/bbc_radio_fourfm/bbc_radio_fourfm.isml/bbc_radio_fourfm-audio%3d96000.norewind.m3u8 --model-source=sdelangen/speechbrain-asr-conformer-test --device=cpu -v`
|
120 |
+
|
121 |
+
**Decoding from a file:** `python3 asr.py some-english-speech.wav --model-source=sdelangen/speechbrain-asr-conformer-test --device=cpu -v`
|
122 |
+
|
```python
# Streaming-transcription CLI: parse options first, then lazily import the
# heavy ASR libraries so `--help` and argument errors stay fast.
from argparse import ArgumentParser
import logging

parser = ArgumentParser()
# Path or URI of the audio to transcribe (e.g. a .wav file or an HLS stream URL).
parser.add_argument("audio_path")
# Model source passed to StreamingASR.from_hparams (hub id or local path).
parser.add_argument("--model-source", required=True)
# Torch device string, e.g. "cpu" or "cuda".
parser.add_argument("--device", default="cpu")
# NOTE(review): --ip and --port are never read below — presumably left over
# from (or reserved for) a server mode; confirm before relying on them.
parser.add_argument("--ip", default="127.0.0.1")
parser.add_argument("--port", default=9431)
# Streaming decode geometry, forwarded to DynChunkTrainConfig below.
parser.add_argument("--chunk-size", default=24, type=int)
parser.add_argument("--left-context-chunks", default=4, type=int)
# Optional cap on CPU threads (forwarded to torch.set_num_threads).
parser.add_argument("--num-threads", default=None, type=int)
parser.add_argument("--verbose", "-v", default=False, action="store_true")
args = parser.parse_args()

# Root logger defaults to WARNING; -v surfaces the logging.info messages below.
if args.verbose:
    logging.getLogger().setLevel(logging.INFO)

logging.info("Loading libraries")

# Imported after argument parsing on purpose: these imports are slow.
from speechbrain.inference.ASR import StreamingASR
from speechbrain.utils.dynamic_chunk_training import DynChunkTrainConfig
import torch

device = args.device

if args.num_threads is not None:
    torch.set_num_threads(args.num_threads)

logging.info(f"Loading model from \"{args.model_source}\" onto device {device}")

asr = StreamingASR.from_hparams(args.model_source, run_opts={"device": device})
config = DynChunkTrainConfig(args.chunk_size, args.left_context_chunks)

logging.info(f"Starting stream from URI \"{args.audio_path}\"")

# Emit each decoded text chunk as soon as it is available; end="" keeps the
# transcript on one continuous line and flush=True makes it appear live.
for text_chunk in asr.transcribe_file_streaming(args.audio_path, config):
    print(text_chunk, flush=True, end="")
```
</details>
|
164 |
+
|
165 |
### Inference on GPU
|
166 |
To perform inference on the GPU, add `run_opts={"device":"cuda"}` when calling the `from_hparams` method.
|
167 |
|