# poc_call_transcription / info /configs /offline_diarization_asr.yaml
# pustozerov's picture
# Temporary remove samples.
# 02dca0a
# raw
# history blame
# 1.57 kB
# Configuration for an offline speaker-diarization + ASR pipeline.
#
# Layout note: the stage settings are nested under `diarizer`, and each stage
# (`vad`, `speaker_embeddings`, `clustering`, `asr`) owns its own `parameters`
# mapping. Written flat, `parameters`, `model_path`, `window_length_in_sec`
# and `shift_length_in_sec` would be duplicate keys, and most YAML parsers
# silently keep only the last occurrence of a duplicate key.
# NOTE(review): the nesting follows the layout of NVIDIA NeMo's reference
# offline diarization-with-ASR config — confirm against the code that
# consumes this file.
#
# `???` marks a value with no default here that must be supplied by the
# caller (OmegaConf mandatory-value syntax; assumes the file is loaded via
# OmegaConf/Hydra — TODO confirm).
name: ClusterDiarizer

num_workers: 0
sample_rate: 16000
batch_size: 64

diarizer:
  manifest_filepath: ???
  out_dir: ???
  oracle_vad: false
  collar: 0.25
  ignore_overlap: true

  # Voice activity detection (VAD) stage.
  vad:
    model_path: null
    external_vad_manifest: null
    parameters:
      window_length_in_sec: 0.15
      shift_length_in_sec: 0.01
      smoothing: median
      overlap: 0.875
      onset: 0.4
      offset: 0.7
      pad_onset: 0.05
      # Negative on purpose in the original; presumably trims rather than
      # pads the segment end — verify against the VAD post-processing code.
      pad_offset: -0.1
      min_duration_on: 0.2
      min_duration_off: 0.2
      filter_speech_first: true

  # Speaker-embedding extraction stage.
  speaker_embeddings:
    model_path: ???
    parameters:
      # Distinct from vad.parameters.window_length_in_sec / shift_length_in_sec:
      # same key names, different values, different parent mapping.
      window_length_in_sec: 1.5
      shift_length_in_sec: 0.75
      multiscale_weights: null
      save_embeddings: false

  # Speaker clustering stage.
  clustering:
    parameters:
      oracle_num_speakers: false
      max_num_speakers: 20
      enhanced_count_thres: 80
      max_rp_threshold: 0.25
      sparse_search_volume: 30
      maj_vote_spk_count: false

  # Speech recognition stage and transcript formatting options.
  asr:
    model_path: ???
    parameters:
      asr_based_vad: false
      asr_based_vad_threshold: 0.05
      asr_batch_size: null
      lenient_overlap_WDER: true
      decoder_delay_in_sec: null
      word_ts_anchor_offset: null
      word_ts_anchor_pos: start
      fix_word_ts_with_VAD: false
      colored_text: false
      print_time: true
      break_lines: false

    # Beam-search decoder settings; no language model is configured
    # (pretrained_language_model is null).
    ctc_decoder_parameters:
      pretrained_language_model: null
      beam_width: 32
      alpha: 0.5
      beta: 2.5

    # LM-based realignment settings; no ARPA model is configured
    # (arpa_language_model is null).
    realigning_lm_parameters:
      arpa_language_model: null
      min_number_of_words: 3
      max_number_of_words: 10
      logprob_diff_threshold: 1.2