File size: 725 Bytes
5325fcc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# @package __global__

# Execute-only setup for evaluating audio generation on AudioCaps
# with objective metrics.
# Intended to be run with: execute_only=evaluate

# Dataset settings: a top-level default plus per-stage overrides for the
# `evaluate` and `generate` stages.
dataset:
  max_audio_duration: null
  # ensure the proper values are broadcast here for evaluate
  evaluate:
    min_audio_duration: 1.  # some metrics require a minimum audio length
    max_audio_duration: null  # all samples from audiocaps should be ~10s
    # NOTE(review): null presumably means "no limit / not set" for the two
    # keys below - confirm against the dataset loader.
    num_samples: null
    segment_duration: null
  generate:
    min_audio_duration: 1.  # same minimum as the evaluate stage
    max_audio_duration: null
    # NOTE(review): presumably caps generation at 500 samples, whereas
    # evaluate leaves num_samples unset - confirm against the dataset loader.
    num_samples: 500

# Metric toggles for the evaluate stage; all three are enabled here.
evaluate:
  metrics:
    fad: true  # presumably Frechet Audio Distance - confirm
    kld: true  # presumably KL divergence; see `metrics.kld` for its settings
    text_consistency: true

# Per-metric settings; only the `kld` metric's `passt` entry is overridden here.
metrics:
  kld:
    passt:
      # Set to 10 to follow the results reported in the AudioGen paper.
      # NOTE(review): unit is presumably seconds (AudioCaps clips are ~10s) -
      # confirm against the metric implementation.
      pretrained_length: 10