ARCH / index.html
morenolq's picture
Update index.html
bc614c2
raw
history blame
12 kB
<!DOCTYPE html>
<!-- Root element; lang declares the language of the page text for screen readers and translation tools. -->
<html lang="en">
<head>
  <!-- Page metadata, the Roboto web font, the shared stylesheet, and inline rules for the logo and results table. -->
  <meta charset="utf-8">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>ARCH: Audio Representation benCHmark</title>
  <!-- Requested over HTTPS: an http:// stylesheet is blocked as mixed content when the page itself is served over HTTPS. -->
  <link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Roboto">
  <link rel="stylesheet" href="style.css">
  <style>
    /* Horizontally centers a block-level image. */
    .center_img {
      display: block;
      margin-left: auto;
      margin-right: auto;
    }

    /* Horizontally centers a table. */
    .center_table {
      margin-left: auto;
      margin-right: auto;
    }

    /* Monospace text, used for the model identifiers. */
    .mono_text {
      font-family: 'Lucida Console', monospace;
    }

    .width500 {
      width: 500px;
    }

    /* Results table: collapsed 1px black grid with compact Arial cells. */
    .tg {
      border-collapse: collapse;
      border-spacing: 0;
    }

    .tg td {
      border-color: black;
      border-style: solid;
      border-width: 1px;
      font-family: Arial, sans-serif;
      font-size: 14px;
      overflow: hidden;
      padding: 10px 5px;
      word-break: normal;
    }

    /* Header cells share the data-cell look; font-weight is reset so they are not bold by default. */
    .tg th {
      border-color: black;
      border-style: solid;
      border-width: 1px;
      font-family: Arial, sans-serif;
      font-size: 14px;
      font-weight: normal;
      overflow: hidden;
      padding: 10px 5px;
      word-break: normal;
    }

    /* Plain centered cell. */
    .tg .tg-c3ow {
      border-color: inherit;
      text-align: center;
      vertical-align: top;
    }

    /* Bold centered cell. */
    .tg .tg-7btt {
      border-color: inherit;
      font-weight: bold;
      text-align: center;
      vertical-align: top;
    }

    /* Bold centered cell with a light-blue background. */
    .tg .tg-f0bj {
      background-color: #DAE8FC;
      border-color: inherit;
      font-weight: bold;
      text-align: center;
      vertical-align: top;
    }
  </style>
</head>
<body>
<!-- Centered project logo, 500px wide; alt gives the image a text equivalent for assistive technology. -->
<img class="center_img width500" src="arch_logo.png" alt="ARCH: Audio Representation benCHmark logo">
<!-- Two line breaks add vertical space between the logo and the results table. -->
<br><br>
<table class="tg center_table" id="arch_res">
<thead>
  <!-- Two-level header: Model and Size span both rows; Sound, Music and Speech each span four dataset columns. -->
  <!-- scope="col" ties every header cell to the column(s) beneath it for assistive technology. -->
  <tr>
    <th class="tg-c3ow" rowspan="2" scope="col">Model</th>
    <th class="tg-c3ow" rowspan="2" scope="col">Size</th>
    <th class="tg-c3ow" colspan="4" scope="col">Sound</th>
    <th class="tg-c3ow" colspan="4" scope="col">Music</th>
    <th class="tg-c3ow" colspan="4" scope="col">Speech</th>
  </tr>
  <tr>
    <!-- Sound -->
    <th class="tg-c3ow" scope="col">ESC-50</th>
    <th class="tg-c3ow" scope="col">US8K</th>
    <th class="tg-c3ow" scope="col">FSD50K</th>
    <th class="tg-c3ow" scope="col">VIVAE</th>
    <!-- Music -->
    <th class="tg-c3ow" scope="col">FMA</th>
    <th class="tg-c3ow" scope="col">MTT</th>
    <th class="tg-c3ow" scope="col">IRMAS</th>
    <th class="tg-c3ow" scope="col">MS-DB</th>
    <!-- Speech -->
    <th class="tg-c3ow" scope="col">RAVDESS</th>
    <th class="tg-c3ow" scope="col">A-MNIST</th>
    <th class="tg-c3ow" scope="col">SLURP</th>
    <th class="tg-c3ow" scope="col">EMOVO</th>
  </tr>
</thead>
<tbody>
<tr>
  <!-- facebook/wav2vec2-base, size S. The model cell is the row header; score cells follow the header's column order, ESC-50 through EMOVO. -->
  <th class="tg-c3ow" scope="row">
    <a class="mono_text" href="https://huggingface.co/facebook/wav2vec2-base">facebook/wav2vec2-base</a>
  </th>
  <td class="tg-c3ow">S</td>
  <td class="tg-c3ow">45.73</td>
  <td class="tg-c3ow">55.48</td>
  <td class="tg-c3ow">19.39</td>
  <td class="tg-c3ow">31.47</td>
  <td class="tg-c3ow">50.54</td>
  <td class="tg-c3ow">37.56</td>
  <td class="tg-c3ow">35.14</td>
  <td class="tg-c3ow">66.06</td>
  <td class="tg-c3ow">55.32</td>
  <td class="tg-c3ow">86.38</td>
  <td class="tg-c3ow">14.37</td>
  <td class="tg-c3ow">31.80</td>
</tr>
<tr>
  <!-- microsoft/wavlm-base, size S. The model cell is the row header; score cells follow the header's column order, ESC-50 through EMOVO. -->
  <th class="tg-c3ow" scope="row">
    <a class="mono_text" href="https://huggingface.co/microsoft/wavlm-base">microsoft/wavlm-base</a>
  </th>
  <td class="tg-c3ow">S</td>
  <td class="tg-c3ow">49.88</td>
  <td class="tg-c3ow">61.84</td>
  <td class="tg-c3ow">17.63</td>
  <td class="tg-c3ow">36.31</td>
  <td class="tg-c3ow">48.71</td>
  <td class="tg-c3ow">34.93</td>
  <td class="tg-c3ow">32.62</td>
  <td class="tg-c3ow">54.18</td>
  <td class="tg-7btt">67.94</td>
  <td class="tg-c3ow">99.50</td>
  <td class="tg-c3ow">30.98</td>
  <td class="tg-7btt">43.08</td>
</tr>
<tr>
  <!-- microsoft/wavlm-base-plus, size S. The model cell is the row header; score cells follow the header's column order, ESC-50 through EMOVO. -->
  <th class="tg-c3ow" scope="row">
    <a class="mono_text" href="https://huggingface.co/microsoft/wavlm-base-plus">microsoft/wavlm-base-plus</a>
  </th>
  <td class="tg-c3ow">S</td>
  <td class="tg-c3ow">58.73</td>
  <td class="tg-c3ow">64.07</td>
  <td class="tg-c3ow">21.57</td>
  <td class="tg-c3ow">36.17</td>
  <td class="tg-c3ow">56.17</td>
  <td class="tg-c3ow">38.24</td>
  <td class="tg-c3ow">35.76</td>
  <td class="tg-c3ow">57.51</td>
  <td class="tg-c3ow">52.20</td>
  <td class="tg-7btt">99.63</td>
  <td class="tg-c3ow">28.06</td>
  <td class="tg-c3ow">36.73</td>
</tr>
<tr>
  <!-- facebook/hubert-base-ls960, size S. The model cell is the row header; score cells follow the header's column order, ESC-50 through EMOVO. -->
  <th class="tg-c3ow" scope="row">
    <a class="mono_text" href="https://huggingface.co/facebook/hubert-base-ls960">facebook/hubert-base-ls960</a>
  </th>
  <td class="tg-c3ow">S</td>
  <td class="tg-7btt">58.90</td>
  <td class="tg-7btt">67.28</td>
  <td class="tg-7btt">24.53</td>
  <td class="tg-7btt">40.48</td>
  <td class="tg-c3ow">54.63</td>
  <td class="tg-7btt">38.78</td>
  <td class="tg-7btt">36.65</td>
  <td class="tg-c3ow">58.46</td>
  <td class="tg-c3ow">65.28</td>
  <td class="tg-c3ow">99.58</td>
  <td class="tg-c3ow">33.75</td>
  <td class="tg-c3ow">40.48</td>
</tr>
<tr>
  <!-- facebook/data2vec-audio-base, size S. The model cell is the row header; score cells follow the header's column order, ESC-50 through EMOVO. -->
  <th class="tg-c3ow" scope="row">
    <a class="mono_text" href="https://huggingface.co/facebook/data2vec-audio-base">facebook/data2vec-audio-base</a>
  </th>
  <td class="tg-c3ow">S</td>
  <td class="tg-c3ow">23.63</td>
  <td class="tg-c3ow">45.63</td>
  <td class="tg-c3ow">10.06</td>
  <td class="tg-c3ow">30.19</td>
  <td class="tg-c3ow">40.58</td>
  <td class="tg-c3ow">27.60</td>
  <td class="tg-c3ow">25.87</td>
  <td class="tg-c3ow">50.74</td>
  <td class="tg-c3ow">48.03</td>
  <td class="tg-c3ow">99.06</td>
  <td class="tg-7btt">43.57</td>
  <td class="tg-c3ow">27.27</td>
</tr>
<tr>
  <!-- ALM/wav2vec2-base-audioset, size S. The model cell is the row header; score cells follow the header's column order, ESC-50 through EMOVO. -->
  <!-- NOTE(review): the href below is a "COMING_SOON" placeholder, not a model page; replace it with the real model URL once it is known. -->
  <th class="tg-c3ow" scope="row">
    <a class="mono_text" href="https://huggingface.co/COMING_SOON">ALM/wav2vec2-base-audioset</a>
  </th>
  <td class="tg-c3ow">S</td>
  <td class="tg-c3ow">49.48</td>
  <td class="tg-c3ow">62.34</td>
  <td class="tg-c3ow">21.44</td>
  <td class="tg-c3ow">34.90</td>
  <td class="tg-7btt">59.25</td>
  <td class="tg-c3ow">36.13</td>
  <td class="tg-c3ow">34.07</td>
  <td class="tg-7btt">68.74</td>
  <td class="tg-c3ow">51.50</td>
  <td class="tg-c3ow">75.13</td>
  <td class="tg-c3ow">11.01</td>
  <td class="tg-c3ow">31.01</td>
</tr>
<tr>
  <!-- facebook/wav2vec2-large-robust, size M. The model cell is the row header; score cells follow the header's column order, ESC-50 through EMOVO. -->
  <th class="tg-c3ow" scope="row">
    <a class="mono_text" href="https://huggingface.co/facebook/wav2vec2-large-robust">facebook/wav2vec2-large-robust</a>
  </th>
  <td class="tg-c3ow">M</td>
  <td class="tg-c3ow">13.13</td>
  <td class="tg-c3ow">42.70</td>
  <td class="tg-c3ow">5.80</td>
  <td class="tg-c3ow">22.01</td>
  <td class="tg-c3ow">41.71</td>
  <td class="tg-c3ow">20.95</td>
  <td class="tg-c3ow">19.91</td>
  <td class="tg-c3ow">50.23</td>
  <td class="tg-c3ow">11.57</td>
  <td class="tg-c3ow">45.74</td>
  <td class="tg-c3ow">7.33</td>
  <td class="tg-c3ow">19.27</td>
</tr>
<tr>
  <!-- facebook/wav2vec2-xls-r-300m, size M. The model cell is the row header; score cells follow the header's column order, ESC-50 through EMOVO. -->
  <th class="tg-c3ow" scope="row">
    <a class="mono_text" href="https://huggingface.co/facebook/wav2vec2-xls-r-300m">facebook/wav2vec2-xls-r-300m</a>
  </th>
  <td class="tg-c3ow">M</td>
  <td class="tg-c3ow">51.28</td>
  <td class="tg-c3ow">69.96</td>
  <td class="tg-c3ow">23.71</td>
  <td class="tg-c3ow">36.28</td>
  <td class="tg-c3ow">56.96</td>
  <td class="tg-c3ow">38.28</td>
  <td class="tg-c3ow">38.42</td>
  <td class="tg-c3ow">66.71</td>
  <td class="tg-c3ow">31.48</td>
  <td class="tg-c3ow">98.88</td>
  <td class="tg-c3ow">12.74</td>
  <td class="tg-c3ow">20.35</td>
</tr>
<tr>
  <!-- microsoft/wavlm-large, size M. The model cell is the row header; score cells follow the header's column order, ESC-50 through EMOVO. -->
  <th class="tg-c3ow" scope="row">
    <a class="mono_text" href="https://huggingface.co/microsoft/wavlm-large">microsoft/wavlm-large</a>
  </th>
  <td class="tg-c3ow">M</td>
  <td class="tg-f0bj">67.20</td>
  <td class="tg-7btt">70.92</td>
  <td class="tg-f0bj">32.21</td>
  <td class="tg-7btt">42.51</td>
  <td class="tg-7btt">61.13</td>
  <td class="tg-7btt">41.29</td>
  <td class="tg-7btt">42.53</td>
  <td class="tg-7btt">68.00</td>
  <td class="tg-c3ow">71.76</td>
  <td class="tg-c3ow">99.75</td>
  <td class="tg-c3ow">42.34</td>
  <td class="tg-7btt">45.29</td>
</tr>
<tr>
  <!-- facebook/hubert-large-ll60k, size M. The model cell is the row header; score cells follow the header's column order, ESC-50 through EMOVO. -->
  <th class="tg-c3ow" scope="row">
    <a class="mono_text" href="https://huggingface.co/facebook/hubert-large-ll60k">facebook/hubert-large-ll60k</a>
  </th>
  <td class="tg-c3ow">M</td>
  <td class="tg-c3ow">63.98</td>
  <td class="tg-c3ow">70.00</td>
  <td class="tg-c3ow">29.51</td>
  <td class="tg-c3ow">40.95</td>
  <td class="tg-c3ow">54.79</td>
  <td class="tg-c3ow">38.36</td>
  <td class="tg-c3ow">36.81</td>
  <td class="tg-c3ow">64.08</td>
  <td class="tg-7btt">72.57</td>
  <td class="tg-f0bj">99.95</td>
  <td class="tg-7btt">45.26</td>
  <td class="tg-c3ow">43.76</td>
</tr>
<tr>
  <!-- facebook/data2vec-audio-large, size M. The model cell is the row header; score cells follow the header's column order, ESC-50 through EMOVO. -->
  <th class="tg-c3ow" scope="row">
    <a class="mono_text" href="https://huggingface.co/facebook/data2vec-audio-large">facebook/data2vec-audio-large</a>
  </th>
  <td class="tg-c3ow">M</td>
  <td class="tg-c3ow">25.35</td>
  <td class="tg-c3ow">49.15</td>
  <td class="tg-c3ow">10.82</td>
  <td class="tg-c3ow">30.57</td>
  <td class="tg-c3ow">43.46</td>
  <td class="tg-c3ow">28.52</td>
  <td class="tg-c3ow">27.08</td>
  <td class="tg-c3ow">44.20</td>
  <td class="tg-c3ow">45.14</td>
  <td class="tg-c3ow">99.15</td>
  <td class="tg-c3ow">28.60</td>
  <td class="tg-c3ow">23.07</td>
</tr>
<tr>
  <!-- facebook/wav2vec2-xls-r-1b, size L. The model cell is the row header; score cells follow the header's column order, ESC-50 through EMOVO. -->
  <th class="tg-c3ow" scope="row">
    <a class="mono_text" href="https://huggingface.co/facebook/wav2vec2-xls-r-1b">facebook/wav2vec2-xls-r-1b</a>
  </th>
  <td class="tg-c3ow">L</td>
  <td class="tg-7btt">66.95</td>
  <td class="tg-f0bj">75.90</td>
  <td class="tg-7btt">31.61</td>
  <td class="tg-c3ow">40.41</td>
  <td class="tg-f0bj">62.79</td>
  <td class="tg-f0bj">41.99</td>
  <td class="tg-f0bj">43.57</td>
  <td class="tg-f0bj">69.79</td>
  <td class="tg-c3ow">55.44</td>
  <td class="tg-c3ow">99.86</td>
  <td class="tg-c3ow">25.14</td>
  <td class="tg-c3ow">34.58</td>
</tr>
<tr>
  <!-- facebook/hubert-xlarge-ll60k, size L. The model cell is the row header; score cells follow the header's column order, ESC-50 through EMOVO. -->
  <th class="tg-c3ow" scope="row">
    <a class="mono_text" href="https://huggingface.co/facebook/hubert-xlarge-ll60k">facebook/hubert-xlarge-ll60k</a>
  </th>
  <td class="tg-c3ow">L</td>
  <td class="tg-c3ow">63.40</td>
  <td class="tg-c3ow">69.66</td>
  <td class="tg-c3ow">29.32</td>
  <td class="tg-f0bj">42.72</td>
  <td class="tg-c3ow">56.25</td>
  <td class="tg-c3ow">37.76</td>
  <td class="tg-c3ow">37.30</td>
  <td class="tg-c3ow">64.71</td>
  <td class="tg-f0bj">75.69</td>
  <td class="tg-f0bj">99.95</td>
  <td class="tg-f0bj">47.81</td>
  <td class="tg-f0bj">47.17</td>
</tr>
</tbody>
</table>
</body>
</html>