Update README.md
Browse files
README.md
CHANGED
@@ -77,3 +77,47 @@ The following hyperparameters were used during training:
|
|
77 |
- Pytorch 2.2.2
|
78 |
- Datasets 2.18.0
|
79 |
- Tokenizers 0.15.2
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
- Pytorch 2.2.2
|
78 |
- Datasets 2.18.0
|
79 |
- Tokenizers 0.15.2
|
80 |
+
|
81 |
+
|
82 |
+
## Additional Information
|
83 |
+
## Licensing Information
|
84 |
+
The licensing status of the dataset hinges on the legal status of the UWB-ATCC corpus creators.
|
85 |
+
|
86 |
+
They released the corpus under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International (CC BY-NC-SA 4.0) license.
|
87 |
+
|
88 |
+
## Citation Information
|
89 |
+
Contributors who prepared, processed, normalized, and uploaded the dataset to Hugging Face:
|
90 |
+
|
91 |
+
@article{zuluaga2022how,
|
92 |
+
title={How Does Pre-trained Wav2Vec 2.0 Perform on Domain Shifted ASR? An Extensive Benchmark on Air Traffic Control Communications},
|
93 |
+
author={Zuluaga-Gomez, Juan and Prasad, Amrutha and Nigmatulina, Iuliia and Sarfjoo, Saeed and others},
|
94 |
+
journal={IEEE Spoken Language Technology Workshop (SLT), Doha, Qatar},
|
95 |
+
year={2022}
|
96 |
+
}
|
97 |
+
|
98 |
+
@article{zuluaga2022bertraffic,
|
99 |
+
title={BERTraffic: BERT-based Joint Speaker Role and Speaker Change Detection for Air Traffic Control Communications},
|
100 |
+
author={Zuluaga-Gomez, Juan and Sarfjoo, Seyyed Saeed and Prasad, Amrutha and others},
|
101 |
+
journal={IEEE Spoken Language Technology Workshop (SLT), Doha, Qatar},
|
102 |
+
year={2022}
|
103 |
+
}
|
104 |
+
|
105 |
+
@article{zuluaga2022atco2,
|
106 |
+
title={ATCO2 corpus: A Large-Scale Dataset for Research on Automatic Speech Recognition and Natural Language Understanding of Air Traffic Control Communications},
|
107 |
+
author={Zuluaga-Gomez, Juan and Vesel{\`y}, Karel and Sz{\"o}ke, Igor and Motlicek, Petr and others},
|
108 |
+
journal={arXiv preprint arXiv:2211.04054},
|
109 |
+
year={2022}
|
110 |
+
}
|
111 |
+
|
112 |
+
Authors of the dataset:
|
113 |
+
|
114 |
+
@article{vsmidl2019air,
|
115 |
+
title={Air traffic control communication (ATCC) speech corpora and their use for ASR and TTS development},
|
116 |
+
author={{\v{S}}m{\'\i}dl, Lubo{\v{s}} and {\v{S}}vec, Jan and Tihelka, Daniel and Matou{\v{s}}ek, Jind{\v{r}}ich and Romportl, Jan and Ircing, Pavel},
|
117 |
+
journal={Language Resources and Evaluation},
|
118 |
+
volume={53},
|
119 |
+
number={3},
|
120 |
+
pages={449--464},
|
121 |
+
year={2019},
|
122 |
+
publisher={Springer}
|
123 |
+
}
|