Add "about" page and examples from CO.
Browse files
app.py
CHANGED
@@ -2,6 +2,7 @@ import torch
|
|
2 |
import scipy
|
3 |
import os
|
4 |
import streamlit as st
|
|
|
5 |
from transformers import set_seed, pipeline
|
6 |
from transformers import VitsTokenizer, VitsModel
|
7 |
from datasets import load_dataset, Audio
|
@@ -20,7 +21,7 @@ language_list = ['mos', 'fra', 'eng']
|
|
20 |
|
21 |
|
22 |
st.title("Demo: Automated Tools for Mooré Language")
|
23 |
-
tts, stt, trans, lid = st.tabs(["Text to speech", "Speech to text", "Translation", "Language ID"])
|
24 |
|
25 |
########################
|
26 |
with tts:
|
@@ -40,6 +41,7 @@ with tts:
|
|
40 |
synth = synthesize_facebook(tts_text, tts_lang)
|
41 |
st.audio(synth, sample_rate=16_000)
|
42 |
|
|
|
43 |
########################
|
44 |
with stt:
|
45 |
|
@@ -54,6 +56,16 @@ with stt:
|
|
54 |
":violet[The transcription is:]"
|
55 |
':violet[ "' + stt + '"]'
|
56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
########################
|
58 |
with trans:
|
59 |
|
@@ -79,6 +91,24 @@ with trans:
|
|
79 |
translation = translate(trans_text, src_lang, target_lang) #, trans_model)
|
80 |
translation
|
81 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
########################
|
83 |
with lid:
|
84 |
langid_file = st.file_uploader("Please upload an audio file:", type=['mp3', 'm4a'], key = "lid_uploader")
|
@@ -90,8 +120,48 @@ with lid:
|
|
90 |
lang = decode_iso(lang)
|
91 |
":violet[The detected language is " + lang + "]"
|
92 |
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
import scipy
|
3 |
import os
|
4 |
import streamlit as st
|
5 |
+
import pandas as pd
|
6 |
from transformers import set_seed, pipeline
|
7 |
from transformers import VitsTokenizer, VitsModel
|
8 |
from datasets import load_dataset, Audio
|
|
|
21 |
|
22 |
|
23 |
st.title("Demo: Automated Tools for Mooré Language")
|
24 |
+
tts, stt, trans, lid, about = st.tabs(["Text to speech", "Speech to text", "Translation", "Language ID", "**About**"])
|
25 |
|
26 |
########################
|
27 |
with tts:
|
|
|
41 |
synth = synthesize_facebook(tts_text, tts_lang)
|
42 |
st.audio(synth, sample_rate=16_000)
|
43 |
|
44 |
+
|
45 |
########################
|
46 |
with stt:
|
47 |
|
|
|
56 |
":violet[The transcription is:]"
|
57 |
':violet[ "' + stt + '"]'
|
58 |
|
59 |
+
st.subheader("Examples")
|
60 |
+
"Using the supplied clips, here are the transcriptions:"
|
61 |
+
df = pd.read_csv("data/speech_to_text.csv")
|
62 |
+
df.columns = ['Clip ID', 'Spoken in Moore', 'Spoken in French', 'Transcription in Moore', 'Transcription in French']
|
63 |
+
|
64 |
+
df.set_index('Clip ID', inplace=True)
|
65 |
+
st.table(df[['Spoken in Moore', 'Transcription in Moore']])
|
66 |
+
|
67 |
+
st.table(df[['Spoken in French', 'Transcription in French']])
|
68 |
+
|
69 |
########################
|
70 |
with trans:
|
71 |
|
|
|
91 |
translation = translate(trans_text, src_lang, target_lang) #, trans_model)
|
92 |
translation
|
93 |
|
94 |
+
|
95 |
+
|
96 |
+
st.subheader("Examples")
|
97 |
+
"Using the supplied clips, here are the translations:"
|
98 |
+
df = pd.read_csv("data/translated_eng.csv",
|
99 |
+
usecols=['ID', 'French', 'Moore', 'English',
|
100 |
+
'tr_meta_mos_fra', 'tr_meta_mos_eng', 'tr_meta_eng_mos', 'tr_meta_fra_mos'])
|
101 |
+
|
102 |
+
df.columns = ['Clip ID', 'Original Moore', 'Original French', 'Original English',
|
103 |
+
'Moore-English Translation', 'Moore-French Translation',
|
104 |
+
'English-Moore Translation', 'French-Moore Translation']
|
105 |
+
|
106 |
+
df.set_index('Clip ID', inplace=True)
|
107 |
+
|
108 |
+
st.table(df[['Original Moore', 'Moore-French Translation', 'Moore-English Translation']])
|
109 |
+
st.table(df[['Original French', 'French-Moore Translation']])
|
110 |
+
st.table(df[['Original English', 'English-Moore Translation']])
|
111 |
+
|
112 |
########################
|
113 |
with lid:
|
114 |
langid_file = st.file_uploader("Please upload an audio file:", type=['mp3', 'm4a'], key = "lid_uploader")
|
|
|
120 |
lang = decode_iso(lang)
|
121 |
":violet[The detected language is " + lang + "]"
|
122 |
|
123 |
+
st.subheader("Examples")
|
124 |
+
"Using the supplied clips, here are the recognized languages:"
|
125 |
+
df = pd.read_csv("data/language_id.csv")
|
126 |
+
df.columns = ['Clip ID', 'Language detected when speaking Mooré', 'Language detected when speaking French']
|
127 |
+
df.set_index('Clip ID', inplace=True)
|
128 |
+
st.dataframe(df)
|
129 |
+
|
130 |
+
|
131 |
+
# supported colors: blue, green, orange, red, violet, gray/grey, rainbow.
|
132 |
+
# https://docs.streamlit.io/library/api-reference/text/st.markdown
|
133 |
+
|
134 |
+
with about:
|
135 |
+
#st.header("How it works")
|
136 |
+
st.markdown('''
|
137 |
+
**Text to speech**, **speech to text**, and **language identification** capabilities are provided by Meta's [Massively Multilingual Speech (MMS)](https://ai.meta.com/blog/multilingual-model-speech-recognition/) model, which supports over 1000 languages.[^1][^2]
|
138 |
+
|
139 |
+
**Translation** capabilities are provided primarily by Meta's [No Language Left Behind (NLLB)](https://ai.meta.com/research/no-language-left-behind/) model, which supports translation between 200 languages.[^3]
|
140 |
+
We compare Meta's NLLB translations to two other translation alternatives. Masakhane, an African NLP initiative, offers endpoints for translations between Mooré and French.[^4] Helsinki NLP offers endpoints between Mooré and English, and one endpoint from French to Mooré.[^5]
|
141 |
+
|
142 |
+
Facebook has since released [SeamlessM4T](https://huggingface.co/docs/transformers/main/model_doc/seamless_m4t), which also provides support for audio-to-audio translation; however, Mooré is not currently one of the included languages.
|
143 |
+
[^1]: Endpoints used: TTS ([English](https://huggingface.co/facebook/mms-tts-eng),
|
144 |
+
[French](https://huggingface.co/facebook/mms-tts-fra),
|
145 |
+
[Mooré](https://huggingface.co/facebook/mms-tts-mos)),
|
146 |
+
[STT](https://huggingface.co/facebook/mms-1b-all),
|
147 |
+
[LID](https://huggingface.co/facebook/mms-lid-256). For language ID, the 256-language variant was chosen because it is the model covering the fewest languages that still includes Mooré.
|
148 |
+
Learn more:
|
149 |
+
[Docs](https://huggingface.co/docs/transformers/model_doc/mms) |
|
150 |
+
[Paper](https://arxiv.org/abs/2305.13516) |
|
151 |
+
[Supported languages](https://dl.fbaipublicfiles.com/mms/misc/language_coverage_mms.html)
|
152 |
+
[^3]: Endpoint used: [NLLB](https://huggingface.co/facebook/nllb-200-distilled-600M).
|
153 |
+
Learn more:
|
154 |
+
[Docs](https://huggingface.co/docs/transformers/model_doc/nllb) |
|
155 |
+
[Paper](https://arxiv.org/abs/2207.04672) |
|
156 |
+
[Supported languages](https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200)
|
157 |
+
[^4]: Endpoint used: [Mooré to French](https://huggingface.co/masakhane/m2m100_418M_mos_fr_news),
|
158 |
+
[French to Mooré](https://huggingface.co/masakhane/m2m100_418M_fr_mos_news).
|
159 |
+
Learn more:
|
160 |
+
[Docs](https://github.com/masakhane-io/lafand-mt) |
|
161 |
+
[Paper](https://arxiv.org/abs/2205.02022)
|
162 |
+
[^5]: Endpoints used: [Mooré to English](https://huggingface.co/Helsinki-NLP/opus-mt-mos-en),
|
163 |
+
[English to Mooré](https://huggingface.co/Helsinki-NLP/opus-mt-en-mos),
|
164 |
+
[French to Mooré](https://huggingface.co/Helsinki-NLP/opus-mt-fr-mos).
|
165 |
+
Learn more:
|
166 |
+
[Docs](https://github.com/Helsinki-NLP/Opus-MT)
|
167 |
+
''')
|