demo_language_moore

Sleeping

App Files Files Community

demo_language_moore / app.py

anyantudre

Update app.py

f963ed2 verified 7 months ago

raw

history blame

7.79 kB

	import torch
	import scipy
	import os
	import streamlit as st
	import pandas as pd
	from transformers import set_seed, pipeline
	from transformers import VitsTokenizer, VitsModel
	from datasets import load_dataset, Audio
	from src import *

	#from huggingface_hub import login
	#from dotenv import load_dotenv

	#load_dotenv()
	#HUGGINGFACE_KEY = os.environ.get("HUGGINGFACE_KEY")
	#login(HUGGINGFACE_KEY)


	########################
	# Language codes offered by every language selector in the app; the
	# selectors pass them through decode_iso for display.
	language_list = ['mos', 'fra', 'eng']


	# Raw string: "\|" is not a recognised Python escape sequence, so a plain
	# literal triggers an invalid-escape warning. The raw form produces exactly
	# the same text (backslash followed by a pipe).
	st.title(r"Demo: Finetuning models \| Mooré Language")

	# One tab per capability; each handle is later used as a `with` container.
	tts, stt, trans, lid, about = st.tabs(
	    ["Text to speech", "Speech to text", "Translation", "Language ID", "About"]
	)

	########################
	# "Text to speech" tab: read text and a language, then play the audio
	# returned by synthesize_facebook.
	with tts:

	    tts_text = st.text_area(
	        label="Please enter your text here:",
	        value="",
	        placeholder="ne y wĩndga",
	    )

	    # Only the first of the two columns receives a widget.
	    tts_col1, tts_col2 = st.columns(2)

	    with tts_col1:
	        tts_lang = st.selectbox('Language of text', language_list, format_func=decode_iso)

	    if st.button("Speak"):
	        st.divider()
	        if not tts_text.strip():
	            # Nothing to synthesize: tell the user instead of handing an
	            # empty string to the model.
	            st.warning("Please enter some text to synthesize.")
	        else:
	            with st.spinner(":rainbow[Synthesizing, please wait...]"):
	                synth = synthesize_facebook(tts_text, tts_lang)
	            # NOTE(review): 16 kHz is presumably the rate synthesize_facebook
	            # produces — confirm against its implementation.
	            st.audio(synth, sample_rate=16_000)


	########################
	# "Speech to text" tab: transcribe an uploaded clip, then show the
	# pre-computed example transcriptions from data/speech_to_text.csv.
	with stt:

	    stt_file = st.file_uploader(
	        "Please upload an audio file:",
	        type=['mp3', 'm4a'],
	        key="stt_uploader",
	    )
	    stt_lang = st.selectbox("Please select the language:", language_list, format_func=decode_iso)

	    if st.button("Transcribe"):
	        st.divider()
	        if stt_file is None:
	            # The uploader yields None until a file has been chosen; do not
	            # pass that on to transcribe().
	            st.warning("Please upload an audio file first.")
	        else:
	            with st.spinner(":rainbow[Received your file, please wait while I process it...]"):
	                # Kept in its own name so the `stt` tab handle is not overwritten.
	                transcription = transcribe(stt_file, stt_lang)
	            st.write(":violet[The transcription is:]")
	            st.write(':violet[ "' + transcription + '"]')

	    st.subheader("Examples")
	    st.write("Using the supplied clips, here are the transcriptions:")
	    stt_examples = pd.read_csv("data/speech_to_text.csv")
	    # Columns are relabelled by position, so the CSV must hold exactly these
	    # five columns in this order.
	    stt_examples.columns = [
	        'Clip ID',
	        'Spoken in Moore',
	        'Spoken in French',
	        'Transcription in Moore',
	        'Transcription in French',
	    ]
	    stt_examples = stt_examples.set_index('Clip ID')

	    st.table(stt_examples[['Spoken in Moore', 'Transcription in Moore']])
	    st.table(stt_examples[['Spoken in French', 'Transcription in French']])

	########################
	# "Translation" tab: translate free text between two selected languages,
	# then show the pre-computed example translations from
	# data/translated_eng.csv.
	with trans:

	    trans_text = st.text_area(
	        label="Please enter your translation text here:",
	        value="",
	        placeholder="ne y wĩndga",
	    )
	    #trans_col1, trans_col2, trans_col3 = st.columns([.25, .25, .5])
	    trans_col1, trans_col2 = st.columns(2)

	    with trans_col1:
	        src_lang = st.selectbox('Translate from:', language_list, format_func=decode_iso)
	    with trans_col2:
	        # index=1 preselects the second list entry, so source and target
	        # start out different.
	        target_lang = st.selectbox('Translate to:', language_list, format_func=decode_iso, index=1)
	    #with trans_col3:
	    # trans_model = st.selectbox("Translation model:",
	    # ("Facebook (nllb-200-distilled-600M)",
	    # "Helsinki NLP (opus-mt-mos-en)",
	    # "Masakhane (m2m100_418m_mos_fr_news)")
	    # )

	    if st.button("Translate"):
	        st.divider()
	        with st.spinner(":rainbow[Translating from " + decode_iso(src_lang) + " into " + decode_iso(target_lang) + ", please wait...]"):
	            translation = translate(trans_text, src_lang, target_lang) #, trans_model)
	        st.write(translation)

	    st.subheader("Examples")
	    st.write("Using the supplied clips, here are the translations:")

	    # Map CSV column names to display labels by name. read_csv's usecols
	    # does not reorder columns, so relabelling by position would silently
	    # depend on the column order inside the file.
	    column_labels = {
	        'ID': 'Clip ID',
	        'Moore': 'Original Moore',
	        'French': 'Original French',
	        'English': 'Original English',
	        'tr_meta_mos_eng': 'Moore-English Translation',
	        'tr_meta_mos_fra': 'Moore-French Translation',
	        'tr_meta_eng_mos': 'English-Moore Translation',
	        'tr_meta_fra_mos': 'French-Moore Translation',
	    }
	    trans_examples = pd.read_csv("data/translated_eng.csv", usecols=list(column_labels))
	    trans_examples = trans_examples.rename(columns=column_labels).set_index('Clip ID')

	    st.table(trans_examples[['Original Moore', 'Moore-French Translation', 'Moore-English Translation']])
	    st.table(trans_examples[['Original French', 'French-Moore Translation']])
	    st.table(trans_examples[['Original English', 'English-Moore Translation']])

	########################
	# "Language ID" tab: detect the language of an uploaded clip, then show the
	# pre-computed example results from data/language_id.csv.
	with lid:
	    langid_file = st.file_uploader(
	        "Please upload an audio file:",
	        type=['mp3', 'm4a'],
	        key="lid_uploader",
	    )

	    if st.button("Identify"):
	        st.divider()
	        if langid_file is None:
	            # The uploader yields None until a file has been chosen; do not
	            # pass that on to identify_language().
	            st.warning("Please upload an audio file first.")
	        else:
	            with st.spinner(":rainbow[Received your file, please wait while I process it...]"):
	                detected_code = identify_language(langid_file)
	                # Turn the returned code into the display name used elsewhere.
	                lang = decode_iso(detected_code)
	            st.write(":violet[The detected language is " + lang + "]")

	    st.subheader("Examples")
	    st.write("Using the supplied clips, here are the recognized languages:")
	    lid_examples = pd.read_csv("data/language_id.csv")
	    # Columns are relabelled by position, so the CSV must hold exactly these
	    # three columns in this order.
	    lid_examples.columns = [
	        'Clip ID',
	        'Language detected when speaking Mooré',
	        'Language detected when speaking French',
	    ]
	    lid_examples = lid_examples.set_index('Clip ID')
	    st.dataframe(lid_examples)


	# supported colors: blue, green, orange, red, violet, gray/grey, rainbow.
	# https://docs.streamlit.io/library/api-reference/text/st.markdown

	# "About" tab: static Markdown describing the models behind each feature,
	# with footnotes linking to the endpoints, docs and papers.
	with about:
	    #st.header("How it works")
	    # Raw string: the text contains "\|", which is not a valid Python escape
	    # sequence in a normal literal; the raw form yields the same characters.
	    st.markdown(r'''
	Text to speech, speech to text, and language identification capabilities are provided by Meta's [Massively Multilingual Speech (MMS)](https://ai.meta.com/blog/multilingual-model-speech-recognition/) model, which supports over 1000 languages.[^1]

	Translation capabilities are provided primarily by Meta's [No Language Left Behind (NLLB)](https://ai.meta.com/research/no-language-left-behind/) model, which supports translation between 200 languages.[^3]
	We compare Meta's NLLB translations to two other translation alternatives. Masakhane, an African NLP initiative, offers endpoints for translations between Mooré and French.[^4] Helsinki NLP offers endpoints between Mooré and English, and one endpoint from French to Mooré.[^5]

	Facebook has since released [SeamlessM4T](https://huggingface.co/docs/transformers/main/model_doc/seamless_m4t) which also provides support for audio-to-audio translation, however, Mooré is not currently one of the included languages.
	[^1]: Endpoints used: TTS ([English](https://huggingface.co/facebook/mms-tts-eng),
	[French](https://huggingface.co/facebook/mms-tts-fra),
	[Mooré](https://huggingface.co/facebook/mms-tts-mos)),
	[STT](https://huggingface.co/facebook/mms-1b-all),
	[LID](https://huggingface.co/facebook/mms-lid-256). For language ID, the 256-language variant was chosen as this was the model with the smallest number of languages, which still included Mooré.
	Learn more:
	[Docs](https://huggingface.co/docs/transformers/model_doc/mms) \|
	[Paper](https://arxiv.org/abs/2305.13516) \|
	[Supported languages](https://dl.fbaipublicfiles.com/mms/misc/language_coverage_mms.html)
	[^3]: Endpoint used: [NLLB](https://huggingface.co/facebook/nllb-200-distilled-600M).
	Learn more:
	[Docs](https://huggingface.co/docs/transformers/model_doc/nllb) \|
	[Paper](https://arxiv.org/abs/2207.04672) \|
	[Supported languages](https://github.com/facebookresearch/flores/blob/main/flores200/README.md#languages-in-flores-200)
	[^4]: Endpoint used: [Mooré to French](https://huggingface.co/masakhane/m2m100_418M_mos_fr_news),
	[French to Mooré](https://huggingface.co/masakhane/m2m100_418M_fr_mos_news).
	Learn more:
	[Docs](https://github.com/masakhane-io/lafand-mt) \|
	[Paper](https://arxiv.org/abs/2205.02022)
	[^5]: Endpoints used: [Mooré to English](https://huggingface.co/Helsinki-NLP/opus-mt-mos-en),
	[English to Mooré](https://huggingface.co/Helsinki-NLP/opus-mt-en-mos),
	[French to Mooré](https://huggingface.co/Helsinki-NLP/opus-mt-fr-mos).
	Learn more:
	[Docs](https://github.com/Helsinki-NLP/Opus-MT)
	''')