Spaces:

cdleong
/

phonemize-audio

Runtime error

App Files Files Community

phonemize-audio / app.py

cdleong

update spinner again

6e454d6 over 2 years ago

raw history blame contribute delete

No virus

5.44 kB

	import string
	from collections import defaultdict
	from itertools import permutations, product
	from pathlib import Path

	import langcodes
	import streamlit as st
	import torchaudio
	from allosaurus.app import read_recognizer

	# NOTE(review): st.cache is deprecated in newer Streamlit releases in favour of
	# st.cache_data / st.cache_resource — confirm against the pinned Streamlit version.
	@st.cache
	def get_supported_codes():
	    """Return the language codes the Allosaurus recognizer reports as available.

	    The list always starts with ``"ipa"`` (the default option), followed by
	    every three-letter lowercase code for which ``model.is_available(code)``
	    is true, in alphabetical order.

	    Returns:
	        list[str]: ``"ipa"`` plus all available three-letter codes.
	    """
	    model = read_recognizer()

	    # Use the full cartesian product rather than permutations: permutations
	    # never repeat a letter, so codes such as "ell" or "aaa" would never
	    # even be tested and could not appear in the selectbox.
	    candidates = (
	        "".join(letters)
	        for letters in product(string.ascii_lowercase, repeat=3)
	    )

	    supported_codes = ["ipa"]  # default option
	    supported_codes.extend(
	        code for code in candidates if model.is_available(code)
	    )
	    return supported_codes


	def get_path_to_wav_format(uploaded_file, suppress_outputs=False):
	    """Save an uploaded audio file to disk and return the path of a .wav version.

	    The upload is written to the current working directory under its own
	    name. A .wav upload is returned as-is; .mp3 and .ogg uploads are
	    converted with torchaudio into a sibling file with a ``.wav`` suffix.

	    Args:
	        uploaded_file: object exposing ``name`` and ``getvalue()`` (the main
	            script passes the items returned by ``st.file_uploader``).
	        suppress_outputs: when true, the ``st.info`` messages describing the
	            conversion are not shown.

	    Returns:
	        pathlib.Path: path of the .wav file to hand to the recognizer.

	    Raises:
	        ValueError: if the file extension is not .wav, .mp3 or .ogg.
	    """
	    actual_file_path = Path(uploaded_file.name)

	    # Decide on the real (final) extension, case-insensitively. A substring
	    # test such as `".wav" in name` misclassifies "take.wav.mp3" as a wav
	    # file and does not recognise "CLIP.WAV" at all.
	    suffix = actual_file_path.suffix.lower()
	    if suffix not in (".wav", ".mp3", ".ogg"):
	        # Fail before touching the disk, with a message that names the file.
	        raise ValueError(
	            f"Unsupported audio file type {suffix!r} for {uploaded_file.name!r}; "
	            "expected .wav, .mp3 or .ogg"
	        )

	    actual_file_path.write_bytes(uploaded_file.getvalue())

	    if suffix == ".wav":
	        return actual_file_path

	    new_desired_path = actual_file_path.with_suffix(".wav")
	    encoding = "PCM_S"  # Prevent encoding errors. https://stackoverflow.com/questions/60352850/wave-error-unknown-format-3-arises-when-trying-to-convert-a-wav-file-into-text
	    bits_per_sample = 16
	    waveform, sample_rate = torchaudio.load(actual_file_path)
	    if not suppress_outputs:
	        st.info(f"Allosaurus requires .wav files. Converting with torchaudio, encoding={encoding}, bits_per_sample={bits_per_sample}")
	        st.info(f"Uploaded file sample_rate: {sample_rate}")
	    torchaudio.save(
	        new_desired_path,
	        waveform,
	        sample_rate,
	        encoding=encoding,
	        bits_per_sample=bits_per_sample,
	    )

	    return new_desired_path

	@st.cache
	def get_langcode_description(input_code, url=False):
	    """Describe a language code in human-readable form.

	    Args:
	        input_code: language code to describe; an empty value or ``"ipa"``
	            selects the default recognizer description.
	        url: when true, the language name is returned as a markdown link to
	            its iso639-3.sil.org page instead of plain text.

	    Returns:
	        str: the language's display name (optionally as a markdown link), or
	        the default description when the code is empty, is ``"ipa"``, or
	        ``langcodes`` raises ``LanguageTagError`` for it.
	    """
	    default_code = "ipa"  # the default allosaurus recognizer
	    default_description = "the default universal setting, not specific to any language"

	    names_a_language = bool(input_code) and input_code != default_code
	    if not names_a_language:
	        return default_description

	    try:
	        language = langcodes.get(input_code)
	        iso_code = language.to_alpha3()
	        name = language.display_name()
	    except langcodes.LanguageTagError:
	        # Best effort: a code langcodes rejects gets the default text.
	        return default_description

	    if url:
	        return f"[{name}](https://iso639-3.sil.org/code/{iso_code})"
	    return name

	@st.cache
	def get_langcode_with_description(input_code):
	    """Return ``"<code>: <description>"`` for use as a selectbox label."""
	    label = get_langcode_description(input_code)
	    return "{}: {}".format(input_code, label)


	# Streamlit page script: intro text, a form with a language picker and a file
	# uploader, then phone recognition of every uploaded file on submit.
	# NOTE(review): indentation is missing in this copy of the file, so the nesting
	# of the statements below must be restored before the script can run.
	if __name__ == "__main__":
	st.header("Phonemize Audio files with [Allosaurus](https://github.com/xinjli/allosaurus)")
	st.write("Allosaurus is a pretrained universal phone recognizer. It can be used to recognize phones in more than 2000 languages. It is written by Li, Xinjian and Dalmia, Siddharth and Li, Juncheng and Lee, Matthew and Littell, Patrick and Yao, Jiali and Anastasopoulos, Antonios and Mortensen, David R and Neubig, Graham and Black, Alan W and Florian, Metze. [Click here to visit their repository](https://github.com/xinjli/allosaurus)")
	# NOTE(review): the markdown link target "cdleong.github.io" has no URL scheme,
	# so it will likely be treated as a relative link — confirm, and consider
	# "https://cdleong.github.io".
	st.write("I, [Colin Leong](cdleong.github.io) did not create Allosaurus, but I have created this web app (kindly hosted by Hugging Face) to make it convenient to use: simply upload your files below, and they will be transcribed to phonetic IPA symbols!")
	# NOTE(review): this f-string has no placeholders; the f prefix is unnecessary.
	st.write(f"Feedback: Provide feedback regarding this web app at https://twitter.com/cleong110, or via slack: https://masakhane-nlp.slack.com/archives/D01DU3MHP7A")
	# Options for the language selectbox, with "ipa" pre-selected.
	supported_codes = get_supported_codes()
	index_of_desired_default = supported_codes.index("ipa")
	with st.form("Allosaurus form"):

	# Each option is displayed as "<code>: <description>" via format_func.
	langcode = st.selectbox("ISO code for input language. Allosaurus doesn't need this, but it can improve accuracy",
	options=supported_codes,
	index=index_of_desired_default,
	format_func=get_langcode_with_description
	)

	# A recognizer is created here; get_supported_codes() creates its own.
	model = read_recognizer()
	# url=True: the description is rendered as a markdown link.
	description = get_langcode_description(langcode, url=True)

	st.write(f"Instructing Allosaurus to recognize using language {langcode}. That is, {description}")

	st.subheader("Upload your files here")
	uploaded_files = st.file_uploader("Choose a file", type=[
	".wav",
	".mp3",
	".ogg",
	],
	accept_multiple_files=True,
	)

	submitted = st.form_submit_button("Run phone recognition!")
	if submitted:
	results = {} # for better download/display

	uploaded_files_count = len(uploaded_files)
	# With more than this many files, the per-file conversion messages are
	# suppressed (passed as suppress_outputs below).
	suppress_output_threshold = 2
	my_bar = st.progress(0)
	for i, uploaded_file in enumerate(uploaded_files):

	if uploaded_file is not None:
	# Save the upload to disk and get a .wav path for the recognizer.
	wav_file = get_path_to_wav_format(uploaded_file, uploaded_files_count>suppress_output_threshold)
	with st.spinner(f"transcribing {uploaded_file.name}..."):
	result = model.recognize(wav_file, langcode)
	# Results are keyed by the original upload name.
	results[uploaded_file.name] = result
	files_done = i+1
	# Progress bar shows the fraction of uploaded files handled so far.
	my_bar.progress(files_done/uploaded_files_count)
	st.write(results)