Spaces:

cdleong
/

phonemize-audio

Runtime error

File size: 3,167 Bytes

1ffd672
f8d0565
 
ff1bae1
b3ebb49
 
 
f8d0565
b3ebb49
 
 
 
 
 
 
8112d7a
f39c7a2
 
b3ebb49
 
f8d0565
55c2b20
663ac44
 
 
 
 
 
 
fa6f9dd
5caf6ad
0f7a10e
df07559
 
 
 
 
 
f8d0565
 
b3ebb49
f8d0565
0c3b1db
464e88d
aae5aef
b3ebb49
464e88d
 
 
f8d0565
 
464e88d
b3ebb49
 
464e88d
f8d0565
b3ebb49
 
f8d0565
 
1ffd672
 
b3ebb49
 
3db5979
 
 
 
 
 
f8d0565
 
3db5979
464e88d
 
 
f8d0565
5d2138a
f9db145
f083023
 
 
 
f9db145
f083023
f9db145
f083023
 
 
 
 
 
 
 
 
 
f8d0565

import streamlit as st
import langcodes
from allosaurus.app import read_recognizer
from pathlib import Path
import string
from itertools import combinations
from collections import defaultdict

@st.cache
def get_supported_codes():
  """Return the language codes the Allosaurus recognizer reports as available.

  Every three-letter lowercase string is probed with ``model.is_available``.
  The special code "ipa" (the recognizer's default option) is guaranteed to
  be present in the result exactly once.

  Returns:
    list[str]: available three-letter codes in alphabetical probing order,
    with "ipa" appended at the end if probing did not already include it.
  """
  # Function-scope import so this block does not rely on the file-level
  # import list, which only brings in ``combinations``.
  from itertools import product

  model = read_recognizer()

  # ``product`` rather than ``combinations``: combinations only yields
  # strictly increasing letter triples ("abc" but never "eng" or "fra"),
  # which silently skipped the vast majority of candidate codes.
  supported_codes = [
    code
    for code in map("".join, product(string.ascii_lowercase, repeat=3))
    if model.is_available(code)
  ]

  # "ipa" is itself a three-letter string, so the probe above may already
  # have found it; avoid listing the default option twice.
  if "ipa" not in supported_codes:
    supported_codes.append("ipa")  # default option
  return supported_codes
  
  
def get_path_to_wav_format(uploaded_file):
  """Save an uploaded audio file locally and return the path to a .wav version.

  Args:
    uploaded_file: object exposing ``name`` (the client-side filename) and
      ``getvalue()`` (the raw bytes), such as the items returned by
      ``st.file_uploader``.

  Returns:
    pathlib.Path: relative path, in the current working directory, of a .wav
    file. For .wav uploads this is the saved upload itself; for .mp3 uploads
    it is a converted copy written next to the saved upload.

  Raises:
    ValueError: if the filename does not end in .wav or .mp3
      (case-insensitive). Nothing is written to disk in that case.
  """
  # Keep only the final path component: the name comes from the client, and
  # directory parts would either fail (missing directory) or write outside
  # the working directory.
  actual_file_path = Path(Path(uploaded_file.name).name)

  # Decide by the real suffix rather than a substring test, so that
  # "take.wav.mp3" is treated as mp3 and "CLIP.WAV" is accepted.
  suffix = actual_file_path.suffix.lower()
  if suffix not in (".wav", ".mp3"):
    raise ValueError(
      f"Unsupported audio file type {suffix!r} for upload {uploaded_file.name!r}; "
      "expected .wav or .mp3"
    )

  actual_file_path.write_bytes(uploaded_file.getvalue())

  if suffix == ".wav":
    return actual_file_path

  # mp3: decode and re-encode as wav. torchaudio is imported lazily because
  # it is only needed on this branch and is not imported at file level.
  import torchaudio

  new_desired_path = actual_file_path.with_suffix(".wav")
  waveform, sample_rate = torchaudio.load(str(actual_file_path))
  # st.info takes a single message body, so format both values into it.
  st.info(f"Loaded waveform with shape {tuple(waveform.shape)} at {sample_rate} Hz")
  torchaudio.save(str(new_desired_path), waveform, sample_rate)
  return new_desired_path


def get_langcode_description(input_code):
  """Return a human-readable description for a language code.

  Args:
    input_code: a language code string, or an empty/None value.

  Returns:
    str: the display name reported by ``langcodes`` for ``input_code``. The
    generic "universal setting" description is returned instead when
    ``input_code`` is empty, None, equal to "ipa", or rejected by
    ``langcodes`` as a malformed tag.
  """
  default_code = "ipa"  # the default allosaurus recognizer
  default_description = "the default universal setting, not specific to any language"

  if not input_code or input_code == default_code:
    return default_description

  # Only the display name is needed. The previous alpha-3 conversion was
  # never used, and its LookupError was not caught here.
  try:
    return langcodes.get(input_code).display_name()
  except langcodes.LanguageTagError:
    # Malformed tag: fall back to the generic description.
    return default_description
  


if __name__ == "__main__":
  # Language picker: every supported code is offered, labelled with its
  # description, and the universal "ipa" setting is preselected.
  supported_codes = get_supported_codes()
  langcode = st.selectbox(
    "ISO code for input language. Allosaurus doesn't need this, but it can improve accuracy",
    options=supported_codes,
    index=supported_codes.index("ipa"),
    format_func=get_langcode_description,
  )

  model = read_recognizer()
  description = get_langcode_description(langcode)
  st.write(f"Instructing Allosaurus to recognize using language {langcode}. That is, {description}")

  # Only .wav uploads are accepted for now.
  # TODO: convert .mp3 to .wav and save, then add ".mp3" to the accepted types.
  accepted_types = [".wav"]
  uploaded_files = st.file_uploader(
    "Choose a file",
    type=accepted_types,
    accept_multiple_files=True,
  )

  # For each upload: play it back, save it to disk, run the recognizer on
  # the saved path, and show both the path and the recognizer output.
  for uploaded_file in uploaded_files:
    if uploaded_file is None:
      continue

    st.audio(uploaded_file, format='audio/wav')

    wav_file = get_path_to_wav_format(uploaded_file)
    st.write(wav_file)

    result = model.recognize(wav_file, langcode)
    st.write(result)