"""Grapheme-to-phoneme (G2P) table loading via OmegaConf resolvers.

Importing this module registers the ``gh_download`` and ``load_g2p``
resolvers with OmegaConf and then loads ``configs/g2p.yaml``, exposing the
value under its ``g2p`` key as the module-level name ``g2p``.
"""

import csv
from io import BytesIO

import requests
from omegaconf import OmegaConf

# Grapheme -> phoneme pairs added to every language's table when that
# language's CSV row does not already define the grapheme.
EXTRA_G2P = {
    "z": "z",
    "o": "o",
    "h": "h",
    "g": "g",
    "y": "j",
    "w": "w",
    "c": "ʦ",
    "u": "u",
    "f": "f",
    "v": "v",
    "j": "ɟ",
    "b": "b",
    "q": "q",
    "e": "e",
    ",": ",",
}

# CSV columns that identify the row instead of mapping a grapheme.
_ID_COLUMNS = ("Language", "Dialect")

# Seconds to wait for GitHub to connect/respond; without a timeout a stalled
# connection would block the module import indefinitely.
_REQUEST_TIMEOUT = 30


def gh_download(repo, path, token):
    """Download a file from a GitHub repository and return it as text.

    Args:
        repo: Repository in ``owner/name`` form, interpolated into the
            GitHub contents API URL.
        path: Path of the file inside the repository.
        token: Token sent as a ``Bearer`` authorization header.

    Returns:
        The response body decoded as ``utf-8-sig`` (a leading BOM, if any,
        is dropped).

    Raises:
        Exception: If the response status code is not 200.
        requests.RequestException: On network failure or timeout.
    """
    headers = {
        "Authorization": f"Bearer {token}",
        "Accept": "application/vnd.github.raw+json",
    }
    url = f"https://api.github.com/repos/{repo}/contents/{path}"
    response = requests.get(url, headers=headers, timeout=_REQUEST_TIMEOUT)
    if response.status_code != 200:
        raise Exception(f"Failed to download {path} from {repo}, response: {response}")
    # Decode with utf-8-sig so a byte-order mark does not end up glued to the
    # first CSV header name.
    response.encoding = "utf-8-sig"
    return response.text


def load_g2p(g2p_string):
    """Parse a G2P CSV string into ``{language_tag: {grapheme: phoneme}}``.

    The CSV must have ``Language`` and ``Dialect`` columns; every other
    column name is treated as a grapheme. The language tag is the language
    alone when the dialect is ``"-"``, otherwise ``"<language>_<dialect>"``.
    A cell equal to ``"-"`` means "no mapping" and is skipped. When a cell
    holds several comma-separated values only the first one is kept.
    Entries from ``EXTRA_G2P`` are then added for graphemes the row did not
    define.

    Args:
        g2p_string: CSV content including the header line.

    Returns:
        A dict mapping each language tag to its grapheme -> phoneme dict.

    Raises:
        KeyError: If the ``Language`` or ``Dialect`` column is missing.
    """
    g2p = {}
    for row in csv.DictReader(g2p_string.split("\n")):
        language = row["Language"]
        dialect = row["Dialect"]
        lang_tag = f"{language}" if dialect == "-" else f"{language}_{dialect}"

        # Create the table up front: a row whose grapheme cells are all "-"
        # must still get an entry, otherwise the EXTRA_G2P fill below would
        # raise KeyError. Repeated tags merge into the same table.
        mapping = g2p.setdefault(lang_tag, {})

        for grapheme, phonemes in row.items():
            # DictReader stores surplus cells under the key None (as a list)
            # and pads short rows with None values; neither is a mapping.
            if grapheme is None or grapheme in _ID_COLUMNS:
                continue
            if phonemes is None or phonemes == "-":
                continue
            mapping[grapheme] = phonemes.split(",")[0]

        # Fill in fallback graphemes without overriding the CSV's own values.
        for grapheme, phoneme in EXTRA_G2P.items():
            mapping.setdefault(grapheme, phoneme)
    return g2p


# Register the resolvers before loading the config so that interpolations
# using them can be resolved (presumably configs/g2p.yaml references both —
# confirm against that file).
OmegaConf.register_new_resolver("gh_download", gh_download)
OmegaConf.register_new_resolver("load_g2p", load_g2p)

g2p = OmegaConf.to_object(OmegaConf.load("configs/g2p.yaml"))["g2p"]