""" will do this when I need to. Is it absolutely necessary to keep track of idioms separately? """ import os import wandb from idiomify.fetchers import fetch_literal2idiomatic, fetch_config from idiomify.paths import ROOT_DIR def main(): config = fetch_config()['idioms'] train_df, _ = fetch_literal2idiomatic(config['ver']) idioms_df = train_df[['Idiom', "Sense"]] idioms_df = idioms_df.groupby('Idiom').agg({'Sense': lambda x: list(set(x))}) with wandb.init(entity="eubinecto", project="idiomify") as run: # the paths to write datasets in tsv_path = ROOT_DIR / "all.tsv" idioms_df.to_csv(tsv_path, sep="\t") artifact = wandb.Artifact(name="idioms", type="dataset", description=config['description'], metadata=config) artifact.add_file(tsv_path) # then, we just log them here. run.log_artifact(artifact, aliases=["latest", config['ver']]) # don't forget to remove them os.remove(tsv_path) if __name__ == '__main__': main()