File size: 1,060 Bytes
207cddf
e3c7b5a
 
207cddf
e9d1a5a
 
e3c7b5a
 
207cddf
 
 
642d911
e3c7b5a
370afc1
 
e9d1a5a
e3c7b5a
 
370afc1
 
e3c7b5a
 
370afc1
e3c7b5a
e9d1a5a
e3c7b5a
370afc1
207cddf
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
"""
will do this when I need to.
Is it absolutely necessary to keep track of idioms separately?
"""
import os
import wandb
from idiomify.fetchers import fetch_literal2idiomatic, fetch_config
from idiomify.paths import ROOT_DIR


def main():
    """
    Build the idiom -> senses table and log it to wandb as the "idioms" artifact.

    Steps:
      1. Read the 'idioms' section of the project config.
      2. Fetch the literal2idiomatic data for config['ver'] and keep the first
         element of the returned pair (used here as the training frame).
      3. Group the 'Sense' column by 'Idiom', deduplicating senses per idiom.
      4. Write the table to a temporary all.tsv under ROOT_DIR, attach it to a
         wandb dataset artifact and log it with the aliases "latest" and
         config['ver'].

    The temporary TSV is removed afterwards, including when writing or
    uploading fails.
    """
    config = fetch_config()['idioms']
    train_df, _ = fetch_literal2idiomatic(config['ver'])
    idioms_df = train_df[['Idiom', "Sense"]]
    # Deduplicate the senses of each idiom. Sorting makes the written file
    # reproducible: plain set iteration order can differ between interpreter
    # runs. key=str keeps the sort well-defined even if a non-string value
    # ended up in the column.
    idioms_df = idioms_df.groupby('Idiom').agg(
        {'Sense': lambda senses: sorted(set(senses), key=str)}
    )

    with wandb.init(entity="eubinecto", project="idiomify") as run:
        # the path to write the dataset in
        tsv_path = ROOT_DIR / "all.tsv"
        try:
            idioms_df.to_csv(tsv_path, sep="\t")
            artifact = wandb.Artifact(name="idioms", type="dataset", description=config['description'],
                                      metadata=config)
            artifact.add_file(tsv_path)
            # then, we just log it here.
            run.log_artifact(artifact, aliases=["latest", config['ver']])
        finally:
            # Always clean up the temporary file; the existence check avoids
            # masking an earlier error with a FileNotFoundError.
            if os.path.exists(tsv_path):
                os.remove(tsv_path)


# Run the upload only when this file is executed as a script, not when it is imported.
if __name__ == '__main__':
    main()