File size: 1,374 Bytes
e9d1a5a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25e310b
e9d1a5a
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
"""
Here, what should you do here?
just upload all idioms here - name it as epie.
"""
import csv
import os
from idiomify.paths import ROOT_DIR
from idiomify.fetchers import fetch_pie
import argparse
import wandb


def main():
    """
    Build the literal-to-idiom pairs and upload them to Weights & Biases as a dataset artifact.

    Command-line arguments:
        --ver: which version of the dataset to build. "pie_v0" (the default) keeps only
            the first 106 rows returned by fetch_pie(); "pie_v1" keeps every row.

    The pairs are written to a temporary TSV file at ROOT_DIR / "all.tsv", attached to
    the "literal2idiomatic" artifact, and logged with the aliases "latest" and the chosen
    version. The temporary file is deleted afterwards, even if the upload fails.

    Raises:
        NotImplementedError: if the version is not a supported one (argparse's `choices`
            normally rejects such a value before this point).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--ver", type=str, default="pie_v0",
                        choices=["pie_v0", "pie_v1"])
    config = vars(parser.parse_args())

    # get the idioms here
    if config['ver'] == "pie_v0":
        # only the first 106, and we use this just for piloting
        rows = fetch_pie()[:106]
    elif config['ver'] == "pie_v1":
        # just include all
        rows = fetch_pie()
    else:
        raise NotImplementedError
    # presumably row[3] is the literal paraphrase and row[2] the idiom - confirm against fetch_pie
    literal2idiom = [(row[3], row[2]) for row in rows]

    with wandb.init(entity="eubinecto", project="idiomify", config=config) as run:
        artifact = wandb.Artifact(name="literal2idiomatic", type="dataset")
        tsv_path = ROOT_DIR / "all.tsv"
        try:
            # newline="" is required by the csv module so that it controls line endings itself;
            # an explicit encoding keeps the output independent of the machine's locale.
            with open(tsv_path, 'w', newline="", encoding="utf-8") as fh:
                writer = csv.writer(fh, delimiter="\t")
                writer.writerows(literal2idiom)
            artifact.add_file(tsv_path)
            run.log_artifact(artifact, aliases=["latest", config['ver']])
        finally:
            # the tsv is only a staging file; never leave it behind, even when the upload fails
            if os.path.exists(tsv_path):
                os.remove(tsv_path)


# run the upload only when this file is executed as a script, not when it is imported
if __name__ == '__main__':
    main()