nsorros committed on
Commit
fd5a1b3
1 Parent(s): 4709571

Update tagged grants

Browse files
Files changed (2) hide show
  1. tag.py +8 -4
  2. tagged_grants.jsonl +0 -0
tag.py CHANGED
@@ -1,6 +1,7 @@
1
  import json
2
 
3
  from transformers import AutoModel, AutoTokenizer
 
4
  import srsly
5
  import typer
6
 
@@ -22,11 +23,14 @@ def tag(data_path, tagged_data_path, sample_size: int = 10):
22
  )
23
 
24
  texts = [grant["title_and_description"] for grant in data]
25
- inputs = tokenizer(texts, padding="max_length")
26
- labels = model(**inputs, return_labels=True)
27
 
28
- for i, tags in enumerate(labels):
29
- data[i]["tags"] = tags
 
 
 
30
 
31
  srsly.write_jsonl(tagged_data_path, data)
32
 
 
1
  import json
2
 
3
  from transformers import AutoModel, AutoTokenizer
4
+ from tqdm import tqdm
5
  import srsly
6
  import typer
7
 
 
23
  )
24
 
25
  texts = [grant["title_and_description"] for grant in data]
26
+ for batch_index in tqdm(range(0, len(texts), 10)):
27
+ batch_texts = texts[batch_index:batch_index+10]
28
 
29
+ inputs = tokenizer(batch_texts, padding="max_length")
30
+ labels = model(**inputs, return_labels=True)
31
+
32
+ for i, tags in enumerate(labels):
33
+ data[i]["tags"] = tags
34
 
35
  srsly.write_jsonl(tagged_data_path, data)
36
 
tagged_grants.jsonl CHANGED
The diff for this file is too large to render. See raw diff