DaJulster committed on
Commit
453cc78
·
verified ·
1 Parent(s): e317cf2

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SuaveMultitaskModel"
4
+ ],
5
+ "model_type": "suave_multitask",
6
+ "base_model_name": "roberta-base",
7
+ "num_ai_classes": 62,
8
+ "classifier_dropout": 0.1,
9
+ "id2label": {
10
+ "0": "human",
11
+ "1": "ai"
12
+ },
13
+ "label2id": {
14
+ "human": 0,
15
+ "ai": 1
16
+ },
17
+ "auto_map": {
18
+ "AutoConfig": "configuration_suave_multitask.SuaveMultitaskConfig",
19
+ "AutoModel": "modeling_suave_multitask.SuaveMultitaskModel"
20
+ }
21
+ }
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
prepare_hf_artifacts_light.py CHANGED
@@ -5,6 +5,7 @@ from pathlib import Path
5
 
6
  import torch
7
  from huggingface_hub import hf_hub_download
 
8
 
9
 
10
  def _normalize_state_dict(raw_obj):
@@ -18,18 +19,27 @@ def _normalize_state_dict(raw_obj):
18
 
19
 
20
  def _download_roberta_tokenizer_files(local_dir: Path):
21
- files = [
22
  "tokenizer_config.json",
23
- "special_tokens_map.json",
24
  "vocab.json",
25
  "merges.txt",
 
 
 
26
  "tokenizer.json",
27
  ]
28
 
29
- for name in files:
30
  downloaded = hf_hub_download(repo_id="roberta-base", filename=name)
31
  shutil.copy2(downloaded, local_dir / name)
32
 
 
 
 
 
 
 
 
33
 
34
  def main():
35
  root = Path(".")
 
5
 
6
  import torch
7
  from huggingface_hub import hf_hub_download
8
+ from huggingface_hub.errors import EntryNotFoundError
9
 
10
 
11
  def _normalize_state_dict(raw_obj):
 
19
 
20
 
21
def _download_roberta_tokenizer_files(local_dir: Path):
    """Copy the roberta-base tokenizer files from the Hub into *local_dir*.

    Required files propagate any download error; optional files that are
    missing on the Hub (EntryNotFoundError) are skipped with a notice.
    """
    # (filename, is_required) — order matches the download order: required
    # files first, then the optional ones.
    file_specs = [
        ("tokenizer_config.json", True),
        ("vocab.json", True),
        ("merges.txt", True),
        ("special_tokens_map.json", False),
        ("tokenizer.json", False),
    ]

    for name, is_required in file_specs:
        try:
            cached_path = hf_hub_download(repo_id="roberta-base", filename=name)
        except EntryNotFoundError:
            if is_required:
                # A required file must exist on the Hub — re-raise unchanged.
                raise
            print(f"Optional tokenizer file not found and skipped: {name}")
        else:
            # copy2 preserves metadata of the cached file.
            shutil.copy2(cached_path, local_dir / name)
43
 
44
  def main():
45
  root = Path(".")
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7627394cdadc634e9def45c93d4e044f2403a127531607c4f9dda0fe9a2fa938
3
+ size 498846582
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"model_max_length": 512}
vocab.json ADDED
The diff for this file is too large to render. See raw diff