Taizo Kaneko committed on
Commit 8c5c4c6
1 Parent(s): 3a2ccd9
commit files to HF hub
Browse files
- config.json +12 -0
- pytorch_model.bin +3 -0
- special_tokens_map.json +7 -0
- tokenizer_config.json +20 -0
- vocab.txt +0 -0
- zeroshot.py +48 -0
config.json
ADDED
@@ -0,0 +1,12 @@
+{
+  "architectures": [
+    "CustomModel"
+  ],
+  "auto_map": {
+    "AutoConfig": "__main__.CustomConfig",
+    "AutoModel": "__main__.CustomModel"
+  },
+  "model_type": "test-zeroshot",
+  "torch_dtype": "float32",
+  "transformers_version": "4.23.1"
+}
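The auto_map block is what lets AutoConfig and AutoModel resolve the custom classes at load time. A minimal loading sketch, assuming the repo id paulhindemith/zero-shot-classification used in zeroshot.py; note that the "__main__" entries only resolve while the publishing script itself is running, so fully remote loading would need the classes in a module file shipped with the repo:

    # Hypothetical loading sketch; trust_remote_code opts in to running the
    # repo's custom classes. Assumes CustomConfig/CustomModel are importable,
    # since "__main__" auto_map entries resolve only in the defining script.
    from transformers import AutoConfig, AutoModel

    config = AutoConfig.from_pretrained("paulhindemith/zero-shot-classification",
                                        trust_remote_code=True)
    model = AutoModel.from_pretrained("paulhindemith/zero-shot-classification",
                                      trust_remote_code=True)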
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c10506fbdef05cb01eaaaacc2889d89634cba745074f72ee9a2b8430c9a07dd4
+size 765
special_tokens_map.json
ADDED
@@ -0,0 +1,7 @@
+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
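These are the stock BERT special tokens. A quick sanity check, assuming the same repo id as above:

    # Verify the special-token map round-trips through the tokenizer (repo id assumed).
    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("paulhindemith/zero-shot-classification")
    assert tok.cls_token == "[CLS]" and tok.sep_token == "[SEP]"
    assert tok.pad_token == "[PAD]" and tok.unk_token == "[UNK]"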
tokenizer_config.json
ADDED
@@ -0,0 +1,20 @@
+{
+  "cls_token": "[CLS]",
+  "do_lower_case": false,
+  "do_subword_tokenize": true,
+  "do_word_tokenize": true,
+  "jumanpp_kwargs": null,
+  "mask_token": "[MASK]",
+  "mecab_kwargs": null,
+  "model_max_length": 512,
+  "name_or_path": "cl-tohoku/bert-base-japanese",
+  "never_split": null,
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "special_tokens_map_file": null,
+  "subword_tokenizer_type": "wordpiece",
+  "sudachi_kwargs": null,
+  "tokenizer_class": "BertJapaneseTokenizer",
+  "unk_token": "[UNK]",
+  "word_tokenizer_type": "mecab"
+}
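The tokenizer_class and word_tokenizer_type fields pin this to BertJapaneseTokenizer with MeCab word segmentation followed by WordPiece, which requires the fugashi and ipadic packages at runtime. A minimal sketch, assuming the same repo id:

    # MeCab-based Japanese tokenization; needs `pip install fugashi ipadic`.
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("paulhindemith/zero-shot-classification")
    print(tokenizer.tokenize("今日はいい天気です。"))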
vocab.txt
ADDED
The diff for this file is too large to render. See raw diff.
zeroshot.py
ADDED
@@ -0,0 +1,48 @@
+# Minimal custom model that exposes a zero-shot-classification pipeline on the Hub.
+import torch
+from transformers import ZeroShotClassificationPipeline, pipeline
+from transformers.configuration_utils import PretrainedConfig
+from transformers.modeling_outputs import SequenceClassifierOutput
+from transformers.modeling_utils import PreTrainedModel
+from huggingface_hub import Repository
+
+
+class CustomConfig(PretrainedConfig):
+    model_type = "test-zeroshot"
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+
+class CustomModel(PreTrainedModel):
+    config_class = CustomConfig
+
+    def __init__(self, config: CustomConfig):
+        super().__init__(config)
+        self.config = config
+        # A single dummy parameter so the model has weights to serialize.
+        self.embeddings = torch.nn.Embedding(num_embeddings=1, embedding_dim=1)
+
+    def forward(self, **kwargs) -> SequenceClassifierOutput:
+        # Stub: ignore all inputs and return constant float logits.
+        return SequenceClassifierOutput(logits=torch.tensor([[1.0, 2.0, 3.0]]))
+
+
+if __name__ == "__main__":
+    # Borrow a Japanese BERT tokenizer from an existing pipeline.
+    classifier = pipeline("zero-shot-classification",
+                          model="cl-tohoku/bert-base-japanese")
+
+    # Register the custom classes so AutoConfig/AutoModel can resolve them
+    # via the auto_map entries written to config.json.
+    CustomConfig.register_for_auto_class()
+    CustomModel.register_for_auto_class("AutoModel")
+
+    p = ZeroShotClassificationPipeline(model=CustomModel(CustomConfig()),
+                                       tokenizer=classifier.tokenizer)
+
+    # Clone the target repo, save the pipeline into it, and push.
+    repo = Repository("zero-shot-classification",
+                      clone_from="paulhindemith/zero-shot-classification")
+    p.save_pretrained("zero-shot-classification")
+    repo.push_to_hub()
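Once pushed, the repo can be consumed like any zero-shot pipeline. A usage sketch, under the assumption that the auto_map entries resolve for remote loading; since the stub forward() returns fixed logits, every input yields the same scores:

    # Hypothetical usage of the pushed pipeline; scores are constant because
    # the stub model ignores its inputs.
    from transformers import pipeline

    clf = pipeline("zero-shot-classification",
                   model="paulhindemith/zero-shot-classification",
                   trust_remote_code=True)
    print(clf("今日はいい天気です。", candidate_labels=["天気", "スポーツ", "政治"]))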