tiwariratnesh committed
Commit
fa4a83c
1 Parent(s): 9770e6a

Upload 8 files

README.md ADDED
@@ -0,0 +1,80 @@
+ ---
+ language:
+ - en
+ tags:
+ - aspect-based-sentiment-analysis
+ - PyABSA
+ license: mit
+ datasets:
+ - laptop14
+ - restaurant14
+ - restaurant16
+ - ACL-Twitter
+ - MAMS
+ - Television
+ - TShirt
+ - Yelp
+ metrics:
+ - accuracy
+ - macro-f1
+ widget:
+ - text: "[CLS] when tables opened up, the manager sat another party before us. [SEP] manager [SEP] "
+ ---
+
+
+ # Note
+ Please use [yangheng/deberta-v3-base-absa-v1.1](https://huggingface.co/yangheng/deberta-v3-base-absa-v1.1) instead, which is smaller and performs better.
+ This model was trained on 30k+ ABSA samples; see [ABSADatasets](https://github.com/yangheng95/ABSADatasets). The test sets are not included in training, so you can use this model for training and benchmarking on common ABSA datasets, e.g., the Laptop14 and Rest14 datasets (except for the Rest15 dataset!).
+
+ # DeBERTa for aspect-based sentiment analysis
+ The `deberta-v3-large-absa` model for aspect-based sentiment analysis, trained on English datasets from [ABSADatasets](https://github.com/yangheng95/ABSADatasets).
+
+ ## Training Model
+ This model is based on the FAST-LCF-BERT model with `microsoft/deberta-v3-large`, which comes from [PyABSA](https://github.com/yangheng95/PyABSA).
+ To track state-of-the-art models, please see [PyABSA](https://github.com/yangheng95/PyABSA).
+
+ ## Usage
+ ```python
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
+
+ tokenizer = AutoTokenizer.from_pretrained("yangheng/deberta-v3-large-absa-v1.1")
+
+ model = AutoModelForSequenceClassification.from_pretrained("yangheng/deberta-v3-large-absa-v1.1")
+ ```
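+
+ A minimal inference sketch (assuming `torch` is installed): it reuses the widget's "[CLS] sentence [SEP] aspect [SEP]" input format from the metadata above, and the label mapping from `config.json`. The example sentence is the widget text; everything else is illustrative.
+ ```python
+ import torch
+
+ # Sentence/aspect pair in the widget's "[CLS] sentence [SEP] aspect [SEP]" format
+ text = "[CLS] when tables opened up, the manager sat another party before us. [SEP] manager [SEP] "
+ inputs = tokenizer(text, return_tensors="pt")
+ with torch.no_grad():
+     logits = model(**inputs).logits
+ # config.json maps ids 0/1/2 to Negative/Neutral/Positive
+ print(model.config.id2label[logits.argmax(dim=-1).item()])
+ ```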
+
+ ## Example in PyABSA
+ See this [example](https://github.com/yangheng95/PyABSA/blob/release/demos/aspect_polarity_classification/train_apc_multilingual.py) of training FAST-LCF-BERT on PyABSA datasets, sketched below.
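+
+ A hedged sketch of what the linked demo does, assuming the PyABSA 1.x `pyabsa.functional` API (class and option names may differ across PyABSA versions; check the demo script for the exact calls):
+ ```python
+ from pyabsa.functional import APCConfigManager, APCModelList, ABSADatasetList, Trainer
+
+ # Assumed PyABSA 1.x API, following the linked train_apc_multilingual.py demo
+ config = APCConfigManager.get_apc_config_multilingual()
+ config.model = APCModelList.FAST_LCF_BERT
+ config.pretrained_bert = "microsoft/deberta-v3-large"
+
+ Trainer(config=config, dataset=ABSADatasetList.Multilingual, auto_device=True)
+ ```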
+
+ ## Datasets
+ This model is fine-tuned on 180k examples for the ABSA task (including augmented data). Training dataset files:
+ ```
+ loading: integrated_datasets/apc_datasets/SemEval/laptop14/Laptops_Train.xml.seg
+ loading: integrated_datasets/apc_datasets/SemEval/restaurant14/Restaurants_Train.xml.seg
+ loading: integrated_datasets/apc_datasets/SemEval/restaurant16/restaurant_train.raw
+ loading: integrated_datasets/apc_datasets/ACL_Twitter/acl-14-short-data/train.raw
+ loading: integrated_datasets/apc_datasets/MAMS/train.xml.dat
+ loading: integrated_datasets/apc_datasets/Television/Television_Train.xml.seg
+ loading: integrated_datasets/apc_datasets/TShirt/Menstshirt_Train.xml.seg
+ loading: integrated_datasets/apc_datasets/Yelp/yelp.train.txt
+ ```
+ If you use this model in your research, please cite our paper:
+ ```bibtex
+ @article{YangZMT21,
+   author     = {Heng Yang and
+                 Biqing Zeng and
+                 Mayi Xu and
+                 Tianxing Wang},
+   title      = {Back to Reality: Leveraging Pattern-driven Modeling to Enable Affordable
+                 Sentiment Dependency Learning},
+   journal    = {CoRR},
+   volume     = {abs/2110.08604},
+   year       = {2021},
+   url        = {https://arxiv.org/abs/2110.08604},
+   eprinttype = {arXiv},
+   eprint     = {2110.08604},
+   timestamp  = {Fri, 22 Oct 2021 13:33:09 +0200},
+   biburl     = {https://dblp.org/rec/journals/corr/abs-2110-08604.bib},
+   bibsource  = {dblp computer science bibliography, https://dblp.org}
+ }
+ ```
added_tokens.json ADDED
@@ -0,0 +1 @@
+ {"[MASK]": 128000}
config.json ADDED
@@ -0,0 +1,45 @@
+ {
+   "_name_or_path": "yangheng/deberta-v3-large-absa",
+   "_num_labels": 3,
+   "architectures": [
+     "DebertaForSequenceClassification"
+   ],
+   "attention_probs_dropout_prob": 0.1,
+   "hidden_act": "gelu",
+   "hidden_dropout_prob": 0.1,
+   "hidden_size": 1024,
+   "id2label": {
+     "0": "Negative",
+     "1": "Neutral",
+     "2": "Positive"
+   },
+   "initializer_range": 0.02,
+   "intermediate_size": 4096,
+   "label2id": {
+     "Negative": 0,
+     "Neutral": 1,
+     "Positive": 2
+   },
+   "layer_norm_eps": 1e-07,
+   "max_position_embeddings": 512,
+   "max_relative_positions": -1,
+   "model_type": "deberta-v2",
+   "norm_rel_ebd": "layer_norm",
+   "num_attention_heads": 16,
+   "num_hidden_layers": 24,
+   "pad_token_id": 0,
+   "pooler_dropout": 0,
+   "pooler_hidden_act": "gelu",
+   "pooler_hidden_size": 1024,
+   "pos_att_type": [
+     "p2c",
+     "c2p"
+   ],
+   "position_biased_input": false,
+   "position_buckets": 256,
+   "relative_attention": true,
+   "share_att_key": true,
+   "transformers_version": "4.17.0",
+   "type_vocab_size": 0,
+   "vocab_size": 128100
+ }
gitattributes ADDED
@@ -0,0 +1,28 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bin.* filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zstandard filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ model.safetensors filter=lfs diff=lfs merge=lfs -text
pytorch_metadata.json ADDED
@@ -0,0 +1,5 @@
+ {
+   "name": "tiwariratnesh/security-classifier",
+   "commit_hash": "319caf575b8586cb97917ac124b231c7831b3c15",
+   "publish_time": "2024-05-01T15:58:37.942124+00:00"
+ }
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
spm.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
+ size 2464616
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"do_lower_case": false, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "split_by_punct": false, "sp_model_kwargs": {}, "vocab_type": "spm", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "yangheng/deberta-v3-large-absa", "tokenizer_class": "DebertaV2Tokenizer"}