OrK7 committed on
Commit
1c896b8
1 Parent(s): 7402f18

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +79 -0
README.md CHANGED
@@ -17,3 +17,82 @@ Recent research efforts have been directed toward the development of automated s
17
 
18
  <img src="https://github.com/OrKatz7/parler-hate-speech/blob/main/docs/parler_results.jpeg?raw=true">
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  <img src="https://github.com/OrKatz7/parler-hate-speech/blob/main/docs/parler_results.jpeg?raw=true">
19
 
20
+ ```
21
+ !pip install huggingface_hub
22
+ !pip install tokenizers transformers
23
+ !pip install iterative-stratification
24
+ !git clone https://github.com/OrKatz7/parler-hate-speech
25
+ %cd parler-hate-speech/src
26
+ ```
27
+
28
+ ```
29
+ from huggingface_hub import hf_hub_download
30
+ import torch
31
+ import sys
32
+ from model import CustomModel,MeanPooling
33
+ from transformers import AutoTokenizer, AutoModel, AutoConfig
34
+ import numpy as np
35
+ class CFG:
36
+ model="microsoft/deberta-v3-base"
37
+ target_cols=['label_mean']
38
+ ```
39
+
40
+ ```
41
+ name = "OrK7/parler_hate_speech"
42
+ downloaded_model_path = hf_hub_download(repo_id=name, filename="pytorch_model.bin")
43
+ model = torch.load(downloaded_model_path)
44
+ tokenizer = AutoTokenizer.from_pretrained(name)
45
+ ```
46
+
47
+ ```
48
+ def prepare_input(text):
49
+ inputs = tokenizer.encode_plus(
50
+ text,
51
+ return_tensors=None,
52
+ add_special_tokens=True,
53
+ max_length=512,
54
+ pad_to_max_length=True,
55
+ truncation=True
56
+ )
57
+ for k, v in inputs.items():
58
+ inputs[k] = torch.tensor(np.array(v).reshape(1,-1), dtype=torch.long)
59
+ return inputs
60
+
61
+ def collate(inputs):
62
+ mask_len = int(inputs["attention_mask"].sum(axis=1).max())
63
+ for k, v in inputs.items():
64
+ inputs[k] = inputs[k][:,:mask_len]
65
+ return inputs
66
+ ```
67
+
68
+ ```
69
+ from transformers import Pipeline
70
+ class HatePipeline(Pipeline):
71
+ def _sanitize_parameters(self, **kwargs):
72
+ preprocess_kwargs = {}
73
+ if "maybe_arg" in kwargs:
74
+ preprocess_kwargs["maybe_arg"] = kwargs["maybe_arg"]
75
+ return preprocess_kwargs, {}, {}
76
+
77
+ def preprocess(self, inputs):
78
+ out = prepare_input(inputs)
79
+ return collate(out)
80
+
81
+ def _forward(self, model_inputs):
82
+ outputs = self.model(model_inputs)
83
+ return outputs
84
+
85
+ def postprocess(self, model_outputs):
86
+ return np.array(model_outputs[0,0].numpy()).clip(0,1)*4+1
87
+ ```
88
+ ```
89
+ pipe = HatePipeline(model=model)
90
+ pipe("I Love you #")
91
+ ```
92
+ results: 1.0
93
+
94
+ ```
95
+ pipe("I Hate #$%#$%Jewish%$#@%^^@#")
96
+ ```
97
+ results: 4.155200004577637
98
+