maximuspowers committed on
Commit
7782f33
·
verified ·
1 Parent(s): a6e9ef2

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +43 -0
app.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import torch
3
+ from transformers import BertTokenizerFast, BertForTokenClassification
4
+ import gradio as gr
5
+
6
# Load the tokenizer and the fine-tuned token classifier once, at import time,
# so every call to the prediction function reuses the same objects.
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')
model = BertForTokenClassification.from_pretrained('ethical-spectacle/social-bias-ner')
# The script only runs inference, so switch the model to evaluation mode.
model.eval()
# Use the GPU when one is available, otherwise stay on the CPU.
model.to('cuda' if torch.cuda.is_available() else 'cpu')
11
+
12
# Maps a classifier output index to the BIO-style tag shown to the user.
# Index 0 is the "outside" tag; the rest are B-/I- pairs for each entity type.
id2label = {
    0: 'O',
    1: 'B-STEREO',
    2: 'I-STEREO',
    3: 'B-GEN',
    4: 'I-GEN',
    5: 'B-UNFAIR',
    6: 'I-UNFAIR',
}
22
+
23
+ # predict function you'll want to use if using in your own code
24
def predict_ner_tags(sentence, threshold=0.5):
    """Tag each token of *sentence* with the labels the model predicts.

    Each logit is passed through a sigmoid independently, so a single token
    may receive several labels at once: every label whose probability is
    strictly greater than ``threshold`` is reported.

    Args:
        sentence: Text to analyse. It is truncated to 128 tokens, and only
            the first sequence of the encoded batch is reported.
        threshold: Probability cut-off above which a label is assigned.
            Defaults to 0.5, the value that was previously hard-coded.

    Returns:
        A JSON string (4-space indent) encoding a list of
        ``{"token": ..., "labels": [...]}`` objects, one per token that is
        not one of the tokenizer's special tokens. A token with no label
        above the threshold gets ``["O"]``.
    """
    inputs = tokenizer(sentence, return_tensors="pt", padding=True, truncation=True, max_length=128)
    input_ids = inputs['input_ids'].to(model.device)
    attention_mask = inputs['attention_mask'].to(model.device)

    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        probabilities = torch.sigmoid(outputs.logits)
        predicted_labels = (probabilities > threshold).int()

    # Look the special tokens up once instead of once per token.
    special_tokens = set(tokenizer.all_special_tokens)
    tokens = tokenizer.convert_ids_to_tokens(input_ids[0])
    # One row of 0/1 flags per token, as plain Python ints.
    label_rows = predicted_labels[0].tolist()

    result = []
    for token, flags in zip(tokens, label_rows):
        if token in special_tokens:
            continue
        labels = [id2label[idx] for idx, flag in enumerate(flags) if flag == 1]
        result.append({"token": token, "labels": labels or ['O']})

    return json.dumps(result, indent=4)