KLeedrug committed on
Commit 4beebda • 1 Parent(s): 469ac22

App done, hope this works

Files changed (1):
  1. app.py +130 -0
app.py ADDED
@@ -0,0 +1,130 @@
+ # -*- coding: utf-8 -*-
+
+ import os
+
+ import torch
+ from torch import nn
+ import torch.nn.functional as F
+ # AutoModelWithLMHead is deprecated in recent transformers releases;
+ # AutoModel returns the bare RoBERTa encoder that EmoModel needs below.
+ from transformers import AutoTokenizer, AutoModel
+ from functools import lru_cache
+ from tokenizers import ByteLevelBPETokenizer
+ from tokenizers.processors import BertProcessing
+
+
+ def setup_tokenizer():
+     # Download the distilroberta-base tokenizer and save its vocab/merges
+     # files locally so get_tokenizer() below can load them.
+     tokenizer = AutoTokenizer.from_pretrained('distilroberta-base')
+     tokenizer.save_pretrained("tokenizer")
+
+
+ os.makedirs("tokenizer", exist_ok=True)
+ setup_tokenizer()
+
+
+ # Mish activation, from
+ # https://github.com/digantamisra98/Mish/blob/b5f006660ac0b4c46e2c6958ad0301d7f9c59651/Mish/Torch/mish.py
+ @torch.jit.script
+ def mish(input):
+     return input * torch.tanh(F.softplus(input))
+
+
+ class Mish(nn.Module):
+     def forward(self, input):
+         return mish(input)
+
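+ # Quick sanity check of mish (illustrative comment only, not executed):
+ #   mish(torch.tensor([0.0, 1.0]))  ->  tensor([0.0000, 0.8651])
+ # i.e. mish(x) = x * tanh(softplus(x)): smooth, and slightly
+ # non-monotonic for negative inputs.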
+
+ class EmoModel(nn.Module):
+     def __init__(self, base_model, n_classes=2, base_model_output_size=768, dropout=0.05):
+         super().__init__()
+         self.base_model = base_model
+
+         self.classifier = nn.Sequential(
+             nn.Dropout(dropout),
+             nn.Linear(base_model_output_size, base_model_output_size),
+             Mish(),
+             nn.Dropout(dropout),
+             # originally n_classes = 6; here we predict valence/arousal (VA), so it is 2
+             nn.Linear(base_model_output_size, n_classes)
+         )
+
+         for layer in self.classifier:
+             if isinstance(layer, nn.Linear):
+                 layer.weight.data.normal_(mean=0.0, std=0.02)
+                 if layer.bias is not None:
+                     layer.bias.data.zero_()
+
+     def forward(self, input_, *args):
+         X, attention_mask = input_
+         hidden_states = self.base_model(X, attention_mask=attention_mask)
+         # classify on the hidden state of the first (<s>) token
+         return self.classifier(hidden_states[0][:, 0, :])
+
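+ # Shape sketch (assuming the 512-token tokenizer defined below):
+ #   X, attention_mask: (batch, 512) LongTensors
+ #   returned logits:   (batch, 2) -> (valence, arousal)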
+
+ from pathlib import Path
+
+ pretrained_path = "on_plurk_new_fix_data_arch_1_epoch_2_bs_16.pt"
+ assert Path(pretrained_path).is_file()
+
+ # AutoModel yields the same base RobertaModel that
+ # AutoModelWithLMHead.from_pretrained(...).base_model did, so the
+ # checkpoint's state_dict keys still match.
+ model = EmoModel(AutoModel.from_pretrained("distilroberta-base"))
+ model.load_state_dict(torch.load(pretrained_path, map_location=torch.device('cpu')))
+ model.eval()
+
+
+ @lru_cache(maxsize=1)
+ def get_tokenizer(max_tokens=512):
+     voc_file = "tokenizer/vocab.json"
+     merg_file = "tokenizer/merges.txt"
+
+     # error checking: re-download the tokenizer files if they are missing
+     if not os.path.isfile(voc_file) or not os.path.isfile(merg_file):
+         setup_tokenizer()
+
+     t = ByteLevelBPETokenizer(voc_file, merg_file)
+     # wrap each sequence as <s> ... </s>, matching RoBERTa's input format
+     t._tokenizer.post_processor = BertProcessing(
+         ("</s>", t.token_to_id("</s>")),
+         ("<s>", t.token_to_id("<s>")),
+     )
+     t.enable_truncation(max_tokens)
+     t.enable_padding(length=max_tokens, pad_id=t.token_to_id("<pad>"))
+     return t
+
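+ # Usage sketch (hypothetical input, shown as a comment):
+ #   t = get_tokenizer()
+ #   enc = t.encode("hello world")
+ #   len(enc.ids) == 512   # always padded/truncated to max_tokens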
+
+ def convert_text_to_tensor(text, tokenizer=None):
+     if tokenizer is None:
+         tokenizer = get_tokenizer()
+     enc = tokenizer.encode(text)
+     # unsqueeze adds the batch dimension: both tensors are (1, max_tokens)
+     X = torch.tensor(enc.ids).unsqueeze(0)
+     Attn = torch.tensor(enc.attention_mask).unsqueeze(0)
+     return (X, Attn)
+
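+ # e.g. (illustrative): X, attn = convert_text_to_tensor("hello")
+ #   both have shape (1, 512), ready to be passed to model((X, attn))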
+
+ def get_output(text, model, tokenizer=None, return_tensor=False):
+     # we should add try/except error handling for the `model` argument,
+     # but I consider it ugly
+     with torch.no_grad():
+         model.eval()
+         out = model(convert_text_to_tensor(text, tokenizer))
+     if return_tensor:
+         return out
+     # otherwise return two floats; out[0] is the single row of the batch
+     tt = out[0]
+     return float(tt[0]), float(tt[1])
+
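+ # Example call (hypothetical text, shown as a comment):
+ #   valence, arousal = get_output("what a great day!", model)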
+
+ import gradio as gr
+
+
+ def fn2(text, model=model, return_tensor=False):
+     # Gradio passes only `text`; the defaults bind the loaded model.
+     out = get_output(text, model, return_tensor=return_tensor)
+     return out
+
+
+ interface = gr.Interface(
+     fn=fn2,
+     inputs="text",
+     outputs=["number", "number"],
+ )
+
+ interface.launch()
+