Spaces:
Runtime error
Runtime error
File size: 3,889 Bytes
4beebda d638f0b 4beebda |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
# -*- coding: utf-8 -*-
import torch
from torch import nn
import torch.nn.functional as F
from transformers import AutoTokenizer, AutoModelWithLMHead
from functools import lru_cache
from tokenizers import ByteLevelBPETokenizer
from tokenizers.processors import BertProcessing
def setup_tokenizer():
    """Fetch the distilroberta-base tokenizer from the HuggingFace hub and
    write its files (vocab.json, merges.txt, ...) into the local
    ``tokenizer`` directory. Requires network access on first run."""
    AutoTokenizer.from_pretrained('distilroberta-base').save_pretrained("tokenizer")
import os

# Create the target directory without shelling out: os.system("mkdir -p ...")
# is non-portable (fails on Windows) and silently ignores errors.
os.makedirs("tokenizer", exist_ok=True)
setup_tokenizer()  # one-time download of the tokenizer files at import time
# from https://github.com/digantamisra98/Mish/blob/b5f006660ac0b4c46e2c6958ad0301d7f9c59651/Mish/Torch/mish.py
@torch.jit.script
def mish(input):
    """Mish activation (Misra, 2019): x * tanh(softplus(x))."""
    return torch.tanh(F.softplus(input)).mul(input)
class Mish(nn.Module):
    """Module wrapper for the Mish activation: x * tanh(softplus(x))."""

    def forward(self, input):
        # Computation inlined so the module is self-contained; numerically
        # identical to the scripted free function above.
        return input * torch.tanh(F.softplus(input))
class EmoModel(nn.Module):
    """Emotion head on top of a pretrained transformer.

    Feeds the first-token (<s>/[CLS]) final hidden state of ``base_model``
    through a small MLP head.

    Args:
        base_model: transformer whose call returns a tuple with the last
            hidden state of shape (batch, seq, hidden) as element 0.
        n_classes: number of output units. 2 here — per the original
            comment the head was changed from 6 emotion classes to VA
            (presumably valence/arousal — confirm against training code).
        base_model_output_size: hidden size of ``base_model``
            (768 for distilroberta-base).
        dropout: dropout probability, applied before each linear layer.
    """

    def __init__(self, base_model, n_classes=2, base_model_output_size=768, dropout=0.05):
        super().__init__()
        self.base_model = base_model
        self.classifier = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(base_model_output_size, base_model_output_size),
            Mish(),
            nn.Dropout(dropout),
            # originally, n_classes = 6
            # now, we want to use VA, change it to 2
            nn.Linear(base_model_output_size, n_classes)
        )
        # BERT-style init (std=0.02) for the head's linear layers; use
        # nn.init.* instead of mutating .data directly (deprecated idiom).
        for layer in self.classifier:
            if isinstance(layer, nn.Linear):
                nn.init.normal_(layer.weight, mean=0.0, std=0.02)
                if layer.bias is not None:
                    nn.init.zeros_(layer.bias)

    def forward(self, input_, *args):
        """``input_`` is a (token_ids, attention_mask) pair of (batch, seq)
        tensors; returns logits of shape (batch, n_classes)."""
        X, attention_mask = input_
        hidden_states = self.base_model(X, attention_mask=attention_mask)
        # hidden_states[0] is the last hidden state; take sequence position 0.
        return self.classifier(hidden_states[0][:, 0, :])
from pathlib import Path
# Path to the fine-tuned head+encoder weights (latest: all layers unfrozen).
#pretrained_path = "on_plurk_new_fix_data_arch_1_epoch_2_bs_16.pt"
pretrained_path = "arch1_unfreeze_all.pt" # the latest weights!
# Raise explicitly instead of `assert`, which is stripped under `python -O`.
if not Path(pretrained_path).is_file():
    raise FileNotFoundError(f"pretrained weights not found: {pretrained_path}")
model = EmoModel(AutoModelWithLMHead.from_pretrained("distilroberta-base").base_model)
# CPU map_location so the checkpoint loads on machines without a GPU.
model.load_state_dict(torch.load(pretrained_path, map_location=torch.device('cpu')))
model.eval()  # inference only: disable dropout
from functools import lru_cache
@lru_cache(maxsize=1)
def get_tokenizer(max_tokens=512):
    """Build (once — cached via lru_cache) a byte-level BPE tokenizer from
    the saved distilroberta files, with RoBERTa-style <s>/</s> wrapping and
    fixed-length truncation/padding to ``max_tokens``."""
    import os.path
    from tokenizers import ByteLevelBPETokenizer
    from tokenizers.processors import BertProcessing

    vocab_path = "tokenizer/vocab.json"
    merges_path = "tokenizer/merges.txt"
    # Re-download the tokenizer files if either one is missing.
    if not (os.path.isfile(vocab_path) and os.path.isfile(merges_path)):
        setup_tokenizer()
    tok = ByteLevelBPETokenizer(vocab_path, merges_path)
    # RoBERTa-style sequence post-processing: <s> ... </s>
    tok._tokenizer.post_processor = BertProcessing(
        ("</s>", tok.token_to_id("</s>")),
        ("<s>", tok.token_to_id("<s>")),
    )
    tok.enable_truncation(max_tokens)
    tok.enable_padding(length=max_tokens, pad_id=tok.token_to_id("<pad>"))
    return tok
# Cell
def convert_text_to_tensor(text, tokenizer=None):
    """Encode ``text`` into a (token_ids, attention_mask) pair of tensors,
    each with a leading batch dimension of size 1.

    Falls back to the cached default tokenizer when none is supplied.
    """
    tok = get_tokenizer() if tokenizer is None else tokenizer
    encoding = tok.encode(text)
    ids = torch.tensor(encoding.ids).unsqueeze(0)
    mask = torch.tensor(encoding.attention_mask).unsqueeze(0)
    return (ids, mask)
def get_output(text, model, tokenizer=None, return_tensor=False):
    """Score ``text`` with the emotion model.

    Args:
        text: input string.
        model: an EmoModel-compatible module taking a
            (token_ids, attention_mask) pair.
        tokenizer: optional tokenizer; defaults to the cached one.
        return_tensor: if truthy, return the raw output tensor of shape
            (1, 2); otherwise return the two scores as a (float, float)
            pair (presumably valence/arousal — confirm with training code).
    """
    with torch.no_grad():  # inference only: skip autograd bookkeeping
        model.eval()  # ensure dropout is off even if the caller forgot
        out = model(convert_text_to_tensor(text, tokenizer))
    if return_tensor:  # idiomatic truthiness instead of `== True`
        return out
    # out has shape (1, 2): drop the batch dim, then unpack the two scores.
    scores = out[0]
    return float(scores[0]), float(scores[1])
import gradio as gr
def fn2(text, model=model, return_tensor=False):
    """Gradio adapter: score ``text`` with the module-level default model."""
    return get_output(text, model, return_tensor=return_tensor)
# Minimal Gradio UI: one text box in, two numbers out — Gradio maps the
# (float, float) pair returned by fn2 onto the two "number" outputs.
interface = gr.Interface(
fn = fn2,
inputs="text",
outputs=["number", "number"]
)
interface.launch()
|