# -*- coding: utf-8 -*-
"""scratchpad

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/notebooks/empty.ipynb
"""

#!pip install gradio
#!pip install transformers tokenizers
import torch
from torch import nn
import torch.nn.functional as F
# AutoModelWithLMHead is deprecated in recent transformers releases;
# AutoModelForMaskedLM is the equivalent class for RoBERTa-style models.
from transformers import AutoTokenizer, AutoModelForMaskedLM

tokenizer = AutoTokenizer.from_pretrained('distilroberta-base')
tokenizer.save_pretrained("tokenizer")
# from https://github.com/digantamisra98/Mish/blob/b5f006660ac0b4c46e2c6958ad0301d7f9c59651/Mish/Torch/mish.py
def mish(input):
    return input * torch.tanh(F.softplus(input))


class Mish(nn.Module):
    def forward(self, input):
        return mish(input)
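
# Quick sanity check (illustrative only): Mish is smooth, zero at zero,
# and close to the identity for large positive inputs; the module wrapper
# must agree with the functional form.
_x = torch.tensor([-1.0, 0.0, 5.0])
assert torch.allclose(Mish()(_x), mish(_x))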
class NewEmoModel(nn.Module):
    def __init__(self, base_model, n_classes=2, base_model_output_size=768, dropout=0.05):
        super().__init__()
        self.base_model = base_model
        self.classifier = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(base_model_output_size, base_model_output_size),
            Mish(),
            nn.Dropout(dropout),
            nn.Linear(base_model_output_size, n_classes)
        )

        for layer in self.classifier:
            if isinstance(layer, nn.Linear):
                layer.weight.data.normal_(mean=0.0, std=0.02)
                if layer.bias is not None:
                    layer.bias.data.zero_()

        self.last_classifier = nn.Sequential(
            nn.Dropout(dropout),
            # n_classes: [V, A] -> 2
            # 4 extra features: v_bar, v_std, a_bar, a_std
            nn.Linear(2 * n_classes + 4, base_model_output_size),
            Mish(),
            nn.Dropout(dropout),
            nn.Linear(base_model_output_size, n_classes)
        )
    def forward_roberta(self, input_, *args):
        X, attention_mask = input_
        hidden_states = self.base_model(X, attention_mask=attention_mask)
        # maybe do some pooling / RNNs... go crazy here!
        # use the <s> representation
        return self.classifier(hidden_states[0][:, 0, :])
    def forward(self, input_):
        # in1 and in2 are (X, attention_mask) pairs for the two inputs
        in1, in2, V_bar, V_std, A_bar, A_std = input_
        VAsj = self.forward_roberta(in1)   # VA for s_j ... s_k
        VAj_1 = self.forward_roberta(in2)  # VA for s_{j+1}
        # the new running average and std of V and A could be computed here
        return self.last_classifier(
            torch.cat([VAsj, VAj_1, V_bar, V_std, A_bar, A_std], dim=1))

n_classes = 2
model = NewEmoModel(AutoModelForMaskedLM.from_pretrained("distilroberta-base").base_model, n_classes)
model.eval()
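
# Smoke test (hypothetical sentence): the classification head is still
# randomly initialized at this point, so only the output shape is
# meaningful -- one (V, A) pair per input sentence.
_enc = tokenizer.encode_plus("hello world")
_pair = (torch.tensor(_enc["input_ids"]).unsqueeze(0),
         torch.tensor(_enc["attention_mask"]).unsqueeze(0))
with torch.no_grad():
    assert model.forward_roberta(_pair).shape == (1, n_classes)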
# arr = ["sentence 1", "sentence 2", 0.16, 0, 0.5, 0]
def get_output(arr, ln):
    """Run one inference step.

    arr: [sent1, sent2, V_avg, V_sum_of_squares, A_avg, A_sum_of_squares]
    ln:  the number of data points seen so far
    """
    with torch.no_grad():
        stats = torch.tensor([arr[2:]])  # expected shape: (1, 4) (or (batch, 4))
        enc = tokenizer.encode_plus(arr[0])
        a = (torch.tensor(enc["input_ids"]).unsqueeze(0), torch.tensor(enc["attention_mask"]).unsqueeze(0))
        enc = tokenizer.encode_plus(arr[1])
        b = (torch.tensor(enc["input_ids"]).unsqueeze(0), torch.tensor(enc["attention_mask"]).unsqueeze(0))
        out1 = model.forward_roberta(a)  # s_k ... s_j
        out2 = model.forward_roberta(b)  # s_{j+1}
        ln += out1.shape[0]  # add the batch size
        ratio = out1.shape[0] / ln  # used by the running-stats update below
        # convert standard deviation to variance, matching how the model was trained
        stats[0, 1] = stats[0, 1] ** 2
        stats[0, 3] = stats[0, 3] ** 2
        in_f = torch.cat([out1, out2, stats], dim=1)
        output = model.last_classifier(in_f)  # shape: (1, 2) (or (bs, 2))
        return output
        # # update running average & "standard deviation" (actually sigma(x**2)):
        # stats[:, 0] = stats[:, 0] * (1 - ratio) + output[:, 0] * ratio
        # stats[:, 2] = stats[:, 2] * (1 - ratio) + output[:, 1] * ratio
        # stats[:, 1] = stats[:, 1] * (1 - ratio) + output[:, 0] ** 2 * ratio
        # stats[:, 3] = stats[:, 3] * (1 - ratio) + output[:, 1] ** 2 * ratio
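
# Example call (hypothetical inputs): two sentences plus the running
# [V_avg, V_std, A_avg, A_std] statistics and the count of sentences seen
# so far. With an untrained head the values are meaningless; this only
# demonstrates the calling convention and the (1, 2) output shape.
_out = get_output(["sentence 1", "sentence 2", 0.16, 0.0, 0.5, 0.0], 1)
assert _out.shape == (1, 2)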
# map of available pretrained weights
mp = {0: "0627_1_epoch_all_unfreezed.pt", 1: "0629_on_pseudo_right_1_epoch_all_unfreezed.pt"}


def fn(sent1, sent2, v_avg, v_sqr, a_avg, a_sqr, ln, pretrained_path_idx):
    # load the selected pretrained weights
    pretrained_path = mp[pretrained_path_idx]
    model.load_state_dict(torch.load(pretrained_path, map_location=torch.device('cpu')))
    # run inference
    arr = [sent1, sent2, v_avg, v_sqr, a_avg, a_sqr]
    out = get_output(arr, ln)
    return float(out[0, 0]), float(out[0, 1])
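
# Example invocation (hypothetical; requires the checkpoint files listed
# in `mp` to exist next to this script, so it is left commented out):
# v, a = fn("sentence 1", "sentence 2", 0.16, 0.0, 0.5, 0.0, 1, 0)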

weight_description = []
for k, v in mp.items():
    weight_description.append(f"{k}: {v}")
# join into a single string
weight_description = "\n".join(weight_description)

description = f"""Available weights; enter an index to choose one (defaults to 0):\n
{weight_description}
"""
import gradio as gr

interface = gr.Interface(
    fn=fn,
    inputs=["text", "text", "number", "number", "number", "number", "number", "number"],
    outputs=["number", "number"],
    description=description
)
interface.launch()