# test_skim / app.py
# adamtayzzz's picture
# Update app.py
# d0a55ad verified
import argparse
import logging
import math
import os
import random
import datasets
from datasets import load_dataset, load_metric
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
import gradio as gr
import transformers
from accelerate import Accelerator # huggingface package
from transformers import (
AdamW,
AutoConfig,
AutoModelForSequenceClassification,
AutoTokenizer,
DataCollatorWithPadding,
PretrainedConfig,
SchedulerType,
default_data_collator,
get_scheduler,
set_seed,
BertTokenizer,
)
from transformers.utils.versions import require_version
import torch
from test_module.modeling_transkimer import BertForSequenceClassification as TranskimerForSequenceClassification
from test_module.modeling_transkimer_roberta import RobertaForSequenceClassification as TranskimerRobertaForSequenceClassification
from test_module.modeling_utils import convert_softmax_mask_to_digit
from blackbox_utils.my_attack import CharacterAttack
from transformers import glue_processors as processors
# Maps a task name to the pair of text-field keys used for that task
# (unpacked further down as ``sentence1_key, sentence2_key``).
# The second entry is None for tasks that have a single text field.
task_to_keys = {
    "cola": ("sentence", None),
    "mnli": ("premise", "hypothesis"),
    "mrpc": ("sentence1", "sentence2"),
    "qnli": ("question", "sentence"),
    "qqp": ("question1", "question2"),
    "rte": ("sentence1", "sentence2"),
    "sst2": ("sentence", None),
    "stsb": ("sentence1", "sentence2"),
    "wnli": ("sentence1", "sentence2"),
    "imdb": ("text", None),
}
# Maps a "<model_type>_<task_name>_not_pad" key to the path that is handed to
# ``from_pretrained`` when the config and model are loaded.
model_path_dict = {
    "transkimer_sst2_not_pad": './not_pad_0.5',
}
# Restrict both libraries' loggers to error-level messages.
datasets.utils.logging.set_verbosity_error()
transformers.utils.logging.set_verbosity_error()

# Fixed settings for this app: one task, one model family.
task_name = 'sst2'
model_type = 'transkimer'

# The label list comes from the GLUE processor registered under 'sst-2';
# label_to_id maps each label to its position in that list.
processor = processors['sst-2']()
label_list = processor.get_labels()
label_to_id = dict((label, index) for index, label in enumerate(label_list))

# Load pretrained model and tokenizer
model_path_key = f'{model_type}_{task_name}_not_pad'
model_path = model_path_dict[model_path_key]
config = AutoConfig.from_pretrained(
    model_path,
    num_labels=len(label_list),
    finetuning_task=task_name,
)
# NOTE: the tokenizer is loaded from 'bert-base-uncased', not from model_path.
tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased', use_fast=True)
model = TranskimerForSequenceClassification.from_pretrained(
    model_path,
    # True only when the path string contains ".ckpt".
    from_tf=".ckpt" in model_path,
    config=config,
)

# Preprocessing the datasets
sentence1_key, sentence2_key = task_to_keys[task_name]
padding = False

# Wrapper object used by the request handler to run the model on CPU.
attack = CharacterAttack(
    f'{model_type}_{task_name}',
    model,
    tokenizer,
    device='cpu',
    max_per=10,
    padding=padding,
    max_length=128,
    label_to_id=label_to_id,
    sentence1_key=sentence1_key,
    sentence2_key=sentence2_key,
)
def greet(text):
    """Run the module-level ``attack`` helper on one input string.

    The text is wrapped as a ``(text, None)`` pair and passed to
    ``attack.get_prob``; its first result is then passed to
    ``attack.output_analysis``.

    Returns a 2-tuple:
      * the second value returned by ``attack.get_prob`` (presumably the
        time taken -- confirm against ``CharacterAttack.get_prob``);
      * ``.item()`` of the middle element of the 3-tuple returned by
        ``attack.output_analysis`` (presumably a count of remaining
        tokens -- confirm against ``CharacterAttack.output_analysis``).
    """
    sentence_pair = (text, None)
    outputs, elapsed = attack.get_prob(sentence_pair)
    # Only the middle element of the analysis result is used here.
    _, remaining, _ = attack.output_analysis(outputs)
    return elapsed, remaining.item()
# Gradio UI around greet: one text input, two numeric outputs (one per value
# in the tuple greet returns).
iface = gr.Interface(
    fn=greet,
    inputs=["text"],
    outputs=["number", "number"],
)
# Start serving the interface.
iface.launch()