# Source: gradio / NLPEvaluation_SIGMOID.py
# Hosting-page residue kept for provenance (not part of the program):
#   OnurSahh's picture | Upload NLPEvaluation_SIGMOID.py | ecda826 | raw |
#   history blame contribute delete | No virus | 2.52 kB
import gradio as gr
import pandas as pd
def auth(username, password):
    """Check a login attempt against the single built-in account.

    Args:
        username: User name submitted through the login form.
        password: Password submitted through the login form.

    Returns:
        True when both values match the expected credentials exactly,
        False otherwise (including when either value is not a string).
    """
    import hmac

    # NOTE(review): the credentials live in source control; consider loading
    # them from the environment or a secret store instead.
    expected_username = "SIGMOID"
    expected_password = "2A4S39H7E7GR1172"

    if not isinstance(username, str) or not isinstance(password, str):
        return False

    def _same(candidate, expected):
        # compare_digest only accepts ASCII-only str, so compare UTF-8 bytes;
        # "surrogatepass" keeps malformed input from raising instead of failing.
        return hmac.compare_digest(
            candidate.encode("utf-8", "surrogatepass"),
            expected.encode("utf-8"),
        )

    # Evaluate both comparisons unconditionally so the time taken does not
    # reveal which of the two fields was wrong.
    username_ok = _same(username, expected_username)
    password_ok = _same(password, expected_password)
    return username_ok and password_ok
def predict(df):
    """Classify every row of ``df`` and annotate the frame in place.

    The frame must contain an ``id`` column and a ``text`` column. The model
    stored in ``sentiment_model`` scores each text over six classes; the
    highest-scoring class decides the two columns added to the frame:

        class 0 -> offansive=1, target='INSULT'
        class 1 -> offansive=1, target='RACIST'
        class 2 -> offansive=1, target='SEXIST'
        class 3 -> offansive=1, target='PROFANITY'
        class 4 -> offansive=0, target='OTHER'
        class 5 -> offansive=1, target='OTHER'

    Args:
        df: pandas DataFrame to classify. It is modified in place: ``id`` is
            moved to the last column and ``offansive`` / ``target`` are added.

    Returns:
        The same DataFrame object that was passed in.

    Raises:
        KeyError: if ``df`` has no ``id`` column.
    """
    # Heavy third-party imports stay local so that importing this module
    # does not pull them in.
    from transformers import AutoTokenizer, Trainer, BertForSequenceClassification
    from datasets import Dataset
    import numpy as np

    # Lookup tables indexed by predicted class id (see docstring).
    offensive_by_class = np.array([1, 1, 1, 1, 0, 1])
    target_by_class = np.array(
        ['INSULT', 'RACIST', 'SEXIST', 'PROFANITY', 'OTHER', 'OTHER'],
        dtype=object,
    )

    # The id column is not a model input; set it aside and re-attach it last.
    df_ids = df.pop('id')

    # Nothing to classify: still produce the expected output columns and skip
    # loading the model. (len() is used because DataFrame truthiness raises.)
    if len(df) == 0:
        df['offansive'] = None
        df['target'] = None
        df['id'] = df_ids
        return df

    # LOAD MODEL AND TOKENIZER AND TOKENIZE DATA
    model = BertForSequenceClassification.from_pretrained("sentiment_model", num_labels=6)
    tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-turkish-cased")
    test_dataset = Dataset.from_dict(df)

    def tokenize_function(examples):
        return tokenizer(examples["text"], padding="max_length", truncation=True)

    tokenized_test_datasets = test_dataset.map(tokenize_function, batched=True)

    # Trainer is used purely as an inference driver here.
    trainer = Trainer(
        model=model,
    )

    # PREDICT CLASSES AND FILL THE 'offansive' AND 'target' COLUMNS
    preds = trainer.predict(tokenized_test_datasets)
    max_indices = np.argmax(preds[0], axis=1)

    # Whole-column, positional assignment: avoids the chained
    # ``df[col][i] = ...`` writes (which pandas may apply to a temporary copy)
    # and does not depend on the frame having a default integer index.
    # The column name 'offansive' is kept as-is because it is part of the
    # output file's schema.
    df['offansive'] = offensive_by_class[max_indices]
    df['target'] = target_by_class[max_indices]
    df['id'] = df_ids
    return df
def get_file(file):
    """Run the classifier over an uploaded pipe-separated file.

    Args:
        file: Uploaded-file object exposing the on-disk location as ``.name``.

    Returns:
        Path of the pipe-separated CSV written with the prediction columns.
    """
    # Uploads coming from Windows carry backslashes; normalise to forward slashes.
    source_path = file.name.replace("\\", "/")
    frame = pd.read_csv(source_path, sep="|")

    # The return value is not needed: the frame is annotated in place.
    predict(frame)

    destination = "output_SIGMOID.csv"
    frame.to_csv(destination, index=False, sep="|")
    return destination
# File upload in, file download out; the login check is attached at launch time.
iface = gr.Interface(fn=get_file, inputs="file", outputs="file")

if __name__ == "__main__":
    iface.launch(auth=auth, share=True)