# -*- coding: utf-8 -*-
"""demo_0113.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1ge4fiA7yDzLAH4vl1LN4_3NxkbLGdKhz
"""
# Shell command commented out so the file runs as a plain Python script.
# !pip install -qq transformers
import pandas as pd
# from catboost import CatBoostClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from wordcloud import WordCloud
from tqdm import tqdm
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
import re
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC,LinearSVC
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from xgboost import XGBClassifier
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import MultinomialNB
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingClassifier
from imblearn.over_sampling import SMOTE
import plotly.express as px
import warnings
import torch
torch.backends.cudnn.benchmark = True
from torchvision import transforms, utils
import math
import random
import numpy as np
from torch import nn, autograd, optim
warnings.filterwarnings('ignore')
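# Note: most of the scikit-learn / xgboost / imblearn / wordcloud imports above are
# leftovers from the training notebook; the inference app below mainly uses torch,
# transformers, openai, and gradio.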
# Shell installs commented out so the file runs as a plain Python script
# (dependencies belong in requirements.txt on Hugging Face Spaces).
# !pip install openai
# !pip install gradio
import os
import openai
# Colab-only setup, commented out so the script also runs outside Colab
# (e.g. as app.py on Hugging Face Spaces).
# from google.colab import drive
# drive.mount("/content/drive", force_remount=True)
# FOLDERNAME = "Colab\ Notebooks/finalproject_test"
# %cd drive/MyDrive/$FOLDERNAME
import time
from sklearn.metrics import f1_score
from nltk.stem import PorterStemmer
from tqdm import trange
from torch.nn import BCEWithLogitsLoss
from torch.utils.data import TensorDataset, DataLoader, RandomSampler
from transformers import BertTokenizer, BertForSequenceClassification
# Use the GPU when available, otherwise fall back to CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# Load the fine-tuned BERT classifier (saved as a full model object).
model = torch.load('mbti_model.pt', map_location=device)
model.to(device)
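# Note: 'mbti_model.pt' is assumed to be a full pickled BertForSequenceClassification
# fine-tuned with num_labels=4 (one output per MBTI axis). If only a state_dict had
# been saved, loading would instead look roughly like this (hypothetical filename):
# model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=4)
# model.load_state_dict(torch.load('mbti_model_state_dict.pt', map_location=device))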
max_length = 512   # maximum BERT input length in tokens
threshold = 0.50   # per-axis decision threshold on the sigmoid outputs
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
# def clean_text(posts):
# clean = []
# # lower case
# posts = posts.lower()
# # remove email
# posts = re.sub(re.compile(r'\S+@\S+'), "", posts)
# # remove tag
# posts = re.sub(re.compile(r'@\S+'), "", posts)
# # remove '
# posts = re.sub(re.compile(r'\''), "", posts)
# # posts(|||)->list
# posts = posts.split('|||')
# # removing links and len(posts) > 5
# posts = [s for s in posts if not re.search(r'https?:\/\/[^\s<>"]+|www\.[^\s<>"]+', s) if len(s)>5]
# posts = [re.sub(r'\'', '', s) for s in posts]
# return posts
sentence = "Share some fun facts to break the ice"
# sentence = clean_text(sentence)
def data_preprocess(sentence):
    """Tokenize one sentence and return (1, max_length) input-ID and attention-mask tensors."""
    test_encodings = tokenizer.encode_plus(sentence,
                                           max_length=max_length,
                                           padding='max_length',
                                           truncation=True)
    test_input_ids = test_encodings['input_ids']
    test_attention_masks = test_encodings['attention_mask']
    # Add a batch dimension so the tensors have shape (1, max_length).
    test_inputs = torch.tensor(test_input_ids).unsqueeze(0)
    test_masks = torch.tensor(test_attention_masks).unsqueeze(0)
    return test_inputs, test_masks
# Quick smoke test of the preprocessing on the sample sentence.
a, b = data_preprocess(sentence)
def predict(test_inputs, test_masks):
    """Run the classifier and map its four sigmoid outputs to an MBTI type string."""
    model.eval()
    with torch.no_grad():
        # Forward pass on the selected device.
        test_inputs = test_inputs.to(device)
        test_masks = test_masks.to(device)
        outs = model(test_inputs, token_type_ids=None, attention_mask=test_masks)
        b_logit_pred = outs[0]
        pred_label = torch.sigmoid(b_logit_pred)
    # Convert the (1, 4) probabilities to booleans with the decision threshold.
    pred_label = pred_label.to('cpu').numpy()
    pred_bools = pred_label > threshold
    # Each output index corresponds to one MBTI axis:
    # 0 -> E/I, 1 -> S/N, 2 -> T/F, 3 -> J/P.
    mbti = ''
    mbti += 'E' if pred_bools[0][0] else 'I'
    mbti += 'S' if pred_bools[0][1] else 'N'
    mbti += 'T' if pred_bools[0][2] else 'F'
    mbti += 'J' if pred_bools[0][3] else 'P'
    return mbti
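# For example, sigmoid outputs [0.9, 0.2, 0.7, 0.1] with threshold 0.5 map to
# E (0.9 > 0.5), N (0.2 <= 0.5), T (0.7 > 0.5), P (0.1 <= 0.5) -> "ENTP".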
# Sanity-check prediction for the sample sentence.
print(predict(a, b))
import gradio as gr
# Read the OpenAI API key from the environment instead of hard-coding it in the source.
openai.api_key = os.getenv("OPENAI_API_KEY")
def translation(text):
    """Translate the user's description to English with the OpenAI Completions API.

    The prompt prefix "中翻英" asks the model, in Chinese, to translate the text
    from Chinese into English before it is passed to the BERT classifier.
    """
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=f"中翻英{text}",
        max_tokens=500,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0
    )
    return response['choices'][0]['text'].strip()
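# Note: openai.Completion and "text-davinci-003" belong to the legacy (<1.0) openai SDK.
# With openai>=1.0, an equivalent call would look roughly like this (sketch only, not
# part of the original code):
# from openai import OpenAI
# client = OpenAI()  # reads OPENAI_API_KEY from the environment
# resp = client.chat.completions.create(
#     model="gpt-3.5-turbo",
#     messages=[{"role": "user", "content": f"中翻英{text}"}],
# )
# translated = resp.choices[0].message.content.strip()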
def predict_mbti(description):
    """Translate the description to English, then classify it into an MBTI type."""
    text = translation(description)
    text_inputs, text_masks = data_preprocess(text)
    mbti = predict(text_inputs, text_masks)
    return mbti
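# Example usage (the exact output depends on the trained model):
#   predict_mbti("我喜歡獨處，也喜歡把每一天都規劃好")
#   -> a four-letter type string such as "ISTJ"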
# Earlier gr.Interface version, kept for reference:
# with gr.Blocks(css=".gradio-container {background-color: red}") as demo:
# demo = gr.Interface(fn=predict_mbti, #callable function
# inputs=gr.inputs.Textbox(label = '讓我來分析你最近的人格><', placeholder = '個性描述、自己的故事或是曾經發過的文章'), #input format
# outputs=gr.outputs.Textbox(label = '只有我最了解你,你是一位...'),
# # outputs = [gr.outputs.Textbox(label = '只有我最了解你,你是一位...'), gr.outputs.Textbox(label = '專屬推薦給你的電影🍿')],
# title = "AI-MBTI knows U.",
# description = 'Come on. Let us predict your MBTI type !!! We will tell you what kind of movie should you watch !',
# theme = 'grass',
# ) #output format
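# Gradio Blocks UI: a free-text description box, two survey Radio questions,
# two buttons and two output boxes. Only the MBTI button is wired to a callback;
# the movie button is still a placeholder (its callback is commented out below).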
blocks = gr.Blocks()
with blocks as demo:
    desc = gr.Textbox(label='讓我來分析你最近的人格📝',
                      placeholder='個性描述、自己的故事或是曾經發過的文章')
    # verb = gr.Radio(["有", "沒有"],
    #                 label='請問有聽過16型人格測驗(16personalities)嗎\nhttps://www.16personalities.com/free-personality-test')
    survey = gr.Radio(["⭕️有聽過👂16型人格測驗(16personalities)",
                       "❌沒有聽過👂16型人格測驗(16personalities)"],
                      label='民意調查中...')
    survey2 = gr.Radio(["✅曾經做過✏️16型人格測驗(16personalities)",
                        "❎沒有做過✏️16型人格測驗(16personalities)"],
                       label='搜集民意中...')
    object = gr.Textbox(placeholder="object")  # only referenced by the commented-out movie callback below
    with gr.Row():
        type_btn = gr.Button("16型人格類型👨‍👧‍👦")
        movie_btn = gr.Button("推薦專屬電影🍿")
    output1 = gr.Textbox(label="👉根據這段描述,你的16型人格類型🪢會是...")
    output2 = gr.Textbox(label="👉由你的描述與人格特質,適合你的電影🎦有...")
    type_btn.click(predict_mbti, desc, output1)
# movie_btn.click(None, [subject, verb, object], output2, _js="(s, v, o) => o + ' ' + v + ' ' + s")
# # verb.change(lambda x: x, verb, output3, _js="(x) => [...x].reverse().join('')")
# foo_bar_btn.click(None, [], subject, _js="(x) => x + ' foo'")
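# A possible way to wire the movie button (sketch only; recommend_movie is a
# hypothetical helper, not defined in this file, that could query the OpenAI API
# with the description and the predicted type):
# movie_btn.click(recommend_movie, [desc, output1], output2)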
# Display the interface (share=True creates a public link when run outside Spaces).
demo.launch(share=True, debug=True)