Original file is located at https://colab.research.google.com/drive/1ge4fiA7yDzLAH4vl1LN4_3NxkbLGdKhz """ !pip install -qq transformers import pandas as pd # from catboost import CatBoostClassifier from sklearn.preprocessing import LabelEncoder from sklearn.feature_extraction.text import TfidfVectorizer from wordcloud import WordCloud from tqdm import tqdm import nltk from nltk.stem import WordNetLemmatizer from nltk.corpus import stopwords import re from sklearn.model_selection import train_test_split from sklearn.svm import SVC,LinearSVC from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier from xgboost import XGBClassifier import matplotlib.pyplot as plt import seaborn as sns from sklearn.metrics import accuracy_score from sklearn.naive_bayes import MultinomialNB from sklearn.experimental import enable_hist_gradient_boosting from sklearn.ensemble import HistGradientBoostingClassifier from imblearn.over_sampling import SMOTE import plotly.express as px import warnings import torch torch.backends.cudnn.benchmark = True from torchvision import transforms, utils import math import random import numpy as np from torch import nn, autograd, optim import numpy as np import random warnings.filterwarnings('ignore') !pip install openai !pip install gradio import os import openai # Commented out IPython magic to ensure Python compatibility. 
from google.colab import drive

drive.mount("/content/drive", force_remount=True)
# Raw string: the backslash escapes the space for the shell-level `%cd` below
# and would otherwise be an invalid Python escape sequence.
FOLDERNAME = r"Colab\ Notebooks/finalproject_test"
# %cd drive/MyDrive/$FOLDERNAME

import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score
import os
import re
import warnings

warnings.filterwarnings('ignore')

import nltk
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from wordcloud import WordCloud
from tqdm import tqdm, trange

import torch
from torch.nn import BCEWithLogitsLoss
from torch.utils.data import TensorDataset, DataLoader, RandomSampler
from transformers import BertTokenizer, BertForSequenceClassification

# Use the GPU when one is present, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# NOTE(security): torch.load unpickles arbitrary objects; only load a
# checkpoint that comes from a trusted source.
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only host.
model = torch.load('mbti_model.pt', map_location=device)

max_length = 512   # number of tokens every input is padded/truncated to
threshold = 0.50   # sigmoid probability above which an axis takes its first letter

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

# Letter pairs for the four model outputs, in output order:
# (letter when probability > threshold, letter otherwise).
_MBTI_AXES = (('E', 'I'), ('S', 'N'), ('T', 'F'), ('J', 'P'))

sentence = "Share some fun facts to break the ice"


def data_preprocess(sentence):
    """Tokenize one piece of text into model-ready tensors.

    The text is encoded with the module-level ``tokenizer`` and padded or
    truncated to exactly ``max_length`` tokens.

    Args:
        sentence: The text to encode.

    Returns:
        A ``(input_ids, attention_mask)`` tuple of tensors, each of shape
        ``(1, max_length)`` (a batch containing the single input).
    """
    encoding = tokenizer.encode_plus(
        sentence,
        max_length=max_length,
        padding='max_length',
        truncation=True,
    )
    # unsqueeze(0) adds the batch dimension the model expects.
    test_inputs = torch.tensor(encoding['input_ids']).unsqueeze(0)
    test_masks = torch.tensor(encoding['attention_mask']).unsqueeze(0)
    return test_inputs, test_masks


a, b = data_preprocess(sentence)


def predict(test_inputs, test_masks):
    """Run the classifier on one encoded input and return its MBTI type.

    Args:
        test_inputs: Token-id tensor as returned by ``data_preprocess``.
        test_masks: Attention-mask tensor as returned by ``data_preprocess``.

    Returns:
        A four-letter string. Each of the first four model outputs is passed
        through a sigmoid and compared with ``threshold`` to choose between
        E/I, S/N, T/F and J/P respectively.
    """
    model.eval()
    with torch.no_grad():
        outs = model(
            test_inputs.to(device),
            token_type_ids=None,
            attention_mask=test_masks.to(device),
        )
    # outs[0] holds the logits; row 0 is the only item in the batch.
    probabilities = torch.sigmoid(outs[0])[0].cpu().numpy()
    return ''.join(
        above if probabilities[i] > threshold else below
        for i, (above, below) in enumerate(_MBTI_AXES)
    )


predict(a, b)

import os
import openai
import gradio as gr
import random

# SECURITY: the API key must not live in source control. It is read from the
# OPENAI_API_KEY environment variable instead; the key that used to be
# hard-coded here has been exposed and should be revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY")


def translation(text):
    """Translate ``text`` to English with the OpenAI completion endpoint.

    The prompt is the instruction "中翻英" (translate Chinese to English)
    followed directly by ``text``.

    Args:
        text: The text to translate.

    Returns:
        The first completion choice with surrounding whitespace stripped.
    """
    response = openai.Completion.create(
        model="text-davinci-003",
        prompt=f"中翻英{text}",
        max_tokens=500,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )
    return response['choices'][0]['text'].strip()


def predict_mbti(description):
    """Predict the MBTI type for a free-text description.

    The description is translated to English, tokenized and classified.

    Args:
        description: Text entered by the user.

    Returns:
        The four-letter MBTI string, or ``''`` when ``description`` is empty
        or whitespace only (no API call is made in that case).
    """
    if not description or not description.strip():
        return ''
    english_text = translation(description)
    input_ids, attention_mask = data_preprocess(english_text)
    return predict(input_ids, attention_mask)
# Gradio Blocks UI: a description box, two survey radio groups, and two
# buttons with one output box each.
blocks = gr.Blocks()

with blocks as demo:
    desc = gr.Textbox(label = '讓我來分析你最近的人格📝', placeholder= '個性描述、自己的故事或是曾經發過的文章')
    survey = gr.Radio(["⭕️有聽過👂16型人格測驗(16pernalities)", "❌沒有聽過👂16型人格測驗(16pernalities)"], label = '民意調查中...')
    survey2 = gr.Radio(["✅曾經做過✏️16型人格測驗(16pernalities)", "❎沒有做過✏️16型人格測驗(16pernalities)"], label = '搜集民意中...')
    # Named object_box rather than `object` so the builtin is not shadowed.
    object_box = gr.Textbox(placeholder="object")

    with gr.Row():
        type_btn = gr.Button("16型人格類型👨‍👧‍👦")
        movie_btn = gr.Button("推薦專屬電影🍿")

    output1 = gr.Textbox(label="👉根據這段描述,你的16型人格類型🪢會是...")
    output2 = gr.Textbox(label="👉由你的描述與人格特質,適合你的電影🎦有...")

    # Clicking the type button sends the description to predict_mbti and
    # shows the result in output1.
    type_btn.click(predict_mbti, desc, output1)
    # NOTE: movie_btn has no click handler attached, so output2 is never
    # filled in.

# Display the interface.
demo.launch(share=True, debug=True)