Spaces:

SidneyChen
/

MBTIdemo

Runtime error

App Files Files Community

MBTIdemo / demo_0113.py

SidneyChen

Upload demo_0113.py

727e077 over 1 year ago

raw

history blame contribute delete

No virus

7.67 kB

	# -- coding: utf-8 --
	"""demo_0113.ipynb

	Automatically generated by Colaboratory.

	Original file is located at
	https://colab.research.google.com/drive/1ge4fiA7yDzLAH4vl1LN4_3NxkbLGdKhz
	"""

	# Notebook shell magic is a SyntaxError in a plain .py file; install the
	# dependency ahead of time (e.g. via requirements.txt) instead.
	# !pip install -qq transformers

	import pandas as pd
	# from catboost import CatBoostClassifier
	from sklearn.preprocessing import LabelEncoder
	from sklearn.feature_extraction.text import TfidfVectorizer
	from wordcloud import WordCloud
	from tqdm import tqdm
	import nltk
	from nltk.stem import WordNetLemmatizer
	from nltk.corpus import stopwords
	import re
	from sklearn.model_selection import train_test_split
	from sklearn.svm import SVC,LinearSVC
	from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
	from xgboost import XGBClassifier
	import matplotlib.pyplot as plt
	import seaborn as sns
	from sklearn.metrics import accuracy_score
	from sklearn.naive_bayes import MultinomialNB
	from sklearn.experimental import enable_hist_gradient_boosting
	from sklearn.ensemble import HistGradientBoostingClassifier
	from imblearn.over_sampling import SMOTE
	import plotly.express as px
	import warnings
	import torch
	torch.backends.cudnn.benchmark = True
	from torchvision import transforms, utils
	import math
	import random
	import numpy as np
	from torch import nn, autograd, optim
	import numpy as np
	import random

	warnings.filterwarnings('ignore')

	# Notebook shell magics are a SyntaxError in a plain .py file; install these
	# dependencies ahead of time (e.g. via requirements.txt) instead.
	# !pip install openai

	# !pip install gradio

	import os
	import openai



	# Commented out IPython magic to ensure Python compatibility.
	# The google.colab package only exists inside Colab; skip the Drive mount
	# everywhere else so the script can still start.
	try:
	    from google.colab import drive
	except ImportError:
	    drive = None
	else:
	    drive.mount("/content/drive", force_remount=True)
	# Raw string: "\ " is not a valid escape sequence. The value is unchanged.
	FOLDERNAME = r"Colab\ Notebooks/finalproject_test"
	# %cd drive/MyDrive/$FOLDERNAME

	import time
	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	from sklearn.model_selection import train_test_split
	from sklearn.metrics import f1_score, accuracy_score
	import os,re
	import warnings
	warnings.filterwarnings('ignore')
	import nltk
	from nltk.corpus import stopwords
	from nltk.stem import PorterStemmer
	from wordcloud import WordCloud
	from tqdm import tqdm, trange
	import torch
	from torch.nn import BCEWithLogitsLoss
	from torch.utils.data import TensorDataset, DataLoader, RandomSampler
	from transformers import BertTokenizer, BertForSequenceClassification

	# Use the GPU when one is available, otherwise fall back to the CPU.
	# (A former unconditional `device = 'cuda'` override made CPU-only hosts fail.)
	device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

	# map_location remaps the checkpoint's tensors onto `device`, so a checkpoint
	# saved on a GPU can also be loaded on a CPU-only machine.
	model = torch.load('mbti_model.pt', map_location=device)
	# Sequence length every input is padded/truncated to by data_preprocess.
	max_length = 512
	# Sigmoid outputs above this value select the first letter of each MBTI pair.
	threshold = 0.50
	tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

	# def clean_text(posts):
	# clean = []
	# # lower case
	# posts = posts.lower()
	# # remove email
	# posts = re.sub(re.compile(r'\S+@\S+'), "", posts)
	# # remove tag
	# posts = re.sub(re.compile(r'@\S+'), "", posts)
	# # remove '
	# posts = re.sub(re.compile(r'\''), "", posts)
	# # posts(\|\|\|)->list
	# posts = posts.split('\|\|\|')
	# # removing links and len(posts) > 5
	# posts = [s for s in posts if not re.search(r'https?:\/\/[^\s<>"]+\|www\.[^\s<>"]+', s) if len(s)>5]
	# posts = [re.sub(r'\'', '', s) for s in posts]
	# return posts

	# Sample input used for the one-off preprocessing/prediction calls below.
	sentence = "Share some fun facts to break the ice"

	# sentence = clean_text(sentence)

	def data_preprocess(sentence):
	    """Tokenize ``sentence`` into batched input-id and attention-mask tensors.

	    The text is encoded with the module-level ``tokenizer`` and padded or
	    truncated to ``max_length`` tokens.

	    Args:
	        sentence: Text to encode.

	    Returns:
	        A ``(test_inputs, test_masks)`` tuple of tensors, each with shape
	        ``(1, max_length)``.
	    """
	    test_encodings = tokenizer.encode_plus(
	        sentence,
	        max_length=max_length,
	        # `padding='max_length'` replaces the deprecated `pad_to_max_length=True`.
	        padding='max_length',
	        truncation=True,
	    )

	    # unsqueeze(0) adds the leading batch dimension without hard-coding the
	    # sequence length, so the shapes follow `max_length` if it changes.
	    test_inputs = torch.tensor(test_encodings['input_ids']).unsqueeze(0)
	    test_masks = torch.tensor(test_encodings['attention_mask']).unsqueeze(0)

	    return test_inputs, test_masks

	# Encode the sample sentence: a holds the input ids, b the attention mask.
	a, b = data_preprocess(sentence)

	def predict(test_inputs, test_masks):
	    """Return the four-letter MBTI string predicted for one encoded input.

	    Runs the module-level ``model`` in eval mode without gradient tracking,
	    applies a sigmoid to its first output, and compares the first batch row
	    against ``threshold``.

	    Args:
	        test_inputs: Token-id tensor passed to the model as its first argument.
	        test_masks: Attention-mask tensor passed as ``attention_mask``.

	    Returns:
	        A 4-character string. For each of the first four sigmoid outputs, the
	        first letter of E/I, S/N, T/F, J/P is used when the output exceeds
	        ``threshold`` and the second letter otherwise.
	    """
	    # (letter when above the threshold, letter otherwise), in output order.
	    letter_pairs = (('E', 'I'), ('S', 'N'), ('T', 'F'), ('J', 'P'))

	    model.eval()
	    with torch.no_grad():
	        # forward pass
	        test_inputs = test_inputs.to(device)
	        test_masks = test_masks.to(device)
	        outs = model(test_inputs, token_type_ids=None, attention_mask=test_masks)
	        pred_label = torch.sigmoid(outs[0])

	    # Only the first row of the batch is decoded.
	    first_row = pred_label.to('cpu').numpy()[0]

	    return ''.join(
	        above if first_row[index] > threshold else below
	        for index, (above, below) in enumerate(letter_pairs)
	    )

	# Run the model once on the sample input; the returned string is not stored.
	predict(a, b)

	import os
	import openai
	import gradio as gr
	import random

	# Read the key from the environment instead of committing a secret to source.
	# NOTE: the key that used to be hard-coded here has been published and
	# should be revoked.
	openai.api_key = os.getenv("OPENAI_API_KEY")

	def translation(text):
	    """Send ``text`` to the OpenAI completion endpoint for translation.

	    The prompt is ``text`` prefixed with "中翻英" (a Chinese-to-English
	    instruction).

	    Args:
	        text: The text to translate.

	    Returns:
	        The text of the first returned choice, stripped of surrounding
	        whitespace.
	    """
	    request = {
	        "model": "text-davinci-003",
	        "prompt": f"中翻英{text}",
	        "max_tokens": 500,
	        "top_p": 1,
	        "frequency_penalty": 0,
	        "presence_penalty": 0,
	    }
	    completion = openai.Completion.create(**request)
	    first_choice = completion['choices'][0]
	    return first_choice['text'].strip()

	def predict_mbti(description):
	    """Translate ``description`` and return the model's MBTI string for it.

	    Args:
	        description: Free-form text entered by the user.

	    Returns:
	        The string produced by ``predict`` for the translated text.
	    """
	    translated = translation(description)
	    input_ids, attention_masks = data_preprocess(translated)
	    return predict(input_ids, attention_masks)

	# with gr.Blocks(css=".gradio-container {background-color: red}") as demo
	# demo = gr.Interface(fn=predict_mbti, #callable function
	# inputs=gr.inputs.Textbox(label = '讓我來分析你最近的人格><', placeholder = '個性描述、自己的故事或是曾經發過的文章'), #input format
	# outputs=gr.outputs.Textbox(label = '只有我最了解你，你是一位...'),
	# # outputs = [gr.outputs.Textbox(label = '只有我最了解你，你是一位...'), gr.outputs.Textbox(label = '專屬推薦給你的電影🍿')],
	# title = "AI-MBTI knows U.",
	# description = 'Come on. Let us predict your MBTI type !!! We will tell you what kind of movie should you watch !',
	# theme = 'grass',


	# ) #output format

	# Gradio UI: a description box, two survey radios, and a button that runs
	# predict_mbti on the description and shows the result in output1.
	blocks = gr.Blocks()

	with blocks as demo:
	    desc = gr.Textbox(label='讓我來分析你最近的人格📝', placeholder='個性描述、自己的故事或是曾經發過的文章')
	    # verb = gr.Radio(label = '請問有聽過16型人格測驗(16pernalities)嗎 /n https://www.16personalities.com/free-personality-test', ["有", "沒有"])
	    survey = gr.Radio(
	        ["⭕️有聽過👂16型人格測驗(16pernalities)", "❌沒有聽過👂16型人格測驗(16pernalities)"],
	        label='民意調查中...',
	    )
	    survey2 = gr.Radio(
	        ["✅曾經做過✏️16型人格測驗(16pernalities)", "❎沒有做過✏️16型人格測驗(16pernalities)"],
	        label='搜集民意中...',
	    )
	    # Renamed from `object` so the builtin of that name is no longer shadowed.
	    object_box = gr.Textbox(placeholder="object")

	    with gr.Row():
	        type_btn = gr.Button("16型人格類型👨‍👧‍👦")
	        # NOTE: movie_btn has no click handler wired up yet.
	        movie_btn = gr.Button("推薦專屬電影🍿")

	    output1 = gr.Textbox(label="👉根據這段描述，你的16型人格類型🪢會是...")
	    output2 = gr.Textbox(label="👉由你的描述與人格特質，適合你的電影🎦有...")

	    type_btn.click(predict_mbti, desc, output1)
	    # movie_btn.click(None, [subject, verb, object], output2, _js="(s, v, o) => o + ' ' + v + ' ' + s")
	    # # verb.change(lambda x: x, verb, output3, _js="(x) => [...x].reverse().join('')")
	    # foo_bar_btn.click(None, [], subject, _js="(x) => x + ' foo'")

	#display the interface
	demo.launch(share=True, debug=True)