Spaces:
Runtime error
Runtime error
# -*- coding: utf-8 -*- | |
"""demo_0113.ipynb | |
Automatically generated by Colaboratory. | |
Original file is located at | |
https://colab.research.google.com/drive/1ge4fiA7yDzLAH4vl1LN4_3NxkbLGdKhz | |
""" | |
# Notebook shell magic: "!pip ..." is not valid syntax in a plain Python
# module, so it is kept only as a reminder. Install the package in the
# environment before running this script.
# !pip install -qq transformers
import pandas as pd | |
# from catboost import CatBoostClassifier | |
from sklearn.preprocessing import LabelEncoder | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
from wordcloud import WordCloud | |
from tqdm import tqdm | |
import nltk | |
from nltk.stem import WordNetLemmatizer | |
from nltk.corpus import stopwords | |
import re | |
from sklearn.model_selection import train_test_split | |
from sklearn.svm import SVC,LinearSVC | |
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier | |
from xgboost import XGBClassifier | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
from sklearn.metrics import accuracy_score | |
from sklearn.naive_bayes import MultinomialNB | |
from sklearn.experimental import enable_hist_gradient_boosting | |
from sklearn.ensemble import HistGradientBoostingClassifier | |
from imblearn.over_sampling import SMOTE | |
import plotly.express as px | |
import warnings | |
import torch | |
torch.backends.cudnn.benchmark = True | |
from torchvision import transforms, utils | |
import math | |
import random | |
import numpy as np | |
from torch import nn, autograd, optim | |
import numpy as np | |
import random | |
warnings.filterwarnings('ignore') | |
# Notebook shell magics are not valid Python; install these packages in the
# environment beforehand instead of from inside the script.
# !pip install openai
# !pip install gradio
import os
import openai

# Commented out IPython magic to ensure Python compatibility.
try:
    from google.colab import drive
except ImportError:
    # Not running inside Colab: there is no Drive to mount, so skip the step
    # but keep the name defined.
    drive = None
else:
    drive.mount("/content/drive", force_remount=True)
# Raw string: keeps the shell-escaped space ("\ ") byte-for-byte without
# triggering an invalid-escape-sequence warning.
FOLDERNAME = r"Colab\ Notebooks/finalproject_test"
# %cd drive/MyDrive/$FOLDERNAME
import time | |
import pandas as pd | |
import numpy as np | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
from sklearn.model_selection import train_test_split | |
from sklearn.metrics import f1_score, accuracy_score | |
import os,re | |
import warnings | |
warnings.filterwarnings('ignore') | |
import nltk | |
from nltk.corpus import stopwords | |
from nltk.stem import PorterStemmer | |
from wordcloud import WordCloud | |
from tqdm import tqdm, trange | |
import torch | |
from torch.nn import BCEWithLogitsLoss | |
from torch.utils.data import TensorDataset, DataLoader, RandomSampler | |
from transformers import BertTokenizer, BertForSequenceClassification | |
# Use the GPU when one is visible, otherwise fall back to the CPU. (The
# previous unconditional `device = 'cuda'` override defeated this fallback.)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# map_location remaps the checkpoint's tensors onto `device`, so a file saved
# on a GPU machine can still be loaded on a CPU-only host.
# NOTE(review): this unpickles a whole model object; recent PyTorch releases
# default torch.load to weights_only=True, which rejects such files — confirm
# against the installed version, and only load checkpoints you trust.
model = torch.load('mbti_model.pt', map_location=device)
# Length every input is padded or truncated to by data_preprocess.
max_length = 512
# Sigmoid probability above which predict picks the first letter of a pair.
threshold = 0.50
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
# def clean_text(posts): | |
# clean = [] | |
# # lower case | |
# posts = posts.lower() | |
# # remove email
# posts = re.sub(re.compile(r'\S+@\S+'), "", posts) | |
# # remove tag | |
# posts = re.sub(re.compile(r'@\S+'), "", posts) | |
# # remove ' | |
# posts = re.sub(re.compile(r'\''), "", posts) | |
# # posts(|||)->list | |
# posts = posts.split('|||') | |
# # removing links and len(posts) > 5 | |
# posts = [s for s in posts if not re.search(r'https?:\/\/[^\s<>"]+|www\.[^\s<>"]+', s) if len(s)>5] | |
# posts = [re.sub(r'\'', '', s) for s in posts] | |
# return posts | |
sentence = "Share some fun facts to break the ice" | |
# sentence = clean_text(sentence) | |
def data_preprocess(sentence):
    """Tokenize one text into fixed-length model inputs.

    Args:
        sentence: Text to encode with the module-level ``tokenizer``.

    Returns:
        A ``(input_ids, attention_mask)`` pair of tensors, each of shape
        ``(1, max_length)``: a batch holding the single encoded input.
    """
    # padding="max_length" is the supported spelling of the deprecated
    # pad_to_max_length=True flag.
    encoding = tokenizer.encode_plus(
        sentence,
        max_length=max_length,
        padding="max_length",
        truncation=True,
    )
    # Wrapping each id list in an outer list adds the batch dimension, so the
    # shape follows max_length instead of a hard-coded 512.
    test_inputs = torch.tensor([encoding["input_ids"]])
    test_masks = torch.tensor([encoding["attention_mask"]])
    return test_inputs, test_masks
a, b = data_preprocess(sentence) | |
def predict(test_inputs, test_masks):
    """Run the classifier on one encoded input and return its MBTI code.

    Args:
        test_inputs: Token-id tensor passed to the model as its input.
        test_masks: Attention-mask tensor passed as ``attention_mask``.

    Returns:
        A four-letter string. For the first batch row, each of the first
        four sigmoid outputs selects the first letter of E/I, S/N, T/F, J/P
        when it exceeds ``threshold`` and the second letter otherwise.
    """
    # (letter when above threshold, letter otherwise), in model output order.
    letter_pairs = (("E", "I"), ("S", "N"), ("T", "F"), ("J", "P"))

    model.eval()
    with torch.no_grad():
        outs = model(
            test_inputs.to(device),
            token_type_ids=None,
            attention_mask=test_masks.to(device),
        )
        # The first element of the model output holds the logits.
        probs = torch.sigmoid(outs[0])

    # Only the first batch row is decoded.
    flags = probs.to('cpu').numpy()[0] > threshold
    # Index explicitly so an output with fewer than four labels still raises
    # IndexError rather than silently yielding a shorter code.
    return "".join(
        hit if flags[i] else miss
        for i, (hit, miss) in enumerate(letter_pairs)
    )
predict(a, b) | |
import os | |
import openai | |
import gradio as gr | |
import random | |
# Read the key from the environment instead of committing a secret to source
# control; set OPENAI_API_KEY in the deployment's secrets/configuration.
openai.api_key = os.environ.get("OPENAI_API_KEY")
def translation(text):
    """Ask the OpenAI completion endpoint to translate text into English.

    Args:
        text: Source text appended to the "中翻英" (Chinese-to-English)
            instruction that forms the prompt.

    Returns:
        The text of the first returned choice, with surrounding whitespace
        stripped.
    """
    # NOTE(review): openai.Completion with text-davinci-003 is the legacy
    # completion interface — confirm it matches the installed openai version.
    request = {
        "model": "text-davinci-003",
        "prompt": f"中翻英{text}",
        "max_tokens": 500,
        "top_p": 1,
        "frequency_penalty": 0,
        "presence_penalty": 0,
    }
    completion = openai.Completion.create(**request)
    first_choice = completion['choices'][0]
    return first_choice['text'].strip()
def predict_mbti(description):
    """Translate a description, encode it, and return the predicted MBTI code.

    Args:
        description: Free-form text; it is passed through ``translation``
            before being tokenized.

    Returns:
        The string produced by ``predict`` for the translated text.
    """
    english = translation(description)
    input_ids, attention_mask = data_preprocess(english)
    return predict(input_ids, attention_mask)
# with gr.Blocks(css=".gradio-container {background-color: red}") as demo | |
# demo = gr.Interface(fn=predict_mbti, #callable function | |
# inputs=gr.inputs.Textbox(label = '讓我來分析你最近的人格><', placeholder = '個性描述、自己的故事或是曾經發過的文章'), #input format | |
# outputs=gr.outputs.Textbox(label = '只有我最了解你,你是一位...'), | |
# # outputs = [gr.outputs.Textbox(label = '只有我最了解你,你是一位...'), gr.outputs.Textbox(label = '專屬推薦給你的電影🍿')], | |
# title = "AI-MBTI knows U.", | |
# description = 'Come on. Let us predict your MBTI type !!! We will tell you what kind of movie should you watch !', | |
# theme = 'grass', | |
# ) #output format | |
# Gradio UI: one text input, two survey radios, and a button that sends the
# text to predict_mbti and shows the result.
# NOTE(review): the nesting below is reconstructed; the output boxes are
# assumed to sit below the button row rather than inside it — confirm.
blocks = gr.Blocks()
with blocks as demo:
    # Free-text description the prediction is computed from.
    desc = gr.Textbox(label = '讓我來分析你最近的人格📝', placeholder= '個性描述、自己的故事或是曾經發過的文章')
    # Survey questions; their answers are not read by any handler here.
    survey = gr.Radio(["⭕️有聽過👂16型人格測驗(16pernalities)", "❌沒有聽過👂16型人格測驗(16pernalities)"],
                      label = '民意調查中...')
    survey2 = gr.Radio(["✅曾經做過✏️16型人格測驗(16pernalities)", "❎沒有做過✏️16型人格測驗(16pernalities)"],
                       label = '搜集民意中...')
    # Renamed from `object` so the builtin of that name is no longer shadowed
    # at module level; the rendered component is unchanged.
    object_box = gr.Textbox(placeholder="object")
    with gr.Row():
        type_btn = gr.Button("16型人格類型👨👧👦")
        movie_btn = gr.Button("推薦專屬電影🍿")
    output1 = gr.Textbox(label="👉根據這段描述,你的16型人格類型🪢會是...")
    output2 = gr.Textbox(label="👉由你的描述與人格特質,適合你的電影🎦有...")
    type_btn.click(predict_mbti, desc, output1)
    # NOTE: movie_btn has no click handler wired up yet, so output2 is never
    # filled.

# Display the interface.
demo.launch(share=True, debug=True)