File size: 7,665 Bytes
ebd9059
 
 
 
 
 
 
 
 
ab33276
ebd9059
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
# -*- coding: utf-8 -*-
"""demo_0113.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1ge4fiA7yDzLAH4vl1LN4_3NxkbLGdKhz
"""

# Notebook shell command, not Python: run it in a notebook cell or a terminal
# before executing this script.
# !pip install -qq transformers

import pandas as pd
# from catboost import CatBoostClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.feature_extraction.text import TfidfVectorizer
from wordcloud import WordCloud
from tqdm import tqdm
import nltk
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
import re
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC,LinearSVC
from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
from xgboost import XGBClassifier
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score
from sklearn.naive_bayes import MultinomialNB
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import HistGradientBoostingClassifier
from imblearn.over_sampling import SMOTE
import plotly.express as px
import warnings
import torch
torch.backends.cudnn.benchmark = True
from torchvision import transforms, utils
import math
import random
import numpy as np
from torch import nn, autograd, optim
import numpy as np
import random

warnings.filterwarnings('ignore')

# Notebook shell commands, not Python: install these packages from a notebook
# cell or a terminal before executing this script.
# !pip install openai

# !pip install gradio

import os
import openai



# Commented out IPython magic to ensure Python compatibility.
from google.colab import drive
# Mount Google Drive at /content/drive, remounting if it is already mounted.
drive.mount("/content/drive", force_remount=True)
# Raw string: the backslash is a literal character meant to escape the space
# for the shell `%cd` magic below; a plain "\ " is an invalid Python escape.
FOLDERNAME = r"Colab\ Notebooks/finalproject_test"
# %cd drive/MyDrive/$FOLDERNAME

import time
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, accuracy_score
import os,re
import warnings
warnings.filterwarnings('ignore')
import nltk 
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from wordcloud import WordCloud
from tqdm import tqdm, trange
import torch
from torch.nn import BCEWithLogitsLoss
from torch.utils.data import TensorDataset, DataLoader, RandomSampler
from transformers import BertTokenizer, BertForSequenceClassification

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
# (Do not override this with a hard-coded 'cuda': that crashes on CPU-only hosts.)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Load the saved model object directly onto the selected device so that a
# checkpoint written from a GPU session can still be restored on a CPU host.
# NOTE(review): torch.load unpickles arbitrary objects; only load checkpoints
# that come from a trusted source.
model = torch.load('mbti_model.pt', map_location=device)
# Every input is padded/truncated to this many tokens.
max_length = 512
# Sigmoid outputs strictly above this value count as a positive label.
threshold = 0.50
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)

# def clean_text(posts):
#   clean = []
#   # lower case
#   posts = posts.lower()
#   # remove email
#   posts = re.sub(re.compile(r'\S+@\S+'), "", posts)
#   # remove tag
#   posts = re.sub(re.compile(r'@\S+'), "", posts)
#   # remove '
#   posts = re.sub(re.compile(r'\''), "", posts)
#   # posts(|||)->list
#   posts = posts.split('|||')
#   # removing links and len(posts) > 5
#   posts = [s for s in posts if not re.search(r'https?:\/\/[^\s<>"]+|www\.[^\s<>"]+', s) if len(s)>5]
#   posts = [re.sub(r'\'', '', s) for s in posts]
#   return posts

# Sample input text that the script-level calls further down encode and classify.
sentence = "Share some fun facts to break the ice"

# sentence = clean_text(sentence)

def data_preprocess(sentence):
  """Tokenize one sentence into single-row model input tensors.

  The text is encoded with the module-level ``tokenizer`` and padded or
  truncated to exactly ``max_length`` tokens.

  Args:
    sentence: the text to encode.

  Returns:
    A tuple ``(input_ids, attention_mask)``; each is a tensor of shape
    ``(1, max_length)``.
  """
  encoding = tokenizer.encode_plus(
      sentence,
      max_length=max_length,
      # 'max_length' padding is the supported spelling of the deprecated
      # pad_to_max_length=True flag.
      padding='max_length',
      truncation=True,
  )

  # unsqueeze(0) adds the leading batch dimension without hard-coding the
  # sequence length, so the shape always follows max_length.
  test_inputs = torch.tensor(encoding['input_ids']).unsqueeze(0)
  test_masks = torch.tensor(encoding['attention_mask']).unsqueeze(0)

  return test_inputs, test_masks

# Encode the sample sentence: a receives the token-id tensor, b the attention mask.
a, b = data_preprocess(sentence)

def predict(test_inputs, test_masks):
  """Classify one encoded sentence and return its four-letter MBTI type.

  Runs the module-level ``model`` in eval mode without gradients, applies a
  sigmoid to the first element of the model output, and compares the first
  four values of row 0 against the module-level ``threshold``.

  Args:
    test_inputs: token-id tensor; only row 0 is decoded into a type.
    test_masks: attention-mask tensor matching ``test_inputs``.

  Returns:
    A four-character string. For each position a value above ``threshold``
    selects the first letter of the pair E/I, S/N, T/F, J/P and any other
    value selects the second.

  Raises:
    IndexError: if row 0 of the model output has fewer than four values.
  """
  model.eval()
  with torch.no_grad():
    # forward pass on the configured device
    outs = model(
        test_inputs.to(device),
        token_type_ids=None,
        attention_mask=test_masks.to(device),
    )
    # independent per-label probabilities, moved to the host as a numpy array
    probabilities = torch.sigmoid(outs[0]).to('cpu').numpy()

  first_row = probabilities[0]
  # (letter when above threshold, letter otherwise) for output positions 0..3
  letter_pairs = (('E', 'I'), ('S', 'N'), ('T', 'F'), ('J', 'P'))
  return ''.join(
      above if first_row[i] > threshold else below
      for i, (above, below) in enumerate(letter_pairs)
  )

# Classify the sample sentence; the returned string is not stored anywhere.
predict(a, b)

import os
import openai
import gradio as gr
import random

# Never commit API keys to source control: read the key from the environment.
# The key that was previously hard-coded here has been exposed and must be
# revoked in the OpenAI dashboard.
openai.api_key = os.getenv("OPENAI_API_KEY")

def translation(text):
  """Send ``text`` to the OpenAI completion endpoint and return the reply.

  The prompt is the Chinese instruction "中翻英" (Chinese-to-English)
  immediately followed by ``text``.

  Args:
    text: the text to append to the prompt.

  Returns:
    The text of the first returned choice with surrounding whitespace
    stripped.
  """
  request = dict(
      model="text-davinci-003",
      prompt=f"中翻英{text}",
      max_tokens=500,
      top_p=1,
      frequency_penalty=0,
      presence_penalty=0,
  )
  completion = openai.Completion.create(**request)
  first_choice = completion['choices'][0]
  return first_choice['text'].strip()

def predict_mbti(description):
  """Return the MBTI type predicted for a free-form description.

  The description is passed through ``translation``, encoded with
  ``data_preprocess`` and classified with ``predict``.

  Args:
    description: the user's text.

  Returns:
    The string produced by ``predict``.
  """
  translated = translation(description)
  input_ids, attention_mask = data_preprocess(translated)
  return predict(input_ids, attention_mask)

# with gr.Blocks(css=".gradio-container {background-color: red}") as demo
  # demo = gr.Interface(fn=predict_mbti, #callable function
  #               inputs=gr.inputs.Textbox(label = '讓我來分析你最近的人格><', placeholder = '個性描述、自己的故事或是曾經發過的文章'), #input format
  #               outputs=gr.outputs.Textbox(label = '只有我最了解你,你是一位...'),
  #               # outputs = [gr.outputs.Textbox(label = '只有我最了解你,你是一位...'), gr.outputs.Textbox(label = '專屬推薦給你的電影🍿')],
  #               title = "AI-MBTI knows U.",
  #               description = 'Come on. Let us predict your MBTI type !!! We will tell you what kind of movie should you watch !',
  #               theme = 'grass',
                    
                   
  #                  ) #output format

blocks = gr.Blocks()

# Gradio UI: one description box, two survey radios, an extra text box, two
# buttons and two output boxes. Only the type button is wired to a handler.
with blocks as demo:
  desc = gr.Textbox(label = '讓我來分析你最近的人格📝', placeholder= '個性描述、自己的故事或是曾經發過的文章')
  survey = gr.Radio(["⭕️有聽過👂16型人格測驗(16pernalities)", "❌沒有聽過👂16型人格測驗(16pernalities)"],
                    label = '民意調查中...')
  survey2 = gr.Radio(["✅曾經做過✏️16型人格測驗(16pernalities)", "❎沒有做過✏️16型人格測驗(16pernalities)"],
                    label = '搜集民意中...')
  # Named object_box rather than `object` so the builtin `object` is not
  # shadowed for the rest of the module.
  object_box = gr.Textbox(placeholder="object")

  with gr.Row():
      type_btn = gr.Button("16型人格類型👨‍👧‍👦")
      movie_btn = gr.Button("推薦專屬電影🍿")


  output1 = gr.Textbox(label="👉根據這段描述,你的16型人格類型🪢會是...")
  output2 = gr.Textbox(label="👉由你的描述與人格特質,適合你的電影🎦有...")

  # Clicking the type button sends the description through predict_mbti and
  # shows the result in output1.
  type_btn.click(predict_mbti, desc, output1)
  # TODO: movie_btn has no click handler yet, so output2 is never filled in.

#display the interface
demo.launch(share=True, debug=True)