Spaces:

SidneyChen
/

MBTIdemo

Runtime error

App Files Files Community

SidneyChen committed on Jan 13, 2023

Commit

727e077

1 Parent(s): a8b4549

Upload demo_0113.py

Browse files

Files changed (1) hide show

demo_0113.py +228 -0

demo_0113.py ADDED Viewed

	@@ -0,0 +1,228 @@

+# -*- coding: utf-8 -*-
+"""demo_0113.ipynb
+Automatically generated by Colaboratory.
+Original file is located at
+    https://colab.research.google.com/drive/1ge4fiA7yDzLAH4vl1LN4_3NxkbLGdKhz
+"""
+!pip install -qq transformers
+import pandas as pd
+# from catboost import CatBoostClassifier
+from sklearn.preprocessing import LabelEncoder
+from sklearn.feature_extraction.text import TfidfVectorizer
+from wordcloud import WordCloud
+from tqdm import tqdm
+import nltk
+from nltk.stem import WordNetLemmatizer
+from nltk.corpus import stopwords
+import re
+from sklearn.model_selection import train_test_split
+from sklearn.svm import SVC,LinearSVC
+from sklearn.ensemble import RandomForestClassifier,GradientBoostingClassifier
+from xgboost import XGBClassifier
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn.metrics import accuracy_score
+from sklearn.naive_bayes import MultinomialNB
+from sklearn.experimental import enable_hist_gradient_boosting
+from sklearn.ensemble import HistGradientBoostingClassifier
+from imblearn.over_sampling import SMOTE
+import plotly.express as px
+import warnings
+import torch
+torch.backends.cudnn.benchmark = True
+from torchvision import transforms, utils
+import math
+import random
+import numpy as np
+from torch import nn, autograd, optim
+import numpy as np
+import random
+warnings.filterwarnings('ignore')
+!pip install openai
+!pip install gradio
+import os
+import openai
+# Commented out IPython magic to ensure Python compatibility.
+from google.colab import drive
+drive.mount("/content/drive", force_remount=True)
+FOLDERNAME="Colab\ Notebooks/finalproject_test"
+# %cd drive/MyDrive/$FOLDERNAME
+import time
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import f1_score, accuracy_score
+import os,re
+import warnings
+warnings.filterwarnings('ignore')
+import nltk
+from nltk.corpus import stopwords
+from nltk.stem import PorterStemmer
+from wordcloud import WordCloud
+from tqdm import tqdm, trange
+import torch
+from torch.nn import BCEWithLogitsLoss
+from torch.utils.data import TensorDataset, DataLoader, RandomSampler
+from transformers import BertTokenizer, BertForSequenceClassification
+device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
+device = 'cuda'
+model = torch.load('mbti_model.pt')
+max_length = 512
+threshold = 0.50
+tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
+# def clean_text(posts):
+#   clean = []
+#   # lower case
+#   posts = posts.lower()
+#   # remove email
+#   posts = re.sub(re.compile(r'\S+@\S+'), "", posts)
+#   # remove tag
+#   posts = re.sub(re.compile(r'@\S+'), "", posts)
+#   # remove '
+#   posts = re.sub(re.compile(r'\''), "", posts)
+#   # posts(|||)->list
+#   posts = posts.split('|||')
+#   # removing links and len(posts) > 5
+#   posts = [s for s in posts if not re.search(r'https?:\/\/[^\s<>"]+|www\.[^\s<>"]+', s) if len(s)>5]
+#   posts = [re.sub(r'\'', '', s) for s in posts]
+#   return posts
+sentence = "Share some fun facts to break the ice"
+# sentence = clean_text(sentence)
+def data_preprocess(sentence):
+  test_encodings = tokenizer.encode_plus(sentence,max_length=max_length,pad_to_max_length=True,truncation=True)
+  test_input_ids = test_encodings['input_ids']
+  test_token_type_ids = test_encodings['token_type_ids']
+  test_attention_masks = test_encodings['attention_mask']
+  test_inputs = torch.tensor(test_input_ids).reshape(512,1).T
+  test_masks = torch.tensor(test_attention_masks).reshape(512,1).T
+  test_token_types = torch.tensor(test_token_type_ids).reshape(512,1).T
+  return test_inputs, test_masks
+a, b = data_preprocess(sentence)
+def predict(test_inputs, test_masks):
+  model.eval()
+  logit_preds,pred_labels = [],[]
+  with torch.no_grad():
+    # forward pass
+    test_inputs = test_inputs.to(device)
+    test_masks = test_masks.to(device)
+    outs = model(test_inputs , token_type_ids=None, attention_mask=test_masks)
+    b_logit_pred = outs[0]
+    pred_label = torch.sigmoid(b_logit_pred)
+    # print(pred_label)
+    # converting into numpy arrays
+    b_logit_pred = b_logit_pred.detach().cpu().numpy()
+    pred_label = pred_label.to('cpu').numpy()
+    # print(pred_label.tolist())
+  # flatten output variables
+  # converting flattened binary values to boolean values
+  pred_bools = [pl>threshold for pl in pred_label]
+  # print(pred_bools)
+  mbti = ''
+  for i in range(4):
+    if i == 0:
+      mbti += 'E' if pred_bools[0][i] else 'I'
+    if i == 1:
+      mbti += 'S' if pred_bools[0][i] else 'N'
+    if i == 2:
+      mbti += 'T' if pred_bools[0][i] else 'F'
+    if i == 3:
+      mbti += 'J' if pred_bools[0][i] else 'P'
+  return mbti
+predict(a, b)
+import os
+import openai
+import gradio as gr
+import random
+openai.api_key = ("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")
+def translation(text):
+  response = openai.Completion.create(
+    model="text-davinci-003",
+    # translation = '中翻英'
+    # text = "你好"
+    prompt=f"中翻英{text}",
+    max_tokens=500,
+    top_p=1,
+    frequency_penalty=0,
+    presence_penalty=0
+  )
+  return response['choices'][0]['text'].strip()
+def predict_mbti(description):
+  text = translation(description)
+  text, text_masks = data_preprocess(text)
+  mbti = predict(text, text_masks)
+  return mbti
+# with gr.Blocks(css=".gradio-container {background-color: red}") as demo
+  # demo = gr.Interface(fn=predict_mbti, #callable function
+  #               inputs=gr.inputs.Textbox(label = '讓我來分析你最近的人格><', placeholder = '個性描述、自己的故事或是曾經發過的文章'), #input format
+  #               outputs=gr.outputs.Textbox(label = '只有我最了解你，你是一位...'),
+  #               # outputs = [gr.outputs.Textbox(label = '只有我最了解你，你是一位...'), gr.outputs.Textbox(label = '專屬推薦給你的電影🍿')],
+  #               title = "AI-MBTI knows U.",
+  #               description = 'Come on. Let us predict your MBTI type !!! We will tell you what kind of movie should you watch !',
+  #               theme = 'grass',
+  #                  ) #output format
+blocks = gr.Blocks()
+with blocks as demo:
+  desc = gr.Textbox(label = '讓我來分析你最近的人格📝', placeholder= '個性描述、自己的故事或是曾經發過的文章')
+  # verb = gr.Radio(label = '請問有聽過16型人格測驗(16pernalities)嗎 /n https://www.16personalities.com/free-personality-test', ["有", "沒有"])
+  survey = gr.Radio(["⭕️有聽過👂16型人格測驗(16pernalities)", "❌沒有聽過👂16型人格測驗(16pernalities)"],
+                    label = '民意調查中...')
+  survey2 = gr.Radio(["✅曾經做過✏️16型人格測驗(16pernalities)", "❎沒有做過✏️16型人格測驗(16pernalities)"],
+                    label = '搜集民意中...')
+  object = gr.Textbox(placeholder="object")
+  with gr.Row():
+      type_btn = gr.Button("16型人格類型👨‍👧‍👦")
+      movie_btn = gr.Button("推薦專屬電影🍿")
+  output1 = gr.Textbox(label="👉根據這段描述，你的16型人格類型🪢會是...")
+  output2 = gr.Textbox(label="👉由你的描述與人格特質，適合你的電影🎦有...")
+  type_btn.click(predict_mbti, desc, output1)
+  # movie_btn.click(None, [subject, verb, object], output2, _js="(s, v, o) => o + ' ' + v + ' ' + s")
+  # # verb.change(lambda x: x, verb, output3, _js="(x) => [...x].reverse().join('')")
+  # foo_bar_btn.click(None, [], subject, _js="(x) => x + ' foo'")
+#display the interface
+demo.launch(share=True, debug=True)