andyqin18 committed on
Commit
bcfb40b
1 Parent(s): 2d1f021

Added test model performance code

app.py CHANGED
@@ -4,8 +4,8 @@ import numpy as np
 from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
 
 # Define global variables
-fine_tuned_model = "andyqin18/test-finetuned"
-sample_text_num = 10
+FINE_TUNED_MODEL = "andyqin18/test-finetuned"
+NUM_SAMPLE_TEXT = 10
 
 # Define analyze function
 def analyze(model_name: str, text: str, top_k=1) -> dict:
@@ -24,7 +24,7 @@ st.write("You can choose to use my fine-tuned model or pre-trained models.")
 
 # Model hub
 model_descrip = {
-    fine_tuned_model: "This is a customized BERT-base finetuned model that detects multiple toxicity for a text. \
+    FINE_TUNED_MODEL: "This is a customized BERT-base finetuned model that detects multiple toxicity for a text. \
         Labels: toxic, severe_toxic, obscene, threat, insult, identity_hate",
     "distilbert-base-uncased-finetuned-sst-2-english": "This model is a fine-tune checkpoint of DistilBERT-base-uncased, fine-tuned on SST-2. \
         Labels: POSITIVE; NEGATIVE ",
@@ -50,7 +50,7 @@ if st.button("Analyze"):
     else:
         with st.spinner("Hang on.... Analyzing..."):
             # If fine-tuned
-            if user_model == fine_tuned_model:
+            if user_model == FINE_TUNED_MODEL:
                 result = analyze(user_model, user_input, top_k=2)  # Top 2 labels with highest score
                 result_dict = {
                     "Text": [user_input],
@@ -84,7 +84,7 @@ if st.button("Analyze"):
             }
 
             for text in sample_texts:
-                result = analyze(fine_tuned_model, text[:50], top_k=2)
+                result = analyze(FINE_TUNED_MODEL, text[:50], top_k=2)
                 init_table_dict["Text"].append(text[:50])
                 init_table_dict["Highest Toxicity Class"].append(result[0][0]['label'])
                 init_table_dict["Highest Score"].append(result[0][0]['score'])
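
Note on the indexing above: `pipeline("sentiment-analysis", ..., top_k=2)` returns one ranked list of label/score dicts per input string, which is why the app reads `result[0][0]` for the top label and `result[0][1]` for the runner-up. A minimal sketch of that shape (the labels and scores below are hypothetical, assuming the fine-tuned toxicity model above):

    # Hypothetical return value of analyze(FINE_TUNED_MODEL, text, top_k=2)
    result = [[
        {'label': 'toxic',  'score': 0.97},   # result[0][0]: highest-scoring label
        {'label': 'insult', 'score': 0.81},   # result[0][1]: second highest
    ]]
    assert result[0][0]['label'] == 'toxic'
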
milestone3/milestone3.py DELETED
@@ -1,41 +0,0 @@
-from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
-
-def analyze(model_name: str, text: str, top_k=1) -> dict:
-    '''
-    Output result of sentiment analysis of a text through a defined model
-    '''
-    model = AutoModelForSequenceClassification.from_pretrained(model_name)
-    tokenizer = AutoTokenizer.from_pretrained(model_name)
-    classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer, top_k=top_k)
-    return classifier(text)
-
-
-user_input = "Go fuck yourself"
-user_model = "andyqin18/test-finetuned"
-
-# result = analyze(user_model, user_input, top_k=2)
-
-# print(result[0][0]['label'])
-
-import pandas as pd
-import numpy as np
-df = pd.read_csv("milestone3/comp/test_comment.csv")
-test_texts = df["comment_text"].values
-sample_texts = np.random.choice(test_texts, size=10, replace=False)
-init_table_dict = {
-    "Text": [],
-    "Highest Toxicity Class": [],
-    "Highest Score": [],
-    "Second Highest Toxicity Class": [],
-    "Second Highest Score": []
-}
-
-for text in sample_texts:
-    result = analyze(user_model, text, top_k=2)
-    init_table_dict["Text"].append(text[:50])
-    init_table_dict["Highest Toxicity Class"].append(result[0][0]['label'])
-    init_table_dict["Highest Score"].append(result[0][0]['score'])
-    init_table_dict["Second Highest Toxicity Class"].append(result[0][1]['label'])
-    init_table_dict["Second Highest Score"].append(result[0][1]['score'])
-
-print(init_table_dict)
milestone3/{fintune.py → vid_tutorial_code/crash_course_fintune.py} RENAMED
File without changes
milestone3/{crash_course_vid.py → vid_tutorial_code/crash_course_vid.py} RENAMED
File without changes
test_model.py ADDED
@@ -0,0 +1,84 @@
+import numpy as np
+import pandas as pd
+from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
+import torch
+from tqdm import tqdm
+
+
+# Global variables
+TEST_SIZE = 1000
+FINE_TUNED_MODEL = "andyqin18/test-finetuned"
+
+
+# Define analyze function
+def analyze(text: str):
+    '''
+    Input: Text string
+    Output: Binary prediction array of shape (6,), 1 where prob >= 0.5
+    '''
+    encoding = tokenizer(text, return_tensors="pt")
+    encoding = {k: v.to(model.device) for k, v in encoding.items()}
+    outputs = model(**encoding)
+    logits = outputs.logits
+    sigmoid = torch.nn.Sigmoid()
+    probs = sigmoid(logits.squeeze().cpu())
+    predictions = np.zeros(probs.shape)
+    predictions[np.where(probs >= 0.5)] = 1
+    return predictions
+
+
+# Read dataset and randomly select testing texts and respective labels
+df = pd.read_csv("milestone3/comp/train.csv")
+labels = df.columns[2:]
+num_label = len(labels)
+train_texts = df["comment_text"].values
+train_labels = df[labels].values
+
+# Reseeding before each choice yields the same index permutation,
+# so the sampled texts and labels stay aligned
+np.random.seed(1)
+small_test_texts = np.random.choice(train_texts, size=TEST_SIZE, replace=False)
+
+np.random.seed(1)
+small_test_labels_idx = np.random.choice(train_labels.shape[0], size=TEST_SIZE, replace=False)
+small_test_labels = train_labels[small_test_labels_idx, :]
+
+
+# Load model and tokenizer. Prepare for analysis loop
+model = AutoModelForSequenceClassification.from_pretrained(FINE_TUNED_MODEL)
+tokenizer = AutoTokenizer.from_pretrained(FINE_TUNED_MODEL)
+total_true = 0
+total_success = 0
+TP, FP, TN, FN = 0, 0, 0, 0
+
+
+# Analysis loop
+for comment_idx in tqdm(range(TEST_SIZE), desc="Analyzing..."):
+    comment = small_test_texts[comment_idx]
+    target = small_test_labels[comment_idx]
+    result = analyze(comment[:500])
+
+    # Counting TP, FP, TN, FN across all label slots
+    for i in range(num_label):
+        if result[i] == target[i]:
+            if result[i] == 1:
+                TP += 1
+            else:
+                TN += 1
+        else:
+            if result[i] == 1:
+                FP += 1
+            else:
+                FN += 1
+
+    # Count correct predictions per label, and comments where every label is correct
+    num_true = (result == target).sum()
+    if num_true == len(labels):
+        total_success += 1
+    total_true += num_true
+
+# Calculate performance
+performance = {}
+performance["label_accuracy"] = total_true / (len(labels) * TEST_SIZE)
+performance["prediction_accuracy"] = total_success / TEST_SIZE
+performance["precision"] = TP / (TP + FP)
+performance["recall"] = TP / (TP + FN)
+print(performance)
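
The TP/FP/TN/FN counters in test_model.py pool all six label slots across the 1000 sampled comments, so the reported precision and recall are micro-averaged. A quick cross-check is possible with scikit-learn; this is a minimal sketch, not part of the commit, and assumes the per-comment predictions and targets are additionally collected into `all_preds` and `all_targets` (hypothetical names) inside the analysis loop:

    # Hypothetical cross-check, run after the analysis loop.
    import numpy as np
    from sklearn.metrics import precision_score, recall_score

    y_pred = np.vstack(all_preds)    # shape (TEST_SIZE, 6), 0/1 entries
    y_true = np.vstack(all_targets)  # shape (TEST_SIZE, 6), 0/1 entries
    # average="micro" pools TP/FP/FN across labels, matching the manual counters
    print(precision_score(y_true, y_pred, average="micro"))
    print(recall_score(y_true, y_pred, average="micro"))
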