NativeVex committed on
Commit
459ba42
1 Parent(s): 6ac528f

nudge display settings

Browse files
Files changed (1) hide show
  1. language_models_project/app.py +61 -22
language_models_project/app.py CHANGED
@@ -2,15 +2,22 @@ import streamlit as st # Web App
2
  from main import classify
3
  import pandas as pd
4
 
5
- #demo_phrases = """ Here are some examples:
6
- #this is a phrase
7
- #is it neutral
8
- #nothing else to say
9
- #man I'm so damn angry
10
- #sarcasm lol
11
- #I love this product
12
- #"""
13
- demo_phrases = pd.read_csv('./train.csv')['comment_text'].head(6).astype(str).str.cat(sep='\n')
 
 
 
 
 
 
 
14
  # title
15
  st.title("Sentiment Analysis")
16
 
@@ -23,7 +30,7 @@ model_name = st.selectbox(
23
  "finiteautomata/bertweet-base-sentiment-analysis",
24
  "ahmedrachid/FinancialBERT-Sentiment-Analysis",
25
  "finiteautomata/beto-sentiment-analysis",
26
- "NativeVex/custom-fine-tuned"
27
  ],
28
  )
29
 
@@ -32,18 +39,30 @@ input_sentences = st.text_area("Sentences", value=demo_phrases, height=200)
32
  data = input_sentences.split("\n")
33
 
34
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
 
35
  model_path = "bin/model4"
36
  model = AutoModelForSequenceClassification.from_pretrained(model_path)
37
  tokenizer = AutoTokenizer.from_pretrained(model_path)
38
 
39
- from typing import List
40
  import torch
41
  import numpy as np
42
  import pandas as pd
43
 
44
- def infer(text: str) -> List[float]:
 
 
 
 
 
 
 
 
 
 
 
45
  encoding = tokenizer(text, return_tensors="pt")
46
- encoding = {k: v.to(model.device) for k,v in encoding.items()}
47
  outputs = model(**encoding)
48
  logits = outputs.logits
49
  sigmoid = torch.nn.Sigmoid()
@@ -51,27 +70,47 @@ def infer(text: str) -> List[float]:
51
  predictions = np.zeros(probs.shape)
52
  predictions[np.where(probs >= 0.5)] = 1
53
  predictions = pd.Series(predictions == 1)
54
- predictions.index = ["toxic", "severe_toxic", "obscene", "threat", "insult", "identity_hate"]
55
- return [{"label": str(predictions), "score": str(probs)}]
 
 
 
 
 
 
 
56
 
57
 
58
  def wrapper(*args, **kwargs):
 
 
 
 
59
  if args[0] != "NativeVex/custom-fine-tuned":
60
  return classify(*args, **kwargs)
61
  else:
62
  return infer(text=args[1])
63
 
64
-
65
 
66
  if st.button("Classify"):
67
- st.write("Please allow a few minutes for the model to run/download")
68
- for i in range(len(data)):
69
- j = wrapper(model_name.strip(), data[i])[0]
70
- sentiment = j["label"]
71
- confidence = j["score"]
 
 
 
 
 
 
72
  st.write(
73
- f"{i}. {data[i]} :: Classification - {sentiment} with confidence {confidence}"
 
 
74
  )
 
 
75
 
76
 
77
  st.markdown(
 
2
  from main import classify
3
  import pandas as pd
4
 
5
+ # demo_phrases = """ Here are some examples:
6
+ # this is a phrase
7
+ # is it neutral
8
+ # nothing else to say
9
+ # man I'm so damn angry
10
+ # sarcasm lol
11
+ # I love this product
12
+ # """
13
+ #demo_phrases = (
14
+ # pd.read_csv("./train.csv")["comment_text"].head(6).astype(str).str.cat(sep="\n")
15
+ #)
16
+
17
+ df = pd.read_csv("./train.csv")
18
+ toxic = df[df['Toxic'] == 1]['comment_text'].head(3)
19
+ normal = df[df['Toxic'] == 0]['comment_text'].head(3)
20
+ demo_phrases = pd.concat([toxic, normal]).astype(str).str.cat(sep="\n")
21
  # title
22
  st.title("Sentiment Analysis")
23
 
 
30
  "finiteautomata/bertweet-base-sentiment-analysis",
31
  "ahmedrachid/FinancialBERT-Sentiment-Analysis",
32
  "finiteautomata/beto-sentiment-analysis",
33
+ "NativeVex/custom-fine-tuned",
34
  ],
35
  )
36
 
 
39
  data = input_sentences.split("\n")
40
 
41
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
42
+
43
  model_path = "bin/model4"
44
  model = AutoModelForSequenceClassification.from_pretrained(model_path)
45
  tokenizer = AutoTokenizer.from_pretrained(model_path)
46
 
47
+ from typing import List, Dict
48
  import torch
49
  import numpy as np
50
  import pandas as pd
51
 
52
+
53
+ def infer(text: str) -> List[Dict[str, float]]:
54
+ """Use custom model to infer sentiment
55
+
56
+ Args:
57
+ text (str): text to infer
58
+
59
+ Returns:
60
+ List[Dict[str, float]]: list of dictionaries with {sentiment:
61
+ probability} score pairs
62
+
63
+ """
64
  encoding = tokenizer(text, return_tensors="pt")
65
+ encoding = {k: v.to(model.device) for k, v in encoding.items()}
66
  outputs = model(**encoding)
67
  logits = outputs.logits
68
  sigmoid = torch.nn.Sigmoid()
 
70
  predictions = np.zeros(probs.shape)
71
  predictions[np.where(probs >= 0.5)] = 1
72
  predictions = pd.Series(predictions == 1)
73
+ predictions.index = [
74
+ "toxic",
75
+ "severe_toxic",
76
+ "obscene",
77
+ "threat",
78
+ "insult",
79
+ "identity_hate",
80
+ ]
81
+ return [{"label": predictions, "score": probs}]
82
 
83
 
84
  def wrapper(*args, **kwargs):
85
+ """Wrapper function to use custom model
86
+
87
+ Behaves as a switchboard to redirect if custom model is selected
88
+ """
89
  if args[0] != "NativeVex/custom-fine-tuned":
90
  return classify(*args, **kwargs)
91
  else:
92
  return infer(text=args[1])
93
 
 
94
 
95
  if st.button("Classify"):
96
+ if not model_name.strip() == "NativeVex/custom-fine-tuned":
97
+ st.write("Please allow a few minutes for the model to run/download")
98
+ for i in range(len(data)):
99
+ # j = wrapper(model_name.strip(), data[i])[0]
100
+ j = classify(model_name.strip(), data[i])[0]
101
+ sentiment = j["label"]
102
+ confidence = j["score"]
103
+ st.write(
104
+ f"{i}. {data[i]} :: Classification - {sentiment} with confidence {confidence}"
105
+ )
106
+ else:
107
  st.write(
108
+ "To render the dataframe, all inputs must be sequentially"
109
+ "processed before displaying. Please allow a few minutes for longer"
110
+ "inputs."
111
  )
112
+ j = pd.DataFrame([infer(text=i) for i in data])
113
+ st.dataframe(data=j)
114
 
115
 
116
  st.markdown(