bjorn-hommel committed on
Commit
51082bd
1 Parent(s): daea372

handling local env; added state-management

Browse files
Files changed (4) hide show
  1. .gitignore +1 -0
  2. README.md +1 -1
  3. app.py +49 -39
  4. requirements.txt +2 -1
.gitignore CHANGED
@@ -1,2 +1,3 @@
 
1
  tmp.ipynb
2
  __pycache__
 
1
+ .env
2
  tmp.ipynb
3
  __pycache__
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🎭
4
  colorFrom: blue
5
  colorTo: yellow
6
  sdk: streamlit
7
- python_version: 3.8.16
8
  sdk_version: 1.17.0
9
  app_file: app.py
10
  pinned: false
 
4
  colorFrom: blue
5
  colorTo: yellow
6
  sdk: streamlit
7
+ python_version: 3.10.6
8
  sdk_version: 1.17.0
9
  app_file: app.py
10
  pinned: false
app.py CHANGED
@@ -5,10 +5,12 @@ import streamlit as st
5
  import pandas as pd
6
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
7
  from transformers import pipeline
 
8
  from plotly.subplots import make_subplots
9
  import plotly.graph_objects as go
10
  import plotly.express as px
11
 
 
12
 
13
  def z_score(y, mean=.04853076, sd=.9409466):
14
  return (y - mean) / sd
@@ -90,35 +92,37 @@ covariate_columns = {
90
  'rater_group': 'Rater Group',
91
  }
92
 
93
- df = (
94
- pd
95
- .read_feather(path='data.feather').query('partition == "test" | partition == "dev"')
96
- .melt(
97
- value_vars=['sentiment_model', 'desirability_model'],
98
- var_name='x_group',
99
- value_name='x',
100
- id_vars=['mean_z', 'text', 'content_domain', 'language', 'rater_group', 'study', 'instrument']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
101
  )
102
- .replace(
103
- to_replace={
104
- 'en': 'English',
105
- 'de': 'German',
106
- 'other': 'Other',
107
- 'personality': 'Personality',
108
- 'laypeople': 'Laypeople',
109
- 'students': 'Students',
110
- 'sentiment_model': 'Sentiment Model',
111
- 'desirability_model': 'Desirability Model'
112
- }
113
- )
114
- .rename(columns=covariate_columns)
115
- .rename(
116
- columns={
117
- 'mean_z': 'Human-ratings',
118
- 'x': 'Machine-ratings',
119
- }
120
  )
121
- )
122
 
123
  st.markdown("""
124
  # NLP for Item Desirability Ratings
@@ -145,19 +149,19 @@ with st.spinner('Processing...'):
145
  if os.environ.get('item-desirability'):
146
  model_path = 'magnolia-psychometrics/item-desirability'
147
  else:
148
- model_path = '/nlp/nlp/models/finetuned/twitter-xlm-roberta-base-regressive-desirability-ft-4'
149
 
150
  auth_token = os.environ.get('item-desirability') or True
151
 
152
- if 'tokenizer' not in globals():
153
- tokenizer = AutoTokenizer.from_pretrained(
154
  pretrained_model_name_or_path=model_path,
155
  use_fast=True,
156
  use_auth_token=auth_token
157
  )
158
 
159
- if 'model' not in globals():
160
- model = AutoModelForSequenceClassification.from_pretrained(
161
  pretrained_model_name_or_path=model_path,
162
  num_labels=1,
163
  ignore_mismatched_sizes=True,
@@ -165,9 +169,15 @@ with st.spinner('Processing...'):
165
  )
166
 
167
  ## sentiment model
168
- if 'classifier' not in globals():
169
- sentiment_model = 'cardiffnlp/twitter-xlm-roberta-base-sentiment'
170
- classifier = pipeline("sentiment-analysis", model=sentiment_model, tokenizer=sentiment_model, use_fast=False, top_k=3)
 
 
 
 
 
 
171
 
172
  input_text = st.text_input(
173
  label='Estimate item desirability:',
@@ -177,14 +187,14 @@ with st.spinner('Processing...'):
177
 
178
  if input_text:
179
 
180
- classifier_output = classifier(input_text)
181
  classifier_output_dict = {x['label']: x['score'] for x in classifier_output[0]}
182
  classifier_score = classifier_output_dict['positive'] - classifier_output_dict['negative']
183
 
184
- inputs = tokenizer(input_text, padding=True, return_tensors='pt')
185
 
186
  with torch.no_grad():
187
- score = model(**inputs).logits.squeeze().tolist()
188
  z = z_score(score)
189
 
190
  p1 = indicator_plot(
@@ -231,6 +241,6 @@ if show_covariates:
231
  else:
232
  option = None
233
 
234
- plot = scatter_plot(df, option)
235
 
236
  st.plotly_chart(plot, theme=None, use_container_width=True)
 
5
  import pandas as pd
6
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
7
  from transformers import pipeline
8
+ from dotenv import load_dotenv
9
  from plotly.subplots import make_subplots
10
  import plotly.graph_objects as go
11
  import plotly.express as px
12
 
13
+ load_dotenv()
14
 
15
  def z_score(y, mean=.04853076, sd=.9409466):
16
  return (y - mean) / sd
 
92
  'rater_group': 'Rater Group',
93
  }
94
 
95
+ if 'df' not in st.session_state:
96
+ st.session_state.df = (
97
+ pd
98
+ .read_feather(path='data.feather').query('partition == "test" | partition == "dev"')
99
+ .melt(
100
+ value_vars=['sentiment_model', 'desirability_model'],
101
+ var_name='x_group',
102
+ value_name='x',
103
+ id_vars=['mean_z', 'text', 'content_domain', 'language', 'rater_group', 'study', 'instrument']
104
+ )
105
+ .replace(
106
+ to_replace={
107
+ 'en': 'English',
108
+ 'de': 'German',
109
+ 'other': 'Other',
110
+ 'personality': 'Personality',
111
+ 'laypeople': 'Laypeople',
112
+ 'students': 'Students',
113
+ 'sentiment_model': 'Sentiment Model',
114
+ 'desirability_model': 'Desirability Model'
115
+ }
116
+ )
117
+ .rename(columns=covariate_columns)
118
+ .rename(
119
+ columns={
120
+ 'mean_z': 'Human-ratings',
121
+ 'x': 'Machine-ratings',
122
+ }
123
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  )
125
+
126
 
127
  st.markdown("""
128
  # NLP for Item Desirability Ratings
 
149
  if os.environ.get('item-desirability'):
150
  model_path = 'magnolia-psychometrics/item-desirability'
151
  else:
152
+ model_path = os.getenv('model_path')
153
 
154
  auth_token = os.environ.get('item-desirability') or True
155
 
156
+ if 'tokenizer' not in st.session_state:
157
+ st.session_state.tokenizer = AutoTokenizer.from_pretrained(
158
  pretrained_model_name_or_path=model_path,
159
  use_fast=True,
160
  use_auth_token=auth_token
161
  )
162
 
163
+ if 'model' not in st.session_state:
164
+ st.session_state.model = AutoModelForSequenceClassification.from_pretrained(
165
  pretrained_model_name_or_path=model_path,
166
  num_labels=1,
167
  ignore_mismatched_sizes=True,
 
169
  )
170
 
171
  ## sentiment model
172
+ if 'classifier' not in st.session_state:
173
+ st.session_state.sentiment_model = 'cardiffnlp/twitter-xlm-roberta-base-sentiment'
174
+ st.session_state.classifier = pipeline(
175
+ task='sentiment-analysis',
176
+ model=st.session_state.sentiment_model,
177
+ tokenizer=st.session_state.sentiment_model,
178
+ use_fast=False,
179
+ top_k=3
180
+ )
181
 
182
  input_text = st.text_input(
183
  label='Estimate item desirability:',
 
187
 
188
  if input_text:
189
 
190
+ classifier_output = st.session_state.classifier(input_text)
191
  classifier_output_dict = {x['label']: x['score'] for x in classifier_output[0]}
192
  classifier_score = classifier_output_dict['positive'] - classifier_output_dict['negative']
193
 
194
+ inputs = st.session_state.tokenizer(text=input_text, padding=True, return_tensors='pt')
195
 
196
  with torch.no_grad():
197
+ score = st.session_state.model(**inputs).logits.squeeze().tolist()
198
  z = z_score(score)
199
 
200
  p1 = indicator_plot(
 
241
  else:
242
  option = None
243
 
244
+ plot = scatter_plot(st.session_state.df, option)
245
 
246
  st.plotly_chart(plot, theme=None, use_container_width=True)
requirements.txt CHANGED
@@ -3,4 +3,5 @@ transformers
3
  plotly
4
  dash
5
  statsmodels
6
- sentencepiece
 
 
3
  plotly
4
  dash
5
  statsmodels
6
+ sentencepiece
7
+ python-dotenv