mtyrrell committed on
Commit
c30cd54
1 Parent(s): c3446e2

sensitivity level

Browse files
.DS_Store CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
 
app.py CHANGED
@@ -40,14 +40,17 @@ def main():
40
  st.markdown(
41
  """
42
  1. **Download the Excel Template file (below).**
43
- 2. **Copy/paste the requisite application data in the template file. Best practice is to 'paste as values'.**
44
- 3. **Upload the template file in the area to the right (or click browse files).**
 
45
 
46
  The tool will immediately start processing the uploaded application data. This can take considerable time
47
  depending on the number of applications and the length of text in each. For example, a file with 500 applications
48
  could be expected to take approximately 20 minutes.
49
 
50
- *Note - you can also simply rename the column headers in your own file. The headers must match the column names in the template for the tool to run properly.*
 
 
51
  """
52
  )
53
  # Excel file download
@@ -58,6 +61,24 @@ def main():
58
  mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
59
  )
60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  with st.expander("ℹ️ - About this app", expanded=False):
62
  st.write(
63
  """
@@ -65,7 +86,7 @@ def main():
65
 
66
  The tool functions by running selected text fields from the application through a series of 8 LLMs fine-tuned for text classification (ref. diagram below).
67
  The resulting output classifications are used to compute a score and a suggested pre-filtering action. The tool has been tested against
68
- human assessors with an extremely low false negative rate (<6%).
69
 
70
  """)
71
  st.image('images/pipeline.png')
@@ -75,7 +96,7 @@ def main():
75
  if uploaded_file is not None:
76
  try:
77
  if not st.session_state['data_processed']:
78
- st.session_state['df'] = process_data(uploaded_file)
79
  st.session_state['data_processed'] = True
80
 
81
  df = st.session_state['df']
 
40
  st.markdown(
41
  """
42
  1. **Download the Excel Template file (below).**
43
+ 2. **[OPTIONAL]: Select the desired filtering sensitivity level (below).**
44
+ 3. **Copy/paste the requisite application data in the template file. Best practice is to 'paste as values'.**
45
+ 4. **Upload the template file in the area to the right (or click browse files).**
46
 
47
  The tool will immediately start processing the uploaded application data. This can take considerable time
48
  depending on the number of applications and the length of text in each. For example, a file with 500 applications
49
  could be expected to take approximately 20 minutes.
50
 
51
+ ***NOTE (1)** - you can also simply rename the column headers in your own file. The headers must match the column names in the template for the tool to run properly.*
52
+
53
+ ***NOTE (2)** - as of April 2024 this app is running as a **test version**, NOT on a GPU. So the process can take up to 30 minutes for 20 applications.*
54
  """
55
  )
56
  # Excel file download
 
61
  mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
62
  )
63
 
64
+ # get sensitivity level for use in review / reject (ref. process_data function)
65
+ sens_options = {
66
+ "Low": 4,
67
+ "Medium": 5,
68
+ "High": 7,
69
+ }
70
+
71
+ sens_input = st.sidebar.radio(label = 'Select the Sensitivity Level [OPTIONAL]',
72
+ help = 'Increasing the level of sensitivity results in more \
73
+ applications being filtered out. At the same time, this also \
74
+ increases the probability of false negatives (FNs). The rate of \
75
+ FNs at the lowest setting is approximately 6 percent, and \
76
+ approaches 13 percent at the highest setting. ',
77
+ options = list(sens_options.keys()),
78
+ horizontal = False)
79
+
80
+ sens_level = sens_options[sens_input]
81
+
82
  with st.expander("ℹ️ - About this app", expanded=False):
83
  st.write(
84
  """
 
86
 
87
  The tool functions by running selected text fields from the application through a series of 8 LLMs fine-tuned for text classification (ref. diagram below).
88
  The resulting output classifications are used to compute a score and a suggested pre-filtering action. The tool has been tested against
89
+ human assessors and exhibits an extremely low false negative rate (<6%) at a Sensitivity Level of 'Low' (i.e. rejection threshold for predicted score < 4).
90
 
91
  """)
92
  st.image('images/pipeline.png')
 
96
  if uploaded_file is not None:
97
  try:
98
  if not st.session_state['data_processed']:
99
+ st.session_state['df'] = process_data(uploaded_file, sens_level)
100
  st.session_state['data_processed'] = True
101
 
102
  df = st.session_state['df']
modules/__pycache__/utils.cpython-38.pyc CHANGED
Binary files a/modules/__pycache__/utils.cpython-38.pyc and b/modules/__pycache__/utils.cpython-38.pyc differ
 
modules/utils.py CHANGED
@@ -83,7 +83,7 @@ def predict_category(df, model_name, progress_bar, repo, profile, multilabel=Fal
83
 
84
 
85
  # Main function to process data
86
- def process_data(uploaded_file):
87
  df = pd.read_excel(uploaded_file)
88
  # Column renaming and initial processing
89
 
@@ -156,6 +156,6 @@ def process_data(uploaded_file):
156
 
157
  # Further data processing and actions
158
  df['pred_score'] = df.apply(lambda x: round((x['fin_lab2']*2 + x['scope_lab1']*2 + x['scope_lab2']*2 + x['tech_lab1'] + x['tech_lab3'] + x['bar_lab2'])/9*10, 0), axis=1)
159
- df['pred_action'] = df.apply(lambda x: 'REJECT' if (x['pred_score'] < 4 or x['LANG'] != 'en-US' or x['ADAPMIT'] == 'Adaptation') else 'REVIEW', axis=1)
160
 
161
  return df
 
83
 
84
 
85
  # Main function to process data
86
+ def process_data(uploaded_file, sens_level):
87
  df = pd.read_excel(uploaded_file)
88
  # Column renaming and initial processing
89
 
 
156
 
157
  # Further data processing and actions
158
  df['pred_score'] = df.apply(lambda x: round((x['fin_lab2']*2 + x['scope_lab1']*2 + x['scope_lab2']*2 + x['tech_lab1'] + x['tech_lab3'] + x['bar_lab2'])/9*10, 0), axis=1)
159
+ df['pred_action'] = df.apply(lambda x: 'REJECT' if (x['pred_score'] < sens_level or x['LANG'] != 'en-US' or x['ADAPMIT'] == 'Adaptation') else 'REVIEW', axis=1)
160
 
161
  return df
processed_applications.csv CHANGED
The diff for this file is too large to render. See raw diff