Spaces:
Sleeping
Sleeping
sensitivity level
Browse files- .DS_Store +0 -0
- app.py +26 -5
- modules/__pycache__/utils.cpython-38.pyc +0 -0
- modules/utils.py +2 -2
- processed_applications.csv +0 -0
.DS_Store
CHANGED
Binary files a/.DS_Store and b/.DS_Store differ
|
|
app.py
CHANGED
@@ -40,14 +40,17 @@ def main():
|
|
40 |
st.markdown(
|
41 |
"""
|
42 |
1. **Download the Excel Template file (below).**
|
43 |
-
2. **
|
44 |
-
3. **
|
|
|
45 |
|
46 |
The tool will immediately start processing the uploaded application data. This can take considerable time
|
47 |
depending on the number of applications and the length of text in each. For example, a file with 500 applications
|
48 |
could be expected to take approximately 20 minutes.
|
49 |
|
50 |
-
|
|
|
|
|
51 |
"""
|
52 |
)
|
53 |
# Excel file download
|
@@ -58,6 +61,24 @@ def main():
|
|
58 |
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
59 |
)
|
60 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
with st.expander("ℹ️ - About this app", expanded=False):
|
62 |
st.write(
|
63 |
"""
|
@@ -65,7 +86,7 @@ def main():
|
|
65 |
|
66 |
The tool functions by running selected text fields from the application through a series of 8 LLMs fine-tuned for text classification (ref. diagram below).
|
67 |
The resulting output classifications are used to compute a score and a suggested pre-filtering action. The tool has been tested against
|
68 |
-
human assessors
|
69 |
|
70 |
""")
|
71 |
st.image('images/pipeline.png')
|
@@ -75,7 +96,7 @@ def main():
|
|
75 |
if uploaded_file is not None:
|
76 |
try:
|
77 |
if not st.session_state['data_processed']:
|
78 |
-
st.session_state['df'] = process_data(uploaded_file)
|
79 |
st.session_state['data_processed'] = True
|
80 |
|
81 |
df = st.session_state['df']
|
|
|
40 |
st.markdown(
|
41 |
"""
|
42 |
1. **Download the Excel Template file (below).**
|
43 |
+
2. **[OPTIONAL]: Select the desired filtering sensitivity level (below).**
|
44 |
+
3. **Copy/paste the requisite application data in the template file. Best practice is to 'paste as values'.**
|
45 |
+
4. **Upload the template file in the area to the right (or click browse files).**
|
46 |
|
47 |
The tool will immediately start processing the uploaded application data. This can take considerable time
|
48 |
depending on the number of applications and the length of text in each. For example, a file with 500 applications
|
49 |
could be expected to take approximately 20 minutes.
|
50 |
|
51 |
+
***NOTE (1)** - you can also simply rename the column headers in your own file. The headers must match the column names in the template for the tool to run properly.*
|
52 |
+
|
53 |
+
***NOTE (2)** - as of April 2024 this app is running as a **test version**, NOT on a GPU. So the process can take up to 30 minutes for 20 applications.*
|
54 |
"""
|
55 |
)
|
56 |
# Excel file download
|
|
|
61 |
mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
|
62 |
)
|
63 |
|
64 |
+
# Map the user-selected sensitivity level to the numeric score threshold that process_data uses for the REJECT / REVIEW decision
|
65 |
+
sens_options = {
|
66 |
+
"Low": 4,
|
67 |
+
"Medium": 5,
|
68 |
+
"High": 7,
|
69 |
+
}
|
70 |
+
|
71 |
+
sens_input = st.sidebar.radio(label = 'Select the Sensitivity Level [OPTIONAL]',
|
72 |
+
help = 'Increasing the level of sensitivity results in more \
|
73 |
+
applications being filtered out. At the same time, this also \
|
74 |
+
increases the probability of false negatives (FNs). The rate of \
|
75 |
+
FNs at the lowest setting is approximately 6 percent, and \
|
76 |
+
approaches 13 percent at the highest setting. ',
|
77 |
+
options = list(sens_options.keys()),
|
78 |
+
horizontal = False)
|
79 |
+
|
80 |
+
sens_level = sens_options[sens_input]
|
81 |
+
|
82 |
with st.expander("ℹ️ - About this app", expanded=False):
|
83 |
st.write(
|
84 |
"""
|
|
|
86 |
|
87 |
The tool functions by running selected text fields from the application through a series of 8 LLMs fine-tuned for text classification (ref. diagram below).
|
88 |
The resulting output classifications are used to compute a score and a suggested pre-filtering action. The tool has been tested against
|
89 |
+
human assessors and exhibits an extremely low false negative rate (<6%) at a Sensitivity Level of 'Low' (i.e. rejection threshold for predicted score < 4).
|
90 |
|
91 |
""")
|
92 |
st.image('images/pipeline.png')
|
|
|
96 |
if uploaded_file is not None:
|
97 |
try:
|
98 |
if not st.session_state['data_processed']:
|
99 |
+
st.session_state['df'] = process_data(uploaded_file, sens_level)
|
100 |
st.session_state['data_processed'] = True
|
101 |
|
102 |
df = st.session_state['df']
|
modules/__pycache__/utils.cpython-38.pyc
CHANGED
Binary files a/modules/__pycache__/utils.cpython-38.pyc and b/modules/__pycache__/utils.cpython-38.pyc differ
|
|
modules/utils.py
CHANGED
@@ -83,7 +83,7 @@ def predict_category(df, model_name, progress_bar, repo, profile, multilabel=Fal
|
|
83 |
|
84 |
|
85 |
# Main function to process data
|
86 |
-
def process_data(uploaded_file):
|
87 |
df = pd.read_excel(uploaded_file)
|
88 |
# Column renaming and initial processing
|
89 |
|
@@ -156,6 +156,6 @@ def process_data(uploaded_file):
|
|
156 |
|
157 |
# Further data processing and actions
|
158 |
df['pred_score'] = df.apply(lambda x: round((x['fin_lab2']*2 + x['scope_lab1']*2 + x['scope_lab2']*2 + x['tech_lab1'] + x['tech_lab3'] + x['bar_lab2'])/9*10, 0), axis=1)
|
159 |
-
df['pred_action'] = df.apply(lambda x: 'REJECT' if (x['pred_score'] <
|
160 |
|
161 |
return df
|
|
|
83 |
|
84 |
|
85 |
# Main function to process data
|
86 |
+
def process_data(uploaded_file, sens_level):
|
87 |
df = pd.read_excel(uploaded_file)
|
88 |
# Column renaming and initial processing
|
89 |
|
|
|
156 |
|
157 |
# Further data processing and actions
|
158 |
df['pred_score'] = df.apply(lambda x: round((x['fin_lab2']*2 + x['scope_lab1']*2 + x['scope_lab2']*2 + x['tech_lab1'] + x['tech_lab3'] + x['bar_lab2'])/9*10, 0), axis=1)
|
159 |
+
df['pred_action'] = df.apply(lambda x: 'REJECT' if (x['pred_score'] < sens_level or x['LANG'] != 'en-US' or x['ADAPMIT'] == 'Adaptation') else 'REVIEW', axis=1)
|
160 |
|
161 |
return df
|
processed_applications.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|