Spaces:

mtyrrell
/

maf_prefilter_app

Sleeping

App Files Files Community

mtyrrell committed on Apr 26

Commit

c30cd54

•

1 Parent(s): c3446e2

sensitivity level

Browse files

Files changed (5) hide show

.DS_Store +0 -0
app.py +26 -5
modules/__pycache__/utils.cpython-38.pyc +0 -0
modules/utils.py +2 -2
processed_applications.csv +0 -0

.DS_Store CHANGED Viewed

Binary files a/.DS_Store and b/.DS_Store differ

app.py CHANGED Viewed

@@ -40,14 +40,17 @@ def main():
                 st.markdown(
                     """
                     1. **Download the Excel Template file (below).**
-                    2. **Copy/paste the requisite application data in the template file. Best practice is to 'paste as values'.**
-                    3. **Upload the template file in the area to the right (or click browse files).**
                     The tool will immediately start processing the uploaded application data. This can take considerable time
                     depending on the number of applications and the length of text in each. For example, a file with 500 applications
                     could be expected to take approximately 20 minutes.
-                    *Note - you can also simply rename the column headers in your own file. The headers must match the column names in the template for the tool to run properly.*
                     """
                 )
             # Excel file download
@@ -58,6 +61,24 @@ def main():
                 mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
             )
         with st.expander("ℹ️ - About this app", expanded=False):
             st.write(
                 """
@@ -65,7 +86,7 @@ def main():
                 The tool functions by running selected text fields from the application through a series of 8 LLMs fine-tuned for text classification (ref. diagram below).
                 The resulting output classifications are used to compute a score and a suggested pre-filtering action. The tool has been tested against
-                human assessors with an extremely low false negative rate (<6%).
                 """)
             st.image('images/pipeline.png')
@@ -75,7 +96,7 @@ def main():
         if uploaded_file is not None:
             try:
                 if not st.session_state['data_processed']:
-                    st.session_state['df'] = process_data(uploaded_file)
                     st.session_state['data_processed'] = True
                 df = st.session_state['df']

                 st.markdown(
                     """
                     1. **Download the Excel Template file (below).**
+                    2. **[OPTIONAL]: Select the desired filtering sensitivity level (below).**
+                    3. **Copy/paste the requisite application data in the template file. Best practice is to 'paste as values'.**
+                    4. **Upload the template file in the area to the right (or click browse files).**
                     The tool will immediately start processing the uploaded application data. This can take considerable time
                     depending on the number of applications and the length of text in each. For example, a file with 500 applications
                     could be expected to take approximately 20 minutes.
+                    ***NOTE (1)** - you can also simply rename the column headers in your own file. The headers must match the column names in the template for the tool to run properly.*
+                    ***NOTE (2)** - as of April 2024 this app is running as a **test version**, NOT on a GPU, so the process can take up to 30 minutes for 20 applications.*
                     """
                 )
             # Excel file download
                 mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
             )
+            # get sensitivity level for use in review / reject (ref. process_data function)
+            sens_options = {
+                "Low": 4,
+                "Medium": 5,
+                "High": 7,
+            }
+            sens_input = st.sidebar.radio(label = 'Select the Sensitivity Level [OPTIONAL]',
+                                    help = 'Increasing the level of sensitivity results in more \
+                                    applications being filtered out. At the same time, this also \
+                                    increases the probability of false negatives (FNs). The rate of \
+                                    FNs at the lowest setting is approximately 6 percent, and \
+                                    approaches 13 percent at the highest setting. ',
+                                    options = list(sens_options.keys()),
+                                    horizontal = False)
+            sens_level = sens_options[sens_input]
         with st.expander("ℹ️ - About this app", expanded=False):
             st.write(
                 """
                 The tool functions by running selected text fields from the application through a series of 8 LLMs fine-tuned for text classification (ref. diagram below).
                 The resulting output classifications are used to compute a score and a suggested pre-filtering action. The tool has been tested against
+                human assessors and exhibits an extremely low false negative rate (<6%) at a Sensitivity Level of 'Low' (i.e. rejection threshold for predicted score < 4).
                 """)
             st.image('images/pipeline.png')
         if uploaded_file is not None:
             try:
                 if not st.session_state['data_processed']:
+                    st.session_state['df'] = process_data(uploaded_file, sens_level)
                     st.session_state['data_processed'] = True
                 df = st.session_state['df']

modules/__pycache__/utils.cpython-38.pyc CHANGED Viewed

Binary files a/modules/__pycache__/utils.cpython-38.pyc and b/modules/__pycache__/utils.cpython-38.pyc differ

modules/utils.py CHANGED Viewed

@@ -83,7 +83,7 @@ def predict_category(df, model_name, progress_bar, repo, profile, multilabel=Fal
 # Main function to process data
-def process_data(uploaded_file):
     df = pd.read_excel(uploaded_file)
     # Column renaming and initial processing
@@ -156,6 +156,6 @@ def process_data(uploaded_file):
     # Further data processing and actions
     df['pred_score'] = df.apply(lambda x: round((x['fin_lab2']*2 + x['scope_lab1']*2 + x['scope_lab2']*2 + x['tech_lab1'] + x['tech_lab3'] + x['bar_lab2'])/9*10, 0), axis=1)
-    df['pred_action'] = df.apply(lambda x: 'REJECT' if (x['pred_score'] < 4 or x['LANG'] != 'en-US' or x['ADAPMIT'] == 'Adaptation') else 'REVIEW', axis=1)
     return df

 # Main function to process data
+def process_data(uploaded_file, sens_level):
     df = pd.read_excel(uploaded_file)
     # Column renaming and initial processing
     # Further data processing and actions
     df['pred_score'] = df.apply(lambda x: round((x['fin_lab2']*2 + x['scope_lab1']*2 + x['scope_lab2']*2 + x['tech_lab1'] + x['tech_lab3'] + x['bar_lab2'])/9*10, 0), axis=1)
+    df['pred_action'] = df.apply(lambda x: 'REJECT' if (x['pred_score'] < sens_level or x['LANG'] != 'en-US' or x['ADAPMIT'] == 'Adaptation') else 'REVIEW', axis=1)
     return df

processed_applications.csv CHANGED Viewed

The diff for this file is too large to render. See raw diff