ppsingh committed on
Commit
5a88cd7
1 Parent(s): b78b722

Delete appStore/sector.py

Browse files
Files changed (1) hide show
  1. appStore/sector.py +0 -168
appStore/sector.py DELETED
@@ -1,168 +0,0 @@
1
- # set path
2
- import glob, os, sys;
3
- sys.path.append('../utils')
4
-
5
- #import needed libraries
6
- import seaborn as sns
7
- import matplotlib.pyplot as plt
8
- import numpy as np
9
- import pandas as pd
10
- import streamlit as st
11
- from utils.sector_classifier import load_sectorClassifier, sector_classification
12
- import logging
13
- logger = logging.getLogger(__name__)
14
- from utils.config import get_classifier_params
15
- from utils.preprocessing import paraLengthCheck
16
- from io import BytesIO
17
- import xlsxwriter
18
- import plotly.express as px
19
-
20
-
21
# Module-level configuration, evaluated once at import time.
# Identifier of this classifier; it selects the parameter set below and
# prefixes the session-state key under which app() stores the model.
classifier_identifier = 'sector'
# Parameter mapping for the classifier; app() reads the 'model_name' and
# 'threshold' entries from it.
params = get_classifier_params(classifier_identifier)
24
-
25
@st.cache_data
def to_excel(df,sectorlist):
    """Serialize *df* to an in-memory .xlsx file with dropdown validation.

    The frame is written to sheet ``Sheet1`` without its index, so row 1
    holds the header and the data occupies rows 2 .. len(df) + 1.  Dropdown
    (list) validation is then attached to fixed columns:

    * column S: one of ``'No'``, ``'Yes'``, ``'Discard'``
    * columns T, U, V, W and X: one of *sectorlist* or ``'Blank'``

    Args:
        df: DataFrame to export.
        sectorlist: list of sector names offered in the sector dropdowns.

    Returns:
        bytes: the content of the generated workbook.
    """
    len_df = len(df)
    output = BytesIO()
    sector_choices = sectorlist + ['Blank']

    # The context manager finalizes the workbook on exit; ExcelWriter.save()
    # no longer exists in pandas >= 2.0.
    with pd.ExcelWriter(output, engine='xlsxwriter') as writer:
        df.to_excel(writer, index=False, sheet_name='Sheet1')

        # With no data rows the range would be reversed (e.g. 'S2:S1'),
        # so validation is only attached when there is something to validate.
        if len_df > 0:
            worksheet = writer.sheets['Sheet1']
            # Header is in row 1, hence the last data row is len_df + 1.
            last_row = len_df + 1

            worksheet.data_validation('S2:S{}'.format(last_row),
                                      {'validate': 'list',
                                       'source': ['No', 'Yes', 'Discard']})

            # Each sector column gets its own single-column range.
            for column in ('T', 'U', 'V', 'W', 'X'):
                worksheet.data_validation(
                    '{0}2:{0}{1}'.format(column, last_row),
                    {'validate': 'list',
                     'source': sector_choices})

    return output.getvalue()
54
-
55
def app():
    """Run sector classification on the DataFrame held in session state.

    Does nothing unless ``st.session_state`` contains ``key1``.  Otherwise:

    1. loads the sector classifier named by ``params['model_name']`` and
       stores it in session state under
       ``'<classifier_identifier>_classifier'``;
    2. calls ``sector_classification`` on the DataFrame with
       ``params['threshold']``;
    3. writes the returned DataFrame back to ``st.session_state.key1``.
    """
    ### Main app code ###
    with st.container():

        if 'key1' in st.session_state:
            paragraphs = st.session_state.key1

            # Make the model available to other code through session state.
            # NOTE(review): sector_classification is not handed the model
            # directly; presumably it picks it up from this key - confirm.
            session_key = classifier_identifier + '_classifier'
            st.session_state[session_key] = load_sectorClassifier(
                classifier_name=params['model_name'])

            # Count of rows labelled 'TARGET'; more than 100 selects the
            # "this may take a while" notice.
            target_count = sum(paragraphs['Target Label'] == 'TARGET')
            # NOTE(review): warning_msg is computed but not used anywhere
            # in the visible code.
            warning_msg = (
                ": This might take sometime, please sit back and relax."
                if target_count > 100 else "")

            paragraphs = sector_classification(
                haystack_doc=paragraphs,
                threshold=params['threshold'])

            st.session_state.key1 = paragraphs
74
-
75
-
76
- # # st.write(df)
77
- # threshold= params['threshold']
78
- # truth_df = df.drop(['text'],axis=1)
79
- # truth_df = truth_df.astype(float) >= threshold
80
- # truth_df = truth_df.astype(str)
81
- # categories = list(truth_df.columns)
82
-
83
- # placeholder = {}
84
- # for val in categories:
85
- # placeholder[val] = dict(truth_df[val].value_counts())
86
- # count_df = pd.DataFrame.from_dict(placeholder)
87
- # count_df = count_df.T
88
- # count_df = count_df.reset_index()
89
- # # st.write(count_df)
90
- # placeholder = []
91
- # for i in range(len(count_df)):
92
- # placeholder.append([count_df.iloc[i]['index'],count_df['True'][i],'Yes'])
93
- # placeholder.append([count_df.iloc[i]['index'],count_df['False'][i],'No'])
94
- # count_df = pd.DataFrame(placeholder, columns = ['category','count','truth_value'])
95
- # # st.write("Total Paragraphs: {}".format(len(df)))
96
- # fig = px.bar(count_df, x='category', y='count',
97
- # color='truth_value')
98
- # # c1, c2 = st.columns([1,1])
99
- # # with c1:
100
- # st.plotly_chart(fig,use_container_width= True)
101
-
102
- # truth_df['labels'] = truth_df.apply(lambda x: {i if x[i]=='True' else None for i in categories}, axis=1)
103
- # truth_df['labels'] = truth_df.apply(lambda x: list(x['labels'] -{None}),axis=1)
104
- # # st.write(truth_df)
105
- # df = pd.concat([df,truth_df['labels']],axis=1)
106
- # df['Validation'] = 'No'
107
- # df['Sector1'] = 'Blank'
108
- # df['Sector2'] = 'Blank'
109
- # df['Sector3'] = 'Blank'
110
- # df['Sector4'] = 'Blank'
111
- # df['Sector5'] = 'Blank'
112
- # df_xlsx = to_excel(df,categories)
113
- # st.download_button(label='📥 Download Current Result',
114
- # data=df_xlsx ,
115
- # # file_name= 'file_sector.xlsx')
116
- # else:
117
- # st.info("🤔 No document found, please try to upload it at the sidebar!")
118
- # logging.warning("Terminated as no document provided")
119
-
120
- # # Creating truth value dataframe
121
- # if 'key' in st.session_state:
122
- # if st.session_state.key is not None:
123
- # df = st.session_state.key
124
- # st.markdown("###### Select the threshold for classifier ######")
125
- # c4, c5 = st.columns([1,1])
126
-
127
- # with c4:
128
- # threshold = st.slider("Threshold", min_value=0.00, max_value=1.0,
129
- # step=0.01, value=0.5,
130
- # help = "Keep High Value if want refined result, low if dont want to miss anything" )
131
- # sectors =set(df.columns)
132
- # removecols = {'Validation','Sector1','Sector2','Sector3','Sector4',
133
- # 'Sector5','text'}
134
- # sectors = list(sectors - removecols)
135
-
136
- # placeholder = {}
137
- # for val in sectors:
138
- # temp = df[val].astype(float) > threshold
139
- # temp = temp.astype(str)
140
- # placeholder[val] = dict(temp.value_counts())
141
-
142
- # count_df = pd.DataFrame.from_dict(placeholder)
143
- # count_df = count_df.T
144
- # count_df = count_df.reset_index()
145
- # placeholder = []
146
- # for i in range(len(count_df)):
147
- # placeholder.append([count_df.iloc[i]['index'],count_df['False'][i],'False'])
148
- # placeholder.append([count_df.iloc[i]['index'],count_df['True'][i],'True'])
149
-
150
- # count_df = pd.DataFrame(placeholder, columns = ['sector','count','truth_value'])
151
- # fig = px.bar(count_df, x='sector', y='count',
152
- # color='truth_value',
153
- # height=400)
154
- # st.write("")
155
- # st.plotly_chart(fig)
156
-
157
- # df['Validation'] = 'No'
158
- # df['Sector1'] = 'Blank'
159
- # df['Sector2'] = 'Blank'
160
- # df['Sector3'] = 'Blank'
161
- # df['Sector4'] = 'Blank'
162
- # df['Sector5'] = 'Blank'
163
- # df_xlsx = to_excel(df,sectors)
164
- # st.download_button(label='📥 Download Current Result',
165
- # data=df_xlsx ,
166
- # file_name= 'file_sector.xlsx')
167
-
168
-