gseetha04 committed on
Commit
5fcdc9c
1 Parent(s): 9bdb04e

scriptcomm

Browse files
Files changed (1) hide show
  1. ST_BusT2KG_demo_final.py +544 -0
ST_BusT2KG_demo_final.py ADDED
@@ -0,0 +1,544 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # import all packages
2
+ import requests
3
+ import streamlit as st
4
+ from sklearn.model_selection import StratifiedKFold
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.model_selection import KFold
7
+ # tokenizer
8
+ from transformers import AutoTokenizer, DistilBertTokenizerFast
9
+ # sequence tagging model + training-related
10
+ from transformers import DistilBertForTokenClassification, Trainer, TrainingArguments
11
+ import numpy as np
12
+ import pandas as pd
13
+ import torch
14
+ import json
15
+ import sys
16
+ import os
17
+ #from datasets import load_metric
18
+ from sklearn.metrics import classification_report
19
+ from pandas import read_csv
20
+ from sklearn.linear_model import LogisticRegression
21
+ import sklearn.model_selection
22
+ from sklearn.feature_extraction.text import TfidfTransformer
23
+ from sklearn.feature_extraction.text import CountVectorizer
24
+ from sklearn.naive_bayes import MultinomialNB
25
+ from sklearn.model_selection import GridSearchCV
26
+ from sklearn.pipeline import Pipeline, FeatureUnion
27
+ import math
28
+ from sklearn.metrics import accuracy_score
29
+ from sklearn.metrics import precision_recall_fscore_support
30
+ from sklearn.model_selection import train_test_split
31
+ import json
32
+ import re
33
+ import numpy as np
34
+ import pandas as pd
35
+ import re
36
+ import nltk
37
+ #stemmer = nltk.SnowballStemmer("english")
38
+ #from nltk.corpus import stopwords
39
+ import string
40
+ from sklearn.model_selection import train_test_split
41
+ # import seaborn as sns
42
+ # from sklearn.metrics import confusion_matrix
43
+ # from sklearn.metrics import classification_report, ConfusionMatrixDisplay
44
+ from transformers import AutoTokenizer, Trainer, TrainingArguments, AutoModelForSequenceClassification, AutoConfig
45
+ import torch
46
+ from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
47
+ import itertools
48
+ import json
49
+ import glob
50
+ from transformers import TextClassificationPipeline, TFAutoModelForSequenceClassification, AutoTokenizer
51
+ from transformers import pipeline
52
+ import pickle
53
+ import urllib.request
54
+ from sklearn.feature_extraction.text import TfidfTransformer
55
+ from sklearn.feature_extraction.text import CountVectorizer
56
+ #from PyPDF2 import PdfReader
57
+ #from urllib.request import urlopen
58
+ #from tabulate import tabulate
59
+ import csv
60
+ import gdown
61
+ import zipfile
62
+ import wget
63
+ import pdfplumber
64
+ import pathlib
65
+ import shutil
66
+ import webbrowser
67
+ from streamlit.components.v1 import html
68
+ import streamlit.components.v1 as components
69
+ from PyPDF2 import PdfReader
70
+
71
+
72
+ #device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
73
+
74
+ # from git import Repo
75
+
76
+ # Repo.clone_from('https://github.com/gseetha04/IMA-weights.git', branch='master')
77
+
78
# Stakeholder groups, in the row/column order of the summary matrix.
_STAKEHOLDERS = ('Non-performance', 'Investors', 'Customers', 'Employees', 'Society')

# Class index returned by the pickled component classifier -> stakeholder label.
_LEVEL2_LABELS = {
    0: 'Customers',
    1: 'Employees',
    2: 'Investors',
    3: 'Non-performance',
    4: 'Society',
    5: 'Unclassified',
}

# Column order of the detailed result table.
_DETAIL_COLUMNS = ['Id', 'Full sentence', 'Component', 'cause/effect',
                   'Label_level1', 'Label_level2']


def _extract_sentences(uploaded_file):
    """Return the lower-cased, punctuation-free sentences of the uploaded PDF."""
    reader = PdfReader(uploaded_file)
    # Line breaks inside a page are dropped (as before); pages are joined with
    # a space instead of going through ``str(list)``, which leaked repr
    # escapes such as ``\x0c`` into the text.
    pages = [(page.extract_text() or '').replace('\n', '') for page in reader.pages]
    text = ' '.join(pages).replace('"', '')
    return [re.sub(r'[^\w\s]', '', sentence.lower())
            for sentence in nltk.sent_tokenize(text)]


def _select_causal_sentences(sentences):
    """Return the sentences that the sequence classifier labels 'causal'."""
    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    model = AutoModelForSequenceClassification.from_pretrained(
        "checkpoint-2850", id2label={0: 'non-causal', 1: 'causal'})
    classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)

    causal = []
    for sentence in sentences:
        for prediction in classifier(sentence):
            if prediction['label'] == 'causal':
                causal.append(sentence)
    return causal


def _tag_components(causal_sentences):
    """Return one (sentence, word, entity_group) tuple per span found by the token tagger."""
    tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-uncased")
    model = DistilBertForTokenClassification.from_pretrained(
        "DistilBertforTokenClassification",
        id2label={0: 'CT', 1: 'E', 2: 'C', 3: 'O'})
    tagger = pipeline('ner', model=model, tokenizer=tokenizer,
                      aggregation_strategy='simple')

    rows = []
    for sentence in causal_sentences:
        for span in tagger(sentence):
            rows.append((sentence, span['word'], span['entity_group']))
    return rows


def _load_pickle(path):
    """Load a pickled object from *path*, closing the file afterwards."""
    # NOTE(review): pickle executes arbitrary code while loading. These files
    # are expected to be artifacts shipped with the app; never point this at
    # user-supplied data.
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def _build_detail_table(rows):
    """Build the detailed table from (sentence, word, entity_group) tuples.

    Each component is classified into a stakeholder group with the pickled
    vectorizer/classifier pair, word pieces starting with '##' are merged
    into the preceding row, sentences without any 'E' (effect) span are
    dropped, and the components are joined per sentence, role and label.
    Rows tagged 'CT' are removed from the result.
    """
    sentences = [row[0] for row in rows]
    words = [row[1] for row in rows]
    roles = [row[2] for row in rows]

    classifier = _load_pickle('Checkpoint-classification.sav')
    vectorizer = _load_pickle('vectorizefile_classification.pickle')
    predicted = classifier.predict(vectorizer.transform(words))

    frame = pd.DataFrame({
        'Id': pd.factorize(pd.Series(sentences, dtype=object))[0],
        'Full sentence': sentences,
        'Component': words,
        'cause/effect': roles,
        'Label_level1': ['Non-Performance' if label == 3 else 'Performance'
                         for label in predicted],
        'Label_level2': [_LEVEL2_LABELS[label] for label in predicted],
    })

    # Append a following '##' word piece to its predecessor, then drop the
    # rows that only hold such a continuation piece.
    following = frame['Component'].shift(-1)
    continues = following.str.startswith('##', na=False)
    frame.loc[continues, 'Component'] += (' ' + following[continues])
    frame = frame[~frame['Component'].astype(str).str.startswith('##')]

    # Keep only sentences that contain at least one effect span. (The former
    # np.concatenate/merge construction crashed when nothing had to be
    # dropped.)
    ids_with_effect = frame.loc[frame['cause/effect'] == 'E', 'Id'].unique()
    kept = frame[frame['Id'].isin(ids_with_effect)]
    if kept.empty:
        return pd.DataFrame(columns=_DETAIL_COLUMNS)

    grouped = kept.groupby(
        ['Id', 'Full sentence', 'cause/effect', 'Label_level1', 'Label_level2']
    )['Component'].apply(', '.join).reset_index()
    grouped['cause/effect'] = grouped['cause/effect'].replace(
        {"C": "cause", "E": "effect"})

    detail = grouped[grouped['cause/effect'] != 'CT'].copy()
    cleaned = detail.pop('Component').replace(r'[##]+', ' ', regex=True)
    detail.insert(2, "Component", cleaned, True)
    return detail


def _count_stakeholder_links(detail):
    """Return the cause (rows) x effect (columns) stakeholder matrix.

    For every sentence Id and every (cause group, effect group) pair, the
    larger of the two row counts is added to the cell when both counts are
    positive. Labels outside ``_STAKEHOLDERS`` (e.g. 'Unclassified') are
    ignored.
    """
    totals = {pair: 0 for pair in itertools.product(_STAKEHOLDERS, repeat=2)}

    # Grouping by Id visits every sentence, including the one with the
    # highest Id that ``range(0, max_id)`` used to skip.
    for _, sentence_rows in detail.groupby('Id'):
        role = sentence_rows['cause/effect']
        cause_labels = sentence_rows.loc[role == 'cause', 'Label_level2']
        effect_labels = sentence_rows.loc[role == 'effect', 'Label_level2']
        cause_counts = {name: int((cause_labels == name).sum()) for name in _STAKEHOLDERS}
        effect_counts = {name: int((effect_labels == name).sum()) for name in _STAKEHOLDERS}

        # Every cell gets its own accumulator, so X -> Non-performance no
        # longer overwrites Non-performance -> X.
        for cause, effect in totals:
            n_cause = cause_counts[cause]
            n_effect = effect_counts[effect]
            if n_cause > 0 and n_effect > 0:
                totals[(cause, effect)] += max(n_cause, n_effect)

    matrix = [[totals[(cause, effect)] for effect in _STAKEHOLDERS]
              for cause in _STAKEHOLDERS]
    return pd.DataFrame(matrix, index=list(_STAKEHOLDERS),
                        columns=list(_STAKEHOLDERS), dtype=object)


def _publish_static_assets():
    """Copy the style sheets and the graph JSON into Streamlit's static folder."""
    static_dir = pathlib.Path(st.__path__[0]) / 'static' / "css1"
    static_dir.mkdir(parents=True, exist_ok=True)

    for name in ("tree.css", "div.css", "side.css"):
        target = static_dir / name
        if not target.exists():
            shutil.copy(name, target)

    # The JSON is regenerated on every run, so it is always refreshed;
    # copying it only once left the data of the first upload in place.
    shutil.copy("smalljson.json", static_dir / "smalljson.json")


def main():
    """Run the Streamlit app that turns an uploaded PDF into causal-link tables.

    Steps: read the PDF from the sidebar uploader, split it into sentences,
    keep the sentences classified as causal, tag their spans, classify each
    span into a stakeholder group, and aggregate the result into a
    cause x effect matrix.

    Files written to the working directory: 'predictions.csv',
    'final_data.csv', 'smalljson.json' and 'ch.json'. Both tables are also
    offered as downloads, and 'index.html' is embedded in the page.
    """
    st.title("Text to Causal Knowledge Graph")
    st.sidebar.title("Please upload your text documents in one file here:")

    uploaded_file = st.sidebar.file_uploader("Choose a file", type="pdf")
    if uploaded_file is None:
        # Nothing uploaded yet: the script runs once before any upload.
        st.info("Upload a PDF file in the sidebar to start the analysis.")
        return

    causal_sents = _select_causal_sentences(_extract_sentences(uploaded_file))
    rows = _tag_components(causal_sents) if causal_sents else []
    if not rows:
        st.warning("No causal statements were found in the uploaded document.")
        return

    df_final = _build_detail_table(rows)
    df_final.to_csv('predictions.csv')

    df_tab = _count_stakeholder_links(df_final)
    df_tab.to_csv('final_data.csv')

    # One {source, target, value} record per cell of the matrix.
    links = [
        {'source': cause, 'target': effect, 'value': int(df_tab.loc[cause, effect])}
        for cause in df_tab.index
        for effect in df_tab.columns
    ]
    with open('smalljson.json', 'w') as handle:
        json.dump(links, handle)

    # The detailed table is re-read from its CSV so that every value in
    # 'ch.json' is a string, exactly as csv.DictReader yields it.
    with open("predictions.csv", "r", newline='', encoding='utf-8') as handle:
        records = [dict(row) for row in csv.DictReader(handle)]
    with open("ch.json", "w") as handle:
        json.dump(records, handle)

    def convert_df(df):
        """Serialize *df* to UTF-8 encoded CSV bytes for a download button."""
        return df.to_csv().encode('utf-8')

    csv1 = convert_df(df_final.astype(str))
    csv2 = convert_df(df_tab.astype(str))

    with st.container():
        st.download_button(label="Download the detailed result table", data=csv1,
                           file_name='results.csv', mime='text/csv')
        st.download_button(label="Download the result table", data=csv2,
                           file_name='final_data.csv', mime='text/csv')

    _publish_static_assets()

    with open("index.html", 'r', encoding='utf-8') as html_file:
        source_code = html_file.read()
    components.html(source_code)
520
+ # # Define your javascript
521
+ # my_js = """
522
+ # alert("Hello World");
523
+ # """
524
+
525
+ # Wrap the javascript as html code
526
+ #my_html = f"<script>{my_js}</script>"
527
+
528
+
529
+ # with st.container():
530
+ # # Execute your app
531
+ # st.title("Visualization example")
532
+ # # components.html(source_code)
533
+ # #html(my_html)
534
+ # #webbrowser.open('https://webpages.charlotte.edu/ltotapal/')
535
+ # # embed streamlit docs in a streamlit app
536
+ # #components.iframe("https://webpages.charlotte.edu/ltotapal/")
537
+ # st.markdown('<a href="https://webpages.charlotte.edu/ltotapal/" target="_self">Text to Knowledge graph link</a>', unsafe_allow_html=True)
538
+
539
+
540
+
541
+
542
+
543
# Start the Streamlit app only when this file is executed as a script.
if __name__ == "__main__":
    main()