# Spaces: Sleeping
# (Page-status text captured together with the source; it is not part of the program.)
# import all packages | |
import requests | |
import streamlit as st | |
from sklearn.model_selection import StratifiedKFold | |
from sklearn.model_selection import train_test_split | |
from sklearn.model_selection import KFold | |
# tokenizer | |
from transformers import AutoTokenizer, DistilBertTokenizerFast | |
# sequence tagging model + training-related | |
from transformers import DistilBertForTokenClassification, Trainer, TrainingArguments | |
import numpy as np | |
import pandas as pd | |
import torch | |
import json | |
import sys | |
import os | |
#from datasets import load_metric | |
from sklearn.metrics import classification_report | |
from pandas import read_csv | |
from sklearn.linear_model import LogisticRegression | |
import sklearn.model_selection | |
from sklearn.feature_extraction.text import TfidfTransformer | |
from sklearn.feature_extraction.text import CountVectorizer | |
#from sklearn.naive_bayes import MultinomialNB | |
#from sklearn.model_selection import GridSearchCV | |
from sklearn.pipeline import Pipeline, FeatureUnion | |
import math | |
from sklearn.metrics import accuracy_score | |
from sklearn.metrics import precision_recall_fscore_support | |
from sklearn.model_selection import train_test_split | |
#from sklearn.metrics import Scorer | |
#from sklearn.metrics import SCORERS | |
import json | |
import re | |
import numpy as np | |
import pandas as pd | |
import re | |
import nltk | |
nltk.download("punkt") | |
#stemmer = nltk.SnowballStemmer("english") | |
#from nltk.corpus import stopwords | |
import string | |
from sklearn.model_selection import train_test_split | |
# import seaborn as sns | |
# from sklearn.metrics import confusion_matrix | |
# from sklearn.metrics import classification_report, ConfusionMatrixDisplay | |
from transformers import AutoTokenizer, Trainer, TrainingArguments, AutoModelForSequenceClassification, AutoConfig | |
import torch | |
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler | |
import itertools | |
import json | |
import glob | |
from transformers import TextClassificationPipeline, TFAutoModelForSequenceClassification, AutoTokenizer | |
from transformers import pipeline | |
import pickle | |
import urllib.request | |
# from sklearn.feature_extraction.text import TfidfTransformer | |
# from sklearn.feature_extraction.text import CountVectorizer | |
#from PyPDF2 import PdfReader | |
#from urllib.request import urlopen | |
#from tabulate import tabulate | |
import csv | |
#import gdown | |
import pdfplumber | |
import pathlib | |
import shutil | |
import webbrowser | |
from streamlit.components.v1 import html | |
import streamlit.components.v1 as components | |
from PyPDF2 import PdfReader | |
from huggingface_hub import HfApi | |
import io | |
from datasets import load_dataset | |
import joblib | |
# from huggingface_hub import Repository | |
# from git import repo | |
# api = HfApi() | |
#device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') | |
# from git import Repo | |
# Repo.clone_from('https://github.com/gseetha04/IMA-weights.git', branch='master') | |
# Categories used for both axes (cause rows, effect columns) of the count table.
_STAKEHOLDERS = ('Non-performance', 'Investors', 'Customers', 'Employees', 'Society')

# Class index -> label for the predictions of the model loaded from 'model.bin'.
_LEVEL2_LABELS = {
    0: 'Customers',
    1: 'Employees',
    2: 'Investors',
    3: 'Non-performance',
    4: 'Society',
    5: 'Unclassified',
}

# Keyword rules that override the predicted label; the first matching rule wins.
# 'social responsib' is a plain prefix: the former 'social responsib*' was a
# literal substring that could never occur in punctuation-stripped text.
_KEYWORD_OVERRIDES = (
    ('Customers', ('customers', 'client', 'consumer', 'user')),
    ('Investors', ('investor', 'finance', 'shareholder', 'stockholder', 'owners')),
    ('Employees', ('employee', 'worker', 'staff')),
    ('Society', ('society', 'societal', 'social responsib', 'social performance', 'community')),
)


def _split_into_sentences(page_texts):
    """Return lower-cased, punctuation-free sentences from the page texts.

    Newlines inside a page are removed and double quotes are dropped before
    sentence splitting. Pages are joined with a space rather than passed
    through ``str(list)``: the list repr leaked escape sequences such as
    ``\\xa0`` or ``\\t`` into the text, which turned into stray letters once
    punctuation was stripped.
    """
    flattened = ' '.join(text.replace('\n', '') for text in page_texts)
    flattened = flattened.replace('"', '')
    return [
        re.sub(r'[^\w\s]', '', sentence.lower())
        for sentence in nltk.sent_tokenize(flattened)
    ]


def _select_causal_sentences(sentences):
    """Return the sentences that the sequence classifier labels 'causal'."""
    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    model = AutoModelForSequenceClassification.from_pretrained(
        "checkpoint-2850", id2label={0: 'non-causal', 1: 'causal'}
    )
    classifier = pipeline("text-classification", model=model, tokenizer=tokenizer)
    causal = []
    for sentence in sentences:
        for prediction in classifier(sentence):
            if prediction['label'] == 'causal':
                causal.append(sentence)
    return causal


def _extract_components(causal_sentences):
    """Run the token-classification pipeline over the causal sentences.

    Returns three parallel lists with one entry per aggregated entity: the
    source sentence, the entity text ('word') and its tag ('entity_group').
    """
    tokenizer = DistilBertTokenizerFast.from_pretrained("distilbert-base-cased")
    model = DistilBertForTokenClassification.from_pretrained("DistilBertforTokenclassification")
    tagger = pipeline('ner', model=model, tokenizer=tokenizer, aggregation_strategy='simple')
    sentence_pred, class_list, entity_list = [], [], []
    for sentence in causal_sentences:
        for entity in tagger(sentence):
            sentence_pred.append(sentence)
            class_list.append(entity['word'])
            entity_list.append(entity['entity_group'])
    return sentence_pred, class_list, entity_list


def _classify_components(components):
    """Predict a class index for every component with the model in 'model.bin'.

    NOTE(review): the count/tf-idf vectorizer is fitted on the components
    being classified, so its vocabulary is not the one the model was trained
    with. The vectorizer fitted at training time should presumably be
    persisted and loaded here instead - confirm and replace.
    NOTE(security): joblib.load unpickles the file; only ship a trusted
    'model.bin'.
    """
    loaded_model = joblib.load('model.bin')
    vectorizer = Pipeline([
        ('count_vect', CountVectorizer(ngram_range=(1, 3))),
        ('tfidf_transformer', TfidfTransformer()),
    ])
    features = vectorizer.fit_transform(components)
    return loaded_model.predict(features)


def _apply_keyword_overrides(components, labels):
    """Return a copy of *labels* with keyword-based overrides applied.

    For each component, the first rule in ``_KEYWORD_OVERRIDES`` having a
    keyword that occurs as a substring replaces the label at that position.
    """
    adjusted = list(labels)
    for position, component in enumerate(components):
        for label, keywords in _KEYWORD_OVERRIDES:
            if any(keyword in component for keyword in keywords):
                adjusted[position] = label
                break
    return adjusted


def _build_component_table(sentence_pred, class_list, entity_list, level1, level2):
    """Assemble the per-component result table.

    Steps: number the sentences, append a following '##' word piece to the
    component before it and drop the '##' rows, keep only sentences that
    contain at least one 'E' component, rename 'C'/'E' to 'cause'/'effect',
    drop 'CT' rows and replace runs of '#' in the component text by a space.
    """
    sent_id, _ = pd.factorize(np.asarray(sentence_pred, dtype=object))
    table = pd.DataFrame({
        'Id': sent_id,
        'Fullsentence': sentence_pred,
        'Component': class_list,
        'causeOrEffect': entity_list,
        'Labellevel1': level1,
        'Labellevel2': level2,
    })

    following = table['Component'].shift(-1)
    has_suffix = following.str.startswith('##', na=False)
    table.loc[has_suffix, 'Component'] += ' ' + following[has_suffix]
    table = table[~table['Component'].astype(str).str.startswith('##')]

    # Select by membership instead of concatenating the ids to exclude:
    # np.concatenate([]) raised when every sentence already had an effect.
    ids_with_effect = table.loc[table['causeOrEffect'] == 'E', 'Id'].unique()
    table = table[table['Id'].isin(ids_with_effect)].reset_index(drop=True)

    # Assign the result back; replace(..., inplace=True) on a selected column
    # does not update the frame under pandas Copy-on-Write.
    table['causeOrEffect'] = table['causeOrEffect'].replace({'C': 'cause', 'E': 'effect'})
    table = table[table['causeOrEffect'] != 'CT'].copy()
    table['Component'] = table['Component'].str.replace(r'#+', ' ', regex=True)
    return table


def _build_transition_table(df_final):
    """Count cause -> effect category pairs over all sentences.

    For every sentence ('Id') and every (cause category, effect category)
    pair, if the sentence has at least one cause of the first category and at
    least one effect of the second, the larger of the two counts is added to
    the cell [cause category, effect category]. Rows are cause categories,
    columns are effect categories.

    Each cell has its own accumulator (the earlier code reused one variable
    for both directions of a pair, so the second overwrote the first), and
    every sentence is visited, including the one with the highest Id.
    """
    labels = list(_STAKEHOLDERS)
    counts = np.zeros((len(labels), len(labels)), dtype=int)
    for _, group in df_final.groupby('Id'):
        role = group['causeOrEffect']
        cause_counts = group.loc[role == 'cause', 'Labellevel2'].value_counts()
        effect_counts = group.loc[role == 'effect', 'Labellevel2'].value_counts()
        for row, cause_label in enumerate(labels):
            n_cause = int(cause_counts.get(cause_label, 0))
            if n_cause == 0:
                continue
            for col, effect_label in enumerate(labels):
                n_effect = int(effect_counts.get(effect_label, 0))
                if n_effect > 0:
                    counts[row, col] += max(n_cause, n_effect)
    return pd.DataFrame(counts, index=labels, columns=labels)


def main():
    """Streamlit page: turn an uploaded PDF into cause/effect result tables.

    Reads the PDF chosen in the sidebar, keeps the sentences classified as
    causal, extracts their tagged components, assigns each component a
    category, and offers an Excel workbook with the category count table
    ('count_result') and the multi-word components ('Detailed_results').
    The count table is also written to 'ch.json' as source/target/value
    records.
    """
    st.title("Text to Causal Knowledge Graph")
    st.sidebar.title("Please upload your text documents in one file here:")

    try:
        uploaded_file = st.sidebar.file_uploader("Choose a file", type="pdf")
    except Exception:
        # Fall back to the sample document. Keep the path (not a PdfReader)
        # so that the PdfReader(...) call below can open it.
        uploaded_file = 'sample_anno.pdf'
        st.error("Please upload your own PDF to be analyzed")

    if uploaded_file is None:
        st.error("Please upload your own PDF to be analyzed")
        st.stop()

    reader = PdfReader(uploaded_file)
    # Guard against pages for which no text could be extracted.
    page_texts = [page.extract_text() or '' for page in reader.pages]

    sentences = _split_into_sentences(page_texts)
    causal_sents = _select_causal_sentences(sentences)
    sentence_pred, class_list, entity_list = _extract_components(causal_sents)
    if not class_list:
        # Nothing to classify; fitting the vectorizer on an empty list raises.
        st.error("No causal statements were found in the uploaded PDF")
        st.stop()

    predicted = _classify_components(class_list)
    level0 = ['Non-Performance' if label_id == 3 else 'Performance' for label_id in predicted]
    pred_val = _apply_keyword_overrides(
        class_list, [_LEVEL2_LABELS[label_id] for label_id in predicted]
    )

    df_final = _build_component_table(sentence_pred, class_list, entity_list, level0, pred_val)
    # The detailed sheet lists only components made of more than one word.
    df_final1 = df_final[df_final['Component'].str.split().str.len().gt(1)]
    df_tab = _build_transition_table(df_final)

    buffer = io.BytesIO()
    # The context manager closes the writer; no explicit close() is needed.
    with pd.ExcelWriter(buffer, engine="xlsxwriter") as writer:
        # NOTE(review): index=False drops the cause-category row labels from
        # the count sheet; rows follow the same order as the columns.
        df_tab.to_excel(writer, sheet_name="count_result", index=False)
        df_final1.to_excel(writer, sheet_name="Detailed_results", index=False)

    # Build the records from the table just computed, not from a
    # 'final_data.csv' that this run never writes.
    json_data = [
        {'source': row, 'target': col, 'value': int(df_tab.loc[row, col])}
        for row in df_tab.index
        for col in df_tab.columns
    ]
    with open('ch.json', 'w') as f:
        json.dump(json_data, f)

    with st.container():
        st.download_button(
            label="Download the result table",
            data=buffer,
            file_name="t2cg_outputs.xlsx",
            mime="application/vnd.ms-excel",
        )
# Script entry point: build the page only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()