Seetha committed
Commit dda76b7
1 Parent(s): 4d0fc21

Update app.py

Files changed (1)
  1. app.py +19 -16
app.py CHANGED
@@ -8,10 +8,7 @@ from sklearn.model_selection import KFold
 from transformers import AutoTokenizer, DistilBertTokenizerFast
 # sequence tagging model + training-related
 from transformers import DistilBertForTokenClassification, Trainer, TrainingArguments
-import numpy as np
-import pandas as pd
 import torch
-import json
 import sys
 import os
 from sklearn.metrics import classification_report
@@ -22,28 +19,22 @@ from sklearn.feature_extraction.text import TfidfTransformer
 from sklearn.feature_extraction.text import CountVectorizer
 from sklearn.pipeline import Pipeline, FeatureUnion
 import math
-from sklearn.metrics import accuracy_score
-from sklearn.metrics import precision_recall_fscore_support
-from sklearn.model_selection import train_test_split
+# from sklearn.metrics import accuracy_score
+# from sklearn.metrics import precision_recall_fscore_support
 import json
 import re
 import numpy as np
 import pandas as pd
-import re
 import nltk
 nltk.download("punkt")
 import string
 from sklearn.model_selection import train_test_split
 from transformers import AutoTokenizer, Trainer, TrainingArguments, AutoModelForSequenceClassification, AutoConfig
-import torch
 from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
 import itertools
-import json
-import glob
 from transformers import TextClassificationPipeline, TFAutoModelForSequenceClassification, AutoTokenizer
 from transformers import pipeline
-import pickle
-import urllib.request
+import pickle
 import csv
 import pdfplumber
 import pathlib
@@ -55,6 +46,7 @@ from PyPDF2 import PdfReader
 from huggingface_hub import HfApi
 import io
 from datasets import load_dataset
+import time
 
 import huggingface_hub
 from huggingface_hub import Repository
@@ -62,8 +54,8 @@ from datetime import datetime
 import pathlib as Path
 from requests import get
 import urllib.request
-import gradio as gr
-from gradio import inputs, outputs
+# import gradio as gr
+# from gradio import inputs, outputs
 from datasets import load_dataset
 from huggingface_hub import HfApi, list_models
 import os
@@ -130,7 +122,8 @@ def main():
         result1 = i.lower()
         result2 = re.sub(r'[^\w\s]','',result1)
         result.append(result2)
-
+
+    print("--- %s seconds ---" % (time.time() - start_time))
     tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased") #bert-base-uncased
 
     model_path = "checkpoint-2850"
@@ -144,6 +137,9 @@ def main():
         if lab['label'] == 'causal': #causal
            causal_sents.append(sent)
 
+    st.write('causal sentence classification finished')
+    st.write("--- %s seconds ---" % (time.time() - start_time))
+
     model_name = "distilbert-base-cased"
     tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)
 
@@ -165,7 +161,10 @@ def main():
            sentence_pred.append(k)
            class_list.append(i['word'])
            entity_list.append(i['entity_group'])
-
+
+    st.write('causality extraction finished')
+    st.write("--- %s seconds ---" % (time.time() - start_time))
+
     # filename = 'Checkpoint-classification.sav'
     # loaded_model = pickle.load(open(filename, 'rb'))
     # loaded_vectorizer = pickle.load(open('vectorizefile_classification.pickle', 'rb'))
@@ -191,6 +190,9 @@ def main():
 
     predictions = loaded_model.predict(pad_sequences(tokenizer.texts_to_sequences(class_list),maxlen=MAX_SEQUENCE_LENGTH))
     predicted = np.argmax(predictions,axis=1)
+
+    st.write('stakeholder taxonomy finished')
+    st.write("--- %s seconds ---" % (time.time() - start_time))
     pred1 = predicted
     level0 = []
     count =0
@@ -574,4 +576,5 @@ def main():
 
 
 if __name__ == '__main__':
+    start_time = time.time()
     main()
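
What this commit adds is a simple wall-clock staging pattern: start_time is taken once inside the __main__ guard (assignment there happens at module scope, so the reads inside main() resolve to it as a global), and each pipeline stage then reports time.time() - start_time when it finishes. A minimal, self-contained sketch of that pattern; the log_stage helper and the explicit streamlit import are illustrative assumptions (app.py's own streamlit import is not shown in this diff):

import time

import streamlit as st  # assumed: app.py calls st.write, so it imports streamlit elsewhere

def log_stage(stage, start_time):
    # Mirror the reporting this commit adds after each stage: name the
    # finished stage, then the elapsed wall-clock seconds since startup.
    st.write('%s finished' % stage)
    st.write("--- %s seconds ---" % (time.time() - start_time))

if __name__ == '__main__':
    start_time = time.time()
    # ... run a pipeline stage here, e.g. causal sentence classification ...
    log_stage('causal sentence classification', start_time)

One inconsistency visible in the diff itself: the first measurement (after text cleaning) goes through print rather than st.write, so it lands in the server log while the later stage timings appear on the Streamlit page.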