Seetha committed on
Commit
a005e0a
1 Parent(s): 2381533

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -3
app.py CHANGED
@@ -108,7 +108,6 @@ def main():
108
  for page in reader.pages:
109
  text = page.extract_text()
110
  text_list.append(text)
111
- st.write('PDF pages read')
112
  else:
113
  st.error("Please upload your own PDF to be analyzed")
114
  st.stop()
@@ -118,20 +117,20 @@ def main():
118
 
119
  sentences = nltk.sent_tokenize(text_list_final)
120
 
121
- st.write('tokeznization completed')
122
  result =[]
123
  for i in sentences:
124
  result1 = i.lower()
125
  result2 = re.sub(r'[^\w\s]','',result1)
126
  result.append(result2)
127
 
128
- print("--- %s seconds ---" % (time.time() - start_time))
129
  tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased") #bert-base-uncased
130
 
131
  model_path = "checkpoint-2850"
132
 
133
  model = AutoModelForSequenceClassification.from_pretrained(model_path,id2label={0:'non-causal',1:'causal'})
134
 
 
135
  pipe1 = pipeline("text-classification", model=model,tokenizer=tokenizer)
136
  for sent in result:
137
  pred = pipe1(sent)
 
108
  for page in reader.pages:
109
  text = page.extract_text()
110
  text_list.append(text)
 
111
  else:
112
  st.error("Please upload your own PDF to be analyzed")
113
  st.stop()
 
117
 
118
  sentences = nltk.sent_tokenize(text_list_final)
119
 
 
120
  result =[]
121
  for i in sentences:
122
  result1 = i.lower()
123
  result2 = re.sub(r'[^\w\s]','',result1)
124
  result.append(result2)
125
 
126
+ st.write("--- %s seconds ---" % (time.time() - start_time))
127
  tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased") #bert-base-uncased
128
 
129
  model_path = "checkpoint-2850"
130
 
131
  model = AutoModelForSequenceClassification.from_pretrained(model_path,id2label={0:'non-causal',1:'causal'})
132
 
133
+ st.write('base sequence classification loaded')
134
  pipe1 = pipeline("text-classification", model=model,tokenizer=tokenizer)
135
  for sent in result:
136
  pred = pipe1(sent)