Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -108,7 +108,6 @@ def main():
|
|
108 |
for page in reader.pages:
|
109 |
text = page.extract_text()
|
110 |
text_list.append(text)
|
111 |
-
st.write('PDF pages read')
|
112 |
else:
|
113 |
st.error("Please upload your own PDF to be analyzed")
|
114 |
st.stop()
|
@@ -118,20 +117,20 @@ def main():
|
|
118 |
|
119 |
sentences = nltk.sent_tokenize(text_list_final)
|
120 |
|
121 |
-
st.write('tokeznization completed')
|
122 |
result =[]
|
123 |
for i in sentences:
|
124 |
result1 = i.lower()
|
125 |
result2 = re.sub(r'[^\w\s]','',result1)
|
126 |
result.append(result2)
|
127 |
|
128 |
-
|
129 |
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased") #bert-base-uncased
|
130 |
|
131 |
model_path = "checkpoint-2850"
|
132 |
|
133 |
model = AutoModelForSequenceClassification.from_pretrained(model_path,id2label={0:'non-causal',1:'causal'})
|
134 |
|
|
|
135 |
pipe1 = pipeline("text-classification", model=model,tokenizer=tokenizer)
|
136 |
for sent in result:
|
137 |
pred = pipe1(sent)
|
|
|
108 |
for page in reader.pages:
|
109 |
text = page.extract_text()
|
110 |
text_list.append(text)
|
|
|
111 |
else:
|
112 |
st.error("Please upload your own PDF to be analyzed")
|
113 |
st.stop()
|
|
|
117 |
|
118 |
sentences = nltk.sent_tokenize(text_list_final)
|
119 |
|
|
|
120 |
result =[]
|
121 |
for i in sentences:
|
122 |
result1 = i.lower()
|
123 |
result2 = re.sub(r'[^\w\s]','',result1)
|
124 |
result.append(result2)
|
125 |
|
126 |
+
st.write("--- %s seconds ---" % (time.time() - start_time))
|
127 |
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased") #bert-base-uncased
|
128 |
|
129 |
model_path = "checkpoint-2850"
|
130 |
|
131 |
model = AutoModelForSequenceClassification.from_pretrained(model_path,id2label={0:'non-causal',1:'causal'})
|
132 |
|
133 |
+
st.write('base sequence classification loaded')
|
134 |
pipe1 = pipeline("text-classification", model=model,tokenizer=tokenizer)
|
135 |
for sent in result:
|
136 |
pred = pipe1(sent)
|