ArchitSharma
committed on
Commit
•
379c547
1
Parent(s):
687dee7
Update utils.py
Browse files
utils.py
CHANGED
@@ -2,7 +2,7 @@ import re
|
|
2 |
import requests
|
3 |
import docx2txt
|
4 |
from io import StringIO
|
5 |
-
from PyPDF2 import
|
6 |
|
7 |
from bs4 import BeautifulSoup
|
8 |
from nltk.tokenize import sent_tokenize
|
@@ -99,7 +99,7 @@ def preprocess_text_for_abstractive_summarization(tokenizer, text):
|
|
99 |
|
100 |
|
101 |
def read_pdf(file):
|
102 |
-
pdfReader =
|
103 |
count = pdfReader.numPages
|
104 |
all_page_text = ""
|
105 |
for i in range(count):
|
|
|
2 |
import requests
|
3 |
import docx2txt
|
4 |
from io import StringIO
|
5 |
+
from PyPDF2 import PdfReader
|
6 |
|
7 |
from bs4 import BeautifulSoup
|
8 |
from nltk.tokenize import sent_tokenize
|
|
|
99 |
|
100 |
|
101 |
def read_pdf(file):
|
102 |
+
pdfReader = PdfReader(file)
|
103 |
count = pdfReader.numPages
|
104 |
all_page_text = ""
|
105 |
for i in range(count):
|