File size: 742 Bytes
e539f46 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
from PyPDF2 import PdfReader
from io import BytesIO
import logging
logger = logging.getLogger(__name__)
def parse_pdf(file_content: bytes) -> str:
    """Extract the text of every page of a PDF supplied as raw bytes.

    Args:
        file_content: The PDF document as bytes. It is wrapped in a
            ``BytesIO`` so ``PdfReader`` can read it like a file.

    Returns:
        The text of all pages concatenated in page order, with nothing
        inserted between pages. Pages whose ``extract_text()`` result is
        empty or ``None`` are skipped. Returns ``""`` if reading or
        extraction raises.
    """
    try:
        reader = PdfReader(BytesIO(file_content))
        # Build the result with a single join rather than growing a string
        # with ``+=`` once per page; falsy page results are filtered out.
        page_texts = (page.extract_text() for page in reader.pages)
        return "".join(chunk for chunk in page_texts if chunk)
    except Exception as e:
        # Deliberate best-effort boundary: any failure is logged and turned
        # into an empty result. ``logger.exception`` keeps the traceback so
        # the failure can still be diagnosed from the logs.
        logger.exception("PDF parse error: %s", e)
        return ""
def parse_text(file_content: bytes) -> str:
    """Decode raw file bytes into text.

    UTF-8 is tried first, and a leading UTF-8 byte-order mark is stripped
    rather than returned as a stray ``\\ufeff`` character. If the bytes are
    not valid UTF-8, Latin-1 is used as a fallback.

    Args:
        file_content: The raw bytes to decode.

    Returns:
        The decoded text, or ``""`` if the fallback decode raises.
    """
    try:
        # "utf-8-sig" decodes exactly like "utf-8" but drops a leading BOM.
        return file_content.decode("utf-8-sig")
    except UnicodeDecodeError:
        try:
            # Latin-1 maps every byte value to a character, so this yields
            # text even when the real encoding is unknown.
            return file_content.decode("latin1")
        except Exception as e:
            # Best-effort: log with traceback and hand back an empty string.
            logger.exception("Text parse error: %s", e)
            return ""
|