File size: 742 Bytes
e539f46
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
from PyPDF2 import PdfReader
from io import BytesIO
import logging

logger = logging.getLogger(__name__)

def parse_pdf(file_content):
    """Extract the text of every page of an in-memory PDF.

    Parsing is best-effort: any failure while opening the document or
    extracting a page is logged and an empty string is returned instead of
    raising.

    Args:
        file_content: The raw bytes of the PDF file.

    Returns:
        The text of all pages concatenated in page order (no separator is
        inserted between pages), or ``""`` if the PDF could not be parsed.
    """
    try:
        reader = PdfReader(BytesIO(file_content))
        page_texts = (page.extract_text() for page in reader.pages)
        # Skip pages whose extraction result is falsy (nothing to append) and
        # join once at the end rather than growing a string page by page.
        return "".join(chunk for chunk in page_texts if chunk)
    except Exception as e:
        # Deliberate catch-all: callers get "" for any unreadable PDF.
        logger.error("PDF parse error: %s", e)
        return ""

def parse_text(file_content):
    """Decode raw file bytes into text.

    UTF-8 is tried first, dropping a leading byte-order mark if one is
    present. If the bytes are not valid UTF-8 the content is decoded as
    Latin-1 instead.

    Args:
        file_content: The raw contents of the file as ``bytes``.

    Returns:
        The decoded text, or ``""`` if the Latin-1 fallback also fails.
    """
    try:
        # "utf-8-sig" decodes exactly like UTF-8 but also strips a leading
        # BOM, so it does not leak into the result as a stray "\ufeff".
        return file_content.decode("utf-8-sig")
    except UnicodeDecodeError:
        pass

    try:
        # Latin-1 assigns a character to every byte value, so for genuine
        # bytes input this is not expected to fail; the handler below is a
        # safety net only.
        return file_content.decode("latin1")
    except Exception as e:
        logger.error("Text parse error: %s", e)
        return ""