HuggingDocsLLM / backend_utils /file_handlers.py
Jofthomas's picture
Jofthomas HF staff
initial commit
88768cb
import logging
from abc import ABC, abstractmethod

from PyPDF2 import PdfReader
class FileHandler(ABC):
    """Common interface implemented by every concrete file handler.

    Subclasses must override :meth:`read_file`; the class itself cannot be
    instantiated because that method is abstract.
    """

    @abstractmethod
    def read_file(self, file):
        """Read ``file`` and return the text it contains.

        Parameters:
            file (UploadedFile): The file to read.

        Returns:
            str: The extracted text.
        """
class PDFHandler(FileHandler):
    """File handler that extracts the text content of PDF documents."""

    def read_file(self, file):
        """Extract the text of every page of a PDF.

        Parameters:
            file: The PDF to read; handed unchanged to ``PdfReader``.

        Returns:
            str: The text of all pages concatenated in page order. Pages
            whose extraction yields nothing contribute no text. An empty
            string is returned if reading or extraction raises.
        """
        try:
            pdf_reader = PdfReader(file)
            # Join once instead of growing a string with ``+=`` per page;
            # ``or ""`` skips pages whose extraction result is falsy.
            return "".join(
                page.extract_text() or "" for page in pdf_reader.pages
            )
        except Exception as e:
            # Deliberately best-effort: any failure while parsing yields an
            # empty result rather than propagating to the caller. The error
            # is logged with its traceback so it is not silently lost.
            logging.getLogger(__name__).exception("Error reading file: %s", e)
            return ""
class FileHandlerFactory:
    """Creates the file handler that matches a given file type."""

    @staticmethod
    def get_file_handler(file_type):
        """Return a handler instance for ``file_type``.

        Parameters:
            file_type: The type string to look up; only
                ``"application/pdf"`` is recognised.

        Returns:
            A new ``PDFHandler`` when ``file_type`` is ``"application/pdf"``.

        Raises:
            ValueError: If ``file_type`` is anything else.
        """
        # Guard clause: reject everything that is not a PDF up front.
        if file_type != "application/pdf":
            raise ValueError("Invalid file type")
        return PDFHandler()