Spaces:

Iskabore
/

multilingual-text-summarizer

Sleeping

create summarizer module

9057a10 7 months ago

1.42 kB

	import PyPDF2
	import os
	from langdetect import detect


	def read_txt_file(filepath: str) -> str:
	    """Return the complete contents of a UTF-8 encoded text file.

	    Args:
	        filepath: Path of the .txt file to load.

	    Returns:
	        The file's text, or an empty string when the file cannot be read.
	        In the failure case the error is printed to stdout rather than
	        raised.
	    """
	    try:
	        with open(filepath, "r", encoding="utf-8") as handle:
	            return handle.read()
	    except Exception as err:
	        # Best-effort reader: report the problem and fall through to "".
	        print(f"❌ Error reading TXT file: {err}")
	    return ""


	def read_pdf_file(filepath: str) -> str:
	    """Extract the text of every page of a PDF file using PyPDF2.

	    Args:
	        filepath: Path of the PDF file to read.

	    Returns:
	        The text of all pages concatenated in page order, with leading and
	        trailing whitespace stripped. An empty string is returned when the
	        file cannot be opened or parsed; the error is printed to stdout
	        rather than raised.
	    """
	    try:
	        with open(filepath, "rb") as file:
	            pdf_reader = PyPDF2.PdfReader(file)
	            # Iterate the pages directly and join once instead of indexing
	            # and growing a string with +=. ``or ""`` tolerates a page whose
	            # extraction yields nothing, so a single blank page does not
	            # raise and discard the text of the whole document.
	            text = "".join(
	                page.extract_text() or "" for page in pdf_reader.pages
	            )
	        return text.strip()
	    except Exception as e:
	        # Best-effort reader: report the problem and return no text.
	        print(f"❌ Error reading PDF file: {e}")
	        return ""


	def read_file(filepath: str) -> str:
	    """Load a .txt or .pdf file and return its text.

	    The reader is chosen from the file extension, compared
	    case-insensitively. Any other extension is reported on stdout and
	    yields an empty string.
	    """
	    extension = os.path.splitext(filepath)[1].lower()

	    if extension == ".txt":
	        return read_txt_file(filepath)
	    if extension == ".pdf":
	        return read_pdf_file(filepath)

	    print(f"❌ Unsupported file type: {filepath}")
	    return ""


	def detect_language(text: str) -> str:
	    """Return the language code langdetect reports for ``text``.

	    Any exception raised by the detector is swallowed and the literal
	    string "unknown" is returned instead.

	    NOTE(review): langdetect's documentation describes detection as
	    non-deterministic for short or ambiguous text unless
	    ``DetectorFactory.seed`` is set; confirm whether seeding is wanted.
	    """
	    try:
	        language_code = detect(text)
	    except Exception:
	        return "unknown"
	    else:
	        return language_code