Spaces:

tronskel
/

interview_question

No application file

Upload 46 files

2875866 verified 4 months ago

581 Bytes

	import PyPDF2
	from docx import Document

	def extract_text_from_file(uploaded_file):
		"""Extract plain text from an uploaded PDF or DOCX file.

		The file format is chosen from the MIME type exposed as
		``uploaded_file.type``. PDFs are read with ``PyPDF2.PdfReader`` and
		DOCX files with ``docx.Document``; in both cases the per-page /
		per-paragraph text is joined with newlines.

		Args:
			uploaded_file: A file-like object carrying a ``type`` attribute
				holding its MIME type (presumably a Streamlit upload object
				-- confirm against the caller).

		Returns:
			The extracted text, or an empty string when the input is
			``None``, has no ``type`` attribute, or is of an unsupported type.
		"""
		text = ""
		# getattr keeps a missing upload (None) on the same "no text" path as
		# an unsupported type instead of raising AttributeError.
		mime_type = getattr(uploaded_file, "type", None)

		if mime_type == "application/pdf":
			pdf_reader = PyPDF2.PdfReader(uploaded_file)
			# Coerce to "" so a page that yields no text (None) cannot make
			# str.join raise TypeError.
			text = "\n".join(page.extract_text() or "" for page in pdf_reader.pages)

		elif mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
			doc = Document(uploaded_file)
			text = "\n".join(para.text for para in doc.paragraphs)

		return text