Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	| # text_extractor.py | |
| import os | |
| import docx2txt | |
| import PyPDF2 | |
def extract_text_from_file(file_path):
    """Return the plain text of a PDF or DOCX file.

    The file type is chosen from the lower-cased extension of *file_path*,
    so ``.PDF`` and ``.pdf`` are treated the same.

    Args:
        file_path: Path of the file to read.

    Returns:
        For ``.pdf``: the text of every page joined by single spaces; a page
        whose ``extract_text()`` result is falsy contributes an empty string.
        For ``.docx``: whatever ``docx2txt.process`` returns.
        If extraction fails, ``"[Error extracting PDF text]"`` or
        ``"[Error extracting DOCX text]"``; for any other extension,
        ``"[Unsupported file type]"``.
    """
    ext = os.path.splitext(file_path)[1].lower()

    if ext == ".pdf":
        try:
            with open(file_path, "rb") as f:
                reader = PyPDF2.PdfReader(f)
                return " ".join(page.extract_text() or "" for page in reader.pages)
        # Catch Exception rather than using a bare ``except`` so that
        # KeyboardInterrupt / SystemExit still propagate to the caller.
        except Exception:
            return "[Error extracting PDF text]"
    elif ext == ".docx":
        try:
            return docx2txt.process(file_path)
        # Same reasoning as above: keep best-effort behaviour for real
        # errors without swallowing interpreter-level exits.
        except Exception:
            return "[Error extracting DOCX text]"
    else:
        return "[Unsupported file type]"
