Spaces:
Paused
Paused
File size: 401 Bytes
2e748b3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 |
from pathlib import Path
from pdfminer.high_level import extract_text
PDF_DIR = "data"
OUT_DIR = "converted_txt"
Path(OUT_DIR).mkdir(exist_ok=True)
for pdf_path in Path(PDF_DIR).glob("*.pdf"):
text = extract_text(str(pdf_path))
output_file = Path(OUT_DIR) / (pdf_path.stem + ".txt")
output_file.write_text(text, encoding="utf-8")
print(f"β
{pdf_path.name} β {output_file.name}")
|