# PaperShow / Paper2Poster / get_poster_text.py
# ZaynZhu
# Clean version without large assets
# 7c08dc3
from utils.poster_eval_utils import get_poster_text
from pathlib import Path
import argparse
import os
def write_poster_texts(root_folder: str | Path) -> dict:
"""
For each immediate subfolder of `root_folder`, look for a file named `poster.png`.
If found, call `get_poster_text(path)` and save the returned text to `poster_text.md`
in that same subfolder.
Returns a summary dict with counts and any errors.
"""
root = Path(root_folder)
processed = 0
missing = 0
errors: list[tuple[Path, str]] = []
for subdir in root.iterdir():
if not subdir.is_dir():
continue
if os.path.exists(subdir / "poster_text.md"):
print(f"Skipping {subdir.name} as poster_text.md already exists.")
continue
print(f"Processing {subdir.name}...")
poster_path = subdir / "poster.png"
if not poster_path.exists():
print(f"Missing poster.png in {subdir.name}.")
missing += 1
continue
try:
text = get_poster_text(poster_path, False) # assumes this function is available
out_path = subdir / "poster_text.md"
# Ensure we always write UTF-8 with a trailing newline.
Path(out_path).write_text((text or "").rstrip() + "\n", encoding="utf-8")
processed += 1
except Exception as e: # keep going even if one folder fails
errors.append((poster_path, str(e)))
return {
"processed": processed,
"missing_poster_png": missing,
"errors": errors,
}
if __name__ == "__main__":
    # Command-line entry point: run the extraction over --root_folder and
    # print a short summary of what happened.
    parser = argparse.ArgumentParser(description="Extract poster texts from images.")
    parser.add_argument(
        "--root_folder",
        type=str,
        # Without this, omitting the flag passes None to Path(...) and crashes
        # with a TypeError traceback instead of a usage message.
        required=True,
        help="Root folder containing subfolders with posters.",
    )
    args = parser.parse_args()

    result = write_poster_texts(args.root_folder)

    print(f"Processed {result['processed']} posters.")
    print(f"Missing poster.png files: {result['missing_poster_png']}")
    if result['errors']:
        print("Errors encountered:")
        for path, error in result['errors']:
            print(f" {path}: {error}")