Spaces:

dohyune
/

boxlabel

Build error

App Files Files Community

dohyune committed on Aug 28, 2025

Commit

fbfae11

verified ·

1 Parent(s): 715aec7

Update app.py

Browse files

Files changed (1) hide show

app.py +223 -204

app.py CHANGED Viewed

@@ -1,221 +1,240 @@
 import streamlit as st
 import pandas as pd
-import io, zipfile, re, html, json
-st.set_page_config(page_title="📦 박스라벨 자동 생성기 (HWPX 필드 평문화)", layout="wide")
-st.title("📦 박스라벨 자동 생성기 — HWPX **필드 제거/평문화 방식**")
-# ================= 공통 유틸 =================
-def compute_year_range(series: pd.Series) -> str:
     s = series.astype(str).fillna("")
-    valid = s[~s.isin(["", "0", "0000"])]
-    if len(valid) == 0:
         return "0000-0000"
-    valid_int = pd.to_numeric(valid, errors="coerce").dropna().astype(int)
-    if len(valid_int) == 0:
         return "0000-0000"
-    return f"{valid_int.min():04d}-{valid_int.max():04d}"
-def build_merged_df(df: pd.DataFrame) -> pd.DataFrame:
     df = df.copy()
     df["박스번호"] = df["박스번호"].astype(str).str.zfill(4)
-    if "제목" in df.columns:
-        df["제목"] = df["제목"].astype(str)
-    # 생산연도(범위) = 종료연도 그룹 범위
     if "종료연도" in df.columns:
-        prod_df = df.groupby("박스번호")["종료연도"].apply(compute_year_range).reset_index()
-        prod_df.columns = ["박스번호", "생산연도"]
     else:
-        prod_df = pd.DataFrame({"박스번호": df["박스번호"].unique(), "생산연도": "0000-0000"})
-    # 목록(관리번호 + 제목)
     has_mgmt = "관리번호" in df.columns
     list_rows = []
     for box, g in df.groupby("박스번호"):
-        lines = [f"- {r['관리번호']} {r['제목']}" if has_mgmt else f"- {r['제목']}"
                  for _, r in g.iterrows()]
-        list_rows.append({"박스번호": box, "목록": "\r\n".join(lines)})
     list_df = pd.DataFrame(list_rows)
-    meta_cols = ["박스번호","종료연도","보존기간","단위업무","기록물철","제목"]
-    meta_exist = [c for c in meta_cols if c in df.columns]
-    meta_df = df.groupby("박스번호", as_index=False).first()[meta_exist] if meta_exist \
-              else pd.DataFrame({"박스번호": df["박스번호"].unique()})
-    return meta_df.merge(list_df, on="박스번호", how="left").merge(prod_df, on="박스번호", how="left")
-def _runs_plain(text: str) -> str:
-    return f"<hp:run><hp:t>{html.escape('' if text is None else str(text))}</hp:t></hp:run>"
-def _runs_list(text: str) -> str:
-    if text is None: return ""
-    lines = str(text).replace("\r\n", "\n").split("\n")
-    parts = []
-    for i, ln in enumerate(lines):
-        if i > 0:
-            parts.append("<hp:lineBreak/>")
-        parts.append(f"<hp:run><hp:t>{html.escape(ln)}</hp:t></hp:run>")
-    return "".join(parts)
-# =============== HWPX 쓰기 (mimetype 맨앞/무압축) ===============
-def write_hwpx_like_src(zin: zipfile.ZipFile, writer_fn) -> bytes:
-    out = io.BytesIO()
-    zout = zipfile.ZipFile(out, "w")
-    if "mimetype" in zin.namelist():
-        zi = zipfile.ZipInfo("mimetype")
-        zi.compress_type = zipfile.ZIP_STORED
-        zout.writestr(zi, zin.read("mimetype"))
-    for e in zin.infolist():
-        if e.filename == "mimetype":
-            continue
-        data = zin.read(e.filename)
-        if e.filename.startswith("Contents/") and e.filename.endswith(".xml"):
-            try:
-                s = data.decode("utf-8", errors="ignore")
-                s2 = writer_fn(e.filename, s)
-                data = s2.encode("utf-8")
-            except Exception:
-                pass
-        zi = zipfile.ZipInfo(e.filename)
-        zi.compress_type = zipfile.ZIP_DEFLATED
-        zout.writestr(zi, data)
-    zout.close(); out.seek(0)
-    return out.getvalue()
-# =============== 필드 평문화(제거) 치환 ===============
-# 한글은 필드가 보통 이렇게 들어갑니다:
-# <hp:run> ... <hp:fieldBegin name="키" .../> ... </hp:run>
-#  (중간에 여러 run/텍스트)
-# <hp:run> ... <hp:fieldEnd/> ... </hp:run>
-# => 아래 정규식으로 "fieldBegin run ~ fieldEnd run" 전체를 값 run들로 대체합니다.
-FIELD_RANGE_RE_TMPL = (
-    r'(<hp:run[^>]*>[^<]*'
-    r'<hp:fieldBegin[^>]*name="{name}"[^>]*/>'
-    r'.*?</hp:run>)'
-    r'(.*?)'
-    r'(<hp:run[^>]*>.*?<hp:fieldEnd[^>]*/>.*?</hp:run>)'
-)
-def apply_field_flatten(hwpx_bytes: bytes, mapping: dict, collect_debug=False):
-    dbg = {"mode":"field-flatten","files_touched":[], "field_hits":{}} if collect_debug else None
-    zin = zipfile.ZipFile(io.BytesIO(hwpx_bytes), "r")
-    # 실제 존재하는 name만 추출
-    present = set()
-    for e in zin.infolist():
-        if e.filename.startswith("Contents/") and e.filename.endswith(".xml"):
-            try:
-                s = zin.read(e.filename).decode("utf-8", errors="ignore")
-                for k in mapping.keys():
-                    if f'name="{k}"' in s:
-                        present.add(k)
-            except:
-                pass
-    def writer(fname: str, xml: str) -> str:
-        changed = False
-        for k in present:
-            val = mapping.get(k, "")
-            is_list = bool(re.match(r"^(목록|list)\d+$", k, re.IGNORECASE))
-            replacement_runs = _runs_list(val) if is_list else _runs_plain(val)
-            pat = re.compile(FIELD_RANGE_RE_TMPL.format(name=re.escape(k)), re.DOTALL)
-            xml2, n = pat.subn(replacement_runs, xml)
-            if n:
-                changed = True
-                xml = xml2
-                if dbg: dbg["field_hits"][k] = dbg["field_hits"].get(k, 0) + 1
-        if changed and dbg and fname not in dbg["files_touched"]:
-            dbg["files_touched"].append(fname)
-        return xml
-    out = write_hwpx_like_src(zin, writer)
-    zin.close()
-    return (out, dbg) if collect_debug else (out, None)
-# ================= UI =================
-with st.expander("사용법", expanded=True):
-    st.markdown("""
-- 템플릿은 **한글 필드컨트롤**이어야 합니다. (예: `name="박스번호1"`)
-- 이 앱은 필드 구간을 **평문화(필드 제거)** 하여 값 run들로 바꿉니다. → 한글 뷰어에서 **항상 보임**.
-- 라벨 한 페이지에 N개면, 필드명은 `박스번호1..N`, `종료연도1..N`, `보존기간1..N`, `단위업무1..N`, `기록물철1..N`, `목록1..N`.
-""")
-tpl_file = st.file_uploader("📄 HWPX 템플릿 업로드", type=["hwpx"])
-batch_size = st.number_input("템플릿의 라벨 세트 개수 (한 페이지 N개)", min_value=1, max_value=12, value=3, step=1)
-data_file = st.file_uploader("📊 데이터 업로드 (Excel/CSV)", type=["xlsx","xls","csv"])
-if tpl_file and data_file:
-    tpl_bytes = tpl_file.read()
-    df = pd.read_csv(data_file) if data_file.name.lower().endswith(".csv") else pd.read_excel(data_file)
-    if "박스번호" not in df.columns:
-        st.error("❌ 필수 컬럼 '박스번호'가 없습니다.")
-        st.stop()
-    st.success("✅ 위치 매핑 완료 (엑셀 측)")
     st.dataframe(df.head(10), use_container_width=True)
-    merged = build_merged_df(df)
-    box_list = merged["박스번호"].astype(str).str.zfill(4).unique().tolist()
-    st.subheader("🔎 업로드된 박스번호 목록")
-    st.write(f"총 **{len(box_list)}**개")
-    st.dataframe(pd.DataFrame({"박스번호": box_list}), use_container_width=True, height=240)
-    selected = st.multiselect("생성할 박스번호 선택 (비우면 전체 생성)", options=box_list)
-    work = merged[merged["박스번호"].isin(selected)] if selected else merged
-    rows = work.sort_values("박스번호").to_dict(orient="records")
-    # 1페이지 프리뷰
-    st.subheader("🧪 1페이지 매핑 프리뷰")
-    keys = ["박스번호","종료연도","보존기간","단위업무","기록물철","목록"]
-    n = int(batch_size)
-    preview = {}
-    for i in range(n):
-        if i < len(rows):
-            r = rows[i]
-            for k in keys:
-                preview[f"{k}{i+1}"] = r.get("생산연도","") if k=="종료연도" else r.get(k,"")
-        else:
-            for k in keys:
-                preview[f"{k}{i+1}"] = ""
-    st.dataframe(
-        pd.DataFrame([{"필드명":k, "값 앞부분":str(v)[:120]} for k,v in sorted(preview.items())]),
-        use_container_width=True, height=320
-    )
-    if st.button("🚀 라벨 생성 (페이지별 HWPX ZIP)"):
-        mem_zip = io.BytesIO()
-        zout = zipfile.ZipFile(mem_zip, "w", zipfile.ZIP_DEFLATED)
-        pages = (len(rows) + n - 1) // n
-        all_dbg = []
-        for p in range(pages):
-            chunk = rows[p*n:(p+1)*n]
-            mapping = {}
-            for i in range(n):
-                if i < len(chunk):
-                    r = chunk[i]
-                    for k in keys:
-                        mapping[f"{k}{i+1}"] = r.get("생산연도","") if k=="종료연도" else r.get(k,"")
-                else:
-                    for k in keys:
-                        mapping[f"{k}{i+1}"] = ""
-            out_hwpx, dbg = apply_field_flatten(tpl_bytes, mapping, collect_debug=True)
-            all_dbg.append({"page": p+1, "stats": dbg})
-            name = "_".join([r.get("박스번호","") for r in chunk]) if chunk else f"empty_{p+1}"
-            zout.writestr(f"label_{name}.hwpx", out_hwpx)
-        zout.close(); mem_zip.seek(0)
-        st.download_button("⬇️ ZIP 다운로드", data=mem_zip, file_name="labels_by_page.zip", mime="application/zip")
-        st.download_button("⬇️ 디버그(JSON)", data=json.dumps(all_dbg, ensure_ascii=False, indent=2),
-                           file_name="debug.json", mime="application/json")
-st.caption("필드 구간을 통째로 값 run들로 교체합니다. (필드 제거 → 값이 확실히 보입니다)")

 import streamlit as st
 import pandas as pd
+from reportlab.pdfgen import canvas
+from reportlab.pdfbase import pdfmetrics
+from reportlab.pdfbase.ttfonts import TTFont
+from reportlab.lib.pagesizes import A4
+from reportlab.lib.units import mm
+from io import BytesIO
+import math
# Page chrome: wide layout, title, and a usage guide that is expanded by default.
st.set_page_config(page_title="📦 박스라벨 PDF 출력기", layout="wide")
st.title("📦 박스라벨 PDF 출력기 (라벨 규격 커스텀 / 한국어 폰트 업로드)")
with st.expander("사용 방법", expanded=True):
    st.markdown("""
1. **엑셀/CSV 업로드** → 필수 컬럼: `박스번호` / 권장: `종료연도`, `보존기간`, `단위업무`, `기록물철`, `제목`, `관리번호`
2. (선택) **TTF 폰트 업로드**(예: 나눔고딕, 본고딕, 맑은 고딕 등). 업로드 안 하면 기본 폰트 사용(영문 위주).
3. **라벨 규격**(페이지 여백, 라벨 가로/세로, 행/열, 라벨 간격)을 입력.
4. **텍스트 배치**(라벨 안쪽 패딩, 폰트 크기, 줄 간격 등) 조정.
5. **PDF 생성** → 라벨 용지(Formtec 등)에 인쇄.
    """)

# -----------------
# Data loading
# -----------------
file = st.file_uploader("📊 데이터 업로드 (Excel/CSV)", type=["xlsx","xls","csv"])
if not file:
    # Nothing uploaded yet: downstream sections test `df is not None`.
    df = None
elif file.name.lower().endswith(".csv"):
    df = pd.read_csv(file)
else:
    df = pd.read_excel(file)

# Required-column check: stop the script run when the box-number column is absent.
if df is not None and "박스번호" not in df.columns:
    st.error("❌ 필수 컬럼 '박스번호'가 없습니다.")
    st.stop()
# -----------------
# Font setup
# -----------------
st.subheader("🔤 폰트 설정")
font_file = st.file_uploader("한국어 폰트(TTF) 업로드 (예: NanumGothic.ttf / MalgunGothic.ttf)", type=["ttf"])
# Start from the built-in default font (Latin-centric) so that both "no upload"
# and "registration failed" leave `font_name` pointing at a usable font. The
# previous placeholder name "BaseFont" was never registered, so a failed upload
# contradicted the warning below, which promises the default font.
font_name = "Helvetica"
if font_file:
    try:
        import tempfile

        # TTFont is handed a filesystem path here, so the uploaded bytes are
        # spilled to a temporary file. The `with` block closes the handle before
        # registration; delete=False keeps the file on disk for ReportLab.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".ttf") as tmp:
            tmp.write(font_file.read())
        pdfmetrics.registerFont(TTFont("UserKorean", tmp.name))
        font_name = "UserKorean"
        st.success("✅ 폰트 등록 완료: UserKorean")
    except Exception as e:
        # Best effort: report the reason and continue with the default font.
        st.warning(f"폰트 등록 실패. 기본 폰트 사용합니다. (사유: {e})")
# -----------------
# Label / page layout
# -----------------
st.subheader("📐 라벨 규격 (mm 단위)")
colA, colB, colC = st.columns(3)
with colA:
    page_size = st.selectbox("페이지 크기", ["A4"], index=0)
with colB:
    margin_left = st.number_input("왼쪽 여백(mm)", 5.0, 50.0, 10.0, 0.5)
    margin_top = st.number_input("상단 여백(mm)", 5.0, 50.0, 10.0, 0.5)
with colC:
    rows = st.number_input("행 수", 1, 20, 10, 1)
    cols = st.number_input("열 수", 1, 10, 3, 1)
colD, colE, colF = st.columns(3)
with colD:
    label_w = st.number_input("라벨 가로(mm)", 20.0, 210.0, 70.0, 0.5)
with colE:
    label_h = st.number_input("라벨 세로(mm)", 10.0, 297.0, 25.0, 0.5)
with colF:
    gap_x = st.number_input("가로 간격(mm)", 0.0, 20.0, 3.0, 0.5)
    gap_y = st.number_input("세로 간격(mm)", 0.0, 20.0, 3.0, 0.5)

# Sanity check: the grid is laid out from the left/top margins, so the space it
# needs is margin + n * label + (n - 1) * gap. A4 is the only page size offered;
# its dimensions are in points, hence the division by `mm`.
_page_w_mm, _page_h_mm = A4[0] / mm, A4[1] / mm
_need_w_mm = margin_left + cols * label_w + (cols - 1) * gap_x
_need_h_mm = margin_top + rows * label_h + (rows - 1) * gap_y
# Small tolerance so float rounding in the pt -> mm conversion cannot trigger it.
if _need_w_mm > _page_w_mm + 0.01 or _need_h_mm > _page_h_mm + 0.01:
    st.warning(
        f"⚠️ 현재 설정으로는 라벨 배치가 용지를 벗어납니다. "
        f"(필요: {_need_w_mm:.1f} × {_need_h_mm:.1f} mm, "
        f"용지: {_page_w_mm:.1f} × {_page_h_mm:.1f} mm)"
    )

# -----------------
# Text placement inside a label
# -----------------
st.subheader("🧱 라벨 내부 레이아웃")
col1, col2, col3 = st.columns(3)
with col1:
    pad_x = st.number_input("내부 패딩 X(mm)", 0.0, 20.0, 2.0, 0.5)
    pad_y = st.number_input("내부 패딩 Y(mm)", 0.0, 20.0, 2.0, 0.5)
with col2:
    fs_big = st.number_input("폰트 크기(큰 제목)", 6, 40, 16, 1)
    fs_mid = st.number_input("폰트 크기(중간)", 6, 40, 11, 1)
with col3:
    fs_small = st.number_input("폰트 크기(작게/목록)", 6, 20, 9, 1)
    line_gap = st.number_input("줄 간격(배수)", 0.8, 2.0, 1.2, 0.1)
st.caption("💡 Formtec 3203 비슷한 설정 예시: 가로 70, 세로 25, 열 3, 행 10, 여백 10/10, 간격 3/3 (프린터마다 약간 조정)")
+# -----------------
+# 텍스트 생성 함수
+# -----------------
def year_range(series):
    """Return the span of years in *series* as ``"YYYY-YYYY"``.

    Values are parsed numerically; anything that does not parse (blank strings,
    missing values, free text) and any zero placeholder is ignored. Parsing
    before filtering means zeros are recognised however they are stored
    (``0``, ``"0000"``, ``0.0`` ...), which a string comparison would miss for
    float-typed columns.

    Returns ``"0000-0000"`` when no usable year remains.
    """
    years = pd.to_numeric(series, errors="coerce").dropna()
    years = years[years != 0].astype(int)
    if years.empty:
        return "0000-0000"
    return f"{years.min():04d}-{years.max():04d}"
def build_records(df: pd.DataFrame):
    """Collapse item rows into one record per box, sorted by box number.

    Args:
        df: Item-level rows. Must contain ``박스번호``; the columns ``종료연도``,
            ``관리번호``, ``제목``, ``보존기간``, ``단위업무`` and ``기록물철`` are used
            when present.

    Returns:
        A list of dicts, one per box, holding:
        - ``박스번호``: box number as a string zero-padded to four characters,
        - the first non-missing value per box of each present meta column,
        - ``목록``: one ``"- <관리번호> <제목>"`` line per item (or ``"- <제목>"``
          without a management-number column), joined with newlines,
        - ``생산연도``: year range from ``year_range`` over ``종료연도``, or
          ``"0000-0000"`` when that column is absent.
        Missing values are returned as empty strings so they never render as
        the literal text ``nan``.
    """
    if df.empty:
        return []

    df = df.copy()
    df["박스번호"] = df["박스번호"].astype(str).str.zfill(4)

    def _text(value):
        # Missing cells become empty text instead of "nan"/"None".
        return "" if pd.isna(value) else str(value)

    # Production-year range per box.
    if "종료연도" in df.columns:
        yr = df.groupby("박스번호")["종료연도"].apply(year_range).reset_index()
        yr.columns = ["박스번호", "생산연도"]
    else:
        yr = pd.DataFrame({"박스번호": df["박스번호"].unique(), "생산연도": "0000-0000"})

    # Item list per box.
    has_mgmt = "관리번호" in df.columns
    has_title = "제목" in df.columns
    list_rows = []
    for box, group in df.groupby("박스번호"):
        titles = list(group["제목"].map(_text)) if has_title else [""] * len(group)
        if has_mgmt:
            numbers = group["관리번호"].map(_text)
            lines = [f"- {number} {title}" for number, title in zip(numbers, titles)]
        else:
            lines = [f"- {title}" for title in titles]
        list_rows.append({"박스번호": box, "목록": "\n".join(lines)})
    list_df = pd.DataFrame(list_rows)

    # Representative meta values per box. "박스번호" is always present at this
    # point, so the column selection is never empty.
    meta_cols = [
        name
        for name in ("박스번호", "보존기간", "단위업무", "기록물철", "제목")
        if name in df.columns
    ]
    meta = df.groupby("박스번호", as_index=False).first()[meta_cols]

    merged = meta.merge(list_df, on="박스번호", how="left").merge(yr, on="박스번호", how="left")
    records = merged.sort_values("박스번호").to_dict(orient="records")
    # A box whose meta column is entirely blank still yields NaN; normalise it.
    return [
        {key: ("" if pd.isna(value) else value) for key, value in rec.items()}
        for rec in records
    ]
def draw_label(c: canvas.Canvas, x, y, w, h, rec, font_name, fs_big, fs_mid, fs_small, line_gap,
               pad_x_mm=None, pad_y_mm=None):
    """Draw one box label onto the canvas.

    ReportLab's origin is the bottom-left corner of the page, so text is
    placed from the top of the label downwards by decreasing the y value.

    Args:
        c: Canvas to draw on.
        x, y: Bottom-left corner of the label, in points.
        w, h: Label width and height, in points.
        rec: Record dict; the keys ``박스번호``, ``생산연도``, ``보존기간``,
            ``단위업무``, ``기록물철`` and ``목록`` are read, each optional.
        font_name: Font used for every line.
        fs_big, fs_mid, fs_small: Font sizes for the box number, the middle
            lines, and the item list.
        line_gap: Line-spacing multiplier applied to ``fs_small`` in the list.
        pad_x_mm, pad_y_mm: Inner padding in millimetres. When omitted, the
            module-level ``pad_x`` / ``pad_y`` values are used, which is what
            this function always read before these parameters existed.
    """
    pad_x_pt = (pad_x if pad_x_mm is None else pad_x_mm) * mm
    pad_y_pt = (pad_y if pad_y_mm is None else pad_y_mm) * mm
    inner_x = x + pad_x_pt
    inner_y = y + pad_y_pt
    inner_w = w - 2 * pad_x_pt
    inner_h = h - 2 * pad_y_pt

    def _text(value):
        # NaN is truthy, so `value or ""` would let it through as "nan".
        return "" if pd.isna(value) else str(value)

    def _fit(text, size):
        # Trim trailing characters until the line fits the inner width, so
        # long text cannot run into the neighbouring label.
        while text and c.stringWidth(text, font_name, size) > inner_w:
            text = text[:-1]
        return text

    # Line 1: box number, largest font.
    c.setFont(font_name, fs_big)
    boxno = _text(rec.get("박스번호", ""))
    c.drawString(inner_x, inner_y + inner_h - fs_big*1.1, _fit(f"{boxno}", fs_big))

    # Line 2: production-year range and retention period.
    c.setFont(font_name, fs_mid)
    prod = _text(rec.get("생산연도", ""))
    keep = _text(rec.get("보존기간", ""))
    line_y = inner_y + inner_h - fs_big*1.1 - fs_mid*1.5
    c.drawString(inner_x, line_y, _fit(f"{prod}   {keep}", fs_mid))

    # Line 3: unit task / record series, drawn only when either is present.
    line_y -= fs_mid * 1.2
    unit = _text(rec.get("단위업무", ""))
    series = _text(rec.get("기록물철", ""))
    if unit or series:
        c.setFont(font_name, fs_mid)
        c.drawString(inner_x, line_y, _fit(f"{unit}  {series}", fs_mid))
        line_y -= fs_mid * 1.0

    # Item list: one small-font line per entry.
    c.setFont(font_name, fs_small)
    list_text = _text(rec.get("목록", ""))
    for ln in list_text.split("\n"):
        if line_y < inner_y + fs_small * 1.2:  # stop before crossing the label's bottom edge
            break
        c.drawString(inner_x, line_y, _fit(ln, fs_small))
        line_y -= fs_small * line_gap
def make_pdf(records):
    """Render *records* as a grid of labels and return the PDF as a BytesIO.

    Layout values (margins, label size, gaps, rows/columns, fonts) are read
    from the module-level widget variables. Labels fill each page row by row
    from the top-left; an empty *records* list still yields a single blank
    page. The returned buffer is rewound to position 0.
    """
    # Every page-size choice maps to A4.
    page_w, page_h = A4

    out = BytesIO()
    c = canvas.Canvas(out, pagesize=(page_w, page_h))
    c.setAuthor("BoxLabel")
    c.setTitle("Box Labels")
    pdfmetrics.getFont(font_name)  # ensure registered

    # Millimetres -> points.
    origin_x = margin_left * mm
    top_offset = margin_top * mm
    cell_w = label_w * mm
    cell_h = label_h * mm
    pitch_x = cell_w + gap_x * mm
    pitch_y = cell_h + gap_y * mm

    n_cols = int(cols)
    per_page = int(rows * cols)

    # One iteration per page; max(..., 1) keeps the single blank page for no records.
    for start in range(0, max(len(records), 1), per_page):
        for slot, rec in enumerate(records[start:start + per_page]):
            grid_row, grid_col = divmod(slot, n_cols)
            left = origin_x + grid_col * pitch_x
            # Origin is bottom-left: step down from the top edge, then subtract
            # the label height to reach the label's bottom edge.
            bottom = page_h - top_offset - grid_row * pitch_y - cell_h
            draw_label(c, left, bottom, cell_w, cell_h, rec, font_name, fs_big, fs_mid, fs_small, line_gap)
        c.showPage()

    c.save()
    out.seek(0)
    return out
# -----------------
# Main flow
# -----------------
if df is not None:
    # Preview of the uploaded rows.
    st.subheader("📋 데이터 미리보기")
    st.dataframe(df.head(10), use_container_width=True)

    records = build_records(df)
    st.write(f"총 **{len(records)}**개 박스가 감지되었습니다.")

    default_sel = [rec["박스번호"] for rec in records]
    sel = st.multiselect("생성할 박스번호 선택 (비우면 전체)", options=default_sel)
    if sel:
        # An empty selection means "all boxes", so filter only when something is picked.
        wanted = set(sel)
        records = [rec for rec in records if rec["박스번호"] in wanted]

    if st.button("🚀 PDF 생성"):
        pdf = make_pdf(records)
        st.download_button("⬇️ PDF 다운로드", data=pdf.getvalue(), file_name="box_labels.pdf", mime="application/pdf")