TuringsSolutions committed on
Commit
061f36e
·
verified ·
1 Parent(s): 7363746

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +50 -4
app.py CHANGED
@@ -17,6 +17,7 @@ import gradio as gr
17
 
18
  # ---- Parsers ----
19
  from docx import Document
 
20
 
21
  # ---- Embeddings ----
22
  # We try sentence-transformers. If unavailable (or offline), we fall back to HashingVectorizer.
@@ -37,8 +38,53 @@ def _load_st_model():
37
  except Exception as e:
38
  return None
39
 
40
def read_docx(docx_path: str) -> List[str]:
    """Return the non-empty paragraph texts of the document opened from *docx_path*.

    Every paragraph's text is stripped of surrounding whitespace; paragraphs
    that are empty after stripping are dropped. Document order is preserved.
    """
    kept = []
    for para in Document(docx_path).paragraphs:
        text = para.text.strip()
        # A stripped, non-empty string always contains a non-whitespace
        # character, so truthiness alone identifies paragraphs worth keeping.
        if text:
            kept.append(text)
    return kept
@@ -295,7 +341,7 @@ def process_pipeline(docx_file, units_mode, K, iters, beta, bins, tau, seed):
295
  return gr.update(value="# Please upload a .docx file."), None, None, None, None
296
 
297
  # Read file
298
- paras = read_docx(docx_file.name)
299
  units = paragraphs_to_units(paras, mode=units_mode)
300
 
301
  if len(units) == 0:
@@ -380,7 +426,7 @@ with gr.Blocks(title="Constellation Harvest Regularization (CHR)") as demo:
380
 
381
  with gr.Row():
382
  with gr.Column(scale=1):
383
- docx_file = gr.File(label=".docx document", file_types=[".docx"])
384
  units_mode = gr.Radio(choices=["paragraphs", "sentences"], value="paragraphs", label="Unit granularity")
385
  K = gr.Slider(2, 24, value=8, step=1, label="K (number of constellations)")
386
  iters = gr.Slider(5, 100, value=30, step=1, label="Iterations")
 
17
 
18
  # ---- Parsers ----
19
  from docx import Document
20
+ import traceback
21
 
22
  # ---- Embeddings ----
23
  # We try sentence-transformers. If unavailable (or offline), we fall back to HashingVectorizer.
 
38
  except Exception as e:
39
  return None
40
 
41
+ def _resolve_file_input(file_obj):
42
+ """Return (bytes_io, display_name) for a variety of Gradio/HF file input shapes.
43
+ Supports: tempfile objects, dicts with 'name'/'path'/'data', raw path strings, or bytes.
44
+ """
45
+ import io, os
46
+ # 1) Dict shape (some Gradio environments)
47
+ if isinstance(file_obj, dict):
48
+ # Prefer an on-disk path if present
49
+ for key in ("path", "name"):
50
+ p = file_obj.get(key)
51
+ if isinstance(p, str) and os.path.exists(p):
52
+ with open(p, "rb") as f:
53
+ return io.BytesIO(f.read()), os.path.basename(p)
54
+ # Raw bytes in 'data'
55
+ data = file_obj.get("data")
56
+ if isinstance(data, (bytes, bytearray)):
57
+ return io.BytesIO(bytes(data)), file_obj.get("orig_name", "upload.docx")
58
+ # 2) Tempfile-like object
59
+ if hasattr(file_obj, "read") and hasattr(file_obj, "name"):
60
+ try:
61
+ file_obj.seek(0)
62
+ content = file_obj.read()
63
+ if isinstance(content, (bytes, bytearray)):
64
+ return io.BytesIO(content), os.path.basename(getattr(file_obj, "name", "upload.docx"))
65
+ except Exception:
66
+ pass
67
+ # Fallback: open by path
68
+ p = getattr(file_obj, "name", None)
69
+ if isinstance(p, str) and os.path.exists(p):
70
+ with open(p, "rb") as f:
71
+ return io.BytesIO(f.read()), os.path.basename(p)
72
+ # 3) Path string
73
+ if isinstance(file_obj, str) and os.path.exists(file_obj):
74
+ with open(file_obj, "rb") as f:
75
+ import os
76
+ return io.BytesIO(f.read()), os.path.basename(file_obj)
77
+ # 4) Raw bytes
78
+ if isinstance(file_obj, (bytes, bytearray)):
79
+ return io.BytesIO(bytes(file_obj)), "upload.docx"
80
+ # Unknown shape
81
+ return None, "upload.docx"
82
+
83
def read_docx_any(file_obj) -> List[str]:
    """Return the non-empty paragraph texts of an uploaded .docx in any supported shape.

    The input is first normalised by ``_resolve_file_input``; the resulting
    in-memory stream is then handed to ``Document``. Each paragraph's text is
    stripped, and paragraphs that are empty after stripping are dropped.

    Raises:
        ValueError: If ``_resolve_file_input`` yields no stream for *file_obj*.
    """
    stream, _display_name = _resolve_file_input(file_obj)
    if stream is None:
        raise ValueError("Could not read uploaded .docx file; unsupported input shape.")

    kept = []
    for para in Document(stream).paragraphs:
        text = para.text.strip()
        # A stripped, non-empty string cannot be all whitespace, so a
        # truthiness check is sufficient to filter blank paragraphs.
        if text:
            kept.append(text)
    return kept
 
341
  return gr.update(value="# Please upload a .docx file."), None, None, None, None
342
 
343
  # Read file
344
+ paras = read_docx_any(docx_file)
345
  units = paragraphs_to_units(paras, mode=units_mode)
346
 
347
  if len(units) == 0:
 
426
 
427
  with gr.Row():
428
  with gr.Column(scale=1):
429
+ docx_file = gr.File(label=".docx document", file_types=[".docx"], file_count="single")
430
  units_mode = gr.Radio(choices=["paragraphs", "sentences"], value="paragraphs", label="Unit granularity")
431
  K = gr.Slider(2, 24, value=8, step=1, label="K (number of constellations)")
432
  iters = gr.Slider(5, 100, value=30, step=1, label="Iterations")