Update app.py
app.py
CHANGED
@@ -5,77 +5,84 @@ import warnings
 warnings.filterwarnings("ignore")


-def install_package(package, version):
-
-
-    try:
-
-
-
-
-
-
-    else:
-
-
-
-    except subprocess.CalledProcessError as e:
-        print(f"Failed to install {package_spec}: {e}")
-        raise
-
-
-def force_install(package_spec):
-    """Install a package unconditionally, overriding whatever version is present."""
-    print(f"Force-installing {package_spec}...")
-    try:
-        subprocess.check_call([
-            sys.executable, "-m", "pip", "install", "--no-cache-dir", package_spec
-        ])
-    except subprocess.CalledProcessError as e:
-        print(f"Failed to force-install {package_spec}: {e}")
-        raise
-
-
-# Phase 1: install missing packages.
-# Notes:
-# - tokenizers is NOT pre-pinned here; transformers 4.46.3 pulls tokenizers 0.20.3
-#   which already ships native cp313 wheels, so no Rust compilation is needed.
-# - transformers is pinned to 4.46.3 (last v4 release) because v5 dropped the
-#   "summarization" pipeline task entirely.
-required_packages = {
-    "gradio": None,
-    "torch": None,
-    "transformers": "4.46.3",
-}
-
-
-
-
-
-    install_package(package, version)
-
-# Phase 2: fix the huggingface_hub version AFTER transformers has run.
-#
-# Problem: transformers 4.46.3 requires huggingface-hub<1.0, so pip picks
-# the latest <1.0 release (currently 0.36.x). Starting around hub 0.30,
-# get_session() returns an httpx.Client when httpx is present on the system.
-# transformers' own hub.py calls get_session().head(..., allow_redirects=...),
-# which is a requests-style kwarg that httpx rejects with:
-#   TypeError: Client.head() got an unexpected keyword argument 'allow_redirects'
-#
-# Fix: force hub back to 0.28.1, the last release that uses requests (not httpx)
-# for get_session(), while still satisfying:
-#   - transformers 4.46.3 requirement: >=0.23.2, <1.0 ✓
-#   - gradio requirement: >=0.28.1 ✓
-force_install("huggingface_hub==0.28.1")
-
-# Now safe to import everything
-import gradio as gr
-import torch
-from transformers import pipeline
-
-# Load default summarization model
+def run_pip(*args):
+    """Run a pip command and raise on failure."""
+    subprocess.check_call([sys.executable, "-m", "pip", "install", "--no-cache-dir"] + list(args))
+
+
+# ── Phase 1: Install packages ────────────────────────────────────────────────
+# Order and pins matter; see comments below.
+
+print("=== Installing gradio (if needed) ===")
+try:
+    import gradio  # noqa: F401
+    print("gradio already installed.")
+except ImportError:
+    run_pip("gradio")
+
+print("=== Installing torch (CPU-only, ~190 MB vs ~900 MB for CUDA) ===")
+try:
+    import torch  # noqa: F401
+    print("torch already installed.")
+except ImportError:
+    run_pip("torch", "--index-url", "https://download.pytorch.org/whl/cpu")
+
+print("=== Installing transformers 4.46.3 ===")
+# Pin to last v4 release; transformers 5.x removed the 'summarization' pipeline task.
+# This also pulls tokenizers 0.20.3 (native cp313 wheel, no Rust needed) and
+# huggingface-hub 0.36.x as a side-effect; Phase 2 patches around the hub issue.
+try:
+    import transformers as _tf
+    if _tf.__version__ != "4.46.3":
+        raise ImportError("wrong version")
+    print("transformers 4.46.3 already installed.")
+except (ImportError, AttributeError):
+    run_pip("transformers==4.46.3")
+
+# ── Phase 2: Patch transformers/utils/hub.py BEFORE importing it ─────────────
+#
+# Root cause: transformers 4.46.3 calls
+#     get_session().head(..., allow_redirects=False, ...)
+# In this environment get_session() returns an httpx.Client (because httpx is
+# installed as a gradio dependency and the hub version that transformers pulled
+# switches to httpx when it is available). httpx uses `follow_redirects=`,
+# not `allow_redirects=`, so the call raises:
+#     TypeError: Client.head() got an unexpected keyword argument 'allow_redirects'
+#
+# Fix: rewrite every `allow_redirects=` → `follow_redirects=` in hub.py on
+# disk *before* Python imports it, so no module reload is needed.
+
+def patch_transformers_hub():
+    try:
+        import importlib.util
+        spec = importlib.util.find_spec("transformers")
+        if spec is None:
+            print("Warning: could not locate transformers package for patching.")
+            return
+        pkg_dir = os.path.dirname(spec.origin)
+        hub_path = os.path.join(pkg_dir, "utils", "hub.py")
+        with open(hub_path, "r", encoding="utf-8") as f:
+            src = f.read()
+        if "allow_redirects=" in src:
+            patched = src.replace("allow_redirects=", "follow_redirects=")
+            with open(hub_path, "w", encoding="utf-8") as f:
+                f.write(patched)
+            print(f"Patched {hub_path}: allow_redirects → follow_redirects")
+        else:
+            print("transformers hub.py already clean; no patch needed.")
+    except Exception as exc:
+        print(f"Warning: hub.py patch failed ({exc}). Will try to continue anyway.")
+
+patch_transformers_hub()
+
+# ── Phase 3: Safe imports (transformers is now patched on disk) ──────────────
+
+import gradio as gr  # noqa: E402
+import torch  # noqa: E402
+from transformers import pipeline  # noqa: E402
+
+# ── App setup ─────────────────────────────────────────────────────────────────
+
 DEFAULT_MODEL = "sshleifer/distilbart-cnn-6-6"

 AVAILABLE_MODELS = {
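Both versions of Phase 2 work around the same incompatibility, and the quoted TypeError is easy to reproduce in isolation. A minimal sketch, assuming httpx is installed (it arrives as a gradio dependency); the error is raised at argument-binding time, before any network I/O happens:

```python
import httpx

client = httpx.Client()
try:
    # requests-style kwarg, as used by transformers' hub.py
    client.head("https://huggingface.co", allow_redirects=False)
except TypeError as exc:
    print(exc)  # Client.head() got an unexpected keyword argument 'allow_redirects'

# The httpx spelling of the same option is follow_redirects=...
```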
@@ -86,7 +93,7 @@ AVAILABLE_MODELS = {
 }

 print(f"Loading default model: {DEFAULT_MODEL}")
-summarizer = pipeline("summarization", model=DEFAULT_MODEL, device=-1)  # device=-1
+summarizer = pipeline("summarization", model=DEFAULT_MODEL, device=-1)  # device=-1 → CPU

 EXAMPLE_TEXTS = {
     "news_article": (
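`device=-1` is the classic transformers convention for "run on CPU". A quick way to confirm where the model's weights ended up; a sketch that downloads the model on first run:

```python
from transformers import pipeline

summ = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6", device=-1)
print(next(summ.model.parameters()).device)  # cpu
```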
@@ -120,29 +127,26 @@ EXAMPLE_TEXTS = {
 def summarize_text(text, model_name, summary_length, num_beams):
     if not text.strip():
         return "Please provide some text to summarize."
-
     try:
         global summarizer
         summarizer = pipeline("summarization", model=model_name, device=-1)
-
         length_mapping = {
             "very_short": (30, 50),
-            "short":
-            "medium":
-            "long":
+            "short": (50, 70),
+            "medium": (70, 100),
+            "long": (100, 130),
         }
-
-
-        summary = summarizer(
+        min_len, max_len = length_mapping.get(summary_length, (70, 100))
+        result = summarizer(
             text,
-            max_length=int(
-            min_length=int(
+            max_length=int(max_len),
+            min_length=int(min_len),
             num_beams=int(num_beams),
             do_sample=False,
         )
-        return summary[0]["summary_text"]
-    except Exception as e:
-        return f"Error: {e}"
+        return result[0]["summary_text"]
+    except Exception as exc:
+        return f"Error: {exc}"


 def count_words(text):
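The new `return result[0]["summary_text"]` relies on the pipeline's output shape: a summarization pipeline returns a list with one dict per input. A minimal sketch under the same settings (model download on first run; the input string is a placeholder):

```python
from transformers import pipeline

summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6", device=-1)
out = summarizer(
    "Long article text goes here. " * 20,  # placeholder input
    max_length=50,
    min_length=30,
    num_beams=4,
    do_sample=False,
)
print(out)                     # [{'summary_text': '...'}]
print(out[0]["summary_text"])  # the string the app returns
```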
@@ -153,6 +157,8 @@ def paste_example(example_type):
     return EXAMPLE_TEXTS.get(example_type, "")


+# ── Gradio UI ─────────────────────────────────────────────────────────────────
+
 with gr.Blocks(title="Multimodel Summarization App", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# π Multimodel Text Summarization")
     gr.Markdown(
@@ -165,10 +171,9 @@ with gr.Blocks(title="Multimodel Summarization App", theme=gr.themes.Soft()) as
             lines=12,
             label="Text to Summarize",
             placeholder="Paste or type your text here...",
-            show_label=True,
             elem_id="text_input",
         )
-        word_counter = gr.Markdown("0 words"
+        word_counter = gr.Markdown("0 words")
     text_input.change(count_words, inputs=[text_input], outputs=[word_counter])

     with gr.Row():
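For context, the `.change()` wiring this hunk touches, as a freestanding sketch; the real `count_words` lives elsewhere in app.py, so the body below is a stand-in:

```python
import gradio as gr

def count_words(text):
    # stand-in for the app's real implementation
    return f"{len(text.split())} words"

with gr.Blocks() as demo:
    box = gr.Textbox(label="Text to Summarize", lines=12)
    counter = gr.Markdown("0 words")
    # re-run count_words on every edit and render the result in the Markdown cell
    box.change(count_words, inputs=[box], outputs=[counter])

demo.launch()
```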
@@ -193,9 +198,7 @@ with gr.Blocks(title="Multimodel Summarization App", theme=gr.themes.Soft()) as
                 value="medium",
                 label="Summary Length",
             )
-            num_beams = gr.Slider(
-                minimum=1, maximum=8, value=4, step=1, label="Beam Size"
-            )
+            num_beams = gr.Slider(minimum=1, maximum=8, value=4, step=1, label="Beam Size")

         summarize_button = gr.Button("Generate Summary", variant="primary", size="lg")

@@ -207,19 +210,12 @@ with gr.Blocks(title="Multimodel Summarization App", theme=gr.themes.Soft()) as
             placeholder="Your summary will appear here...",
         )

-    # Events
     model_choice.change(
         fn=lambda x: f"**Model info:** {AVAILABLE_MODELS.get(x, 'Custom model')}",
         inputs=[model_choice],
         outputs=[model_info],
     )
-
-    example_load_btn.click(
-        fn=paste_example,
-        inputs=[example_dropdown],
-        outputs=[text_input],
-    )
-
+    example_load_btn.click(fn=paste_example, inputs=[example_dropdown], outputs=[text_input])
     summarize_button.click(
         fn=summarize_text,
         inputs=[text_input, model_choice, summary_length, num_beams],
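Finally, the Phase 2 rewrite is easy to sanity-check from a REPL after app.py has run once; a sketch using the same `find_spec` lookup as `patch_transformers_hub`:

```python
import importlib.util
import os

spec = importlib.util.find_spec("transformers")
hub_path = os.path.join(os.path.dirname(spec.origin), "utils", "hub.py")
with open(hub_path, encoding="utf-8") as f:
    src = f.read()
print("allow_redirects=" in src)   # False once the patch has been applied
print("follow_redirects=" in src)  # True
```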