Spaces:

Natwar
/

Text_Summarization_Multipurpose

Running

App Files Files Community

Natwar committed 8 days ago

Commit

fdebaf9

verified ·

1 Parent(s): f01d54d

Update app.py

Browse files

Files changed (1) hide show

app.py +30 -47

app.py CHANGED Viewed

@@ -6,12 +6,11 @@ warnings.filterwarnings("ignore")
 def run_pip(*args):
-    """Run a pip command and raise on failure."""
     subprocess.check_call([sys.executable, "-m", "pip", "install", "--no-cache-dir"] + list(args))
-# ── Phase 1: Install packages ────────────────────────────────────────────────
-# Order and pins matter — see comments below.
 print("=== Installing gradio (if needed) ===")
 try:
@@ -20,7 +19,7 @@ try:
 except ImportError:
     run_pip("gradio")
-print("=== Installing torch (CPU-only, ~190 MB vs ~900 MB for CUDA) ===")
 try:
     import torch  # noqa: F401
     print("torch already installed.")
@@ -29,8 +28,6 @@ except ImportError:
 print("=== Installing transformers 4.46.3 ===")
 # Pin to last v4 release — transformers 5.x removed the 'summarization' pipeline task.
-# This also pulls tokenizers 0.20.3 (native cp313 wheel, no Rust needed) and
-# huggingface-hub 0.36.x as a side-effect; we fix the hub version in Phase 2.
 try:
     import transformers as _tf
     if _tf.__version__ != "4.46.3":
@@ -39,57 +36,43 @@ try:
 except (ImportError, AttributeError):
     run_pip("transformers==4.46.3")
-# ── Phase 2: Patch transformers/utils/hub.py BEFORE importing it ─────────────
 #
-# Root cause: transformers 4.46.3 calls
-#     get_session().head(..., allow_redirects=False, ...)
-# In this environment get_session() returns an httpx.Client (because httpx is
-# installed as a gradio dependency and the hub version that transformers pulled
-# switches to httpx when it is available).  httpx uses `follow_redirects=`,
-# not `allow_redirects=`, so the call raises:
-#     TypeError: Client.head() got an unexpected keyword argument 'allow_redirects'
 #
-# Fix: rewrite every `allow_redirects=` → `follow_redirects=` in hub.py on
-# disk *before* Python imports it, so no module reload is needed.
-def patch_transformers_hub():
-    try:
-        import importlib.util
-        spec = importlib.util.find_spec("transformers")
-        if spec is None:
-            print("Warning: could not locate transformers package for patching.")
-            return
-        pkg_dir = os.path.dirname(spec.origin)
-        hub_path = os.path.join(pkg_dir, "utils", "hub.py")
-        with open(hub_path, "r", encoding="utf-8") as f:
-            src = f.read()
-        if "allow_redirects=" in src:
-            patched = src.replace("allow_redirects=", "follow_redirects=")
-            with open(hub_path, "w", encoding="utf-8") as f:
-                f.write(patched)
-            print(f"Patched {hub_path}: allow_redirects → follow_redirects")
-        else:
-            print("transformers hub.py already clean — no patch needed.")
-    except Exception as exc:
-        print(f"Warning: hub.py patch failed ({exc}). Will try to continue anyway.")
-patch_transformers_hub()
-# ── Phase 3: Safe imports (transformers is now patched on disk) ───────────────
-import gradio as gr          # noqa: E402
-import torch                  # noqa: E402
-from transformers import pipeline  # noqa: E402
-# ── App setup ────────────────────────────────────────────────────────────────
 DEFAULT_MODEL = "sshleifer/distilbart-cnn-6-6"
 AVAILABLE_MODELS = {
-    "sshleifer/distilbart-cnn-6-6": "Fast & light, good for general summarization",
-    "facebook/bart-large-cnn": "Larger BART model, better detail retention",
-    "google/pegasus-cnn_dailymail": "Pegasus model for high-quality summarization",
-    "allenai/led-base-16384": "Handles longer scientific documents",
 }
 print(f"Loading default model: {DEFAULT_MODEL}")
@@ -157,7 +140,7 @@ def paste_example(example_type):
     return EXAMPLE_TEXTS.get(example_type, "")
-# ── Gradio UI ─────────────────────────────────────────────────────────────────
 with gr.Blocks(title="Multimodel Summarization App", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 📝 Multimodel Text Summarization")

 def run_pip(*args):
+    """Run a pip install command and raise on failure."""
     subprocess.check_call([sys.executable, "-m", "pip", "install", "--no-cache-dir"] + list(args))
+# ── Phase 1: Install packages ─────────────────────────────────────────────────
 print("=== Installing gradio (if needed) ===")
 try:
 except ImportError:
     run_pip("gradio")
+print("=== Installing torch (CPU-only, ~190 MB) ===")
 try:
     import torch  # noqa: F401
     print("torch already installed.")
 print("=== Installing transformers 4.46.3 ===")
 # Pin to last v4 release — transformers 5.x removed the 'summarization' pipeline task.
 try:
     import transformers as _tf
     if _tf.__version__ != "4.46.3":
 except (ImportError, AttributeError):
     run_pip("transformers==4.46.3")
+# ── Phase 2: Fix the requests-vs-httpx incompatibility ───────────────────────
 #
+# What happens:
+#   - transformers 4.46.3 requires huggingface-hub<1.0, so pip installs 0.36.x.
+#   - huggingface-hub 0.36.x makes get_session() return an httpx.Client when
+#     httpx is present (it is — gradio depends on it).
+#   - transformers' own hub.py then calls that client with requests-style kwargs:
+#       get_session().head(url, allow_redirects=False, proxies=proxies, timeout=10)
+#   - httpx.Client rejects every one of these: allow_redirects, proxies, etc.
 #
+# Fix:
+#   After importing transformers (so its module object is in sys.modules), replace
+#   the `get_session` name inside the `transformers.utils.hub` namespace with a
+#   lambda that returns a plain requests.Session.  A requests.Session accepts all
+#   of those kwargs natively, so every existing call in hub.py works unchanged.
+import transformers.utils.hub as _t_hub  # noqa: E402
+import requests as _requests              # noqa: E402
+_t_hub.get_session = lambda: _requests.Session()
+print("Patched transformers.utils.hub.get_session → requests.Session()")
+# ── Phase 3: Safe imports ─────────────────────────────────────────────────────
+import gradio as gr                       # noqa: E402
+import torch                              # noqa: E402
+from transformers import pipeline         # noqa: E402
+# ── App setup ─────────────────────────────────────────────────────────────────
 DEFAULT_MODEL = "sshleifer/distilbart-cnn-6-6"
 AVAILABLE_MODELS = {
+    "sshleifer/distilbart-cnn-6-6":   "Fast & light, good for general summarization",
+    "facebook/bart-large-cnn":         "Larger BART model, better detail retention",
+    "google/pegasus-cnn_dailymail":    "Pegasus model for high-quality summarization",
+    "allenai/led-base-16384":          "Handles longer scientific documents",
 }
 print(f"Loading default model: {DEFAULT_MODEL}")
     return EXAMPLE_TEXTS.get(example_type, "")
+# ── Gradio UI ──────────────────────────────────────────────────────────────────
 with gr.Blocks(title="Multimodel Summarization App", theme=gr.themes.Soft()) as demo:
     gr.Markdown("# 📝 Multimodel Text Summarization")