sayanAIAI committed on
Commit 440165f · verified · 1 Parent(s): e3483d8

Update main.py

Files changed (1):
  1. main.py +77 -46
main.py CHANGED
@@ -96,30 +96,18 @@ def _first_int_from_text(s: str, fallback: Optional[int] = None) -> Optional[int]:
 # -------------------------
 # Safe model loading (preload on startup)
 # -------------------------
-def safe_load_pipeline(model_name: str):
-    """
-    Try to load tokenizer & model in robust manner:
-    - try fast tokenizer
-    - fallback to use_fast=False
-    - if still fails, return None (caller should fallback to fallback model)
-    """
+def safe_load_param_generator():
+    global _PARAM_GENERATOR
     try:
-        tok = AutoTokenizer.from_pretrained(model_name)
-        model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
-        pipe = pipeline("summarization", model=model, tokenizer=tok, device=DEVICE)
-        logger.info("Loaded pipeline for %s (fast tokenizer)", model_name)
-        return pipe
-    except Exception as e_fast:
-        logger.warning("Fast tokenizer load failed for %s: %s. Trying slow tokenizer...", model_name, e_fast)
-        try:
-            tok = AutoTokenizer.from_pretrained(model_name, use_fast=False)
-            model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
-            pipe = pipeline("summarization", model=model, tokenizer=tok, device=DEVICE)
-            logger.info("Loaded pipeline for %s (slow tokenizer)", model_name)
-            return pipe
-        except Exception as e_slow:
-            logger.exception("Slow tokenizer load failed for %s: %s", model_name, e_slow)
-            return None
+        logger.info("Loading param-generator (text2text) model: %s", PARAM_MODEL)
+        p_tok = AutoTokenizer.from_pretrained(PARAM_MODEL)
+        p_mod = AutoModelForSeq2SeqLM.from_pretrained(PARAM_MODEL)
+        # IMPORTANT: use text2text-generation so outputs are in generated_text (not summary_text)
+        _PARAM_GENERATOR = pipeline("text2text-generation", model=p_mod, tokenizer=p_tok, device=DEVICE)
+        logger.info("Param-generator loaded as text2text-generation.")
+    except Exception as e:
+        logger.exception("Param-generator failed to load as text2text: %s", e)
+        _PARAM_GENERATOR = None
 
 def preload_models_at_startup():
     global _PREFERRED_SUMMARIZER_KEY
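Note on the task choice in this hunk: with Hugging Face transformers, a "summarization" pipeline returns its output under the summary_text key, while a "text2text-generation" pipeline returns generated_text. A minimal sketch of the difference, using small public checkpoints as illustrative stand-ins (the repo's actual PARAM_MODEL may differ):

from transformers import pipeline

# text2text-generation puts its output under "generated_text"
t2t = pipeline("text2text-generation", model="google/flan-t5-small")
print(t2t("Answer yes or no: is the sky blue?", max_new_tokens=4)[0]["generated_text"])

# summarization puts its output under "summary_text"
summ = pipeline("summarization", model="sshleifer/distilbart-cnn-6-6")
print(summ("Some long article text. " * 40, min_length=5, max_length=20)[0]["summary_text"])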
@@ -216,44 +204,87 @@ def summarize_with_model(pipe, text_prompt: str, short_target: bool = False) ->
 # Param generator (AI decision) with fallback heuristic
 # -------------------------
 def generate_summarization_config(text: str) -> Dict[str, Any]:
-    # If parameter generator pipeline loaded, use it; else fallback to heuristic
-    pg_pipe = _PARAM_GENERATOR
-    if pg_pipe is None:
+    """
+    Uses the text2text param-generator to output a JSON config.
+    If the generator fails or returns something noisy (e.g., echoes the input),
+    fall back to a safe heuristic.
+    """
+    defaults = {"short": (12, 50), "medium": (50, 130), "long": (130, 300)}
+    # heuristic fallback
+    def fallback():
         words = len(text.split())
         length = "short" if words < 150 else ("medium" if words < 800 else "long")
-        defaults = {"short": (12, 50), "medium": (50, 130), "long": (130, 300)}
         mn, mx = defaults[length]
         return {"length": length, "min_length": mn, "max_length": mx, "tone": "neutral"}
 
+    pg = _PARAM_GENERATOR
+    if pg is None:
+        logger.info("Param-generator not available; using fallback heuristic.")
+        return fallback()
+
     prompt = (
-        "Recommend summarization settings. Output JSON exactly like:\n"
+        "Recommend summarization settings for this text. Answer ONLY with JSON of the form:\n"
         '{"length":"short|medium|long","tone":"neutral|formal|casual|bullet","min_words":MIN,"max_words":MAX}\n\n'
         "Text:\n'''"
         + text[:3000] + "'''"
     )
+
     try:
-        out = pg_pipe(prompt, max_new_tokens=48, do_sample=False, num_beams=1)[0].get("summary_text","")
-        # some pipelines return 'generated_text' or 'summary_text' depending; try both
+        out_item = pg(prompt, max_new_tokens=64, do_sample=False, num_beams=1, early_stopping=True)[0]
+        # different pipeline versions may return different keys; check both:
+        out = out_item.get("generated_text") or out_item.get("summary_text") or out_item.get("text") or ""
+        out = (out or "").strip()
+
+        # COMMON FAILURE MODE: the model just echoes the input; reject that.
+        # If output contains a long substring of the input, treat as invalid.
         if not out:
-            out = pg_pipe(prompt, max_new_tokens=48, do_sample=False, num_beams=1)[0].get("generated_text","")
-        # attempt to extract JSON
-        j = re.search(r"\{.*\}", out, re.DOTALL)
-        if j:
-            cfg = json.loads(j.group().replace("'", '"'))
+            raise ValueError("Empty param-generator output")
+        # If the returned text contains more than 40% of the original input words, treat it as an echo
+        input_words = set(w.lower() for w in re.findall(r"\w+", text)[:200])
+        out_words = set(w.lower() for w in re.findall(r"\w+", out)[:200])
+        if len(input_words) > 0 and (len(input_words & out_words) / max(1, len(input_words))) > 0.4:
+            logger.warning("Param-generator appears to echo input; discarding and using heuristic.")
+            return fallback()
+
+        # Find JSON object in output
+        jmatch = re.search(r"\{.*\}", out, re.DOTALL)
+        if jmatch:
+            raw = jmatch.group().replace("'", '"')
+            cfg = json.loads(raw)
         else:
+            # attempt to parse a line of key:value pairs (tolerant)
             cfg = None
-        if not cfg:
-            raise ValueError("Unparseable param output")
-        length = cfg.get("length","medium").lower()
-        tone = cfg.get("tone","neutral").lower()
-        return {"length": length, "min_length": cfg.get("min_words", 50), "max_length": cfg.get("max_words", 130), "tone": tone}
+
+        if not cfg or not isinstance(cfg, dict):
+            logger.warning("Param-generator output not parseable as JSON: %s", out[:300])
+            return fallback()
+
+        length = cfg.get("length", "medium").lower()
+        tone = cfg.get("tone", "neutral").lower()
+        min_w = cfg.get("min_words") or cfg.get("min_length") or cfg.get("min")
+        max_w = cfg.get("max_words") or cfg.get("max_length") or cfg.get("max")
+
+        if length not in ("short", "medium", "long"):
+            words = len(text.split())
+            length = "short" if words < 150 else ("medium" if words < 800 else "long")
+        if tone not in ("neutral", "formal", "casual", "bullet"):
+            tone = "neutral"
+
+        defaults_min, defaults_max = defaults.get(length, (50, 130))
+        try:
+            mn = int(min_w) if min_w is not None else defaults_min
+            mx = int(max_w) if max_w is not None else defaults_max
+        except Exception:
+            mn, mx = defaults_min, defaults_max
+
+        mn = max(5, min(mn, 2000))
+        mx = max(mn + 5, min(mx, 4000))
+        logger.info("Param-generator suggested length=%s tone=%s min=%s max=%s", length, tone, mn, mx)
+        return {"length": length, "min_length": mn, "max_length": mx, "tone": tone}
+
     except Exception as e:
-        logger.exception("Param generator failed: %s; using heuristic", e)
-        words = len(text.split())
-        length = "short" if words < 150 else ("medium" if words < 800 else "long")
-        defaults = {"short": (12,50), "medium": (50,130), "long":(130,300)}
-        mn, mx = defaults[length]
-        return {"length": length, "min_length": mn, "max_length": mx, "tone": "neutral"}
 
 # -------------------------
 # Orchestrator: chunk summarization with threadpool + timeouts
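The echo check added inside generate_summarization_config can be read in isolation. A standalone sketch of the same overlap test (looks_like_echo is a hypothetical helper name, not part of main.py):

import re

def looks_like_echo(source: str, generated: str, threshold: float = 0.4) -> bool:
    # Compare the first 200 word tokens of each side as lowercase sets and
    # flag an echo when more than `threshold` of the input's distinct words
    # reappear in the generated text.
    src = set(w.lower() for w in re.findall(r"\w+", source)[:200])
    gen = set(w.lower() for w in re.findall(r"\w+", generated)[:200])
    return bool(src) and (len(src & gen) / max(1, len(src))) > threshold

assert looks_like_echo("the quick brown fox jumps", "The quick brown fox")
assert not looks_like_echo("the quick brown fox jumps", '{"length":"short","tone":"neutral"}')

A set-overlap ratio is deliberately cheap; it cannot tell paraphrase from echo, but for rejecting a generator that merely copied its input it is usually enough.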
 
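End to end, the generated config is meant to parameterize the actual summarization call. A hedged usage sketch, assuming a summarizer pipeline was loaded at startup (summarizer and article are illustrative names, not identifiers from main.py):

cfg = generate_summarization_config(article)
result = summarizer(
    article,
    min_length=cfg["min_length"],
    max_length=cfg["max_length"],
    do_sample=False,
)[0]["summary_text"]

One caveat worth knowing: the config's bounds are derived from min_words/max_words, while a transformers pipeline interprets min_length/max_length in tokens, so the two only roughly correspond.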