Spaces:

sayanAIAI
/

AIprojects

Runtime error

App Files Files Community

sayanAIAI committed on Aug 12

Commit

fd8623d

verified ·

1 Parent(s): 2884a69

Update main.py

Browse files

Files changed (1) hide show

main.py +78 -22

main.py CHANGED Viewed

@@ -1,38 +1,94 @@
 import os
 os.environ['HF_HOME'] = '/tmp'
-from flask import Flask, render_template, request, jsonify
-from transformers import pipeline, AutoTokenizer
 app = Flask(__name__)
-model_name = "sshleifer/distilbart-cnn-12-6"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-summarizer = pipeline("summarization", model=model_name)
-@app.route("/")
-def index():
-    return render_template("index.html")
 @app.route("/summarize", methods=["POST"])
-def summarize():
-    try:
-        data = request.get_json()
-        text = data.get("text", "").strip()
-        if not text:
-            return jsonify({"error": "No text provided"}), 400
-        input_tokens = tokenizer.encode(text, return_tensors="pt")
-        input_len = input_tokens.shape[1]
-        max_len = max(10, min(100, input_len // 2))
-        min_len = max(5, max_len // 2)
-        summary = summarizer(text, max_length=max_len, min_length=min_len, do_sample=False)[0]['summary_text']
-        return jsonify({"summary": summary})
-    except Exception as e:
-        return jsonify({"error": str(e)}), 500
 if __name__ == "__main__":
     app.run(debug=True,port=7860)

 import os
 os.environ['HF_HOME'] = '/tmp'
+# main.py (excerpt)
+from flask import Flask, request, jsonify
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
+import math, textwrap
 app = Flask(__name__)
+MODEL_NAME = "sshleifer/distilbart-cnn-12-6"
+tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME)
+summarizer = pipeline("summarization", model=model, tokenizer=tokenizer, device=-1)  # set device appropriately
# Generation-length bounds selectable by name; each value holds the
# min_length / max_length keyword arguments handed to the summarizer.
LENGTH_PRESETS = {
    preset_name: {"min_length": shortest, "max_length": longest}
    for preset_name, shortest, longest in (
        ("short", 20, 60),
        ("medium", 60, 130),
        ("long", 130, 300),
    )
}
def chunk_text_by_chars(text, max_chars=1500, overlap=200):
    """Split *text* into overlapping chunks of at most *max_chars* characters.

    Text that already fits is returned unchanged as a single-element list.
    Longer text is cut into windows; each window after the first starts
    *overlap* characters before the previous one ended, so context is shared
    across a cut. A non-final window is shortened to its last newline when
    that newline lies past 60% of *max_chars*, so cuts tend to fall on line
    boundaries.

    Args:
        text: The string to split.
        max_chars: Maximum length of a chunk before stripping.
        overlap: Number of characters repeated between consecutive chunks.

    Returns:
        A list of chunks. Chunks produced by splitting are whitespace-stripped
        and chunks that are empty after stripping are omitted.

    Raises:
        ValueError: If *text* has to be split and *max_chars* is not positive.
    """
    if len(text) <= max_chars:
        return [text]
    if max_chars < 1:
        raise ValueError("max_chars must be a positive integer")

    text_len = len(text)
    parts = []
    start = 0
    while start < text_len:
        end = min(text_len, start + max_chars)
        if end < text_len:
            # Prefer cutting at a newline, but only when that keeps the chunk
            # reasonably full; the last window is never shortened.
            newline_at = text.rfind("\n", start, end)
            if newline_at - start > max_chars * 0.6:
                end = newline_at
        piece = text[start:end].strip()
        if piece:
            parts.append(piece)
        if end >= text_len:
            # The tail has been emitted. Stepping back by `overlap` here
            # would re-emit it forever.
            break
        # Step back for the overlap, but always move forward: drop the
        # overlap when it would otherwise stall or rewind the window.
        next_start = end - overlap
        start = next_start if next_start > start else end
    return parts
# Instruction line placed in front of the text for each recognised tone.
_TONE_INSTRUCTIONS = {
    "formal": "Summarize in a formal, professional tone:",
    "casual": "Summarize in a casual, conversational tone:",
    "bullet": "Summarize into short bullet points:",
}


def apply_tone_instruction(text, tone):
    """Prefix *text* with an instruction line chosen by *tone*.

    Args:
        text: The text to be summarized.
        tone: "formal", "casual" or "bullet", matched case-insensitively and
            ignoring surrounding whitespace. Any other value, including
            ``None``, an empty string or a non-string, selects the plain
            "Summarize:" instruction.

    Returns:
        The instruction, a blank line, then *text*.
    """
    # Callers may pass values straight from a JSON body, so a non-string
    # tone must select the default instead of raising on ``.lower()``.
    key = tone.strip().lower() if isinstance(tone, str) else "neutral"
    instr = _TONE_INSTRUCTIONS.get(key, "Summarize:")
    # NOTE(review): whether the summarization model actually follows such a
    # prefix, rather than treating it as part of the input, is not shown
    # here - confirm.
    return f"{instr}\n\n{text}"
 @app.route("/summarize", methods=["POST"])
+def summarize_route():
+    data = request.get_json(force=True)
+    text = data.get("text", "")[:20000]  # safe cap
+    length = data.get("length", "medium")
+    tone = data.get("tone", "neutral")
+    if not text or len(text.split()) < 5:
+        return jsonify({"error": "Input too short."}), 400
+    preset = LENGTH_PRESETS.get(length, LENGTH_PRESETS["medium"])
+    chunks = chunk_text_by_chars(text, max_chars=1500, overlap=200)
+    summaries = []
+    for chunk in chunks:
+        prompted = apply_tone_instruction(chunk, tone)
+        # call summarizer with min/max lengths
+        out = summarizer(prompted,
+                         min_length=preset["min_length"],
+                         max_length=preset["max_length"],
+                         truncation=True)[0]["summary_text"]
+        summaries.append(out.strip())
+    # If multiple chunk summaries, join and compress once more
+    if len(summaries) == 1:
+        final = summaries[0]
+    else:
+        combined = "\n\n".join(summaries)
+        prompted = apply_tone_instruction(combined, tone)
+        final = summarizer(prompted,
+                           min_length=preset["min_length"],
+                           max_length=preset["max_length"],
+                           truncation=True)[0]["summary_text"]
+    # if bullet tone, post-process
+    if tone == "bullet":
+        lines = [l.strip() for s in final.splitlines() for l in s.split(". ") if l.strip()]
+        final = "\n".join(f"- {l.rstrip('.')}" for l in lines[:20])
+    return jsonify({"summary": final})
 if __name__ == "__main__":
     app.run(debug=True,port=7860)