Subtitle KIS v1.1 – initial
- Dockerfile +13 -4
- README.md +9 -1
- backend/app.py +14 -5

Dockerfile CHANGED

```diff
@@ -35,8 +35,7 @@ USER appuser
 # Python deps (user site)
 RUN pip install --no-cache-dir --user -r requirements.txt
 
-#
-RUN python -m spacy download en_core_web_sm
+# NLTK data (you use wordnet + tokenizers). spaCy removed since unused.
 RUN python - <<'PY'
 import nltk
 for pkg in ["punkt","punkt_tab","wordnet","omw-1.4"]:
@@ -57,7 +56,17 @@ except Exception as e:
     print("Prefetch skipped:", e)
 PY
 
+# (Optional) smoke test to catch FAISS/torch issues early
+RUN python - <<'PY'
+import sys
+print("PY:", sys.version)
+import faiss, torch
+print("FAISS:", faiss.__version__)
+print("Torch:", torch.__version__, "CUDA:", torch.cuda.is_available())
+PY
+
+# Spaces port + gunicorn binding
+ENV PORT=7860
 EXPOSE 7860
+CMD ["bash","-lc","gunicorn -w 1 -k gthread --threads 4 --timeout 300 -b 0.0.0.0:${PORT:-7860} backend.app:app"]
 
-# Give cold start more time
-CMD ["gunicorn","-w","1","-k","gthread","--threads","4","--timeout","300","-b","0.0.0.0:7860","backend.app:app"]
```
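The heredoc bodies are only partially visible in the hunks above (the `for pkg` loop, the `except Exception as e:` anchor line, and the `Prefetch skipped` print). A minimal sketch of the prefetch script's plausible shape, with the elided parts marked as such:

```python
# Sketch of the Dockerfile's prefetch heredoc; only the loop, the except
# line, and the print appear in the diff, so the bodies here are assumptions.
import nltk

for pkg in ["punkt", "punkt_tab", "wordnet", "omw-1.4"]:
    nltk.download(pkg, quiet=True)  # lands in the user-site nltk_data dir

try:
    # Elided in the diff: model warmup (SBERT / DistilBART, per the README's
    # stack); the exact calls are not shown, so this stays abstract.
    ...
except Exception as e:
    print("Prefetch skipped:", e)
```

The switch from the JSON exec form of CMD to `bash -lc` is what makes the port configurable: exec-form CMD performs no shell expansion, so `${PORT:-7860}` would otherwise reach gunicorn as a literal string.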

README.md CHANGED

```diff
@@ -5,8 +5,16 @@ colorFrom: indigo
 colorTo: blue
 sdk: docker
 pinned: false
+license: mit
 ---
 
-# Subtitle KIS (KSI
+# Subtitle KIS (KSI v1.1)
 
 GPU-accelerated subtitle search & summarize (SBERT + FAISS + DistilBART + punctuation).
+
+## How to run locally
+```bash
+pip install -r requirements.txt
+export PORT=7860
+python backend/app.py
+# open http://localhost:7860
```
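With the local run from the snippet above started, a quick readiness probe against the `/health` route (added in `backend/app.py` below) confirms the server is answering; this sketch assumes the default port 7860:

```python
# Probe the health endpoint of a locally running instance (assumes port 7860).
import urllib.request

with urllib.request.urlopen("http://localhost:7860/health", timeout=5) as resp:
    print(resp.status, resp.read().decode())  # expect: 200 {"ok":true}
```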

backend/app.py CHANGED

```diff
@@ -6,8 +6,9 @@ from flask import Flask, render_template, request, jsonify
 from markupsafe import escape, Markup
 from nltk.corpus import wordnet
 from nltk.stem import WordNetLemmatizer
-from semantic_search import search_query
-from nlp_summary import summarize_text
+# NOTE: heavy imports moved to lazy inside perform_search()
+# from semantic_search import search_query
+# from nlp_summary import summarize_text
 from autocomplete import get_suggestions
 from config import ABBREVIATION_MAP, VIDEO_METADATA, SEARCH_CONFIG
 
@@ -31,6 +32,11 @@ def apply_csp(response):
 def index():
     return render_template("index.html")
 
+# Health check (fast) — for HF Spaces readiness
+@app.get("/health")
+def health():
+    return {"ok": True}, 200
+
 # Template filter: convert HH:MM:SS to seconds
 @app.template_filter("jump_time")
 def jump_time(timestamp):
@@ -91,7 +97,10 @@ def perform_search(query, start=0, shown=0, previous_results=None, semantic_mode
     if previous_results is None:
         previous_results = []
 
-
+    # 🔸 Lazy imports so heavy modules load on first search, not at boot
+    from semantic_search import search_query
+    from nlp_summary import summarize_text
+
     raw_results, _ = search_query(query, offset=0, top_k=1000, semantic_mode=semantic_mode)
 
     # Keyword mode
@@ -107,7 +116,6 @@ def perform_search(query, start=0, shown=0, previous_results=None, semantic_mode
         friendly_key = next((k for k, v in VIDEO_METADATA.items() if v["id"] == vid_id), None)
         r["video_title"] = VIDEO_METADATA.get(friendly_key, {}).get("title", "Unknown Title")
 
-
         context_chunks = []
         if idx > 0:
             context_chunks.append(paged_results[idx - 1]["summary_input"])
@@ -211,4 +219,5 @@ def autocomplete():
     return flask_json.dumps(get_suggestions(term))
 
 if __name__ == "__main__":
-
+    port = int(os.environ.get("PORT", 7860))  # HF Spaces default
+    app.run(host="0.0.0.0", port=port, debug=False)
```
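The lazy-import trick leans on Python's module cache: a function-local import pays the heavy load once, and every later call is effectively a `sys.modules` dictionary lookup. A self-contained check of that behaviour, with `decimal` standing in for the heavy `semantic_search`/`nlp_summary` modules:

```python
# Show that a repeated import is served from sys.modules: leaving the imports
# inside perform_search() costs nothing per request after the first search.
import time

def timed_import(name):
    t0 = time.perf_counter()
    __import__(name)  # same machinery a function-local import goes through
    return time.perf_counter() - t0

print("first :", timed_import("decimal"))   # real load from disk
print("second:", timed_import("decimal"))   # sys.modules cache hit
```

This is also why the `/health` route stays fast: it never touches these modules, so Spaces readiness checks don't trigger the model load.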