Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
|
@@ -457,18 +457,28 @@ class WhatsAppZipProcessor:
|
|
| 457 |
# WHATSAPP EXTRACTION PROMPT
|
| 458 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 459 |
|
| 460 |
-
WHATSAPP_EXTRACTION_PROMPT = """You are a support knowledge base curator for the Iris
|
| 461 |
|
| 462 |
Your task: analyse this WhatsApp support group chat segment and extract ONLY clear problem→solution pairs.
|
| 463 |
|
| 464 |
-
CONTEXT ABOUT THIS
|
| 465 |
-
- "Iris" is
|
| 466 |
-
-
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
|
| 470 |
-
|
| 471 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 472 |
|
| 473 |
STRICT RULES:
|
| 474 |
1. Extract ONLY exchanges where a user described a problem AND a named support person (Tendayi, Tony, Violet,
|
|
@@ -822,57 +832,208 @@ def submit_ticket_voice():
|
|
| 822 |
# FEATURE 4 — System Tutorial Ingestion
|
| 823 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 824 |
|
| 825 |
-
|
| 826 |
|
| 827 |
-
|
| 828 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 829 |
|
| 830 |
-
|
| 831 |
-
|
|
|
|
|
|
|
| 832 |
|
| 833 |
-
|
| 834 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 835 |
|
| 836 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 837 |
"""
|
| 838 |
|
| 839 |
-
|
| 840 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 841 |
try:
|
| 842 |
-
|
| 843 |
-
|
| 844 |
-
|
| 845 |
-
|
| 846 |
-
|
| 847 |
-
|
| 848 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 849 |
|
| 850 |
@app.post("/api/kb/tutorial-ingest")
|
| 851 |
def tutorial_ingest():
|
| 852 |
-
|
| 853 |
-
|
| 854 |
-
|
| 855 |
-
|
| 856 |
-
|
| 857 |
-
|
| 858 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 859 |
parsed = _safe_json(raw, [])
|
| 860 |
articles = _validate_articles(parsed) if isinstance(parsed, list) else []
|
|
|
|
| 861 |
if not articles:
|
| 862 |
-
return jsonify({
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 863 |
for a in articles:
|
| 864 |
a["video_url"] = video_url
|
| 865 |
a["video_title"] = video_title
|
| 866 |
for ts_key in ("timestamp_start", "timestamp_end"):
|
| 867 |
val = a.get(ts_key)
|
| 868 |
-
if isinstance(val,
|
| 869 |
-
|
| 870 |
-
|
| 871 |
-
|
|
|
|
|
|
|
| 872 |
stats = _save_kb_articles(articles, source_label=f"tutorial:{video_title}")
|
| 873 |
-
|
| 874 |
-
|
| 875 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 876 |
|
| 877 |
|
| 878 |
# ══════════════════════════════════════════════════════════════════════════════
|
|
|
|
| 457 |
# WHATSAPP EXTRACTION PROMPT
|
| 458 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 459 |
|
| 460 |
+
WHATSAPP_EXTRACTION_PROMPT = """You are a support knowledge base curator for the Iris platform, deployed across Zimbabwe.
|
| 461 |
|
| 462 |
Your task: analyse this WhatsApp support group chat segment and extract ONLY clear problem→solution pairs.
|
| 463 |
|
| 464 |
+
CONTEXT ABOUT THIS PLATFORM:
|
| 465 |
+
- "Iris" is an integrated POS (Point of Sale) and fiscalisation system with a mobile attendance and
|
| 466 |
+
location-tracking module used by field sales reps and in-store tellers at retail stores.
|
| 467 |
+
- The POS and fiscalisation layer handles sales transactions, receipt generation, and ZIMRA fiscal
|
| 468 |
+
compliance. The mobile module handles teller clock-in/out, GPS location verification, and hours tracking.
|
| 469 |
+
- Common POS/fiscal issues: fiscalisation failures, receipt errors, device not syncing to ZIMRA servers,
|
| 470 |
+
Elixir (fiscal device software) login/password problems.
|
| 471 |
+
- Common mobile attendance issues: GPS location not detected, clock-in failures, app killed by Android
|
| 472 |
+
battery optimiser, teller passkey problems, hours recording incorrectly, store radius too small,
|
| 473 |
+
wrong teller name shown after login, app not running in the background.
|
| 474 |
+
- Messages mix English, Shona, and Ndebele. Understand regional vernacular (e.g. "irikudzima" = switching
|
| 475 |
+
off, "ndakashanda" = I worked, "short yemahours" = hours shortage, "gadzirisayi" = fix it, "hupfu" = flour,
|
| 476 |
+
"yakuda kulogwa patsva" = needs to be logged in fresh).
|
| 477 |
+
- If screenshots show Android error dialogs (e.g. "Service killed by system", "App stopped", "Abrupt stop"),
|
| 478 |
+
reason through what that means for Android background restriction and background service killing, and include
|
| 479 |
+
that diagnosis and fix in the solution content.
|
| 480 |
+
- If screenshots show fiscal device or POS screens, extract the error code or state shown and reason through
|
| 481 |
+
the likely cause from the Elixir/ZIMRA integration context.
|
| 482 |
|
| 483 |
STRICT RULES:
|
| 484 |
1. Extract ONLY exchanges where a user described a problem AND a named support person (Tendayi, Tony, Violet,
|
|
|
|
| 832 |
# FEATURE 4 — System Tutorial Ingestion
|
| 833 |
# ══════════════════════════════════════════════════════════════════════════════
|
| 834 |
|
| 835 |
+
# Prompt sent to Gemini together with the uploaded tutorial video.
# NOTE: this is a regular (non-raw) string, so the backslash in the "Use \\n"
# instruction is escaped; otherwise Python would turn it into a real newline and
# the model would never see the literal two-character sequence it is told to emit.
TUTORIAL_VIDEO_PROMPT = """You are a knowledge base curator watching a tutorial video about the Iris platform.

CONTEXT ABOUT IRIS:
- Iris is an integrated POS (Point of Sale) and fiscalisation system with a mobile attendance and
location-tracking module used by tellers and field reps at retail stores in Zimbabwe.
- The POS/fiscal layer handles sales, receipts, and ZIMRA fiscal compliance (Elixir device).
- The mobile module handles teller clock-in/out, GPS location, store radius, and hours tracking.
- The Iris Support Portal is a customer support desk used by admin staff, agents, and support tiers
to manage tickets, agents, customers, and the knowledge base.

YOUR TASK:
Watch this tutorial video in full. For every distinct feature, workflow, or task you observe being
demonstrated, extract one self-contained KB article. Identify the exact timestamp range in the video
where each demonstration occurs so users can jump directly to the relevant moment.

Be precise about timestamps — state the second at which the demonstration starts and ends.
Write step-by-step instructions based on what you see happening on screen, not generic descriptions.
If the presenter speaks, incorporate their narration into the steps.

Return ONLY a valid JSON array. No preamble, no markdown fences. All strings properly JSON-escaped.
Use \\n for line breaks within content strings.

Schema per item:
{
"title": "string — concise how-to title, max 80 chars",
"content": "string — numbered step-by-step instructions based on what is shown",
"category": "one of: Account|Tickets|Agents|Reports|Admin|POS|Attendance|Other",
"tags": ["string"],
"timestamp_start": <integer — seconds from video start where this demo begins>,
"timestamp_end": <integer — seconds from video start where this demo ends>
}

If the video contains no discernible how-to demonstrations, return an empty array: []
"""
|
| 869 |
|
| 870 |
+
|
| 871 |
+
def _upload_video_to_gemini(video_bytes: bytes, mime_type: str, display_name: str) -> Optional[Any]:
    """
    Upload a video to the Gemini Files API and poll until processing is ACTIVE.

    Args:
        video_bytes: Raw bytes of the video file.
        mime_type: MIME type passed to the Files API; its subtype is also used
            as the temp-file suffix.
        display_name: Human-readable name attached to the uploaded file.

    Returns:
        The file object returned by ``files.get`` once its state contains
        "ACTIVE", or None if the client is not configured, the upload raises,
        processing reports "FAILED", or the state never becomes ACTIVE within
        36 polls spaced 5 seconds apart (~3 minutes).

    On success the caller owns the remote file and should delete it after use.
    When this function gives up after a successful upload (FAILED or timeout)
    it makes a best-effort attempt to delete the remote file itself, because
    the caller receives no handle to do so.
    """
    if not _gemini_client:
        return None

    tmp_path = None
    try:
        # Write bytes to a named temp file — Files API needs a file path or IO object
        suffix = f".{mime_type.split('/')[-1]}"
        with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp:
            # Record the path before writing so the finally-clause can remove
            # the file even if the write itself fails.
            tmp_path = tmp.name
            tmp.write(video_bytes)

        logger.info("Uploading video to Gemini Files API: %s (%d bytes)", display_name, len(video_bytes))
        uploaded = _gemini_client.files.upload(
            file=tmp_path,
            config={"mime_type": mime_type, "display_name": display_name},
        )
        logger.info("Upload complete. File name: %s — polling for ACTIVE state...", uploaded.name)
    except Exception as e:
        logger.error("Gemini Files API upload error: %s", e)
        return None
    finally:
        # Always remove the local temp copy, whether or not the upload worked.
        # A failed unlink must not be reported as a failed upload.
        if tmp_path is not None:
            try:
                os.unlink(tmp_path)
            except OSError as e:
                logger.warning("Could not remove temp file %s: %s", tmp_path, e)

    # Poll until state is ACTIVE (video processing complete) — max ~3 minutes
    for attempt in range(36):
        time.sleep(5)
        try:
            file_info = _gemini_client.files.get(name=uploaded.name)
            state = getattr(file_info, "state", None)
            state_str = str(state).upper() if state else ""
            logger.info("Poll %d: file state = %s", attempt + 1, state_str)
            if "ACTIVE" in state_str:
                logger.info("Video ACTIVE after %d polls (~%ds)", attempt + 1, (attempt + 1) * 5)
                return file_info
            elif "FAILED" in state_str:
                logger.error("Gemini Files API processing failed for %s", uploaded.name)
                _delete_gemini_file(uploaded)
                return None
        except Exception as e:
            # Transient poll errors are tolerated; keep trying until the budget runs out.
            logger.warning("Poll error: %s", e)

    logger.error("Video did not reach ACTIVE state within timeout")
    _delete_gemini_file(uploaded)
    return None
|
| 919 |
+
|
| 920 |
+
|
| 921 |
+
def _delete_gemini_file(file_obj: Any) -> None:
    """
    Best-effort cleanup of a file from the Gemini Files API.

    Args:
        file_obj: Object exposing the remote file's ``name`` attribute.
            ``None`` or an object without a usable ``name`` is ignored.

    Never raises: any error from the delete call is logged as a warning.
    """
    # Resolve the name once, defensively. The handler below must not touch
    # file_obj again, or a bad handle would raise from inside the except block.
    name = getattr(file_obj, "name", None)
    if not name:
        return

    try:
        _gemini_client.files.delete(name=name)
        logger.info("Deleted Gemini file: %s", name)
    except Exception as e:
        logger.warning("Could not delete Gemini file %s: %s", name, e)
|
| 928 |
+
|
| 929 |
+
|
| 930 |
+
# Upload allow-list for tutorial videos: lower-cased file extension (with the
# leading dot) -> MIME type handed to the Gemini Files API. Insertion order is
# significant because the keys are joined into the "unsupported format" error.
# NOTE(review): confirm each MIME type here is accepted by the Gemini Files API.
SUPPORTED_VIDEO_MIMES = {
    ".mp4": "video/mp4",
    ".mov": "video/quicktime",
    ".avi": "video/x-msvideo",
    ".webm": "video/webm",
    ".mkv": "video/x-matroska",
    ".3gp": "video/3gpp",
    ".flv": "video/x-flv",
}
|
| 940 |
+
|
| 941 |
|
| 942 |
@app.post("/api/kb/tutorial-ingest")
def tutorial_ingest():
    """
    Accepts a tutorial video file upload (multipart, field name "file").
    Gemini watches the full video, self-generates timestamps, and extracts
    one KB article per distinct feature or task demonstrated.

    No transcript required — Gemini reasons directly from video + audio.

    Supported: mp4, mov, avi, webm, mkv, 3gp, flv
    Max practical size: ~500MB (Files API limit is 2GB, but HF Space upload limit applies)

    Optional form fields: "video_title" (defaults to the uploaded filename)
    and "video_url" (defaults to an empty string); both are copied onto
    every extracted article.

    Returns articles with timestamp_start/end in seconds so the frontend
    can generate deep-links into the video.

    Responses:
        200 — {"ok": True, "video_title", "articles_found", "articles",
               "saved", "skipped_dupes"}
        400 — no file, an empty file, or an unsupported extension
        500 — Gemini client missing, upload/processing failure, analysis
              failure, or no articles extracted
    """
    if "file" not in request.files:
        return jsonify({"ok": False, "error": "No file uploaded. Use multipart field name 'file'."}), 400

    f = request.files["file"]
    filename = f.filename or "tutorial"
    ext = os.path.splitext(filename.lower())[1]
    # Fall back to the filename when the field is missing OR sent empty, so the
    # title and the "tutorial:<title>" source label are never blank.
    video_title = request.form.get("video_title") or filename
    video_url = request.form.get("video_url", "")

    mime_type = SUPPORTED_VIDEO_MIMES.get(ext)
    if not mime_type:
        return jsonify({
            "ok": False,
            "error": f"Unsupported video format '{ext}'. Supported: {', '.join(SUPPORTED_VIDEO_MIMES)}"
        }), 400

    if not _gemini_client:
        return jsonify({"ok": False, "error": "Gemini client not initialised — check GOOGLE_API_KEY"}), 500

    video_bytes = f.read()
    if not video_bytes:
        # Reject empty uploads up front instead of spending an upload + poll cycle on them.
        return jsonify({"ok": False, "error": "Uploaded file is empty."}), 400
    logger.info("Tutorial ingest: '%s', %d bytes, mime=%s", video_title, len(video_bytes), mime_type)

    # Upload to Gemini Files API and wait for processing
    gemini_file = _upload_video_to_gemini(video_bytes, mime_type, display_name=video_title)
    if not gemini_file:
        return jsonify({"ok": False, "error": "Video upload or processing by Gemini failed. Try a smaller file or check the format."}), 500

    # Ask Gemini to watch and extract articles with self-generated timestamps
    try:
        logger.info("Sending video to Gemini for tutorial extraction...")
        resp = _gemini_client.models.generate_content(
            model=GEMINI_MODEL,
            contents=[gemini_file, TUTORIAL_VIDEO_PROMPT],
            config=genai_types.GenerateContentConfig(
                response_mime_type="application/json"
            )
        )
        raw = resp.text or ""
    except Exception as e:
        logger.error("Gemini video analysis error: %s", e)
        # Cleanup happens once, in the finally-clause below; deleting here as
        # well would issue a second delete that fails and logs a spurious warning.
        return jsonify({"ok": False, "error": f"Gemini analysis failed: {e}"}), 500
    finally:
        # Always attempt cleanup — files expire in 48h anyway but clean up early
        _delete_gemini_file(gemini_file)

    parsed = _safe_json(raw, [])
    articles = _validate_articles(parsed) if isinstance(parsed, list) else []

    if not articles:
        return jsonify({
            "ok": False,
            "error": "Gemini could not extract any how-to articles from this video. "
                     "Ensure the video contains on-screen demonstrations of Iris features."
        }), 500

    # Attach video metadata and normalise timestamp types
    for a in articles:
        a["video_url"] = video_url
        a["video_title"] = video_title
        for ts_key in ("timestamp_start", "timestamp_end"):
            val = a.get(ts_key)
            if not isinstance(val, int):
                # Missing or non-numeric timestamps collapse to 0 rather than failing the ingest.
                try:
                    a[ts_key] = int(val) if val is not None else 0
                except (TypeError, ValueError):
                    a[ts_key] = 0

    stats = _save_kb_articles(articles, source_label=f"tutorial:{video_title}")
    logger.info("Tutorial ingest complete: %d articles, saved=%d, skipped=%d",
                len(articles), stats["saved"], stats["skipped"])

    return jsonify({
        "ok": True,
        "video_title": video_title,
        "articles_found": len(articles),
        "articles": articles,  # full list — frontend INSERTs to Supabase kb_articles
        "saved": stats["saved"],
        "skipped_dupes": stats["skipped"],
    })
|
| 1037 |
|
| 1038 |
|
| 1039 |
# ══════════════════════════════════════════════════════════════════════════════
|