Spaces:

chitech2026
/

sandbox_dev

Sleeping

App Files Community

igortech committed on Sep 16, 2025

Commit

92c739f

verified ·

1 Parent(s): 19601ea

Update app.py

Browse files

Files changed (1) hide show

app.py +84 -217

app.py CHANGED Viewed

@@ -3,8 +3,8 @@ import os
 import re
 import csv
 import tempfile
-from difflib import SequenceMatcher
 import datetime
 import gradio as gr
 # -----------------------------
@@ -12,260 +12,127 @@ import gradio as gr
 # -----------------------------
 DATA_PATH = "quotes.json"
 def load_dataset():
     if os.path.exists(DATA_PATH):
         with open(DATA_PATH, "r", encoding="utf-8") as f:
-            data = json.load(f)
-            if "staged_responses" not in data:
-                data["staged_responses"] = []
-            return data
     return {"staged_responses": []}
 dataset = load_dataset()
 # -----------------------------
-# Matching helpers
 # -----------------------------
-def normalize_text(s: str) -> str:
-    return re.sub(r"\W+", " ", (s or "").lower()).strip()
-def tokens(s: str):
-    return set(t for t in normalize_text(s).split() if t)
-def score_quote(user_input: str, quote_text: str):
-    """
-    Score a quote vs user input:
-      - token overlap yields a boosted score
-      - otherwise fallback to SequenceMatcher ratio
-    """
-    u_toks = tokens(user_input)
-    q_toks = tokens(quote_text)
-    overlap = len(u_toks & q_toks)
-    if overlap > 0:
-        return 1.0 + (overlap / max(1, len(q_toks)))
-    return SequenceMatcher(None, user_input.lower(), quote_text.lower()).ratio()
-def find_best_quotes(category, user_input, top_n=3, threshold=0.15):
-    """
-    Find best matches:
-     - try within `category` first (if provided)
-     - if none above `threshold`, search across all categories
-     - return list of tuples (score, quote, category)
-    """
-    if not user_input or not user_input.strip():
-        return []
-    def score_list_for_cat(cat):
-        scored = []
-        for item in dataset.get(cat, []):
-            q = item.get("quote", "")
-            s = score_quote(user_input, q)
-            scored.append((s, q, cat))
-        return scored
-    # 1) search selected category first (if present)
-    if category and category in dataset and category != "staged_responses":
-        scored = score_list_for_cat(category)
-        scored.sort(key=lambda x: x[0], reverse=True)
-        if scored and scored[0][0] >= threshold:
-            return scored[:top_n]
-    # 2) fallback: search all categories
-    all_scored = []
-    for cat in dataset.keys():
-        if cat == "staged_responses":
             continue
-        all_scored.extend(score_list_for_cat(cat))
-    all_scored.sort(key=lambda x: x[0], reverse=True)
-    if all_scored and all_scored[0][0] >= threshold:
-        return all_scored[:top_n]
-    # 3) nothing matches well enough
-    return []
-# -----------------------------
-# Response generation
-# -----------------------------
-def generate_three_fold(category, user_text):
-    matches = find_best_quotes(category, user_text, top_n=3, threshold=0.15)
     if not matches:
-        unknown_msg = f"No data about {user_text} (unknown)."
-        return unknown_msg, unknown_msg, "Reference: None"
-    top_quote = matches[0][1]
-    first_sentence = top_quote.split(".")[0].strip()
-    summary = f"Summary: {first_sentence}."
-    fused = " ".join(dict.fromkeys([m[1] for m in matches]))  # unique preserve order
-    fusion = f"Fusion: {fused}"
-    top_cat = matches[0][2]
-    reference = f"Reference: Example search for '{category}' (top match from '{top_cat}')."
-    return summary, fusion, reference
 # -----------------------------
-# Conversation & staging utilities
 # -----------------------------
-def append_user_assistant(history, user_text, assistant_text):
-    history = history or []
-    history.append({"role": "user", "content": user_text})
-    history.append({"role": "assistant", "content": assistant_text})
-    return history
-def get_last_user_and_assistant(history):
-    last_user = None
-    last_assistant = None
     if not history:
-        return None, None
-    # find last user and assistant after it
-    # traverse backwards to find last user; then find next assistant after that index
-    last_user_idx = None
-    for i in range(len(history)-1, -1, -1):
-        if history[i].get("role") == "user":
-            last_user_idx = i
-            last_user = history[i].get("content")
-            break
-    if last_user_idx is not None:
-        # find assistant after user (forward from user index)
-        for j in range(last_user_idx+1, len(history)):
-            if history[j].get("role") == "assistant":
-                last_assistant = history[j].get("content")
-                break
-    return last_user, last_assistant
 # -----------------------------
-# Temp file helpers
 # -----------------------------
-def write_temp_json(obj, suffix=".json"):
-    tf = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
-    path = tf.name
-    tf.close()
-    with open(path, "w", encoding="utf-8") as f:
-        json.dump(obj, f, indent=2, ensure_ascii=False)
-    return path
-def write_temp_csv_from_history(history, suffix=".csv"):
     if not history:
         return None
-    tf = tempfile.NamedTemporaryFile(delete=False, suffix=suffix)
-    path = tf.name
-    tf.close()
-    with open(path, "w", newline="", encoding="utf-8") as f:
-        writer = csv.writer(f)
-        writer.writerow(["role", "content"])
-        for m in history:
-            writer.writerow([m.get("role",""), m.get("content","")])
-    return path
-# -----------------------------
-# Gradio callbacks (UI-safe)
-# -----------------------------
-def respond(message, state, category):
-    """
-    Called by Send button or Enter.
-    Returns: cleared input, updated state, updated chatbot display (state replicated)
-    """
-    history = state or []
-    if not (message and message.strip()):
-        return "", history, history
-    summary, fusion, reference = generate_three_fold(category, message)
-    assistant_text = f"{summary}\n\n{fusion}\n\n{reference}"
-    history = append_user_assistant(history, message, assistant_text)
-    return "", history, history
-def clear_all():
-    # clear textbox, state and chatbot
-    return "", [], []
-def upload_json(filepath):
-    """Load uploaded dataset file (filepath is local path inside container)"""
-    global dataset, DATA_PATH
-    try:
-        with open(filepath, "r", encoding="utf-8") as f:
-            data = json.load(f)
-        if not isinstance(data, dict):
-            return "Upload failed: root must be an object", gr.update(choices=sorted(list(dataset.keys())), value=None)
-        if "staged_responses" not in data:
-            data["staged_responses"] = []
-        dataset = data
-        DATA_PATH = os.path.basename(filepath)
-        cats = sorted([k for k in dataset.keys() if k != "staged_responses"])
-        status = f"Loaded {len(cats)} categories from {DATA_PATH}."
-        return status, gr.update(choices=cats, value=(cats[0] if cats else None))
-    except Exception as e:
-        return f"Error loading file: {e}", gr.update(choices=sorted(list(dataset.keys())), value=None)
-def stage_last_conversation(state, target_category):
-    """
-    Stage the last user + assistant pair into dataset['staged_responses']
-    (stored as {"question":..., "answer":..., "category":...})
-    """
-    if not state:
-        return "No conversation in memory."
-    last_user, last_assistant = get_last_user_and_assistant(state)
-    if not last_user:
-        return "No user message to stage."
-    entry = {"question": last_user, "answer": last_assistant or "", "category": target_category}
-    if "staged_responses" not in dataset:
-        dataset["staged_responses"] = []
-    dataset["staged_responses"].append(entry)
-    return f"Staged last Q/A into '{target_category}'."
-def download_conversation_csv(state):
-    path = write_temp_csv_from_history(state or [])
-    if not path:
-        return gr.File.update(value=None)
-    return gr.File.update(value=path)
-def download_current_dataset():
-    path = write_temp_json(dataset, suffix=".json")
-    return gr.File.update(value=path)
 # -----------------------------
-# Gradio UI (components + wiring)
 # -----------------------------
 with gr.Blocks() as demo:
-    gr.Markdown("## Campus Life — 3-fold responses, staging, CSV/JSON downloads")
-    # dropdown choices exclude staged_responses
-    category_choices = sorted([k for k in dataset.keys() if k != "staged_responses"])
-    with gr.Row():
-        category = gr.Dropdown(label="Category", choices=category_choices,
-                               value=(category_choices[0] if category_choices else None))
     chatbot = gr.Chatbot(label="Conversation", height=360, type="messages")
-    conversation_state = gr.State([])  # holds list of {"role":..,"content":..}
-    msg = gr.Textbox(label="Your message", placeholder="Type and press Enter (or click Send)", autofocus=True)
-    send = gr.Button("Send")
-    clear = gr.Button("Clear")
     with gr.Row():
-        stage_btn = gr.Button("Stage last Q/A to category")
-        stage_status = gr.Textbox(label="Stage status", interactive=False, value="")
     with gr.Row():
-        upload = gr.File(label="Upload dataset (.json)", file_types=[".json"], type="filepath")
-        upload_status = gr.Textbox(label="Upload status", interactive=False, value="")
-        download_json_btn = gr.Button("Download current dataset (JSON)")
-        download_json_file = gr.File(label="Download JSON", interactive=True)
-        download_csv_btn = gr.Button("Download conversation (CSV)")
-        download_csv_file = gr.File(label="Download CSV", interactive=True)
-    # events
-    msg.submit(respond, [msg, conversation_state, category], [msg, conversation_state, chatbot])
-    send.click(respond, [msg, conversation_state, category], [msg, conversation_state, chatbot])
-    clear.click(clear_all, [], [msg, conversation_state, chatbot])
-    stage_btn.click(stage_last_conversation, [conversation_state, category], stage_status)
-    upload.upload(upload_json, upload, [upload_status, category])
-    download_csv_btn.click(download_conversation_csv, [conversation_state], download_csv_file)
-    download_json_btn.click(download_current_dataset, None, download_json_file)
-# -----------------------------
-# Startup log
-# -----------------------------
-print("===== Application startup =====")
-print(f"Dataset categories: {[k for k in dataset.keys()]}")
 if __name__ == "__main__":
-    demo.launch(server_name="0.0.0.0", server_port=7860)

 import re
 import csv
 import tempfile
 import datetime
+from difflib import SequenceMatcher
 import gradio as gr
 # -----------------------------
 # -----------------------------
 DATA_PATH = "quotes.json"
 def load_dataset():
     if os.path.exists(DATA_PATH):
         with open(DATA_PATH, "r", encoding="utf-8") as f:
+            return json.load(f)
     return {"staged_responses": []}
+def save_dataset(data):
+    with open(DATA_PATH, "w", encoding="utf-8") as f:
+        json.dump(data, f, indent=2, ensure_ascii=False)
 dataset = load_dataset()
 # -----------------------------
+# Core logic
 # -----------------------------
+def find_best_matches(user_input, dataset, top_n=3, threshold=0.3):
+    matches = []
+    for category, quotes in dataset.items():
+        if category == "staged_responses":
             continue
+        for entry in quotes:
+            quote = entry["quote"]
+            score = SequenceMatcher(None, user_input.lower(), quote.lower()).ratio()
+            matches.append((score, category, quote))
+    matches.sort(key=lambda x: x[0], reverse=True)
+    return [m for m in matches if m[0] >= threshold][:top_n]
+def generate_response(message, history):
+    matches = find_best_matches(message, dataset)
     if not matches:
+        return (
+            history
+            + [{"role": "assistant", "content": f"No data about {message}."}]
+        )
+    responses = []
+    for score, category, quote in matches:
+        responses.append(f"Category: {category}\nWhat real people say:\n{quote}")
+    reply = "\n\n".join(responses)
+    return history + [{"role": "assistant", "content": reply}]
 # -----------------------------
+# Conversation & staging
 # -----------------------------
+def stage_conversation(history, category):
     if not history:
+        return "No conversation to stage."
+    convo_text = "\n".join([f"{msg['role']}: {msg['content']}" for msg in history])
+    new_entry = {"quote": convo_text}
+    if "staged_responses" not in dataset:
+        dataset["staged_responses"] = []
+    dataset["staged_responses"].append(new_entry)
+    save_dataset(dataset)
+    return f"Conversation staged under {category}."
 # -----------------------------
+# Download helpers
 # -----------------------------
+def download_conversation_csv(history):
     if not history:
         return None
+    tmpfile = tempfile.NamedTemporaryFile(delete=False, suffix=".csv", mode="w", newline="", encoding="utf-8")
+    writer = csv.writer(tmpfile)
+    writer.writerow(["role", "content"])
+    for msg in history:
+        writer.writerow([msg["role"], msg["content"]])
+    tmpfile.close()
+    return tmpfile.name
+def download_dataset():
+    tmpfile = tempfile.NamedTemporaryFile(delete=False, suffix=".json", mode="w", encoding="utf-8")
+    json.dump(dataset, tmpfile, indent=2, ensure_ascii=False)
+    tmpfile.close()
+    return tmpfile.name
 # -----------------------------
+# Gradio UI
 # -----------------------------
 with gr.Blocks() as demo:
+    gr.Markdown("# Campus Conversation Bot")
     chatbot = gr.Chatbot(label="Conversation", height=360, type="messages")
+    msg = gr.Textbox(label="Type your question", placeholder="Ask me something...", container=True)
     with gr.Row():
+        clear_btn = gr.Button("Clear")
+        export_csv_btn = gr.Button("Export Conversation to CSV")
+        download_json_btn = gr.Button("Download Current Dataset")
     with gr.Row():
+        category_dropdown = gr.Dropdown(choices=list(dataset.keys()), label="Choose category to stage", interactive=True)
+        stage_btn = gr.Button("Stage Conversation to Category")
+    # Events
+    msg.submit(generate_response, [msg, chatbot], chatbot)
+    msg.submit(lambda: "", None, msg)  # clear textbox on Enter
+    clear_btn.click(lambda: [], None, chatbot)
+    export_csv_file = gr.File(label="Download Conversation CSV")
+    export_csv_btn.click(download_conversation_csv, chatbot, export_csv_file)
+    download_json_file = gr.File(label="Download Dataset JSON")
+    download_json_btn.click(download_dataset, None, download_json_file)
+    stage_btn.click(stage_conversation, [chatbot, category_dropdown], None)
 if __name__ == "__main__":
+    demo.launch()