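"""Flask backend for the PledgeTracker demo.

Suggests similar reference pledges via TF-IDF, runs the pledge-tracking
pipeline with live status updates, and logs events plus user feedback to
the Hugging Face dataset PledgeTracker/demo_feedback. Route paths marked
"assumed" below are illustrative reconstructions, not confirmed by the
source.
"""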
from flask import Flask, jsonify, send_file, request, send_from_directory
from flask_cors import CORS
import os, json, uuid, time
import sys
import threading
import traceback
from datetime import datetime, timedelta

import pandas as pd
import spacy
from huggingface_hub import HfApi, hf_hub_download
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

from system.pledge_tracking import run_pipeline
nlp = spacy.load("en_core_web_sm")

app = Flask(__name__, static_folder='.')
CORS(app)

HF_DATASET_REPO = "PledgeTracker/demo_feedback"
HF_TOKEN = os.environ.get("HF_TOKEN")
TMP_DIR = "tmp"
FEEDBACK_DIR = "feedback_logs"

os.makedirs(TMP_DIR, exist_ok=True)
os.makedirs(FEEDBACK_DIR, exist_ok=True)
# Load the reference pledge list used for TF-IDF suggestions.
REFERENCE_PLEDGES = []
REFERENCE_PLEDGE_PATH = hf_hub_download(
    repo_id=HF_DATASET_REPO,
    filename="existing_pledges.txt",
    repo_type="dataset",
    token=HF_TOKEN,  # same optional token as elsewhere; os.environ["HF_TOKEN"] would raise if unset
)
if os.path.exists(REFERENCE_PLEDGE_PATH):
    with open(REFERENCE_PLEDGE_PATH, "r") as f:
        REFERENCE_PLEDGES = [line.strip() for line in f if line.strip()]
else:
    print(f"Missing reference pledge file: {REFERENCE_PLEDGE_PATH}")
def lemmatize(text):
    # Lemmatise with spaCy, dropping punctuation and whitespace tokens.
    doc = nlp(text)
    return " ".join(token.lemma_ for token in doc if not token.is_punct and not token.is_space)
@app.route("/similar_pledges", methods=["POST"])  # route path assumed
def similar_pledges():
    data = request.get_json()
    claim = data.get("claim", "").strip()
    if not claim or not REFERENCE_PLEDGES:
        return jsonify({"suggestions": []})
    # Rank reference pledges by TF-IDF cosine similarity to the submitted claim.
    all_pledges = [claim] + REFERENCE_PLEDGES
    lemmatized_pledges = [lemmatize(p) for p in all_pledges]
    vectors = TfidfVectorizer().fit_transform(lemmatized_pledges)
    similarities = cosine_similarity(vectors[0:1], vectors[1:]).flatten()
    filtered = [(i, score) for i, score in enumerate(similarities) if score > 0.3]
    top_filtered = sorted(filtered, key=lambda x: x[1], reverse=True)[:5]
    suggestions = [
        {"text": REFERENCE_PLEDGES[i], "index": int(i)}
        for i, score in top_filtered
    ]
    return jsonify({"suggestions": suggestions})
def calculate_time_range(option: str, pledge_date: str = None):
    today = datetime.today()
    if isinstance(pledge_date, str):
        # Validate before parsing; strptime("") would raise a cryptic error.
        if not pledge_date:
            raise ValueError("Pledge date is required")
        pledge_date = datetime.strptime(pledge_date, "%Y-%m-%d")
    elif not isinstance(pledge_date, datetime):
        raise ValueError("pledge_date must be a str or datetime")
    if option == "week":
        start = max(today - timedelta(days=7), pledge_date)
    elif option == "month":
        # "month" is approximated as the last 30 days.
        start = max(today - timedelta(days=30), pledge_date)
    elif option == "since_pledge_date":
        start = pledge_date
    else:
        raise ValueError("Invalid time range option")
    return start.strftime("%Y%m%d"), today.strftime("%Y%m%d")
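# Examples (with today = 2024-06-15):
#   calculate_time_range("week", "2024-06-12")              -> ("20240612", "20240615")
#   calculate_time_range("month", "2024-01-01")             -> ("20240516", "20240615")
#   calculate_time_range("since_pledge_date", "2024-01-01") -> ("20240101", "20240615")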
@app.route("/")  # route path assumed; serves the demo front end
def serve_html():
    return send_from_directory('.', 'test.html')
@app.route("/status")  # route path assumed
def check_status():
    user_id = request.args.get("user_id")
    timestamp = request.args.get("timestamp")
    log_file_path = os.path.join(TMP_DIR, f"{timestamp}_{user_id}_status.log")
    # Before the pipeline writes its first update, report an empty status.
    if not os.path.exists(log_file_path):
        return jsonify({"status": {}}), 200
    try:
        with open(log_file_path, "r") as f:
            status = json.load(f)
    except Exception:
        status = {}
    return jsonify({"status": status})
@app.route("/run", methods=["POST"])  # route path assumed
def run_model():
    data = request.get_json()
    claim = data.get("claim", "no input")
    time_range_option = data.get("time_range", "month")
    system_start_time = datetime.now()
    suggestion_meta = data.get("suggestion_meta")
    pledge_date = data.get("pledge_date", "")
    pledge_author = data.get("pledge_author", "")
    timestamp = data.get("timestamp") or time.strftime("%Y-%m-%d_%H-%M-%S")
    user_id = data.get("user_id") or str(uuid.uuid4())[:8]
    log_file_path = os.path.join(TMP_DIR, f"{timestamp}_{user_id}_status.log")
    status_lock = threading.Lock()

    def update_status(step_id, msg):
        # Merge this step's message into the shared status log under a lock,
        # so concurrent pipeline callbacks do not clobber each other's writes.
        print(f"[STATUS] Step {step_id}: {msg}")
        with status_lock:
            if os.path.exists(log_file_path):
                try:
                    with open(log_file_path, "r") as f:
                        current = json.load(f)
                except Exception:
                    current = {}
            else:
                current = {}
            current[str(step_id)] = str(msg)
            with open(log_file_path, "w") as f:
                json.dump(current, f, indent=2)
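    # The status log is a flat JSON object mapping step ids to messages, e.g.
    #   {"0": "📌 Starting the system ...", "1": "..."}
    # check_status() serves it verbatim so the front end can poll progress.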
    try:
        time_start, time_end = calculate_time_range(time_range_option, pledge_date=pledge_date)
        print(f"[DEMO] Received claim: {claim}")
        print(f"[DEMO] Time range: {time_start} ~ {time_end}")
        print(f"[DEMO] Pledge date: {pledge_date}")
        update_status(0, "📌 Starting the system ...")
        print(suggestion_meta)
        outputs = run_pipeline(
            claim, pledge_date, pledge_author, time_start, timestamp, user_id,
            update_fn=update_status, suggestion_meta=suggestion_meta
        )
        # Convert the pipeline's sorted-events spreadsheet to JSON for the front end.
        df = pd.read_excel(outputs["sorted_events"])
        json_path = os.path.join(TMP_DIR, f"{timestamp}_{user_id}.json")
        df.to_json(json_path, orient="records", indent=2)
        system_end_time = datetime.now()
        runtime = system_end_time - system_start_time
        events = df.to_dict(orient="records")
        log_entry = {
            "requested_time": timestamp,
            "user_id": user_id,
            "pledge": claim,
            "suggestion_meta": suggestion_meta,
            "time_start": time_start,
            "time_end": time_end,
            "runtime": runtime.total_seconds(),
            "pledge_author": pledge_author,
            "pledge_date": pledge_date,
            "events": events
        }
        # Write a default log immediately so the run is captured even if the
        # user never submits feedback; receive_feedback() later overwrites it.
        default_log_path = f"{FEEDBACK_DIR}/feedback_{timestamp}_{user_id}.jsonl"
        with open(default_log_path, "w") as f:
            f.write(json.dumps(log_entry, indent=1))
        tsv_path = outputs["augmented_tsv_file"]
        try:
            api = HfApi()
            api.upload_file(
                path_or_fileobj=default_log_path,
                path_in_repo=f"logs/feedback_{timestamp}_{user_id}.jsonl",
                repo_id=HF_DATASET_REPO,
                repo_type="dataset",
                token=HF_TOKEN
            )
            api.upload_file(
                path_or_fileobj=tsv_path,
                path_in_repo=f"logs/augmented_{timestamp}_{user_id}.tsv",
                repo_id=HF_DATASET_REPO,
                repo_type="dataset",
                token=HF_TOKEN
            )
        except Exception as e:
            traceback.print_exc()
            print(f"[Default Feedback Upload Error] {e}")
        return jsonify({
            "status": "success",
            "file": f"{timestamp}_{user_id}.json",
            "user_id": user_id,
            "timestamp": timestamp
        })
    except Exception as e:
        traceback.print_exc()
        return jsonify({"status": "error", "detail": str(e)}), 500
@app.route("/events")  # route path assumed
def get_events():
    filename = request.args.get("file")
    file_path = os.path.join(TMP_DIR, filename)
    if not os.path.exists(file_path):
        return jsonify({"error": "File not found"}), 404
    with open(file_path, "r") as f:
        events = json.load(f)
    return jsonify(events)
@app.route("/feedback", methods=["POST"])  # route path assumed
def receive_feedback():
    data = request.get_json()
    pledge = data.get("pledge", "no_pledge_text")
    feedback_list = data.get("feedback", [])
    filename = data.get("file")
    timestamp = data.get("timestamp")
    user_id = data.get("user_id")
    if not user_id or not timestamp:
        return jsonify({'status': 'error', 'detail': 'Missing user_id or timestamp'}), 400
    file_path = os.path.join(TMP_DIR, filename)
    if not os.path.exists(file_path):
        return jsonify({"error": "Event file not found"}), 400
    with open(file_path, "r") as f:
        events = json.load(f)
    # Recover run metadata from the default log written by run_model(); every
    # field defaults to None so a missing or unreadable log cannot raise later.
    suggestion_meta = None
    time_start = None
    time_end = None
    pledge_author = None
    pledge_date = None
    runtime = None
    try:
        prev_log_path = f"{FEEDBACK_DIR}/feedback_{timestamp}_{user_id}.jsonl"
        with open(prev_log_path, "r") as f:
            previous_log = json.load(f)
        suggestion_meta = previous_log.get("suggestion_meta")
        time_start = previous_log.get("time_start")
        time_end = previous_log.get("time_end")
        pledge_author = previous_log.get("pledge_author")
        pledge_date = previous_log.get("pledge_date")
        runtime = previous_log.get("runtime")
    except Exception:
        pass
    # Attach each answer to its event by index.
    feedback_dict = {int(item['eventIndex']): item['answer'] for item in feedback_list}
    for idx, event in enumerate(events):
        event["user_feedback"] = feedback_dict.get(idx)
    log_entry = {
        "requested_time": timestamp,
        "user_id": user_id,
        "pledge": pledge,
        "suggestion_meta": suggestion_meta,
        "time_start": time_start,
        "time_end": time_end,
        "runtime": runtime,
        "pledge_author": pledge_author,
        "pledge_date": pledge_date,
        "events": events
    }
    local_filename = f"{FEEDBACK_DIR}/feedback_{timestamp}_{user_id}.jsonl"
    with open(local_filename, "w") as f:
        f.write(json.dumps(log_entry, indent=1))
    try:
        api = HfApi()
        api.upload_file(
            path_or_fileobj=local_filename,
            path_in_repo=f"logs/feedback_{timestamp}_{user_id}.jsonl",
            repo_id=HF_DATASET_REPO,
            repo_type="dataset",
            token=HF_TOKEN
        )
    except Exception as e:
        # Local save succeeded but the dataset upload failed.
        return jsonify({'status': 'partial_success', 'error': str(e)}), 500
    return jsonify({'status': 'success'})
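# Illustrative feedback payload for the endpoint above (field values are examples):
#   POST /feedback  {"user_id": "ab12cd34", "timestamp": "2024-06-15_10-00-00",
#                    "file": "2024-06-15_10-00-00_ab12cd34.json", "pledge": "...",
#                    "feedback": [{"eventIndex": 0, "answer": "..."}]}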
@app.route("/feedback_logs/<path:filename>")  # route path assumed
def download_feedback_file(filename):
    return send_from_directory(FEEDBACK_DIR, filename, as_attachment=True)

@app.route("/feedback_files")  # route path assumed
def list_feedback_files():
    files = os.listdir(FEEDBACK_DIR)
    return jsonify(sorted(files))
@app.route("/download_excel")  # route path assumed
def download_excel():
    file = request.args.get("file")
    if not file:
        return "Missing file param", 400
    json_path = os.path.join(TMP_DIR, file)
    if not os.path.exists(json_path):
        return "Event file not found", 404
    with open(json_path, "r") as f:
        data = json.load(f)
    # Rebuild the spreadsheet from the stored JSON rather than keeping the
    # pipeline's original .xlsx around.
    df = pd.DataFrame(data)
    xlsx_path = os.path.join(TMP_DIR, file.replace(".json", ".xlsx"))
    df.to_excel(xlsx_path, index=False)
    return send_file(xlsx_path, as_attachment=True)
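# Illustrative usage (route path as assumed above):
#   GET /download_excel?file=2024-06-15_10-00-00_ab12cd34.json
#   returns the same events as an .xlsx attachment.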
if __name__ == '__main__':
    app.run(host="0.0.0.0", port=7860)