File size: 11,285 Bytes
35b3f62
 
 
 
 
 
 
 
 
 
 
 
 
 
10e50a5
35b3f62
10e50a5
35b3f62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259448b
 
 
 
 
 
35b3f62
 
 
 
 
 
 
 
 
 
431c8be
35b3f62
 
44ef3dd
35b3f62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
90fcd9f
b2cd959
35b3f62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95d7c79
35b3f62
 
 
 
 
 
 
259448b
35b3f62
 
 
 
 
 
 
 
 
 
6101e3a
35b3f62
 
 
 
 
6101e3a
35b3f62
90fcd9f
5cfc531
6101e3a
35b3f62
 
 
fcd14e1
35b3f62
 
fcd14e1
 
765435c
35b3f62
 
5cfc531
35b3f62
 
fcd14e1
35b3f62
 
 
9529a0d
d210108
35b3f62
 
 
 
 
 
 
 
 
f4af3c4
 
 
 
 
 
 
06d0c78
35b3f62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b94c715
ab44078
 
 
 
 
35b3f62
 
 
 
 
 
 
ab44078
 
 
 
 
 
 
 
 
 
bf32721
 
5cfc531
ab44078
 
35b3f62
ab44078
35b3f62
 
 
 
ab44078
 
35b3f62
ab44078
 
 
5cfc531
35b3f62
 
5cfc531
35b3f62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab44078
35b3f62
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
from flask import Flask, jsonify, send_file, request, send_from_directory
from flask_cors import CORS
import os, json, uuid, time
import pandas as pd
from datetime import datetime, timedelta
from huggingface_hub import HfApi
import sys
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from system.pledge_tracking import run_pipeline  
from huggingface_hub import hf_hub_download
import spacy
import traceback
import threading

# spaCy pipeline loaded once at import time and reused by lemmatize().
nlp = spacy.load("en_core_web_sm")

# NOTE(review): static_folder='.' presumably makes Flask's static route serve
# files from the application directory itself (source, logs, ...) -- confirm
# that this exposure is intended.
app = Flask(__name__, static_folder='.')
CORS(app)

HF_DATASET_REPO = "PledgeTracker/demo_feedback"
HF_TOKEN = os.environ.get("HF_TOKEN")
TMP_DIR = "tmp"
FEEDBACK_DIR = "feedback_logs"
os.makedirs(TMP_DIR, exist_ok=True)
os.makedirs(FEEDBACK_DIR, exist_ok=True)

# Known pledges offered as suggestions by /api/similar-pledges. Stays empty
# when the reference file cannot be obtained; that endpoint then returns no
# suggestions instead of failing.
REFERENCE_PLEDGES = []

try:
    # Reuse the repo/token constants above so the download and the uploads
    # always target the same dataset with the same credentials (and a missing
    # HF_TOKEN no longer raises KeyError at import time).
    REFERENCE_PLEDGE_PATH = hf_hub_download(
        repo_id=HF_DATASET_REPO,
        filename="existing_pledges.txt",
        repo_type="dataset",
        token=HF_TOKEN,
    )
except Exception as e:
    # Best effort: the app is still usable without suggestions.
    REFERENCE_PLEDGE_PATH = None
    print(f"Could not download reference pledge file: {e}")

if REFERENCE_PLEDGE_PATH and os.path.exists(REFERENCE_PLEDGE_PATH):
    # Explicit encoding so non-ASCII pledge text does not depend on the locale.
    with open(REFERENCE_PLEDGE_PATH, "r", encoding="utf-8") as f:
        REFERENCE_PLEDGES = [line.strip() for line in f if line.strip()]
else:
    print(f"Missing reference pledge file: {REFERENCE_PLEDGE_PATH}")


def lemmatize(text):
    """Return the lemmas of `text` joined by single spaces.

    The text is run through the module-level spaCy pipeline; punctuation and
    whitespace tokens are left out of the result.
    """
    lemmas = []
    for token in nlp(text):
        if token.is_punct or token.is_space:
            continue
        lemmas.append(token.lemma_)
    return " ".join(lemmas)


@app.route("/api/similar-pledges", methods=["POST"])
def similar_pledges():
    """Suggest reference pledges that are textually similar to a claim.

    Expects a JSON object with a "claim" string. The claim and every entry of
    REFERENCE_PLEDGES are lemmatized, TF-IDF vectorized together, and compared
    by cosine similarity. Up to five pledges scoring above 0.3 are returned,
    best first, as {"suggestions": [{"text": ..., "index": ...}, ...]} where
    "index" is the position in REFERENCE_PLEDGES.

    An empty/missing/non-string claim, or an empty reference list, yields
    {"suggestions": []}.
    """
    min_similarity = 0.3
    max_suggestions = 5

    payload = request.get_json()
    # A JSON body that is not an object (e.g. `null` or a list) has no fields.
    data = payload if isinstance(payload, dict) else {}
    raw_claim = data.get("claim")
    # `"claim": null` (or any non-string) must not crash on .strip().
    claim = raw_claim.strip() if isinstance(raw_claim, str) else ""
    if not claim or not REFERENCE_PLEDGES:
        return jsonify({"suggestions": []})

    # Row 0 is the claim; rows 1.. line up with REFERENCE_PLEDGES.
    lemmatized_pledges = [lemmatize(p) for p in [claim] + REFERENCE_PLEDGES]
    tfidf = TfidfVectorizer().fit_transform(lemmatized_pledges)
    similarities = cosine_similarity(tfidf[0:1], tfidf[1:]).flatten()

    scored = [(i, score) for i, score in enumerate(similarities) if score > min_similarity]
    top_scored = sorted(scored, key=lambda pair: pair[1], reverse=True)[:max_suggestions]

    suggestions = [
        {"text": REFERENCE_PLEDGES[i], "index": int(i)}
        for i, _score in top_scored
    ]

    return jsonify({"suggestions": suggestions})


def calculate_time_range(option: str, pledge_date: str = None):
    """Return the (start, end) search window as "YYYYMMDD" strings.

    Args:
        option: "week" (last 7 days), "month" (last 30 days) or
            "since_pledge_date".
        pledge_date: The pledge date as a "YYYY-MM-DD" string or a datetime.
            May be None or blank for "week"/"month"; when given, the window
            never starts before it.

    Returns:
        A tuple (start, end) where end is today's date.

    Raises:
        ValueError: If `option` is unknown, `pledge_date` has an unsupported
            type or an unparsable format, or it is missing for
            "since_pledge_date".
    """
    today = datetime.today()

    if isinstance(pledge_date, str):
        date_text = pledge_date.strip()
        # A blank string means "no pledge date", same as None.
        pledge_date = datetime.strptime(date_text, "%Y-%m-%d") if date_text else None
    elif pledge_date is not None and not isinstance(pledge_date, datetime):
        raise ValueError("pledge_date must be a str or datetime")

    lookback_days = {"week": 7, "month": 30}
    if option in lookback_days:
        start = today - timedelta(days=lookback_days[option])
        if pledge_date is not None:
            # Never look further back than the pledge itself.
            start = max(start, pledge_date)
    elif option == "since_pledge_date":
        if pledge_date is None:
            raise ValueError("Pledge date is required for 'since_pledge_date' option")
        start = pledge_date
    else:
        raise ValueError("Invalid time range option")

    return start.strftime("%Y%m%d"), today.strftime("%Y%m%d")

@app.route("/")
def serve_html():
    """Serve the front-end page `test.html` from directory '.'."""
    page_dir, page_name = '.', 'test.html'
    return send_from_directory(page_dir, page_name)

@app.route("/api/status")
def check_status():
    """Report the progress recorded for one model run.

    Reads the `user_id` and `timestamp` query parameters, locates the matching
    `<timestamp>_<user_id>_status.log` file in TMP_DIR and returns its JSON
    content as {"status": {...}}. A missing or unreadable file yields an
    empty status object.
    """
    query = request.args
    status_file = os.path.join(
        TMP_DIR, f"{query.get('timestamp')}_{query.get('user_id')}_status.log"
    )

    progress = {}
    if os.path.exists(status_file):
        try:
            with open(status_file, "r") as handle:
                progress = json.load(handle)
        except Exception:
            # Unreadable or invalid JSON: report "nothing yet".
            progress = {}

    return jsonify({"status": progress})


@app.route("/api/run-model", methods=["POST"])
def run_model():
    """Run the pledge-tracking pipeline for one claim and log the result.

    Reads a JSON object with the optional fields "claim", "time_range",
    "suggestion_meta", "pledge_date", "pledge_author", "timestamp" and
    "user_id" (the last two are generated when absent). While the pipeline
    runs, progress messages are written to
    `<TMP_DIR>/<timestamp>_<user_id>_status.log`.

    On success the sorted events are saved as `<timestamp>_<user_id>.json` in
    TMP_DIR, a log entry is written to FEEDBACK_DIR and uploaded (best effort)
    to the Hugging Face dataset together with the augmented TSV, and the
    response is {"status": "success", "file", "user_id", "timestamp"}.

    Returns HTTP 400 when `timestamp`/`user_id` contain path separators and
    HTTP 500 with {"status": "error", "detail": ...} when the pipeline fails.
    """
    payload = request.get_json()
    # A JSON body that is not an object (e.g. `null` or a list) would break the
    # `.get` calls below, outside the error handling; treat it as empty.
    data = payload if isinstance(payload, dict) else {}
    claim = data.get("claim", "no input")
    time_range_option = data.get("time_range", "month")
    system_start_time = datetime.now()

    suggestion_meta = data.get("suggestion_meta")
    pledge_date = data.get("pledge_date", "")
    pledge_author = data.get("pledge_author", "")
    timestamp = data.get("timestamp") or time.strftime("%Y-%m-%d_%H-%M-%S")
    user_id = data.get("user_id") or str(uuid.uuid4())[:8]

    # Both values come from the client and are embedded in file names below;
    # a path separator would let a request write outside TMP_DIR/FEEDBACK_DIR.
    if any(sep in str(part) for part in (timestamp, user_id) for sep in ("/", "\\")):
        return jsonify({"status": "error", "detail": "Invalid user_id or timestamp"}), 400

    log_file_path = os.path.join(TMP_DIR, f"{timestamp}_{user_id}_status.log")

    # Serializes the read-modify-write of the status file across calls made
    # for this request.
    status_lock = threading.Lock()

    def update_status(step_id, msg):
        """Record `msg` under `step_id` in this run's JSON status file."""
        print(f"[STATUS] Step {step_id}: {msg}")
        with status_lock:
            if os.path.exists(log_file_path):
                try:
                    with open(log_file_path, "r") as f:
                        current = json.load(f)
                except Exception:
                    # Corrupt or unreadable status file: start over.
                    current = {}
            else:
                current = {}
            current[str(step_id)] = f"{msg}"
            with open(log_file_path, "w") as f:
                json.dump(current, f, indent=2)

    try:
        time_start, time_end = calculate_time_range(time_range_option, pledge_date=pledge_date)
        print(f"[DEMO] Received claim: {claim}")
        print(f"[DEMO] Time range: {time_start} ~ {time_end}")
        print(f"[DEMO] Pledge date range: {pledge_date}")

        update_status(0, "πŸ“Œ Starting the system ...")
        print(suggestion_meta)

        outputs = run_pipeline(
                claim, pledge_date, pledge_author, time_start, timestamp, user_id,
                update_fn=update_status, suggestion_meta=suggestion_meta
            )

        # Convert the pipeline's Excel output to the JSON file that the
        # events, feedback and download endpoints read later.
        df = pd.read_excel(outputs["sorted_events"])
        json_path = os.path.join(TMP_DIR, f"{timestamp}_{user_id}.json")
        df.to_json(json_path, orient="records", indent=2)

        system_end_time = datetime.now()
        runtime = system_end_time - system_start_time

        events = df.to_dict(orient="records")
        log_entry = {
            "requested_time": timestamp,
            "user_id": user_id,
            "pledge": claim,
            "suggestion_meta": suggestion_meta,
            "time_start": time_start,
            "time_end": time_end,
            "runtime": runtime.total_seconds(),
            "pledge_author": pledge_author,
            "pledge_date": pledge_date,
            "events": events
        }
        default_log_path = f"{FEEDBACK_DIR}/feedback_{timestamp}_{user_id}.jsonl"

        # Written as one indented JSON document (despite the .jsonl suffix);
        # the feedback endpoint reads it back with json.load.
        with open(default_log_path, "w") as f:
            f.write(json.dumps(log_entry, indent=1))

        tsv_path = outputs["augmented_tsv_file"]

        # Uploading is best effort: a failure is logged but the run still
        # counts as successful.
        try:
            api = HfApi()
            api.upload_file(
                path_or_fileobj=default_log_path,
                path_in_repo=f"logs/feedback_{timestamp}_{user_id}.jsonl",
                repo_id=HF_DATASET_REPO,
                repo_type="dataset",
                token=HF_TOKEN
            )
            api.upload_file(
                path_or_fileobj=tsv_path,
                path_in_repo=f"logs/augmented_{timestamp}_{user_id}.tsv",
                repo_id=HF_DATASET_REPO,
                repo_type="dataset",
                token=HF_TOKEN
            )

        except Exception as e:
            traceback.print_exc()
            print(f"[Default Feedback Upload Error] {e}")

        return jsonify({
            "status": "success",
            "file": f"{timestamp}_{user_id}.json",
            "user_id": user_id,
            "timestamp": timestamp
        })
    except Exception as e:
        traceback.print_exc()
        return jsonify({"status": "error", "detail": str(e)}), 500

@app.route("/api/events")
def get_events():
    """Return the stored events of a finished run as JSON.

    The `file` query parameter names a JSON file inside TMP_DIR. Responds with
    HTTP 400 when the parameter is missing and HTTP 404 when the name does not
    resolve to an existing file inside TMP_DIR.
    """
    filename = request.args.get("file")
    if not filename:
        # Without this check os.path.join(TMP_DIR, None) raises TypeError.
        return jsonify({"error": "Missing file param"}), 400

    # Resolve the requested name and make sure it stays inside TMP_DIR, so
    # values such as "../x" cannot reach other files.
    tmp_root = os.path.realpath(TMP_DIR)
    file_path = os.path.realpath(os.path.join(tmp_root, filename))
    inside_tmp = file_path.startswith(tmp_root + os.sep)

    if not inside_tmp or not os.path.isfile(file_path):
        return jsonify({"error": "File not found"}), 404

    with open(file_path, "r") as f:
        events = json.load(f)

    return jsonify(events)


@app.route("/api/feedback", methods=["POST"])
def receive_feedback():
    """Attach user feedback to a run's events and re-upload the feedback log.

    Expects a JSON object with "pledge", "feedback" (a list of items with
    "eventIndex" and "answer"), "file" (the events file inside TMP_DIR),
    "timestamp" and "user_id". Each event gets a "user_feedback" field (None
    when no answer was given). The run's log in FEEDBACK_DIR is rewritten with
    the annotated events, keeping the metadata of the previous log when it can
    be read, and then uploaded to the Hugging Face dataset.

    Returns HTTP 400 for missing/invalid identifiers or an unknown events
    file, HTTP 500 with status "partial_success" when only the upload failed,
    and {"status": "success"} otherwise.
    """
    payload = request.get_json()
    # A JSON body that is not an object (e.g. `null`) has no fields.
    data = payload if isinstance(payload, dict) else {}
    pledge = data.get("pledge", "no_pledge_text")
    feedback_list = data.get("feedback") or []
    filename = data.get("file")

    timestamp = data.get("timestamp")
    user_id = data.get("user_id")

    if not user_id or not timestamp:
        return jsonify({'status': 'error', 'detail': 'Missing user_id or timestamp'}), 400

    # Both values are embedded in file names below; a path separator would let
    # a request read or write outside FEEDBACK_DIR.
    if any(sep in str(part) for part in (timestamp, user_id) for sep in ("/", "\\")):
        return jsonify({'status': 'error', 'detail': 'Invalid user_id or timestamp'}), 400

    # Resolve the events file and require it to be a real file inside TMP_DIR.
    # This also covers a missing "file" field, which used to raise TypeError.
    file_path = None
    if isinstance(filename, str) and filename:
        tmp_root = os.path.realpath(TMP_DIR)
        candidate = os.path.realpath(os.path.join(tmp_root, filename))
        if candidate.startswith(tmp_root + os.sep) and os.path.isfile(candidate):
            file_path = candidate
    if file_path is None:
        return jsonify({"error": "Event file not found"}), 400

    with open(file_path, "r") as f:
        events = json.load(f)

    # Metadata carried over from the log written when the model ran. Every
    # field defaults to None so a missing or unreadable previous log cannot
    # leave a name unbound when the new entry is built.
    suggestion_meta = None
    time_start = None
    time_end = None
    pledge_author = None
    pledge_date = None
    runtime = None
    try:
        prev_log_path = f"{FEEDBACK_DIR}/feedback_{timestamp}_{user_id}.jsonl"
        with open(prev_log_path, "r") as f:
            previous_log = json.load(f)
        suggestion_meta = previous_log.get("suggestion_meta")
        time_start = previous_log.get("time_start")
        time_end = previous_log.get("time_end")
        pledge_author = previous_log.get("pledge_author")
        pledge_date = previous_log.get("pledge_date")
        runtime = previous_log.get("runtime")
    except Exception as e:
        # Best effort: feedback is still recorded without the old metadata.
        print(f"[Feedback] Could not read previous log: {e}")

    feedback_dict = {int(item['eventIndex']): item['answer'] for item in feedback_list}
    for idx, event in enumerate(events):
        event["user_feedback"] = feedback_dict.get(idx)

    log_entry = {
        "requested_time": timestamp,
        "user_id": user_id,
        "pledge": pledge,
        "suggestion_meta": suggestion_meta,
        "time_start": time_start,
        "time_end": time_end,
        "runtime": runtime,
        "pledge_author": pledge_author,
        "pledge_date": pledge_date,
        "events": events
    }

    # Overwrites the log written by the model run for the same identifiers.
    local_filename = f"{FEEDBACK_DIR}/feedback_{timestamp}_{user_id}.jsonl"
    with open(local_filename, "w") as f:
        f.write(json.dumps(log_entry, indent=1))

    try:
        api = HfApi()
        api.upload_file(
            path_or_fileobj=local_filename,
            path_in_repo=f"logs/feedback_{timestamp}_{user_id}.jsonl",
            repo_id=HF_DATASET_REPO,
            repo_type="dataset",
            token=HF_TOKEN
        )
    except Exception as e:
        # The local log was written; only the upload failed.
        return jsonify({'status': 'partial_success', 'error': str(e)}), 500

    return jsonify({'status': 'success'})


@app.route("/download-feedback/<filename>")
def download_feedback_file(filename):
    """Send the named file from FEEDBACK_DIR as a download attachment.

    NOTE(review): FEEDBACK_DIR is a relative path; confirm that the directory
    Flask resolves it against is the one the logs are written to.
    """
    response = send_from_directory(FEEDBACK_DIR, filename, as_attachment=True)
    return response

@app.route("/feedback-files")
def list_feedback_files():
    """Return the names of all entries in FEEDBACK_DIR as a sorted JSON list."""
    names = os.listdir(FEEDBACK_DIR)
    names.sort()
    return jsonify(names)

@app.route("/download")
def download_excel():
    """Convert a stored events JSON file to Excel and send it as a download.

    The `file` query parameter names a JSON file inside TMP_DIR. The workbook
    is written next to it with the extension replaced by `.xlsx`. Responds
    with HTTP 400 when the parameter is missing and HTTP 404 when the name
    does not resolve to an existing file inside TMP_DIR.
    """
    file = request.args.get("file")
    if not file:
        return "Missing file param", 400

    # Resolve the requested name and make sure it stays inside TMP_DIR, so
    # values such as "../x" can neither be read nor written next to.
    tmp_root = os.path.realpath(TMP_DIR)
    json_path = os.path.realpath(os.path.join(tmp_root, file))
    inside_tmp = json_path.startswith(tmp_root + os.sep)
    if not inside_tmp or not os.path.isfile(json_path):
        return "Event file not found", 404

    with open(json_path, "r") as f:
        data = json.load(f)

    df = pd.DataFrame(data)
    # Swap only the extension. str.replace(".json", ".xlsx") left the path
    # unchanged for names without ".json", so the export overwrote its source.
    xlsx_path = os.path.splitext(json_path)[0] + ".xlsx"
    df.to_excel(xlsx_path, index=False)

    # xlsx_path is absolute, so the file sent is exactly the one just written.
    return send_file(xlsx_path, as_attachment=True)


if __name__ == "__main__":
    # Started as a script: listen on all interfaces, port 7860.
    app.run(port=7860, host="0.0.0.0")