File size: 3,148 Bytes
729d130
d84c926
729d130
 
d84c926
729d130
804ddc3
729d130
 
 
 
d84c926
 
 
 
 
 
 
 
 
 
45d03f9
d84c926
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45d03f9
729d130
 
 
 
45d03f9
729d130
 
 
 
 
ef23101
d84c926
ac7cbfc
 
 
d84c926
 
 
 
729d130
 
45d03f9
d84c926
 
 
 
 
 
 
 
 
 
45d03f9
d84c926
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45d03f9
729d130
32c50d0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import json
import os
from flask import Flask, jsonify, request
from semantic_search import SemanticSearch
from datetime import datetime

# Shared semantic-search engine; constructed once at import time and reused
# by every request handler.
search = SemanticSearch()

app = Flask(__name__)
# Let non-ASCII characters pass through jsonify unescaped in responses.
app.config['JSON_AS_ASCII'] = False

# Base directory for query/result log files (override via LOGS_BASE_PATH env var)
LOGS_BASE_PATH = os.getenv("LOGS_BASE_PATH", "logs")

# Create the logs directory up front. exist_ok=True avoids the
# check-then-create race (TOCTOU) of a separate os.path.exists() guard.
os.makedirs(LOGS_BASE_PATH, exist_ok=True)

# Logging is opt-in: set ENABLE_LOGS=1 to persist each query/result pair.
ENABLE_LOGS = os.getenv("ENABLE_LOGS", "0") == "1"


def log_query_result(query, top, request_id, result):
    """Persist one search request/response pair as a JSON file.

    No-op unless the ENABLE_LOGS env flag is set. Each call writes a
    separate timestamp-named file under LOGS_BASE_PATH.

    Args:
        query: The search query string.
        top: Requested number of results.
        request_id: Caller-supplied correlation id (may be '').
        result: List of result dicts, as returned to the client.
    """
    if not ENABLE_LOGS:
        return

    # Include microseconds (%f): a second-granularity name silently
    # overwrites the log of any other request arriving in the same second.
    timestamp = datetime.now().strftime("%Y%m%d%H%M%S%f")
    log_file_path = os.path.join(LOGS_BASE_PATH, f"{timestamp}.json")

    log_data = {
        "timestamp": timestamp,
        "query": query,
        "top": top,
        "request_id": request_id,
        "result": result
    }

    # utf-8 + ensure_ascii=False keeps non-ASCII queries human-readable on
    # disk, matching the app's JSON_AS_ASCII=False response setting.
    with open(log_file_path, 'w', encoding='utf-8') as log_file:
        json.dump(log_data, log_file, indent=2, ensure_ascii=False)


@app.route('/health', methods=['GET'])
def health():
    """Liveness probe: always reports the service as up."""
    status_payload = {"status": "ok"}
    return jsonify(status_payload)


@app.route('/search', methods=['POST'])
def search_route():
    """Run a semantic search and return ranked results as JSON.

    Expects a JSON body with optional keys: "query" (str), "top" (int),
    "use_llm_for_teasers" (bool), "request_id" (str).
    """
    # silent=True yields None instead of raising on a missing/malformed
    # JSON body; fall back to defaults rather than responding 500.
    data = request.get_json(silent=True) or {}
    query = data.get('query', '')
    top = data.get('top', 10)
    use_llm_for_teasers = data.get('use_llm_for_teasers', False)
    request_id = data.get('request_id', '')

    titles, docs, teasers, scores = search.search(query, top, use_llm_for_teasers)
    result = [
        {'title': str(title), 'text': str(doc), 'teaser': teaser,
         'relevance': str(score)}
        for title, doc, teaser, score in zip(titles, docs, teasers, scores)
    ]

    # Persist the request/response pair if ENABLE_LOGS is set.
    log_query_result(query, top, request_id, result)

    return jsonify(result)


@app.route('/read_logs', methods=['GET'])
def read_logs():
    """Return every persisted log entry as a JSON array.

    Filenames are write timestamps, so sorting them yields chronological
    order (bare os.listdir order is arbitrary and nondeterministic).
    """
    logs = []
    for log_file in sorted(os.listdir(LOGS_BASE_PATH)):
        if log_file.endswith(".json"):
            with open(os.path.join(LOGS_BASE_PATH, log_file), 'r',
                      encoding='utf-8') as file:
                logs.append(json.load(file))
    return jsonify(logs)


@app.route('/analyze_logs', methods=['GET'])
def analyze_logs():
    """Return log entries whose results disagree for the same (query, top).

    Groups all persisted logs by (query, top) and flags every group where
    identical requests produced different result payloads — a sign of
    non-deterministic search behaviour.
    """
    logs_by_query_top = {}
    for log_file in os.listdir(LOGS_BASE_PATH):
        if not log_file.endswith(".json"):
            continue
        with open(os.path.join(LOGS_BASE_PATH, log_file), 'r',
                  encoding='utf-8') as file:
            log_data = json.load(file)

        # Tuple key: the former f"{query}_{top}" string key was ambiguous —
        # e.g. query "a_1", top 2 collided with query "a", top "1_2".
        key = (log_data.get("query", ""), log_data.get("top", ""))
        logs_by_query_top.setdefault(key, []).append(log_data)

    # A group is invalid when the same (query, top) yielded more than one
    # distinct result. sort_keys makes the serialized comparison immune to
    # dict key insertion order.
    invalid_logs = []
    for logs in logs_by_query_top.values():
        distinct = {json.dumps(log.get('result'), sort_keys=True) for log in logs}
        if len(distinct) > 1:
            invalid_logs.extend(logs)

    return jsonify(invalid_logs)


if __name__ == '__main__':
    # Bind to all interfaces so the service is reachable from outside the
    # container/host; debug mode stays off.
    app.run(debug=False, host='0.0.0.0')