# Spaces:
# Runtime error
# Runtime error
import gzip
import hashlib
import json
import os
import re

import feedparser
import numpy as np
from dateutil import parser
from dotenv import load_dotenv
from flask import Flask, request, render_template, jsonify, make_response, session
from flask_cors import CORS, cross_origin
from flask_session import Session

from embeddings_encoder import EmbeddingsEncoder
from umap_reducer import UMAPReducer
# Application bootstrap: environment, Flask app, session/CORS extensions and
# the two module-level workers used by the request handlers.

# Must run before the os.environ lookups below.
# NOTE(review): presumably reads a local .env file (python-dotenv) - confirm.
load_dotenv()

app = Flask(__name__, static_url_path='/static')

# NOTE(review): SECRET_KEY is None when the environment variable is unset;
# confirm the deployment always provides it.
app.config["SECRET_KEY"] = os.environ.get("SECRET_KEY")
app.config["SESSION_PERMANENT"] = True
app.config["SESSION_TYPE"] = "filesystem"
# SameSite=None is only honoured by browsers when the cookie is also Secure,
# so these two settings belong together.
app.config["SESSION_COOKIE_SAMESITE"] = "None"
app.config["SESSION_COOKIE_SECURE"] = True
Session(app)
CORS(app)

# Shared instances, created once at import time and reused by every request.
reducer = UMAPReducer()
encoder = EmbeddingsEncoder()
def index():
    """Render and return the ``index.html`` template.

    NOTE(review): no ``@app.route`` decorator is visible on this function in
    this view, so nothing here registers it as a Flask view - confirm whether
    the decorator was lost or the route is registered elsewhere.
    """
    return render_template('index.html')
def run_umap():
    """Embed the posted sentences, reduce them with UMAP and cluster them.

    Reads the JSON body of the current Flask request, which must look like
    ``{"data": {"sentences": [...], "umap_options": ...,
    "cluster_options": ...}}``.  Sentence embeddings are cached in the Flask
    session under a hash of the sentence list, so repeating a request with
    the same sentences but different options skips the encoding step.

    Returns:
        On success, a response whose body is gzip-compressed (level 5) JSON
        with the keys ``"embeddings"`` and ``"clusters"``.
        On failure, a ``({"error": message}, 400)`` JSON response.

    NOTE(review): no ``@app.route`` decorator is visible on this function in
    this view - confirm how it is registered as a Flask view.
    """
    input_data = request.get_json()

    # Reject malformed payloads with the same JSON error shape used below
    # instead of letting a KeyError/TypeError surface as a 500.
    try:
        payload = input_data['data']
        sentences = payload['sentences']
        umap_options = payload['umap_options']
        cluster_options = payload['cluster_options']
    except (KeyError, TypeError):
        return jsonify({
            "error": "request body must contain data.sentences, "
                     "data.umap_options and data.cluster_options"
        }), 400
    if not isinstance(sentences, list) or not all(
            isinstance(sentence, str) for sentence in sentences):
        return jsonify({"error": "'sentences' must be a list of strings"}), 400

    # create unique hash for input, avoid recalculating embeddings
    sentences_input_hash = _hash_sentences(sentences)
    print("input options:", sentences_input_hash,
          umap_options, cluster_options, "\n\n")
    try:
        if not session.get(sentences_input_hash):
            print("New input, calculating embeddings\n\n")
            embeddings = encoder.encode(sentences)
            # Stored as nested lists; presumably so the session backend can
            # serialize it - confirm.
            session[sentences_input_hash] = embeddings.tolist()
        else:
            print("Input already calculated, using cached embeddings", "\n\n")
            embeddings = session[sentences_input_hash]

        # UMAP embeddings
        reducer.setParams(umap_options, cluster_options)
        umap_embeddings = reducer.embed(embeddings)

        # HDBScan cluster analysis
        clusters = reducer.clusterAnalysis(umap_embeddings)

        body = json.dumps({
            "embeddings": umap_embeddings.tolist(),
            "clusters": clusters.labels_.tolist(),
        })
        content = gzip.compress(body.encode('utf8'), 5)
        response = make_response(content)
        response.headers['Content-length'] = len(content)
        response.headers['Content-Encoding'] = 'gzip'
        # The payload is JSON; without this the bytes body is served with
        # Flask's default text/html content type.
        response.headers['Content-Type'] = 'application/json'
        return response
    except Exception as e:
        # Request boundary: keep the original contract (any failure becomes a
        # JSON 400) but record the traceback so failures are diagnosable.
        app.logger.exception("run_umap failed")
        return jsonify({"error": str(e)}), 400


def _hash_sentences(sentences):
    """Return a SHA-256 hex digest identifying an ordered list of sentences.

    Each sentence is fed to the hash prefixed with its UTF-8 byte length, so
    sentence boundaries are part of the digest: ``["ab", "c"]`` and
    ``["a", "bc"]`` produce different keys, which plain concatenation would
    not.
    """
    digest = hashlib.sha256()
    for sentence in sentences:
        encoded = sentence.encode("utf-8")
        digest.update(len(encoded).to_bytes(8, "big"))
        digest.update(encoded)
    return digest.hexdigest()
if __name__ == '__main__':
    # Listen on all interfaces; the port is taken from the PORT environment
    # variable and falls back to 7860 when it is unset.
    app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))