radames's picture
radames HF staff
add transformers emmbeddings and UMAP
9663a4b
from umap_reducer import UMAPReducer
from embeddings_encoder import EmbeddingsEncoder
from flask import Flask, request, render_template, jsonify, make_response
from flask_cors import CORS
import os
from dotenv import load_dotenv
import feedparser
import json
from dateutil import parser
import re
import numpy as np
import gzip
load_dotenv()
app = Flask(__name__, static_url_path='/static')
reducer = UMAPReducer()
encoder = EmbeddingsEncoder()
CORS(app)
@app.route('/')
def index():
return render_template('index.html')
@app.route('/run-umap', methods=['POST'])
def run_umap():
input_data = request.get_json()
sentences = input_data['data']['sentences']
umap_options = input_data['data']['umap_options']
cluster_options = input_data['data']['cluster_options']
print("input options:", umap_options, cluster_options)
try:
embeddings = encoder.encode(sentences)
# UMAP embeddings
reducer.setParams(umap_options, cluster_options)
umap_embeddings = reducer.embed(embeddings)
# HDBScan cluster analysis
clusters = reducer.clusterAnalysis(umap_embeddings)
content = gzip.compress(json.dumps(
{
"embeddings": umap_embeddings.tolist(),
"clusters": clusters.labels_.tolist()
}
).encode('utf8'), 5)
response = make_response(content)
response.headers['Content-length'] = len(content)
response.headers['Content-Encoding'] = 'gzip'
return response
except Exception as e:
return jsonify({"error": str(e)}), 201
if __name__ == '__main__':
app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))