radames HF staff committed on
Commit
6bbf952
1 Parent(s): 021ceab
Files changed (6) hide show
  1. .gitignore +2 -1
  2. README.md +2 -4
  3. app.py +27 -6
  4. main.py +0 -3
  5. requirements.txt +3 -2
  6. templates/index.html +103 -2
.gitignore CHANGED
@@ -24,4 +24,5 @@ htmlcov/
24
  .coverage.*
25
  *,cover
26
  venv
27
- *_cache.json
 
24
  .coverage.*
25
  *,cover
26
  venv
27
+ *_cache.json
28
+ flask_session/
README.md CHANGED
@@ -1,11 +1,9 @@
1
  ---
2
- title: News Clustering Observable
3
- emoji: 🚀
4
  colorFrom: green
5
  colorTo: indigo
6
  sdk: gradio
7
  app_file: app.py
8
  pinned: false
9
  ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces#reference
1
  ---
2
+ title: Sentence Embeddings Visualization
3
+ emoji: 📈
4
  colorFrom: green
5
  colorTo: indigo
6
  sdk: gradio
7
  app_file: app.py
8
  pinned: false
9
  ---
 
 
app.py CHANGED
@@ -1,7 +1,8 @@
1
  from umap_reducer import UMAPReducer
2
  from embeddings_encoder import EmbeddingsEncoder
3
- from flask import Flask, request, render_template, jsonify, make_response
4
- from flask_cors import CORS
 
5
  import os
6
  from dotenv import load_dotenv
7
  import feedparser
@@ -10,14 +11,22 @@ from dateutil import parser
10
  import re
11
  import numpy as np
12
  import gzip
 
13
 
14
  load_dotenv()
15
 
16
 
17
  app = Flask(__name__, static_url_path='/static')
 
 
 
 
 
 
 
 
18
  reducer = UMAPReducer()
19
  encoder = EmbeddingsEncoder()
20
- CORS(app)
21
 
22
 
23
  @app.route('/')
@@ -26,15 +35,27 @@ def index():
26
 
27
 
28
  @app.route('/run-umap', methods=['POST'])
 
29
  def run_umap():
30
  input_data = request.get_json()
31
  sentences = input_data['data']['sentences']
32
  umap_options = input_data['data']['umap_options']
33
  cluster_options = input_data['data']['cluster_options']
 
 
 
34
 
35
- print("input options:", umap_options, cluster_options)
 
36
  try:
37
- embeddings = encoder.encode(sentences)
 
 
 
 
 
 
 
38
  # UMAP embeddings
39
  reducer.setParams(umap_options, cluster_options)
40
  umap_embeddings = reducer.embed(embeddings)
@@ -51,7 +72,7 @@ def run_umap():
51
  response.headers['Content-Encoding'] = 'gzip'
52
  return response
53
  except Exception as e:
54
- return jsonify({"error": str(e)}), 201
55
 
56
 
57
  if __name__ == '__main__':
1
  from umap_reducer import UMAPReducer
2
  from embeddings_encoder import EmbeddingsEncoder
3
+ from flask import Flask, request, render_template, jsonify, make_response, session
4
+ from flask_session import Session
5
+ from flask_cors import CORS, cross_origin
6
  import os
7
  from dotenv import load_dotenv
8
  import feedparser
11
  import re
12
  import numpy as np
13
  import gzip
14
+ import hashlib
15
 
16
  load_dotenv()
17
 
18
 
19
  app = Flask(__name__, static_url_path='/static')
20
+ app.config["SECRET_KEY"] = os.environ.get("SECRET_KEY")
21
+ app.config["SESSION_PERMANENT"] = True
22
+ app.config["SESSION_TYPE"] = "filesystem"
23
+ app.config["SESSION_COOKIE_SAMESITE"] = "None"
24
+ app.config["SESSION_COOKIE_SECURE"] = True
25
+ Session(app)
26
+ CORS(app)
27
+
28
  reducer = UMAPReducer()
29
  encoder = EmbeddingsEncoder()
 
30
 
31
 
32
  @app.route('/')
35
 
36
 
37
  @app.route('/run-umap', methods=['POST'])
38
+ @cross_origin(supports_credentials=True)
39
  def run_umap():
40
  input_data = request.get_json()
41
  sentences = input_data['data']['sentences']
42
  umap_options = input_data['data']['umap_options']
43
  cluster_options = input_data['data']['cluster_options']
44
+ # create unique hash for input, avoid recalculating embeddings
45
+ sentences_input_hash = hashlib.sha256(
46
+ ''.join(sentences).encode("utf-8")).hexdigest()
47
 
48
+ print("input options:", sentences_input_hash,
49
+ umap_options, cluster_options, "\n\n")
50
  try:
51
+ if not session.get(sentences_input_hash):
52
+ print("New input, calculating embeddings" "\n\n")
53
+ embeddings = encoder.encode(sentences)
54
+ session[sentences_input_hash] = embeddings.tolist()
55
+ else:
56
+ print("Input already calculated, using cached embeddings", "\n\n")
57
+ embeddings = session[sentences_input_hash]
58
+
59
  # UMAP embeddings
60
  reducer.setParams(umap_options, cluster_options)
61
  umap_embeddings = reducer.embed(embeddings)
72
  response.headers['Content-Encoding'] = 'gzip'
73
  return response
74
  except Exception as e:
75
+ return jsonify({"error": str(e)}), 400
76
 
77
 
78
  if __name__ == '__main__':
main.py DELETED
@@ -1,3 +0,0 @@
1
- import subprocess
2
-
3
- subprocess.run(["make", "build-all"], shell=False)
 
 
 
requirements.txt CHANGED
@@ -1,10 +1,11 @@
1
  feedparser==6.0.8
2
  Flask==2.0.3
3
  flask_cors==3.0.10
 
4
  hdbscan==0.8.28
5
  numpy==1.22.2
6
  python-dotenv==0.19.2
7
  python_dateutil==2.8.2
 
8
  transformers==4.16.2
9
- umap-learn==0.5.2
10
- torch
1
  feedparser==6.0.8
2
  Flask==2.0.3
3
  flask_cors==3.0.10
4
+ flask_session==0.4.0
5
  hdbscan==0.8.28
6
  numpy==1.22.2
7
  python-dotenv==0.19.2
8
  python_dateutil==2.8.2
9
+ torch==1.10.2
10
  transformers==4.16.2
11
+ umap-learn==0.5.2
 
templates/index.html CHANGED
@@ -11,16 +11,117 @@
11
  rel="stylesheet"
12
  href="https://cdn.jsdelivr.net/npm/@observablehq/inspector@3/dist/inspector.css"
13
  />
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  </head>
15
  <body>
16
- <div id="observablehq-3f13b363"></div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  <script type="module">
18
  import {
19
  Runtime,
20
  Inspector,
21
  } from "https://cdn.jsdelivr.net/npm/@observablehq/runtime@4/dist/runtime.js";
22
  import define from "https://api.observablehq.com/d/843a8bdf01fc2c8f.js?v=3";
23
- new Runtime().module(define, Inspector.into("#observablehq-3f13b363"));
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  </script>
25
  </body>
26
  </html>
11
  rel="stylesheet"
12
  href="https://cdn.jsdelivr.net/npm/@observablehq/inspector@3/dist/inspector.css"
13
  />
14
+ <style>
15
+ @import url("https://fonts.googleapis.com/css2?family=Roboto&family=Source+Serif+4:wght@700&display=swap");
16
+ </style>
17
+ <style>
18
+ .mw8 {
19
+ max-width: 64rem;
20
+ }
21
+ .mx-auto {
22
+ margin-left: auto;
23
+ margin-right: auto;
24
+ }
25
+ .ph3 {
26
+ padding-left: 1rem;
27
+ padding-right: 1rem;
28
+ }
29
+ .measure-wide {
30
+ max-width: 34em;
31
+ }
32
+ * {
33
+ font-family: "Roboto", sans-serif;
34
+ }
35
+ h1,
36
+ h2,
37
+ h3,
38
+ h4,
39
+ h5,
40
+ h6 {
41
+ font-family: "Source Serif 4", serif;
42
+ }
43
+ </style>
44
  </head>
45
  <body>
46
+ <article class="mw8 mx-auto ph3 sans-serif">
47
+ <div class="measure-wide" id="observablehq-intro-7dbb745c"></div>
48
+ <div id="observablehq-viewof-sentences-7dbb745c"></div>
49
+ <div id="observablehq-viewof-params-7dbb745c"></div>
50
+ <div id="observablehq-viewof-tryme-7dbb745c"></div>
51
+ <div id="observablehq-viewof-clear-7dbb745c"></div>
52
+ <div id="observablehq-dialog-7dbb745c"></div>
53
+ <div id="observablehq-viewof-scatter-7dbb745c"></div>
54
+ <div id="observablehq-umapoptions-7dbb745c"></div>
55
+ <div id="observablehq-viewof-umapOptions-7dbb745c"></div>
56
+ <div id="observablehq-umapbutton-7dbb745c"></div>
57
+ <div id="observablehq-hdbscanoptions-7dbb745c"></div>
58
+ <div id="observablehq-viewof-clusterOptions-7dbb745c"></div>
59
+ <div id="observablehq-hdbcscanbutton-7dbb745c"></div>
60
+ </article>
61
+
62
  <script type="module">
63
  import {
64
  Runtime,
65
  Inspector,
66
  } from "https://cdn.jsdelivr.net/npm/@observablehq/runtime@4/dist/runtime.js";
67
  import define from "https://api.observablehq.com/d/843a8bdf01fc2c8f.js?v=3";
68
+ new Runtime().module(define, (name) => {
69
+ if (name === "intro")
70
+ return new Inspector(
71
+ document.querySelector("#observablehq-intro-7dbb745c")
72
+ );
73
+ if (name === "viewof sentences")
74
+ return new Inspector(
75
+ document.querySelector("#observablehq-viewof-sentences-7dbb745c")
76
+ );
77
+ if (name === "viewof params")
78
+ return new Inspector(
79
+ document.querySelector("#observablehq-viewof-params-7dbb745c")
80
+ );
81
+ if (name === "viewof tryme")
82
+ return new Inspector(
83
+ document.querySelector("#observablehq-viewof-tryme-7dbb745c")
84
+ );
85
+ if (name === "viewof clear")
86
+ return new Inspector(
87
+ document.querySelector("#observablehq-viewof-clear-7dbb745c")
88
+ );
89
+ if (name === "dialog")
90
+ return new Inspector(
91
+ document.querySelector("#observablehq-dialog-7dbb745c")
92
+ );
93
+ if (name === "viewof scatter")
94
+ return new Inspector(
95
+ document.querySelector("#observablehq-viewof-scatter-7dbb745c")
96
+ );
97
+ if (name === "umapoptions")
98
+ return new Inspector(
99
+ document.querySelector("#observablehq-umapoptions-7dbb745c")
100
+ );
101
+ if (name === "viewof umapOptions")
102
+ return new Inspector(
103
+ document.querySelector("#observablehq-viewof-umapOptions-7dbb745c")
104
+ );
105
+ if (name === "umapbutton")
106
+ return new Inspector(
107
+ document.querySelector("#observablehq-umapbutton-7dbb745c")
108
+ );
109
+ if (name === "hdbscanoptions")
110
+ return new Inspector(
111
+ document.querySelector("#observablehq-hdbscanoptions-7dbb745c")
112
+ );
113
+ if (name === "viewof clusterOptions")
114
+ return new Inspector(
115
+ document.querySelector(
116
+ "#observablehq-viewof-clusterOptions-7dbb745c"
117
+ )
118
+ );
119
+ if (name === "hdbcscanbutton")
120
+ return new Inspector(
121
+ document.querySelector("#observablehq-hdbcscanbutton-7dbb745c")
122
+ );
123
+ return ["update", "data", "colorScale"].includes(name);
124
+ });
125
  </script>
126
  </body>
127
  </html>