radames (HF staff) committed on
Commit 57535ba
1 Parent(s): 211a7e2
Files changed (10)
  1. .gitignore +27 -0
  2. Makefile +2 -0
  3. app.py +113 -0
  4. install-node.sh +10 -0
  5. main.py +3 -0
  6. packages.txt +0 -0
  7. popular.txt +0 -0
  8. requirements.txt +10 -0
  9. templates/index.html +26 -0
  10. umap_reducer.py +37 -0
.gitignore ADDED
@@ -0,0 +1,27 @@
+ .DS_Store
+ .env
+ .flaskenv
+ *.pyc
+ *.pyo
+ env/
+ venv/
+ .venv/
+ env*
+ dist/
+ build/
+ *.egg
+ *.egg-info/
+ _mailinglist
+ .tox/
+ .cache/
+ .pytest_cache/
+ .idea/
+ docs/_build/
+ .vscode
+ # Coverage reports
+ htmlcov/
+ .coverage
+ .coverage.*
+ *,cover
+ venv
+ *_cache.json
Makefile ADDED
@@ -0,0 +1,2 @@
+ run:
+ 	PORT=3000 FLASK_ENV=development python app.py
app.py ADDED
@@ -0,0 +1,113 @@
+ from umap_reducer import UMAPReducer
+ from flask import Flask, request, render_template, jsonify, make_response
+ from flask_cors import CORS
+ import os
+ from dotenv import load_dotenv
+ from transformers import pipeline
+ import feedparser
+ import json
+ from dateutil import parser
+ import re
+ import numpy as np
+ import gzip
+
+ load_dotenv()
+
+ # Load sentiment classifier
+ # sentiment_analysis = pipeline(
+ #     "sentiment-analysis", model="siebert/sentiment-roberta-large-english")
+ app = Flask(__name__, static_url_path='/static')
+ reducer = UMAPReducer()
+
+ CORS(app)
+
+
+ @app.route('/')
+ def index():
+     return render_template('index.html')
+
+
+ @app.route('/run-umap')  # methods=['POST']
+ def run_umap():
+     data = np.random.rand(512, 4)
+
+     # UMAP embeddings
+     embeddings = reducer.embed(data)
+
+     content = gzip.compress(json.dumps(embeddings.tolist()).encode('utf8'), 5)
+     response = make_response(content)
+     response.headers['Content-Length'] = len(content)
+     response.headers['Content-Encoding'] = 'gzip'
+     return response
+
+
+ # @app.route('/news')
+ # def get_news():
+ #     feed_url = request.args.get('feed_url')
+ #     # check that the string is a valid URL
+
+ #     # file name for cache
+ #     file_name = "".join(re.split(r"https://|\.|/", feed_url))
+
+ #     feed_entries = get_feed(feed_url)
+ #     # filter only titles for sentiment analysis
+ #     try:
+ #         with open(f'{file_name}_cache.json') as file:
+ #             cache = json.load(file)
+ #     except (FileNotFoundError, json.JSONDecodeError):
+ #         cache = {}
+
+ #     # if the new homepage is newer than the cache, update the cache and return
+ #     print("new date", feed_entries['last_update'])
+ #     print("old date", cache['last_update']
+ #           if 'last_update' in cache else "None")
+ #     if not cache or parser.parse(feed_entries['last_update']) > parser.parse(cache['last_update']):
+ #         print("Updating cache with new predictions")
+ #         titles = [entry['title'] for entry in feed_entries['entries']]
+ #         # run sentiment analysis on titles
+ #         predictions = [sentiment_analysis(sentence) for sentence in titles]
+ #         # parse NEGATIVE and POSITIVE labels, normalize to -1..1
+ #         predictions = [-prediction[0]['score'] if prediction[0]['label'] ==
+ #                        'NEGATIVE' else prediction[0]['score'] for prediction in predictions]
+ #         # merge RSS data with predictions
+ #         entries_predictions = [{**entry, 'sentiment': prediction}
+ #                                for entry, prediction in zip(feed_entries['entries'], predictions)]
+ #         output = {'entries': entries_predictions,
+ #                   'last_update': feed_entries['last_update']}
+ #         # update the last-predictions cache
+ #         with open(f'{file_name}_cache.json', 'w') as file:
+ #             json.dump(output, file)
+ #         # send back json
+ #         return jsonify(output)
+ #     else:
+ #         print("Returning cached predictions")
+ #         return jsonify(cache)
+
+
+ # @app.route('/predict', methods=['POST'])
+ # def predict():
+ #     # get data from POST
+ #     if request.method == 'POST':
+ #         # get current news
+ #         # get post body data
+ #         data = request.get_json()
+ #         if data.get('sentences') is None:
+ #             return jsonify({'error': 'No text provided'})
+ #         # POST body expected to be {'sentences': ['...']}
+ #         sentences = data.get('sentences')
+ #         # predict sentiments
+ #         predictions = [sentiment_analysis(sentence) for sentence in sentences]
+ #         # parse NEGATIVE and POSITIVE labels, normalize to -1..1
+ #         predictions = [-prediction[0]['score'] if prediction[0]['label'] ==
+ #                        'NEGATIVE' else prediction[0]['score'] for prediction in predictions]
+ #         output = [dict(sentence=sentence, sentiment=prediction)
+ #                   for sentence, prediction in zip(sentences, predictions)]
+ #         # send back json
+ #         return jsonify(output)
+
+
+ # def get_feed(feed_url):
+ #     feed = feedparser.parse(feed_url)
+ #     return {'entries': feed['entries'], 'last_update': feed["feed"]['updated']}
+ if __name__ == '__main__':
+     app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))
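
Usage note (not part of the commit): the /run-umap endpoint serves a gzip-compressed JSON array and sets Content-Encoding: gzip, so standard HTTP clients decompress it transparently. A minimal client sketch, assuming the app is running locally on the default port 7860 (requests is an extra dependency, not listed in requirements.txt):

import requests

resp = requests.get("http://localhost:7860/run-umap")
resp.raise_for_status()
# requests decodes the gzip body automatically based on Content-Encoding
embeddings = resp.json()  # 512 points with 3 UMAP components each
print(len(embeddings), embeddings[0])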
install-node.sh ADDED
@@ -0,0 +1,10 @@
+ curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.1/install.sh | bash
+ export NVM_DIR="$([ -z "${XDG_CONFIG_HOME-}" ] && printf %s "${HOME}/.nvm" || printf %s "${XDG_CONFIG_HOME}/nvm")"
+ [ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"
+ nvm install --lts
+ node --version
+ npm --version
+ which node
+ which npm
+ command ln -s "$NVM_BIN/node" /home/user/.local/bin/node
+ command ln -s "$NVM_BIN/npm" /home/user/.local/bin/npm
main.py ADDED
@@ -0,0 +1,3 @@
+ import subprocess
+
+ subprocess.run(["make", "build-all"], shell=False)
packages.txt ADDED
File without changes
popular.txt ADDED
The diff for this file is too large to render. See raw diff
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ feedparser==6.0.8
+ Flask==2.0.3
+ flask_cors==3.0.10
+ hdbscan==0.8.28
+ numpy==1.22.2
+ python-dotenv==0.19.2
+ python_dateutil==2.8.2
+ transformers==4.16.2
+ umap-learn==0.5.2
+ torch
templates/index.html ADDED
@@ -0,0 +1,26 @@
+ <!DOCTYPE html>
+ <html lang="en">
+   <head>
+     <base href="" />
+     <meta charset="utf-8" />
+     <meta name="description" content="" />
+     <link rel="icon" href="favicon.png" />
+     <meta name="viewport" content="width=device-width, initial-scale=1" />
+     <script src="https://cdnjs.cloudflare.com/ajax/libs/iframe-resizer/4.3.1/iframeResizer.contentWindow.min.js"></script>
+     <link
+       rel="stylesheet"
+       href="https://cdn.jsdelivr.net/npm/@observablehq/inspector@3/dist/inspector.css"
+     />
+   </head>
+   <body>
+     <div id="observablehq-3f13b363"></div>
+     <script type="module">
+       import {
+         Runtime,
+         Inspector,
+       } from "https://cdn.jsdelivr.net/npm/@observablehq/runtime@4/dist/runtime.js";
+       import define from "https://api.observablehq.com/d/843a8bdf01fc2c8f.js?v=3";
+       new Runtime().module(define, Inspector.into("#observablehq-3f13b363"));
+     </script>
+   </body>
+ </html>
umap_reducer.py ADDED
@@ -0,0 +1,37 @@
+ import umap
+ import hdbscan
+ import copy
+
+ class UMAPReducer:
+     def __init__(self, options={}):
+
+         # set options with defaults
+         options = {'n_components': 3, 'spread': 1, 'min_dist': 0.1, 'n_neighbors': 15,
+                    'metric': 'hellinger', 'min_cluster_size': 60, 'min_samples': 15, **options}
+
+         print(options)
+         self.reducer = umap.UMAP(
+             n_neighbors=options['n_neighbors'],
+             min_dist=options['min_dist'],
+             n_components=options['n_components'],
+             metric=options['metric'],
+             verbose=True)
+         # cluster init
+         self.clusterer = hdbscan.HDBSCAN(
+             min_cluster_size=options['min_cluster_size'],
+             min_samples=options['min_samples'],
+             allow_single_cluster=True
+         )
+         self.cluster_params = copy.deepcopy(options)
+
+     def setParams(self, options):
+         # update params
+         self.cluster_params = {**self.cluster_params, **options}
+
+     def clusterAnalysis(self, data):
+         clusters = self.clusterer.fit(data)
+         return clusters
+
+     def embed(self, data):
+         result = self.reducer.fit_transform(data)
+         return result
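
Usage note (not part of the commit): a quick sketch of how app.py drives UMAPReducer. Note that embed() and clusterAnalysis() run independent fits, and the 'spread' option is stored in the defaults but not forwarded to umap.UMAP:

import numpy as np
from umap_reducer import UMAPReducer

reducer = UMAPReducer({'n_neighbors': 10})   # any default option can be overridden
data = np.random.rand(512, 4)                # non-negative values, as the hellinger metric expects
embeddings = reducer.embed(data)             # ndarray of shape (512, 3)
clusters = reducer.clusterAnalysis(embeddings)
print(embeddings.shape, set(clusters.labels_))  # -1 labels mark HDBSCAN noise points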