radames (HF staff) committed on
Commit 57535ba
1 Parent(s): 211a7e2
Files changed (10)
  1. .gitignore +27 -0
  2. Makefile +2 -0
  3. app.py +113 -0
  4. install-node.sh +10 -0
  5. main.py +3 -0
  6. packages.txt +0 -0
  7. popular.txt +0 -0
  8. requirements.txt +10 -0
  9. templates/index.html +26 -0
  10. umap_reducer.py +37 -0
.gitignore ADDED
@@ -0,0 +1,27 @@
+ .DS_Store
+ .env
+ .flaskenv
+ *.pyc
+ *.pyo
+ env/
+ venv/
+ .venv/
+ env*
+ dist/
+ build/
+ *.egg
+ *.egg-info/
+ _mailinglist
+ .tox/
+ .cache/
+ .pytest_cache/
+ .idea/
+ docs/_build/
+ .vscode
+ # Coverage reports
+ htmlcov/
+ .coverage
+ .coverage.*
+ *,cover
+ venv
+ *_cache.json
Makefile ADDED
@@ -0,0 +1,2 @@
+ run:
+ 	PORT=3000 FLASK_ENV=development python app.py
app.py ADDED
@@ -0,0 +1,113 @@
+ from umap_reducer import UMAPReducer
+ from flask import Flask, request, render_template, jsonify, make_response
+ from flask_cors import CORS
+ import os
+ from dotenv import load_dotenv
+ from transformers import pipeline
+ import feedparser
+ import json
+ from dateutil import parser
+ import re
+ import numpy as np
+ import gzip
+
+ load_dotenv()
+
+ # Load sentiment classifier
+ # sentiment_analysis = pipeline(
+ #     "sentiment-analysis", model="siebert/sentiment-roberta-large-english")
+ app = Flask(__name__, static_url_path='/static')
+ reducer = UMAPReducer()
+
+ CORS(app)
+
+
+ @app.route('/')
+ def index():
+     return render_template('index.html')
+
+
+ @app.route('/run-umap')  # methods=['POST']
+ def run_umap():
+     data = np.random.rand(512, 4)
+
+     # UMAP embeddings
+     embeddings = reducer.embed(data)
+
+     content = gzip.compress(json.dumps(embeddings.tolist()).encode('utf8'), 5)
+     response = make_response(content)
+     response.headers['Content-Length'] = len(content)
+     response.headers['Content-Encoding'] = 'gzip'
+     return response
+
+
+ # @app.route('/news')
+ # def get_news():
+ #     feed_url = request.args.get('feed_url')
+ #     # check that the string is a valid URL
+
+ #     # file name for cache
+ #     file_name = "".join(re.split(r"https://|\.|/", feed_url))
+
+ #     feed_entries = get_feed(feed_url)
+ #     # filter only titles for sentiment analysis
+ #     try:
+ #         with open(f'{file_name}_cache.json') as file:
+ #             cache = json.load(file)
+ #     except (FileNotFoundError, json.JSONDecodeError):
+ #         cache = {}
+
+ #     # if the new homepage is newer than the cache, update the cache and return
+ #     print("new date", feed_entries['last_update'])
+ #     print("old date", cache['last_update']
+ #           if 'last_update' in cache else "None")
+ #     if not cache or parser.parse(feed_entries['last_update']) > parser.parse(cache['last_update']):
+ #         print("Updating cache with new predictions")
+ #         titles = [entry['title'] for entry in feed_entries['entries']]
+ #         # run sentiment analysis on titles
+ #         predictions = [sentiment_analysis(sentence) for sentence in titles]
+ #         # parse NEGATIVE and POSITIVE labels, normalize to -1..1
+ #         predictions = [-prediction[0]['score'] if prediction[0]['label'] ==
+ #                        'NEGATIVE' else prediction[0]['score'] for prediction in predictions]
+ #         # merge RSS data with predictions
+ #         entries_predictions = [{**entry, 'sentiment': prediction}
+ #                                for entry, prediction in zip(feed_entries['entries'], predictions)]
+ #         output = {'entries': entries_predictions,
+ #                   'last_update': feed_entries['last_update']}
+ #         # update the last-predictions cache
+ #         with open(f'{file_name}_cache.json', 'w') as file:
+ #             json.dump(output, file)
+ #         # send back json
+ #         return jsonify(output)
+ #     else:
+ #         print("Returning cached predictions")
+ #         return jsonify(cache)
+
+
+ # @app.route('/predict', methods=['POST'])
+ # def predict():
+ #     # get data from POST
+ #     if request.method == 'POST':
+ #         # get current news
+ #         # get post body data
+ #         data = request.get_json()
+ #         if data.get('sentences') is None:
+ #             return jsonify({'error': 'No text provided'})
+ #         # POST body expected to be {'sentences': ['...']}
+ #         sentences = data.get('sentences')
+ #         # predict sentiments
+ #         predictions = [sentiment_analysis(sentence) for sentence in sentences]
+ #         # parse NEGATIVE and POSITIVE labels, normalize to -1..1
+ #         predictions = [-prediction[0]['score'] if prediction[0]['label'] ==
+ #                        'NEGATIVE' else prediction[0]['score'] for prediction in predictions]
+ #         output = [dict(sentence=sentence, sentiment=prediction)
+ #                   for sentence, prediction in zip(sentences, predictions)]
+ #         # send back json
+ #         return jsonify(output)
+
+
+ # def get_feed(feed_url):
+ #     feed = feedparser.parse(feed_url)
+ #     return {'entries': feed['entries'], 'last_update': feed["feed"]['updated']}
+ if __name__ == '__main__':
+     app.run(host='0.0.0.0', port=int(os.environ.get('PORT', 7860)))
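
Usage note (not part of the commit): the /run-umap endpoint serves a gzip-compressed JSON array and sets Content-Encoding: gzip, so standard HTTP clients decompress it transparently. A minimal client sketch, assuming the app is running locally on the default port 7860 (requests is an extra dependency, not listed in requirements.txt):

import requests

resp = requests.get("http://localhost:7860/run-umap")
resp.raise_for_status()
# requests decodes the gzip body automatically based on Content-Encoding
embeddings = resp.json()  # 512 points with 3 UMAP components each
print(len(embeddings), embeddings[0])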
install-node.sh ADDED
@@ -0,0 +1,10 @@
+ curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.39.1/install.sh | bash
+ export NVM_DIR="$([ -z "${XDG_CONFIG_HOME-}" ] && printf %s "${HOME}/.nvm" || printf %s "${XDG_CONFIG_HOME}/nvm")"
+ [ -s "$NVM_DIR/nvm.sh" ] && \. "$NVM_DIR/nvm.sh"
+ nvm install --lts
+ node --version
+ npm --version
+ which node
+ which npm
+ command ln -s "$NVM_BIN/node" /home/user/.local/bin/node
+ command ln -s "$NVM_BIN/npm" /home/user/.local/bin/npm
main.py ADDED
@@ -0,0 +1,3 @@
+ import subprocess
+
+ subprocess.run(["make", "build-all"], shell=False)
packages.txt ADDED
File without changes
popular.txt ADDED
The diff for this file is too large to render. See raw diff
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ feedparser==6.0.8
+ Flask==2.0.3
+ flask_cors==3.0.10
+ hdbscan==0.8.28
+ numpy==1.22.2
+ python-dotenv==0.19.2
+ python_dateutil==2.8.2
+ transformers==4.16.2
+ umap-learn==0.5.2
+ torch
templates/index.html ADDED
@@ -0,0 +1,26 @@
+ <!DOCTYPE html>
+ <html lang="en">
+   <head>
+     <base href="" />
+     <meta charset="utf-8" />
+     <meta name="description" content="" />
+     <link rel="icon" href="favicon.png" />
+     <meta name="viewport" content="width=device-width, initial-scale=1" />
+     <script src="https://cdnjs.cloudflare.com/ajax/libs/iframe-resizer/4.3.1/iframeResizer.contentWindow.min.js"></script>
+     <link
+       rel="stylesheet"
+       href="https://cdn.jsdelivr.net/npm/@observablehq/inspector@3/dist/inspector.css"
+     />
+   </head>
+   <body>
+     <div id="observablehq-3f13b363"></div>
+     <script type="module">
+       import {
+         Runtime,
+         Inspector,
+       } from "https://cdn.jsdelivr.net/npm/@observablehq/runtime@4/dist/runtime.js";
+       import define from "https://api.observablehq.com/d/843a8bdf01fc2c8f.js?v=3";
+       new Runtime().module(define, Inspector.into("#observablehq-3f13b363"));
+     </script>
+   </body>
+ </html>
umap_reducer.py ADDED
@@ -0,0 +1,37 @@
+ import umap
+ import hdbscan
+ import copy
+
+ class UMAPReducer:
+     def __init__(self, options={}):
+
+         # set options with defaults
+         options = {'n_components': 3, 'spread': 1, 'min_dist': 0.1, 'n_neighbors': 15,
+                    'metric': 'hellinger', 'min_cluster_size': 60, 'min_samples': 15, **options}
+
+         print(options)
+         self.reducer = umap.UMAP(
+             n_neighbors=options['n_neighbors'],
+             min_dist=options['min_dist'],
+             n_components=options['n_components'],
+             metric=options['metric'],
+             verbose=True)
+         # cluster init
+         self.clusterer = hdbscan.HDBSCAN(
+             min_cluster_size=options['min_cluster_size'],
+             min_samples=options['min_samples'],
+             allow_single_cluster=True
+         )
+         self.cluster_params = copy.deepcopy(options)
+
+     def setParams(self, options):
+         # update params
+         self.cluster_params = {**self.cluster_params, **options}
+
+     def clusterAnalysis(self, data):
+         clusters = self.clusterer.fit(data)
+         return clusters
+
+     def embed(self, data):
+         result = self.reducer.fit_transform(data)
+         return result
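
Usage note (not part of the commit): a quick sketch of how app.py drives UMAPReducer. Note that embed() and clusterAnalysis() run independent fits, and the 'spread' option is stored in the defaults but not forwarded to umap.UMAP:

import numpy as np
from umap_reducer import UMAPReducer

reducer = UMAPReducer({'n_neighbors': 10})   # any default option can be overridden
data = np.random.rand(512, 4)                # non-negative values, as the hellinger metric expects
embeddings = reducer.embed(data)             # ndarray of shape (512, 3)
clusters = reducer.clusterAnalysis(embeddings)
print(embeddings.shape, set(clusters.labels_))  # -1 labels mark HDBSCAN noise points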