DjPapzin commited on
Commit
14a3421
1 Parent(s): 4f525da

Upload 9 files

Browse files
Files changed (9) hide show
  1. .gitignore +161 -0
  2. README.md +54 -7
  3. app.py +182 -0
  4. data.py +48 -0
  5. names.py +3 -0
  6. requirements.txt +11 -0
  7. scrape.py +98 -0
  8. storage.py +21 -0
  9. utils.py +30 -0
.gitignore ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ disney-lyrics/
2
+ # Byte-compiled / optimized / DLL files
3
+ __pycache__/
4
+ *.py[cod]
5
+ *$py.class
6
+
7
+ # C extensions
8
+ *.so
9
+
10
+ # Distribution / packaging
11
+ .Python
12
+ build/
13
+ develop-eggs/
14
+ dist/
15
+ downloads/
16
+ eggs/
17
+ .eggs/
18
+ lib/
19
+ lib64/
20
+ parts/
21
+ sdist/
22
+ var/
23
+ wheels/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+ cover/
54
+
55
+ # Translations
56
+ *.mo
57
+ *.pot
58
+
59
+ # Django stuff:
60
+ *.log
61
+ local_settings.py
62
+ db.sqlite3
63
+ db.sqlite3-journal
64
+
65
+ # Flask stuff:
66
+ instance/
67
+ .webassets-cache
68
+
69
+ # Scrapy stuff:
70
+ .scrapy
71
+
72
+ # Sphinx documentation
73
+ docs/_build/
74
+
75
+ # PyBuilder
76
+ .pybuilder/
77
+ target/
78
+
79
+ # Jupyter Notebook
80
+ .ipynb_checkpoints
81
+
82
+ # IPython
83
+ profile_default/
84
+ ipython_config.py
85
+
86
+ # pyenv
87
+ # For a library or package, you might want to ignore these files since the code is
88
+ # intended to run in multiple environments; otherwise, check them in:
89
+ # .python-version
90
+
91
+ # pipenv
92
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
94
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
95
+ # install all needed dependencies.
96
+ #Pipfile.lock
97
+
98
+ # poetry
99
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
100
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
101
+ # commonly ignored for libraries.
102
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
103
+ #poetry.lock
104
+
105
+ # pdm
106
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
107
+ #pdm.lock
108
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
109
+ # in version control.
110
+ # https://pdm.fming.dev/#use-with-ide
111
+ .pdm.toml
112
+
113
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
114
+ __pypackages__/
115
+
116
+ # Celery stuff
117
+ celerybeat-schedule
118
+ celerybeat.pid
119
+
120
+ # SageMath parsed files
121
+ *.sage.py
122
+
123
+ # Environments
124
+ .env
125
+ .venv
126
+ env/
127
+ venv/
128
+ ENV/
129
+ env.bak/
130
+ venv.bak/
131
+
132
+ # Spyder project settings
133
+ .spyderproject
134
+ .spyproject
135
+
136
+ # Rope project settings
137
+ .ropeproject
138
+
139
+ # mkdocs documentation
140
+ /site
141
+
142
+ # mypy
143
+ .mypy_cache/
144
+ .dmypy.json
145
+ dmypy.json
146
+
147
+ # Pyre type checker
148
+ .pyre/
149
+
150
+ # pytype static type analyzer
151
+ .pytype/
152
+
153
+ # Cython debug symbols
154
+ cython_debug/
155
+
156
+ # PyCharm
157
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
158
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
159
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
160
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
161
+ #.idea/
README.md CHANGED
@@ -1,12 +1,59 @@
1
  ---
2
- title: PlayMyEmotions
3
- emoji: 🔥
4
- colorFrom: pink
5
- colorTo: green
6
- sdk: streamlit
7
- sdk_version: 1.26.0
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: "PlayMyEmotions"
3
+ emoji: "🔮"
4
+ colorFrom: "indigo"
5
+ colorTo: "purple"
6
+ sdk: "streamlit"
7
+ sdk_version: "1.19.0"
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
+ # Play My Emotions 🎵🏰🔮
13
+
14
+ This app takes a user's input and suggests songs that match their emotions/vibes.
15
+
16
+ Made with [DeepLake](https://www.deeplake.ai/) 🚀 and [LangChain](https://python.langchain.com/en/latest/index.html) 🦜⛓️
17
+
18
+ We also used [upstash](https://upstash.com/) to store user inputs/emotions and recommended songs
19
+
20
+ ## How it works
21
+
22
+ The application follows a sequence of steps to deliver Disney songs matching the user's emotions:
23
+ - **User Input**: The application starts by collecting user's emotional state through a text input.
24
+ - **Emotion Encoding**: The user-provided emotions are then fed to a Language Model (LLM). The LLM interprets and encodes these emotions.
25
+ - **Similarity Search**: These encoded emotions are utilized to perform a similarity search within our [vector database](https://www.deeplake.ai/). This database houses Disney songs, each represented as emotional embeddings.
26
+ - **Song Selection**: From the pool of top matching songs, the application randomly selects one. The selection is weighted, giving preference to songs with higher similarity scores.
27
+ - **Song Retrieval**: The selected song's embedded player is displayed on the webpage for the user. Additionally, the LLM interpreted emotional state associated with the chosen song is displayed.
28
+
29
+ ## Run it
30
+
31
+ Clone this repo.
32
+
33
+ Create a `venv`:
34
+
35
+ ```
36
+ python -m venv .venv
37
+ source .venv/bin/activate
38
+ pip install -r requirements.txt
39
+ ```
40
+
41
+ You will need the following `.env` file
42
+
43
+ ```bash
44
+ OPENAI_API_KEY=<OPENAI_API_KEY>
45
+ ACTIVELOOP_TOKEN=<ACTIVELOOP_TOKEN>
46
+ ACTIVELOOP_ORG_ID=zuppif
47
+ UPSTASH_URL=<UPSTASH_URL>
48
+ UPSTASH_PASSWORD=<UPSTASH_PASSWORD>
49
+ ```
50
+
51
+ If you **don't want to use upstash** set the `USE_STORAGE=False`
52
+
53
+ Then
54
+
55
+ ```
56
+ streamlit run app.py
57
+ ```
58
+
59
+ Then navigate to `http://localhost:8501` (Streamlit prints the exact URL when it starts).
app.py ADDED
@@ -0,0 +1,182 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pathlib import Path
2
+
3
+ import streamlit as st
4
+ from dotenv import load_dotenv
5
+ from langchain.chains import LLMChain
6
+ from langchain.prompts import PromptTemplate
7
+
8
+ load_dotenv()
9
+ import os
10
+ from typing import List, Tuple
11
+
12
+ import numpy as np
13
+ from langchain.chat_models import ChatOpenAI
14
+ from langchain.embeddings.openai import OpenAIEmbeddings
15
+ from langchain.schema import Document
16
+
17
+ from data import load_db
18
+ from names import DATASET_ID, MODEL_ID
19
+ from storage import RedisStorage, UserInput
20
+ from utils import weighted_random_sample
21
+
22
+
23
+ class RetrievalType:
24
+ FIRST_MATCH = "first-match"
25
+ POOL_MATCHES = "pool-matches"
26
+
27
+
28
+ Matches = List[Tuple[Document, float]]
29
+ USE_STORAGE = os.environ.get("USE_STORAGE", "True").lower() in ("true", "t", "1")
30
+
31
+ print("USE_STORAGE", USE_STORAGE)
32
+
33
+
34
@st.cache_resource
def init():
    """Build the resources shared by the app: vector store, storage and LLM chain.

    Returns:
        tuple: ``(db, storage, chain)`` where ``db`` is the read-only vector
        store returned by ``load_db``, ``storage`` is a ``RedisStorage``
        instance (or ``None`` when ``USE_STORAGE`` is false) and ``chain`` is
        the ``LLMChain`` built from ``prompts/bot.prompt``.

    Raises:
        KeyError: if a required environment variable is missing.
    """
    embeddings = OpenAIEmbeddings(model=MODEL_ID)
    dataset_path = f"hub://{os.environ['ACTIVELOOP_ORG_ID']}/{DATASET_ID}"

    db = load_db(
        dataset_path,
        embedding_function=embeddings,
        token=os.environ["ACTIVELOOP_TOKEN"],
        read_only=True,
    )

    # Only read the Upstash credentials when storage is enabled; otherwise a
    # user who set USE_STORAGE=False (and has no Upstash account) would hit a
    # KeyError here before the app even renders.
    storage = None
    if USE_STORAGE:
        storage = RedisStorage(
            host=os.environ["UPSTASH_URL"], password=os.environ["UPSTASH_PASSWORD"]
        )

    prompt = PromptTemplate(
        input_variables=["user_input"],
        template=Path("prompts/bot.prompt").read_text(),
    )
    llm = ChatOpenAI(temperature=0.3)
    chain = LLMChain(llm=llm, prompt=prompt)

    return db, storage, chain
60
+
61
+
62
+ # Don't show the setting sidebar
63
+ if "sidebar_state" not in st.session_state:
64
+ st.session_state.sidebar_state = "collapsed"
65
+
66
+ st.set_page_config(initial_sidebar_state=st.session_state.sidebar_state)
67
+
68
+
69
+ db, storage, chain = init()
70
+
71
+ st.title("PlayMyEmotions 🎵🏰🔮")
72
+ st.markdown(
73
+ """
74
+ *<small>Made with [DeepLake](https://www.deeplake.ai/) 🚀 and [LangChain](https://python.langchain.com/en/latest/index.html) 🦜⛓️</small>*
75
+
76
+ 💫 Unleash the magic within you with our enchanting app, turning your sentiments into a Disney soundtrack! 🌈 Just express your emotions, and embark on a whimsical journey as we tailor a Disney melody to match your mood. 👑💖""",
77
+ unsafe_allow_html=True,
78
+ )
79
+ how_it_works = st.expander(label="How it works")
80
+
81
+ text_input = st.text_input(
82
+ label="How are you feeling today?",
83
+ placeholder="I am ready to rock and rool!",
84
+ )
85
+
86
+ run_btn = st.button("Make me sing! 🎶")
87
+ with how_it_works:
88
+ st.markdown(
89
+ """
90
+ The application follows a sequence of steps to deliver Disney songs matching the user's emotions:
91
+ - **User Input**: The application starts by collecting user's emotional state through a text input.
92
+ - **Emotion Encoding**: The user-provided emotions are then fed to a Language Model (LLM). The LLM interprets and encodes these emotions.
93
+ - **Similarity Search**: These encoded emotions are utilized to perform a similarity search within our [vector database](https://www.deeplake.ai/). This database houses Disney songs, each represented as emotional embeddings.
94
+ - **Song Selection**: From the pool of top matching songs, the application randomly selects one. The selection is weighted, giving preference to songs with higher similarity scores.
95
+ - **Song Retrieval**: The selected song's embedded player is displayed on the webpage for the user. Additionally, the LLM interpreted emotional state associated with the chosen song is displayed.
96
+ """
97
+ )
98
+
99
+
100
+ placeholder_emotions = st.empty()
101
+ placeholder = st.empty()
102
+
103
+
104
+ with st.sidebar:
105
+ st.text("App settings")
106
+ filter_threshold = st.slider(
107
+ "Threshold used to filter out low scoring songs",
108
+ min_value=0.0,
109
+ max_value=1.0,
110
+ value=0.8,
111
+ )
112
+ max_number_of_songs = st.slider(
113
+ "Max number of songs we will retrieve from the db",
114
+ min_value=5,
115
+ max_value=50,
116
+ value=20,
117
+ step=1,
118
+ )
119
+ number_of_displayed_songs = st.slider(
120
+ "Number of displayed songs", min_value=1, max_value=4, value=2, step=1
121
+ )
122
+
123
+
124
def filter_scores(matches: Matches, th: float = 0.8) -> Matches:
    """Return the ``(doc, score)`` pairs whose score is strictly above ``th``."""
    kept = []
    for doc, score in matches:
        if score > th:
            kept.append((doc, score))
    return kept
126
+
127
+
128
def normalize_scores_by_sum(matches: Matches) -> Matches:
    """Divide every score by the sum of all scores, keeping the docs paired."""
    total = sum(score for _, score in matches)
    normalized = []
    for doc, score in matches:
        normalized.append((doc, score / total))
    return normalized
132
+
133
+
134
def get_song(user_input: str, k: int = 20):
    """Pick songs for ``user_input`` by sampling from the best vector matches.

    The user text is run through the LLM chain, the chain output is used for
    a cosine similarity search returning up to ``k`` matches, matches at or
    below ``filter_threshold`` are dropped, and the survivors are sampled
    without replacement, weighted by their normalized score.

    Args:
        user_input: Free text entered by the user.
        k: Maximum number of matches requested from the vector store.

    Returns:
        tuple: ``(chosen_docs, emotions)`` where ``chosen_docs`` is a list of
        at most ``number_of_displayed_songs`` documents (empty when nothing
        passes the threshold) and ``emotions`` is the chain output.
    """
    emotions = chain.run(user_input=user_input)
    matches = db.similarity_search_with_score(emotions, distance_metric="cos", k=k)
    candidates = normalize_scores_by_sum(filter_scores(matches, filter_threshold))

    # zip(*[]) yields nothing, so unpacking it into two names would raise
    # ValueError when every match is filtered out.
    if not candidates:
        return [], emotions

    docs, scores = zip(*candidates)
    chosen_docs = weighted_random_sample(
        np.array(docs),
        np.array(scores),
        # Cannot draw more distinct songs than survived the filter.
        n=min(number_of_displayed_songs, len(docs)),
    ).tolist()
    return chosen_docs, emotions
145
+
146
+
147
def set_song(user_input):
    """Render the interpreted emotions and the embedded players for ``user_input``.

    Does nothing for an empty string. Otherwise the input is truncated to its
    first 120 characters, songs are fetched with ``get_song``, the widgets are
    written into the two page placeholders and, when ``USE_STORAGE`` is on,
    the request is persisted through ``storage``.
    """
    if user_input == "":
        return

    # take first 120 chars
    user_input = user_input[:120]
    docs, emotions = get_song(user_input, k=max_number_of_songs)
    print(docs)

    with placeholder_emotions:
        st.markdown("Your emotions: `" + emotions + "`")

    songs = []
    iframe_tags = []
    with placeholder:
        for doc in docs:
            song_name = doc.metadata["name"]
            print(f"song = {song_name}")
            songs.append(song_name)
            embed_url = doc.metadata["embed_url"]
            iframe_tags.append(
                f'<iframe src="{embed_url}" style="border:0;height:100px"> </iframe>'
            )

        iframes_html = "".join(iframe_tags)
        st.markdown(
            f"<div style='display:flex;flex-direction:column'>{iframes_html}</div>",
            unsafe_allow_html=True,
        )

    if not USE_STORAGE:
        return

    record = UserInput(text=user_input, emotions=emotions, songs=songs)
    if not storage.store(record):
        print("[ERROR] was not able to store user_input")
179
+
180
+
181
+ if run_btn:
182
+ set_song(text_input)
data.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dotenv import load_dotenv
2
+
3
+ load_dotenv()
4
+ import json
5
+ import os
6
+
7
+ from langchain.embeddings.openai import OpenAIEmbeddings
8
+ from langchain.llms import OpenAI
9
+ from langchain.vectorstores import DeepLake
10
+
11
+ from names import DATASET_ID, MODEL_ID
12
+
13
+
14
def create_db(dataset_path: str, json_filepath: str) -> DeepLake:
    """Create a DeepLake vector store from a JSON file of lyrics.

    The JSON file is expected to map a movie name to a list of objects with
    the keys ``text``, ``name`` and ``embed_url``; ``text`` is embedded and
    the remaining fields (plus the movie name) are stored as metadata.

    Args:
        dataset_path: Destination dataset path passed to ``DeepLake.from_texts``.
        json_filepath: Path of the JSON file to load.

    Returns:
        The ``DeepLake`` store returned by ``DeepLake.from_texts``.
    """
    # JSON is UTF-8 by specification; do not rely on the platform default
    # encoding, which differs between operating systems.
    with open(json_filepath, "r", encoding="utf-8") as f:
        data = json.load(f)

    texts = []
    metadatas = []
    for movie, lyrics in data.items():
        for lyric in lyrics:
            texts.append(lyric["text"])
            metadatas.append(
                {
                    "movie": movie,
                    "name": lyric["name"],
                    "embed_url": lyric["embed_url"],
                }
            )

    embeddings = OpenAIEmbeddings(model=MODEL_ID)

    return DeepLake.from_texts(
        texts, embeddings, metadatas=metadatas, dataset_path=dataset_path
    )
39
+
40
+
41
def load_db(dataset_path: str, *args, **kwargs) -> DeepLake:
    """Open the DeepLake store at ``dataset_path``, forwarding any extra arguments."""
    return DeepLake(dataset_path, *args, **kwargs)
44
+
45
+
46
+ if __name__ == "__main__":
47
+ dataset_path = f"hub://{os.environ['ACTIVELOOP_ORG_ID']}/{DATASET_ID}"
48
+ create_db(dataset_path, "data/emotions_with_spotify_url.json")
names.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ MODEL_ID = "text-embedding-ada-002"
2
+ DATASET_ID = "disney-lyrics"
3
+ # DATASET_ID = "disney-lyrics-emotions"
requirements.txt ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ openai
2
+ python-dotenv
3
+ deeplake
4
+ langchain
5
+ tiktoken
6
+ aiohttp
7
+ cchardet
8
+ aiodns
9
+ streamlit
10
+ redis
11
+ bs4
scrape.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import json
3
+ from collections import defaultdict
4
+ from itertools import chain
5
+ from typing import List, Optional, Tuple, TypedDict
6
+
7
+ import aiohttp
8
+ from bs4 import BeautifulSoup
9
+
10
+ """
11
+ This file scrapes disney songs + lyrics from "https://www.disneyclips.com/lyrics/"
12
+ """
13
+
14
+ URL = "https://www.disneyclips.com/lyrics/"
15
+
16
+
17
async def get_lyrics_names_and_urls_from_movie_url(
    movie_name: str, url: str, session: aiohttp.ClientSession
) -> List[Tuple[str, str, str]]:
    """Collect the song links listed on one movie page.

    Args:
        movie_name: Name of the movie, copied into every returned tuple.
        url: URL of the movie page to download.
        session: Open aiohttp session used for the request.

    Returns:
        A list of ``(movie_name, song_name, song_url)`` tuples, empty when the
        page has no ``<table class="songs">``.
    """
    from urllib.parse import urljoin

    async with session.get(url) as response:
        html = await response.text()

    soup = BeautifulSoup(html, "html.parser")
    table = soup.find("table", {"class": "songs"})
    if table is None:
        return []

    # URL already ends with "/", so plain string concatenation would produce
    # ".../lyrics//song.html"; urljoin builds the canonical address instead.
    # Anchors without an href are skipped rather than turned into ".../None".
    return [
        (movie_name, link.text, urljoin(URL, link.get("href")))
        for link in table.find_all("a")
        if link.get("href")
    ]
33
+
34
+
35
async def get_lyric_from_lyric_url(
    movie_name: str, lyric_name: str, url: str, session: aiohttp.ClientSession
) -> Tuple[str, str, str]:
    """Download one lyric page and extract its text.

    Args:
        movie_name: Name of the movie, passed through to the result.
        lyric_name: Name of the song, passed through to the result.
        url: URL of the lyric page.
        session: Open aiohttp session used for the request.

    Returns:
        ``(movie_name, lyric_name, text)`` where ``text`` is the content of the
        first ``<p>`` inside ``div#cnt div.main`` with ``<br>`` tags replaced
        by ``". "`` and ``<span>`` elements removed.

    Raises:
        AttributeError, IndexError: if the page lacks the expected markup.
    """
    async with session.get(url) as response:
        html = await response.text()

    soup = BeautifulSoup(html, "html.parser")
    main_div = soup.find("div", {"id": "cnt"}).find("div", {"class": "main"})
    # first <p> has the lyric
    lyric_paragraph = main_div.find_all("p")[0]
    # Turn line breaks into sentence separators so lines do not run together.
    for br in lyric_paragraph.find_all("br"):
        br.replace_with(". ")
    for span in lyric_paragraph.find_all("span"):
        span.decompose()

    return (movie_name, lyric_name, lyric_paragraph.text)
53
+
54
+
55
async def get_movie_names_and_urls(
    session: aiohttp.ClientSession,
) -> List[Tuple[str, str]]:
    """List every movie linked from the lyrics index page at ``URL``.

    Args:
        session: Open aiohttp session used for the request.

    Returns:
        A list of ``(movie_name, movie_url)`` tuples built from the anchors
        inside ``div#cnt div.main``.
    """
    from urllib.parse import urljoin

    async with session.get(URL) as response:
        html = await response.text()

    soup = BeautifulSoup(html, "html.parser")
    links = (
        soup.find("div", {"id": "cnt"}).find("div", {"class": "main"}).find_all("a")
    )
    # URL already ends with "/", so f"{URL}/{href}" would contain "//";
    # urljoin resolves the href against the index page correctly. Anchors
    # without an href are skipped.
    return [
        (link.text, urljoin(URL, link.get("href")))
        for link in links
        if link.get("href")
    ]
68
+
69
+
70
async def scrape_disney_lyrics():
    """Scrape every movie's lyrics and write them to ``data/lyrics.json``.

    The output maps each movie name to a list of ``{"name", "text"}`` objects.
    All movie pages are fetched concurrently, then all lyric pages.
    """
    async with aiohttp.ClientSession() as session:
        movies = await get_movie_names_and_urls(session)
        songs_per_movie = await asyncio.gather(
            *[
                asyncio.create_task(
                    get_lyrics_names_and_urls_from_movie_url(*movie, session)
                )
                for movie in movies
            ]
        )
        # Each movie yields a list of songs; flatten them into one stream.
        lyrics = await asyncio.gather(
            *[
                asyncio.create_task(get_lyric_from_lyric_url(*song, session))
                for song in chain.from_iterable(songs_per_movie)
            ]
        )

    result = defaultdict(list)
    for movie_name, lyric_name, lyric_text in lyrics:
        result[movie_name].append({"name": lyric_name, "text": lyric_text})

    with open("data/lyrics.json", "w", encoding="utf-8") as f:
        json.dump(result, f)
95
+
96
+
97
+ loop = asyncio.get_event_loop()
98
+ loop.run_until_complete(scrape_disney_lyrics())
storage.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List, TypedDict
3
+ from uuid import uuid4
4
+
5
+ import redis
6
+
7
+
8
class UserInput(TypedDict):
    """Record persisted for one request."""

    # Text entered by the user.
    text: str
    # Emotions string produced for that text.
    emotions: str
    # Names of the songs shown to the user.
    songs: List[str]


class RedisStorage:
    """Thin wrapper that stores ``UserInput`` records as JSON documents in Redis."""

    def __init__(self, host: str, password: str, port: int = 34307):
        """Connect to Redis over SSL.

        Args:
            host: Redis host name.
            password: Redis password.
            port: Redis port; defaults to the previously hard-coded 34307.
        """
        self._client = redis.Redis(host=host, port=port, password=password, ssl=True)

    def store(self, data: UserInput) -> bool:
        """Save ``data`` under a fresh ``data:<uuid4>`` key.

        Returns:
            ``True`` when the client reports success, ``False`` otherwise.
        """
        uid = uuid4()
        response = self._client.json().set(f"data:{uid}", "$", data)
        # The client may return None instead of a boolean; honour the
        # annotated return type.
        return bool(response)
utils.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+
3
+
4
def weighted_random_sample(items: np.array, weights: np.array, n: int) -> np.array:
    """Draw up to ``n`` distinct items, favouring those with larger weights.

    Works like repeated ``np.random.choice`` calls, but each chosen item is
    removed from the pool so the result never contains duplicates.

    Args:
        items (np.array): Candidates to sample from.
        weights (np.array): Non-negative weight per item, same length as
            ``items``. They do not need to sum to 1.
        n (int): Number of items requested. If it exceeds ``len(items)``
            every item is returned (in sampled order).

    Returns:
        np.array: The sampled items, at most ``min(n, len(items))`` long.
    """
    items = np.asarray(items)
    weights = np.asarray(weights, dtype=float)
    indices = np.arange(len(items))
    out_indices = []

    # Never ask for more draws than there are items: choosing from an empty
    # pool raises ValueError.
    for _ in range(min(n, len(items))):
        total = weights.sum()
        if total > 0:
            probabilities = weights / total
        else:
            # All remaining weights are zero: fall back to a uniform draw
            # instead of handing np.random.choice an invalid distribution.
            probabilities = np.full(len(weights), 1.0 / len(weights))

        chosen_index = np.random.choice(indices, p=probabilities)
        out_indices.append(chosen_index)

        # Remove the chosen item from the pool.
        mask = indices != chosen_index
        indices = indices[mask]
        weights = weights[mask]

    return items[np.asarray(out_indices, dtype=int)]