Spaces:

DjPapzin
/

PlayMyEmotions

Runtime error

App Files Files Community

DjPapzin commited on Sep 9, 2023

Commit

14a3421

•

1 Parent(s): 4f525da

Upload 9 files

Browse files

Files changed (9) hide show

.gitignore +161 -0
README.md +54 -7
app.py +182 -0
data.py +48 -0
names.py +3 -0
requirements.txt +11 -0
scrape.py +98 -0
storage.py +21 -0
utils.py +30 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,161 @@

+disney-lyrics/
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+# C extensions
+*.so
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+# Translations
+*.mo
+*.pot
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+# Flask stuff:
+instance/
+.webassets-cache
+# Scrapy stuff:
+.scrapy
+# Sphinx documentation
+docs/_build/
+# PyBuilder
+.pybuilder/
+target/
+# Jupyter Notebook
+.ipynb_checkpoints
+# IPython
+profile_default/
+ipython_config.py
+# pyenv
+#   For a library or package, you might want to ignore these files since the code is
+#   intended to run in multiple environments; otherwise, check them in:
+# .python-version
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+# poetry
+#   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+#   This is especially recommended for binary packages to ensure reproducibility, and is more
+#   commonly ignored for libraries.
+#   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+# pdm
+#   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+#   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+#   in version control.
+#   https://pdm.fming.dev/#use-with-ide
+.pdm.toml
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+# SageMath parsed files
+*.sage.py
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+# Spyder project settings
+.spyderproject
+.spyproject
+# Rope project settings
+.ropeproject
+# mkdocs documentation
+/site
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+# Pyre type checker
+.pyre/
+# pytype static type analyzer
+.pytype/
+# Cython debug symbols
+cython_debug/
+# PyCharm
+#  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+#  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+#  and can be added to the global gitignore or merged into this file.  For a more nuclear
+#  option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/

README.md CHANGED Viewed

@@ -1,12 +1,59 @@
 ---
-title: PlayMyEmotions
-emoji: 🔥
-colorFrom: pink
-colorTo: green
-sdk: streamlit
-sdk_version: 1.26.0
 app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: "PlayMyEmotions"
+emoji: "🔮"
+colorFrom: "indigo"
+colorTo: "purple"
+sdk: "streamlit"
+sdk_version: "1.19.0"
 app_file: app.py
 pinned: false
 ---
+# Play My Emotions 🎵🏰🔮
+This app takes user input and suggests songs that match the user's emotions/vibes.
+Made with [DeepLake](https://www.deeplake.ai/) 🚀 and [LangChain](https://python.langchain.com/en/latest/index.html) 🦜⛓️
+We also used [upstash](https://upstash.com/) to store user inputs/emotions and recommended songs
+## How it works
+The application follows a sequence of steps to deliver Disney songs matching the user's emotions:
+- **User Input**: The application starts by collecting user's emotional state through a text input.
+- **Emotion Encoding**: The user-provided emotions are then fed to a Language Model (LLM). The LLM interprets and encodes these emotions.
+- **Similarity Search**: These encoded emotions are utilized to perform a similarity search within our [vector database](https://www.deeplake.ai/). This database houses Disney songs, each represented as emotional embeddings.
+- **Song Selection**: From the pool of top matching songs, the application randomly selects one. The selection is weighted, giving preference to songs with higher similarity scores.
+- **Song Retrieval**: The selected song's embedded player is displayed on the webpage for the user. Additionally, the LLM interpreted emotional state associated with the chosen song is displayed.
+## Run it
+Clone this repo.
+Create a `venv`:
+```
+python -m venv .venv
+source .venv/bin/activate
+pip install -r requirements.txt
+```
+You will need the following `.env` file
+```bash
+OPENAI_API_KEY=<OPENAI_API_KEY>
+ACTIVELOOP_TOKEN=<ACTIVELOOP_TOKEN>
+ACTIVELOOP_ORG_ID=zuppif
+UPSTASH_URL=<UPSTASH_URL>
+UPSTASH_PASSWORD=<UPSTASH_PASSWORD>
+```
+If you **don't want to use upstash**, set `USE_STORAGE=False` in the `.env` file.
+Then
+```
+streamlit run app.py
+```
+Then navigate to the URL Streamlit prints in the terminal (by default `http://localhost:8501`).

app.py ADDED Viewed

	@@ -0,0 +1,182 @@

+from pathlib import Path
+import streamlit as st
+from dotenv import load_dotenv
+from langchain.chains import LLMChain
+from langchain.prompts import PromptTemplate
+load_dotenv()
+import os
+from typing import List, Tuple
+import numpy as np
+from langchain.chat_models import ChatOpenAI
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.schema import Document
+from data import load_db
+from names import DATASET_ID, MODEL_ID
+from storage import RedisStorage, UserInput
+from utils import weighted_random_sample
+class RetrievalType:
+    """String identifiers for two retrieval strategies.
+
+    NOTE(review): not referenced by the rest of the visible code - confirm
+    whether it is still needed.
+    """
+    FIRST_MATCH = "first-match"
+    POOL_MATCHES = "pool-matches"
+# A list of (document, score) pairs, the shape passed around by the score helpers below.
+Matches = List[Tuple[Document, float]]
+# Enabled unless USE_STORAGE is set to something other than "true", "t" or "1" (case-insensitive).
+USE_STORAGE = os.environ.get("USE_STORAGE", "True").lower() in ("true", "t", "1")
+print("USE_STORAGE", USE_STORAGE)
+@st.cache_resource
+def init():
+    """Build the vector store, the optional storage client and the LLM chain.
+
+    Decorated with ``st.cache_resource``, so the returned objects are reused.
+
+    Returns:
+        A ``(db, storage, chain)`` tuple. ``storage`` is ``None`` when
+        ``USE_STORAGE`` is false.
+
+    Raises:
+        KeyError: if ``ACTIVELOOP_ORG_ID`` or ``ACTIVELOOP_TOKEN`` is missing,
+            or if storage is enabled and ``UPSTASH_URL`` /
+            ``UPSTASH_PASSWORD`` is missing.
+    """
+    embeddings = OpenAIEmbeddings(model=MODEL_ID)
+    dataset_path = f"hub://{os.environ['ACTIVELOOP_ORG_ID']}/{DATASET_ID}"
+    db = load_db(
+        dataset_path,
+        embedding_function=embeddings,
+        token=os.environ["ACTIVELOOP_TOKEN"],
+        read_only=True,
+    )
+    # Only touch the UPSTASH_* variables when storage is enabled; otherwise
+    # running with USE_STORAGE=False would still fail with a KeyError here.
+    storage = None
+    if USE_STORAGE:
+        storage = RedisStorage(
+            host=os.environ["UPSTASH_URL"], password=os.environ["UPSTASH_PASSWORD"]
+        )
+    prompt = PromptTemplate(
+        input_variables=["user_input"],
+        template=Path("prompts/bot.prompt").read_text(),
+    )
+    llm = ChatOpenAI(temperature=0.3)
+    chain = LLMChain(llm=llm, prompt=prompt)
+    return db, storage, chain
+# Start with the settings sidebar collapsed; the chosen state is kept in session_state.
+if "sidebar_state" not in st.session_state:
+    st.session_state.sidebar_state = "collapsed"
+st.set_page_config(initial_sidebar_state=st.session_state.sidebar_state)
+# Vector store, storage client and LLM chain (init is wrapped in st.cache_resource).
+db, storage, chain = init()
+st.title("PlayMyEmotions 🎵🏰🔮")
+# unsafe_allow_html is needed because the text contains a raw <small> tag.
+st.markdown(
+    """
+*<small>Made with [DeepLake](https://www.deeplake.ai/) 🚀 and [LangChain](https://python.langchain.com/en/latest/index.html) 🦜⛓️</small>*
+💫 Unleash the magic within you with our enchanting app, turning your sentiments into a Disney soundtrack! 🌈 Just express your emotions, and embark on a whimsical journey as we tailor a Disney melody to match your mood. 👑💖""",
+    unsafe_allow_html=True,
+)
+# The expander is created here so it appears above the input; its content is added later.
+how_it_works = st.expander(label="How it works")
+# Free-text description of the user's mood; the placeholder is only a hint.
+text_input = st.text_input(
+    label="How are you feeling today?",
+    placeholder="I am ready to rock and roll!",
+)
+run_btn = st.button("Make me sing! 🎶")
+# Fill the "How it works" expander with a description of the pipeline.
+with how_it_works:
+    st.markdown(
+        """
+The application follows a sequence of steps to deliver Disney songs matching the user's emotions:
+- **User Input**: The application starts by collecting user's emotional state through a text input.
+- **Emotion Encoding**: The user-provided emotions are then fed to a Language Model (LLM). The LLM interprets and encodes these emotions.
+- **Similarity Search**: These encoded emotions are utilized to perform a similarity search within our [vector database](https://www.deeplake.ai/). This database houses Disney songs, each represented as emotional embeddings.
+- **Song Selection**: From the pool of top matching songs, the application randomly selects one. The selection is weighted, giving preference to songs with higher similarity scores.
+- **Song Retrieval**: The selected song's embedded player is displayed on the webpage for the user. Additionally, the LLM interpreted emotional state associated with the chosen song is displayed.
+"""
+    )
+# Empty slots that set_song fills with the emotions text and the song players.
+placeholder_emotions = st.empty()
+placeholder = st.empty()
+# Tunable settings; the three values below are read as module globals by get_song / set_song.
+with st.sidebar:
+    st.text("App settings")
+    # Matches with a score at or below this value are dropped (see filter_scores).
+    filter_threshold = st.slider(
+        "Threshold used to filter out low scoring songs",
+        min_value=0.0,
+        max_value=1.0,
+        value=0.8,
+    )
+    # Passed as k to the similarity search.
+    max_number_of_songs = st.slider(
+        "Max number of songs we will retrieve from the db",
+        min_value=5,
+        max_value=50,
+        value=20,
+        step=1,
+    )
+    # How many songs are sampled from the filtered matches and shown.
+    number_of_displayed_songs = st.slider(
+        "Number of displayed songs", min_value=1, max_value=4, value=2, step=1
+    )
+def filter_scores(matches: Matches, th: float = 0.8) -> Matches:
+    """Keep only the (document, score) pairs whose score is strictly above ``th``."""
+    kept = []
+    for doc, score in matches:
+        if score > th:
+            kept.append((doc, score))
+    return kept
+def normalize_scores_by_sum(matches: Matches) -> Matches:
+    """Divide every score by the sum of all scores, keeping each document with its score."""
+    total = sum(score for _, score in matches)
+    normalized = []
+    for doc, score in matches:
+        normalized.append((doc, score / total))
+    return normalized
+def get_song(user_input: str, k: int = 20):
+    """Pick songs for the emotions the LLM chain derives from ``user_input``.
+
+    Reads the module-level ``chain``, ``db``, ``filter_threshold`` and
+    ``number_of_displayed_songs``.
+
+    Args:
+        user_input: The text typed by the user.
+        k: Number of matches requested from the similarity search.
+
+    Returns:
+        A ``(docs, emotions)`` tuple: the sampled documents (an empty list when
+        no match scores above the threshold) and the chain output.
+    """
+    emotions = chain.run(user_input=user_input)
+    matches = db.similarity_search_with_score(emotions, distance_metric="cos", k=k)
+    candidates = normalize_scores_by_sum(filter_scores(matches, filter_threshold))
+    # With no candidate left, unpacking zip(*[]) would raise ValueError.
+    if not candidates:
+        return [], emotions
+    docs, scores = zip(*candidates)
+    # Never ask the sampler for more distinct songs than there are candidates.
+    n_songs = min(number_of_displayed_songs, len(docs))
+    choosen_docs = weighted_random_sample(
+        np.array(docs), np.array(scores), n=n_songs
+    ).tolist()
+    return choosen_docs, emotions
+def set_song(user_input):
+    """Find songs for ``user_input``, render them and, if enabled, store the interaction.
+
+    Does nothing for an empty string. Only the first 120 characters of the
+    input are used.
+    """
+    if user_input == "":
+        return
+    # take first 120 chars
+    user_input = user_input[:120]
+    docs, emotions = get_song(user_input, k=max_number_of_songs)
+    print(docs)
+    songs = []
+    with placeholder_emotions:
+        st.markdown("Your emotions: `" + emotions + "`")
+    with placeholder:
+        iframes = []
+        for doc in docs:
+            name = doc.metadata["name"]
+            print(f"song = {name}")
+            songs.append(name)
+            embed_url = doc.metadata["embed_url"]
+            iframes.append(
+                f'<iframe src="{embed_url}" style="border:0;height:100px"> </iframe>'
+            )
+        iframes_html = "".join(iframes)
+        st.markdown(
+            f"<div style='display:flex;flex-direction:column'>{iframes_html}</div>",
+            unsafe_allow_html=True,
+        )
+        # store() is only called when storage is enabled (short-circuit).
+        if USE_STORAGE and not storage.store(
+            UserInput(text=user_input, emotions=emotions, songs=songs)
+        ):
+            print("[ERROR] was not able to store user_input")
+# Only look up songs when the button was clicked.
+if run_btn:
+    set_song(text_input)

data.py ADDED Viewed

	@@ -0,0 +1,48 @@

+from dotenv import load_dotenv
+load_dotenv()
+import json
+import os
+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.llms import OpenAI
+from langchain.vectorstores import DeepLake
+from names import DATASET_ID, MODEL_ID
+def create_db(dataset_path: str, json_filepath: str) -> DeepLake:
+    """Create a DeepLake dataset from a JSON file of lyrics grouped by movie.
+
+    The JSON is read as ``{movie: [{"text": ..., "name": ..., "embed_url": ...}, ...]}``.
+    Each ``text`` is embedded; ``movie``, ``name`` and ``embed_url`` are
+    stored as metadata.
+
+    Args:
+        dataset_path: Destination passed to ``DeepLake.from_texts``.
+        json_filepath: Path of the JSON file to load.
+
+    Returns:
+        The created ``DeepLake`` vector store.
+    """
+    # Be explicit about the encoding instead of relying on the platform default.
+    with open(json_filepath, "r", encoding="utf-8") as f:
+        data = json.load(f)
+    texts = []
+    metadatas = []
+    for movie, lyrics in data.items():
+        for lyric in lyrics:
+            texts.append(lyric["text"])
+            metadatas.append(
+                {
+                    "movie": movie,
+                    "name": lyric["name"],
+                    "embed_url": lyric["embed_url"],
+                }
+            )
+    embeddings = OpenAIEmbeddings(model=MODEL_ID)
+    return DeepLake.from_texts(
+        texts, embeddings, metadatas=metadatas, dataset_path=dataset_path
+    )
+def load_db(dataset_path: str, *args, **kwargs) -> DeepLake:
+    """Open the DeepLake store at ``dataset_path``, forwarding any extra arguments."""
+    return DeepLake(dataset_path, *args, **kwargs)
+if __name__ == "__main__":
+    # Script entry point: build the dataset from the local JSON file.
+    # ACTIVELOOP_ORG_ID must be set in the environment (KeyError otherwise).
+    dataset_path = f"hub://{os.environ['ACTIVELOOP_ORG_ID']}/{DATASET_ID}"
+    create_db(dataset_path, "data/emotions_with_spotify_url.json")

names.py ADDED Viewed

	@@ -0,0 +1,3 @@

+# Embedding model name; passed as model= to OpenAIEmbeddings by the importing modules.
+MODEL_ID = "text-embedding-ada-002"
+# Dataset name; combined with the org id into a "hub://<org>/<dataset>" path.
+DATASET_ID = "disney-lyrics"
+# DATASET_ID = "disney-lyrics-emotions"

requirements.txt ADDED Viewed

	@@ -0,0 +1,11 @@

+openai
+python-dotenv
+deeplake
+langchain
+tiktoken
+aiohttp
+cchardet
+aiodns
+streamlit
+redis
+bs4

scrape.py ADDED Viewed

	@@ -0,0 +1,98 @@

+import asyncio
+import json
+from collections import defaultdict
+from itertools import chain
+from typing import List, Optional, Tuple, TypedDict
+import aiohttp
+from bs4 import BeautifulSoup
+"""
+This file scrapes disney songs + lyrics from "https://www.disneyclips.com/lyrics/"
+"""
+# Index page listing the movies; also the base that scraped hrefs are appended to.
+URL = "https://www.disneyclips.com/lyrics/"
+async def get_lyrics_names_and_urls_from_movie_url(
+    movie_name: str, url: str, session: aiohttp.ClientSession
+) -> List[Tuple[str, str, str]]:
+    """Collect the songs linked from one movie page.
+
+    Args:
+        movie_name: Name of the movie, copied into every returned tuple.
+        url: URL of the movie page.
+        session: Open aiohttp session used for the request.
+
+    Returns:
+        A list of ``(movie_name, song_name, song_url)`` tuples; empty when the
+        page has no ``<table class="songs">``.
+    """
+    # Local import so the block does not depend on the file's import section.
+    from urllib.parse import urljoin
+    async with session.get(url) as response:
+        html = await response.text()
+    soup = BeautifulSoup(html, "html.parser")
+    table = soup.find("table", {"class": "songs"})
+    if table is None:
+        return []
+    # URL already ends with "/", so joining avoids the double slash produced by
+    # f"{URL}/{href}". Anchors without an href are skipped.
+    return [
+        (movie_name, link.text, urljoin(URL, link.get("href")))
+        for link in table.find_all("a")
+        if link.get("href")
+    ]
+async def get_lyric_from_lyric_url(
+    movie_name: str, lyric_name: str, url: str, session: aiohttp.ClientSession
+) -> Tuple[str, str, str]:
+    """Download one lyric page and extract the lyric text.
+
+    Line breaks in the lyric paragraph are replaced by ". " and ``<span>``
+    elements are removed before the text is read.
+
+    Args:
+        movie_name: Passed through unchanged as the first tuple element.
+        lyric_name: Passed through unchanged as the second tuple element.
+        url: URL of the lyric page.
+        session: Open aiohttp session used for the request.
+
+    Returns:
+        ``(movie_name, lyric_name, text)``.
+
+    Raises:
+        AttributeError: if the ``div#cnt`` / ``div.main`` containers are missing.
+        IndexError: if the container holds no ``<p>``.
+    """
+    async with session.get(url) as response:
+        html = await response.text()
+    soup = BeautifulSoup(html, "html.parser")
+    div = soup.find("div", {"id": "cnt"}).find("div", {"class": "main"})
+    # first <p> has the lyric
+    p = div.find_all("p")[0]
+    for br in p.find_all("br"):
+        br.replace_with(". ")
+    for span in p.find_all("span"):
+        span.decompose()
+    return (movie_name, lyric_name, p.text)
+async def get_movie_names_and_urls(
+    session: aiohttp.ClientSession,
+) -> List[Tuple[str, str]]:
+    """List the movies linked from the index page at ``URL``.
+
+    Args:
+        session: Open aiohttp session used for the request.
+
+    Returns:
+        A list of ``(movie_name, movie_url)`` tuples.
+
+    Raises:
+        AttributeError: if the ``div#cnt`` / ``div.main`` containers are missing.
+    """
+    # Local import so the block does not depend on the file's import section.
+    from urllib.parse import urljoin
+    async with session.get(URL) as response:
+        html = await response.text()
+    soup = BeautifulSoup(html, "html.parser")
+    links = (
+        soup.find("div", {"id": "cnt"}).find("div", {"class": "main"}).find_all("a")
+    )
+    # URL already ends with "/", so joining avoids the double slash produced by
+    # f"{URL}/{href}". Anchors without an href are skipped.
+    return [
+        (link.text, urljoin(URL, link.get("href")))
+        for link in links
+        if link.get("href")
+    ]
+async def scrape_disney_lyrics():
+    """Scrape all movies, their songs and lyrics, and write them to ``data/lyrics.json``.
+
+    The output maps each movie name to a list of ``{"name": ..., "text": ...}``
+    entries. The ``data`` directory is created if it does not exist.
+    """
+    # Local import so the block does not depend on the file's import section.
+    from pathlib import Path
+    async with aiohttp.ClientSession() as session:
+        movies = await get_movie_names_and_urls(session)
+        # One request per movie page; gather schedules the coroutines concurrently.
+        songs_per_movie = await asyncio.gather(
+            *(
+                get_lyrics_names_and_urls_from_movie_url(*movie, session)
+                for movie in movies
+            )
+        )
+        # One request per song, across all movies.
+        lyrics = await asyncio.gather(
+            *(
+                get_lyric_from_lyric_url(*song, session)
+                for song in chain.from_iterable(songs_per_movie)
+            )
+        )
+    result = defaultdict(list)
+    for movie_name, lyric_name, lyric_text in lyrics:
+        result[movie_name].append({"name": lyric_name, "text": lyric_text})
+    out_path = Path("data/lyrics.json")
+    # Without this, a missing directory would fail only after the whole scrape.
+    out_path.parent.mkdir(parents=True, exist_ok=True)
+    with out_path.open("w", encoding="utf-8") as f:
+        json.dump(result, f)
+# Guarded like data.py so that importing this module does not start a scrape.
+if __name__ == "__main__":
+    # asyncio.run creates and closes the event loop itself; calling
+    # get_event_loop() with no running loop is deprecated.
+    asyncio.run(scrape_disney_lyrics())

storage.py ADDED Viewed

	@@ -0,0 +1,21 @@

+import os
+from typing import List, TypedDict
+from uuid import uuid4
+import redis
+class UserInput(TypedDict):
+    """Record describing one interaction, as passed to ``RedisStorage.store``."""
+    # Text entered by the user.
+    text: str
+    # Emotions string associated with that text.
+    emotions: str
+    # Names of the songs associated with this input.
+    songs: List[str]
+class RedisStorage:
+    """Stores ``UserInput`` records as JSON documents in Redis over SSL."""
+    def __init__(self, host: str, password: str, port: int = 34307):
+        """Create the client.
+
+        Args:
+            host: Redis host name.
+            password: Redis password.
+            port: Redis port; defaults to the value that used to be hard-coded.
+        """
+        self._client = redis.Redis(host=host, port=port, password=password, ssl=True)
+    def store(self, data: UserInput) -> bool:
+        """Store ``data`` under a fresh ``data:<uuid4>`` key.
+
+        Returns:
+            ``True`` when the client reports a truthy response, else ``False``.
+        """
+        uid = uuid4()
+        response = self._client.json().set(f"data:{uid}", "$", data)
+        # Coerce so the return value matches the declared bool type.
+        return bool(response)

utils.py ADDED Viewed

	@@ -0,0 +1,30 @@

+import numpy as np
+def weighted_random_sample(items: np.ndarray, weights: np.ndarray, n: int) -> np.ndarray:
+    """Draw up to ``n`` distinct items, favouring items with larger weights.
+
+    Items are drawn one at a time; after each draw the chosen item is removed
+    and the remaining weights are renormalised, so no item appears twice.
+
+    Args:
+        items: Candidates to sample from.
+        weights: One non-negative weight per item; they need not sum to 1.
+        n: Number of items wanted. Capped at ``len(items)``.
+
+    Returns:
+        The chosen items in draw order, with at most ``min(n, len(items))`` entries.
+    """
+    items = np.asarray(items)
+    weights = np.asarray(weights, dtype=float)
+    indices = np.arange(len(items))
+    out_indices = []
+    # Cannot return more distinct items than exist.
+    for _ in range(min(n, len(items))):
+        total = weights.sum()
+        if total > 0:
+            probabilities = weights / total
+        else:
+            # No weight left (e.g. only zero-weight items remain): pick uniformly,
+            # because np.random.choice requires probabilities that sum to 1.
+            probabilities = np.full(len(weights), 1.0 / len(weights))
+        chosen_index = np.random.choice(indices, p=probabilities)
+        out_indices.append(chosen_index)
+        mask = indices != chosen_index
+        indices = indices[mask]
+        weights = weights[mask]
+    # Explicit integer dtype keeps the empty case a valid index array.
+    return items[np.asarray(out_indices, dtype=int)]