Spaces:

kmaurinjones
/

SongScope

Sleeping

App Files Community

kmaurinjones committed on Apr 22, 2023

Commit

618c903

•

1 Parent(s): 6781c46

Update songscope.py

Browse files

Files changed (1) hide show

songscope.py +4 -26

songscope.py CHANGED Viewed

@@ -12,7 +12,7 @@ from nltk.sentiment import SentimentIntensityAnalyzer
 nltk.download("vader_lexicon")
 # jaro distance from scratch because importing or installing the module didn't work
-def homemade_jaro(s1, s2):
     if not s1 or not s2:
         return 0.0
@@ -60,7 +60,6 @@ def random_delay(min_val: float, max_val: float, print_delay: bool = False):
         The maximum amount of time to delay (in seconds).
     print_delay: bool
         Whether or not to print the delay time.
     Returns:
     -------
     val: float
@@ -78,12 +77,10 @@ def find_artist(artist_name: str):
     This function sends an HTTP request to azlyrics.com, scrapes the HTML content
     to find the artist's page, and returns the URL to that page.
     Parameters:
     ----------
     artist_name: str
         The name of the artist.
     Returns:
     -------
     url: str
@@ -119,7 +116,7 @@ def find_artist(artist_name: str):
         max_sim = -100
         for id, name in enumerate(artist_urls):
             # dist = Levenshtein.jaro(artist_name, name)
-            dist = homemade_jaro(artist_name, name)
             if max_sim < dist:
                 max_sim = dist
                 min_id = id
@@ -133,23 +130,19 @@ def follow_lyrics(lyric_url: str):
     This function sends an HTTP request to the given `lyric_url`, parses the HTML content,
     and extracts the song lyrics. The lyrics are cleaned by removing unnecessary HTML tags
     and whitespace. The function returns the lyrics as a string.
     Note: Web scraping may be against the terms of service of some websites. Azlyrics.com
     specifically prohibits the usage of their content by third-party lyrics providers.
     Always review the website's policies and ensure you are compliant before scraping data.
     Parameters:
     ------
     `lyric_url`: str
         The URL of the song lyrics on azlyrics.com.
     `song_title`: str
         Title of the song
     Returns:
     ------
     `lyrics_str`:
         The lyrics of the song as a single string.
     Raises:
     ------
         `ValueError`: If the lyrics cannot be found or if there's an error fetching the webpage.
@@ -190,17 +183,14 @@ def follow_lyrics(lyric_url: str):
 def find_artist(artist_name: str) -> str:
     """
     Finds the link for the artist page on azlyrics.com.
     Parameters:
     ------
     `artist_name`: str
         The name of the artist.
     Returns:
     ------
     `url`: str
         The URL of the artist page on azlyrics.com.
     Raises:
     ------
     `ValueError`: If the artist page cannot be found.
@@ -245,7 +235,7 @@ def find_artist(artist_name: str) -> str:
         max_sim = -100
         for id, name in enumerate(artist_urls):
             # dist = Levenshtein.jaro(artist_name, name)
-            dist = homemade_jaro(artist_name, name)
             if max_sim < dist:
                 max_sim = dist
                 min_id = id
@@ -256,12 +246,10 @@ def find_artist(artist_name: str) -> str:
 def flatten_list(lst: list):
     """
     Flattens all inner lists (all depths) of a list into a list of depth == 1.
     Parameters:
     ------
     `lst`: List
         The list to be flattened.
     Returns:
     ------
     `result`: List
@@ -280,12 +268,10 @@ def process_lyrics(lyrics: str):
     """
     Pre-processes the lyrics by replacing `\r` with an empty string, splitting the lyrics
     by `\n`, and removing consecutive whitespace list items.
     Parameters:
     ------
     `lyrics`: str
         The lyrics to be pre-processed.
     Returns:
     ------
     `cleaned_lines`: List
@@ -306,12 +292,10 @@ def process_lyrics(lyrics: str):
 def sectionize(lyrics: str):
     """
     Splits the pre-processed lyrics into sections.
     Parameters:
     ------
     `lyrics`: str
         The pre-processed lyrics.
     Returns:
     ------
     `all_sections`: List
@@ -347,12 +331,10 @@ def analyze_sentiment_vader(text: str):
     """
     Analyzes the sentiment of a text using the VADER (Valence Aware Dictionary and sEntiment Reasoner)
     sentiment analysis model.
     Parameters:
     ------
     `text`: str
         The text to be analyzed.
     Returns:
     ------
     `label`: str
@@ -378,23 +360,19 @@ def analyze_sentiment_vader(text: str):
 def get_sentiments(df):
     """
     Retrieves the sentiment analysis of all text items in the 'lyrics' column of a pandas DataFrame.
     This function applies the Vader sentiment analysis model from the Natural Language Toolkit (nltk)
     to each text item in the 'lyrics' column of the passed DataFrame. It assigns a sentiment label
     ('POSITIVE', 'NEGATIVE', or 'NEUTRAL') and a compound sentiment score (ranging from -1 to 1) to
     each text item. The sentiment analysis is added as new columns to the DataFrame and the modified
     DataFrame is returned.
     Parameters:
     -----------
     df : pandas DataFrame
         The DataFrame containing the 'lyrics' column to be analyzed.
     Returns:
     --------
     df : pandas DataFrame
         The modified DataFrame with sentiment analysis added as new columns.
     Raises:
     -------
     None.
@@ -451,7 +429,7 @@ def get_metadata(artist_name: str, song_titles: list = None) -> dict:
                 max_sim = -100
                 for id, az_name in enumerate(az_titles):
                     # dist = Levenshtein.jaro(title, az_name)
-                    dist = homemade_jaro(title, az_name)
                     if max_sim < dist:
                         max_sim = dist
                         min_id = id

 nltk.download("vader_lexicon")
 # jaro distance from scratch because importing or installing the module didn't work
+def jaro_distance(s1, s2):
     if not s1 or not s2:
         return 0.0
         The maximum amount of time to delay (in seconds).
     print_delay: bool
         Whether or not to print the delay time.
     Returns:
     -------
     val: float
     This function sends an HTTP request to azlyrics.com, scrapes the HTML content
     to find the artist's page, and returns the URL to that page.
     Parameters:
     ----------
     artist_name: str
         The name of the artist.
     Returns:
     -------
     url: str
         max_sim = -100
         for id, name in enumerate(artist_urls):
             # dist = Levenshtein.jaro(artist_name, name)
+            dist = jaro_distance(artist_name, name)
             if max_sim < dist:
                 max_sim = dist
                 min_id = id
     This function sends an HTTP request to the given `lyric_url`, parses the HTML content,
     and extracts the song lyrics. The lyrics are cleaned by removing unnecessary HTML tags
     and whitespace. The function returns the lyrics as a string.
     Note: Web scraping may be against the terms of service of some websites. Azlyrics.com
     specifically prohibits the usage of their content by third-party lyrics providers.
     Always review the website's policies and ensure you are compliant before scraping data.
     Parameters:
     ------
     `lyric_url`: str
         The URL of the song lyrics on azlyrics.com.
     `song_title`: str
         Title of the song
     Returns:
     ------
     `lyrics_str`:
         The lyrics of the song as a single string.
     Raises:
     ------
         `ValueError`: If the lyrics cannot be found or if there's an error fetching the webpage.
 def find_artist(artist_name: str) -> str:
     """
     Finds the link for the artist page on azlyrics.com.
     Parameters:
     ------
     `artist_name`: str
         The name of the artist.
     Returns:
     ------
     `url`: str
         The URL of the artist page on azlyrics.com.
     Raises:
     ------
     `ValueError`: If the artist page cannot be found.
         max_sim = -100
         for id, name in enumerate(artist_urls):
             # dist = Levenshtein.jaro(artist_name, name)
+            dist = jaro_distance(artist_name, name)
             if max_sim < dist:
                 max_sim = dist
                 min_id = id
 def flatten_list(lst: list):
     """
     Flattens all inner lists (all depths) of a list into a list of depth == 1.
     Parameters:
     ------
     `lst`: List
         The list to be flattened.
     Returns:
     ------
     `result`: List
     """
     Pre-processes the lyrics by replacing `\r` with an empty string, splitting the lyrics
     by `\n`, and removing consecutive whitespace list items.
     Parameters:
     ------
     `lyrics`: str
         The lyrics to be pre-processed.
     Returns:
     ------
     `cleaned_lines`: List
 def sectionize(lyrics: str):
     """
     Splits the pre-processed lyrics into sections.
     Parameters:
     ------
     `lyrics`: str
         The pre-processed lyrics.
     Returns:
     ------
     `all_sections`: List
     """
     Analyzes the sentiment of a text using the VADER (Valence Aware Dictionary and sEntiment Reasoner)
     sentiment analysis model.
     Parameters:
     ------
     `text`: str
         The text to be analyzed.
     Returns:
     ------
     `label`: str
 def get_sentiments(df):
     """
     Retrieves the sentiment analysis of all text items in the 'lyrics' column of a pandas DataFrame.
     This function applies the Vader sentiment analysis model from the Natural Language Toolkit (nltk)
     to each text item in the 'lyrics' column of the passed DataFrame. It assigns a sentiment label
     ('POSITIVE', 'NEGATIVE', or 'NEUTRAL') and a compound sentiment score (ranging from -1 to 1) to
     each text item. The sentiment analysis is added as new columns to the DataFrame and the modified
     DataFrame is returned.
     Parameters:
     -----------
     df : pandas DataFrame
         The DataFrame containing the 'lyrics' column to be analyzed.
     Returns:
     --------
     df : pandas DataFrame
         The modified DataFrame with sentiment analysis added as new columns.
     Raises:
     -------
     None.
                 max_sim = -100
                 for id, az_name in enumerate(az_titles):
                     # dist = Levenshtein.jaro(title, az_name)
+                    dist = jaro_distance(title, az_name)
                     if max_sim < dist:
                         max_sim = dist
                         min_id = id