Spaces:
Running
Running
Commit
•
ffb3bfb
1
Parent(s):
08ef248
Update songscope.py
Browse files
- songscope.py +13 -12
songscope.py
CHANGED
@@ -1,3 +1,4 @@
|
|
|
|
1 |
import pandas as pd
|
2 |
import requests
|
3 |
from bs4 import BeautifulSoup
|
@@ -65,7 +66,7 @@ def random_delay(min_val: float, max_val: float, print_delay: bool = False):
|
|
65 |
val = random.uniform(min_val, max_val)
|
66 |
time.sleep(val)
|
67 |
if print_delay == True:
|
68 |
-
|
69 |
return val
|
70 |
|
71 |
def find_artist(artist_name: str):
|
@@ -170,9 +171,9 @@ def follow_lyrics(lyric_url: str):
|
|
170 |
# Clean up the lyrics by removing unnecessary HTML tags and whitespace
|
171 |
lyrics_str = lyrics_div.get_text(strip = False)
|
172 |
else:
|
173 |
-
|
174 |
else:
|
175 |
-
|
176 |
|
177 |
return lyrics_str
|
178 |
|
@@ -199,7 +200,7 @@ def find_artist(artist_name: str) -> str:
|
|
199 |
|
200 |
# The target URL
|
201 |
url = f"https://www.azlyrics.com/{first_letter}.html"
|
202 |
-
|
203 |
|
204 |
# Send an HTTP request to the URL
|
205 |
response = requests.get(url)
|
@@ -226,7 +227,7 @@ def find_artist(artist_name: str) -> str:
|
|
226 |
for url in artist_links:
|
227 |
artist_urls.append(str(url).split("/")[-1][:-5])
|
228 |
|
229 |
-
|
230 |
|
231 |
if artist_name in artist_urls:
|
232 |
return f"https://www.azlyrics.com/{artist_links[artist_urls.index(artist_name)]}"
|
@@ -462,12 +463,12 @@ def get_all_data(artist_name: str, song_titles: list = None,
|
|
462 |
A pandas DataFrame containing metadata and sentiment analysis for each song found.
|
463 |
"""
|
464 |
if print_progress == True:
|
465 |
-
|
466 |
|
467 |
artist_data = get_metadata(artist_name = artist_name, song_titles = song_titles)
|
468 |
|
469 |
if print_progress == True:
|
470 |
-
|
471 |
|
472 |
times = []
|
473 |
|
@@ -477,7 +478,7 @@ def get_all_data(artist_name: str, song_titles: list = None,
|
|
477 |
# lyrics = follow_lyrics(lyric_url = artist_data[title]['url'])
|
478 |
# artist_data[title]['lyrics'] = sectionize(lyrics)
|
479 |
# except: (UnboundLocalError, TypeError, AttributeError)
|
480 |
-
#
|
481 |
# pass
|
482 |
|
483 |
lyrics = follow_lyrics(lyric_url = artist_data[title]['url'])
|
@@ -495,13 +496,13 @@ def get_all_data(artist_name: str, song_titles: list = None,
|
|
495 |
if print_progress == True:
|
496 |
if remaining >= 60: # more than one minute remaining
|
497 |
remaining = round(remaining / 60, 2)
|
498 |
-
|
499 |
else: # less than one minute remaining
|
500 |
remaining = round(remaining, 2)
|
501 |
-
|
502 |
|
503 |
if print_progress == True:
|
504 |
-
|
505 |
|
506 |
df_dict = {}
|
507 |
df_dict['artist_name'] = []
|
@@ -546,6 +547,6 @@ def get_all_data(artist_name: str, song_titles: list = None,
|
|
546 |
'sentiment_valence', "album_name", "release_type", "lyrics_url"]]
|
547 |
|
548 |
if print_progress == True:
|
549 |
-
|
550 |
|
551 |
return sents_df
|
|
|
1 |
+
import streamlit as st
|
2 |
import pandas as pd
|
3 |
import requests
|
4 |
from bs4 import BeautifulSoup
|
|
|
66 |
val = random.uniform(min_val, max_val)
|
67 |
time.sleep(val)
|
68 |
if print_delay == True:
|
69 |
+
st.write(f"Delayed {val} seconds")
|
70 |
return val
|
71 |
|
72 |
def find_artist(artist_name: str):
|
|
|
171 |
# Clean up the lyrics by removing unnecessary HTML tags and whitespace
|
172 |
lyrics_str = lyrics_div.get_text(strip = False)
|
173 |
else:
|
174 |
+
st.write(f"Error: Unable to find the lyrics for '{lyric_url}'.")
|
175 |
else:
|
176 |
+
st.write(f"Error: Unable to fetch the webpage. Status code: {response.status_code}")
|
177 |
|
178 |
return lyrics_str
|
179 |
|
|
|
200 |
|
201 |
# The target URL
|
202 |
url = f"https://www.azlyrics.com/{first_letter}.html"
|
203 |
+
st.write(url)
|
204 |
|
205 |
# Send an HTTP request to the URL
|
206 |
response = requests.get(url)
|
|
|
227 |
for url in artist_links:
|
228 |
artist_urls.append(str(url).split("/")[-1][:-5])
|
229 |
|
230 |
+
st.write(artist_urls)
|
231 |
|
232 |
if artist_name in artist_urls:
|
233 |
return f"https://www.azlyrics.com/{artist_links[artist_urls.index(artist_name)]}"
|
|
|
463 |
A pandas DataFrame containing metadata and sentiment analysis for each song found.
|
464 |
"""
|
465 |
if print_progress == True:
|
466 |
+
st.write(f"------------------------\n\nFinding song data for '{artist_name}'. This may take a few moments...")
|
467 |
|
468 |
artist_data = get_metadata(artist_name = artist_name, song_titles = song_titles)
|
469 |
|
470 |
if print_progress == True:
|
471 |
+
st.write(f"\n\t- All metadata found")
|
472 |
|
473 |
times = []
|
474 |
|
|
|
478 |
# lyrics = follow_lyrics(lyric_url = artist_data[title]['url'])
|
479 |
# artist_data[title]['lyrics'] = sectionize(lyrics)
|
480 |
# except: (UnboundLocalError, TypeError, AttributeError)
|
481 |
+
# st.write(f"\tCouldn't find lyrics to {title}. Moving to next song.")
|
482 |
# pass
|
483 |
|
484 |
lyrics = follow_lyrics(lyric_url = artist_data[title]['url'])
|
|
|
496 |
if print_progress == True:
|
497 |
if remaining >= 60: # more than one minute remaining
|
498 |
remaining = round(remaining / 60, 2)
|
499 |
+
st.write(f"\t- Lyrics to '{title}' found. Estimated time remaining: {remaining} minutes")
|
500 |
else: # less than one minute remaining
|
501 |
remaining = round(remaining, 2)
|
502 |
+
st.write(f"\t- Lyrics to '{title}' found. Estimated time remaining: {remaining} seconds")
|
503 |
|
504 |
if print_progress == True:
|
505 |
+
st.write(f"\nAll lyrics and metadata found. Returning structured data.")
|
506 |
|
507 |
df_dict = {}
|
508 |
df_dict['artist_name'] = []
|
|
|
547 |
'sentiment_valence', "album_name", "release_type", "lyrics_url"]]
|
548 |
|
549 |
if print_progress == True:
|
550 |
+
st.write(f"Data retrieval complete!\n\n------------------------")
|
551 |
|
552 |
return sents_df
|