pn23 committed on
Commit
4d7d148
·
verified ·
1 Parent(s): 0fff6b6

Delete scraping.py

Browse files
Files changed (1) hide show
  1. scraping.py +0 -132
scraping.py DELETED
@@ -1,132 +0,0 @@
1
- from selenium import webdriver
2
- from webdriver_manager.chrome import ChromeDriverManager
3
- from selenium.webdriver.common.by import By
4
- from selenium.webdriver.support.ui import WebDriverWait
5
- from selenium.webdriver.support import expected_conditions as EC
6
- import time
7
- from selenium.webdriver.chrome.options import Options
8
- import os
9
- from pyspark.sql import SparkSession
10
- #from delta import configure_spark_with_delta_pip
11
- import tempfile
12
-
13
- def get_mp3(song_title):
14
- # Set the download directory to the "music" subfolder within the current directory
15
- # download_directory = os.path.join(os.getcwd(), "music")
16
- # os.makedirs(download_directory, exist_ok=True)
17
-
18
- # Set up Chrome options
19
- # options = Options()
20
- # options.add_argument("--headless")
21
- # options.add_argument("--no-sandbox") # Bypass OS security model, necessary on certain platforms
22
- # options.add_argument("--disable-dev-shm-usage") # Overcome limited resource problems
23
- # #options.add_argument(f"download.default_directory={download_dir}") # Set download directory
24
- # options.add_experimental_option("prefs", {
25
- # # "download.default_directory": download_dir,
26
- # # "download.prompt_for_download": False,
27
- # # "download.directory_upgrade": True,
28
- # "plugins.always_open_pdf_externally": True, # Auto-download PDFs
29
- # "safebrowsing.enabled": True # Enable safe browsing to avoid triggering the browser's security warnings
30
- # })
31
-
32
- chrome_options = webdriver.ChromeOptions()
33
- # chrome_options.add_argument('--headless')
34
- # chrome_options.add_argument('--no-sandbox')
35
- # chrome_options.add_argument('--disable-dev-shm-usage')
36
- driver = webdriver.Chrome('chromedriver')
37
-
38
-
39
- # Set up the Selenium WebDriver (e.g., Chrome)
40
- #driver = webdriver.Chrome(chrome_options=options)
41
-
42
- # Navigate to the website
43
- driver.get("https://suno.com/me")
44
-
45
-
46
- # Wait for the sign-in button to be clickable and click it
47
- sign_in_button = WebDriverWait(driver, 10).until(
48
- EC.element_to_be_clickable((By.CSS_SELECTOR, "#__next > div > div > div > div > div.cl-main.🔒️.cl-internal-xk295g > div > button.cl-socialButtonsIconButton.cl-socialButtonsIconButton__discord.🔒️.cl-internal-855i1h"))
49
- )
50
- sign_in_button.click()
51
-
52
- # Wait for the username field to be visible and enter the username
53
- #username: applebottom_12
54
- username_field = WebDriverWait(driver, 10).until(
55
- EC.visibility_of_element_located((By.CSS_SELECTOR, "#uid_8"))
56
- )
57
- username_field.send_keys("asfasfasfgasdfasgfsag@gmail.com")
58
-
59
- # Find the password field and enter the password
60
- password_field = WebDriverWait(driver, 10).until(
61
- EC.visibility_of_element_located((By.CSS_SELECTOR, "#uid_10"))
62
- )
63
- password_field.send_keys("AppleBottom12")
64
-
65
- # Find the password button and click it
66
- password_button = WebDriverWait(driver, 10).until(
67
- EC.element_to_be_clickable((By.CSS_SELECTOR, "#app-mount > div.appAsidePanelWrapper__5e6e2 > div.notAppAsidePanel__95814 > div.app_b1f720 > div > div > div > div > form > div.centeringWrapper__5e247 > div > div.mainLoginContainer_f58870 > div.block__681fa.marginTop20__7e0ad > button.marginBottom8_ce1fb9.button__5573c.button__581d0.lookFilled__950dd.colorBrand__27d57.sizeLarge_b395a7.fullWidth_fdb23d.grow__4c8a4"))
68
- )
69
- password_button.click()
70
-
71
- # Wait for the page to load after signing in
72
- WebDriverWait(driver, 10).until(
73
- EC.url_contains("https://suno.com/me")
74
- )
75
-
76
- # Click on the specific song
77
- three_dots = WebDriverWait(driver, 10).until(
78
- EC.element_to_be_clickable((By.LINK_TEXT, song_title))
79
- )
80
- three_dots.click()
81
-
82
- # Play the song
83
- play_button = WebDriverWait(driver, 10).until(
84
- EC.element_to_be_clickable((By.CSS_SELECTOR, "body > div.css-fhtuey > div.css-bhm5u7 > div > div.css-l9hfgy > div.css-144pizt > button.chakra-button.css-15rci1t"))
85
- )
86
- play_button.click()
87
-
88
- time.sleep(3)
89
-
90
- three_dots = WebDriverWait(driver, 10).until(
91
- EC.element_to_be_clickable((By.XPATH, '//button[@aria-label="More Actions"]'))
92
- )
93
- three_dots.click()
94
-
95
- # Wait for the Download button to be clickable and click on it
96
- download_button = WebDriverWait(driver, 10).until(
97
- EC.element_to_be_clickable((By.XPATH, '//div[@role="menuitem" and contains(text(), "Download")]'))
98
- )
99
- download_button.click()
100
-
101
- # Wait for the Audio element to be clickable and click on it
102
- audio_element = WebDriverWait(driver, 10).until(
103
- EC.element_to_be_clickable((By.XPATH, '//div[@role="menuitem" and contains(text(), "Audio")]'))
104
- )
105
- audio_element.click()
106
-
107
- time.sleep(3)
108
-
109
- # print("Successfully signed in!")
110
-
111
- # # Create a SparkSession with Delta Lake configuration
112
- # builder = SparkSession.builder.appName("SaveMP3ToDatabricks") \
113
- # .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \
114
- # .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog")
115
-
116
- # spark = configure_spark_with_delta_pip(builder).getOrCreate()
117
-
118
- # # Setting the Retrieving Directory
119
- # retrieve_directory = os.path.join(download_directory, f"{song_title}.mp3")
120
-
121
- # # Read the downloaded MP3 file as binary
122
- # mp3_data = spark.sparkContext.binaryFiles(retrieve_directory).collect()[0][1]
123
-
124
- # # Create a DataFrame with the MP3 data
125
- # df = spark.createDataFrame([("Samba Kickoff.mp3", mp3_data,)], ["song_name", "mp3_data"])
126
-
127
- # # Save the DataFrame to a Databricks table
128
- # df.write.format("delta").mode("append").saveAsTable("mp3_table")
129
-
130
- # print("MP3 file saved to Databricks table.")
131
-
132
- driver.quit()