Spaces:
Running
Running
kevinwang676
commited on
Commit
•
f9ec799
1
Parent(s):
9a60ca9
Upload search (1).py
Browse files- search (1).py +163 -0
search (1).py
ADDED
@@ -0,0 +1,163 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os.path
|
2 |
+
import random
|
3 |
+
|
4 |
+
from musicdl import musicdl
|
5 |
+
from musicdl.modules import Downloader
|
6 |
+
from pydub import AudioSegment
|
7 |
+
from yt_dlp import YoutubeDL
|
8 |
+
import yt_dlp
|
9 |
+
from yt_dlp.utils import download_range_func
|
10 |
+
import json
|
11 |
+
|
12 |
+
|
13 |
+
def is_integer(string):
    """Parse a string of decimal digits into an int, falling back to 0.

    Despite its name this is a lenient parser rather than a predicate.

    Args:
        string: Text expected to hold a non-negative integer.

    Returns:
        ``int(string)`` when every character is a decimal digit, otherwise
        ``0`` (empty string, signs, whitespace, letters, ...).
    """
    # str.isdigit() also accepts characters such as superscript "²", which
    # int() rejects with ValueError. str.isdecimal() only accepts characters
    # that int() is able to parse.
    return int(string) if string.isdecimal() else 0
|
18 |
+
|
19 |
+
|
20 |
+
def is_numeric(string):
    """Tell whether a string is an unsigned decimal number.

    Accepted forms are ``"123"`` and ``"123.45"``. Signs, exponents,
    surrounding whitespace and a missing integer or fractional part
    (``".5"``, ``"5."``) are all rejected.

    Args:
        string: Text to inspect.

    Returns:
        True if ``string`` matches one of the accepted forms, else False.
    """
    whole, dot, fraction = string.partition('.')
    if not dot:
        # str.isdecimal() is used instead of str.isdigit() because the latter
        # accepts characters such as "²" that float() cannot parse.
        return string.isdecimal()
    # A second '.' ends up inside `fraction` and makes isdecimal() fail.
    return whole.isdecimal() and fraction.isdecimal()
|
28 |
+
|
29 |
+
|
30 |
+
def time_to_seconds(time_string):
    """Convert a colon-separated duration into a number of seconds.

    ``"HH:MM:SS"``, ``"MM:SS"`` and ``"SS"`` are all accepted. Every field
    goes through ``is_integer``, so a non-numeric field (for example the
    ``"-"`` in ``"-:-:-"``) counts as 0.

    Args:
        time_string: Duration text with up to three ':'-separated fields.

    Returns:
        Total duration in seconds as an int.

    Raises:
        ValueError: If ``time_string`` has more than three fields.
    """
    parts = [is_integer(part) for part in time_string.split(':')]
    if len(parts) > 3:
        raise ValueError(
            f"expected at most 3 ':'-separated fields, got {time_string!r}"
        )
    # Left-pad with zeros so short forms line up with hours/minutes/seconds
    # instead of failing the three-way unpacking.
    hours, minutes, seconds = [0] * (3 - len(parts)) + parts
    return hours * 3600 + minutes * 60 + seconds
|
34 |
+
|
35 |
+
|
36 |
+
def size_to_int(size_string):
    """Convert a human-readable size such as ``"3.5MB"`` into bytes.

    The last two characters are taken as the unit and everything before them
    as the number. Units are matched case-insensitively and surrounding
    whitespace is ignored, so ``"3.5 mb"`` is read the same as ``"3.5MB"``.

    Args:
        size_string: Size text made of a number followed by a two-letter
            unit (KB, MB, GB or TB, powers of 1024).

    Returns:
        The size in bytes as an int. A number followed by an unrecognised
        two-character suffix is returned unscaled. When the numeric part
        cannot be parsed, the float ``5.1 * 1024 * 1024`` is returned; that
        value is above the 5 MiB limit checked in ``get_albums``, so such
        entries get filtered out there.
    """
    multipliers = {
        'KB': 1024,
        'MB': 1024 * 1024,
        'GB': 1024 * 1024 * 1024,
        'TB': 1024 * 1024 * 1024 * 1024,
    }
    text = size_string.strip()
    # Drop the trailing unit to get the numeric part.
    number_text = text[:-2].strip()
    if not is_numeric(number_text):
        return 5.1 * 1024 * 1024
    # Normalise the unit so lowercase spellings are not silently treated
    # as plain bytes.
    unit = text[-2:].upper()
    size = float(number_text) * multipliers.get(unit, 1)
    return int(size)  # truncate to whole bytes
|
52 |
+
|
53 |
+
|
54 |
+
def search_youtube(keywords):
    """Search YouTube and return one of the top results, picked at random.

    Args:
        keywords: Free-text search query.

    Returns:
        The sanitized yt-dlp info dict of a randomly chosen entry among the
        first five search results, or None when the search yields nothing.
    """
    ydl_options = {
        'format': 'bestaudio',
        # 'noplaylist': 'True',
        # 'proxy': 'http://127.0.0.1:8889',
    }
    with YoutubeDL(ydl_options) as ydl:
        # A bare "ytsearch:" prefix asks yt-dlp for a single result, which
        # would leave random.choice with only one candidate. "ytsearch5:"
        # requests the five results this function chooses from.
        result = ydl.extract_info(f"ytsearch5:{keywords}", download=False)
        entries = (result or {}).get('entries') or []
        # Skip placeholder entries (None) left by videos that failed to load.
        candidates = [entry for entry in entries if entry][:5]
        if not candidates:
            return None
        return ydl.sanitize_info(random.choice(candidates))
|
68 |
+
|
69 |
+
|
70 |
+
def download_youtube(info, save_path):
    """Download the audio of a video through yt-dlp and extract it as MP3.

    Args:
        info: yt-dlp info dict; its 'original_url' and 'duration' entries
            are read.
        save_path: Value handed to yt-dlp as the 'outtmpl' output template.

    Returns:
        The sanitized info dict produced by the download, with an extra
        'save_path' key holding the ``save_path`` argument.
    """
    source_url = info['original_url']
    # Ask for the whole clip: from second 0 up to the reported duration.
    full_range = [(0, info['duration'])]

    options = {
        'format': 'm4a/bestaudio/best',
        'downloader': 'ffmpeg',
        'download_ranges': download_range_func(None, full_range),
        # See help(yt_dlp.postprocessor) for the available postprocessors
        # and their arguments.
        'postprocessors': [
            # Extract the audio track with ffmpeg.
            {'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3'},
        ],
        'outtmpl': save_path,
        # 'proxy': 'http://127.0.0.1:8889',
    }
    with yt_dlp.YoutubeDL(options) as ydl:
        downloaded = ydl.extract_info(source_url, download=True)
        # sanitize_info makes the info dict JSON-serializable.
        result = ydl.sanitize_info(downloaded)
        result['save_path'] = save_path
        return result
|
96 |
+
|
97 |
+
|
98 |
+
def get_youtube(keywords, save_path):
    """Search YouTube for ``keywords`` and download one matching track.

    Args:
        keywords: Free-text search query passed to ``search_youtube``.
        save_path: Output location passed to ``download_youtube``.

    Returns:
        The info dict returned by ``download_youtube``, or None when the
        search finds nothing.
    """
    info = search_youtube(keywords)
    if info is None:
        return None
    # Hand the download result back to the caller instead of discarding it,
    # so success can be told apart from "nothing found".
    return download_youtube(info, save_path)
|
104 |
+
|
105 |
+
|
106 |
+
def get_albums(keywords, config):
    """Search several music sources through musicdl and filter the hits.

    A hit is kept only when its song name has not been kept already, its
    extension is 'mp3', its size is at most 5 MiB and its duration is at
    most 300 seconds.

    Args:
        keywords: Search query passed to the musicdl client.
        config: Configuration passed to ``musicdl.musicdl``.

    Returns:
        A list of the accepted song-info entries, in the order encountered.
    """
    sources = [
        'kugou', 'kuwo', 'qqmusic', 'qianqian', 'fivesing',
        'netease', 'migu', 'joox', 'yiting',
    ]
    search_results = musicdl.musicdl(config=config).search(keywords, sources)

    max_bytes = 5 * 1024 * 1024
    max_seconds = 300
    seen_names = set()
    accepted = []
    for source_hits in search_results.values():
        for song in source_hits:
            # Checks run in the same order as before; `or` stops at the
            # first one that rejects the song.
            rejected = (
                song['songname'] in seen_names
                or song['ext'] != 'mp3'
                or size_to_int(song['filesize']) > max_bytes
                or time_to_seconds(song['duration']) > max_seconds
            )
            if rejected:
                continue
            seen_names.add(song['songname'])
            accepted.append(song)
    return accepted
|
131 |
+
|
132 |
+
|
133 |
+
def get_random_spit(songinfo, save_path):
    """Fetch a song and replace it with a clip cut around its midpoint.

    Runs musicdl's ``Downloader`` for ``songinfo``, loads ``save_path`` as
    MP3, and writes back up to 30 seconds centred on the middle of the track.
    The clip is exported in WAV format to the same ``save_path``.

    NOTE(review): presumably the Downloader writes its file to ``save_path``;
    that depends on ``songinfo`` and is not visible here - confirm.

    Args:
        songinfo: Song description passed to ``Downloader``.
        save_path: File that is read as MP3 and then overwritten.

    Returns:
        ``save_path``.
    """
    Downloader(songinfo).start()
    track = AudioSegment.from_mp3(save_path)

    # pydub measures and slices audio in milliseconds.
    half_window_ms = 15 * 1000
    total_ms = len(track)
    midpoint_ms = total_ms / 2
    # Clamp the window to the track so short songs are kept whole.
    start_ms = max(midpoint_ms - half_window_ms, 0)
    stop_ms = min(midpoint_ms + half_window_ms, total_ms)

    track[start_ms:stop_ms].export(save_path, format="wav")
    return save_path
|
148 |
+
|
149 |
+
|
150 |
+
def download_random(keywords, config, save_path):
    """Pick a random song matching ``keywords`` and save a clip of it.

    Args:
        keywords: Search query passed to ``get_albums``.
        config: musicdl configuration passed to ``get_albums``.
        save_path: Destination passed to ``get_random_spit``.

    Returns:
        The path returned by ``get_random_spit``, or None when no song
        passes the ``get_albums`` filters.
    """
    albums = get_albums(keywords, config)
    if not albums:
        return None
    # Return the clip path instead of discarding it, so callers can tell a
    # successful download from an empty search.
    return get_random_spit(random.choice(albums), save_path=save_path)
|
156 |
+
|
157 |
+
|
158 |
+
if __name__ == '__main__':
    # Manual smoke test. The musicdl path can be exercised instead with:
    # config = {'logfilepath': 'musicdl.log', 'downloaded': 'downloaded', 'search_size_per_source': 5, 'proxies': {}}
    # infos = get_albums('李荣浩', config)
    # print(infos)
    info = search_youtube('李荣浩 模特')
    # search_youtube returns None when nothing is found; download_youtube
    # would then fail with a TypeError on info['original_url'].
    if info is None:
        print('No YouTube results found.')
    else:
        download_youtube(info, "downloaded/模特")
|