rogerxavier's picture
Upload 258 files
0aee47a verified
"""
bilibili_api.utils.parse_link
链接资源解析。
"""
import re
import json
from enum import Enum
from typing import Tuple, Union, Literal
import httpx
from yarl import URL
from .network import Api
from ..game import Game
from ..manga import Manga
from ..topic import Topic
from ..video import Video
from ..exceptions import *
from .utils import get_api
from ..live import LiveRoom
from ..dynamic import Dynamic
from .short import get_real_url
from ..note import Note, NoteType
from ..black_room import BlackRoom
from .credential import Credential
from ..audio import Audio, AudioList
from ..bangumi import Bangumi, Episode
from ..article import Article, ArticleList
from ..cheese import CheeseList, CheeseVideo
from ..interactive_video import InteractiveVideo
from ..favorite_list import FavoriteList, FavoriteListType
from ..user import User, ChannelSeries, ChannelSeriesType, get_self_info
from .initial_state import get_initial_state
class ResourceType(Enum):
"""
链接类型类。
+ VIDEO: 视频
+ BANGUMI: 番剧
+ EPISODE: 番剧剧集
+ FAVORITE_LIST: 视频收藏夹
+ CHEESE: 课程
+ CHEESE_VIDEO: 课程视频
+ AUDIO: 音频
+ AUDIO_LIST: 歌单
+ ARTICLE: 专栏
+ USER: 用户
+ LIVE: 直播间
+ CHANNEL_SERIES: 合集与列表
+ BLACK_ROOM: 小黑屋
+ GAME: 游戏
+ TOPIC: 话题
+ MANGA: 漫画
+ NOTE: 笔记
+ FAILED: 错误
"""
VIDEO = "video"
INTERACTIVE_VIDEO = "interactive_video"
BANGUMI = "bangumi"
EPISODE = "episode"
FAVORITE_LIST = "favorite_list"
CHEESE_VIDEO = "cheese_video"
AUDIO = "audio"
AUDIO_LIST = "audio_list"
ARTICLE = "article"
USER = "user"
LIVE = "live"
CHANNEL_SERIES = "channel_series"
ARTICLE_LIST = "article_list"
DYNAMIC = "dynamic"
BLACK_ROOM = "black_room"
GAME = "game"
TOPIC = "topic"
MANGA = "manga"
NOTE = "note"
FAILED = "failed"
async def parse_link(
url: str, credential: Union[Credential, None] = None
) -> Union[
Tuple[Video, Literal[ResourceType.VIDEO]],
Tuple[InteractiveVideo, Literal[ResourceType.INTERACTIVE_VIDEO]],
Tuple[Bangumi, Literal[ResourceType.BANGUMI]],
Tuple[Episode, Literal[ResourceType.EPISODE]],
Tuple[FavoriteList, Literal[ResourceType.FAVORITE_LIST]],
Tuple[CheeseVideo, Literal[ResourceType.CHEESE_VIDEO]],
Tuple[Audio, Literal[ResourceType.AUDIO]],
Tuple[AudioList, Literal[ResourceType.AUDIO_LIST]],
Tuple[Article, Literal[ResourceType.ARTICLE]],
Tuple[User, Literal[ResourceType.USER]],
Tuple[LiveRoom, Literal[ResourceType.LIVE]],
Tuple[ChannelSeries, Literal[ResourceType.CHANNEL_SERIES]],
Tuple[ArticleList, Literal[ResourceType.ARTICLE_LIST]],
Tuple[Dynamic, Literal[ResourceType.DYNAMIC]],
Tuple[BlackRoom, Literal[ResourceType.BLACK_ROOM]],
Tuple[Game, Literal[ResourceType.GAME]],
Tuple[Topic, Literal[ResourceType.TOPIC]],
Tuple[Manga, Literal[ResourceType.MANGA]],
Tuple[Note, Literal[ResourceType.NOTE]],
Tuple[Literal[-1], Literal[ResourceType.FAILED]],
]:
"""
调用 yarl 解析 bilibili url 的函数。
Args:
url(str) : 链接
credential(Credential): 凭据类
Returns:
Tuple[obj, ResourceType]: (对象,类型) 或 -1,-1 表示出错
"""
credential = credential if credential else Credential()
url = url.replace("\\", "/") # 说多了都是泪
try:
obj = None
# 排除 bvxxxxxxxxxx 等缩写
sobj = await check_short_name(url, credential)
if sobj != -1:
sobj[0].credential = credential
return sobj
# 删去首尾部空格
url = url.strip()
# 添加 https: 协议头
if url.lstrip("https:") == url:
url = "https:" + url
# 转换为 yarl
url = URL(url) # type: ignore
# 排除小黑屋
black_room = parse_black_room(url, credential) # type: ignore
if not black_room == -1:
obj = (black_room, ResourceType.BLACK_ROOM)
return obj # type: ignore
# 过滤 https://space.bilibili.com/
if url.host == "space.bilibili.com" and url.path == "/" or url.path == "": # type: ignore
try:
info = await get_self_info(credential)
except Exception as e:
return (-1, ResourceType.FAILED)
else:
return (User(info["mid"], credential=credential), ResourceType.USER)
channel = parse_season_series(
url, credential # type: ignore
) # 不需要 real_url,提前处理
if channel != -1:
return (channel, ResourceType.CHANNEL_SERIES) # type: ignore
url = await get_real_url(str(url)) # type: ignore
url = URL(url) # type: ignore
fl_space = parse_space_favorite_list(url, credential) # type: ignore
if fl_space != -1:
return fl_space # type: ignore
game = parse_game(url, credential) # type: ignore
if game != -1:
game.credential = credential # type: ignore
return (game, ResourceType.GAME) # type: ignore
topic = parse_topic(url, credential) # type: ignore
if topic != -1:
topic.credential = credential # type: ignore
return (topic, ResourceType.TOPIC) # type: ignore
festival_video = await parse_festival(url, credential) # type: ignore
if festival_video != -1:
festival_video.credential = credential # type: ignore
return (festival_video, ResourceType.VIDEO) # type: ignore
note = parse_note(url, credential) # type: ignore
if note != -1:
return (note, ResourceType.NOTE) # type: ignore
obj = None
video = await parse_video(url, credential) # type: ignore
if not video == -1:
obj = video # auto_convert_video 会判断类型
bangumi = parse_bangumi(url, credential) # type: ignore
if not bangumi == -1:
obj = (bangumi, ResourceType.BANGUMI)
episode = await parse_episode(url, credential) # type: ignore
if not episode == -1:
obj = (episode, ResourceType.EPISODE)
favorite_list = parse_favorite_list(url, credential) # type: ignore
if not favorite_list == -1:
obj = (favorite_list, ResourceType.FAVORITE_LIST)
cheese_video = await parse_cheese_video(url, credential) # type: ignore
if not cheese_video == -1:
obj = (cheese_video, ResourceType.CHEESE_VIDEO)
audio = parse_audio(url, credential) # type: ignore
if not audio == -1:
obj = (audio, ResourceType.AUDIO)
audio_list = parse_audio_list(url, credential) # type: ignore
if not audio_list == -1:
obj = (audio_list, ResourceType.AUDIO_LIST)
article = parse_article(url, credential) # type: ignore
if not article == -1:
obj = (article, ResourceType.ARTICLE)
article_list = parse_article_list(url, credential) # type: ignore
if not article_list == -1:
obj = (article_list, ResourceType.ARTICLE_LIST)
user = parse_user(url, credential) # type: ignore
if not user == -1:
obj = (user, ResourceType.USER)
live = parse_live(url, credential) # type: ignore
if not live == -1:
obj = (live, ResourceType.LIVE)
dynamic = parse_dynamic(url, credential) # type: ignore
if not dynamic == -1:
obj = (dynamic, ResourceType.DYNAMIC)
manga = parse_manga(url, credential) # type: ignore
if not manga == -1:
obj = (manga, ResourceType.MANGA)
opus_dynamic = parse_opus_dynamic(url, credential) # type: ignore
if not opus_dynamic == -1:
obj = (opus_dynamic, ResourceType.DYNAMIC)
if obj == None or obj[0] == None:
return (-1, ResourceType.FAILED)
else:
obj[0].credential = credential # type: ignore
return obj # type: ignore
except Exception as e:
raise e
# return (-1, ResourceType.FAILED)
async def auto_convert_video(
video: Video, credential: Union[Credential, None] = None
) -> Tuple[Union[Video, Episode, InteractiveVideo], ResourceType]:
# check interactive video
video_info = await video.get_info()
if video_info["rights"]["is_stein_gate"] == 1:
return (
InteractiveVideo(video.get_bvid(), credential=credential),
ResourceType.INTERACTIVE_VIDEO,
)
# check episode
if "redirect_url" in video_info:
reparse_link = await parse_link(
await get_real_url(video_info["redirect_url"]), credential=credential
) # type: ignore
return reparse_link # type: ignore
# return video
return (video, ResourceType.VIDEO)
async def check_short_name(
name: str, credential: Credential
) -> Union[
Tuple[Video, Literal[ResourceType.VIDEO]],
Tuple[Episode, Literal[ResourceType.EPISODE]],
Tuple[CheeseVideo, Literal[ResourceType.CHEESE_VIDEO]],
Tuple[FavoriteList, Literal[ResourceType.FAVORITE_LIST]],
Tuple[User, Literal[ResourceType.USER]],
Tuple[Article, Literal[ResourceType.ARTICLE]],
Tuple[Audio, Literal[ResourceType.AUDIO]],
Tuple[AudioList, Literal[ResourceType.AUDIO_LIST]],
Tuple[ArticleList, Literal[ResourceType.ARTICLE_LIST]],
Literal[-1],
]:
"""
解析:
- mlxxxxxxxxxx
- uidxxxxxxxxx
- cvxxxxxxxxxx
- auxxxxxxxxxx
- amxxxxxxxxxx
- rlxxxxxxxxxx
"""
if name[:2].upper() == "AV":
v = Video(aid=int(name[2:]), credential=credential)
return await auto_convert_video(v, credential=credential) # type: ignore
elif name[:2].upper() == "BV":
v = Video(bvid=name, credential=credential)
return await auto_convert_video(v, credential=credential) # type: ignore
elif name[:2].upper() == "ML":
return (
FavoriteList(FavoriteListType.VIDEO, int(name[2:]), credential=credential),
ResourceType.FAVORITE_LIST,
)
elif name[:3].upper() == "UID":
return (User(int(name[3:]), credential=credential), ResourceType.USER)
elif name[:2].upper() == "CV":
return (Article(int(name[2:]), credential=credential), ResourceType.ARTICLE)
elif name[:2].upper() == "AU":
return (Audio(int(name[2:]), credential=credential), ResourceType.AUDIO)
elif name[:2].upper() == "AM":
return (
AudioList(int(name[2:]), credential=credential),
ResourceType.AUDIO_LIST,
)
elif name[:2].upper() == "RL":
return (
ArticleList(int(name[2:]), credential=credential),
ResourceType.ARTICLE_LIST,
)
else:
return -1
async def parse_video(
url: URL, credential: Credential
) -> Union[Tuple[Union[Video, Episode, InteractiveVideo], ResourceType], Literal[-1]]:
"""
解析视频,如果不是返回 -1,否则返回对应类
"""
if url.host == "www.bilibili.com" and url.parts[1] == "video":
raw_video_id = url.parts[2]
if raw_video_id[:2].upper() == "AV":
aid = int(raw_video_id[2:])
v = Video(aid=aid, credential=credential)
elif raw_video_id[:2].upper() == "BV":
v = Video(bvid=raw_video_id, credential=credential)
else:
return -1
return await auto_convert_video(v, credential=credential)
else:
return -1
def parse_bangumi(url: URL, credential: Credential) -> Union[Bangumi, int]:
"""
解析番剧,如果不是返回 -1,否则返回对应类
"""
if url.host == "www.bilibili.com" and len(url.parts) >= 4:
if url.parts[:3] == ("/", "bangumi", "media"):
media_id = int(url.parts[3][2:])
return Bangumi(media_id=media_id, credential=credential)
return -1
async def parse_episode(url: URL, credential: Credential) -> Union[Episode, int]:
"""
解析番剧剧集,如果不是返回 -1,否则返回对应类
"""
if url.host == "www.bilibili.com" and len(url.parts) >= 3:
if url.parts[1] == "bangumi" and url.parts[2] == "play":
video_short_id = url.parts[3]
if video_short_id[:2].upper() == "EP":
epid = int(video_short_id[2:])
return Episode(epid=epid)
elif video_short_id[:2].upper() == "SS":
bangumi = Bangumi(ssid=int(video_short_id[2:]))
epid = (await bangumi.get_episodes())[0].get_epid()
return Episode(epid=epid)
return -1
def parse_favorite_list(url: URL, credential: Credential) -> Union[FavoriteList, int]:
"""
解析收藏夹,如果不是返回 -1,否则返回对应类
"""
if url.host == "www.bilibili.com" and len(url.parts) >= 4:
if url.parts[:3] == ("/", "medialist", "detail"):
media_id = int(url.parts[3][2:])
return FavoriteList(media_id=media_id, credential=credential)
return -1
async def parse_cheese_video(
url: URL, credential: Credential
) -> Union[CheeseVideo, int]:
"""
解析课程视频,如果不是返回 -1,否则返回对应类
"""
if url.host == "www.bilibili.com" and len(url.parts) >= 4:
if url.parts[1] == "cheese" and url.parts[2] == "play":
if url.parts[3][:2].upper() == "EP":
epid = int(url.parts[3][2:])
return CheeseVideo(epid=epid, credential=credential)
elif url.parts[3][:2].upper() == "SS":
clid = int(url.parts[3][2:])
cl = CheeseList(season_id=clid, credential=credential)
return CheeseVideo(
epid=(await cl.get_list_raw())["items"][0]["id"],
credential=credential,
)
return -1
def parse_audio(url: URL, credential: Credential) -> Union[Audio, int]:
"""
解析音频,如果不是返回 -1,否则返回对应类
"""
if url.host == "www.bilibili.com" and url.parts[1] == "audio":
if url.parts[2][:2].upper() == "AU":
auid = int(url.parts[2][2:])
return Audio(auid=auid, credential=credential)
return -1
def parse_audio_list(url: URL, credential: Credential) -> Union[AudioList, int]:
"""
解析歌单,如果不是返回 -1,否则返回对应类
"""
if url.host == "www.bilibili.com" and url.parts[1] == "audio":
if url.parts[2][:2].upper() == "AM":
amid = int(url.parts[2][2:])
return AudioList(amid=amid, credential=credential)
return -1
def parse_article(url: URL, credential: Credential) -> Union[Article, int]:
"""
解析专栏,如果不是返回 -1,否则返回对应类
"""
if url.host == "www.bilibili.com" and len(url.parts) >= 3:
if url.parts[1] == "read" and url.parts[2][:2].upper() == "CV":
cvid = int(url.parts[2][2:])
return Article(cvid=cvid, credential=credential)
return -1
def parse_user(url: URL, credential: Credential) -> Union[User, int]:
if url.host == "space.bilibili.com":
if len(url.parts) >= 2:
uid = url.parts[1]
return User(uid=int(uid), credential=credential)
return -1
def parse_live(url: URL, credential: Credential) -> Union[LiveRoom, int]:
if url.host == "live.bilibili.com":
if len(url.parts) >= 2:
room_display_id = int(url.parts[1])
return LiveRoom(room_display_id=room_display_id, credential=credential)
return -1
def parse_season_series(url: URL, credential: Credential) -> Union[ChannelSeries, int]:
if url.host == "space.bilibili.com":
if len(url.parts) >= 2: # path 存在 uid
try:
uid = int(url.parts[1])
except:
pass # uid 无效
else:
if len(url.parts) >= 4: # path 存在 collectiondetail 或者 seriesdetail
if url.parts[3] == "collectiondetail":
# https://space.bilibili.com/51537052/channel/collectiondetail?sid=22780&ctype=0
if url.query.get("sid") is not None:
sid = int(url.query["sid"])
return ChannelSeries(
uid,
ChannelSeriesType.SEASON,
id_=sid,
credential=credential,
)
elif url.parts[3] == "seriesdetail":
# https://space.bilibili.com/558830935/channel/seriesdetail?sid=2972810&ctype=0
if url.query.get("sid") is not None:
sid = int(url.query["sid"])
return ChannelSeries(
uid,
ChannelSeriesType.SERIES,
id_=sid,
credential=credential,
)
elif url.host == "www.bilibili.com":
if url.parts[1] == "list":
# https://www.bilibili.com/list/660303135?sid=2908236 旧版合集,不需要 real_url
if len(url.parts) >= 3:
uid = int(url.parts[2])
if "sid" in url.query:
sid = int(url.query["sid"])
return ChannelSeries(
uid, ChannelSeriesType.SERIES, id_=sid, credential=credential
)
# https://www.bilibili.com/medialist/play/660303135?business=space 新版合集
elif url.parts[1] == "medialist" and url.parts[2] == "play":
if len(url.parts) >= 4:
uid = int(url.parts[3])
if "business_id" in url.query:
sid = int(url.query["business_id"])
return ChannelSeries(
uid, ChannelSeriesType.SERIES, id_=sid, credential=credential
)
return -1
def parse_space_favorite_list(
url: URL, credential: Credential
) -> Union[
Tuple[FavoriteList, ResourceType], Tuple[ChannelSeries, ResourceType], Literal[-1]
]:
if url.host == "space.bilibili.com":
uid = url.parts[1] # 获取 uid
if len(url.parts) >= 3: # path 存在 favlist
if url.parts[2] == "favlist":
if (
len(url.parts) == 3 and url.query.get("fid") == None
): # query 中不存在 fid 则返回默认收藏夹
api = get_api("favorite-list")["info"]["list_list"]
params = {"up_mid": uid, "type": 2}
favorite_lists = (
Api(**api, credential=credential)
.update_params(**params)
.result_sync
)
if favorite_lists == None:
return -1
else:
default_favorite_id = int(favorite_lists["list"][0]["id"])
return (
FavoriteList(
media_id=default_favorite_id, credential=credential
),
ResourceType.FAVORITE_LIST,
)
elif len(url.query) != 0:
fid = url.query.get("fid") # 未知数据类型
ctype = url.query.get("ctype")
try: # 尝试转换为 int 类型并设置 fid_is_int
fid = int(fid) # type: ignore
fid_is_int = True
except:
fid_is_int = False
if ctype is None and fid_is_int:
# 我的视频收藏夹
fid = int(fid) # type: ignore
return (FavoriteList(media_id=fid), ResourceType.FAVORITE_LIST)
elif fid_is_int:
if int(ctype) == 11: # type: ignore
fid = int(fid) # 转换为 int 类型 # type: ignore
fid_is_int = True
return (
FavoriteList(media_id=fid, credential=credential),
ResourceType.FAVORITE_LIST,
)
elif int(ctype) == 21: # type: ignore
fid = int(fid) # type: ignore
fid_is_int = True
return (
ChannelSeries(
id_=fid,
type_=ChannelSeriesType.SEASON,
credential=credential,
),
ResourceType.CHANNEL_SERIES,
)
elif fid_is_int == False:
# ctype 不存在且 fid 非 int 类型
if fid == FavoriteListType.ARTICLE.value:
return (
FavoriteList(
FavoriteListType.ARTICLE, credential=credential
),
ResourceType.FAVORITE_LIST,
)
elif fid == FavoriteListType.CHEESE.value:
return (
FavoriteList(
FavoriteListType.CHEESE, credential=credential
),
ResourceType.FAVORITE_LIST,
)
return -1
def parse_article_list(url: URL, credential: Credential) -> Union[ArticleList, int]:
if url.host == "www.bilibili.com" and len(url.parts) >= 3:
if url.parts[:3] == ("/", "read", "readlist"):
rlid = int(url.parts[3][2:])
return ArticleList(rlid=rlid, credential=credential)
return -1
def parse_dynamic(url: URL, credential: Credential) -> Union[Dynamic, int]:
if url.host == "t.bilibili.com":
if len(url.parts) >= 2:
dynamic_id = int(url.parts[1])
return Dynamic(dynamic_id, credential=credential)
return -1
def parse_black_room(url: URL, credential: Credential) -> Union[BlackRoom, int]:
if len(url.parts) >= 3:
if url.parts[:3] == ("/", "blackroom", "ban"):
if len(url.parts) >= 4: # 存在 id
return BlackRoom(int(url.parts[3]), credential=credential)
return -1
def parse_game(url: URL, credential: Credential) -> Union[Game, int]:
if (
url.host == "www.biligame.com"
and url.parts[1] == "detail"
and url.query.get("id") is not None
):
return Game(int(url.query["id"]), credential=credential)
return -1
def parse_topic(url: URL, credential: Credential) -> Union[Topic, int]:
if url.host == "www.bilibili.com" and len(url.parts) >= 4:
if (
url.parts[:4] == ("/", "v", "topic", "detail")
and url.query.get("topic_id") is not None
):
return Topic(int(url.query["topic_id"]), credential=credential)
return -1
def parse_manga(url: URL, credential: Credential) -> Union[Manga, int]:
if url.host == "manga.bilibili.com" and url.parts[1] == "detail":
return Manga(int(url.parts[2][2:]), credential=credential)
return -1
async def parse_festival(url: URL, credential: Credential) -> Union[Video, int]:
bvid = url.query.get("bvid")
if bvid is not None: # get bvid if provided
return Video(bvid, credential=credential)
if (
url.host == "www.bilibili.com" and url.parts[1] == "festival"
): # use __initial_state__ to fetch
content, content_type = await get_initial_state(
url=str(url), credential=credential
)
return Video(
content["videoSections"][0]["episodes"][0]["bvid"], credential=credential
) # 返回当前第一个视频
return -1
def parse_note(url: URL, credential: Credential) -> Union[Note, int]:
# https://www.bilibili.com/h5/note-app/view?cvid=21385583
if url.host == "www.bilibili.com" and url.parts[1:4] == ("h5", "note-app", "view"):
if url.query.get("cvid") == None:
return -1
return Note(cvid=int(url.query.get("cvid")), note_type=NoteType.PUBLIC, credential=credential) # type: ignore
return -1
def parse_nianshizhiwang(url: URL) -> None:
# https://www.bilibili.com/festival/nianshizhiwang?bvid=BV1yt4y1Q7SS&spm_id_from=trigger_reload
pass
# 貌似 parse_bnj 已经可以判断了
def parse_opus_dynamic(url: URL, credential: Credential) -> Union[Dynamic, int]:
# https://www.bilibili.com/opus/767674573455884292
if url.host == "www.bilibili.com" and url.parts[:2] == ("/", "opus"):
return Dynamic(dynamic_id=int(url.parts[-1]), credential=credential)
return -1