Spaces:
Runtime error
Runtime error
File size: 13,551 Bytes
e67043b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 |
import requests
from lxml import etree
import pandas as pd
import re
from ...tool import Tool
from typing import List
from typing_extensions import TypedDict
class ComingMovieInfo(TypedDict):
    """One row of the upcoming-releases table produced by ``DoubanAPI.get_coming``.

    Every value is stored as the string scraped from the page.
    """

    date: str  # text of the row's first cell
    title: str  # link text in the second cell
    cate: str  # text of the third cell (category)
    region: str  # text of the fourth cell
    wantWatchPeopleNum: str  # text of the fifth cell; consumers strip "人" before int()
    link: str  # href of the title link in the second cell
class PlayingMovieInfo(TypedDict):
    """One entry of the now-playing list produced by ``DoubanAPI.get_now_playing``.

    Every value is stored as the string scraped from the page.
    """

    title: str  # value of the item's data-title attribute
    score: str  # value of data-score; consumers convert it with float()
    region: str  # value of data-region
    director: str  # value of data-director
    actors: str  # value of data-actors
    link: str  # href of the first link inside the item
class DoubanAPI:
    """Small HTML scraper for the Douban movie site.

    Each public method downloads one page and extracts fields with fixed XPath
    expressions, so the results depend on the markup those expressions expect.
    """

    # Fallback used when an instance is created without running ``__init__``.
    _timeout: float = 30.0

    def __init__(self, timeout: float = 30.0) -> None:
        """Store the site root, the request headers and the request timeout.

        Args:
            timeout: Seconds ``requests`` waits on the server before raising a
                timeout error.
        """
        self._endpoint = "https://movie.douban.com"
        self._headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/108.0.0.0 Safari/537.36"
        }
        self._timeout = timeout

    def fetch_page(self, url: str):
        """Send a GET request to ``url`` and return the ``requests`` response.

        Args:
            url: Address of the page to download.

        Returns:
            The response object; the parsing methods read its ``.text``.

        Raises:
            requests.exceptions.RequestException: If the request fails or the
                timeout elapses.
        """
        # The session is closed on exit so its connections are not leaked. The
        # request is not streamed, so the body is already loaded when we return.
        with requests.Session() as session:
            # NOTE(review): verify=False disables TLS certificate validation. It is
            # kept to preserve existing behaviour -- confirm whether it is required.
            return session.get(
                url,
                headers=self._headers,
                verify=False,
                timeout=self._timeout,
            )

    def get_coming(self) -> List[ComingMovieInfo]:
        """Scrape the ``/coming`` page into a list of upcoming movies.

        Returns:
            One ``ComingMovieInfo`` per ``tr`` found under the movies table body.

        Raises:
            IndexError: If the table, or an expected cell of a row, is missing.
        """
        response = self.fetch_page(f"{self._endpoint}/coming")
        parser = etree.HTMLParser(encoding="utf-8")
        tree = etree.HTML(response.text, parser=parser)
        table_bodies = tree.xpath('//*[@id="content"]/div/div[1]/table/tbody')

        ret: List[ComingMovieInfo] = []
        for row in table_bodies[0].iter("tr"):
            ret.append(
                ComingMovieInfo(
                    date=row.xpath("td[1]/text()")[0].strip(),
                    title=row.xpath("td[2]/a/text()")[0],
                    cate=row.xpath("td[3]/text()")[0].strip(),
                    region=row.xpath("td[4]/text()")[0].strip(),
                    wantWatchPeopleNum=row.xpath("td[5]/text()")[0].strip(),
                    link=row.xpath("td[2]/a/@href")[0],
                )
            )
        return ret

    def get_now_playing(self) -> List[PlayingMovieInfo]:
        """Scrape the now-playing page into a list of movies on show.

        Returns:
            One ``PlayingMovieInfo`` per child of the now-playing list.

        Raises:
            IndexError: If the list, or an expected attribute or link of an
                item, is missing.
        """
        # Get the movie list currently on show, the movie list of different cities is the same
        response = self.fetch_page(f"{self._endpoint}/cinema/nowplaying/beijing/")
        parser = etree.HTMLParser(encoding="utf-8")
        tree = etree.HTML(response.text, parser=parser)
        movie_lists = tree.xpath('.//div[@id="nowplaying"]/div[2]/ul')

        ret: List[PlayingMovieInfo] = []
        for item in movie_lists[0]:
            ret.append(
                PlayingMovieInfo(
                    title=item.xpath("@data-title")[0],
                    score=item.xpath("@data-score")[0],
                    region=item.xpath("@data-region")[0],
                    director=item.xpath("@data-director")[0],
                    actors=item.xpath("@data-actors")[0],
                    link=item.xpath("ul/li[1]/a/@href")[0],
                )
            )
        return ret

    def get_movie_detail(self, url: str) -> str:
        """Scrape a movie page and return a one-paragraph description.

        Args:
            url: Address of the movie's detail page.

        Returns:
            A sentence (in Chinese) giving the region, genres, director and up
            to three actors, followed by the synopsis on a new line. The region
            is left empty when the page has no region label.

        Raises:
            IndexError: If the info panel, director link, cast container or
                synopsis element is missing.
        """
        response = self.fetch_page(url)
        parser = etree.HTMLParser(encoding="utf-8")
        tree = etree.HTML(response.text, parser=parser)
        info_path = './/div[@class="subject clearfix"]/div[2]'

        director = tree.xpath(f"{info_path}/span[1]/span[2]/a/text()")[0]

        cast_container = tree.xpath(f"{info_path}/span[3]/span[2]")[0]
        # Children without text would make ``str.join`` raise, so they are skipped.
        cast_names = [child.text for child in cast_container if child.text]
        actors = "、".join(cast_names[:3])

        info_panel = tree.xpath(f"{info_path}")[0]
        genre_names = [
            span.text
            for span in info_panel.iter("span")
            if span.attrib.get("property") == "v:genre" and span.text
        ]
        types = "、".join(genre_names)

        # Default to an empty region so a page without the label no longer ends
        # in an UnboundLocalError when the summary is formatted.
        region = ""
        for span in info_panel:
            if span.text == "制片国家/地区:":
                # The value is the text that follows the label element.
                region = (span.tail or "").strip()
                break

        synopsis_node = tree.xpath('.//div[@class="related-info"]/div/span')[0]
        synopsis = (synopsis_node.text or "").strip()
        return f"是一部{region}的{types}电影,由{director}导演,{actors}等人主演.\n剧情简介:{synopsis}"
def _tokenize_args(args: str) -> List[str]:
    """Split the raw tool input into word tokens, dropping commas, quotes and spaces."""
    return re.findall(r"\b\w+\b", args)


def _token_or(tokens: List[str], index: int, default: str) -> str:
    """Return ``tokens[index]``, or ``default`` when the caller supplied fewer tokens."""
    return tokens[index] if index < len(tokens) else default


def _int_or(text: str, default: int) -> int:
    """Parse ``text`` as an int, falling back to ``default`` when it is not a number."""
    try:
        return int(text)
    except ValueError:
        return default


def _columnar(movies, fields):
    """Pivot a list of movie dicts into ``{field: {"0": value, "1": value, ...}}``."""
    return {
        field: {str(index): movie[field] for index, movie in enumerate(movies)}
        for field in fields
    }


def build_tool(config) -> Tool:
    """Create the "Film Search" tool and register its three GET endpoints.

    Args:
        config: Settings mapping. When it has a truthy ``"debug"`` entry, the
            object stored under ``"douban_api"`` is used as the data source;
            otherwise a new ``DoubanAPI`` is created.

    Returns:
        The ``Tool`` with ``/coming_out_filter``, ``/now_playing_out_filter``
        and ``/print_detail`` registered on it.
    """
    tool = Tool(
        "Film Search Plugin",
        "search for up-to-date film information.",
        name_for_model="Film Search",
        description_for_model="Plugin for search for up-to-date film information.",
        logo_url="https://your-app-url.com/.well-known/logo.png",
        contact_email="hello@contact.com",
        legal_info_url="hello@legal.com",
    )
    if "debug" in config and config["debug"]:
        douban_api = config["douban_api"]
    else:
        douban_api = DoubanAPI()

    # NOTE(review): the endpoint docstrings below are written as agent prompts
    # (Thought / Action / Observation), so they presumably double as the tool
    # description shown to the model -- confirm against ``Tool``. For that reason
    # their examples are kept verbatim; only factual slips were corrected.

    @tool.get("/coming_out_filter")
    def coming_out_filter(args: str):
        """coming_out_filter(args: str) prints the details of the filtered [outNum] coming films now according to region, cate and outNum.
        args is a list like 'str1, str2, str3, str4'
        str1 represents Production country or region. If you cannot find a region, str1 is 全部
        str2 represents movie's category. If you cannot find a category, str2 is 全部
        str3 can be a integer number that agent want to get. If you cannot find a number, str3 is 100. If the found movie's num is less than str3, Final Answer only print [the found movie's num] movies.
        str4 can be a True or False that reflect whether agent want the result sorted by people number which look forward to the movie.
        Final answer should be complete.
        This is an example:
        Thought: I need to find the upcoming Chinese drama movies and the top 2 most wanted movies
        Action: coming_out_filter
        Action Input: {"args" : "中国, 剧情, 2, True"}
        Observation: {"date":{"23":"04月28日","50":"07月"},"title":{"23":"长空之王","50":"热烈"},"cate":{"23":"剧情 / 动作","50":"剧情 / 喜剧"},"region":{"23":"中国大陆","50":"中国大陆"},"wantWatchPeopleNum":{"23":"39303人","50":"26831人"}}
        Thought: I now know the top 2 upcoming Chinese drama movies
        Final Answer: 即将上映的中国剧情电影有2部:长空之王、热烈,大家最想看的前2部分别是:长空之王、热烈。
        """
        tokens = _tokenize_args(args)
        # Missing trailing arguments fall back to the defaults the docstring
        # promises instead of raising IndexError.
        region = _token_or(tokens, 0, "全部")
        cate = _token_or(tokens, 1, "全部")
        out_num = _int_or(_token_or(tokens, 2, "100"), 100)
        want_sort = _token_or(tokens, 3, "False").lower() == "true"
        # "全部" means "no filter": the empty string is contained in every value.
        if region == "全部":
            region = ""
        if cate == "全部":
            cate = ""

        coming_movies = [
            {
                "date": movie["date"],
                "title": movie["title"],
                "cate": movie["cate"],
                "region": movie["region"],
                "wantWatchPeopleNum": int(
                    movie["wantWatchPeopleNum"].replace("人", "")
                ),
                "link": movie["link"],
            }
            for movie in douban_api.get_coming()
            if cate in movie["cate"] and region in movie["region"]
        ]

        # Sort by people that are looking forward to the movie
        if want_sort:
            coming_movies = sorted(
                coming_movies, key=lambda x: x["wantWatchPeopleNum"], reverse=True
            )

        shown = [
            dict(movie, wantWatchPeopleNum="{}人".format(movie["wantWatchPeopleNum"]))
            for movie in coming_movies[:out_num]
        ]
        return _columnar(
            shown, ("date", "title", "cate", "region", "wantWatchPeopleNum")
        )

    @tool.get("/now_playing_out_filter")
    def now_playing_out_filter(args: str):
        """now_playing_out_filter(args: str) prints the details of the filtered [outNum] playing films now according to region, scoreSort
        args is a list like 'str1, str2, str3'
        str1 can be '中国','日本' or other Production country or region. If you cannot find a region, str1 is 全部
        str2 can be a integer number that agent want to get. If you cannot find a number, str2 is 100. If the found movie's num is less than str2, Final Answer only print [the found movie's num] movies.
        str3 can be a True or False that reflect whether agent want the result sorted by score.
        Final answer should be complete.
        This is an example:
        Input: 您知道现在有正在上映中国的电影吗?请输出3部
        Thought: I need to find the currently playing movies with the highest scores
        Action: now_playing_out_filter
        Action Input: {"args" : "全部, 3, True"}
        Observation: {"title":{"34":"切腹","53":"吉赛尔","31":"小森林 夏秋篇"},"score":{"34":"9.4","53":"9.2","31":"9.0"},"region":{"34":"日本","53":"西德","31":"日本"},"director":{"34":"小林正树","53":"Hugo Niebeling","31":"森淳一"},"actors":{"34":"仲代达矢 / 石浜朗 / 岩下志麻","53":"卡拉·弗拉奇 / 埃里克·布鲁恩 / Bruce Marks","31":"桥本爱 / 三浦贵大 / 松冈茉优"}}
        Thought: I now know the currently playing movies with the highest scores
        Final Answer: 现在上映的评分最高的3部电影是:切腹、吉赛尔、小森林 夏秋篇
        """
        tokens = _tokenize_args(args)
        # Missing trailing arguments fall back to the documented defaults.
        region = _token_or(tokens, 0, "全部")
        out_num = _int_or(_token_or(tokens, 1, "100"), 100)
        score_sort = _token_or(tokens, 2, "False").lower() == "true"
        # "全部" means "no filter": the empty string is contained in every value.
        if region == "全部":
            region = ""

        playing_movies = [
            {
                "title": movie["title"],
                "score": float(movie["score"]),
                "region": movie["region"],
                "director": movie["director"],
                "actors": movie["actors"],
                "link": movie["link"],
            }
            for movie in douban_api.get_now_playing()
            if region in movie["region"]
        ]

        # Sort by score
        if score_sort:
            playing_movies = sorted(
                playing_movies, key=lambda x: x["score"], reverse=True
            )

        shown = [
            dict(movie, score="{}".format(movie["score"]))
            for movie in playing_movies[:out_num]
        ]
        return _columnar(shown, ("title", "score", "region", "director", "actors"))

    @tool.get("/print_detail")
    def print_detail(args: str):
        """print_detail(args: str) prints the details of a movie, giving its name.
        args is a list like 'str1'
        str1 is target movie's name.
        step1: apply function parse_coming_page and parse_nowplaying_page and get all movie's links and other infomation.
        step2: get the target movie's link from df_coming or df_nowplaying
        step3: get detail from step2's link
        This is an example:
        Input: "电影流浪地球2怎么样?"
        Thought: I need to find the movie's information
        Action: print_detail
        Action Input: {"args" : "流浪地球2"}
        Observation: "是一部中国大陆的科幻、冒险、灾难电影,由郭帆导演,吴京、刘德华、李雪健等人主演.\n剧情简介:太阳即将毁灭,人类在地球表面建造出巨大的推进器,寻找新的家园。然而宇宙之路危机四伏,为了拯救地球,流浪地球时代的年轻人再次挺身而出,展开争分夺秒的生死之战。"
        Thought: I now know the final answer
        Final Answer: 流浪地球2是一部中国大陆的科幻、冒险、灾难电影,由郭帆导演,吴京、刘德华、李雪健等人主演,剧情简介是太阳即将毁灭,人类在地球表面建造出巨大的推进器,寻找新的家园,然而宇宙之路危机四伏,为了拯救地球,流浪地球时代的年轻人再次挺身而出,
        """
        # Titles may contain spaces or punctuation, so the whole input (minus
        # surrounding quotes or title marks) is tried as a name. The first word
        # token is kept as a second candidate so inputs that matched before
        # still match.
        full_name = args.strip().strip("\"'“”‘’《》").strip()
        tokens = _tokenize_args(args)
        first_token = tokens[0] if tokens else ""
        candidates = {name for name in (full_name, first_token) if name}
        if not candidates:
            return "没有找到该电影"

        # Upcoming movies are searched first; the now-playing page is only
        # fetched when nothing matched there.
        for fetch in (douban_api.get_coming, douban_api.get_now_playing):
            for movie in fetch():
                if movie["title"] in candidates:
                    return "{}{}".format(
                        movie["title"], douban_api.get_movie_detail(movie["link"])
                    )
        return "没有找到该电影"

    return tool
|