# File size: 13,551 Bytes
# e67043b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
import requests
from lxml import etree
import pandas as pd
import re
from ...tool import Tool
from typing import List
from typing_extensions import TypedDict


class ComingMovieInfo(TypedDict):
    """One row of Douban's upcoming-movies table, as built by ``DoubanAPI.get_coming``.

    Every value is the raw string scraped from the page; no conversion is
    done at this level.
    """

    # Release date text from the first table cell (whitespace stripped).
    date: str
    # Movie title: text of the link in the second cell.
    title: str
    # Category text from the third cell (whitespace stripped).
    cate: str
    # Production country/region text from the fourth cell (whitespace stripped).
    region: str
    # "Want to watch" count text from the fifth cell; ``coming_out_filter``
    # removes a "人" suffix and converts the remainder with ``int()``.
    wantWatchPeopleNum: str
    # ``href`` of the title link in the second cell.
    link: str


class PlayingMovieInfo(TypedDict):
    """One entry of Douban's now-playing list, as built by ``DoubanAPI.get_now_playing``.

    Every value is the raw string read from the list item; no conversion is
    done at this level.
    """

    # Value of the item's ``data-title`` attribute.
    title: str
    # Value of the item's ``data-score`` attribute; ``now_playing_out_filter``
    # converts it with ``float()``.
    score: str
    # Value of the item's ``data-region`` attribute.
    region: str
    # Value of the item's ``data-director`` attribute.
    director: str
    # Value of the item's ``data-actors`` attribute.
    actors: str
    # ``href`` of the link found at ``ul/li[1]/a`` inside the item.
    link: str


class DoubanAPI:
    """Scraper for the listing and detail pages of movie.douban.com.

    Every public method performs a live HTTP request through
    :meth:`fetch_page` and extracts fields from the returned HTML with XPath,
    so the results depend on the markup Douban currently serves.
    """

    # Seconds to wait for a response. Without a timeout a stalled connection
    # would block the caller indefinitely.
    _TIMEOUT = 10

    def __init__(self) -> None:
        self._endpoint = "https://movie.douban.com"
        # Desktop-browser User-Agent sent with every request.
        self._headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/108.0.0.0 Safari/537.36"
        }

    @staticmethod
    def _first(results, default: str = "") -> str:
        """Return the first item of an XPath result list, or ``default`` if it is empty."""
        return results[0] if results else default

    def _load_tree(self, url: str):
        """Download ``url`` and parse the response text as HTML.

        Returns:
            The root element produced by lxml. Callers guard against ``None``
            so that a document lxml cannot turn into a tree does not surface
            as an ``AttributeError``.
        """
        response = self.fetch_page(url)
        parser = etree.HTMLParser(encoding="utf-8")
        return etree.HTML(response.text, parser=parser)

    def fetch_page(self, url: str):
        """Request ``url`` with the scraper's headers and return the response.

        Args:
            url: Absolute URL of the page to download.

        Returns:
            The ``requests`` response object; its body has already been read,
            so it stays usable after the session is closed.

        Raises:
            requests.RequestException: On connection errors or when no
                response arrives within ``_TIMEOUT`` seconds.
        """
        # One short-lived session per call. The context manager closes it so
        # the underlying connection pool is released instead of leaking.
        with requests.Session() as session:
            # NOTE(review): verify=False disables TLS certificate checks. It
            # is kept to preserve existing behaviour, but should be removed
            # unless there is a known reason for it.
            return session.get(
                url, headers=self._headers, verify=False, timeout=self._TIMEOUT
            )

    def get_coming(self) -> List[ComingMovieInfo]:
        """Scrape the "coming" page and return one entry per listed movie.

        Returns:
            The movies in page order. Rows without a title link are skipped,
            other missing cells become empty strings, and an empty list is
            returned when the movie table is not present in the page.
        """
        tree = self._load_tree(f"{self._endpoint}/coming")
        movies_table_path = '//*[@id="content"]/div/div[1]/table/tbody'
        tables = [] if tree is None else tree.xpath(movies_table_path)
        if not tables:
            return []

        ret: List[ComingMovieInfo] = []
        for row in tables[0].iter("tr"):
            title = self._first(row.xpath("td[2]/a/text()"))
            link = self._first(row.xpath("td[2]/a/@href"))
            if not title or not link:
                # Not a movie row: skip it rather than abort the whole listing.
                continue
            ret.append(
                ComingMovieInfo(
                    date=self._first(row.xpath("td[1]/text()")).strip(),
                    title=title,
                    cate=self._first(row.xpath("td[3]/text()")).strip(),
                    region=self._first(row.xpath("td[4]/text()")).strip(),
                    wantWatchPeopleNum=self._first(row.xpath("td[5]/text()")).strip(),
                    link=link,
                )
            )
        return ret

    def get_now_playing(self) -> List[PlayingMovieInfo]:
        """Scrape the now-playing page and return one entry per listed movie.

        Returns:
            The movies in page order. Items without a ``data-title`` or a
            link are skipped, other missing attributes become empty strings,
            and an empty list is returned when the movie list is not present
            in the page.
        """
        # Get the movie list currently on show, the movie list of different cities is the same
        tree = self._load_tree(f"{self._endpoint}/cinema/nowplaying/beijing/")
        movies_table_path = './/div[@id="nowplaying"]/div[2]/ul'
        lists = [] if tree is None else tree.xpath(movies_table_path)
        if not lists:
            return []

        ret: List[PlayingMovieInfo] = []
        for item in lists[0]:
            title = self._first(item.xpath("@data-title"))
            link = self._first(item.xpath("ul/li[1]/a/@href"))
            if not title or not link:
                # Not a movie item: skip it rather than abort the whole listing.
                continue
            ret.append(
                PlayingMovieInfo(
                    title=title,
                    score=self._first(item.xpath("@data-score")),
                    region=self._first(item.xpath("@data-region")),
                    director=self._first(item.xpath("@data-director")),
                    actors=self._first(item.xpath("@data-actors")),
                    link=link,
                )
            )
        return ret

    def get_movie_detail(self, url: str) -> str:
        """Scrape a movie detail page and summarise it in one Chinese sentence.

        Args:
            url: URL of the movie's detail page.

        Returns:
            A sentence naming region, genres, director and up to three
            actors, followed by the synopsis. A field that cannot be found in
            the page is rendered as an empty string.

        Raises:
            IndexError: If the page has no movie info block at all.
        """
        tree = self._load_tree(url)
        info_path = './/div[@class="subject clearfix"]/div[2]'
        info_nodes = [] if tree is None else tree.xpath(info_path)
        if not info_nodes:
            raise IndexError(f"movie info block not found in page {url}")
        info = info_nodes[0]

        director = self._first(tree.xpath(f"{info_path}/span[1]/span[2]/a/text()"))

        # The cast container may be absent; elements without text are dropped
        # so that str.join never receives None.
        actor_boxes = tree.xpath(f"{info_path}/span[3]/span[2]")
        actor_names = (
            [child.text for child in actor_boxes[0] if child.text]
            if actor_boxes
            else []
        )
        actors = "、".join(actor_names[:3])

        types = "、".join(
            span.text
            for span in info.iter("span")
            if span.attrib.get("property") == "v:genre" and span.text
        )

        # The region is the text that follows its label span. Default to ""
        # so a page without that label no longer raises UnboundLocalError.
        region = ""
        for child in info:
            if child.text == "制片国家/地区:":
                region = (child.tail or "").strip()
                break

        synopsis_nodes = tree.xpath('.//div[@class="related-info"]/div/span')
        synopsis = (synopsis_nodes[0].text or "").strip() if synopsis_nodes else ""

        detail = f"是一部{region}{types}电影,由{director}导演,{actors}等人主演.\n剧情简介:{synopsis}"
        return detail


def _arg_or_default(tokens: List[str], index: int, default: str) -> str:
    """Return ``tokens[index]`` when that position exists, otherwise ``default``."""
    return tokens[index] if index < len(tokens) else default


def _convert(text, cast, default):
    """Return ``cast(text)``, or ``default`` when ``text`` is not a valid number."""
    try:
        return cast(text)
    except (TypeError, ValueError):
        return default


def _as_columns(rows, columns):
    """Pivot row dicts into ``{column: {"0": value, "1": value, ...}}``."""
    return {
        column: {str(i): row[column] for i, row in enumerate(rows)}
        for column in columns
    }


def _find_movie_link(movies, title: str):
    """Return the link of the first movie named exactly ``title``, or ``None``."""
    for movie in movies:
        if movie["title"] == title:
            return movie["link"]
    return None


def build_tool(config) -> Tool:
    """Create the "Film Search" tool and register its three Douban endpoints.

    Args:
        config: Mapping of tool settings. When it has a truthy ``"debug"``
            entry, the object stored under ``"douban_api"`` is used as the
            data source; otherwise a new ``DoubanAPI`` is created.

    Returns:
        The ``Tool`` with ``/coming_out_filter``, ``/now_playing_out_filter``
        and ``/print_detail`` registered on it.
    """
    tool = Tool(
        "Film Search Plugin",
        "search for up-to-date film information.",
        name_for_model="Film Search",
        description_for_model="Plugin for search for up-to-date film information.",
        logo_url="https://your-app-url.com/.well-known/logo.png",
        contact_email="hello@contact.com",
        legal_info_url="hello@legal.com",
    )

    if "debug" in config and config["debug"]:
        douban_api = config["douban_api"]
    else:
        douban_api = DoubanAPI()

    # NOTE(review): the endpoint docstrings below look like they double as the
    # descriptions shown to the model, so their wording and the Chinese
    # example data are kept as prompt text - confirm against Tool.get.

    @tool.get("/coming_out_filter")
    def coming_out_filter(args: str):
        """coming_out_filter(args: str) prints the details of the filtered [outNum] coming films now according to region, cate and outNum.
        args is a list like 'str1, str2, str3, str4'
        str1 represents Production country or region. If you cannot find a region, str1 is 全部
        str2 represents movie's category. If you cannot find a category, str2 is 全部
        str3 can be a integer number that agent want to get. If you cannot find a number, str3 is 100. If the found movie's num is less than str3, Final Answer only print [the found movie's num] movies.
        str4 can be a True or False that reflect whether agent want the result sorted by people number which look forward to the movie.
        Final answer should be complete.

        This is an example:
        Thought: I need to find the upcoming Chinese drama movies and the top 2 most wanted movies
        Action: coming_out_filter
        Action Input: {"args" : "中国, 剧情, 2, True"}
        Observation: {"date":{"23":"04月28日","50":"07月"},"title":{"23":"长空之王","50":"热烈"},"cate":{"23":"剧情 / 动作","50":"剧情 / 喜剧"},"region":{"23":"中国大陆","50":"中国大陆"},"wantWatchPeopleNum":{"23":"39303人","50":"26831人"}}
        Thought: I now know the top 2 upcoming Chinese drama movies
        Final Answer: 即将上映的中国剧情电影有2部:长空之王、热烈,大家最想看的前2部分别是:长空之王、热烈。
        """
        # Missing or malformed parts fall back to the defaults promised in
        # the description above instead of raising IndexError/ValueError.
        tokens = re.findall(r"\b\w+\b", args)
        region = _arg_or_default(tokens, 0, "全部")
        if region == "全部":
            region = ""
        cate = _arg_or_default(tokens, 1, "全部")
        if cate == "全部":
            cate = ""
        out_num = max(0, _convert(_arg_or_default(tokens, 2, "100"), int, 100))
        want_sort = _arg_or_default(tokens, 3, "False").lower() == "true"

        coming_movies = [
            {
                "date": movie["date"],
                "title": movie["title"],
                "cate": movie["cate"],
                "region": movie["region"],
                # A blank or unexpected count is treated as 0 so that one odd
                # row cannot break the whole query.
                "wantWatchPeopleNum": _convert(
                    movie["wantWatchPeopleNum"].replace("人", ""), int, 0
                ),
                "link": movie["link"],
            }
            for movie in douban_api.get_coming()
            if cate in movie["cate"] and region in movie["region"]
        ]

        # Sort by people that are looking forward to the movie
        if want_sort:
            coming_movies.sort(key=lambda m: m["wantWatchPeopleNum"], reverse=True)

        selected = [
            dict(movie, wantWatchPeopleNum="{}人".format(movie["wantWatchPeopleNum"]))
            for movie in coming_movies[:out_num]
        ]
        return _as_columns(
            selected, ("date", "title", "cate", "region", "wantWatchPeopleNum")
        )

    @tool.get("/now_playing_out_filter")
    def now_playing_out_filter(args: str):
        """now_playing_out_filter(args: str) prints the details of the filtered [outNum] playing films now according to region, scoreSort
        args is a list like 'str1, str2, str3'
        str1 can be '中国','日本' or other Production country or region. If you cannot find a region, str1 is 全部
        str2 can be a integer number that agent want to get. If you cannot find a number, str2 is 100. If the found movie's num is less than str2, Final Answer only print [the found movie's num] movies.
        str3 can be a True or False that reflect whether agent want the result sorted by score.
        Final answer should be complete.

        This is an example:
        Input: 您知道现在有正在上映中国的电影吗?请输出3部
        Thought: I need to find the currently playing movies with the highest scores
        Action: now_playing_out_filter
        Action Input: {"args" : "全部, 3, True"}
        Observation: {"title":{"34":"切腹","53":"吉赛尔","31":"小森林 夏秋篇"},"score":{"34":"9.4","53":"9.2","31":"9.0"},"region":{"34":"日本","53":"西德","31":"日本"},"director":{"34":"小林正树","53":"Hugo Niebeling","31":"森淳一"},"actors":{"34":"仲代达矢 / 石浜朗 / 岩下志麻","53":"卡拉·弗拉奇 / 埃里克·布鲁恩 / Bruce Marks","31":"桥本爱 / 三浦贵大 / 松冈茉优"}}
        Thought: I now know the currently playing movies with the highest scores
        Final Answer: 现在上映的评分最高的3部电影是:切腹、吉赛尔、小森林 夏秋篇

        """
        # Missing or malformed parts fall back to the defaults promised in
        # the description above instead of raising IndexError/ValueError.
        tokens = re.findall(r"\b\w+\b", args)
        region = _arg_or_default(tokens, 0, "全部")
        if region == "全部":
            region = ""
        out_num = max(0, _convert(_arg_or_default(tokens, 1, "100"), int, 100))
        score_sort = _arg_or_default(tokens, 2, "False").lower() == "true"

        playing_movies = [
            {
                "title": movie["title"],
                # A blank or non-numeric score is treated as 0.0.
                "score": _convert(movie["score"], float, 0.0),
                "region": movie["region"],
                "director": movie["director"],
                "actors": movie["actors"],
                "link": movie["link"],
            }
            for movie in douban_api.get_now_playing()
            if region in movie["region"]
        ]

        # Sort by score
        if score_sort:
            playing_movies.sort(key=lambda m: m["score"], reverse=True)

        selected = [
            dict(movie, score="{}".format(movie["score"]))
            for movie in playing_movies[:out_num]
        ]
        return _as_columns(
            selected, ("title", "score", "region", "director", "actors")
        )

    @tool.get("/print_detail")
    def print_detail(args: str):
        """print_detail(args: str) prints the details of a movie, giving its name.
        args is a list like 'str1'
        str1 is target movie's name.
        step1: get the coming and now playing movie lists with all movie's links and other information.
        step2: get the target movie's link from the coming or now playing list
        step3: get detail from step2's link

        This is an example:
        Input: "电影流浪地球2怎么样?"
        Thought: I need to find the movie's information
        Action: print_detail
        Action Input: {"args" : "流浪地球2"}
        Observation: "是一部中国大陆的科幻、冒险、灾难电影,由郭帆导演,吴京、刘德华、李雪健等人主演.\n剧情简介:太阳即将毁灭,人类在地球表面建造出巨大的推进器,寻找新的家园。然而宇宙之路危机四伏,为了拯救地球,流浪地球时代的年轻人再次挺身而出,展开争分夺秒的生死之战。"
        Thought: I now know the final answer
        Final Answer: 流浪地球2是一部中国大陆的科幻、冒险、灾难电影,由郭帆导演,吴京、刘德华、李雪健等人主演,剧情简介是太阳即将毁灭,人类在地球表面建造出巨大的推进器,寻找新的家园,然而宇宙之路危机四伏,为了拯救地球,流浪地球时代的年轻人再次挺身而出,

        """
        # The whole argument, minus surrounding quotes/brackets, is tried
        # first so titles with spaces or punctuation can match. The first
        # word alone is kept as a fallback for backward compatibility.
        full_title = args.strip().strip("[]'\"").strip()
        tokens = re.findall(r"\b\w+\b", args)
        first_word = tokens[0] if tokens else ""
        if not full_title and not first_word:
            return "没有找到该电影"

        fetched = []
        if full_title:
            # Listings are fetched lazily: the now-playing page is only
            # requested when the coming page has no match.
            for fetch in (douban_api.get_coming, douban_api.get_now_playing):
                movies = fetch()
                fetched.append(movies)
                link = _find_movie_link(movies, full_title)
                if link is not None:
                    return "{}{}".format(full_title, douban_api.get_movie_detail(link))

        if first_word and first_word != full_title:
            if not fetched:
                fetched = [douban_api.get_coming(), douban_api.get_now_playing()]
            for movies in fetched:
                link = _find_movie_link(movies, first_word)
                if link is not None:
                    return "{}{}".format(first_word, douban_api.get_movie_detail(link))

        return "没有找到该电影"

    return tool