Gabriel committed on
Commit
ca4c9d6
•
1 Parent(s): 55bd2da

First commit

Browse files
Home.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from skimage import io
2
+ import matplotlib.pyplot as plt
3
+ import streamlit as st
4
+ from streamlit.logger import get_logger
5
+
6
# Page metadata for the landing page; the icon is the waving-hand emoji
# (the previous text was a mis-decoded copy of the same character).
st.set_page_config(page_title="Home", page_icon="👋")
# Module-level logger obtained from Streamlit's logging helper.
LOGGER = get_logger(__name__)
8
+
9
+
10
def run():
    """Render the landing page: heading, sidebar hint, intro text and banner.

    Takes no arguments and returns nothing; every call only emits Streamlit
    elements in the order they appear below.
    """
    st.write("# Welcome to this Anime suggestion app! 👋")

    st.sidebar.success("Select an option above.")

    # NOTE(review): the last sentence of this text is unfinished in the
    # original ("allowed for a the"); it is kept verbatim until the intended
    # wording is known.
    st.markdown(
        """
        This webapp offers a recommendation based on content information available
        on the MyAnimeList website, such suggestions don't include series that have
        yet to air and some long running shows that do not have a known number of episodes.
        This means that sadly One Piece and Case Closed won't be recommended by this app,
        but both are definitely worth the reading if the prospect of 1000+ episodes feels
        too long for you.

        The nature of the dataset allowed for a the
        """
    )

    st.image("https://img1.ak.crunchyroll.com/i/spire4/9b3f967b806812e4b8ec9e8194e3a52a1658316525_main.jpg")


if __name__ == "__main__":
    run()
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Gabriel Sebastião
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md CHANGED
@@ -5,7 +5,7 @@ colorFrom: purple
5
  colorTo: gray
6
  sdk: streamlit
7
  sdk_version: 1.10.0
8
- app_file: app.py
9
  pinned: false
10
  license: mit
11
  ---
 
5
  colorTo: gray
6
  sdk: streamlit
7
  sdk_version: 1.10.0
8
+ app_file: Home.py
9
  pinned: false
10
  license: mit
11
  ---
pages/1๐Ÿฎ_Recommendation.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from skimage import io
4
+ import matplotlib.pyplot as plt
5
+ from wordcloud import WordCloud
6
+ import itertools
7
+ import scipy
8
+ import numpy as np
9
+ import validators
10
+ from ast import literal_eval
11
+ from sklearn.metrics.pairwise import sigmoid_kernel
12
+ from sklearn.metrics.pairwise import linear_kernel
13
+ from sklearn.feature_extraction.text import TfidfVectorizer
14
+
15
def show_Cover(url: str) -> None:
    """Read the image found at ``url`` and display it with matplotlib.

    The axes are switched off so only the picture itself is drawn.
    """
    cover_pixels = io.imread(url)
    plt.imshow(cover_pixels)
    plt.axis('off')
    plt.show()
20
+
21
@st.experimental_memo
def preprocess_lists(df: pd.DataFrame, column_list: list) -> pd.DataFrame:
    """Turn stringified-list columns into real Python lists.

    For every name in ``column_list`` a new ``<name>_treated`` column is added
    holding the parsed value, and the original column is removed.

    Args:
        df: Frame containing the columns named in ``column_list``. It is not
            modified; the work happens on a copy.
        column_list: Names of the columns whose cells hold list literals
            written as text (for example ``"['Action', 'Drama']"``).

    Returns:
        A new frame with the ``_treated`` columns appended and the source
        columns dropped.
    """

    def _parse(cell):
        # Cells that are already lists need no parsing.
        if isinstance(cell, list):
            return cell
        # Anything of two characters or fewer (e.g. "[]"), or not text at
        # all (e.g. NaN), is treated as an empty list.
        if not isinstance(cell, str) or len(cell) <= 2:
            return []
        try:
            return literal_eval(cell)
        except (ValueError, SyntaxError):
            # Placeholder text such as a fill value is not a Python literal;
            # treat it as "no entries" instead of aborting the whole page.
            return []

    # Work on a copy so neither the caller's frame nor a cached object is
    # altered behind its back.
    result = df.copy()
    for column in column_list:
        result[column + '_treated'] = result[column].apply(_parse)
    return result.drop(columns=list(column_list))
30
+
31
def gen_wordcloud(df: pd.DataFrame, column_name: str) -> None:
    """Render a word cloud built from a list-valued column.

    Every list in ``df[column_name]`` is flattened, the items are joined with
    spaces and the resulting text is drawn as a word cloud in the Streamlit
    page.

    Args:
        df: Frame holding the column to visualise.
        column_name: Name of a column whose cells are lists of strings.
    """
    tokens = itertools.chain.from_iterable(df[column_name].tolist())
    text = ' '.join(tokens)

    if not text.strip():
        # WordCloud refuses to build a cloud from zero words, so report the
        # situation instead of letting the page fail.
        st.info("No data available for the word cloud.")
        return

    wordcloud = WordCloud(max_words=100, background_color="white", width=800,
                          height=400, min_font_size=10).generate(text)

    # A single figure is created (the extra, never-used plt.figure call was
    # leaking one figure per invocation).
    fig, ax = plt.subplots(figsize=(10, 10))
    ax.imshow(wordcloud)
    ax.axis("off")
    fig.tight_layout(pad=0)
    st.pyplot(fig)
    # Release the figure once it has been handed to Streamlit so repeated
    # reruns do not accumulate open figures.
    plt.close(fig)
43
+
44
@st.cache
def vect_Tfid(series: pd.Series) -> scipy.sparse.csr_matrix:
    """Fit a TF-IDF vectorizer on ``series`` and return the transformed matrix.

    The vectorizer works on words, uses n-grams of length one to three,
    applies the built-in English stop-word list, uses ``min_df=3`` and puts
    no cap on the vocabulary size.
    """
    vectorizer_options = {
        'min_df': 3,
        'max_features': None,
        'analyzer': 'word',
        'ngram_range': (1, 3),
        'stop_words': 'english',
    }
    vectorizer = TfidfVectorizer(**vectorizer_options)
    return vectorizer.fit_transform(series)
51
+
52
def sim_score(df: pd.DataFrame, kernel: str = 'sigmoid') -> np.ndarray:
    """Compute the pairwise similarity matrix of the ``synopsis`` column.

    Args:
        df: Frame with a ``synopsis`` column of text.
        kernel: ``'sigmoid'`` or ``'linear'``; selects the scikit-learn
            pairwise kernel applied to the TF-IDF matrix.

    Returns:
        The square kernel matrix of the TF-IDF rows against themselves.

    Raises:
        ValueError: If ``kernel`` is not one of the supported names.
    """
    # Reject unknown kernels up front: previously the function fell through
    # and returned None, which only failed later with an unrelated error.
    if kernel not in ('sigmoid', 'linear'):
        raise ValueError(
            f"Unsupported kernel {kernel!r}; expected 'sigmoid' or 'linear'."
        )

    tfv_matrix = vect_Tfid(df['synopsis'])
    kernel_fn = sigmoid_kernel if kernel == 'sigmoid' else linear_kernel
    return kernel_fn(tfv_matrix, tfv_matrix)
58
+
59
+
60
@st.cache
def get_rec(entry: str, df: pd.DataFrame, sug_num: int, rec_type: str) -> pd.Series:
    """Return the titles most similar to ``entry``.

    Args:
        entry: Title to look up in ``df['title']``.
        df: Frame with ``title`` and ``synopsis`` columns.
        sug_num: Number of recommendations to return.
        rec_type: Kernel name forwarded to ``sim_score``.

    Returns:
        A Series of up to ``sug_num`` titles ordered from most to least
        similar, never containing the looked-up row itself.

    Raises:
        KeyError: If ``entry`` does not occur in ``df['title']``.
    """
    # The similarity matrix is addressed by row position, so find the entry
    # by position; with duplicate titles the first occurrence is used.
    positions = np.flatnonzero((df['title'] == entry).to_numpy())
    if positions.size == 0:
        raise KeyError(entry)
    idx = int(positions[0])

    scores = np.asarray(sim_score(df, rec_type)[idx])

    # Stable descending order: ties keep their original row order.
    ranked = np.argsort(-scores, kind='stable')
    # Drop the query row explicitly rather than assuming it ranks first, then
    # keep exactly sug_num rows (the old [1:sug_num] slice returned one fewer).
    ranked = ranked[ranked != idx][:sug_num]

    return df['title'].iloc[ranked]
73
+
74
def data_frame_demo() -> None:
    """Drive the recommendation page.

    Loads the anime table, prepares the columns used for recommendations,
    lets the user pick one or more titles, and for each pick shows its cover,
    MAL link and trailer next to a table of recommended titles and word
    clouds of their genres and themes.
    """

    @st.experimental_memo
    def get_Anime_data() -> pd.DataFrame:
        """Read the anime table from the CSV shipped with the app."""
        return pd.read_csv('./myanimelist/anime.csv')

    @st.experimental_memo
    def preprocess(dataframe: pd.DataFrame) -> pd.DataFrame:
        """Keep only the columns used by the recommender and its display."""
        columns = ['title', 'type', 'score', 'scored_by', 'status', 'episodes', 'members',
                   'favorites', 'rating', 'sfw', 'genres', 'themes', 'demographics',
                   'studios', 'producers', 'licensors', 'synopsis']
        return dataframe[columns]

    df = get_Anime_data()
    # Non-inplace fillna: the column selection above is a slice of ``df``,
    # and filling it in place triggers pandas' chained-assignment warning.
    df_pred = preprocess(df).fillna(value='Not Found in MAL')

    list_columns = ['genres', 'themes', 'demographics', 'studios',
                    'producers', 'licensors']

    df_pred = preprocess_lists(df_pred, list_columns)

    anime_list = st.multiselect(
        "Choose some anime", list(df.title)
    )
    if not anime_list:
        st.error("Please select an anime.")
        return

    r_type = st.selectbox('Which kernel to be used for the recommendation?',
                          ('sigmoid', 'linear'))
    rec_num = st.slider('How many recommendations?', 10, 50, 20)

    for anime in anime_list:
        # Look the row up by title. Zipping the selection with a filtered
        # frame paired titles (selection order) with rows (dataset order),
        # so covers and links could belong to a different anime.
        selected = df[df["title"] == anime].iloc[0]
        picture = selected["main_picture"]
        url = selected["url"]
        trailer = selected["trailer_url"]

        col1, col2, col3 = st.columns([2, 4, 4])
        with col1:
            st.write(f'Anime selected: {anime}')
            # Missing values come back from pandas as NaN, not text.
            if isinstance(picture, str):
                st.image(picture, caption=picture)
            st.write(f'[MAL page]({url})')
            if isinstance(trailer, str) and validators.url(trailer):
                st.video(trailer)
        with col2:
            rec_list = get_rec(anime, df_pred, rec_num, r_type)
            # Select by the returned index so the table keeps the ranking
            # order and contains exactly the recommended rows.
            rec_df = df_pred.loc[rec_list.index]
            st.dataframe(rec_df[['title', 'licensors_treated', 'sfw']],
                         height=550, width=810)
        with col3:
            gen_wordcloud(rec_df, 'genres_treated')
            gen_wordcloud(rec_df, 'themes_treated')
125
+
126
+
127
# Page bootstrap: silence the pyplot global-use deprecation notice, configure
# the page, print the headings and intro, then hand over to the demo routine.
st.set_option('deprecation.showPyplotGlobalUse', False)
# NOTE(review): the page_icon text looks like a mis-encoded emoji; confirm the
# intended character against the original file before changing it.
st.set_page_config(page_title="Recommendation", page_icon="๐Ÿฎ", layout="wide")
st.markdown("# Anime Suggestion")
st.sidebar.header("Anime Suggestion")
st.write("""In this section choose an anime or a theme that you really like. The model will take care of the rest. Enjoy!""")

data_frame_demo()
pages/2📜_Dataset_information.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import time
3
+ import numpy as np
4
+
5
# Static page describing where the data comes from. The scroll emoji and the
# author's name were mis-decoded in the previous text and are restored here.
st.set_page_config(page_title="Dataset information", page_icon="📜")


st.markdown(
    """
# Dataset information

This webapp uses the MyAnimeList Anime and Manga Datasets from Andreu Vall Hernàndez
available on Kaggle as such dataset has both info scraped with the official API and Jikan API.
Which makes it the best option available, since it's weekly updated and covers both anime and manga.

Link to the dataset: <https://www.kaggle.com/datasets/andreuvallhernndez/myanimelist>
"""
)
# Logos of the two data sources, shown at a fixed width with captions.
st.image('https://i.imgur.com/vEy5Zaq.png', width=300, caption = 'MyAnimeList Logo')
st.image('https://www.kaggle.com/static/images/logos/kaggle-logo-gray-300.png', width=300, caption = 'Kaggle Logo')
requirements.txt ADDED
Binary file (118 Bytes). View file