Spaces:

gabedewitt
/

anime_recsys

Runtime error

App Files Files Community

Gabriel commited on Sep 2, 2022

Commit

ca4c9d6

•

1 Parent(s): 55bd2da

First commit

Browse files

Files changed (6) hide show

Home.py +33 -0
LICENSE +21 -0
README.md +1 -1
pages/1🏮_Recommendation.py +135 -0
pages/2📜_Dataset_information.py +20 -0
requirements.txt +0 -0

Home.py ADDED Viewed

	@@ -0,0 +1,33 @@

+from skimage import io
+import matplotlib.pyplot as plt
+import streamlit as st
+from streamlit.logger import get_logger
+st.set_page_config(page_title="Home", page_icon="👋")
+LOGGER = get_logger(__name__)
+def run():
+    st.write("# Welcome to this Anime sugestion app! 👋")
+    st.sidebar.success("Select an option above.")
+    st.markdown(
+        """
+        This webapp offers a recommendation based on content information available
+        on the MyAnimeList website, such suggestions don't include series that have
+        yet to air and some long running shows that do not have a known number of episodes.
+        This means that sadly One Piece and Case Closed won't be recommended by this app,
+        but both are definitely worth the reading if the prospect of 1000+ episodes feels
+        too long for you.
+        The nature of the dataset allowed for a the
+        """
+    )
+    st.image("https://img1.ak.crunchyroll.com/i/spire4/9b3f967b806812e4b8ec9e8194e3a52a1658316525_main.jpg")
+if __name__ == "__main__":
+    run()

LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2022 Gabriel Sebastião
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

README.md CHANGED Viewed

@@ -5,7 +5,7 @@ colorFrom: purple
 colorTo: gray
 sdk: streamlit
 sdk_version: 1.10.0
-app_file: app.py
 pinned: false
 license: mit
 ---

 colorTo: gray
 sdk: streamlit
 sdk_version: 1.10.0
+app_file: Home.py
 pinned: false
 license: mit
 ---

pages/1🏮_Recommendation.py ADDED Viewed

	@@ -0,0 +1,135 @@

+import streamlit as st
+import pandas as pd
+from skimage import io
+import matplotlib.pyplot as plt
+from wordcloud import WordCloud
+import itertools
+import scipy
+import numpy as np
+import validators
+from ast import literal_eval
+from sklearn.metrics.pairwise import sigmoid_kernel
+from sklearn.metrics.pairwise import linear_kernel
+from sklearn.feature_extraction.text import TfidfVectorizer
+def show_Cover(url: str) -> None:
+    a = io.imread(url)
+    plt.imshow(a)
+    plt.axis('off')
+    plt.show()
+@st.experimental_memo
+def preprocess_lists(df: pd.DataFrame, column_list: list) -> pd.DataFrame:
+    for column in column_list:
+        string = column + '_treated'
+        df_hold = df.loc[:,column]
+        df_hold = df_hold.apply(lambda x: literal_eval(x) if len(x) > 2 else [])
+        df[string] = df_hold
+        df.drop(column, axis = 1, inplace = True)
+    return df
+def gen_wordcloud(df: pd.DataFrame, column_name: str) -> None:
+    list_wc = df[column_name].tolist()
+    list_wc = list(itertools.chain(*list_wc))
+    strings = ' '.join(list_wc)
+    plt.figure(figsize=(10,10))
+    wordcloud = WordCloud(max_words=100,background_color="white",width=800, height=400, min_font_size = 10).generate(strings)
+    fig, ax = plt.subplots(figsize = (10, 10))
+    ax.imshow(wordcloud)
+    plt.axis("off")
+    plt.tight_layout(pad=0)
+    st.pyplot(fig)
+@st.cache
+def vect_Tfid(series: pd.Series) -> scipy.sparse.csr_matrix:
+    tfv = TfidfVectorizer(min_df=3,  max_features=None,
+                      analyzer='word',
+                      ngram_range=(1, 3),
+                      stop_words = 'english')
+    return tfv.fit_transform(series)
+def sim_score(df: pd.DataFrame, kernel: str = 'sigmoid') -> np.ndarray:
+    tfv_matrix = vect_Tfid(df['synopsis'])
+    if kernel == 'sigmoid':
+        return sigmoid_kernel(tfv_matrix, tfv_matrix)
+    elif kernel == 'linear':
+        return linear_kernel(tfv_matrix, tfv_matrix)
+@st.cache
+def get_rec(entry: str, df: pd.DataFrame, sug_num: int, rec_type: str) -> pd.DataFrame:
+    idx = pd.Series(df.index, index=df['title']).drop_duplicates()[entry]
+    df_sim = list(enumerate(sim_score(df, rec_type)[idx]))
+    sim_scores = sorted(df_sim, key = lambda x: x[1], reverse = True)
+    sim_recs = sim_scores[1:sug_num]
+    anime_indices = [y[0] for y in sim_recs]
+    return df['title'].iloc[anime_indices]
+def data_frame_demo() -> None:
+    @st.experimental_memo
+    def get_Anime_data() -> None:
+        df = pd.read_csv('./myanimelist/anime.csv')
+        return df
+    @st.experimental_memo
+    def preprocess(dataframe: pd.DataFrame) -> pd.DataFrame:
+        columns = ['title', 'type', 'score', 'scored_by', 'status', 'episodes', 'members',
+           'favorites', 'rating', 'sfw', 'genres', 'themes', 'demographics',
+           'studios', 'producers', 'licensors','synopsis']
+        return dataframe[columns]
+    df = get_Anime_data()
+    df_pred = preprocess(df)
+    df_pred.fillna(value = 'Not Found in MAL', inplace=True)
+    list_columns = ['genres','themes','demographics','studios'
+               ,'producers','licensors']
+    df_pred = preprocess_lists(df_pred, list_columns)
+    anime_list = st.multiselect(
+         "Choose some anime", list(df.title)
+    )
+    #st.dataframe(df.head()) Used for testing
+    #st.dataframe(df_pred.head()) Used for testing
+    if not anime_list:
+        st.error("Please select an anime.")
+    else:
+        df_subset = df[df["title"].isin(anime_list)]
+        r_type = st.selectbox('Which kernel to be used for the recommendation?',
+        ('sigmoid', 'linear'))
+        rec_num = st.slider('How many recommendations?', 10, 50, 20)
+        for anime, picture, url, trailer in zip(anime_list, df_subset.main_picture, df_subset.url, df_subset.trailer_url):
+            col1, col2, col3 = st.columns([2,4,4])
+            with col1:
+                st.write(f'Anime selected: {anime}')
+                #st.dataframe(df_subset) used for testing
+                st.image(picture, caption = picture)
+                st.write(f'[MAL page]({url})')
+                if validators.url(trailer):
+                    st.video(trailer)
+            with col2:
+                rec_list = get_rec(anime, df_pred, rec_num, r_type)
+                rec_df = df_pred[df_pred["title"].isin(rec_list)]
+                st.dataframe(rec_df[['title','licensors_treated','sfw']],
+                height=550, width= 810)
+            with col3:
+                gen_wordcloud(rec_df,'genres_treated')
+                gen_wordcloud(rec_df,'themes_treated')
+st.set_option('deprecation.showPyplotGlobalUse', False)
+st.set_page_config(page_title="Recommendation", page_icon="🏮", layout="wide")
+st.markdown("# Anime Suggestion")
+st.sidebar.header("Anime Suggestion")
+st.write(
+    """In this section choose an anime or a theme that you really like. The model will take care of the rest. Enjoy!"""
+)
+data_frame_demo()

pages/2📜_Dataset_information.py ADDED Viewed

	@@ -0,0 +1,20 @@

+import streamlit as st
+import time
+import numpy as np
+st.set_page_config(page_title="Dataset information", page_icon="📜")
+st.markdown(
+        """
+        # Dataset information
+        This webapp is uses the MyAnimeList Anime and Manga Datasets from Andreu Vall Hernàndez
+        available on Kaggle as such dataset has both info scraped with the official API and Jikan API.
+        Which makes it the best option available, since it's weekly updated and covers both anime and manga.
+        Link to the dataset: <https://www.kaggle.com/datasets/andreuvallhernndez/myanimelist>
+        """
+    )
+st.image('https://i.imgur.com/vEy5Zaq.png', width=300, caption = 'MyAnimeList Logo')
+st.image('https://www.kaggle.com/static/images/logos/kaggle-logo-gray-300.png', width=300, caption = 'Kaggle Logo')

requirements.txt ADDED Viewed

Binary file (118 Bytes). View file