{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Bipartite user-movie rating graphs\n",
    "\n",
    "Reads `../data/ratings.csv` (columns `userId`, `movieId`, `rating`, `timestamp`) and writes several GEXF graphs to the working directory:\n",
    "\n",
    "- `graph_b.gexf`: every rating as an unweighted user-movie edge\n",
    "- `graph_b_r45.gexf`, `graph_b_indiferente.gexf`, `graph_b_r01.gexf`: edges for liked (rating >= 4), indifferent (2 <= rating < 4) and disliked (rating < 2) ratings\n",
    "- `graph_bw.gexf`: every rating as an edge weighted by the centred rating\n",
    "- `graph_g.gexf`: graph rebuilt from the biadjacency matrix of `graph_b`\n",
    "- `graph_am_user.gexf`: user-user graph built from the biadjacency matrix times its transpose"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import networkx as nx\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from networkx.algorithms import bipartite\n",
    "\n",
    "RATINGS_CSV = '../data/ratings.csv'\n",
    "# Added to every movieId so that movie node ids cannot coincide with user node ids.\n",
    "MOVIE_ID_OFFSET = 610\n",
    "# Midpoint of the observed 0.5-5.0 rating scale; subtracted to centre the edge weights.\n",
    "RATING_CENTER = 2.75"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "ratings_raw = pd.read_csv(RATINGS_CSV)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Inspect the raw data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[4. 5. 3. 2. 1. 4.5 3.5 2.5 0.5 1.5]\n",
      "['userId' 'movieId' 'rating' 'timestamp']\n",
      "[ 1 3 6 ... 160836 163937 163981]\n"
     ]
    }
   ],
   "source": [
    "# Distinct rating values, column names and distinct (unshifted) movie ids.\n",
    "print(ratings_raw['rating'].unique())\n",
    "print(ratings_raw.columns.values)\n",
    "print(ratings_raw['movieId'].unique())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Build and export the graphs"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Shift every movieId past the user ids so both can be node keys in one graph.\n",
    "# A new frame is derived instead of mutating in place, so re-running this cell\n",
    "# does not shift the ids a second time.\n",
    "ratings = (\n",
    "    ratings_raw\n",
    "    .filter(['userId', 'movieId', 'rating'])\n",
    "    .assign(movieId=lambda frame: frame['movieId'] + MOVIE_ID_OFFSET)\n",
    ")\n",
    "\n",
    "user_id_list = ratings['userId'].unique()\n",
    "movie_id_list = ratings['movieId'].unique()\n",
    "\n",
    "# A shared id would silently merge a user node with a movie node.\n",
    "assert np.intersect1d(user_id_list, movie_id_list).size == 0, 'user and movie ids overlap'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "def build_bipartite_graph(user_ids, movie_ids, edges, weighted=False):\n",
    "    \"\"\"Build a user-movie graph holding every given node plus the given edges.\n",
    "\n",
    "    user_ids, movie_ids: iterables of node ids, tagged bipartite=0 and bipartite=1.\n",
    "    edges: DataFrame whose columns are (userId, movieId), or (userId, movieId, rating)\n",
    "        when `weighted` is true; the third column then becomes the edge weight.\n",
    "    Returns the populated nx.Graph.\n",
    "    \"\"\"\n",
    "    graph = nx.Graph()\n",
    "    # All nodes are added up front, so nodes without any edge in `edges` stay as isolates.\n",
    "    graph.add_nodes_from(user_ids, bipartite=0)\n",
    "    graph.add_nodes_from(movie_ids, bipartite=1)\n",
    "    # Row tuples keep each column's own dtype. Converting the whole frame to a single\n",
    "    # NumPy array would upcast the integer ids to float whenever a rating column is\n",
    "    # present, and the edge endpoints would no longer match the integer node ids.\n",
    "    edge_rows = edges.itertuples(index=False, name=None)\n",
    "    if weighted:\n",
    "        graph.add_weighted_edges_from(edge_rows)\n",
    "    else:\n",
    "        graph.add_edges_from(edge_rows)\n",
    "    return graph\n",
    "\n",
    "\n",
    "# Every rating as an unweighted edge.\n",
    "rated_graph = build_bipartite_graph(\n",
    "    user_id_list, movie_id_list, ratings.filter(['userId', 'movieId'])\n",
    ")\n",
    "nx.write_gexf(rated_graph, 'graph_b.gexf')\n",
    "\n",
    "# One unweighted graph per opinion bucket.\n",
    "RATING_BUCKETS = {\n",
    "    'graph_b_r45.gexf': 'rating>=4',  # liked\n",
    "    'graph_b_indiferente.gexf': 'rating<4 & rating>=2',  # indifferent\n",
    "    'graph_b_r01.gexf': 'rating<2',  # disliked\n",
    "}\n",
    "for gexf_path, condition in RATING_BUCKETS.items():\n",
    "    bucket_edges = ratings.query(expr=condition).filter(['userId', 'movieId'])\n",
    "    bucket_graph = build_bipartite_graph(user_id_list, movie_id_list, bucket_edges)\n",
    "    nx.write_gexf(bucket_graph, gexf_path)\n",
    "\n",
    "# Weighted graph: edge weight is the rating centred on RATING_CENTER.\n",
    "ratings_centered = ratings.assign(rating=ratings['rating'] - RATING_CENTER)\n",
    "weighted_graph = build_bipartite_graph(\n",
    "    user_id_list, movie_id_list, ratings_centered, weighted=True\n",
    ")\n",
    "nx.write_gexf(weighted_graph, 'graph_bw.gexf')\n",
    "\n",
    "# Users x movies biadjacency matrix of the unweighted graph. Both orders are pinned so\n",
    "# the matrix layout is reproducible between runs.\n",
    "biadjacency = bipartite.biadjacency_matrix(\n",
    "    rated_graph, row_order=user_id_list, column_order=movie_id_list\n",
    ")\n",
    "rebuilt_graph = bipartite.from_biadjacency_matrix(biadjacency)\n",
    "nx.write_gexf(rebuilt_graph, 'graph_g.gexf')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## User-user graph"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Users x users product of the biadjacency matrix with its transpose.\n",
    "user_overlap = biadjacency.dot(biadjacency.T)\n",
    "# NOTE(review): the diagonal of this product is non-zero for every user, so the graph\n",
    "# presumably carries a self-loop on each node - confirm that this is wanted.\n",
    "user_graph = nx.Graph(user_overlap)\n",
    "nx.write_gexf(user_graph, 'graph_am_user.gexf')\n",
    "\n",
    "# The movie-side product (biadjacency.T.dot(biadjacency)) is deliberately not built:\n",
    "# it was found to be too heavy."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "10334\n",
      "9724\n"
     ]
    }
   ],
   "source": [
    "# Total node count (users + movies), then the movie count alone.\n",
    "print(len(movie_id_list)+len(user_id_list))\n",
    "print(len(movie_id_list))"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}