film_mlmodule / src /data_retrieving.py
root
first hf commit
6defa3d
raw history blame
No virus
1.55 kB
import logging
import os
from abc import ABC, abstractmethod
import pandas as pd
# Absolute path of the "sql_files" directory located next to this module;
# each retrieval strategy keeps its queries in a sub-folder of it.
sql_folder_path = os.path.abspath(
    os.path.join(os.path.dirname(__file__), "sql_files")
)
class RetrieveData(ABC):
    """
    Common interface shared by every data-retrieval strategy.

    A strategy retrieves 3 tables: movies, users, ratings.
    """

    @abstractmethod
    def retrieve_data_sql(self, conn):
        """
        Retrieve the strategy's tables through a database connection.

        Args:
            conn: connection to postgresql database

        Returns:
            None in this abstract declaration (the body is empty).
            NOTE(review): the Baseline subclass in this module returns
            three DataFrames (movies, users, ratings), so that tuple is
            presumably the intended contract for subclasses - confirm.
        """
class Baseline(RetrieveData):
    """
    Baseline strategy, or simplest strategy
    - Movie: get id + vote_average
    - User: get id of user who is not staff
    - Ratings: get everything

    The exact columns and filters are defined by the SQL files stored in the
    "baseline" sub-folder of ``sql_folder_path``; this class only runs them.
    """

    @staticmethod
    def _read_table(conn, sql_filename, label):
        """
        Run the query stored in one baseline SQL file.

        Args:
            conn: connection to postgresql database
            sql_filename: name of the file inside the "baseline" sub-folder
            label: table name used as the prefix of the log message

        Returns:
            The query result as a pandas DataFrame.
        """
        sql_path = os.path.join(sql_folder_path, "baseline", sql_filename)
        # Explicit encoding so the query text is read the same way on every
        # platform instead of depending on the locale default.
        with open(sql_path, "r", encoding="utf-8") as file:
            query = file.read()
        df = pd.read_sql_query(query, conn)
        # Lazy %-formatting: the DataFrame is only rendered to text when the
        # INFO level is actually enabled.
        logging.info("%s data: \n%s", label, df)
        return df

    def retrieve_data_sql(self, conn):
        """
        Load the movie, user and rating tables, then close the connection.

        Args:
            conn: connection to postgresql database. It is closed before this
                method returns or raises, so it cannot be reused afterwards.

        Returns:
            Tuple ``(df_movie, df_user, df_rating)`` of pandas DataFrames.

        Raises:
            Whatever ``open`` or ``pandas.read_sql_query`` raises (e.g. a
            missing SQL file or a failing query); the connection is still
            closed in that case.
        """
        try:
            df_movie = self._read_table(conn, "get_movies_id.sql", "Movie")
            df_user = self._read_table(conn, "get_users_id.sql", "User")
            df_rating = self._read_table(conn, "get_ratings.sql", "Rating")
        finally:
            # Close on the error path too; previously a failing read or query
            # left the connection open.
            conn.close()
        return df_movie, df_user, df_rating