from pathlib import Path
from typing import List, Optional

import pandas as pd


class DataManager:
    """
    Manage the question database stored as CSV files.

    Attributes:
        db_file (pathlib.Path): Path of the main CSV database
            (``data/database.csv``).
        db_level (pathlib.Path): Path of the CSV file that maps a ``level``
            value to its ``question`` text (``data/db_level``).
    """

    # Column order of the main database; rows built by make_row follow it.
    _COLUMNS = ["level", "question_text", "options", "feedback", "next"]

    def __init__(self):
        """
        Initialize DataManager object with the default database files.
        """
        self.db_file = Path("data") / "database.csv"
        self.db_level = Path("data") / "db_level"

    def get_id(self, level):
        """
        Find the question corresponding to the given level value.

        Despite its name, this method returns the question text, not an ID.
        The level file is re-read from disk on every call.

        Args:
            level (str): The level value to search for.

        Returns:
            Optional[str]: The ``question`` cell of the first row whose
            ``level`` equals ``level``, or None when no row matches.
        """
        df_level = pd.read_csv(self.db_level)
        matches = df_level.loc[df_level["level"] == level, "question"].values
        return matches[0] if len(matches) > 0 else None

    @staticmethod
    def cleaner(texts: str) -> List[str]:
        """
        Split '|'-separated text and trim quoting characters from each piece.

        Only leading/trailing double quotes and newline characters are
        removed; surrounding spaces are kept as they are.

        Args:
            texts (str): Text data separated by '|'.

        Returns:
            list of str: One cleaned entry per '|'-separated piece (an empty
            input yields a single empty string).
        """
        return [piece.strip('"\n') for piece in texts.split("|")]

    @staticmethod
    def make_id(level_id, num, next_id):
        """
        Build the row IDs for the first ``num`` entries of ``next_id``.

        Args:
            level_id (str): The level ID, or ``"root"`` for the top level.
            num (int): The number of IDs to generate.
            next_id (list of str): The next IDs.

        Returns:
            list of str: ``"L<next>"`` for the root level, otherwise
            ``"L<level_id>/<next>"``, one per requested entry.

        Raises:
            IndexError: If ``next_id`` holds fewer than ``num`` entries.
        """
        prefix = "L" if level_id == "root" else f"L{level_id}/"
        return [f"{prefix}{next_id[i]}" for i in range(num)]

    def make_row(self, level, question_text, options, feedback, next_id):
        """
        Generate one database row per option.

        ``options``, ``feedback`` and ``next_id`` are '|'-separated strings
        that are matched position by position. The number of options decides
        how many rows are produced; surplus feedback / next entries are
        ignored.

        Args:
            level (str): ID of the question.
            question_text (str): Text of the question.
            options (str): Text of the options.
            feedback (str): Text of the actions.
            next_id (str): IDs of the next questions.

        Returns:
            list of list: Rows in the order
            ``[level, question_text, option, feedback, next]``.

        Raises:
            IndexError: If ``feedback`` or ``next_id`` has fewer entries than
                ``options``.
        """
        next_list = self.cleaner(next_id)
        action_list = self.cleaner(feedback)
        option_list = self.cleaner(options)

        # Fail early with a readable message instead of a bare
        # "list index out of range" from deep inside a comprehension.
        count = len(option_list)
        if len(action_list) < count or len(next_list) < count:
            raise IndexError(
                f"options has {count} entries but feedback has "
                f"{len(action_list)} and next_id has {len(next_list)}; "
                "each option needs a matching feedback and next ID"
            )

        list_id = self.make_id(level, count, next_list)
        # zip stops at the shortest input (list_id / option_list), which
        # drops any surplus feedback or next entries.
        return [
            [row_id, question_text, option, action, target]
            for row_id, option, action, target in zip(
                list_id, option_list, action_list, next_list
            )
        ]

    def _create_dataframe(
        self, level, question_text, options, feedback, next_id
    ):
        """
        Create a DataFrame from the rows generated for one question.

        Args:
            level (str): ID of the question.
            question_text (str): Text of the question.
            options (str): Texts of the options.
            feedback (str): Text of the actions.
            next_id (str): IDs of the next questions.

        Returns:
            pd.DataFrame: Frame with the columns ``level``,
            ``question_text``, ``options``, ``feedback`` and ``next``.
        """
        rows = self.make_row(level, question_text, options, feedback, next_id)
        return pd.DataFrame(columns=list(self._COLUMNS), data=rows)

    @staticmethod
    def _clean_dataframe(df_unclean):
        """
        Clean the DataFrame by removing duplicate rows and rows with NaN.

        Args:
            df_unclean (pd.DataFrame): The unclean DataFrame.

        Returns:
            pd.DataFrame: The cleaned DataFrame.
        """
        return df_unclean.drop_duplicates().dropna()

    def _read_existing_rows(self) -> Optional[pd.DataFrame]:
        """
        Load the stored rows exactly as text, or None if nothing is stored.

        Every cell is read as a string with NA detection disabled. With
        pandas' default inference a stored ``"2"`` comes back as the integer
        2 and a stored ``"None"`` / ``"N/A"`` / empty cell comes back as NaN,
        which makes duplicate detection miss real duplicates and makes
        ``dropna`` delete rows that were saved earlier.

        Returns:
            Optional[pd.DataFrame]: The stored rows, or None when the
            database file is missing or completely empty.
        """
        try:
            return pd.read_csv(self.db_file, dtype=str, keep_default_na=False)
        except (FileNotFoundError, pd.errors.EmptyDataError):
            return None

    def save_to_database(
        self, level, question_text, options, feedback, next_id
    ):
        """
        Append the rows of one question to the CSV database.

        The new rows are combined with the stored ones, duplicates and rows
        containing NaN are removed, and the result overwrites the database
        file. A missing or empty database file (and a missing parent
        directory) is treated as an empty database rather than an error.

        Args:
            level (str): ID of the question.
            question_text (str): Text of the question.
            options (str): Text of the options.
            feedback (str): Text of the actions.
            next_id (str): IDs of the next questions.
        """
        df_input = self._create_dataframe(
            level, question_text, options, feedback, next_id
        )
        df_database = self._read_existing_rows()
        if df_database is None:
            df_combined = df_input
        else:
            df_combined = pd.concat(
                [df_database, df_input], ignore_index=True
            )
        df_cleaned = self._clean_dataframe(df_combined)

        # The first save may happen before the data directory exists.
        Path(self.db_file).parent.mkdir(parents=True, exist_ok=True)
        df_cleaned.to_csv(self.db_file, index=False)

    def read_db(self):
        """
        Read the main database CSV and return it as a DataFrame.

        Column types are inferred by pandas, so numeric-looking cells are
        returned as numbers and empty cells as NaN.

        Returns:
            pd.DataFrame: The DataFrame read from the CSV file.

        Raises:
            FileNotFoundError: If the database file does not exist.
        """
        return pd.read_csv(self.db_file)