Spaces:

digitalai
/

ai-assistant

Sleeping

File size: 4,866 Bytes

6736fcd

import pandas as pd
from pathlib import Path

class DataManager:
    """
    Build question/option rows and persist them in a CSV database.

    Attributes:
        db_file (Path): Location of the main CSV database that
            ``save_to_database`` writes and ``read_db`` reads.
        db_level (Path): Location of the CSV file that ``get_id`` reads;
            it must provide ``level`` and ``question`` columns.
    """

    def __init__(self):
        """
        Initialize DataManager object with the default database files.
        """
        self.db_file = Path("data").joinpath("database.csv")
        # NOTE(review): this path has no ".csv" suffix although it is read
        # with pd.read_csv -- confirm that this is the real file name.
        self.db_level = Path("data").joinpath("db_level")

    def get_id(self, level):
        """
        Find the question corresponding to the given level value.

        Despite its name, this method returns the question text, not an ID.
        The level file is re-read from disk on every call.

        Args:
            level (str): The level value to search for.

        Returns:
            str: The ``question`` value of the first row whose ``level``
            equals ``level``, otherwise None.
        """
        df_level = pd.read_csv(self.db_level)
        # NOTE(review): the comparison is exact; if pandas parses the
        # 'level' column as numbers, a string level will never match --
        # confirm against the contents of the level file.
        matches = df_level.loc[df_level['level'] == level, 'question'].values

        return matches[0] if len(matches) > 0 else None

    @staticmethod
    def cleaner(texts):
        """
        Split '|'-separated text and trim quotes/newlines from each piece.

        Only double quotes and newline characters at the ends of each piece
        are removed; spaces are kept as they are.

        Args:
            texts (str): Text data separated by '|'.

        Returns:
            list of str: Cleaned text data.
        """
        return [txt.strip('"\n') for txt in texts.split("|")]

    @staticmethod
    def make_id(level_id, num, next_id):
        """
        Generate row IDs from a level ID and the IDs of the next questions.

        Every ID starts with "L". For the special level ``"root"`` the ID is
        ``"L<next>"``; for any other level it is ``"L<level_id>/<next>"``.

        Args:
            level_id (str): The level ID.
            num (int): The number of IDs to generate.
            next_id (list of str): The next IDs; the first ``num`` are used.

        Returns:
            list of str: Generated IDs.

        Raises:
            IndexError: If ``next_id`` holds fewer than ``num`` entries.
        """
        if len(next_id) < num:
            raise IndexError(
                f"need {num} next IDs to build the row IDs, "
                f"got {len(next_id)}"
            )
        prefix = "L" if level_id == "root" else f"L{level_id}/"
        # max() keeps a negative num from turning into a "drop from the end"
        # slice; such a request yields no IDs at all.
        return [f"{prefix}{nxt}" for nxt in next_id[:max(num, 0)]]

    def make_row(
            self, level, question_text, options, feedback, next_id
    ):
        """
        Generate one row per option from '|'-separated input strings.

        Args:
            level (str): ID of the question.
            question_text (str): Text of the question.
            options (str): Text of the options, separated by '|'.
            feedback (str): Text of the actions, separated by '|'.
            next_id (str): IDs of the next questions, separated by '|'.

        Returns:
            list of list: Rows of the form
            ``[row_id, question_text, option, action, next]``.

        Raises:
            IndexError: If ``feedback`` or ``next_id`` has fewer entries
                than ``options``.
        """
        next_list = self.cleaner(next_id)
        action_list = self.cleaner(feedback)
        option_list = self.cleaner(options)

        # Every option needs its own feedback and next ID; say which field
        # is short instead of failing with a bare "list index out of range".
        expected = len(option_list)
        for label, values in (("next_id", next_list), ("feedback", action_list)):
            if len(values) < expected:
                raise IndexError(
                    f"{label} has {len(values)} entries "
                    f"but options has {expected}"
                )

        list_id = self.make_id(level, expected, next_list)
        # zip stops after len(list_id) rows, so surplus feedback/next
        # entries are ignored.
        return [
            [row_id, question_text, option, action, nxt]
            for row_id, option, action, nxt
            in zip(list_id, option_list, action_list, next_list)
        ]

    def _create_dataframe(
            self, level, question_text, options, feedback, next_id
    ):
        """
        Create a DataFrame from the rows built by ``make_row``.

        Args:
            level (str): ID of the question.
            question_text (str): Text of the question.
            options (str): Texts of the options.
            feedback (str): Text of the actions.
            next_id (str): IDs of the next questions.

        Returns:
            pd.DataFrame: Frame with the columns ``level``,
            ``question_text``, ``options``, ``feedback`` and ``next``.
        """
        rows = self.make_row(
                level, question_text,
                options, feedback, next_id
        )
        return pd.DataFrame(
                columns=["level", "question_text", "options", "feedback", "next"],
                data=rows,
        )

    @staticmethod
    def _clean_dataframe(df_unclean):
        """
        Clean the DataFrame by removing duplicates and NaN values.

        Args:
            df_unclean (pd.DataFrame): The unclean DataFrame.

        Returns:
            pd.DataFrame: Copy without duplicated rows and without any row
            that contains a missing value.
        """
        return df_unclean.drop_duplicates().dropna()

    def _load_existing_rows(self):
        """
        Read the stored rows as text, tolerating a missing or empty file.

        Returns:
            pd.DataFrame or None: The stored rows with every value kept as
            text, or None when the database file is absent or has no
            content.
        """
        try:
            # dtype=str keeps stored values comparable with the freshly
            # built string rows; with type inference "2" would come back as
            # the integer 2 and never be recognised as a duplicate.
            return pd.read_csv(self.db_file, dtype=str)
        except (FileNotFoundError, pd.errors.EmptyDataError):
            return None

    def save_to_database(
            self, level, question_text, options, feedback, next_id
    ):
        """
        Append the generated rows to the CSV database and rewrite it.

        The new rows are combined with the rows already stored, duplicated
        rows and rows with missing values are dropped, and the result
        replaces the database file. If the file (or its directory) does not
        exist yet, it is created.

        Args:
            level (str): ID of the question.
            question_text (str): Text of the question.
            options (str): Text of the options.
            feedback (str): Text of the actions.
            next_id (str): IDs of the next questions.

        Raises:
            IndexError: If ``feedback`` or ``next_id`` has fewer entries
                than ``options``.
        """
        df_input = self._create_dataframe(
                level,
                question_text,
                options,
                feedback,
                next_id
        )
        df_database = self._load_existing_rows()
        if df_database is None:
            # First save: nothing stored yet, start from the new rows.
            df_combined = df_input
        else:
            df_combined = pd.concat([df_database, df_input], ignore_index=True)
        df_cleaned = self._clean_dataframe(df_combined)

        Path(self.db_file).parent.mkdir(parents=True, exist_ok=True)
        df_cleaned.to_csv(self.db_file, index=False)

    def read_db(self):
        """
        Read data from the database CSV file and return it as a DataFrame.

        Returns:
            pd.DataFrame: The DataFrame read from the CSV file.

        Raises:
            FileNotFoundError: If the database file does not exist.
        """
        return pd.read_csv(self.db_file)