Spaces:
Sleeping
Sleeping
import pandas as pd | |
from pathlib import Path | |
class DataManager:
    """
    Manage the CSV-backed question database.

    Attributes:
        db_file (Path): Path of the main CSV database ("data/database.csv").
        db_level (Path): Path of the level lookup file ("data/db_level"),
            which ``get_id`` reads as CSV.
    """

    def __init__(self):
        """
        Initialize DataManager object with the default database files.
        """
        self.db_file = Path("data") / "database.csv"
        self.db_level = Path("data") / "db_level"

    def get_id(self, level):
        """
        Find the question corresponding to the given level value.

        Despite its name, this method returns the question text, not an ID.

        Args:
            level (str): The value to look up in the "level" column of
                the level file.

        Returns:
            The "question" value of the first matching row, or None when
            no row matches.
        """
        df_level = pd.read_csv(self.db_level)
        matches = df_level.loc[df_level["level"] == level, "question"].values
        return matches[0] if len(matches) > 0 else None

    @staticmethod
    def cleaner(texts):
        """
        Split '|'-separated text and strip quoting characters from each part.

        Declared as a static method because it takes no ``self`` but is
        invoked through the instance (``self.cleaner(...)``).

        Args:
            texts (str): Text data separated by '|'.

        Returns:
            list of str: The parts, each with leading/trailing double
            quotes and newlines removed.
        """
        return [part.strip('"\n') for part in texts.split("|")]

    @staticmethod
    def make_id(level_id, num, next_id):
        """
        Generate row IDs from a level ID and the IDs of the next questions.

        Declared as a static method because it takes no ``self`` but is
        invoked through the instance (``self.make_id(...)``).

        Args:
            level_id (str): The level ID; the value "root" means the IDs
                get no parent component.
            num (int): The number of IDs to generate.
            next_id (list of str): The next IDs; must hold at least
                ``num`` entries.

        Returns:
            list of str: "L{level_id}/{next}" for each of the first ``num``
            next IDs, or "L{next}" when ``level_id`` is "root".

        Raises:
            IndexError: If ``next_id`` has fewer than ``num`` entries.
        """
        prefix = "L" if level_id == "root" else f"L{level_id}/"
        # Index explicitly (rather than slicing) so that too few next IDs
        # raises IndexError instead of silently producing fewer IDs.
        return [f"{prefix}{next_id[i]}" for i in range(num)]

    def make_row(
        self, level, question_text, options, feedback, next_id
    ):
        """
        Generate rows based on input data.

        One row is produced per option; the options, feedback and next IDs
        are matched up by position after splitting on '|'.

        Args:
            level (str): ID of the question.
            question_text (str): Text of the question.
            options (str): '|'-separated texts of the options.
            feedback (str): '|'-separated texts of the actions.
            next_id (str): '|'-separated IDs of the next questions.

        Returns:
            list of list: Rows of the form
            [row_id, question_text, option, feedback, next_id].

        Raises:
            IndexError: If there are fewer feedback entries or next IDs
                than options.
        """
        next_list = self.cleaner(next_id)
        action_list = self.cleaner(feedback)
        option_list = self.cleaner(options)
        list_id = self.make_id(level, len(option_list), next_list)
        return [
            [row_id, question_text, option_list[i], action_list[i], next_list[i]]
            for i, row_id in enumerate(list_id)
        ]

    def _create_dataframe(
        self, level, question_text, options, feedback, next_id
    ):
        """
        Create a DataFrame from the rows built by ``make_row``.

        Args:
            level (str): ID of the question.
            question_text (str): Text of the question.
            options (str): Texts of the options.
            feedback (str): Text of the actions.
            next_id (str): IDs of the next questions.

        Returns:
            pd.DataFrame: Frame with the columns "level", "question_text",
            "options", "feedback" and "next".
        """
        rows = self.make_row(level, question_text, options, feedback, next_id)
        return pd.DataFrame(
            columns=["level", "question_text", "options", "feedback", "next"],
            data=rows,
        )

    @staticmethod
    def _clean_dataframe(df_unclean):
        """
        Clean the DataFrame by removing duplicates and NaN values.

        Declared as a static method because it takes no ``self`` but is
        invoked through the instance (``self._clean_dataframe(...)``).

        Args:
            df_unclean (pd.DataFrame): The unclean DataFrame.

        Returns:
            pd.DataFrame: A new frame without duplicate rows and without
            rows containing NaN.
        """
        return df_unclean.drop_duplicates().dropna()

    def save_to_database(
        self, level, question_text, options, feedback, next_id
    ):
        """
        Append the new rows to the CSV database, clean it and write it back.

        The existing database is read, the rows built from the arguments
        are appended, duplicates and NaN rows are dropped, and the result
        overwrites ``self.db_file``.

        Args:
            level (str): ID of the question.
            question_text (str): Text of the question.
            options (str): Text of the options.
            feedback (str): Text of the actions.
            next_id (str): IDs of the next questions.
        """
        df_input = self._create_dataframe(
            level, question_text, options, feedback, next_id
        )
        df_database = self.read_db()
        df_combined = pd.concat([df_database, df_input], ignore_index=True)
        df_cleaned = self._clean_dataframe(df_combined)
        df_cleaned.to_csv(self.db_file, index=False)

    def read_db(self):
        """
        Read data from the main CSV database and return it as a DataFrame.

        Returns:
            pd.DataFrame: The DataFrame read from ``self.db_file``.
        """
        return pd.read_csv(self.db_file)