Spaces:
Sleeping
Sleeping
File size: 4,866 Bytes
6736fcd |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 |
import pandas as pd
from pathlib import Path
class DataManager:
    """
    Manage the CSV files that back a tree of questions.

    Each stored row describes one answer option of a question: the ID of the
    row, the question text, the option text, the feedback shown for that
    option, and the ID of the question that follows.

    Attributes:
        db_file (Path): Location of the main CSV database.
        db_level (Path): Location of the CSV file that maps a ``level``
            value to a ``question`` text.
    """

    # Column order of the main database; shared by every method that builds
    # or reads database rows so the schema is spelled out in one place only.
    _COLUMNS = ("level", "question_text", "options", "feedback", "next")

    def __init__(self):
        """
        Initialize DataManager object with the default database files.
        """
        self.db_file = Path("data").joinpath("database.csv")
        self.db_level = Path("data").joinpath("db_level")

    def get_id(self, level):
        """
        Find the question corresponding to the given level value.

        Args:
            level (str): The level value to search for. It is compared with
                ``==`` against the ``level`` column of ``db_level``.

        Returns:
            The ``question`` value of the first matching row, or None when
            no row matches.
        """
        df_level = pd.read_csv(self.db_level)
        matches = df_level.loc[df_level["level"] == level, "question"]
        return matches.iloc[0] if not matches.empty else None

    @staticmethod
    def cleaner(texts):
        """
        Split '|'-separated text and strip quotes/newlines from each part.

        Only leading and trailing double quotes and newline characters are
        removed; surrounding spaces are kept as they are.

        Args:
            texts (str): Text data separated by '|'.

        Returns:
            list of str: Cleaned text data, one entry per '|'-separated part.
        """
        return [txt.strip('"\n') for txt in texts.split("|")]

    @staticmethod
    def make_id(level_id, num, next_id):
        """
        Generate row IDs from a level ID and the IDs of the next questions.

        For the special level ``"root"`` the IDs are ``L<next>``; for every
        other level they are ``L<level_id>/<next>``.

        Args:
            level_id (str): The level ID.
            num (int): The number of IDs to generate.
            next_id (list of str): The next IDs; the first ``num`` are used.

        Returns:
            list of str: Generated IDs.

        Raises:
            IndexError: If ``next_id`` has fewer than ``num`` entries.
        """
        prefix = "L" if level_id == "root" else f"L{level_id}/"
        return [f"{prefix}{next_id[i]}" for i in range(num)]

    def make_row(
        self, level, question_text, options, feedback, next_id
    ):
        """
        Generate one database row per answer option.

        Args:
            level (str): ID of the question.
            question_text (str): Text of the question.
            options (str): '|'-separated texts of the options.
            feedback (str): '|'-separated texts of the actions.
            next_id (str): '|'-separated IDs of the next questions.

        Returns:
            list of list: One ``[id, question_text, option, feedback, next]``
            row per option.

        Raises:
            IndexError: If ``feedback`` or ``next_id`` contains fewer
                entries than ``options``.
        """
        next_list = self.cleaner(next_id)
        action_list = self.cleaner(feedback)
        option_list = self.cleaner(options)

        count = len(option_list)
        # Fail with an explicit message instead of a bare "list index out of
        # range" when the '|'-separated inputs do not line up.
        if len(next_list) < count or len(action_list) < count:
            raise IndexError(
                f"expected at least {count} feedback and next entries to "
                f"match the options, got {len(action_list)} feedback and "
                f"{len(next_list)} next"
            )

        list_id = self.make_id(level, count, next_list)
        # list_id and option_list both have `count` items, so zip stops
        # there and surplus feedback/next entries are ignored.
        return [
            [row_id, question_text, option, action, following]
            for row_id, option, action, following in zip(
                list_id, option_list, action_list, next_list
            )
        ]

    def _create_dataframe(
        self, level, question_text, options, feedback, next_id
    ):
        """
        Create a DataFrame from the rows generated for one question.

        Args:
            level (str): ID of the question.
            question_text (str): Text of the question.
            options (str): Texts of the options.
            feedback (str): Text of the actions.
            next_id (str): IDs of the next questions.

        Returns:
            pd.DataFrame: Constructed DataFrame with the database columns.
        """
        rows = self.make_row(
            level, question_text, options, feedback, next_id
        )
        return pd.DataFrame(columns=list(self._COLUMNS), data=rows)

    @staticmethod
    def _clean_dataframe(df_unclean):
        """
        Clean the DataFrame by removing duplicates and NaN values.

        Args:
            df_unclean (pd.DataFrame): The unclean DataFrame.

        Returns:
            pd.DataFrame: A copy without duplicated rows and without rows
            that contain a missing value in any column.
        """
        return df_unclean.drop_duplicates().dropna()

    def _read_existing(self):
        """
        Load the rows already stored in the database for merging.

        Values are read as strings so that they compare equal to freshly
        generated rows (which are always strings); with inferred dtypes a
        stored ``1`` would not match a new ``"1"`` and duplicates would
        never be removed.

        Returns:
            pd.DataFrame: The stored rows, or an empty DataFrame with the
            database columns when the file is missing or empty.
        """
        try:
            return pd.read_csv(self.db_file, dtype=str)
        except (FileNotFoundError, pd.errors.EmptyDataError):
            return pd.DataFrame(columns=list(self._COLUMNS))

    def save_to_database(
        self, level, question_text, options, feedback, next_id
    ):
        """
        Merge the rows of one question into the CSV database.

        The new rows are appended to the existing ones, duplicates and rows
        with missing values are dropped, and the result overwrites
        ``db_file``. The file and its parent directory are created when they
        do not exist yet.

        Args:
            level (str): ID of the question.
            question_text (str): Text of the question.
            options (str): Text of the options.
            feedback (str): Text of the actions.
            next_id (str): IDs of the next questions.
        """
        df_input = self._create_dataframe(
            level, question_text, options, feedback, next_id
        )
        df_database = self._read_existing()
        if df_database.empty:
            # Nothing stored yet: skip the concat with an empty frame.
            df_combined = df_input
        else:
            df_combined = pd.concat(
                [df_database, df_input], ignore_index=True
            )
        df_cleaned = self._clean_dataframe(df_combined)

        # Make sure the target directory exists on the very first save.
        Path(self.db_file).parent.mkdir(parents=True, exist_ok=True)
        df_cleaned.to_csv(self.db_file, index=False)

    def read_db(self):
        """
        Read data from the CSV database and return it as a DataFrame.

        Returns:
            pd.DataFrame: The DataFrame read from ``db_file``.

        Raises:
            FileNotFoundError: If ``db_file`` does not exist.
        """
        return pd.read_csv(self.db_file)