m7mdal7aj committed on
Commit
020595f
1 Parent(s): 589a546

Update my_model/dataset/dataset_processor.py

Browse files
my_model/dataset/dataset_processor.py CHANGED
@@ -35,6 +35,7 @@ class OKVQADatasetProcessor:
35
  self.df_answers = pd.DataFrame(self.annotations)
36
  self.merged_df = None
37
 
 
38
  def load_data_files(self) -> Tuple[List[dict], List[dict]]:
39
  """
40
  Loads the question and annotation data from JSON files.
@@ -52,6 +53,7 @@ class OKVQADatasetProcessor:
52
 
53
  return questions, annotations
54
 
 
55
  @staticmethod
56
  def find_most_frequent(my_list: List[str]) -> Optional[str]:
57
  """
@@ -69,6 +71,7 @@ class OKVQADatasetProcessor:
69
  most_common = counter.most_common(1)
70
  return most_common[0][0]
71
 
 
72
  def merge_data(self) -> None:
73
  """
74
  Merges the question and answer DataFrames on a common key.
@@ -81,9 +84,10 @@ class OKVQADatasetProcessor:
81
  self.merged_df = pd.merge(self.df_questions, self.df_answers, on=['question_id', 'image_id'])
82
 
83
  def join_words_with_hyphen(self, sentence):
84
-
85
  return '-'.join(sentence.split())
86
 
 
87
  def process_answers(self) -> None:
88
  """
89
  Processes answers from merged DataFrame by extracting and identifying the most frequent answers.
@@ -103,6 +107,7 @@ class OKVQADatasetProcessor:
103
  self.merged_df['single_word_answers'] = self.merged_df['most_frequent_processed_answer'].apply(
104
  self.join_words_with_hyphen)
105
 
 
106
  def get_processed_data(self) -> Optional[pd.DataFrame]:
107
  """
108
  Retrieves the processed DataFrame.
@@ -117,6 +122,7 @@ class OKVQADatasetProcessor:
117
  print("DataFrame is empty or not processed yet.")
118
  return None
119
 
 
120
  def save_to_csv(self, df: pd.DataFrame, saved_file_name: Optional[str]) -> None:
121
  """
122
  Saves the DataFrame to a CSV file.
@@ -134,6 +140,7 @@ class OKVQADatasetProcessor:
134
  else:
135
  df.to_csv("data.csv", index=None)
136
 
 
137
  def display_dataframe(self) -> None:
138
  """
139
  Displays the processed DataFrame.
 
35
  self.df_answers = pd.DataFrame(self.annotations)
36
  self.merged_df = None
37
 
38
+
39
  def load_data_files(self) -> Tuple[List[dict], List[dict]]:
40
  """
41
  Loads the question and annotation data from JSON files.
 
53
 
54
  return questions, annotations
55
 
56
+
57
  @staticmethod
58
  def find_most_frequent(my_list: List[str]) -> Optional[str]:
59
  """
 
71
  most_common = counter.most_common(1)
72
  return most_common[0][0]
73
 
74
+
75
  def merge_data(self) -> None:
76
  """
77
  Merges the question and answer DataFrames on a common key.
 
84
  self.merged_df = pd.merge(self.df_questions, self.df_answers, on=['question_id', 'image_id'])
85
 
86
  def join_words_with_hyphen(self, sentence):
87
+
88
  return '-'.join(sentence.split())
89
 
90
+
91
  def process_answers(self) -> None:
92
  """
93
  Processes answers from merged DataFrame by extracting and identifying the most frequent answers.
 
107
  self.merged_df['single_word_answers'] = self.merged_df['most_frequent_processed_answer'].apply(
108
  self.join_words_with_hyphen)
109
 
110
+
111
  def get_processed_data(self) -> Optional[pd.DataFrame]:
112
  """
113
  Retrieves the processed DataFrame.
 
122
  print("DataFrame is empty or not processed yet.")
123
  return None
124
 
125
+
126
  def save_to_csv(self, df: pd.DataFrame, saved_file_name: Optional[str]) -> None:
127
  """
128
  Saves the DataFrame to a CSV file.
 
140
  else:
141
  df.to_csv("data.csv", index=None)
142
 
143
+
144
  def display_dataframe(self) -> None:
145
  """
146
  Displays the processed DataFrame.