m3hrdadfi committed on
Commit
606a511
1 Parent(s): 0f70e83

Fix data preparation

Browse files
src/preparaing_recipe_nlg_dataset.py CHANGED
@@ -81,6 +81,7 @@ def main():
81
  cache_dir=data_args.cache_dir
82
  )
83
 
 
84
  def cleaning(text, item_type="ner"):
85
  # NOTE: DO THE CLEANING LATER
86
  text = normalizer(text, do_lowercase=True)
@@ -92,9 +93,9 @@ def main():
92
  ingredients = item_dict["ingredients"]
93
  steps = item_dict["directions"]
94
 
95
- condition_1 = filter_by_item(ner, 4)
96
- condition_2 = filter_by_length(title, 10)
97
- condition_3 = filter_by_item(ingredients, 4)
98
  condition_4 = filter_by_item(steps, 2)
99
  condition_5 = filter_by_steps(" ".join(steps))
100
 
@@ -140,6 +141,10 @@ def main():
140
  data_dict.append(item)
141
 
142
  data_df = pd.DataFrame(data_dict)
 
 
 
 
143
  train, test = train_test_split(data_df, test_size=0.05, random_state=101)
144
 
145
  train = train.reset_index(drop=True)
 
81
  cache_dir=data_args.cache_dir
82
  )
83
 
84
+
85
  def cleaning(text, item_type="ner"):
86
  # NOTE: DO THE CLEANING LATER
87
  text = normalizer(text, do_lowercase=True)
 
93
  ingredients = item_dict["ingredients"]
94
  steps = item_dict["directions"]
95
 
96
+ condition_1 = filter_by_item(ner, 3)
97
+ condition_2 = filter_by_length(title, 3)
98
+ condition_3 = filter_by_item(ingredients, 3)
99
  condition_4 = filter_by_item(steps, 2)
100
  condition_5 = filter_by_steps(" ".join(steps))
101
 
 
141
  data_dict.append(item)
142
 
143
  data_df = pd.DataFrame(data_dict)
144
+
145
+ logger.info(f"Preparation - [before] consists of {len(dataset[subset])} records!")
146
+ logger.info(f"Preparation - [after] consists of {len(data_df)} records!")
147
+
148
  train, test = train_test_split(data_df, test_size=0.05, random_state=101)
149
 
150
  train = train.reset_index(drop=True)
src/run.sh CHANGED
@@ -52,4 +52,5 @@ python run_ed_recipe_nlg.py \
52
  --do_train \
53
  --do_eval \
54
  --overwrite_output_dir \
55
- --predict_with_generate
 
 
52
  --do_train \
53
  --do_eval \
54
  --overwrite_output_dir \
55
+ --predict_with_generate \
56
+ --push_to_hub