alistairmcleay committed on
Commit
19cbc2a
1 Parent(s): 76277a1

Fixing data paths

Browse files
src/crazyneuraluser/UBAR_code/config.py CHANGED
@@ -10,29 +10,25 @@ class _Config:
10
  def _multiwoz_ubar_init(self):
11
  self.gpt_path = "distilgpt2"
12
 
13
- self.vocab_path_train = "cambridge-masters-project/data/preprocessed/UBAR/multi-woz-processed/vocab"
14
  self.vocab_path_eval = None
15
- self.data_path = "cambridge-masters-project/data/preprocessed/UBAR/multi-woz-processed/"
16
  self.data_file = "data_for_ubar.json"
17
- self.dev_list = "cambridge-masters-project/data/raw/UBAR/multi-woz/valListFile.json"
18
- self.test_list = "cambridge-masters-project/data/raw/UBAR/multi-woz/testListFile.json"
19
  self.dbs = {
20
- "attraction": "cambridge-masters-project/data/preprocessed/UBAR/db_processed/attraction_db_processed.json",
21
- "hospital": "cambridge-masters-project/data/preprocessed/UBAR/db_processed/hospital_db_processed.json",
22
- "hotel": "cambridge-masters-project/data/preprocessed/UBAR/db_processed/hotel_db_processed.json",
23
- "police": "cambridge-masters-project/data/preprocessed/UBAR/db_processed/police_db_processed.json",
24
- "restaurant": "cambridge-masters-project/data/preprocessed/UBAR/db_processed/restaurant_db_processed.json",
25
- "taxi": "cambridge-masters-project/data/preprocessed/UBAR/db_processed/taxi_db_processed.json",
26
- "train": "cambridge-masters-project/data/preprocessed/UBAR/db_processed/train_db_processed.json",
27
  }
28
- self.glove_path = "cambridge-masters-project/data/glove/glove.6B.50d.txt"
29
- self.domain_file_path = "cambridge-masters-project/data/preprocessed/UBAR/multi-woz-processed/domain_files.json"
30
- self.slot_value_set_path = (
31
- "cambridge-masters-project/data/preprocessed/UBAR/db_processed/value_set_processed.json"
32
- )
33
- self.multi_acts_path = (
34
- "cambridge-masters-project/data/preprocessed/UBAR/multi-woz-processed/multi_act_mapping_train.json"
35
- )
36
  self.exp_path = "to be generated"
37
  self.log_time = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
38
 
10
  def _multiwoz_ubar_init(self):
11
  self.gpt_path = "distilgpt2"
12
 
13
+ self.vocab_path_train = "data/preprocessed/UBAR/multi-woz-processed/vocab"
14
  self.vocab_path_eval = None
15
+ self.data_path = "data/preprocessed/UBAR/multi-woz-processed/"
16
  self.data_file = "data_for_ubar.json"
17
+ self.dev_list = "data/raw/UBAR/multi-woz/valListFile.json"
18
+ self.test_list = "data/raw/UBAR/multi-woz/testListFile.json"
19
  self.dbs = {
20
+ "attraction": "data/preprocessed/UBAR/db_processed/attraction_db_processed.json",
21
+ "hospital": "data/preprocessed/UBAR/db_processed/hospital_db_processed.json",
22
+ "hotel": "data/preprocessed/UBAR/db_processed/hotel_db_processed.json",
23
+ "police": "data/preprocessed/UBAR/db_processed/police_db_processed.json",
24
+ "restaurant": "data/preprocessed/UBAR/db_processed/restaurant_db_processed.json",
25
+ "taxi": "data/preprocessed/UBAR/db_processed/taxi_db_processed.json",
26
+ "train": "data/preprocessed/UBAR/db_processed/train_db_processed.json",
27
  }
28
+ self.glove_path = "data/glove/glove.6B.50d.txt"
29
+ self.domain_file_path = "data/preprocessed/UBAR/multi-woz-processed/domain_files.json"
30
+ self.slot_value_set_path = "data/preprocessed/UBAR/db_processed/value_set_processed.json"
31
+ self.multi_acts_path = "data/preprocessed/UBAR/multi-woz-processed/multi_act_mapping_train.json"
 
 
 
 
32
  self.exp_path = "to be generated"
33
  self.log_time = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
34
 
src/crazyneuraluser/UBAR_code/reader.py CHANGED
@@ -301,7 +301,7 @@ class MultiWozReader(_ReaderBase):
301
  self.exp_files[fn.replace(".json", "")] = 1
302
  #
303
 
304
- self._load_data()
305
 
306
  if cfg.limit_bspn_vocab:
307
  self.bspn_masks = self._construct_bspn_constraint()
@@ -497,7 +497,7 @@ class MultiWozReader(_ReaderBase):
497
 
498
  if os.path.exists(encoded_file):
499
  logging.info("Reading encoded data from {}".format(encoded_file))
500
- self.data = json.loads(open(cfg.data_path + cfg.data_file, "r", encoding="utf-8").read().lower())
501
  encoded_data = json.loads(open(encoded_file, "r", encoding="utf-8").read())
502
  self.train = encoded_data["train"]
503
  self.dev = encoded_data["dev"]
@@ -505,7 +505,7 @@ class MultiWozReader(_ReaderBase):
505
  else:
506
  logging.info("Encoding data now and save the encoded data in {}".format(encoded_file))
507
  # not exists, encode data and save
508
- self.data = json.loads(open(cfg.data_path + cfg.data_file, "r", encoding="utf-8").read().lower())
509
  self.train, self.dev, self.test = [], [], []
510
  for fn, dial in self.data.items():
511
  if ".json" in fn:
301
  self.exp_files[fn.replace(".json", "")] = 1
302
  #
303
 
304
+ # self._load_data()
305
 
306
  if cfg.limit_bspn_vocab:
307
  self.bspn_masks = self._construct_bspn_constraint()
497
 
498
  if os.path.exists(encoded_file):
499
  logging.info("Reading encoded data from {}".format(encoded_file))
500
+ # self.data = json.loads(open(cfg.data_path + cfg.data_file, "r", encoding="utf-8").read().lower())
501
  encoded_data = json.loads(open(encoded_file, "r", encoding="utf-8").read())
502
  self.train = encoded_data["train"]
503
  self.dev = encoded_data["dev"]
505
  else:
506
  logging.info("Encoding data now and save the encoded data in {}".format(encoded_file))
507
  # not exists, encode data and save
508
+ # self.data = json.loads(open(cfg.data_path + cfg.data_file, "r", encoding="utf-8").read().lower())
509
  self.train, self.dev, self.test = [], [], []
510
  for fn, dial in self.data.items():
511
  if ".json" in fn: