GetGo-AI / src /data_processing /combined_data.py
pphuc25's picture
chore: add file
5cfff57
raw
history blame contribute delete
No virus
1.29 kB
import json
from glob import glob
import os
def load_json(path):
    """Read the JSON document stored at *path* and return the parsed object.

    The file is decoded explicitly as UTF-8 rather than with the platform's
    locale encoding, so non-ASCII text is read the same way on every system.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The Python object produced by ``json.load`` (dict, list, str, ...).

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
def save_json(path, file_save):
    """Serialize *file_save* as JSON and write it to *path*.

    Non-ASCII characters are written verbatim (``ensure_ascii=False``), so the
    file is opened explicitly as UTF-8; relying on the locale's default
    encoding could raise ``UnicodeEncodeError`` or produce non-UTF-8 output.

    Args:
        path: Destination file; it is created or overwritten.
        file_save: JSON-serializable object to write.

    Returns:
        None (the result of ``json.dump``).

    Raises:
        OSError: If the file cannot be opened for writing.
        TypeError: If *file_save* contains a value JSON cannot represent.
    """
    with open(path, "w", encoding="utf-8") as f:
        return json.dump(file_save, f, ensure_ascii=False)
# Fields stripped from every kept record before it is written out.
keys_unuse = ["SpecialDesc", "AdsProviders", "CategoryGroupKey", "ReviewsTest", "PromotionPlainTitle"]

# Input directory (one entry per category) and the combined output file.
CRAWL_DIR = "/home/pphuc/Coding/Project/trip-personal-advise/data/crawl/vung_tau/"
OUTPUT_PATH = "/home/pphuc/Coding/Project/trip-personal-advise/data/processed/locations.json"

# Resulting structure: {category: {base_name: [record, ...]}}.
dict_locations = {}
categories = os.listdir(CRAWL_DIR)

for category in categories:
    data_paths = glob(os.path.join(CRAWL_DIR, category, "*"))
    dict_locations[category] = {}

    for file_ in data_paths:
        data = load_json(file_)

        # Build the group name from the file name: keep the part before the
        # first ".", turn hyphens into spaces, then drop the final word.
        # NOTE(review): presumably the dropped word is a page/index suffix
        # added by the crawler -- confirm against the crawl naming scheme.
        name_tokens = os.path.basename(file_).split(".")[0].replace("-", " ").split()
        base_name = " ".join(name_tokens[:-1])
        # Files that share a base name restart the list, exactly as before.
        dict_locations[category][base_name] = []
        records = dict_locations[category][base_name]

        for location in data:
            for temp in location["searchItems"]:
                # Keep only records whose "IsOpening" value is truthy.
                if not temp["IsOpening"]:
                    continue
                for key_ in keys_unuse:
                    # pop() with a default tolerates records that lack one of
                    # the unwanted keys; `del` raised KeyError in that case.
                    temp.pop(key_, None)
                records.append(temp)

save_json(OUTPUT_PATH, dict_locations)