|
import json |
|
from glob import glob |
|
import os |
|
|
|
def load_json(path):
    """Read the JSON document stored at ``path`` and return the decoded object.

    The file is opened explicitly as UTF-8 (the encoding JSON files are
    expected to use) so that decoding does not depend on the platform's
    locale default encoding.

    Args:
        path: Path of the JSON file to read.

    Returns:
        The Python object produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    with open(path, "r", encoding="utf-8") as f:
        return json.load(f)
|
|
|
def save_json(path, file_save):
    """Serialize ``file_save`` as JSON into the file at ``path``.

    ``ensure_ascii=False`` makes ``json.dump`` emit non-ASCII characters
    verbatim instead of ``\\uXXXX`` escapes, so the file is opened explicitly
    as UTF-8; relying on the locale default encoding could fail or produce a
    file that cannot be read back on systems whose default is not UTF-8.

    Args:
        path: Destination file path; an existing file is overwritten.
        file_save: JSON-serializable object to write.

    Returns:
        The result of ``json.dump``, i.e. ``None``.

    Raises:
        OSError: If the file cannot be opened for writing.
        TypeError: If ``file_save`` contains a non-serializable object.
    """
    with open(path, "w", encoding="utf-8") as f:
        return json.dump(file_save, f, ensure_ascii=False)
|
|
|
# Item fields that are stripped from every kept search item before the
# cleaned data is written out.
keys_unuse = ["SpecialDesc", "AdsProviders", "CategoryGroupKey", "ReviewsTest", "PromotionPlainTitle"]

# Result mapping: category name -> {base name -> list of cleaned items}.
dict_locations = {}

# Root directory of the crawl; each of its entries is treated as a category.
crawl_dir = "/home/pphuc/Coding/Project/trip-personal-advise/data/crawl/vung_tau/"

categories = os.listdir(crawl_dir)

for category in categories:
    # Sorted so files are processed (and items emitted) in a deterministic
    # order; glob() itself returns paths in arbitrary order.
    data_paths = sorted(glob(f"{crawl_dir}{category}/*"))

    dict_locations[category] = {}

    for file_ in data_paths:
        data = load_json(file_)

        # File name up to its first ".", with "-" turned into spaces and the
        # last whitespace-separated token dropped.
        # NOTE(review): the dropped token is presumably a page/index suffix —
        # confirm against the crawler's file naming.
        base_name = " ".join(os.path.basename(file_).split(".")[0].replace("-", " ").split()[:-1])

        # Several files can map to the same base name once the last token is
        # dropped; setdefault accumulates their items instead of resetting
        # the list and discarding what earlier files contributed.
        items = dict_locations[category].setdefault(base_name, [])

        for location in data:
            temps = location["searchItems"]

            for temp in temps:
                # Keep only items whose "IsOpening" flag is truthy.
                if not temp["IsOpening"]:
                    continue

                for key_ in keys_unuse:
                    # pop with a default: an item missing one of these keys
                    # must not abort the whole run with a KeyError.
                    temp.pop(key_, None)

                items.append(temp)


save_json("/home/pphuc/Coding/Project/trip-personal-advise/data/processed/locations.json", dict_locations)
|
|