File size: 1,292 Bytes
5cfff57
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
import json
from glob import glob
import os

def load_json(path):
    """Read the JSON document stored at ``path`` and return the decoded object.

    The file is always decoded as UTF-8 instead of the platform's locale
    encoding, so non-ASCII text is read the same way on every machine.

    Args:
        path: Location of the JSON file to read.

    Returns:
        The Python object produced by ``json.load``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)
    
def save_json(path, file_save):
    """Serialize ``file_save`` as JSON into the file at ``path``.

    Non-ASCII characters are written verbatim (``ensure_ascii=False``), so the
    file is opened explicitly as UTF-8; relying on the locale default encoding
    could raise ``UnicodeEncodeError`` on systems whose default is not UTF-8.

    Args:
        path: Destination file; it is created or overwritten.
        file_save: JSON-serializable object to write.

    Returns:
        None.

    Raises:
        OSError: If the file cannot be opened for writing.
        TypeError: If ``file_save`` contains a value json cannot serialize.
    """
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(file_save, f, ensure_ascii=False)

# Keys removed from every kept search item before it is written out.
keys_unuse = ["SpecialDesc", "AdsProviders", "CategoryGroupKey", "ReviewsTest", "PromotionPlainTitle"]

# Input directory and output file. Each entry of the input directory is
# treated as a category name and globbed for data files.
crawl_dir = "/home/pphuc/Coding/Project/trip-personal-advise/data/crawl/vung_tau/"
output_path = "/home/pphuc/Coding/Project/trip-personal-advise/data/processed/locations.json"

# Result layout: {category: {base_name: [search item, ...]}}
dict_locations = {}
# Sorted so the output file is reproducible; listdir/glob order is arbitrary.
categories = sorted(os.listdir(crawl_dir))
for category in categories:
    data_paths = sorted(glob(os.path.join(crawl_dir, category, "*")))
    dict_locations[category] = {}
    for file_ in data_paths:
        data = load_json(file_)
        # Build the group name from the file name: take the text before the
        # first ".", turn "-" into spaces, and drop the last token.
        # NOTE(review): the dropped token is presumably a page number or id
        # suffix -- confirm against the crawler's naming scheme.
        stem = os.path.basename(file_).split(".")[0]
        base_name = " ".join(stem.replace("-", " ").split()[:-1])
        # Several files can reduce to the same base_name once the last token
        # is dropped; setdefault keeps the items already collected instead of
        # resetting the list and silently losing them.
        items = dict_locations[category].setdefault(base_name, [])
        for location in data:
            for temp in location["searchItems"]:
                # Skip items whose "IsOpening" flag is falsy.
                if not temp["IsOpening"]:
                    continue
                for key_ in keys_unuse:
                    # Not every item necessarily carries every unused key;
                    # pop with a default avoids a KeyError on missing ones.
                    temp.pop(key_, None)
                items.append(temp)

save_json(output_path, dict_locations)