import json
from urllib.parse import urlparse

import pandas as pd
import pymongo
import requests
from bs4 import BeautifulSoup

from config import settings

# Browser-like User-Agent sent with every HTTP request made by this module.
_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
}

# Seconds to wait for any single HTTP request before giving up.
_REQUEST_TIMEOUT = 30


def _fetch_choices(item_id, restaurant_id):
    """Download the choices payload for one menu item.

    Returns the decoded JSON object, or ``None`` (after printing a message)
    when the endpoint does not answer with HTTP 200.
    """
    choice_url = f"https://www.talabat.com/nextMenuApi/v2/branches/{restaurant_id}/menu/{item_id}/choices"
    response = requests.get(choice_url, headers=_HEADERS, timeout=_REQUEST_TIMEOUT)
    if response.status_code != 200:
        print("Failed to retrieve choices for item ID:", item_id)
        return None
    # Decode the body directly as JSON; routing it through an HTML parser
    # breaks as soon as a string value contains markup.
    return json.loads(response.text)


def _parse_option_groups(choice_data):
    """Convert a choices payload into a list of option-group dicts."""
    choices_for_item = choice_data["result"]['choiceForItem']
    if not choices_for_item:
        return []

    option_groups = []
    # Only the first element of 'choiceForItem' is read.
    for section in choices_for_item[0]['choiceSections']:
        option_groups.append({
            'option_group_name': section['nm'],
            'min_quantity': section['mnq'],
            'max_quantity': section['mxq'],
            'option_group_names': [
                {'option_name': choice['nm'], 'option_price': choice['pr']}
                for choice in (section.get('ich') or [])
            ],
        })
    return option_groups


def _scrape_menu(soup, url):
    """Extract menu items from the page's ``__NEXT_DATA__`` script tag.

    Args:
        soup: parsed restaurant page.
        url: restaurant page URL; the second-to-last path segment is used as
            the restaurant id for the choices endpoint.

    Returns:
        ``(items, json_content, location)``, or ``None`` when the script tag
        is not present in the page.
    """
    script_tag = soup.find('script', id='__NEXT_DATA__')
    if not script_tag:
        return None

    restaurant_id = urlparse(url).path.split('/')[-2]
    json_content = json.loads(script_tag.string.strip())
    menu_data = json_content['props']['pageProps']['initialMenuState']['menuData']['items']
    location = (
        json_content.get('props', {})
        .get('pageProps', {})
        .get('gtmEventData', {})
        .get('area', {})
        .get('name')
    )

    # Category name -> 1-based position in order of first appearance, so an
    # item always carries its own category's position even when categories
    # are not contiguous in the menu.
    category_positions = {}
    items = []
    for position, item in enumerate(menu_data):
        section = item['originalSection']
        if section not in category_positions:
            category_positions[section] = len(category_positions) + 1

        has_choices = item['hasChoices']
        item_info = {
            'category': section,
            'category_postion': category_positions[section],
            'category_id': item['sectionId'],
            'item_name': item['name'],
            'item_position': position,
            'item_image': item['originalImage'],
            'description': item['description'],
            'price': item['price'],
            'item_id': item['id'],
            'has_choices': has_choices,
        }
        if has_choices:
            choice_data = _fetch_choices(item['id'], restaurant_id)
            # The key is set even when the choices request failed, so
            # consumers can rely on it for every item with choices.
            item_info['option_groups'] = (
                _parse_option_groups(choice_data) if choice_data else []
            )
        items.append(item_info)

    return items, json_content, location


def _build_restaurant_document(url, soup, location):
    """Build the ``Restaurant_details`` document for the scraped page.

    Raises:
        ValueError: if the page has no restaurant title heading.
    """
    platform_name = urlparse(url).netloc.split('.')[1].capitalize()
    restaurant_tag = soup.find('h1', {'data-testid': 'restaurant-title'})
    if restaurant_tag is None or not restaurant_tag.contents:
        raise ValueError(f"Restaurant title not found on page: {url}")
    return {
        'url': url,
        'platform_name': platform_name,
        "restaurant_name": restaurant_tag.contents[0].strip(),
        "location": location,
    }


def _insert_many(collection, documents):
    """Insert ``documents`` and print their ids; do nothing for an empty list."""
    if not documents:
        return
    result = collection.insert_many(documents)
    print("Inserted document IDs:", result.inserted_ids)


def _insert_restaurant(db, restaurant_document):
    """Insert the restaurant and return the ``_id`` other documents reference."""
    collection = db.Restaurant_details
    result = collection.insert_one(restaurant_document)
    print("Inserted document IDs:", result.inserted_id)
    # References are resolved by URL lookup; the freshly inserted id is only
    # a fallback should that lookup come back empty.
    stored = collection.find_one({'url': restaurant_document['url']})
    return stored['_id'] if stored else result.inserted_id


def _insert_categories(db, items, json_content, restro_ref_id):
    """Insert one ``Category`` document per distinct category in ``items``."""
    if not items:
        return
    menu_categories = json_content['props']['pageProps']['initialMenuState']['menuData']['categories']

    documents = []
    seen = set()
    for item in items:
        name = item['category']
        if name in seen:
            continue
        seen.add(name)
        documents.append({
            'category_name': name,
            # Position computed while scraping (order of first appearance).
            'category_position': item['category_postion'],
            'category_isActive': True,
            'items': [
                {"id": member["id"], "name": member["name"]}
                for category in menu_categories
                if category["name"] == name
                for member in category["items"]
            ],
            'restro_ref_id': restro_ref_id,
        })
    _insert_many(db.Category, documents)


def _insert_items(db, items, restro_ref_id):
    """Insert one ``Items`` document per scraped menu item."""
    category_collection = db['Category']
    category_ids = {}  # category name -> Category _id (or None), looked up once

    documents = []
    for item in items:
        category_name = item['category']
        if category_name not in category_ids:
            category = category_collection.find_one({
                'category_name': category_name,
                'restro_ref_id': restro_ref_id,
            })
            category_ids[category_name] = category['_id'] if category else None

        if item['has_choices']:
            modifires = [
                group['option_group_name']
                for group in item.get('option_groups', [])
            ]
        else:
            modifires = "None"

        documents.append({
            'item_id': item['item_id'],
            'name': item['item_name'],
            'description': item['description'],
            'amount': item['price'],
            'image': item['item_image'],
            'category_name': category_name,
            'item_position': item['item_position'],
            'modifires': modifires,
            'ref_id_category': category_ids[category_name],
            'restro_ref_id': restro_ref_id,
        })
    _insert_many(db.Items, documents)


def _option_sort_key(option):
    """Sort key used to compare option lists irrespective of order."""
    return option['option_name']


def _insert_option_groups(db, items, restro_ref_id):
    """Insert one ``OptionGroup`` document per distinct option group.

    Two groups are the same when they share a name and the same set of option
    names. Each stored group lists the ids of all items carrying a group with
    the same name and identical options (names and prices).
    """
    documents = []
    seen = set()
    for item in items:
        for group in item.get('option_groups', []):
            group_name = group["option_group_name"]
            options = [
                {
                    'option_name': option["option_name"],
                    'option_price': option["option_price"],
                }
                for option in group.get("option_group_names", [])
            ]
            option_names = [option['option_name'] for option in options]

            signature = (group_name, tuple(sorted(option_names)))
            if signature in seen:
                continue
            seen.add(signature)

            wanted = sorted(options, key=_option_sort_key)
            item_ids = [
                other['item_id']
                for other in items
                for other_group in other.get('option_groups', [])
                if other_group["option_group_name"] == group_name
                and sorted(
                    other_group.get('option_group_names', []),
                    key=_option_sort_key,
                ) == wanted
            ]
            documents.append({
                "option_group_name": group_name,
                "min_quantity": group["min_quantity"],
                "max_quantity": group["max_quantity"],
                "option_names": option_names,
                "item_id": item_ids,
                "restro_ref_id": restro_ref_id,
            })
    _insert_many(db.OptionGroup, documents)


def _insert_option_names(db, items, restro_ref_id):
    """Insert one ``OptionName`` document per distinct option name.

    Each document references every stored ``OptionGroup`` of this restaurant
    whose ``option_names`` contains the option.
    """
    option_group_collection = db['OptionGroup']

    documents = []
    seen = set()
    for item in items:
        if not item['has_choices']:
            continue
        for group in item.get('option_groups', []):
            for option in group.get("option_group_names", []):
                option_name = option["option_name"]
                # De-duplicate per option: one repeated name must not
                # suppress the remaining, still-new options of the item.
                if option_name in seen:
                    continue
                seen.add(option_name)
                matching_documents = option_group_collection.find({
                    'restro_ref_id': restro_ref_id,
                    'option_names': option_name,
                })
                documents.append({
                    "option_name": option_name,
                    "option_price": option["option_price"],
                    "ref_option_group_id": [doc['_id'] for doc in matching_documents],
                    "restro_ref_id": restro_ref_id,
                })
    _insert_many(db.OptionName, documents)


def Talabat_mongo_data_add(url):
    """Scrape a Talabat restaurant page and store its menu in MongoDB.

    The page is downloaded once. Its embedded ``__NEXT_DATA__`` JSON yields
    the menu items; per-item choices are fetched from the choices endpoint.
    Documents are then written to the ``Restaurants_in_dubai`` database in
    this order: ``Restaurant_details``, ``Category``, ``Items``,
    ``OptionGroup``, ``OptionName`` (later collections look up ids written by
    earlier ones).

    Args:
        url: restaurant page URL.

    Returns:
        ``True`` when the data was scraped and stored; ``False`` when the page
        could not be retrieved or contains no ``__NEXT_DATA__`` script tag
        (a message is printed and nothing is written).

    Raises:
        ValueError: if the page has no restaurant title heading.
        requests.RequestException: on network failure or timeout.
    """
    response = requests.get(url, headers=_HEADERS, timeout=_REQUEST_TIMEOUT)
    if response.status_code != 200:
        print("Failed to retrieve the webpage. Status code:", response.status_code)
        return False

    soup = BeautifulSoup(response.text, 'html.parser')
    scraped = _scrape_menu(soup, url)
    if scraped is None:
        print("Script tag with id '__NEXT_DATA__' not found.")
        return False
    items, json_content, location = scraped

    # Built before touching the database so a malformed page writes nothing.
    restaurant_document = _build_restaurant_document(url, soup, location)

    client = pymongo.MongoClient(settings.MONGO_URL)
    try:
        db = client.Restaurants_in_dubai
        restro_ref_id = _insert_restaurant(db, restaurant_document)
        _insert_categories(db, items, json_content, restro_ref_id)
        _insert_items(db, items, restro_ref_id)
        _insert_option_groups(db, items, restro_ref_id)
        _insert_option_names(db, items, restro_ref_id)
    finally:
        client.close()
    return True