def Talabat_Json_extract(url):
    import json
    import requests
    from bs4 import BeautifulSoup
    import pandas as pd
    from urllib.parse import urlparse
    from io import BytesIO

    def extract_choices(item_id, restaurant_id):
        # Fetch the choice groups (options/add-ons) for a single menu item.
        choice_url = f"https://www.talabat.com/nextMenuApi/v2/branches/{restaurant_id}/menu/{item_id}/choices"
        response = requests.get(choice_url, headers=headers)  # headers comes from the enclosing function
        if response.status_code == 200:
            # The endpoint returns JSON directly, so parse it as such.
            return response.json()
        else:
            print("Failed to retrieve choices for item ID:", item_id)
            return None
    # url = input("enter restaurant URL : ")
    # The menu URL path is expected to end with /<restaurant_id>/<restaurant_name>.
    parsed_url = urlparse(url)
    path_segments = parsed_url.path.split('/')
    restaurant_id = path_segments[-2]
    restaurant_name = path_segments[-1]
    # Use a desktop browser User-Agent so the request is not rejected.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        # The full menu is embedded in the page's Next.js data blob.
        script_tag = soup.find('script', id='__NEXT_DATA__')
        if script_tag:
            json_content = json.loads(script_tag.string.strip())
            menu_data = json_content['props']['pageProps']['initialMenuState']['menuData']['items']
            items = []
            for item in menu_data:
                item_id = item['id']
                name = item['name']
                description = item['description']
                price = item['price']
                original_image = item['originalImage']
                original_section = item['originalSection']
                has_choices = item['hasChoices']
                item_info = {
                    'category': original_section,
                    'category_position': 1,
                    'item_name': name,
                    'item_position': 1,
                    'original_image': original_image,
                    'description': description,
                    'price': price,
                    'item_id': item_id,
                }
                if has_choices:
                    option_groups_info = []
                    choice_data = extract_choices(item_id, restaurant_id)
                    if choice_data:
                        choice_for_item = choice_data["result"]['choiceForItem'][0]  # accessing the first element of the list
                        choice_sections = choice_for_item['choiceSections']
                        for option_group in choice_sections:
                            option_group_info = {
                                'option_group_name': option_group['nm'],
                                'min_quantity': option_group['mnq'],
                                'max_quantity': option_group['mxq'],
                                'option_group_names': []
                            }
                            if 'ich' in option_group:
                                option_group_names = option_group['ich']
                                for option_group_name in option_group_names:
                                    option_group_name_info = {
                                        'option_name': option_group_name['nm'],
                                        'option_price': option_group_name['pr']
                                    }
                                    option_group_info['option_group_names'].append(option_group_name_info)
                            option_groups_info.append(option_group_info)
                    item_info['option_groups'] = option_groups_info
                items.append(item_info)
            # with open(f"{restaurant_name}.json", "w") as json_file:
            #     json.dump(items, json_file, indent=4)
            json_output = json.dumps(items, indent=4)
            # Create a BytesIO object to hold the JSON content
            output = BytesIO()
            output.write(json_output.encode('utf-8'))
            output.seek(0)
            return restaurant_name, output
        else:
            print("Script tag with id '__NEXT_DATA__' not found.")
    else:
        print("Failed to retrieve the webpage. Status code:", response.status_code)
    return None
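
# Minimal usage sketch, assuming the function is run as a standalone script and the
# menu URL path ends with /<restaurant_id>/<restaurant_name>, which is what the
# parsing above expects. The URL below is a hypothetical placeholder, not a real restaurant.
if __name__ == "__main__":
    sample_url = "https://www.talabat.com/uae/restaurant/12345/sample-restaurant"  # hypothetical URL
    result = Talabat_Json_extract(sample_url)
    if result:
        restaurant_name, json_buffer = result
        # Persist the in-memory JSON buffer to a file named after the restaurant slug.
        with open(f"{restaurant_name}.json", "wb") as f:
            f.write(json_buffer.getvalue())
        print(f"Saved menu to {restaurant_name}.json")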