# TE-Scrapper / Talabat_files / Talabat_json_final.py
# Author: viraj — Initial Commit (e79fbb1)
def Talabat_Json_extract(url):
    """Scrape a Talabat restaurant page and return its menu as JSON.

    Parameters
    ----------
    url : str
        Restaurant page URL whose last two path segments are
        ``<restaurant_id>/<restaurant_name>``.

    Returns
    -------
    tuple[str, BytesIO]
        ``(restaurant_name, buffer)`` on success, where *buffer* holds the
        UTF-8 encoded, indented JSON list of menu items.
    bool
        ``True`` when the page could not be retrieved or the embedded menu
        data was not found (kept as-is for backward compatibility with
        existing callers).
    """
    import json
    import requests
    from bs4 import BeautifulSoup
    from urllib.parse import urlparse
    from io import BytesIO

    def extract_choices(item_id, restaurant_id):
        """Fetch the option/choice groups for one menu item; None on failure.

        BUG FIX: the original defined this with a single parameter but called
        it with two arguments, raising TypeError for any item with choices.
        ``restaurant_id`` is now an explicit parameter.
        """
        choice_url = (
            f"https://www.talabat.com/nextMenuApi/v2/branches/"
            f"{restaurant_id}/menu/{item_id}/choices"
        )
        response = requests.get(choice_url, headers=headers)
        if response.status_code == 200:
            # The endpoint returns plain JSON; parse it directly instead of
            # round-tripping the body through BeautifulSoup.
            return response.json()
        print("Failed to retrieve choices for item ID:", item_id)
        return None

    parsed_url = urlparse(url)
    path_segments = parsed_url.path.split('/')
    # URL layout: .../<restaurant_id>/<restaurant_name>
    restaurant_id = path_segments[-2]
    restaurant_name = path_segments[-1]
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        # Next.js embeds the full page state as JSON in this script tag.
        script_tag = soup.find('script', id='__NEXT_DATA__')
        if script_tag:
            json_content = json.loads(script_tag.string.strip())
            menu_data = json_content['props']['pageProps']['initialMenuState']['menuData']['items']
            items = []
            for item in menu_data:
                item_id = item['id']
                item_info = {
                    'category': item['originalSection'],
                    # NOTE: key name 'category_postion' (sic) kept verbatim —
                    # downstream consumers may rely on the misspelling.
                    'category_postion': 1,
                    'item_name': item['name'],
                    'item_position': 1,
                    'original_image': item['originalImage'],
                    'description': item['description'],
                    'price': item['price'],
                    'item_id': item_id,
                }
                if item['hasChoices']:
                    option_groups_info = []
                    choice_data = extract_choices(item_id, restaurant_id)
                    if choice_data:
                        # Only the first choiceForItem entry carries the sections.
                        choice_for_item = choice_data["result"]['choiceForItem'][0]
                        for option_group in choice_for_item['choiceSections']:
                            option_group_info = {
                                'option_group_name': option_group['nm'],
                                'min_quantity': option_group['mnq'],
                                'max_quantity': option_group['mxq'],
                                # 'ich' (individual choices) may be absent.
                                'option_group_names': [
                                    {
                                        'option_name': choice['nm'],
                                        'option_price': choice['pr'],
                                    }
                                    for choice in option_group.get('ich', [])
                                ],
                            }
                            option_groups_info.append(option_group_info)
                    item_info['option_groups'] = option_groups_info
                items.append(item_info)

            # Serialize into an in-memory buffer so callers can stream or
            # save it without touching the filesystem.
            output = BytesIO(json.dumps(items, indent=4).encode('utf-8'))
            return restaurant_name, output
        print("Script tag with id '__NEXT_DATA__' not found.")
    else:
        print("Failed to retrieve the webpage. Status code:", response.status_code)
    return True