def Talabat_Json_extract(url):
    import json
    import requests
    from bs4 import BeautifulSoup
    import pandas as pd
    from urllib.parse import urlparse
    from io import BytesIO

    def extract_choices(item_id, restaurant_id):
        # Fetch the choice groups (options/add-ons) for a single menu item.
        choice_url = f"https://www.talabat.com/nextMenuApi/v2/branches/{restaurant_id}/menu/{item_id}/choices"
        response = requests.get(choice_url, headers=headers)  # headers comes from the enclosing function
        if response.status_code == 200:
            # The endpoint returns JSON directly, so parse it as such.
            return response.json()
        else:
            print("Failed to retrieve choices for item ID:", item_id)
            return None
    # url = input("enter restaurant URL : ")
    # The menu URL path is expected to end with /<restaurant_id>/<restaurant_name>.
    parsed_url = urlparse(url)
    path_segments = parsed_url.path.split('/')
    restaurant_id = path_segments[-2]
    restaurant_name = path_segments[-1]
    # Use a desktop browser User-Agent so the request is not rejected.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }
    response = requests.get(url, headers=headers)
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        # The full menu is embedded in the page's Next.js data blob.
        script_tag = soup.find('script', id='__NEXT_DATA__')
        if script_tag:
            json_content = json.loads(script_tag.string.strip())
            menu_data = json_content['props']['pageProps']['initialMenuState']['menuData']['items']
            items = []
            for item in menu_data:
                item_id = item['id']
                name = item['name']
                description = item['description']
                price = item['price']
                original_image = item['originalImage']
                original_section = item['originalSection']
                has_choices = item['hasChoices']
                item_info = {
                    'category': original_section,
                    'category_position': 1,
                    'item_name': name,
                    'item_position': 1,
                    'original_image': original_image,
                    'description': description,
                    'price': price,
                    'item_id': item_id,
                }
                if has_choices:
                    option_groups_info = []
                    choice_data = extract_choices(item_id, restaurant_id)
                    if choice_data:
                        choice_for_item = choice_data["result"]['choiceForItem'][0]  # accessing the first element of the list
                        choice_sections = choice_for_item['choiceSections']
                        for option_group in choice_sections:
                            option_group_info = {
                                'option_group_name': option_group['nm'],
                                'min_quantity': option_group['mnq'],
                                'max_quantity': option_group['mxq'],
                                'option_group_names': []
                            }
                            if 'ich' in option_group:
                                option_group_names = option_group['ich']
                                for option_group_name in option_group_names:
                                    option_group_name_info = {
                                        'option_name': option_group_name['nm'],
                                        'option_price': option_group_name['pr']
                                    }
                                    option_group_info['option_group_names'].append(option_group_name_info)
                            option_groups_info.append(option_group_info)
                    item_info['option_groups'] = option_groups_info
                items.append(item_info)
            # with open(f"{restaurant_name}.json", "w") as json_file:
            #     json.dump(items, json_file, indent=4)
            json_output = json.dumps(items, indent=4)
            # Create a BytesIO object to hold the JSON content
            output = BytesIO()
            output.write(json_output.encode('utf-8'))
            output.seek(0)
            return restaurant_name, output
        else:
            print("Script tag with id '__NEXT_DATA__' not found.")
    else:
        print("Failed to retrieve the webpage. Status code:", response.status_code)
    return None
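
# Minimal usage sketch, assuming the function is run as a standalone script and the
# menu URL path ends with /<restaurant_id>/<restaurant_name>, which is what the
# parsing above expects. The URL below is a hypothetical placeholder, not a real restaurant.
if __name__ == "__main__":
    sample_url = "https://www.talabat.com/uae/restaurant/12345/sample-restaurant"  # hypothetical URL
    result = Talabat_Json_extract(sample_url)
    if result:
        restaurant_name, json_buffer = result
        # Persist the in-memory JSON buffer to a file named after the restaurant slug.
        with open(f"{restaurant_name}.json", "wb") as f:
            f.write(json_buffer.getvalue())
        print(f"Saved menu to {restaurant_name}.json")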