# TE-Scrapper / Main_function_For_location.py
# Author: viraj
# Commit: Initial Commit (e79fbb1)
# url = input("Enter Restro Url : ")
from urllib.parse import urlparse
import pymongo
from config import settings
# Entry point: scrape the menu of the restaurant page at `url` (via the nested
# main_json helper) and store the restaurant, its categories, items, option
# groups and option names in the MongoDB database `Restaurants_in_dubai`.
# `location` and `inside_location` are stored on the restaurant document as
# `location_area` and `location_name`.  Returns True when every step finished.
def main(url,location,inside_location):
# One MongoDB client, shared by all nested helpers below through the closure.
# The connection string is read from settings.MONGO_URL.
client = pymongo.MongoClient(settings.MONGO_URL)
def restro_details(url,location_area,location_name):
    """Insert one ``Restaurant_details`` document describing *url*.

    The platform name is derived from the host of *url* and the restaurant
    name is the last segment of its path.

    Args:
        url: Restaurant page URL; stored verbatim and later used by the other
            helpers to find this document again.
        location_area: Stored verbatim under ``location_area``.
        location_name: Stored verbatim under ``location_name``.
    """
    parsed_url = urlparse(url)

    # Hosts normally look like "www.<platform>.<tld>", so the platform is the
    # second label.  Hosts with fewer than three labels have no subdomain:
    # use the first label so "eateasy.ae" gives "Eateasy" (not the TLD "Ae")
    # and a bare host such as "localhost" no longer raises IndexError.
    host_labels = parsed_url.netloc.split('.')
    platform_label = host_labels[1] if len(host_labels) > 2 else host_labels[0]

    restaurant_details = {
        'url': url,
        'platform_name': platform_label.capitalize(),
        "restaurant_name": parsed_url.path.strip('/').split('/')[-1],
        "location_area": location_area,
        "location_name": location_name,
    }

    collection = client.Restaurants_in_dubai.Restaurant_details
    result = collection.insert_one(restaurant_details)
    print("Inserted document IDs:", result.inserted_id)
def main_excel_file(url_input):
    """Scrape a restaurant menu from eateasy.ae and save it as an Excel file.

    The restaurant code is the last path segment of *url_input*.  The menu
    HTML is requested from the ``getFilteredMenus`` endpoint; for every menu
    item that exposes an option id, its option groups are requested from the
    ``add_to_cart_v1`` endpoint.  Each item becomes one row and each option
    group becomes an extra row; option-group columns are then shifted up by
    one row so the first group sits on the same row as its item.  The result
    is written to ``<restaurant_code>_menu.xlsx`` in the working directory.

    Args:
        url_input: URL of the restaurant page.

    Returns:
        None.  Failures of the HTTP requests are reported with ``print``.
    """
    import json

    import pandas as pd
    import requests
    from bs4 import BeautifulSoup
    from urllib.parse import urlparse

    def _child_text(parent, *args, **kwargs):
        """Return the stripped text of the first matching child, or ""."""
        node = parent.find(*args, **kwargs)
        return node.text.strip() if node is not None else ""

    def _max_choice(raw):
        """Return *raw* as a string holding a number that is at least 1.

        The API value may be a string, an int, or absent; comparing it
        directly with the string "1" raises TypeError for non-strings.
        """
        try:
            return str(max(int(raw), 1))
        except (TypeError, ValueError):
            # Non-numeric text keeps the previous lexicographic rule.
            return max(raw, "1") if isinstance(raw, str) else "1"

    # NOTE(review): restId and the session cookie are hard-coded; presumably
    # they were captured from a browser session -- confirm they stay valid.
    payload1 = {'restId': '17902'}
    headers2 = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Cookie': 'eateasy-ae-website=lai3mvcb9hd99nnivbt0pn68ibfjsd6g'
    }

    restaurant_code = urlparse(url_input).path.strip('/').split('/')[-1]
    url = "https://www.eateasy.ae/dubai/food/getFilteredMenus"
    response = requests.post(url, data={"restCode": restaurant_code})
    if response.status_code != 200:
        print(f"Failed to get menu items. Status code: {response.status_code}")
        return

    soup = BeautifulSoup(response.text, 'html.parser')
    menu_items_list = []
    # Category name -> 1-based position in order of first appearance, so a
    # category that shows up again later keeps its own position.
    category_positions = {}

    for i, item_box in enumerate(soup.find_all('div', class_='menu-item-box')):
        img = item_box.find('img')
        image_url = img.get('data-image') if img is not None else None

        details = item_box.find('div', class_='menu-item-details')
        if details is not None:
            # A missing sub-element yields "" instead of aborting the scrape.
            type_ = _child_text(details, 'p', class_='type')
            name = _child_text(details, 'h5', class_='menu-food-title')
            description = _child_text(details, 'p', itemprop='description')
            price = _child_text(details, 'div', class_='menu-item-price')
        else:
            type_ = name = description = price = ""

        options_link = item_box.find('a', class_='menu-list-options')
        value = options_link.get('value') if options_link is not None else None

        category_position = category_positions.setdefault(
            type_, len(category_positions) + 1
        )

        # The item row is appended before its option-group rows.
        menu_items_list.append({
            "Category": type_,
            "Category_position": category_position,
            "Item_name": name,
            "Item_position": i,
            "Image": image_url,
            "description": description,
            "price": price,
            "id": value,
        })

        if value is None:
            continue

        option_url = f"https://www.eateasy.ae/dubai/order/add_to_cart_v1/{value}/1/"
        option_response = requests.post(option_url, headers=headers2, data=payload1)
        if option_response.status_code != 200:
            print(f"Failed to get data for item with value {value}. Status code: {option_response.status_code}")
            continue

        try:
            json_data = json.loads(option_response.text)
        except json.JSONDecodeError:
            print("JSON decoding error. Response content may not be in valid JSON format.")
            continue

        arr_result = json_data.get('arrResult') if isinstance(json_data, dict) else None
        choices = arr_result.get('arrFoodChoice') if isinstance(arr_result, dict) else None

        # One row per option group; options are spread over numbered
        # "Option <n> Name" / "Option <n> Price" columns.
        grouped_data = {}
        for choice in choices or []:
            group_name = choice['choice_name']
            group = grouped_data.get(group_name)
            if group is None:
                group = grouped_data[group_name] = {
                    'Option_group_name': group_name,
                    'Min': choice.get('mandatory', 0),
                    'Max': _max_choice(choice.get('max_choice', 1)),
                }
            # Count only the "Name" keys: every option adds two keys, so
            # counting both numbered the options 1, 3, 5, ...
            option_index = sum(key.endswith(' Name') for key in group) + 1
            group[f"Option {option_index} Name"] = choice['name']
            group[f"Option {option_index} Price"] = choice['price']
        menu_items_list.extend(grouped_data.values())

    df = pd.DataFrame(menu_items_list)
    # 'Max' only exists when at least one option group was collected.
    if 'Max' in df.columns:
        # Blank out the headers of the numbered option columns.
        max_column_index = df.columns.get_loc('Max')
        for col_idx in range(max_column_index + 1, len(df.columns)):
            df.rename(columns={df.columns[col_idx]: ''}, inplace=True)
        # Move option-group data up one row so it lines up with its item.
        option_group_name_index = df.columns.get_loc('Option_group_name')
        for col_idx in range(option_group_name_index, len(df.columns)):
            df.iloc[:, col_idx] = df.iloc[:, col_idx].shift(-1)

    excel_file = f"{restaurant_code}_menu.xlsx"
    df.to_excel(excel_file, index=False)
    print(f"Menu items saved to {excel_file}")
def main_json(url_input):
    """Scrape a restaurant page and return its menu as a list of dicts.

    Every menu item dict has the keys ``Category``, ``Category_position``,
    ``Item_name``, ``Item_position``, ``Image``, ``Description``, ``Price``,
    ``ID``, ``Option_groups`` and ``Has_choice``.  Each entry of
    ``Option_groups`` has ``Option_group_name``, ``Min``, ``Max`` and
    ``Option_group_names`` (a list of ``Option_name`` / ``Option_price``
    dicts).

    Args:
        url_input: URL of the restaurant page.

    Returns:
        A tuple ``(menu_items_list, location_area, location_name)``.  When
        the page request does not return status 200 the list is empty and
        both location values are ``None``; a location value is also ``None``
        when its element is absent from the page.
    """
    import requests
    from bs4 import BeautifulSoup
    from urllib.parse import urlparse

    def _child_text(parent, *args, **kwargs):
        """Return the stripped text of the first matching child, or ""."""
        node = parent.find(*args, **kwargs)
        return node.text.strip() if node is not None else ""

    def _max_choice(raw):
        """Return *raw* as a string holding a number that is at least 1.

        The API value may be a string, an int, or absent; comparing it
        directly with the string "1" raises TypeError for non-strings.
        """
        try:
            return str(max(int(raw), 1))
        except (TypeError, ValueError):
            # Non-numeric text keeps the previous lexicographic rule.
            return max(raw, "1") if isinstance(raw, str) else "1"

    # NOTE(review): restId and the session cookie are hard-coded; presumably
    # they were captured from a browser session -- confirm they stay valid.
    payload1 = {'restId': '17902'}
    headers2 = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Cookie': 'eateasy-ae-website=lai3mvcb9hd99nnivbt0pn68ibfjsd6g'
    }

    restaurant_code = urlparse(url_input).path.strip('/').split('/')[-1]

    # NOTE(review): the restaurant page itself is requested here (the
    # location elements are parsed from this response), not the
    # getFilteredMenus endpoint used by main_excel_file -- confirm intended.
    response = requests.post(url_input, data={"restCode": restaurant_code})

    menu_items_list = []
    # Bound up front so the return below works on every path.
    location_area = None
    location_name = None

    if response.status_code != 200:
        print(f"Failed to get menu items. Status code: {response.status_code}")
        return menu_items_list, location_area, location_name

    soup = BeautifulSoup(response.text, 'html.parser')

    area_node = soup.find('div', class_='location-area')
    name_node = soup.find('div', class_='location')
    if area_node is not None:
        location_area = area_node.text
    if name_node is not None:
        location_name = name_node.text

    # Category name -> 1-based position in order of first appearance, so a
    # category that shows up again later keeps its own position.
    category_positions = {}

    for i, item_box in enumerate(soup.find_all('div', class_='menu-item-box')):
        img = item_box.find('img')
        image_url = img.get('data-image') if img is not None else None

        details = item_box.find('div', class_='menu-item-details')
        if details is not None:
            # A missing sub-element yields "" instead of aborting the scrape.
            category = _child_text(details, 'p', class_='type')
            name = _child_text(details, 'h5', class_='menu-food-title')
            description = _child_text(details, 'p', itemprop='description')
            price = _child_text(details, 'div', class_='menu-item-price')
        else:
            category = name = description = price = ""

        options_link = item_box.find('a', class_='menu-list-options')
        item_id = options_link.get('value') if options_link is not None else None

        menu_item = {
            "Category": category,
            "Category_position": category_positions.setdefault(
                category, len(category_positions) + 1
            ),
            "Item_name": name,
            "Item_position": i,
            "Image": image_url,
            "Description": description,
            "Price": price,
            "ID": item_id,
            "Option_groups": []
        }

        if item_id is not None:
            url1 = f"https://www.eateasy.ae/dubai/order/add_to_cart_v1/{item_id}/1/"
            # Separate name: the page response must not be overwritten.
            option_response = requests.post(url1, headers=headers2, data=payload1)
            if option_response.status_code == 200:
                try:
                    # Every JSON decode error raised by requests derives
                    # from ValueError, whichever JSON backend it uses.
                    json_data = option_response.json()
                except ValueError:
                    print("JSON decoding error. Response content may not be in valid JSON format.")
                    json_data = None

                arr_result = json_data.get('arrResult') if isinstance(json_data, dict) else None
                choices = arr_result.get('arrFoodChoice') if isinstance(arr_result, dict) else None

                # Each choice row is one option; rows sharing a choice_name
                # belong to the same option group.
                groups_by_name = {}
                for choice in choices or []:
                    group_name = choice['choice_name']
                    group = groups_by_name.get(group_name)
                    if group is None:
                        group = groups_by_name[group_name] = {
                            "Option_group_name": group_name,
                            "Min": choice.get('mandatory', 0),
                            "Max": _max_choice(choice.get('max_choice', 1)),
                            "Option_group_names": []
                        }
                        menu_item['Option_groups'].append(group)
                    group['Option_group_names'].append({
                        "Option_name": choice['name'],
                        "Option_price": choice['price']
                    })
            else:
                print(f"Failed to get data for item with value {item_id}. Status code: {option_response.status_code}")

        menu_item["Has_choice"] = bool(menu_item['Option_groups'])
        menu_items_list.append(menu_item)

    # NOTE: no JSON file is written (the dump was disabled); the message is
    # kept unchanged so existing logs stay comparable.
    print(f"Menu items saved to {restaurant_code}.json file.")
    return menu_items_list, location_area, location_name
def extract_category(items,url):
    """Insert one ``Category`` document per distinct category in *items*.

    Each document holds the category name, the ``id``/``name`` of every item
    in that category, and the ``_id`` of the restaurant stored for *url*.

    Args:
        items: Menu item dicts with the keys ``Category``, ``ID`` and
            ``Item_name``.
        url: URL under which the restaurant was stored in
            ``Restaurant_details``.

    Raises:
        LookupError: If there are categories to store but no
            ``Restaurant_details`` document exists for *url*.
    """
    db = client.Restaurants_in_dubai

    # Single pass; dicts keep insertion order, so categories stay in the
    # order in which they first appear on the menu.
    items_by_category = {}
    for item in items:
        items_by_category.setdefault(item['Category'], []).append({
            "id": item["ID"],
            "name": item["Item_name"]
        })

    if not items_by_category:
        # insert_many() rejects an empty list, so stop before touching the DB.
        print("No categories to insert.")
        return

    # The restaurant is the same for every category: look it up once.
    restro = db['Restaurant_details'].find_one({'url': url})
    if not restro:
        raise LookupError(f"No Restaurant_details document found for url {url!r}")
    restro_ref_id = restro['_id']

    categories_info = [
        {
            'category_name': name,
            'items': members,
            'restro_ref_id': restro_ref_id
        }
        for name, members in items_by_category.items()
    ]

    result = db.Category.insert_many(categories_info)
    print("Inserted document IDs:", result.inserted_ids)
def extract_item(items,url):
    """Insert one ``Items`` document per menu item in *items*.

    Every document references the restaurant stored for *url* and the
    ``Category`` document matching the item's category name for that
    restaurant.  ``ref_id_category`` is ``None`` when no such category
    document exists.

    Args:
        items: Menu item dicts with the keys ``ID``, ``Item_name``,
            ``Description``, ``Price``, ``Image``, ``Category`` and
            ``Has_choice`` (plus ``Option_groups`` when it has choices).
        url: URL under which the restaurant was stored in
            ``Restaurant_details``.

    Raises:
        LookupError: If there are items to store but no
            ``Restaurant_details`` document exists for *url*.
    """
    db = client.Restaurants_in_dubai

    if not items:
        # insert_many() rejects an empty list, so stop before touching the DB.
        print("No items to insert.")
        return

    # The restaurant is the same for every item: look it up once.
    restro = db['Restaurant_details'].find_one({'url': url})
    if not restro:
        raise LookupError(f"No Restaurant_details document found for url {url!r}")
    restro_ref_id = restro['_id']

    category_collection = db['Category']
    # Category name -> _id (or None), so each category is queried only once
    # and a missing category can never reuse the previous item's reference.
    category_ids = {}

    items_info = []
    for item in items:
        category_name = item['Category']
        if category_name not in category_ids:
            category = category_collection.find_one({
                'category_name': category_name,
                'restro_ref_id': restro_ref_id
            })
            category_ids[category_name] = category['_id'] if category else None

        if item['Has_choice']:
            modifires = [
                option_group['Option_group_name']
                for option_group in item.get('Option_groups', [])
            ]
        else:
            # The literal string "None" is what is stored for plain items.
            modifires = "None"

        items_info.append({
            'item_id': item['ID'],
            'name': item['Item_name'],
            'description': item['Description'],
            'amount': item['Price'],
            'image': item['Image'],
            'category_name': category_name,
            'modifires': modifires,
            'ref_id_category': category_ids[category_name],
            'restro_ref_id': restro_ref_id
        })

    result = db.Items.insert_many(items_info)
    print("Inserted document IDs:", result.inserted_ids)
def extract_option_group(items,url):
    """Insert one ``OptionGroup`` document per distinct option group.

    Two groups count as the same when they share the group name and the same
    set of option names; only the first occurrence is stored.  ``item_id``
    lists the ``ID`` of every item that carries a group with the same name
    and identical options (names and prices) as that first occurrence.

    Args:
        items: Menu item dicts; groups are read from ``Option_groups`` and
            each group needs ``Option_group_name``, ``Min``, ``Max`` and
            ``Option_group_names`` entries with ``Option_name`` and
            ``Option_price``.
        url: URL under which the restaurant was stored in
            ``Restaurant_details``.

    Raises:
        LookupError: If there are option groups to store but no
            ``Restaurant_details`` document exists for *url*.
    """
    db = client.Restaurants_in_dubai

    # Flatten to (item ID, group, options sorted by name) once, so the
    # matching below neither rescans items nor re-sorts options.
    occurrences = []
    for item in items:
        for group in item.get('Option_groups', []):
            sorted_options = sorted(
                group.get('Option_group_names', []),
                key=lambda option: option['Option_name']
            )
            occurrences.append((item['ID'], group, sorted_options))

    if not occurrences:
        # A menu without any option group is normal, and insert_many()
        # rejects an empty list, so there is nothing to do.
        print("No option groups to insert.")
        return

    # The restaurant is the same for every group: look it up once.
    restro = db['Restaurant_details'].find_one({'url': url})
    if not restro:
        raise LookupError(f"No Restaurant_details document found for url {url!r}")
    restro_ref_id = restro['_id']

    seen_signatures = set()
    option_group_info = []
    for _, group, sorted_options in occurrences:
        option_group_name = group["Option_group_name"]
        option_names = [
            option["Option_name"]
            for option in group.get("Option_group_names", [])
        ]

        # Duplicate check ignores prices: name plus the set of option names.
        signature = (option_group_name, tuple(sorted(option_names)))
        if signature in seen_signatures:
            continue
        seen_signatures.add(signature)

        # Items are linked only when names AND prices of all options match.
        item_ids = [
            other_item_id
            for other_item_id, other_group, other_options in occurrences
            if other_group["Option_group_name"] == option_group_name
            and other_options == sorted_options
        ]

        option_group_info.append({
            "option_group_name": option_group_name,
            "min_quantity": group["Min"],
            "max_quantity": group["Max"],
            "option_names": option_names,
            "item_id": item_ids,
            "restro_ref_id": restro_ref_id
        })

    result = db.OptionGroup.insert_many(option_group_info)
    print("Inserted document IDs:", result.inserted_ids)
def extract_option_group_names(items,url):
    """Insert one ``OptionName`` document per distinct option name.

    Only items whose ``Has_choice`` is true are considered.  For an option
    name that occurs several times, the price of its first occurrence is
    stored.  ``ref_option_group_id`` lists the ``_id`` of every
    ``OptionGroup`` document of this restaurant whose ``option_names``
    contains the option name.

    Args:
        items: Menu item dicts with ``Has_choice`` and, for items with
            choices, ``Option_groups`` whose ``Option_group_names`` entries
            have ``Option_name`` and ``Option_price``.
        url: URL under which the restaurant was stored in
            ``Restaurant_details``.

    Raises:
        LookupError: If there are options to store but no
            ``Restaurant_details`` document exists for *url*.
    """
    db = client.Restaurants_in_dubai

    # Option name -> price of its first occurrence, in first-seen order.
    # Duplicates are decided per option: an already-seen name must not
    # suppress the remaining, new options of the same item.
    unique_options = {}
    for item in items:
        if not item['Has_choice']:
            continue
        for option_group in item.get('Option_groups', []):
            for option in option_group.get("Option_group_names", []):
                unique_options.setdefault(
                    option["Option_name"], option["Option_price"]
                )

    if not unique_options:
        # insert_many() rejects an empty list, so stop before touching the DB.
        print("No option names to insert.")
        return

    # The restaurant is the same for every option: look it up once.
    restro = db['Restaurant_details'].find_one({'url': url})
    if not restro:
        raise LookupError(f"No Restaurant_details document found for url {url!r}")
    restro_ref_id = restro['_id']

    option_group_collection = db['OptionGroup']
    option_names = []
    for option_name, option_price in unique_options.items():
        # Matching a scalar against the array field selects every group
        # whose option_names contains this name.
        matching_documents = option_group_collection.find({
            'restro_ref_id': restro_ref_id,
            'option_names': option_name
        })
        option_names.append({
            "option_name": option_name,
            "option_price": option_price,
            "ref_option_group_id": [doc['_id'] for doc in matching_documents],
            "restro_ref_id": restro_ref_id
        })

    result = db.OptionName.insert_many(option_names)
    print("Inserted document IDs:", result.inserted_ids)
# Pipeline of main(): scrape the menu, then store it.  The order matters:
# the extract_* helpers look up the Restaurant_details document by url,
# extract_item looks up Category documents, and extract_option_group_names
# queries OptionGroup documents, so each step relies on the ones before it.
# NOTE: the location values returned by main_json are not used below; the
# caller-supplied `location` / `inside_location` are stored instead.
items,location_area,location_name = main_json(url)
restro_details(url,location,inside_location)
extract_category(items,url)
extract_item(items,url)
extract_option_group(items,url)
extract_option_group_names(items,url)
# Reached only when no step raised.
return True
# main(url)