viraj committed on
Commit
e79fbb1
·
1 Parent(s): 34661bd

Initial Commit

Browse files
Dockerfile ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image: official CPython 3.10.
FROM python:3.10

# All following paths are relative to /code inside the image.
WORKDIR /code

# Copy the dependency list on its own first so the install layer below is
# reused from cache whenever only application code changes.
COPY ./requirements.txt /code/requirements.txt

RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt

# Copy the application sources into the working directory.
COPY . .

# Serve the ASGI object `app` from module `app` on all interfaces, port 7860.
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
EatEasy_EveryThing_From_url.py ADDED
@@ -0,0 +1,565 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # url = input("Enter Restro Url : ")
2
+ from urllib.parse import urlparse
3
+ import pymongo
4
+ from config import settings
5
+
6
def EatEasy_EveryThing_From_url(url):
    """Scrape one EatEasy restaurant page and store its menu in MongoDB.

    The page at ``url`` is fetched, every menu item and its option groups are
    extracted, and the result is written to the ``Restaurants_in_dubai``
    database (collections ``Restaurant_details``, ``Category``, ``Items``,
    ``OptionGroup`` and ``OptionName``).

    Args:
        url: Restaurant page URL. Its last path segment is used as the
            restaurant code / name.

    Returns:
        True when the menu was scraped and stored; False when no menu items
        could be fetched (nothing is written in that case).
    """
    client = pymongo.MongoClient(settings.MONGO_URL)
    db = client.Restaurants_in_dubai
    request_timeout = 30  # seconds; keeps a stalled HTTP call from hanging forever

    def text_of(node, default=""):
        """Return the text of a BeautifulSoup node, or ``default`` if it is missing."""
        return node.text if node is not None else default

    def at_least_one(raw):
        """Return ``raw`` unchanged when it is a number >= 1, otherwise ``"1"``."""
        try:
            return raw if int(raw) >= 1 else "1"
        except (TypeError, ValueError):
            return "1"

    def find_restro_id(restro_url):
        """Return the ``_id`` of the stored restaurant whose ``url`` matches."""
        # NOTE(review): find_one returns a single match; if the same URL has
        # been imported more than once, confirm which document is intended.
        restro = db.Restaurant_details.find_one({'url': restro_url})
        if restro is None:
            raise LookupError(f"No Restaurant_details document for url {restro_url!r}")
        return restro['_id']

    def insert_documents(collection, documents):
        """Bulk-insert ``documents``; an empty list is skipped instead of raising."""
        if not documents:
            print("No documents to insert into", collection.name)
            return
        result = collection.insert_many(documents)
        print("Inserted document IDs:", result.inserted_ids)

    def restro_details(url, location_area, location_name):
        """Insert the restaurant's own record into ``Restaurant_details``."""
        parsed_url = urlparse(url)
        labels = parsed_url.netloc.split('.')
        # "www.eateasy.ae" -> "Eateasy"; a host without a sub-domain
        # ("eateasy.ae") uses its first label instead of the TLD.
        platform_name = (labels[1] if len(labels) > 2 else labels[0]).capitalize()
        restaurant_details = {
            'url': url,
            'platform_name': platform_name,
            "restaurant_name": parsed_url.path.strip('/').split('/')[-1],
            "location_area": location_area,
            "location_name": location_name,
        }
        result = db.Restaurant_details.insert_one(restaurant_details)
        print("Inserted document IDs:", result.inserted_id)

    def main_json(url_input):
        """Fetch the restaurant page and parse its menu.

        Returns:
            A ``(menu_items, location_area, location_name)`` tuple. On a
            non-200 response the list is empty and both locations are None.
        """
        import requests
        from bs4 import BeautifulSoup

        # NOTE(review): the same restId and session cookie are sent for every
        # restaurant - confirm the endpoint does not depend on them.
        payload1 = {'restId': '17902'}
        headers2 = {
            'Content-Type': 'application/x-www-form-urlencoded',
            'Cookie': 'eateasy-ae-website=lai3mvcb9hd99nnivbt0pn68ibfjsd6g',
        }

        def fetch_option_groups(item_id):
            """Return the option groups of one item (empty list on any failure)."""
            option_url = f"https://www.eateasy.ae/dubai/order/add_to_cart_v1/{item_id}/1/"
            option_response = requests.post(
                option_url, headers=headers2, data=payload1, timeout=request_timeout
            )
            if option_response.status_code != 200:
                print(f"Failed to get data for item with value {item_id}. Status code: {option_response.status_code}")
                return []
            try:
                json_data = option_response.json()
            except ValueError:  # every JSON decode error type derives from ValueError
                print("JSON decoding error. Response content may not be in valid JSON format.")
                return []

            arr_result = json_data.get('arrResult') if isinstance(json_data, dict) else None
            choices = arr_result.get('arrFoodChoice') if isinstance(arr_result, dict) else None

            # One entry per choice_name, in order of first appearance; Min/Max
            # are taken from the first choice seen for that group.
            groups = {}
            for choice in choices or []:
                group_name = choice['choice_name']
                if group_name not in groups:
                    groups[group_name] = {
                        "Option_group_name": group_name,
                        "Min": choice.get('mandatory', 0),
                        "Max": at_least_one(choice.get('max_choice')),
                        "Option_group_names": [],
                    }
                groups[group_name]["Option_group_names"].append({
                    "Option_name": choice['name'],
                    "Option_price": choice['price'],
                })
            return list(groups.values())

        restaurant_code = urlparse(url_input).path.strip('/').split('/')[-1]
        response = requests.post(
            url_input, data={"restCode": restaurant_code}, timeout=request_timeout
        )
        if response.status_code != 200:
            print(f"Failed to get menu items. Status code: {response.status_code}")
            return [], None, None

        soup = BeautifulSoup(response.text, 'html.parser')
        location_area = text_of(soup.find('div', class_='location-area'), None)
        location_name = text_of(soup.find('div', class_='location'), None)

        menu_items_list = []
        category_positions = {}  # category name -> 1-based order of first appearance
        for position, item_box in enumerate(soup.find_all('div', class_='menu-item-box')):
            img = item_box.find('img')
            image_url = img.get('data-image') if img is not None else None

            details = item_box.find('div', class_='menu-item-details')
            if details is not None:
                category = text_of(details.find('p', class_='type')).strip()
                name = text_of(details.find('h5', class_='menu-food-title')).strip()
                description = text_of(details.find('p', itemprop='description')).strip()
                price = text_of(details.find('div', class_='menu-item-price')).strip()
            else:
                category = name = description = price = ""

            options_link = item_box.find('a', class_='menu-list-options')
            item_id = options_link.get('value') if options_link is not None else None

            category_position = category_positions.setdefault(
                category, len(category_positions) + 1
            )
            option_groups = fetch_option_groups(item_id) if item_id is not None else []

            menu_items_list.append({
                "Category": category,
                "Category_position": category_position,
                "Item_name": name,
                "Item_position": position,
                "Image": image_url,
                "Description": description,
                "Price": price,
                "ID": item_id,
                "Option_groups": option_groups,
                "Has_choice": bool(option_groups),
            })

        print(f"Extracted {len(menu_items_list)} menu items for {restaurant_code}.")
        return menu_items_list, location_area, location_name

    def extract_category(items, url):
        """Insert one ``Category`` document per distinct category in ``items``."""
        restro_ref_id = find_restro_id(url)

        # Group the items by category in a single pass, keeping page order.
        members_by_category = {}
        for item in items:
            members_by_category.setdefault(item['Category'], []).append({
                "id": item["ID"],
                "name": item["Item_name"],
            })

        categories_info = [
            {
                'category_name': category_name,
                'items': members,
                'restro_ref_id': restro_ref_id,
            }
            for category_name, members in members_by_category.items()
        ]
        insert_documents(db.Category, categories_info)

    def extract_item(items, url):
        """Insert one ``Items`` document per menu item, linked to its category."""
        category_collection = db.Category
        restro_ref_id = find_restro_id(url)

        category_ids = {}  # category name -> Category _id (None if not stored)
        items_info = []
        for item in items:
            category_name = item['Category']
            if category_name not in category_ids:
                category = category_collection.find_one({
                    'category_name': category_name,
                    'restro_ref_id': restro_ref_id,
                })
                category_ids[category_name] = category['_id'] if category else None

            if item['Has_choice']:
                modifires = [
                    group['Option_group_name'] for group in item.get('Option_groups', [])
                ]
            else:
                modifires = "None"  # stored as the literal string, as before

            items_info.append({
                'item_id': item['ID'],
                'name': item['Item_name'],
                'description': item['Description'],
                'amount': item['Price'],
                'image': item['Image'],
                'category_name': category_name,
                'modifires': modifires,
                'ref_id_category': category_ids[category_name],
                'restro_ref_id': restro_ref_id,
            })
        insert_documents(db.Items, items_info)

    def extract_option_group(items, url):
        """Insert one ``OptionGroup`` document per distinct option group.

        Two groups are the same when they share a name and the same set of
        option names; the first occurrence is the one that is stored.
        """
        restro_ref_id = find_restro_id(url)

        def sorted_options(group):
            return sorted(group.get('Option_group_names', []), key=lambda o: o['Option_name'])

        option_group_info = []
        seen = set()
        for item in items:
            for group in item.get('Option_groups', []):
                group_name = group["Option_group_name"]
                option_names = [
                    option["Option_name"] for option in group.get("Option_group_names", [])
                ]
                signature = (group_name, tuple(sorted(option_names)))
                if signature in seen:
                    continue
                seen.add(signature)

                # Every item carrying a group with the same name and the same
                # options (names and prices) shares this stored group.
                wanted = sorted_options(group)
                item_ids = [
                    other['ID']
                    for other in items
                    for other_group in other.get('Option_groups', [])
                    if other_group["Option_group_name"] == group_name
                    and sorted_options(other_group) == wanted
                ]

                option_group_info.append({
                    "option_group_name": group_name,
                    "min_quantity": group["Min"],
                    "max_quantity": group["Max"],
                    "option_names": option_names,
                    "item_id": item_ids,
                    "restro_ref_id": restro_ref_id,
                })
        insert_documents(db.OptionGroup, option_group_info)

    def extract_option_group_names(items):
        """Insert one ``OptionName`` document per distinct option name.

        Uses the enclosing function's ``url`` to find the restaurant record.
        """
        option_group_collection = db.OptionGroup
        restro_ref_id = find_restro_id(url)

        option_docs = []
        seen_names = set()
        for item in items:
            if not item['Has_choice']:
                continue
            for group in item.get('Option_groups', []):
                for option in group.get("Option_group_names", []):
                    option_name = option["Option_name"]
                    # Checked per option, so a duplicate does not hide the
                    # new options that follow it in the same item.
                    if option_name in seen_names:
                        continue
                    seen_names.add(option_name)

                    matching_documents = option_group_collection.find({
                        'restro_ref_id': restro_ref_id,
                        'option_names': option_name,
                    })
                    option_docs.append({
                        "option_name": option_name,
                        "option_price": option["Option_price"],
                        "ref_option_group_id": [doc['_id'] for doc in matching_documents],
                        "restro_ref_id": restro_ref_id,
                    })
        insert_documents(db.OptionName, option_docs)

    try:
        items, location_area, location_name = main_json(url)
        if not items:
            return False
        restro_details(url, location_area, location_name)
        extract_category(items, url)
        extract_item(items, url)
        extract_option_group(items, url)
        extract_option_group_names(items)
        return True
    finally:
        client.close()
544
+
545
+
546
+ # main(url)
547
+
548
+
549
+
550
+
551
+
552
+
553
+
554
+
555
+
556
+
557
+
558
+
559
+
560
+
561
+
562
+
563
+
564
+
565
+
EatEasy_Json_only.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def EatEasy_Json_extract(url_input):
    """Scrape an EatEasy restaurant page and return its menu as JSON text.

    Args:
        url_input: Restaurant page URL. Its last path segment is used as the
            restaurant code.

    Returns:
        A ``(json_text, restaurant_code)`` tuple. ``json_text`` is a JSON array
        (indent 4) with one object per menu item; it is ``"[]"`` when the page
        could not be fetched.
    """
    import requests
    import json
    from bs4 import BeautifulSoup
    from urllib.parse import urlparse

    request_timeout = 30  # seconds; keeps a stalled HTTP call from hanging forever

    # NOTE(review): the same restId and session cookie are sent for every
    # restaurant - confirm the endpoint does not depend on them.
    payload1 = {'restId': '17902'}
    headers2 = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Cookie': 'eateasy-ae-website=lai3mvcb9hd99nnivbt0pn68ibfjsd6g',
    }

    def text_of(node):
        """Return the stripped text of a BeautifulSoup node, or "" if it is missing."""
        return node.text.strip() if node is not None else ""

    def at_least_one(raw):
        """Return ``raw`` unchanged when it is a number >= 1, otherwise ``"1"``."""
        try:
            return raw if int(raw) >= 1 else "1"
        except (TypeError, ValueError):
            return "1"

    def fetch_option_groups(item_id):
        """Return the option groups of one item (empty list on any failure)."""
        option_url = f"https://www.eateasy.ae/dubai/order/add_to_cart_v1/{item_id}/1/"
        option_response = requests.post(
            option_url, headers=headers2, data=payload1, timeout=request_timeout
        )
        if option_response.status_code != 200:
            print(f"Failed to get data for item with value {item_id}. Status code: {option_response.status_code}")
            return []
        try:
            json_data = option_response.json()
        except ValueError:  # every JSON decode error type derives from ValueError
            print("JSON decoding error. Response content may not be in valid JSON format.")
            return []

        arr_result = json_data.get('arrResult') if isinstance(json_data, dict) else None
        choices = arr_result.get('arrFoodChoice') if isinstance(arr_result, dict) else None

        # One entry per choice_name, in order of first appearance; Min/Max are
        # taken from the first choice seen for that group.
        groups = {}
        for choice in choices or []:
            group_name = choice['choice_name']
            if group_name not in groups:
                groups[group_name] = {
                    "Option_group_name": group_name,
                    "Min": choice.get('mandatory', 0),
                    "Max": at_least_one(choice.get('max_choice')),
                    "Option_group_names": [],
                }
            groups[group_name]["Option_group_names"].append({
                "Option_name": choice['name'],
                "Option_price": choice['price'],
            })
        return list(groups.values())

    restaurant_code = urlparse(url_input).path.strip('/').split('/')[-1]
    response = requests.post(
        url_input, data={"restCode": restaurant_code}, timeout=request_timeout
    )

    menu_items_list = []
    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')
        category_positions = {}  # category name -> 1-based order of first appearance

        for position, item_box in enumerate(soup.find_all('div', class_='menu-item-box')):
            img = item_box.find('img')
            image_url = img.get('data-image') if img is not None else None

            details = item_box.find('div', class_='menu-item-details')
            if details is not None:
                category = text_of(details.find('p', class_='type'))
                name = text_of(details.find('h5', class_='menu-food-title'))
                description = text_of(details.find('p', itemprop='description'))
                price = text_of(details.find('div', class_='menu-item-price'))
            else:
                category = name = description = price = ""

            options_link = item_box.find('a', class_='menu-list-options')
            item_id = options_link.get('value') if options_link is not None else None

            category_position = category_positions.setdefault(
                category, len(category_positions) + 1
            )
            option_groups = fetch_option_groups(item_id) if item_id is not None else []

            menu_items_list.append({
                "Category": category,
                "Category_position": category_position,
                "Item_name": name,
                "Item_position": position,
                "Image": image_url,
                "Description": description,
                "Price": price,
                "ID": item_id,
                "Option_groups": option_groups,
                "Has_choice": bool(option_groups),
            })
        print(f"Extracted {len(menu_items_list)} menu items for {restaurant_code}.")
    else:
        print(f"Failed to get menu items. Status code: {response.status_code}")

    return json.dumps(menu_items_list, indent=4), restaurant_code
EatEasy_excel_only.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def EatEasy_excel_extract(url_input):
    """Scrape an EatEasy restaurant menu and return it as an in-memory Excel file.

    Each menu item becomes one row; the item's first option group is placed on
    the same row and any further groups on the rows below it.

    Args:
        url_input: Restaurant page URL. Its last path segment is used as the
            restaurant code.

    Returns:
        ``(output, filename)`` where ``output`` is a ``BytesIO`` positioned at
        the start of an .xlsx workbook (sheet "Menu") and ``filename`` is
        ``"<restaurant_code>_menu.xlsx"``. When the menu request does not
        return HTTP 200 the function prints the status and returns ``True``.
    """
    import requests
    import pandas as pd
    from bs4 import BeautifulSoup
    from urllib.parse import urlparse
    from io import BytesIO

    request_timeout = 30  # seconds; keeps a stalled HTTP call from hanging forever

    # NOTE(review): the same restId and session cookie are sent for every
    # restaurant - confirm the endpoint does not depend on them.
    payload1 = {'restId': '17902'}
    headers2 = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Cookie': 'eateasy-ae-website=lai3mvcb9hd99nnivbt0pn68ibfjsd6g',
    }

    def text_of(node):
        """Return the stripped text of a BeautifulSoup node, or "" if it is missing."""
        return node.text.strip() if node is not None else ""

    def at_least_one(raw):
        """Return ``raw`` unchanged when it is a number >= 1, otherwise ``"1"``."""
        try:
            return raw if int(raw) >= 1 else "1"
        except (TypeError, ValueError):
            return "1"

    def fetch_option_rows(item_id):
        """Return one spreadsheet row per option group of an item ([] on failure)."""
        option_url = f"https://www.eateasy.ae/dubai/order/add_to_cart_v1/{item_id}/1/"
        option_response = requests.post(
            option_url, headers=headers2, data=payload1, timeout=request_timeout
        )
        if option_response.status_code != 200:
            print(f"Failed to get data for item with value {item_id}. Status code: {option_response.status_code}")
            return []
        try:
            json_data = option_response.json()
        except ValueError:  # every JSON decode error type derives from ValueError
            print("JSON decoding error. Response content may not be in valid JSON format.")
            return []

        arr_result = json_data.get('arrResult') if isinstance(json_data, dict) else None
        choices = arr_result.get('arrFoodChoice') if isinstance(arr_result, dict) else None

        fixed_keys = 3  # Option_group_name, Min, Max precede the option columns
        rows = {}
        for choice in choices or []:
            group_name = choice['choice_name']
            row = rows.get(group_name)
            if row is None:
                row = rows[group_name] = {
                    'Option_group_name': group_name,
                    'Min': choice.get('mandatory', 0),
                    'Max': at_least_one(choice.get('max_choice', 1)),
                }
            # Each option occupies two columns (Name, Price), numbered 1, 2, 3...
            option_index = (len(row) - fixed_keys) // 2 + 1
            row[f"Option {option_index} Name"] = choice['name']
            row[f"Option {option_index} Price"] = choice['price']
        return list(rows.values())

    restaurant_code = urlparse(url_input).path.strip('/').split('/')[-1]
    url = "https://www.eateasy.ae/dubai/food/getFilteredMenus"
    response = requests.post(url, data={"restCode": restaurant_code}, timeout=request_timeout)

    if response.status_code != 200:
        print(f"Failed to get menu items. Status code: {response.status_code}")
        return True

    soup = BeautifulSoup(response.text, 'html.parser')
    menu_items_list = []
    category_positions = {}  # category name -> 1-based order of first appearance

    for position, item_box in enumerate(soup.find_all('div', class_='menu-item-box')):
        img = item_box.find('img')
        image_url = img.get('data-image') if img is not None else None

        details = item_box.find('div', class_='menu-item-details')
        if details is not None:
            type_ = text_of(details.find('p', class_='type'))
            name = text_of(details.find('h5', class_='menu-food-title'))
            description = text_of(details.find('p', itemprop='description'))
            price = text_of(details.find('div', class_='menu-item-price'))
        else:
            type_ = name = description = price = ""

        options_link = item_box.find('a', class_='menu-list-options')
        value = options_link.get('value') if options_link is not None else None

        # The item row goes first; its option-group rows follow directly so
        # the shift below can lift the first group onto the item row.
        menu_items_list.append({
            "Category": type_,
            "Category_position": category_positions.setdefault(
                type_, len(category_positions) + 1
            ),
            "Item_name": name,
            "Item_position": position,
            "Image": image_url,
            "description": description,
            "price": price,
            "id": value,
        })
        if value is not None:
            menu_items_list.extend(fetch_option_rows(value))

    df = pd.DataFrame(menu_items_list)

    # Option columns only exist when at least one item had option groups.
    if 'Max' in df.columns:
        # Move every option-group cell up one row while the column labels are
        # still unique, then blank the headers of the per-option columns.
        first_group_column = df.columns.get_loc('Option_group_name')
        for column in df.columns[first_group_column:]:
            df[column] = df[column].shift(-1)

        first_option_column = df.columns.get_loc('Max') + 1
        df.columns = (
            list(df.columns[:first_option_column])
            + [''] * (len(df.columns) - first_option_column)
        )

    df_cleaned = df.dropna(how='all')

    output = BytesIO()
    with pd.ExcelWriter(output, engine='openpyxl') as writer:
        df_cleaned.to_excel(writer, index=False, sheet_name='Menu')
    output.seek(0)

    return output, f"{restaurant_code}_menu.xlsx"
Eateasy_Multi_Urls.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
def multi_urls(URLS):
    """Run ``EatEasy_Mongo_Data_add`` once for every URL in ``URLS``, in order."""
    from Mongo_Data_add import EatEasy_Mongo_Data_add as add_to_mongo

    for restaurant_url in URLS:
        add_to_mongo(restaurant_url)
Main_function_For_location.py ADDED
@@ -0,0 +1,565 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # url = input("Enter Restro Url : ")
2
+ from urllib.parse import urlparse
3
+ import pymongo
4
+ from config import settings
5
+
6
+ def main(url,location,inside_location):
7
+
8
+ client = pymongo.MongoClient(settings.MONGO_URL)
9
+
10
+
11
def restro_details(url,location_area,location_name):
    """Insert one ``Restaurant_details`` document describing ``url``.

    The platform name is the capitalised second dot-separated label of the
    URL's host and the restaurant name is the last segment of its path.
    """
    address = urlparse(url)
    host_labels = address.netloc.split('.')
    path_segments = address.path.strip('/').split('/')

    document = {
        'url' : url,
        'platform_name' : host_labels[1].capitalize(),
        "restaurant_name" : path_segments[-1],
        "location_area" : location_area ,
        "location_name" : location_name
    }
    outcome = client.Restaurants_in_dubai.Restaurant_details.insert_one(document)
    print("Inserted document IDs:", outcome.inserted_id)
28
+
29
def main_excel_file(url_input):
    """Scrape an EatEasy restaurant menu and save it as ``<code>_menu.xlsx``.

    The last path segment of ``url_input`` is sent as ``restCode`` to the
    ``getFilteredMenus`` endpoint.  Each menu item becomes one row; each of
    its option groups (fetched from ``add_to_cart_v1``) becomes a further row
    placed after it.  Returns ``None``.  On a non-200 menu response a message
    is printed and no file is written.
    """
    import requests
    import json
    import pandas as pd
    from bs4 import BeautifulSoup
    from urllib.parse import urlparse

    def _at_least_one(raw):
        # The original did max(raw, '1'): a TypeError when raw is the int
        # default (or None) and a lexical comparison otherwise.  Compare
        # numerically and keep the string form the spreadsheet already used.
        try:
            return str(max(int(raw), 1))
        except (TypeError, ValueError):
            return '1'

    # Hard-coded form payload and session cookie, unchanged from the original.
    payload1 = {'restId': '17902'}
    headers2 = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Cookie': 'eateasy-ae-website=lai3mvcb9hd99nnivbt0pn68ibfjsd6g'
    }

    restaurant_code = urlparse(url_input).path.strip('/').split('/')[-1]
    url = "https://www.eateasy.ae/dubai/food/getFilteredMenus"
    response = requests.post(url, data={"restCode": restaurant_code})

    if response.status_code != 200:
        print(f"Failed to get menu items. Status code: {response.status_code}")
        return

    menu_items_list = []
    category_name_list = []
    soup = BeautifulSoup(response.text, 'html.parser')

    for i, item_box in enumerate(soup.find_all('div', class_='menu-item-box')):
        img = item_box.find('img')
        image_url = img.get('data-image') if img else None

        menu_item_details = item_box.find('div', class_='menu-item-details')
        if menu_item_details:
            type_ = menu_item_details.find('p', class_='type').text.strip()
            name = menu_item_details.find('h5', class_='menu-food-title').text.strip()
            description = menu_item_details.find('p', itemprop='description').text.strip()
            price = menu_item_details.find('div', class_='menu-item-price').text.strip()
        else:
            # No details container: fall back to empty strings.
            type_ = name = description = price = ""

        menu_list_options = item_box.find('a', class_='menu-list-options')
        value = menu_list_options.get('value') if menu_list_options else None

        if type_ not in category_name_list:
            category_name_list.append(type_)

        # The item row goes in first so its option-group rows follow it.
        menu_items_list.append({
            "Category": type_,
            # Number of distinct categories seen so far (the original counter).
            "Category_position": len(category_name_list),
            "Item_name": name,
            "Item_position": i,
            "Image": image_url,
            "description": description,
            "price": price,
            "id": value,
        })

        if value is None:
            continue

        option_url = f"https://www.eateasy.ae/dubai/order/add_to_cart_v1/{value}/1/"
        option_response = requests.post(option_url, headers=headers2, data=payload1)
        if option_response.status_code != 200:
            print(f"Failed to get data for item with value {value}. Status code: {option_response.status_code}")
            continue

        try:
            json_data = json.loads(option_response.text)
        except json.JSONDecodeError:
            print("JSON decoding error. Response content may not be in valid JSON format.")
            continue

        choices = []
        if 'arrResult' in json_data and 'arrFoodChoice' in json_data['arrResult']:
            choices = json_data['arrResult']['arrFoodChoice']

        grouped_data = {}
        for choice in choices:
            group_name = choice['choice_name']
            if group_name not in grouped_data:
                grouped_data[group_name] = {
                    'Option_group_name': group_name,
                    'Min': choice.get('mandatory', 0),
                    'Max': _at_least_one(choice.get('max_choice', 1)),
                }
            group = grouped_data[group_name]
            # Each option adds two "Option N ..." keys, so halve the count to
            # number options 1, 2, 3 (the original produced 1, 3, 5).  These
            # headers are blanked below, so only column order matters.
            option_index = sum(key.startswith('Option ') for key in group) // 2 + 1
            group[f"Option {option_index} Name"] = choice['name']
            group[f"Option {option_index} Price"] = choice['price']

        menu_items_list.extend(grouped_data.values())

    df = pd.DataFrame(menu_items_list)

    # Only reshape when at least one option group produced a 'Max' column.
    if 'Max' in df.columns:
        # Blank the header of every column to the right of 'Max'.
        max_column_index = df.columns.get_loc('Max')
        for i in range(max_column_index + 1, len(df.columns)):
            df.rename(columns={df.columns[i]: ''}, inplace=True)

        # Move the option-group cells up one row.
        option_group_name_index = df.columns.get_loc('Option_group_name')
        for i in range(option_group_name_index, len(df.columns)):
            df.iloc[:, i] = df.iloc[:, i].shift(-1)

    excel_file = f"{restaurant_code}_menu.xlsx"
    df.to_excel(excel_file, index=False)
    print(f"Menu items saved to {excel_file}")
173
+
174
+
175
+
176
def main_json(url_input):
    """Scrape an EatEasy restaurant page into a list of menu-item dicts.

    Returns:
        ``(menu_items_list, location_area, location_name)``.  Each menu item
        has the keys ``Category``, ``Category_position``, ``Item_name``,
        ``Item_position``, ``Image``, ``Description``, ``Price``, ``ID``,
        ``Option_groups`` and ``Has_choice``.  When the page request fails the
        list is empty and both locations are ``None``; a location is also
        ``None`` when its element is missing from the page.
    """
    import requests
    import json
    from bs4 import BeautifulSoup
    from urllib.parse import urlparse

    def _at_least_one(raw):
        # The original did max(raw, "1"): a TypeError for non-string values
        # and a lexical comparison otherwise.  Compare numerically and keep
        # the string form that was stored before.
        try:
            return str(max(int(raw), 1))
        except (TypeError, ValueError):
            return "1"

    # Hard-coded form payload and session cookie, unchanged from the original.
    payload1 = {'restId': '17902'}
    headers2 = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Cookie': 'eateasy-ae-website=lai3mvcb9hd99nnivbt0pn68ibfjsd6g'
    }

    restaurant_code = urlparse(url_input).path.strip('/').split('/')[-1]
    data = {
        "restCode": restaurant_code
    }

    menu_items_list = []
    category_name_list = []
    # Bound up front so the final return cannot hit an unbound local.
    location_area = None
    location_name = None

    # The restaurant page itself is requested (not the getFilteredMenus
    # endpoint), as in the original.
    response = requests.post(url_input, data=data)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')

        area_tag = soup.find('div', class_='location-area')
        if area_tag is not None:
            location_area = area_tag.text
        name_tag = soup.find('div', class_='location')
        if name_tag is not None:
            location_name = name_tag.text

        for i, item_box in enumerate(soup.find_all('div', class_='menu-item-box')):
            img = item_box.find('img')
            image_url = img.get('data-image') if img else None

            menu_item_details = item_box.find('div', class_='menu-item-details')
            if menu_item_details:
                category = menu_item_details.find('p', class_='type').text.strip()
                name = menu_item_details.find('h5', class_='menu-food-title').text.strip()
                description = menu_item_details.find('p', itemprop='description').text.strip()
                price = menu_item_details.find('div', class_='menu-item-price').text.strip()
            else:
                # No details container: fall back to empty strings.
                category = name = description = price = ""

            menu_list_options = item_box.find('a', class_='menu-list-options')
            item_id = menu_list_options.get('value') if menu_list_options else None

            if category not in category_name_list:
                category_name_list.append(category)

            menu_item = {
                "Category": category,
                # Number of distinct categories seen so far (the original counter).
                "Category_position": len(category_name_list),
                "Item_name": name,
                "Item_position": i,
                "Image": image_url,
                "Description": description,
                "Price": price,
                "ID": item_id,
                "Option_groups": []
            }

            if item_id is not None:
                url1 = f"https://www.eateasy.ae/dubai/order/add_to_cart_v1/{item_id}/1/"
                # Separate name so the page response is not overwritten.
                option_response = requests.post(url1, headers=headers2, data=payload1)
                if option_response.status_code == 200:
                    try:
                        json_data = option_response.json()
                    except ValueError:
                        # json.JSONDecodeError is a ValueError; catching the
                        # base class also covers other JSON backends.
                        print("JSON decoding error. Response content may not be in valid JSON format.")
                        json_data = {}

                    choices = []
                    if 'arrResult' in json_data and 'arrFoodChoice' in json_data['arrResult']:
                        choices = json_data['arrResult']['arrFoodChoice']

                    groups_by_name = {}
                    for choice in choices:
                        group_name = choice['choice_name']
                        if group_name not in groups_by_name:
                            # Min/Max come from the first option of the group.
                            groups_by_name[group_name] = {
                                "Option_group_name": group_name,
                                "Min": choice['mandatory'],
                                "Max": _at_least_one(choice['max_choice']),
                                "Option_group_names": []
                            }
                            menu_item['Option_groups'].append(groups_by_name[group_name])
                        groups_by_name[group_name]['Option_group_names'].append({
                            "Option_name": choice['name'],
                            "Option_price": choice['price']
                        })
                else:
                    print(f"Failed to get data for item with value {item_id}. Status code: {option_response.status_code}")

            menu_item["Has_choice"] = bool(menu_item['Option_groups'])
            menu_items_list.append(menu_item)
    else:
        print(f"Failed to get menu items. Status code: {response.status_code}")

    # NOTE(review): nothing is written to disk here (the dump is disabled in
    # the original); the message is kept unchanged.
    print(f"Menu items saved to {restaurant_code}.json file.")

    return menu_items_list,location_area,location_name
316
+
317
+
318
def extract_category(items,url):
    """Insert one ``Category`` document per distinct menu category.

    Each document holds the category name, the ``{"id", "name"}`` pairs of
    the items in that category (in menu order) and the ``_id`` of the
    ``Restaurant_details`` document whose ``url`` equals ``url``.  Nothing is
    inserted when ``items`` is empty.

    Raises:
        LookupError: if no ``Restaurant_details`` document exists for ``url``
            (the original failed with an unbound local in that case).
    """
    db = client.Restaurants_in_dubai

    # One pass over the menu; dicts keep insertion order, so categories come
    # out in the order they first appear.
    items_by_category = {}
    for item in items:
        items_by_category.setdefault(item['Category'], []).append({
            "id": item["ID"],
            "name": item["Item_name"]
        })

    if not items_by_category:
        # insert_many() rejects an empty document list.
        return

    # The restaurant is the same for every category: look it up once.
    restro = db['Restaurant_details'].find_one({'url': url})
    if not restro:
        raise LookupError(f"No Restaurant_details document found for url {url!r}")
    restro_ref_id = restro['_id']

    categories_info = [
        {
            'category_name': name,
            'items': category_items,
            'restro_ref_id' : restro_ref_id
        }
        for name, category_items in items_by_category.items()
    ]
    result = db.Category.insert_many(categories_info)
    print("Inserted document IDs:", result.inserted_ids)
362
+
363
def extract_item(items,url):
    """Insert one ``Items`` document per scraped menu item.

    Every document references the ``Restaurant_details`` document for ``url``
    and the ``Category`` document with the item's category name for that
    restaurant.  ``modifires`` is the list of the item's option-group names,
    or the string ``"None"`` when the item has no choices.  Nothing is
    inserted when ``items`` is empty.

    Raises:
        LookupError: if no ``Restaurant_details`` document exists for ``url``.
    """
    if not items:
        # insert_many() rejects an empty document list.
        return

    db = client.Restaurants_in_dubai
    category_collection = db['Category']

    # The restaurant is the same for every item: look it up once.
    restro = db['Restaurant_details'].find_one({'url': url})
    if not restro:
        raise LookupError(f"No Restaurant_details document found for url {url!r}")
    restro_ref_id = restro['_id']

    category_ids = {}  # category name -> Category _id (None when not found)
    items_info = []

    for item in items:
        category_name = item['Category']
        if category_name not in category_ids:
            category = category_collection.find_one(
                {
                    'category_name': category_name,
                    'restro_ref_id': restro_ref_id
                })
            # The original reused the previous item's category id (or failed
            # on an unbound local) when the lookup missed; store None instead.
            category_ids[category_name] = category['_id'] if category else None

        if item['Has_choice']:
            modifires = [
                option_group['Option_group_name']
                for option_group in item.get('Option_groups', [])
            ]
        else:
            modifires = "None"

        items_info.append({
            'item_id': item['ID'],
            'name': item['Item_name'],
            'description': item['Description'],
            'amount': item['Price'],
            'image': item['Image'],
            'category_name':category_name,
            'modifires':modifires,
            'ref_id_category' : category_ids[category_name],
            'restro_ref_id' : restro_ref_id
        })

    result = db.Items.insert_many(items_info)
    print("Inserted document IDs:", result.inserted_ids)
420
+
421
+
422
def extract_option_group(items,url):
    """Insert one ``OptionGroup`` document per distinct option group.

    Two groups count as the same when they share the group name and the same
    set of option names; only the first occurrence is stored.  ``item_id``
    lists the ``ID`` of every item owning a group with that name whose
    options also match in name and price.  Nothing is inserted when no item
    has option groups.

    Raises:
        LookupError: if no ``Restaurant_details`` document exists for ``url``.
    """
    def by_option_name(option):
        return option['Option_name']

    db = client.Restaurants_in_dubai
    seen_groups = set()   # (group name, sorted option names) already kept
    pending = []          # documents still missing restro_ref_id

    for item in items:
        for option_group in item.get('Option_groups', []):
            option_group_name = option_group["Option_group_name"]
            options = [
                {
                    'Option_name': option["Option_name"],
                    'Option_price': option["Option_price"]
                }
                for option in option_group.get("Option_group_names", [])
            ]
            option_names = [option['Option_name'] for option in options]

            # Decide on duplicates before the expensive scan below.
            # assumes option names are hashable (strings) — TODO confirm
            signature = (option_group_name, tuple(sorted(option_names)))
            if signature in seen_groups:
                continue
            seen_groups.add(signature)

            # Distinct loop names: the original rebound the outer loop
            # variables inside this scan.
            wanted = sorted(options, key=by_option_name)
            item_id = []
            for other_item in items:
                for other_group in other_item.get('Option_groups', []):
                    if (other_group["Option_group_name"] == option_group_name and
                            sorted(other_group.get('Option_group_names', []), key=by_option_name) == wanted):
                        item_id.append(other_item['ID'])

            pending.append({
                "option_group_name" : option_group_name,
                "min_quantity" : option_group["Min"],
                "max_quantity" : option_group["Max"],
                "option_names" : option_names,
                "item_id" : item_id,
            })

    if not pending:
        # insert_many() rejects an empty document list.
        return

    # The restaurant is the same for every group: look it up once.
    restro = db['Restaurant_details'].find_one({'url': url})
    if not restro:
        raise LookupError(f"No Restaurant_details document found for url {url!r}")
    for document in pending:
        document["restro_ref_id"] = restro['_id']

    result = db.OptionGroup.insert_many(pending)
    print("Inserted document IDs:", result.inserted_ids)
485
+
486
def extract_option_group_names(items,url):
    """Insert one ``OptionName`` document per distinct option name.

    Only the first occurrence of each option name (with its price) is kept.
    Each document references every ``OptionGroup`` document of this
    restaurant whose ``option_names`` contains the name.  Nothing is inserted
    when no item has options.

    Raises:
        LookupError: if no ``Restaurant_details`` document exists for ``url``.
    """
    db = client.Restaurants_in_dubai
    option_group_collection = db['OptionGroup']

    # Collect each option name once.  The original reset its duplicate flag
    # only once per item, so after the first repeated name every later (new)
    # option of that item was silently dropped; the check is now per option.
    seen_names = set()
    unique_options = []
    for item in items:
        if not item['Has_choice']:
            continue
        for option_group in item.get('Option_groups', []):
            for option in option_group.get("Option_group_names", []):
                option_name = option["Option_name"]
                if option_name in seen_names:
                    continue
                seen_names.add(option_name)
                unique_options.append((option_name, option["Option_price"]))

    if not unique_options:
        # insert_many() rejects an empty document list.
        return

    # The restaurant is the same for every option: look it up once.
    restro = db['Restaurant_details'].find_one({'url': url})
    if not restro:
        raise LookupError(f"No Restaurant_details document found for url {url!r}")
    restro_ref_id = restro['_id']

    option_names = []
    for option_name, option_price in unique_options:
        query = {
            'restro_ref_id': restro_ref_id,
            'option_names': option_name
        }
        matching_ids = [doc['_id'] for doc in option_group_collection.find(query)]
        option_names.append({
            "option_name" : option_name,
            "option_price" : option_price,
            "ref_option_group_id" : matching_ids,
            "restro_ref_id" : restro_ref_id
        })

    result = db.OptionName.insert_many(option_names)
    print("Inserted document IDs:", result.inserted_ids)
536
+
537
+ items,location_area,location_name = main_json(url)
538
+ restro_details(url,location,inside_location)
539
+ extract_category(items,url)
540
+ extract_item(items,url)
541
+ extract_option_group(items,url)
542
+ extract_option_group_names(items,url)
543
+ return True
544
+
545
+
546
+ # main(url)
547
+
548
+
549
+
550
+
551
+
552
+
553
+
554
+
555
+
556
+
557
+
558
+
559
+
560
+
561
+
562
+
563
+
564
+
565
+
Mongo_Data_add.py ADDED
@@ -0,0 +1,398 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from config import settings
2
+ import pymongo
3
+ from urllib.parse import urlparse
4
+ def EatEasy_Mongo_Data_add(url):
5
+
6
+ client = pymongo.MongoClient(settings.MONGO_URL)
7
+
8
def restro_details(url,location_area,location_name):
    """Insert one ``Restaurant_details`` document describing ``url``.

    The platform name is the capitalised second dot-separated label of the
    URL's host; the restaurant name is the last path segment, stripped of
    surrounding whitespace.
    """
    address = urlparse(url)
    host_labels = address.netloc.split('.')
    last_segment = address.path.strip('/').split('/')[-1]

    document = {
        'url' : url,
        'platform_name' : host_labels[1].capitalize(),
        "restaurant_name" : last_segment.strip(),
        "location_area" : location_area ,
        "location_name" : location_name
    }
    outcome = client.Restaurants_in_dubai.Restaurant_details.insert_one(document)
    print("Inserted document IDs:", outcome.inserted_id)
25
+
26
+
27
def main_json(url_input):
    """Scrape an EatEasy restaurant page into a list of menu-item dicts.

    Returns:
        ``(menu_items_list, location_area, location_name)``.  Each menu item
        has the keys ``Category``, ``Category_position``, ``Item_name``,
        ``Item_position``, ``Image``, ``Description``, ``Price``, ``ID``,
        ``Option_groups`` and ``Has_choice``.  ``location_area`` is the text
        of the ``location-area`` div and ``location_name`` the ``value`` of
        the ``menuDeliveryLocationNewModalInput`` input.  When the page
        request fails the list is empty and both locations are ``None``; a
        location is also ``None`` when its element is missing.
    """
    import requests
    import json
    from bs4 import BeautifulSoup
    from urllib.parse import urlparse

    def _at_least_one(raw):
        # The original did max(raw, "1"): a TypeError for non-string values
        # and a lexical comparison otherwise.  Compare numerically and keep
        # the string form that was stored before.
        try:
            return str(max(int(raw), 1))
        except (TypeError, ValueError):
            return "1"

    # Hard-coded form payload and session cookie, unchanged from the original.
    payload1 = {'restId': '17902'}
    headers2 = {
        'Content-Type': 'application/x-www-form-urlencoded',
        'Cookie': 'eateasy-ae-website=lai3mvcb9hd99nnivbt0pn68ibfjsd6g'
    }

    restaurant_code = urlparse(url_input).path.strip('/').split('/')[-1]
    data = {
        "restCode": restaurant_code
    }

    menu_items_list = []
    category_name_list = []
    # Bound up front so the final return cannot hit an unbound local.
    location_area = None
    location_name = None

    # The restaurant page itself is requested (not the getFilteredMenus
    # endpoint), as in the original.
    response = requests.post(url_input, data=data)

    if response.status_code == 200:
        soup = BeautifulSoup(response.text, 'html.parser')

        area_tag = soup.find('div', class_='location-area')
        if area_tag is not None:
            location_area = area_tag.text
        input_element = soup.find('input', {'id': 'menuDeliveryLocationNewModalInput'})
        if input_element is not None:
            location_name = input_element.get('value')

        for i, item_box in enumerate(soup.find_all('div', class_='menu-item-box')):
            img = item_box.find('img')
            image_url = img.get('data-image') if img else None

            menu_item_details = item_box.find('div', class_='menu-item-details')
            if menu_item_details:
                category = menu_item_details.find('p', class_='type').text.strip()
                name = menu_item_details.find('h5', class_='menu-food-title').text.strip()
                description = menu_item_details.find('p', itemprop='description').text.strip()
                price = menu_item_details.find('div', class_='menu-item-price').text.strip()
            else:
                # No details container: fall back to empty strings.
                category = name = description = price = ""

            menu_list_options = item_box.find('a', class_='menu-list-options')
            item_id = menu_list_options.get('value') if menu_list_options else None

            if category not in category_name_list:
                category_name_list.append(category)

            menu_item = {
                "Category": category,
                # Number of distinct categories seen so far (the original counter).
                "Category_position": len(category_name_list),
                "Item_name": name,
                "Item_position": i,
                "Image": image_url,
                "Description": description,
                "Price": price,
                "ID": item_id,
                "Option_groups": []
            }

            if item_id is not None:
                url1 = f"https://www.eateasy.ae/dubai/order/add_to_cart_v1/{item_id}/1/"
                # Separate name so the page response is not overwritten.
                option_response = requests.post(url1, headers=headers2, data=payload1)
                if option_response.status_code == 200:
                    try:
                        json_data = option_response.json()
                    except ValueError:
                        # json.JSONDecodeError is a ValueError; catching the
                        # base class also covers other JSON backends.
                        print("JSON decoding error. Response content may not be in valid JSON format.")
                        json_data = {}

                    choices = []
                    if 'arrResult' in json_data and 'arrFoodChoice' in json_data['arrResult']:
                        choices = json_data['arrResult']['arrFoodChoice']

                    groups_by_name = {}
                    for choice in choices:
                        group_name = choice['choice_name']
                        if group_name not in groups_by_name:
                            # Min/Max come from the first option of the group.
                            groups_by_name[group_name] = {
                                "Option_group_name": group_name,
                                "Min": choice['mandatory'],
                                "Max": _at_least_one(choice['max_choice']),
                                "Option_group_names": []
                            }
                            menu_item['Option_groups'].append(groups_by_name[group_name])
                        groups_by_name[group_name]['Option_group_names'].append({
                            "Option_name": choice['name'],
                            "Option_price": choice['price']
                        })
                else:
                    print(f"Failed to get data for item with value {item_id}. Status code: {option_response.status_code}")

            menu_item["Has_choice"] = bool(menu_item['Option_groups'])
            menu_items_list.append(menu_item)
    else:
        print(f"Failed to get menu items. Status code: {response.status_code}")

    return menu_items_list,location_area,location_name
171
+
172
+
173
def extract_category(items,url):
    """Insert one ``Category`` document per distinct menu category.

    Each document holds the category name, the ``{"id", "name"}`` pairs of
    the items in that category (in menu order) and the ``_id`` of the
    ``Restaurant_details`` document whose ``url`` equals ``url``.  Nothing is
    inserted when ``items`` is empty.

    Raises:
        LookupError: if no ``Restaurant_details`` document exists for ``url``
            (the original failed with an unbound local in that case).
    """
    db = client.Restaurants_in_dubai

    # One pass over the menu; dicts keep insertion order, so categories come
    # out in the order they first appear.
    items_by_category = {}
    for item in items:
        items_by_category.setdefault(item['Category'], []).append({
            "id": item["ID"],
            "name": item["Item_name"]
        })

    if not items_by_category:
        # insert_many() rejects an empty document list.
        return

    # The restaurant is the same for every category: look it up once.
    restro = db['Restaurant_details'].find_one({'url': url})
    if not restro:
        raise LookupError(f"No Restaurant_details document found for url {url!r}")
    restro_ref_id = restro['_id']

    categories_info = [
        {
            'category_name': name,
            'items': category_items,
            'restro_ref_id' : restro_ref_id
        }
        for name, category_items in items_by_category.items()
    ]
    result = db.Category.insert_many(categories_info)
    print("Inserted Category document IDs:", result.inserted_ids)
217
+
218
def extract_item(items,url):
    """Insert one ``Items`` document per scraped menu item.

    Every document references the ``Restaurant_details`` document for ``url``
    and the ``Category`` document with the item's category name for that
    restaurant.  ``modifires`` is the list of the item's option-group names,
    or the string ``"None"`` when the item has no choices.  Nothing is
    inserted when ``items`` is empty.

    Raises:
        LookupError: if no ``Restaurant_details`` document exists for ``url``.
    """
    if not items:
        # insert_many() rejects an empty document list.
        return

    db = client.Restaurants_in_dubai
    category_collection = db['Category']

    # The restaurant is the same for every item: look it up once.
    restro = db['Restaurant_details'].find_one({'url': url})
    if not restro:
        raise LookupError(f"No Restaurant_details document found for url {url!r}")
    restro_ref_id = restro['_id']

    category_ids = {}  # category name -> Category _id (None when not found)
    items_info = []

    for item in items:
        category_name = item['Category']
        if category_name not in category_ids:
            category = category_collection.find_one(
                {
                    'category_name': category_name,
                    'restro_ref_id': restro_ref_id
                })
            # The original reused the previous item's category id (or failed
            # on an unbound local) when the lookup missed; store None instead.
            category_ids[category_name] = category['_id'] if category else None

        if item['Has_choice']:
            modifires = [
                option_group['Option_group_name']
                for option_group in item.get('Option_groups', [])
            ]
        else:
            modifires = "None"

        items_info.append({
            'item_id': item['ID'],
            'name': item['Item_name'],
            'description': item['Description'],
            'amount': item['Price'],
            'image': item['Image'],
            'category_name':category_name,
            'modifires':modifires,
            'ref_id_category' : category_ids[category_name],
            'restro_ref_id' : restro_ref_id
        })

    result = db.Items.insert_many(items_info)
    print("Inserted Items document IDs:", result.inserted_ids)
275
+
276
+
277
def extract_option_group(items,url):
    """Insert one ``OptionGroup`` document per distinct option group.

    Two groups count as the same when they share the group name and the same
    set of option names; only the first occurrence is stored.  ``item_id``
    lists the ``ID`` of every item owning a group with that name whose
    options also match in name and price.  Nothing is inserted when no item
    has option groups.

    Raises:
        LookupError: if no ``Restaurant_details`` document exists for ``url``.
    """
    def by_option_name(option):
        return option['Option_name']

    db = client.Restaurants_in_dubai
    seen_groups = set()   # (group name, sorted option names) already kept
    pending = []          # documents still missing restro_ref_id

    for item in items:
        for option_group in item.get('Option_groups', []):
            option_group_name = option_group["Option_group_name"]
            options = [
                {
                    'Option_name': option["Option_name"],
                    'Option_price': option["Option_price"]
                }
                for option in option_group.get("Option_group_names", [])
            ]
            option_names = [option['Option_name'] for option in options]

            # Decide on duplicates before the expensive scan below.
            # assumes option names are hashable (strings) — TODO confirm
            signature = (option_group_name, tuple(sorted(option_names)))
            if signature in seen_groups:
                continue
            seen_groups.add(signature)

            # Distinct loop names: the original rebound the outer loop
            # variables inside this scan.
            wanted = sorted(options, key=by_option_name)
            item_id = []
            for other_item in items:
                for other_group in other_item.get('Option_groups', []):
                    if (other_group["Option_group_name"] == option_group_name and
                            sorted(other_group.get('Option_group_names', []), key=by_option_name) == wanted):
                        item_id.append(other_item['ID'])

            pending.append({
                "option_group_name" : option_group_name,
                "min_quantity" : option_group["Min"],
                "max_quantity" : option_group["Max"],
                "option_names" : option_names,
                "item_id" : item_id,
            })

    if not pending:
        # insert_many() rejects an empty document list.
        return

    # The restaurant is the same for every group: look it up once.
    restro = db['Restaurant_details'].find_one({'url': url})
    if not restro:
        raise LookupError(f"No Restaurant_details document found for url {url!r}")
    for document in pending:
        document["restro_ref_id"] = restro['_id']

    result = db.OptionGroup.insert_many(pending)
    print("Inserted Option_Groups document IDs:", result.inserted_ids)
340
+
341
def extract_option_group_names(items,url):
    """Insert one ``OptionName`` document per distinct option name.

    Only the first occurrence of each option name (with its price) is kept.
    Each document references every ``OptionGroup`` document of this
    restaurant whose ``option_names`` contains the name.  Nothing is inserted
    when no item has options.

    Raises:
        LookupError: if no ``Restaurant_details`` document exists for ``url``.
    """
    db = client.Restaurants_in_dubai
    option_group_collection = db['OptionGroup']

    # Collect each option name once.  The original reset its duplicate flag
    # only once per item, so after the first repeated name every later (new)
    # option of that item was silently dropped; the check is now per option.
    seen_names = set()
    unique_options = []
    for item in items:
        if not item['Has_choice']:
            continue
        for option_group in item.get('Option_groups', []):
            for option in option_group.get("Option_group_names", []):
                option_name = option["Option_name"]
                if option_name in seen_names:
                    continue
                seen_names.add(option_name)
                unique_options.append((option_name, option["Option_price"]))

    if not unique_options:
        # insert_many() rejects an empty document list.
        return

    # The restaurant is the same for every option: look it up once.
    restro = db['Restaurant_details'].find_one({'url': url})
    if not restro:
        raise LookupError(f"No Restaurant_details document found for url {url!r}")
    restro_ref_id = restro['_id']

    option_names = []
    for option_name, option_price in unique_options:
        query = {
            'restro_ref_id': restro_ref_id,
            'option_names': option_name
        }
        matching_ids = [doc['_id'] for doc in option_group_collection.find(query)]
        option_names.append({
            "option_name" : option_name,
            "option_price" : option_price,
            "ref_option_group_id" : matching_ids,
            "restro_ref_id" : restro_ref_id
        })

    result = db.OptionName.insert_many(option_names)
    print("Inserted Options_name document IDs:", result.inserted_ids)
391
+
392
+ items,location_area,location_name = main_json(url)
393
+ restro_details(url,location_area,location_name)
394
+ extract_category(items,url)
395
+ extract_item(items,url)
396
+ extract_option_group(items,url)
397
+ extract_option_group_names(items,url)
398
+ return True
Talabat_files/Multi_urls.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
def multi_url(URLS):
    """Run the full Talabat scrape-and-store pipeline for each URL in *URLS*, in order."""
    # Imported at call time, exactly as before, so importing this module
    # does not pull in the scraper and its dependencies.
    from Talabat_files.Talabat_Everything_From_url import (
        Talabat_EveryThing_From_url as scrape_restaurant,
    )

    for restaurant_url in URLS:
        scrape_restaurant(restaurant_url)
5
+
6
+
Talabat_files/Talabat_Everything_From_url.py ADDED
@@ -0,0 +1,523 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # url = input("enter Restro url : ")
2
+
3
+ import json
4
+ import pymongo
5
+ import requests
6
+ import json
7
+ from bs4 import BeautifulSoup
8
+ import pandas as pd
9
+ from urllib.parse import urlparse
10
+ from config import settings
11
+
12
+ def Talabat_EveryThing_From_url(url):
13
+
14
+ client = pymongo.MongoClient(settings.MONGO_URL)
15
def restro_details(url,location):
    """Scrape the restaurant title from *url* and store it in ``Restaurant_details``.

    Args:
        url: Restaurant page URL; stored verbatim in the document.
        location: Value stored under the ``location`` key.

    Raises:
        ValueError: If the page has no
            ``<h1 data-testid="restaurant-title">`` element.
    """
    db = client.Restaurants_in_dubai
    collection = db.Restaurant_details

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(url, headers=headers, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')

    # urlparse's second positional argument is the default *scheme*, so
    # `location` must not be passed to it.
    parsed_url = urlparse(url)
    # Second dot-separated label of the host, e.g. "www.talabat.com" -> "Talabat".
    platform_name = parsed_url.netloc.split('.')[1].capitalize()

    restaurant_tag = soup.find('h1', {'data-testid': 'restaurant-title'})
    if restaurant_tag is None or not restaurant_tag.contents:
        raise ValueError(
            f"Restaurant title not found at {url} (HTTP status {response.status_code})"
        )
    restaurant_name = restaurant_tag.contents[0].strip()

    restaurant_details = {
        'url' : url,
        'platform_name' : platform_name,
        "restaurant_name" : restaurant_name,
        "location" : location
    }
    result = collection.insert_one(restaurant_details)
    print("Inserted document IDs:", result.inserted_id)
42
+
43
+
44
def excel_extract(url):
    """Scrape a Talabat restaurant menu and save it to ``<slug>_menu.xlsx``.

    The restaurant id and slug are taken from the last two path segments of
    *url*. Each menu item becomes one row; the option groups of an item are
    added as extra rows and then shifted up by one row (see below).

    Args:
        url: Talabat restaurant page URL.

    Returns:
        None. Prints a message and returns early when the page cannot be
        fetched or has no ``__NEXT_DATA__`` script tag.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    def extract_choices(item_id,restaurant_id):
        """Return the choices JSON for one item, or None on a non-200 response."""
        choice_url = f"https://www.talabat.com/nextMenuApi/v2/branches/{restaurant_id}/menu/{item_id}/choices"
        response = requests.get(choice_url, headers=headers, timeout=30)
        if response.status_code == 200:
            return response.json()
        print("Failed to retrieve choices for item ID:", item_id)
        return None

    path_segments = urlparse(url).path.split('/')
    restaurant_id = path_segments[-2]
    restaurant_name = path_segments[-1]

    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        print(f"Failed to get menu items. Status code: {response.status_code}")
        return

    soup = BeautifulSoup(response.text, 'html.parser')
    script_tag = soup.find('script', id='__NEXT_DATA__')
    if not script_tag:
        print("Script tag with id '__NEXT_DATA__' not found.")
        return

    json_content = json.loads(script_tag.string.strip())
    menu_data = json_content['props']['pageProps']['initialMenuState']['menuData']['items']

    # 1-based position of each category in order of first appearance. A dict
    # keeps the position correct even when a category shows up again later.
    category_positions = {}
    menu_items_list = []
    for i,item in enumerate(menu_data):
        item_id = item['id']
        original_section = item['originalSection']
        category_position = category_positions.setdefault(
            original_section, len(category_positions) + 1
        )

        menu_items_list.append({
            "Category": original_section,
            "Category_positon": category_position,
            "Item_name": item['name'],
            "Item_position": i+1,
            "Image": item['originalImage'],
            "description": item['description'],
            "price": item['price'],
            "id": item_id
        })

        if not item['hasChoices']:
            continue
        choice_data = extract_choices(item_id,restaurant_id)
        if not choice_data:
            continue

        # Only the first entry of 'choiceForItem' is used; tolerate an empty list.
        choices = choice_data["result"].get('choiceForItem', [])
        choice_sections = choices[0].get('choiceSections', []) if choices else []

        grouped_data = {}
        for option_group in choice_sections:
            option_group_name = option_group.get('nm', '')
            min_quantity = option_group.get('mnq', '')
            # A missing/empty 'mxq' used to reach max('', 1) and raise TypeError.
            max_quantity = option_group.get('mxq') or 0
            for option_index, option in enumerate(option_group.get('ich', []), start=1):
                row = grouped_data.setdefault(option_group_name, {
                    "Option_group_name": option_group_name,
                    "Min_quantity": min_quantity,
                    "Max_quantity": max(max_quantity,1)
                })
                row[f"Option_{option_index}_Name"] = option.get('nm', '')
                row[f"Option_{option_index}_Price"] = option.get('pr', '')

        menu_items_list.extend(grouped_data.values())

    df = pd.DataFrame(menu_items_list)
    # The option columns exist only when at least one option group was
    # collected, so all of the column surgery is guarded by this check.
    if "Max_quantity" in df.columns:
        # Blank out the headers of every column after 'Max_quantity'.
        max_column_index = df.columns.get_loc('Max_quantity')
        for i in range(max_column_index + 1, len(df.columns)):
            df.rename(columns={df.columns[i]: ''}, inplace=True)

        # Shift the option-group columns up by one row.
        option_group_name_index = df.columns.get_loc('Option_group_name')
        for i in range(option_group_name_index, len(df.columns)):
            df.iloc[:, i] = df.iloc[:, i].shift(-1)

    excel_file = f"{restaurant_name}_menu.xlsx"
    df.to_excel(excel_file, index=False)
    print(f"Menu items saved to {excel_file}")
157
+
158
+
159
+
160
def main(url):
    """Scrape a Talabat restaurant page into a list of menu-item dicts.

    The restaurant id and slug are taken from the last two path segments of
    *url*. For every item flagged ``hasChoices`` the per-item choices endpoint
    is queried and its option groups are attached under ``option_groups``.

    Args:
        url: Talabat restaurant page URL.

    Returns:
        A tuple ``(items, json_content, location)``: the item dicts, the
        decoded ``__NEXT_DATA__`` JSON, and the area name read from
        ``props.pageProps.gtmEventData.area.name``. When the page cannot be
        fetched or has no ``__NEXT_DATA__`` script, ``([], {}, None)`` is
        returned (previously this path raised ``UnboundLocalError``).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    def extract_choices(item_id,restaurant_id):
        """Return the choices JSON for one item, or None on a non-200 response."""
        choice_url = f"https://www.talabat.com/nextMenuApi/v2/branches/{restaurant_id}/menu/{item_id}/choices"
        response = requests.get(choice_url, headers=headers, timeout=30)
        if response.status_code == 200:
            # Decode the body directly instead of routing JSON through an
            # HTML parser, which breaks on bodies containing markup.
            return response.json()
        print("Failed to retrieve choices for item ID:", item_id)
        return None

    path_segments = urlparse(url).path.split('/')
    restaurant_id = path_segments[-2]
    restaurant_name = path_segments[-1]

    # Defaults returned on every failure path.
    items = []
    json_content = {}
    location = None

    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        print("Failed to retrieve the webpage. Status code:", response.status_code)
        return items,json_content,location

    soup = BeautifulSoup(response.text, 'html.parser')
    script_tag = soup.find('script', id='__NEXT_DATA__')
    if not script_tag:
        print("Script tag with id '__NEXT_DATA__' not found.")
        return items,json_content,location

    json_content = json.loads(script_tag.string.strip())
    menu_data = json_content['props']['pageProps']['initialMenuState']['menuData']['items']
    location = json_content.get('props', {}).get('pageProps', {}).get('gtmEventData', {}).get('area', {}).get('name')

    # 1-based position of each category in order of first appearance. A dict
    # keeps the position correct even when a category shows up again later.
    category_positions = {}
    for i,item in enumerate(menu_data):
        item_id = item['id']
        original_section = item['originalSection']
        has_choices = item['hasChoices']
        category_position = category_positions.setdefault(
            original_section, len(category_positions) + 1
        )

        item_info = {
            'category': original_section,
            'category_postion': category_position,
            'category_id': item['sectionId'],
            'item_name': item['name'],
            'item_position': i,
            'item_image': item['originalImage'],
            'description': item['description'],
            'price': item['price'],
            'item_id': item_id,
            'has_choices' : has_choices,
        }

        if has_choices:
            option_groups_info = []
            choice_data = extract_choices(item_id,restaurant_id)
            if choice_data:
                # Only the first entry of 'choiceForItem' is used; an empty
                # list now yields no option groups instead of IndexError.
                choices = choice_data["result"]['choiceForItem']
                choice_sections = choices[0]['choiceSections'] if choices else []
                for option_group in choice_sections:
                    option_groups_info.append({
                        'option_group_name': option_group['nm'],
                        'min_quantity': option_group['mnq'],
                        'max_quantity': option_group['mxq'],
                        'option_group_names': [
                            {'option_name': choice['nm'], 'option_price': choice['pr']}
                            for choice in option_group.get('ich', [])
                        ]
                    })
            item_info['option_groups'] = option_groups_info
        items.append(item_info)

    print(f"josn named {restaurant_name}.json created succesfully")
    return items,json_content,location
275
+
276
def extract_item(items,url):
    """Insert one ``Items`` document per scraped menu item.

    Args:
        items: Item dicts with the keys ``item_id``, ``item_name``,
            ``description``, ``price``, ``item_image``, ``category``,
            ``item_position`` and ``has_choices`` (plus ``option_groups``
            for items with choices).
        url: Restaurant URL used to find the owning ``Restaurant_details``
            document.

    Raises:
        ValueError: If *items* is non-empty but no ``Restaurant_details``
            document exists for *url*.
    """
    if not items:
        # insert_many() refuses an empty document list.
        return

    db = client.Restaurants_in_dubai
    collection = db.Items
    category_collection = db['Category']
    restro_collection = db['Restaurant_details']

    # The restaurant document is the same for every item: fetch it once.
    restro = restro_collection.find_one({'url': url})
    if restro is None:
        raise ValueError(f"No Restaurant_details document found for url: {url}")
    restro_ref_id = restro['_id']

    # One Category lookup per distinct category name instead of one per item.
    category_ids = {}
    items_info = []
    for item in items:
        category_name = item['category']
        if category_name not in category_ids:
            category = category_collection.find_one({
                'category_name': category_name,
                'restro_ref_id': restro_ref_id
            })
            category_ids[category_name] = category['_id'] if category else None

        if item['has_choices']:
            modifires = [
                option_group['option_group_name']
                for option_group in item.get('option_groups', [])
            ]
        else:
            # Stored as the literal string "None", as before.
            modifires = "None"

        items_info.append({
            'item_id': item['item_id'],
            'name': item['item_name'],
            'description': item['description'],
            'amount': item['price'],
            'image': item['item_image'],
            'category_name':category_name,
            'item_position':item['item_position'],
            'modifires':modifires,
            'ref_id_category' : category_ids[category_name],
            'restro_ref_id' : restro_ref_id
        })

    result = collection.insert_many(items_info)
    print("Inserted document IDs:", result.inserted_ids)
337
+
338
def extract_category(items,json_content,url):
    """Insert one ``Category`` document per distinct category in *items*.

    Args:
        items: Item dicts carrying a ``category`` name (and, when produced by
            the scraper in this file, a ``category_postion`` value).
        json_content: Decoded ``__NEXT_DATA__`` JSON; the category item lists
            are read from
            ``props.pageProps.initialMenuState.menuData.categories``.
        url: Restaurant URL used to find the owning ``Restaurant_details``
            document.

    Raises:
        ValueError: If *items* is non-empty but no ``Restaurant_details``
            document exists for *url*.
    """
    if not items:
        # insert_many() refuses an empty document list.
        return

    db = client.Restaurants_in_dubai
    collection = db.Category
    restro_collection = db['Restaurant_details']

    def item_extract_category(json_content,name):
        """Return ``{"id", "name"}`` dicts for every item of the category called *name*."""
        menu_data = json_content['props']['pageProps']['initialMenuState']['menuData']['categories']
        return [
            {"id": menu_item["id"], "name": menu_item["name"]}
            for category in menu_data
            if category["name"] == name
            for menu_item in category["items"]
        ]

    # The restaurant document is the same for every category: fetch it once.
    restro = restro_collection.find_one({'url': url})
    if restro is None:
        raise ValueError(f"No Restaurant_details document found for url: {url}")
    restro_ref_id = restro['_id']

    categories_info = []
    existing_categories = set()
    for item in items:
        name = item['category']
        if name in existing_categories:
            continue
        existing_categories.add(name)
        categories_info.append({
            'category_name': name,
            # Previously hard-coded to 1 for every category. Use the position
            # computed by the scraper (note its misspelled key), falling back
            # to 1 when the item does not carry one.
            'category_position': item.get('category_postion', 1),
            'category_isActive': True,
            'items': item_extract_category(json_content, name),
            'restro_ref_id' : restro_ref_id
        })

    result = collection.insert_many(categories_info)
    print("Inserted document IDs:", result.inserted_ids)
387
+
388
def extract_option_group(items,url):
    """Insert one ``OptionGroup`` document per distinct option group in *items*.

    Two groups count as the same when they share the group name and the same
    set of option names; only the first occurrence is stored. Its ``item_id``
    list holds the ids of all items having a group with that name and the
    same options (names and prices).

    Args:
        items: Item dicts; groups are read from ``option_groups`` and their
            options from ``option_group_names``.
        url: Restaurant URL used to find the owning ``Restaurant_details``
            document.

    Raises:
        ValueError: If option groups exist but no ``Restaurant_details``
            document exists for *url*.
    """
    db = client.Restaurants_in_dubai
    collection = db.OptionGroup
    restro_collection = db['Restaurant_details']

    def by_option_name(option):
        return option['option_name']

    seen_signatures = set()
    option_group_info = []
    for item in items:
        for option_group in item.get('option_groups', []):
            option_group_name = option_group["option_group_name"]
            min_quantity = option_group["min_quantity"]
            max_quantity = option_group["max_quantity"]
            options = [
                {'option_name': option["option_name"], 'option_price': option["option_price"]}
                for option in option_group.get("option_group_names", [])
            ]
            option_names = [option['option_name'] for option in options]

            # Skip duplicates before the expensive scan over all items.
            signature = (option_group_name, tuple(sorted(option_names)))
            if signature in seen_signatures:
                continue
            seen_signatures.add(signature)

            # Distinct loop names: the old inner loops re-used `item` and
            # `option_group`, clobbering the outer loop variables.
            reference = sorted(options, key=by_option_name)
            item_id = [
                other_item['item_id']
                for other_item in items
                for other_group in other_item.get('option_groups', [])
                if other_group["option_group_name"] == option_group_name
                and sorted(other_group.get('option_group_names', []), key=by_option_name) == reference
            ]

            option_group_info.append({
                "option_group_name" : option_group_name,
                "min_quantity" : min_quantity,
                "max_quantity" : max_quantity,
                "option_names" : option_names,
                "item_id" : item_id,
            })

    if not option_group_info:
        return

    # The restaurant document is the same for every group: fetch it once.
    restro = restro_collection.find_one({'url': url})
    if restro is None:
        raise ValueError(f"No Restaurant_details document found for url: {url}")
    for group_document in option_group_info:
        group_document["restro_ref_id"] = restro['_id']

    result = collection.insert_many(option_group_info)
    print("Inserted document IDs:", result.inserted_ids)
455
+
456
+
457
def extract_option_group_names(items,url):
    """Insert one ``OptionName`` document per distinct option name in *items*.

    Args:
        items: Item dicts. Each must carry a ``has_choices`` flag; items with
            choices list their groups under ``option_groups``, and each group
            lists its options under ``option_group_names`` as dicts with
            ``option_name`` and ``option_price``.
        url: Restaurant URL used to find the owning ``Restaurant_details``
            document.

    Raises:
        ValueError: If there are options to store but no
            ``Restaurant_details`` document exists for *url*.
    """
    db = client.Restaurants_in_dubai
    collection = db.OptionName
    option_group_collection = db['OptionGroup']
    restro_collection = db['Restaurant_details']

    # Keep the first price seen for every option name. The previous version
    # used a per-item flag that was never reset, so one duplicate name caused
    # all following (new) options of the same item to be skipped.
    first_price_by_name = {}
    for item in items:
        if not item['has_choices']:
            continue
        for option_group in item.get('option_groups', []):
            for option in option_group.get("option_group_names", []):
                option_name = option["option_name"]
                option_price = option["option_price"]
                first_price_by_name.setdefault(option_name, option_price)

    if not first_price_by_name:
        # insert_many() refuses an empty document list.
        return

    # The restaurant document does not change while we loop: fetch it once.
    restro = restro_collection.find_one({'url': url})
    if restro is None:
        raise ValueError(f"No Restaurant_details document found for url: {url}")
    restro_ref_id = restro['_id']

    option_names = []
    for option_name, option_price in first_price_by_name.items():
        # Every option group of this restaurant that lists the option name.
        matching_documents = option_group_collection.find({
            'restro_ref_id': restro_ref_id,
            'option_names': option_name,
        })
        option_names.append({
            "option_name" : option_name,
            "option_price" : option_price,
            "ref_option_group_id" : [doc['_id'] for doc in matching_documents],
            "restro_ref_id" : restro_ref_id,
        })

    result = collection.insert_many(option_names)
    print("Inserted document IDs:", result.inserted_ids)
512
+
513
+
514
+
515
+ items,json_content,location = main(url)
516
+ restro_details(url,location)
517
+ extract_category(items,json_content,url)
518
+ extract_item(items,url)
519
+ extract_option_group(items,url)
520
+ extract_option_group_names(items,url)
521
+ return True
522
+
523
+ # main_all_extract(url)
Talabat_files/Talabat_Mongo_Data_add.py ADDED
@@ -0,0 +1,402 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from config import settings
2
+ import pymongo
3
+ import json
4
+ import requests
5
+ import json
6
+ from bs4 import BeautifulSoup
7
+ import pandas as pd
8
+ from urllib.parse import urlparse
9
+
10
+ def Talabat_mongo_data_add(url):
11
+
12
+ client = pymongo.MongoClient(settings.MONGO_URL)
13
+
14
def restro_details(url,location):
    """Scrape the restaurant title from *url* and store it in ``Restaurant_details``.

    Args:
        url: Restaurant page URL; stored verbatim in the document.
        location: Value stored under the ``location`` key.

    Raises:
        ValueError: If the page has no
            ``<h1 data-testid="restaurant-title">`` element.
    """
    db = client.Restaurants_in_dubai
    collection = db.Restaurant_details

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    # A timeout keeps a stalled connection from blocking the caller forever.
    response = requests.get(url, headers=headers, timeout=30)
    soup = BeautifulSoup(response.text, 'html.parser')

    # urlparse's second positional argument is the default *scheme*, so
    # `location` must not be passed to it.
    parsed_url = urlparse(url)
    # Second dot-separated label of the host, e.g. "www.talabat.com" -> "Talabat".
    platform_name = parsed_url.netloc.split('.')[1].capitalize()

    restaurant_tag = soup.find('h1', {'data-testid': 'restaurant-title'})
    if restaurant_tag is None or not restaurant_tag.contents:
        raise ValueError(
            f"Restaurant title not found at {url} (HTTP status {response.status_code})"
        )
    restaurant_name = restaurant_tag.contents[0].strip()

    restaurant_details = {
        'url' : url,
        'platform_name' : platform_name,
        "restaurant_name" : restaurant_name,
        "location" : location
    }
    result = collection.insert_one(restaurant_details)
    print("Inserted document IDs:", result.inserted_id)
37
+
38
+
39
+
40
def main(url):
    """Scrape a Talabat restaurant page into a list of menu-item dicts.

    The restaurant id is the second-to-last path segment of *url*. For every
    item flagged ``hasChoices`` the per-item choices endpoint is queried and
    its option groups are attached under ``option_groups``.

    Args:
        url: Talabat restaurant page URL.

    Returns:
        A tuple ``(items, json_content, location)``: the item dicts, the
        decoded ``__NEXT_DATA__`` JSON, and the area name read from
        ``props.pageProps.gtmEventData.area.name``. When the page cannot be
        fetched or has no ``__NEXT_DATA__`` script, ``([], {}, None)`` is
        returned (previously this path raised ``UnboundLocalError``).
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    def extract_choices(item_id,restaurant_id):
        """Return the choices JSON for one item, or None on a non-200 response."""
        choice_url = f"https://www.talabat.com/nextMenuApi/v2/branches/{restaurant_id}/menu/{item_id}/choices"
        response = requests.get(choice_url, headers=headers, timeout=30)
        if response.status_code == 200:
            # Decode the body directly instead of routing JSON through an
            # HTML parser, which breaks on bodies containing markup.
            return response.json()
        print("Failed to retrieve choices for item ID:", item_id)
        return None

    path_segments = urlparse(url).path.split('/')
    restaurant_id = path_segments[-2]

    # Defaults returned on every failure path.
    items = []
    json_content = {}
    location = None

    response = requests.get(url, headers=headers, timeout=30)
    if response.status_code != 200:
        print("Failed to retrieve the webpage. Status code:", response.status_code)
        return items,json_content,location

    soup = BeautifulSoup(response.text, 'html.parser')
    script_tag = soup.find('script', id='__NEXT_DATA__')
    if not script_tag:
        print("Script tag with id '__NEXT_DATA__' not found.")
        return items,json_content,location

    json_content = json.loads(script_tag.string.strip())
    menu_data = json_content['props']['pageProps']['initialMenuState']['menuData']['items']
    location = json_content.get('props', {}).get('pageProps', {}).get('gtmEventData', {}).get('area', {}).get('name')

    # 1-based position of each category in order of first appearance. A dict
    # keeps the position correct even when a category shows up again later.
    category_positions = {}
    for i,item in enumerate(menu_data):
        item_id = item['id']
        original_section = item['originalSection']
        has_choices = item['hasChoices']
        category_position = category_positions.setdefault(
            original_section, len(category_positions) + 1
        )

        item_info = {
            'category': original_section,
            'category_postion': category_position,
            'category_id': item['sectionId'],
            'item_name': item['name'],
            'item_position': i,
            'item_image': item['originalImage'],
            'description': item['description'],
            'price': item['price'],
            'item_id': item_id,
            'has_choices' : has_choices,
        }

        if has_choices:
            option_groups_info = []
            choice_data = extract_choices(item_id,restaurant_id)
            if choice_data:
                # Only the first entry of 'choiceForItem' is used; an empty
                # list now yields no option groups instead of IndexError.
                choices = choice_data["result"]['choiceForItem']
                choice_sections = choices[0]['choiceSections'] if choices else []
                for option_group in choice_sections:
                    option_groups_info.append({
                        'option_group_name': option_group['nm'],
                        'min_quantity': option_group['mnq'],
                        'max_quantity': option_group['mxq'],
                        'option_group_names': [
                            {'option_name': choice['nm'], 'option_price': choice['pr']}
                            for choice in option_group.get('ich', [])
                        ]
                    })
            item_info['option_groups'] = option_groups_info
        items.append(item_info)

    return items,json_content,location
149
+
150
def extract_item(items,url):
    """Insert one ``Items`` document per scraped menu item.

    Args:
        items: Item dicts with the keys ``item_id``, ``item_name``,
            ``description``, ``price``, ``item_image``, ``category``,
            ``item_position`` and ``has_choices`` (plus ``option_groups``
            for items with choices).
        url: Restaurant URL used to find the owning ``Restaurant_details``
            document.

    Raises:
        ValueError: If *items* is non-empty but no ``Restaurant_details``
            document exists for *url*.
    """
    if not items:
        # insert_many() refuses an empty document list.
        return

    db = client.Restaurants_in_dubai
    collection = db.Items
    category_collection = db['Category']
    restro_collection = db['Restaurant_details']

    # The restaurant document is the same for every item: fetch it once.
    restro = restro_collection.find_one({'url': url})
    if restro is None:
        raise ValueError(f"No Restaurant_details document found for url: {url}")
    restro_ref_id = restro['_id']

    # One Category lookup per distinct category name instead of one per item.
    category_ids = {}
    items_info = []
    for item in items:
        category_name = item['category']
        if category_name not in category_ids:
            category = category_collection.find_one({
                'category_name': category_name,
                'restro_ref_id': restro_ref_id
            })
            category_ids[category_name] = category['_id'] if category else None

        if item['has_choices']:
            modifires = [
                option_group['option_group_name']
                for option_group in item.get('option_groups', [])
            ]
        else:
            # Stored as the literal string "None", as before.
            modifires = "None"

        items_info.append({
            'item_id': item['item_id'],
            'name': item['item_name'],
            'description': item['description'],
            'amount': item['price'],
            'image': item['item_image'],
            'category_name':category_name,
            'item_position':item['item_position'],
            'modifires':modifires,
            'ref_id_category' : category_ids[category_name],
            'restro_ref_id' : restro_ref_id
        })

    result = collection.insert_many(items_info)
    print("Inserted document IDs:", result.inserted_ids)
211
+
212
def extract_category(items,json_content,url):
    """Insert one ``Category`` document per distinct category in *items*.

    Args:
        items: Item dicts carrying a ``category`` name (and, when produced by
            the scraper in this file, a ``category_postion`` value).
        json_content: Decoded ``__NEXT_DATA__`` JSON; the category item lists
            are read from
            ``props.pageProps.initialMenuState.menuData.categories``.
        url: Restaurant URL used to find the owning ``Restaurant_details``
            document.

    Raises:
        ValueError: If *items* is non-empty but no ``Restaurant_details``
            document exists for *url*.
    """
    if not items:
        # insert_many() refuses an empty document list.
        return

    db = client.Restaurants_in_dubai
    collection = db.Category
    restro_collection = db['Restaurant_details']

    def item_extract_category(json_content,name):
        """Return ``{"id", "name"}`` dicts for every item of the category called *name*."""
        menu_data = json_content['props']['pageProps']['initialMenuState']['menuData']['categories']
        return [
            {"id": menu_item["id"], "name": menu_item["name"]}
            for category in menu_data
            if category["name"] == name
            for menu_item in category["items"]
        ]

    # The restaurant document is the same for every category: fetch it once.
    restro = restro_collection.find_one({'url': url})
    if restro is None:
        raise ValueError(f"No Restaurant_details document found for url: {url}")
    restro_ref_id = restro['_id']

    categories_info = []
    existing_categories = set()
    for item in items:
        name = item['category']
        if name in existing_categories:
            continue
        existing_categories.add(name)
        categories_info.append({
            'category_name': name,
            # Previously hard-coded to 1 for every category. Use the position
            # computed by the scraper (note its misspelled key), falling back
            # to 1 when the item does not carry one.
            'category_position': item.get('category_postion', 1),
            'category_isActive': True,
            'items': item_extract_category(json_content, name),
            'restro_ref_id' : restro_ref_id
        })

    result = collection.insert_many(categories_info)
    print("Inserted document IDs:", result.inserted_ids)
262
+
263
def extract_option_group(items,url):
    """Insert one ``OptionGroup`` document per distinct option group in *items*.

    Two groups count as the same when they share the group name and the same
    set of option names; only the first occurrence is stored. Its ``item_id``
    list holds the ids of all items having a group with that name and the
    same options (names and prices).

    Args:
        items: Item dicts; groups are read from ``option_groups`` and their
            options from ``option_group_names``.
        url: Restaurant URL used to find the owning ``Restaurant_details``
            document.

    Raises:
        ValueError: If option groups exist but no ``Restaurant_details``
            document exists for *url*.
    """
    db = client.Restaurants_in_dubai
    collection = db.OptionGroup
    restro_collection = db['Restaurant_details']

    def by_option_name(option):
        return option['option_name']

    seen_signatures = set()
    option_group_info = []
    for item in items:
        for option_group in item.get('option_groups', []):
            option_group_name = option_group["option_group_name"]
            min_quantity = option_group["min_quantity"]
            max_quantity = option_group["max_quantity"]
            options = [
                {'option_name': option["option_name"], 'option_price': option["option_price"]}
                for option in option_group.get("option_group_names", [])
            ]
            option_names = [option['option_name'] for option in options]

            # Skip duplicates before the expensive scan over all items.
            signature = (option_group_name, tuple(sorted(option_names)))
            if signature in seen_signatures:
                continue
            seen_signatures.add(signature)

            # Distinct loop names: the old inner loops re-used `item` and
            # `option_group`, clobbering the outer loop variables.
            reference = sorted(options, key=by_option_name)
            item_id = [
                other_item['item_id']
                for other_item in items
                for other_group in other_item.get('option_groups', [])
                if other_group["option_group_name"] == option_group_name
                and sorted(other_group.get('option_group_names', []), key=by_option_name) == reference
            ]

            option_group_info.append({
                "option_group_name" : option_group_name,
                "min_quantity" : min_quantity,
                "max_quantity" : max_quantity,
                "option_names" : option_names,
                "item_id" : item_id,
            })

    if not option_group_info:
        return

    # The restaurant document is the same for every group: fetch it once
    # (the old code queried it twice per group).
    restro = restro_collection.find_one({'url': url})
    if restro is None:
        raise ValueError(f"No Restaurant_details document found for url: {url}")
    for group_document in option_group_info:
        group_document["restro_ref_id"] = restro['_id']

    result = collection.insert_many(option_group_info)
    print("Inserted document IDs:", result.inserted_ids)
336
+
337
+
338
def extract_option_group_names(items,url):
    """Insert one ``OptionName`` document per distinct option name in *items*.

    Args:
        items: Item dicts. Each must carry a ``has_choices`` flag; items with
            choices list their groups under ``option_groups``, and each group
            lists its options under ``option_group_names`` as dicts with
            ``option_name`` and ``option_price``.
        url: Restaurant URL used to find the owning ``Restaurant_details``
            document.

    Raises:
        ValueError: If there are options to store but no
            ``Restaurant_details`` document exists for *url*.
    """
    db = client.Restaurants_in_dubai
    collection = db.OptionName
    option_group_collection = db['OptionGroup']
    restro_collection = db['Restaurant_details']

    # Keep the first price seen for every option name. The previous version
    # used a per-item flag that was never reset, so one duplicate name caused
    # all following (new) options of the same item to be skipped.
    first_price_by_name = {}
    for item in items:
        if not item['has_choices']:
            continue
        for option_group in item.get('option_groups', []):
            for option in option_group.get("option_group_names", []):
                option_name = option["option_name"]
                option_price = option["option_price"]
                first_price_by_name.setdefault(option_name, option_price)

    if not first_price_by_name:
        # insert_many() refuses an empty document list.
        return

    # The restaurant document does not change while we loop: fetch it once.
    restro = restro_collection.find_one({'url': url})
    if restro is None:
        raise ValueError(f"No Restaurant_details document found for url: {url}")
    restro_ref_id = restro['_id']

    option_names = []
    for option_name, option_price in first_price_by_name.items():
        # Every option group of this restaurant that lists the option name.
        matching_documents = option_group_collection.find({
            'restro_ref_id': restro_ref_id,
            'option_names': option_name,
        })
        option_names.append({
            "option_name" : option_name,
            "option_price" : option_price,
            "ref_option_group_id" : [doc['_id'] for doc in matching_documents],
            "restro_ref_id" : restro_ref_id,
        })

    result = collection.insert_many(option_names)
    print("Inserted document IDs:", result.inserted_ids)
393
+
394
+
395
+
396
+ items,json_content,location = main(url)
397
+ restro_details(url,location)
398
+ extract_category(items,json_content,url)
399
+ extract_item(items,url)
400
+ extract_option_group(items,url)
401
+ extract_option_group_names(items,url)
402
+ return True
Talabat_files/Talabat_excel_final.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def Talabat_excel_extract(url):
    """Scrape a Talabat restaurant page and return its menu as an Excel file.

    The menu is read from the page's ``__NEXT_DATA__`` JSON. For every item
    flagged ``hasChoices`` the per-item "choices" endpoint is queried and one
    extra row per option group is appended right after the item's row.

    Args:
        url: Restaurant page URL. The second-to-last path segment is used as
            the branch id and the last one as the restaurant name.

    Returns:
        ``(excel_file, filename)`` where ``excel_file`` is a ``BytesIO``
        positioned at offset 0, or ``None`` (after printing a message) when
        the page cannot be fetched or has no ``__NEXT_DATA__`` script.
    """
    import requests
    import json
    import pandas as pd
    from bs4 import BeautifulSoup
    from urllib.parse import urlparse
    from io import BytesIO

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    def extract_choices(item_id, restaurant_id):
        """Return the decoded choices payload for one item, or None on a non-200 reply."""
        choice_url = f"https://www.talabat.com/nextMenuApi/v2/branches/{restaurant_id}/menu/{item_id}/choices"
        response = requests.get(choice_url, headers=headers)
        if response.status_code == 200:
            return response.json()
        print("Failed to retrieve choices for item ID:", item_id)
        return None

    # url = input("Enter restaurant URL: ")
    path_segments = urlparse(url).path.split('/')
    restaurant_id = path_segments[-2]
    restaurant_name = path_segments[-1]

    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        print(f"Failed to get menu items. Status code: {response.status_code}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    script_tag = soup.find('script', id='__NEXT_DATA__')
    if not script_tag:
        print("Script tag with id '__NEXT_DATA__' not found.")
        return None

    json_content = json.loads(script_tag.string.strip())
    menu_data = json_content['props']['pageProps']['initialMenuState']['menuData']['items']

    menu_items_list = []
    # Category name -> 1-based position in order of first appearance, so a
    # category that shows up again later keeps its own number.
    category_positions = {}

    for i, item in enumerate(menu_data):
        item_id = item['id']
        original_section = item['originalSection']
        category_position = category_positions.setdefault(
            original_section, len(category_positions) + 1
        )

        menu_items_list.append({
            "Category": original_section,
            "Category_position": category_position,
            "Item_name": item['name'],
            "Item_position": i + 1,
            "Image": item['originalImage'],
            "description": item['description'],
            "price": item['price'],
            "id": item_id,
        })

        if not item['hasChoices']:
            continue
        choice_data = extract_choices(item_id, restaurant_id)
        if not choice_data:
            continue
        # The list may be missing or empty; indexing [0] blindly would raise.
        choices = choice_data["result"].get('choiceForItem') or []
        if not choices:
            continue

        grouped_data = {}
        for option_group in choices[0].get('choiceSections', []):
            options = option_group.get('ich', [])
            if not options:
                # A group without options contributes no row.
                continue
            option_group_name = option_group.get('nm', '')
            row = grouped_data.setdefault(option_group_name, {
                "Option_group_name": option_group_name,
                "Min_quantity": option_group.get('mnq', ''),
                "Max_quantity": option_group.get('mxq', ''),
            })
            for option_index, option in enumerate(options, start=1):
                row[f"Option_{option_index}_Name"] = option.get('nm', '')
                row[f"Option_{option_index}_Price"] = option.get('pr', '')

        menu_items_list.extend(grouped_data.values())

    df = pd.DataFrame(menu_items_list)

    # Both columns exist only when at least one option-group row was added.
    if 'Max_quantity' in df.columns:
        # Blank out the headers of the per-option columns.
        max_column_index = df.columns.get_loc('Max_quantity')
        for i in range(max_column_index + 1, len(df.columns)):
            df.rename(columns={df.columns[i]: ''}, inplace=True)

        # Shift the option-group columns up one row so the first group of an
        # item lands on the item's own row.
        option_group_name_index = df.columns.get_loc('Option_group_name')
        for i in range(option_group_name_index, len(df.columns)):
            df.iloc[:, i] = df.iloc[:, i].shift(-1)

    df_cleaned = df.dropna(how='all')

    excel_file = BytesIO()
    df_cleaned.to_excel(excel_file, index=False)
    excel_file.seek(0)  # Move to the beginning of the BytesIO stream

    return excel_file, f"{restaurant_name}_menu.xlsx"
Talabat_files/Talabat_json_final.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def Talabat_Json_extract(url):
    """Scrape a Talabat restaurant page and return its menu as JSON bytes.

    The menu is read from the page's ``__NEXT_DATA__`` JSON; items flagged
    ``hasChoices`` additionally get an ``option_groups`` list built from the
    per-item "choices" endpoint.

    Args:
        url: Restaurant page URL. The second-to-last path segment is used as
            the branch id and the last one as the restaurant name.

    Returns:
        ``(restaurant_name, output)`` where ``output`` is a ``BytesIO`` with
        UTF-8 encoded, indented JSON positioned at offset 0. When the page
        cannot be fetched or has no ``__NEXT_DATA__`` script a message is
        printed and ``True`` is returned instead (existing contract; callers
        that unpack the result must handle it).
    """
    import json
    import requests
    from bs4 import BeautifulSoup
    import pandas as pd
    from urllib.parse import urlparse
    from io import BytesIO

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    def extract_choices(item_id, restaurant_id):
        """Return the decoded choices payload for one item, or None on a non-200 reply."""
        choice_url = f"https://www.talabat.com/nextMenuApi/v2/branches/{restaurant_id}/menu/{item_id}/choices"
        response = requests.get(choice_url, headers=headers)
        if response.status_code == 200:
            # The endpoint answers with JSON, so decode it directly instead of
            # routing it through an HTML parser.
            return response.json()
        print("Failed to retrieve choices for item ID:", item_id)
        return None

    # url = input("enter retro URL : ")
    path_segments = urlparse(url).path.split('/')
    restaurant_id = path_segments[-2]
    restaurant_name = path_segments[-1]

    response = requests.get(url, headers=headers)
    if response.status_code != 200:
        print("Failed to retrieve the webpage. Status code:", response.status_code)
        return True

    soup = BeautifulSoup(response.text, 'html.parser')
    script_tag = soup.find('script', id='__NEXT_DATA__')
    if not script_tag:
        print("Script tag with id '__NEXT_DATA__' not found.")
        return True

    json_content = json.loads(script_tag.string.strip())
    menu_data = json_content['props']['pageProps']['initialMenuState']['menuData']['items']

    items = []
    for item in menu_data:
        item_id = item['id']
        item_info = {
            'category': item['originalSection'],
            # NOTE(review): both positions are constant 1 here - confirm
            # whether consumers expect real ordering.
            'category_postion': 1,
            'item_name': item['name'],
            'item_position': 1,
            'original_image': item['originalImage'],
            'description': item['description'],
            'price': item['price'],
            'item_id': item_id,
        }

        if item['hasChoices']:
            option_groups_info = []
            choice_data = extract_choices(item_id, restaurant_id)
            if choice_data:
                # Only the first entry of choiceForItem is used.
                choice_for_item = choice_data["result"]['choiceForItem'][0]
                for option_group in choice_for_item['choiceSections']:
                    option_groups_info.append({
                        'option_group_name': option_group['nm'],
                        'min_quantity': option_group['mnq'],
                        'max_quantity': option_group['mxq'],
                        'option_group_names': [
                            {
                                'option_name': option['nm'],
                                'option_price': option['pr'],
                            }
                            for option in option_group.get('ich', [])
                        ],
                    })
            item_info['option_groups'] = option_groups_info
        items.append(item_info)

    json_content = json.dumps(items, indent=4)

    # Create BytesIO object to hold the JSON content
    output = BytesIO()
    output.write(json_content.encode('utf-8'))
    output.seek(0)

    return restaurant_name, output
Talabat_files/Talabat_multy_location.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
def Talabat_multy_LOCATION(locations):
    """Run ``Talabat_location_links`` once for every entry of ``locations``, in order."""
    from Talabat_files.location_link_fetch import Talabat_location_links

    for place in locations:
        Talabat_location_links(place)
5
+
6
+
Talabat_files/__init__.py ADDED
File without changes
Talabat_files/__pycache__/Talabat_Everything_From_url.cpython-312.pyc ADDED
Binary file (19.1 kB). View file
 
Talabat_files/__pycache__/Talabat_Mongo_Data_add.cpython-312.pyc ADDED
Binary file (13.9 kB). View file
 
Talabat_files/__pycache__/Talabat_excel_final.cpython-312.pyc ADDED
Binary file (5.33 kB). View file
 
Talabat_files/__pycache__/Talabat_json_final.cpython-312.pyc ADDED
Binary file (4.04 kB). View file
 
Talabat_files/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (172 Bytes). View file
 
Talabat_files/__pycache__/location_link_fetch.cpython-312.pyc ADDED
Binary file (4.74 kB). View file
 
Talabat_files/__pycache__/main_function.cpython-312.pyc ADDED
Binary file (19 kB). View file
 
Talabat_files/location_fetch_function.py ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def location_all(location,inside_location):
    """Scrape the EatEasy listing page of an area and process its restaurants.

    Builds ``https://www.eateasy.ae/{location}/{inside_location}-restaurants``
    (spaces removed from ``location``, replaced by dashes in
    ``inside_location``, both lower-cased), collects the ``href`` of every
    ``a.eateasy-restaurant-box-inner`` anchor and passes at most the first 20
    to ``Main_function_For_location.main``.

    Args:
        location: Top-level location name.
        inside_location: Area name inside that location.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    import requests
    from bs4 import BeautifulSoup
    from Main_function_For_location import main

    max_restaurants = 20  # upper bound on restaurants processed per call

    location = location.replace(" ", "").lower()
    inside_location = inside_location.replace(" ", "-").lower()

    url = f"https://www.eateasy.ae/{location}/{inside_location}-restaurants"

    payload = {}
    files = {}
    headers = {
        'Cookie': 'eateasy-ae-website=lai3mvcb9hd99nnivbt0pn68ibfjsd6g',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36'
    }

    response = requests.post(url, headers=headers, data=payload, files=files)
    if response.status_code != 200:
        print(f"Failed to retrieve the page. Status code: {response.status_code}")
        return

    soup = BeautifulSoup(response.text, 'html.parser')
    links = soup.find_all('a', class_='eateasy-restaurant-box-inner')
    hrefs = [link.get('href') for link in links if link.get('href')]
    # Report how many links were collected (not the length of one URL).
    print(f"Found {len(hrefs)} Links")

    if hrefs:
        for href in hrefs[:max_restaurants]:
            print(href)
            main(href, location, inside_location)
        print(" DONE ")
Talabat_files/location_link_fetch.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def Talabat_location_links(location):
    """Find the Talabat restaurants of a UAE area and extract each one.

    The area is resolved to its numeric id through Talabat's country-areas
    endpoint, the area's restaurant listing page is fetched, and every
    restaurant link found there is handed to ``main_all_extract``.

    Args:
        location: Area or city name; matched case-insensitively against the
            ``cityName`` and ``name`` fields of the areas list.

    Returns:
        None. Progress and failures are reported with ``print``.
    """
    import requests
    from bs4 import BeautifulSoup
    import re
    from Talabat_files.main_function import main_all_extract

    # Function to convert text to kebab case
    def to_kebab_case(text: str) -> str:
        text = text.lower()  # Convert to lowercase
        text = re.sub(r'\s+', '-', text)  # Replace spaces with dashes
        text = re.sub(r'[^a-zA-Z0-9-]', '', text)  # Remove non-alphanumeric characters
        return text

    # Function to fetch location code
    def location_code(name: str) -> "int | None":
        """Return the id of the first area whose city or area name equals ``name``; None if absent or on a request error."""
        url = "https://www.talabat.com/nextApi/location/country-areas/4"
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36',
            'Accept-Language': 'en-US',
            'Cookie': '__cf_bm=_mZexeD7G1shqe.pO2VMyyTt.NCBm6Tt1saj4P.MQDk-1716294515-1.0.1.1-3nmOwncAyqyw5pak8.DFvLZjZYKQv4yW16.7jRG6groz0869YtR83cK8uy_VR70ebNG7mLMgRPRHxhTndBWJUGFQUlk; AWSALB=mS5qvg1IVA+fbSMlvbIt2qv6TrpVbPLMF2YveRn8OJQCwHyPfnEVsOhdmOOFINqeB93N6kx9xPbjNggoc9pBVTfQWCVeVayfSPMeOBQKSXFZ/ppIMGjDMTX3o+BP; AWSALBCORS=mS5qvg1IVA+fbSMlvbIt2qv6TrpVbPLMF2YveRn8OJQCwHyPfnEVsOhdmOOFINqeB93N6kx9xPbjNggoc9pBVTfQWCVeVayfSPMeOBQKSXFZ/ppIMGjDMTX3o+BP; dhhPerseusGuestId=1716186301782.797703851415064000.04xvpecztvkg; next-i18next=en; tlb_country=uae; tlb_lng=en'
        }
        try:
            response = requests.get(url, headers=headers)
            response.raise_for_status()  # Raise an error for bad status codes
            jsondata = response.json()

            wanted = name.lower()
            for data in jsondata.get('areas', []):
                if wanted == data['cityName'].lower() or wanted == data['name'].lower():
                    return data['id']
            return None
        except requests.RequestException as e:
            print(f"An error occurred: {e}")
            return None

    location_name_input = location.strip()
    code = location_code(location_name_input)

    if code is None:
        print(f"Location '{location_name_input}' not found.")
        return

    location_name = to_kebab_case(location_name_input)
    url = f"https://www.talabat.com/uae/restaurants/{code}/{location_name}"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/98.0.4758.102 Safari/537.36',
        'Cookie': '__cf_bm=pUdFBMFwQSdL5rRym8o1cr4lAKqQLqpz661jnZp9q_M-1716188106-1.0.1.1-P989rK8kUjlPuclwf7GVdcxuX9SXwyF8r4eelpQtsA7M1yCX82n4b74tSDaNTPdILiY7SHC8qXP61WKIFX1RcsV4cdtVsKmEq9Adzzo1Mxs; AWSALB=HWer/ht79Hy5nHr3VOFEqnaCc0UyDHrkYx/xyAYcq3bIe8SybCBoqXaT9B4yG/08Xhy2KaCjgh75x3No44IhEJbg8cy/n+opvCEW2lfo4TOW2MBorkBSyQ1GQ6HY; AWSALBCORS=HWer/ht79Hy5nHr3VOFEqnaCc0UyDHrkYx/xyAYcq3bIe8SybCBoqXaT9B4yG/08Xhy2KaCjgh75x3No44IhEJbg8cy/n+opvCEW2lfo4TOW2MBorkBSyQ1GQ6HY; dhhPerseusGuestId=1716186301782.797703851415064000.04xvpecztvkg; next-i18next=en; tlb_country=uae; tlb_lng=en'
    }
    try:
        response = requests.get(url, headers=headers)
        response.raise_for_status()  # Raise an error for bad status codes
        soup = BeautifulSoup(response.text, 'html.parser')

        restaurant_list_div = soup.find('div', class_='restaurant-list')
        if restaurant_list_div:
            links = restaurant_list_div.find_all('a', {'data-testid': 'restaurant-a'})
            # Anchors without an href are skipped; concatenating None would
            # raise a TypeError that the handler below does not catch.
            hrefs = [
                'https://www.talabat.com' + link.get('href')
                for link in links
                if link.get('href')
            ]
            print(f"Found {len(hrefs)} Links")
            for href in hrefs:
                print(href)
                main_all_extract(href)
        else:
            print("No restaurant list found.")
    except requests.RequestException as e:
        print(f"An error occurred while fetching the restaurant data: {e}")
Talabat_files/main_function.py ADDED
@@ -0,0 +1,517 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # url = input("enter Restro url : ")
2
+ import requests
3
+ from bs4 import BeautifulSoup
4
+ from urllib.parse import urlparse
5
+ from config import settings
6
+ import pymongo
7
+ import json
8
+ import pandas as pd
9
+
10
+
11
def main_all_extract(url):
    """Scrape one Talabat restaurant page and store its menu in MongoDB.

    The page's ``__NEXT_DATA__`` JSON is turned into a list of menu items
    (option groups come from the per-item "choices" endpoint) and written to
    the ``Restaurants_in_dubai`` database, in this order: ``Restaurant_details``,
    ``Category``, ``Items``, ``OptionGroup``, ``OptionName``.

    Args:
        url: Restaurant page URL. The second-to-last path segment is used as
            the branch id for the choices endpoint.

    Returns:
        True when the menu was scraped and stored. False when the page could
        not be fetched or has no ``__NEXT_DATA__`` script; a message is
        printed and nothing is written in that case.
    """
    client = pymongo.MongoClient(settings.MONGO_URL)
    db = client.Restaurants_in_dubai

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
    }

    def restro_details(url, location):
        """Insert the restaurant's own record (url, platform, name, location)."""
        response = requests.get(url, headers=headers)
        soup = BeautifulSoup(response.text, 'html.parser')

        # Platform is the second label of the host, e.g. "talabat" in www.talabat.com.
        platform_name = urlparse(url).netloc.split('.')[1].capitalize()
        restaurant_tag = soup.find('h1', {'data-testid': 'restaurant-title'})
        restaurant_name = restaurant_tag.contents[0].strip()
        result = db.Restaurant_details.insert_one({
            'url': url,
            'platform_name': platform_name,
            "restaurant_name": restaurant_name,
            "location": location,
        })
        print("Inserted document IDs:", result.inserted_id)

    def extract_choices(item_id, restaurant_id):
        """Return the decoded choices payload for one item, or None on a non-200 reply."""
        choice_url = f"https://www.talabat.com/nextMenuApi/v2/branches/{restaurant_id}/menu/{item_id}/choices"
        response = requests.get(choice_url, headers=headers)
        if response.status_code == 200:
            return response.json()
        print("Failed to retrieve choices for item ID:", item_id)
        return None

    def main(url):
        """Return ``(items, json_content, location)`` for the page, or None if it cannot be scraped."""
        path_segments = urlparse(url).path.split('/')
        restaurant_id = path_segments[-2]
        restaurant_name = path_segments[-1]

        response = requests.get(url, headers=headers)
        if response.status_code != 200:
            print("Failed to retrieve the webpage. Status code:", response.status_code)
            return None

        soup = BeautifulSoup(response.text, 'html.parser')
        script_tag = soup.find('script', id='__NEXT_DATA__')
        if not script_tag:
            print("Script tag with id '__NEXT_DATA__' not found.")
            return None

        json_content = json.loads(script_tag.string.strip())
        menu_data = json_content['props']['pageProps']['initialMenuState']['menuData']['items']
        location = json_content.get('props', {}).get('pageProps', {}).get('gtmEventData', {}).get('area', {}).get('name')

        # Category name -> 1-based position in order of first appearance, so
        # a category that shows up again later keeps its own number.
        category_positions = {}
        items = []
        for i, item in enumerate(menu_data):
            item_id = item['id']
            original_section = item['originalSection']
            has_choices = item['hasChoices']
            item_info = {
                'category': original_section,
                'category_postion': category_positions.setdefault(
                    original_section, len(category_positions) + 1
                ),
                'category_id': item['sectionId'],
                'item_name': item['name'],
                'item_position': i,
                'item_image': item['originalImage'],
                'description': item['description'],
                'price': item['price'],
                'item_id': item_id,
                'has_choices': has_choices,
            }

            if has_choices:
                option_groups_info = []
                choice_data = extract_choices(item_id, restaurant_id)
                if choice_data:
                    # Only the first entry of choiceForItem is used.
                    choice_for_item = choice_data["result"]['choiceForItem'][0]
                    for option_group in choice_for_item['choiceSections']:
                        option_groups_info.append({
                            'option_group_name': option_group['nm'],
                            'min_quantity': option_group['mnq'],
                            'max_quantity': option_group['mxq'],
                            'option_group_names': [
                                {
                                    'option_name': option['nm'],
                                    'option_price': option['pr'],
                                }
                                for option in option_group.get('ich', [])
                            ],
                        })
                item_info['option_groups'] = option_groups_info
            items.append(item_info)

        print(f"josn named {restaurant_name}.json created succesfully")
        return items, json_content, location

    def extract_category(items, json_content, restro_ref_id):
        """Insert one Category document per distinct category name, in first-seen order."""
        menu_categories = json_content['props']['pageProps']['initialMenuState']['menuData']['categories']
        # Category name -> [{"id", "name"}, ...] of the items listed under it.
        items_by_category = {}
        for category in menu_categories:
            bucket = items_by_category.setdefault(category["name"], [])
            for entry in category.get("items", []):
                bucket.append({"id": entry["id"], "name": entry["name"]})

        categories_info = []
        seen = set()
        for item in items:
            name = item['category']
            if name in seen:
                continue
            seen.add(name)
            categories_info.append({
                'category_name': name,
                # Use the position computed while scraping rather than a constant.
                'category_position': item['category_postion'],
                'category_isActive': True,
                'items': items_by_category.get(name, []),
                'restro_ref_id': restro_ref_id,
            })
        # insert_many rejects an empty list, so only call it with data.
        if categories_info:
            result = db.Category.insert_many(categories_info)
            print("Inserted document IDs:", result.inserted_ids)

    def extract_item(items, restro_ref_id):
        """Insert one Items document per menu item, linked to its Category document."""
        category_ids = {}  # category name -> Category _id (or None), looked up once
        items_info = []
        for item in items:
            category_name = item['category']
            if category_name not in category_ids:
                category = db.Category.find_one({
                    'category_name': category_name,
                    'restro_ref_id': restro_ref_id,
                })
                category_ids[category_name] = category['_id'] if category else None

            if item['has_choices']:
                modifires = [
                    option_group['option_group_name']
                    for option_group in item.get('option_groups', [])
                ]
            else:
                modifires = "None"

            items_info.append({
                'item_id': item['item_id'],
                'name': item['item_name'],
                'description': item['description'],
                'amount': item['price'],
                'image': item['item_image'],
                'category_name': category_name,
                'item_position': item['item_position'],
                'modifires': modifires,
                'ref_id_category': category_ids[category_name],
                'restro_ref_id': restro_ref_id,
            })
        if items_info:
            result = db.Items.insert_many(items_info)
            print("Inserted document IDs:", result.inserted_ids)

    def extract_option_group(items, restro_ref_id):
        """Insert one OptionGroup document per distinct (group name, option names) pair."""
        def by_name(option):
            return option['option_name']

        option_group_info = []
        seen_groups = set()
        for item in items:
            for option_group in item.get('option_groups', []):
                option_group_name = option_group["option_group_name"]
                options = [
                    {
                        'option_name': option["option_name"],
                        'option_price': option["option_price"],
                    }
                    for option in option_group.get("option_group_names", [])
                ]
                option_names = [option['option_name'] for option in options]

                # Groups are deduplicated on name + option names (prices ignored).
                key = (option_group_name, tuple(sorted(option_names)))
                if key in seen_groups:
                    continue
                seen_groups.add(key)

                # Every item carrying a group with the same name and the same
                # options (names and prices) is linked to this document.
                wanted = sorted(options, key=by_name)
                item_id = [
                    other_item['item_id']
                    for other_item in items
                    for other_group in other_item.get('option_groups', [])
                    if other_group["option_group_name"] == option_group_name
                    and sorted(other_group.get('option_group_names', []), key=by_name) == wanted
                ]

                option_group_info.append({
                    "option_group_name": option_group_name,
                    "min_quantity": option_group["min_quantity"],
                    "max_quantity": option_group["max_quantity"],
                    "option_names": option_names,
                    "item_id": item_id,
                    "restro_ref_id": restro_ref_id,
                })
        if option_group_info:
            result = db.OptionGroup.insert_many(option_group_info)
            print("Inserted document IDs:", result.inserted_ids)

    def extract_option_group_names(items, restro_ref_id):
        """Insert one OptionName document per distinct option name (first occurrence wins)."""
        option_names = []
        seen_names = set()
        for item in items:
            if not item['has_choices']:
                continue
            for option_group in item.get('option_groups', []):
                for option in option_group.get("option_group_names", []):
                    option_name = option["option_name"]
                    # Decide per option: one duplicate must not suppress the
                    # remaining, still unseen options of the same item.
                    if option_name in seen_names:
                        continue
                    seen_names.add(option_name)

                    matching_documents = db.OptionGroup.find({
                        'restro_ref_id': restro_ref_id,
                        'option_names': option_name,
                    })
                    option_names.append({
                        "option_name": option_name,
                        "option_price": option["option_price"],
                        "ref_option_group_id": [doc['_id'] for doc in matching_documents],
                        "restro_ref_id": restro_ref_id,
                    })
        if option_names:
            result = db.OptionName.insert_many(option_names)
            print("Inserted document IDs:", result.inserted_ids)

    try:
        scraped = main(url)
        if scraped is None:
            return False
        items, json_content, location = scraped

        restro_details(url, location)
        # Look the restaurant up once; every later document references its _id.
        restro = db.Restaurant_details.find_one({'url': url})
        restro_ref_id = restro['_id']

        extract_category(items, json_content, restro_ref_id)
        extract_item(items, restro_ref_id)
        extract_option_group(items, restro_ref_id)
        extract_option_group_names(items, restro_ref_id)
        return True
    finally:
        # Release the connection pool opened for this call.
        client.close()
516
+
517
+ # main_all_extract(url)
app.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, HTTPException,Response
2
+ from io import BytesIO
3
+ from pydantic import BaseModel
4
+ from EatEasy_EveryThing_From_url import EatEasy_EveryThing_From_url
5
+ from EatEasy_excel_only import EatEasy_excel_extract
6
+ from EatEasy_Json_only import EatEasy_Json_extract
7
+ from location_fetch_function import location_all
8
+ from Mongo_Data_add import EatEasy_Mongo_Data_add
9
+ from Talabat_files.Talabat_Everything_From_url import Talabat_EveryThing_From_url
10
+ from Talabat_files.Talabat_excel_final import Talabat_excel_extract
11
+ from Talabat_files.Talabat_json_final import Talabat_Json_extract
12
+ from Talabat_files.location_link_fetch import Talabat_location_links
13
+ from Talabat_files.Talabat_Mongo_Data_add import Talabat_mongo_data_add
14
+ from Talabat_files.Multi_urls import multi_url
15
+ from Talabat_files.Talabat_multy_location import Talabat_multy_LOCATION
16
+ from Eateasy_Multi_Urls import multi_urls
17
+ from starlette.responses import RedirectResponse
18
+
19
# ASGI application object; the route decorators in this module register on it.
app = FastAPI()
20
+
21
+
22
class EatEasyUrlInput0(BaseModel):
    """Request body for ``/EatEasy_Mongo_from_Multi_urls``: a list under ``url``."""

    url: list
24
+
25
class EatEasyUrlInput(BaseModel):
    """Request body holding a single ``url`` string.

    Only referenced by the commented-out
    ``/EatEasy_Excel_Json_MongoData_From_URL`` endpoint in this module.
    """

    url: str
27
+
28
class EatEasyUrlInput1(BaseModel):
    """Request body for ``/EatEasy_Extract_Excel_only``: a single ``url`` string."""

    url: str
30
+
31
class EatEasyUrlInput2(BaseModel):
    """Request body holding a single ``url`` string.

    Only referenced by the commented-out ``/EatEasy_Extract_Json_only``
    endpoint in this module.
    """

    url: str
33
+
34
class Location_input(BaseModel):
    """Request body for ``/EatEasy_Excel_Json_MongoData_From_Location``.

    Both fields are forwarded, in this order, to ``location_all``.
    """

    location: str
    inside_location: str
37
+
38
class EatEasyUrlInput3(BaseModel):
    """Request body holding a single ``url`` string.

    Only referenced by the commented-out ``/EatEasy_Mongo_Data_Add_from_url``
    endpoint in this module.
    """

    url: str
40
+
41
class UrlInputTalabat1(BaseModel):
    """Request body holding a single ``url`` string.

    Only referenced by the commented-out ``/Talabat_MongoData_From_URL``
    endpoint in this module.
    """

    url: str
43
class UrlInputTalabat2(BaseModel):
    """Request body for ``/Talabat_Extract_excel_only``: a single ``url`` string."""

    url: str
45
class UrlInputTalabat3(BaseModel):
    """Request body holding a single ``url`` string.

    Only referenced by the commented-out ``/Talabat_Extract_Json_only``
    endpoint in this module.
    """

    url: str
47
class UrlInputTalabat4(BaseModel):
    """Request body for ``/Talabat_MongoData_From_Location``: a list under ``location``."""

    location: list
49
class UrlInputTalabat5(BaseModel):
    """Request body holding a single ``url`` string.

    Only referenced by the commented-out ``/Talabat_Mongo_Data_Add_from_url``
    endpoint in this module.
    """

    url: str
51
+
52
class UrlInputTalabat6(BaseModel):
    """Request body for ``/Talabat_MongoData_From_Multy_URLs``: a list under ``url``."""

    url: list
54
+
55
@app.get("/")
def Docsmain():
    """Redirect requests for the root path to ``/docs``."""
    docs_redirect = RedirectResponse(url="/docs")
    return docs_redirect
58
+
59
+
60
+ # @app.post("/EatEasy_Excel_Json_MongoData_From_URL",tags=["EatEasy"])
61
+ # def extract_eat_easy_data(url_input: EatEasyUrlInput):
62
+ # try:
63
+ # data = EatEasy_EveryThing_From_url(url_input.url)
64
+ # return {"message": "Extraction process completed successfully", "data": data}
65
+ # except HTTPException as e:
66
+ # raise e
67
+ # except Exception as e:
68
+ # raise HTTPException(status_code=500, detail=str(e))
69
+
70
+
71
@app.post("/EatEasy_Mongo_from_Multi_urls", tags=["EatEasy"])
def EatEasy_Mongo_from_Multi_Urls(url_input: EatEasyUrlInput0):
    """Pass the submitted URL list to ``multi_urls`` and wrap its result.

    Returns a dict with a fixed success message and whatever ``multi_urls``
    returned under ``"data"``. An ``HTTPException`` raised by the call
    propagates unchanged; any other exception is converted to an HTTP 500
    whose detail is the exception text.
    """
    try:
        result = multi_urls(url_input.url)
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
    return {"message": "Extraction process completed successfully", "data": result}
80
+
81
+
82
@app.post("/EatEasy_Extract_Excel_only", tags=["EatEasy"])
def extract_EatEasy_Excel_only(url_input: EatEasyUrlInput1):
    """Return the workbook produced by ``EatEasy_excel_extract`` as a download.

    ``EatEasy_excel_extract`` is expected to return ``(stream, filename)``
    where ``stream`` exposes ``getvalue()``. The bytes are sent as an
    attachment with the spreadsheet media type. An ``HTTPException``
    propagates unchanged; any other exception becomes an HTTP 500 whose
    detail is the exception text.
    """
    xlsx_media_type = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
    try:
        workbook_stream, filename = EatEasy_excel_extract(url_input.url)
        disposition = f'attachment; filename="{filename}"'
        return Response(
            content=workbook_stream.getvalue(),
            media_type=xlsx_media_type,
            headers={'Content-Disposition': disposition},
        )
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
94
+
95
+
96
+ # @app.post("/EatEasy_Extract_Json_only",tags=["EatEasy"])
97
+ # def extract_EatEasy_Json_only(url_input: EatEasyUrlInput2):
98
+ # try:
99
+ # json_data, restaurant_code = EatEasy_Json_extract(url_input.url)
100
+
101
+ # # Create an in-memory binary stream
102
+ # json_file = BytesIO()
103
+ # json_file.write(json_data.encode('utf-8'))
104
+ # json_file.seek(0)
105
+
106
+ # file_name = f"{restaurant_code}.json"
107
+
108
+ # headers = {
109
+ # 'Content-Disposition': f'attachment; filename="{file_name}"'
110
+ # }
111
+
112
+ # return Response(content=json_file.getvalue(), media_type='application/json', headers=headers)
113
+ # except HTTPException as e:
114
+ # raise e
115
+ # except Exception as e:
116
+ # raise HTTPException(status_code=500, detail=str(e))
117
+
118
+
119
+
120
+
121
@app.post("/EatEasy_Excel_Json_MongoData_From_Location", tags=["EatEasy"])
def extract_data(url_input: Location_input):
    """Call ``location_all`` with the submitted location pair and wrap its result.

    Returns a dict with a fixed success message and the value returned by
    ``location_all`` under ``"data"``. An ``HTTPException`` propagates
    unchanged; any other exception becomes an HTTP 500 whose detail is the
    exception text.
    """
    try:
        result = location_all(url_input.location, url_input.inside_location)
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
    return {"message": "Extraction process completed successfully", "data": result}
130
+
131
+
132
+ # @app.post("/EatEasy_Mongo_Data_Add_from_url",tags=["EatEasy"])
133
+ # def EatEasy_Mongo_Data_add_fun(url_input: EatEasyUrlInput3):
134
+ # try:
135
+ # data = EatEasy_Mongo_Data_add(url_input.url)
136
+ # return {"message": "Extraction process completed successfully", "data": data}
137
+ # except HTTPException as e:
138
+ # raise e
139
+ # except Exception as e:
140
+ # raise HTTPException(status_code=500, detail=str(e))
141
+
142
+
143
+
144
+ # @app.post("/Talabat_MongoData_From_URL", tags=["Talabat"])
145
+ # def extract_Talabat_data_all(url_input: UrlInputTalabat1):
146
+ # try:
147
+ # data = Talabat_EveryThing_From_url(url_input.url)
148
+ # return {"message": "Extraction process completed successfully", "data": data}
149
+ # except HTTPException as e:
150
+ # raise e
151
+ # except Exception as e:
152
+ # raise HTTPException(status_code=500, detail=str(e))
153
+
154
+
155
@app.post("/Talabat_Extract_excel_only", tags=["Talabat"])
def extract_Talabat_Excel_only(url_input: UrlInputTalabat2):
    """Return the workbook produced by ``Talabat_excel_extract`` as a download.

    ``Talabat_excel_extract`` is expected to return ``(stream, filename)``
    where ``stream`` exposes ``getvalue()``. The bytes are sent as an
    attachment with the spreadsheet media type.

    Raises:
        HTTPException: re-raised unchanged if the extractor raised one;
            otherwise a 500 whose detail is the original exception text.
    """
    try:
        excel_file, filename = Talabat_excel_extract(url_input.url)
        # Quote the filename so names containing spaces or separators still
        # form a valid header parameter (same form as the EatEasy Excel
        # endpoint in this module).
        headers = {"Content-Disposition": f'attachment; filename="{filename}"'}
        return Response(
            content=excel_file.getvalue(),
            media_type="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            headers=headers,
        )
    except HTTPException:
        raise
    except Exception as exc:
        # Chain the cause so server-side tracebacks show the real failure.
        raise HTTPException(status_code=500, detail=str(exc)) from exc
169
+
170
+
171
+ # @app.post("/Talabat_Extract_Json_only",tags=["Talabat"])
172
+ # def extract_Talabat_Json_only(url_input: UrlInputTalabat3):
173
+ # try:
174
+ # restaurant_name, output = Talabat_Json_extract(url_input.url)
175
+
176
+ # # Prepare response to return the JSON file as downloadable content
177
+ # response = Response(content=output.getvalue())
178
+ # response.headers["Content-Disposition"] = f"attachment; filename={restaurant_name}.json"
179
+ # response.headers["Content-Type"] = "application/json"
180
+
181
+ # return response
182
+ # except HTTPException as e:
183
+ # raise e
184
+ # except Exception as e:
185
+ # raise HTTPException(status_code=500, detail=str(e))
186
+
187
+
188
@app.post("/Talabat_MongoData_From_Location", tags=["Talabat"])
def extract_data_Talabat(url_input: UrlInputTalabat4):
    """Pass the submitted location list to ``Talabat_multy_LOCATION`` and wrap its result.

    Returns a dict with a fixed success message and the call's return value
    under ``"data"``. An ``HTTPException`` propagates unchanged; any other
    exception becomes an HTTP 500 whose detail is the exception text.
    """
    try:
        result = Talabat_multy_LOCATION(url_input.location)
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
    return {"message": "Extraction process completed successfully", "data": result}
197
+
198
+
199
+ # @app.post("/Talabat_Mongo_Data_Add_from_url",tags=["Talabat"])
200
+ # def Talabat_Mongo_Data_add_fun(url_input: UrlInputTalabat5):
201
+ # try:
202
+ # data = Talabat_mongo_data_add(url_input.url)
203
+ # return {"message": "Extraction process completed successfully", "data": data}
204
+ # except HTTPException as e:
205
+ # raise e
206
+ # except Exception as e:
207
+ # raise HTTPException(status_code=500, detail=str(e))
208
+
209
@app.post("/Talabat_MongoData_From_Multy_URLs", tags=["Talabat"])
def Talabat_MongoData_From_URLs(url_input: UrlInputTalabat6):
    """Pass the submitted URL list to ``multi_url`` and wrap its result.

    Returns a dict with a fixed success message and the call's return value
    under ``"data"``. An ``HTTPException`` propagates unchanged; any other
    exception becomes an HTTP 500 whose detail is the exception text.
    """
    try:
        result = multi_url(url_input.url)
    except HTTPException:
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail=str(exc))
    return {"message": "Extraction process completed successfully", "data": result}
218
+
219
+
220
if __name__ == "__main__":
    # Direct-execution entry point: serve ``app`` with uvicorn on every
    # interface, port 8000. uvicorn is imported here so that merely importing
    # this module does not require it.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)
config.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic_settings import BaseSettings, SettingsConfigDict
2
+ import os
3
+
4
class Settings(BaseSettings):
    """Application settings declared through pydantic-settings.

    ``MONGO_URL`` takes as its declared default the ``MONGO_URL`` environment
    variable as read when this class body executes, or the placeholder
    ``"FAKE"`` when that variable is unset. ``model_config`` names ``.env``
    as the dotenv file for the settings loader.
    """

    model_config = SettingsConfigDict(env_file=".env")

    MONGO_URL: str = os.environ.get("MONGO_URL", "FAKE")


# Module-level instance, imported elsewhere as ``from config import settings``.
settings = Settings()
mongo.py ADDED
@@ -0,0 +1 @@
 
 
1
# NOTE(review): despite its name, this holds a Windows filesystem path to a
# .env file rather than a MongoDB connection URL — confirm intent.
# Raw string: in a normal literal "\U" starts a \UXXXXXXXX escape (a
# SyntaxError here) and "\t" would silently become a tab character.
MONGO_URL = r"C:\Users\dipen\OneDrive\Desktop\Git_repo_hugging_face\talabat_eateasy\.env"
requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
fastapi
pydantic
pymongo
requests
beautifulsoup4
pandas
starlette
openpyxl
pydantic_settings
# Needed by the Dockerfile CMD ("uvicorn app:app ..."); not installed by plain fastapi.
uvicorn