wilmars committed on
Commit
cbd10bc
1 Parent(s): 925d3c3

add individual page scraper for each vehicle

Browse files
Files changed (4) hide show
  1. final_data.csv +0 -0
  2. src/extraction.py +27 -2
  3. src/transformation.py +2 -1
  4. testing.ipynb +178 -898
final_data.csv CHANGED
The diff for this file is too large to render. See raw diff
 
src/extraction.py CHANGED
@@ -1,22 +1,27 @@
1
  import requests
2
  from bs4 import BeautifulSoup
3
  import pandas as pd
 
 
 
4
 
5
  def main(product):
6
  list_df = []
7
  initial_df, follow = organize_page_data(product=product)
 
8
  list_df.append(initial_df)
9
  while True:
10
  print('follow_page: ', follow)
11
  follow_df, follow = organize_page_data(url=follow)
 
12
  list_df.append(follow_df)
13
  follow_df.rename(columns={None:product}, inplace=True)
14
  print(follow_df.columns)
15
  if follow is None:
16
  break
17
- return pd.concat(list_df)
18
 
19
- def organize_page_data(url: str = 'https://listado.mercadolibre.com.co' ,product= None):
20
  s = get_soup_by_url(url=url, product=product)
21
  products = get_all_product_names_for_page(s)
22
  follow = None
@@ -61,6 +66,26 @@ def get_all_product_urls_for_page(s):
61
  product_url = [h.get('href') for h in product_url]
62
  return product_url
63
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
 
65
  if __name__ == '__main__':
66
  data = main(product='carros')
 
1
  import requests
2
  from bs4 import BeautifulSoup
3
  import pandas as pd
4
+ import datetime
5
+
6
+ BASE_URL = 'https://listado.mercadolibre.com.co'
7
 
8
  def main(product):
9
  list_df = []
10
  initial_df, follow = organize_page_data(product=product)
11
+ initial_df['vehicle_info'] = initial_df['link'].apply(get_vehicle_info)
12
  list_df.append(initial_df)
13
  while True:
14
  print('follow_page: ', follow)
15
  follow_df, follow = organize_page_data(url=follow)
16
+ follow_df['vehicle_info'] = follow_df['link'].apply(get_vehicle_info)
17
  list_df.append(follow_df)
18
  follow_df.rename(columns={None:product}, inplace=True)
19
  print(follow_df.columns)
20
  if follow is None:
21
  break
22
+ return pd.concat(list_df)
23
 
24
+ def organize_page_data(url: str = BASE_URL ,product= None):
25
  s = get_soup_by_url(url=url, product=product)
26
  products = get_all_product_names_for_page(s)
27
  follow = None
 
66
  product_url = [h.get('href') for h in product_url]
67
  return product_url
68
 
69
def get_vehicle_info(url):
    """Scrape the detail page of one vehicle listing.

    The subtitle element is parsed as
    ``"<year> | <kilometrage>km · <publication date>"``.
    NOTE(review): that layout is inferred from the separators used below;
    confirm against a live listing page.

    Any piece that cannot be found on the page is reported as ``None``
    instead of raising. This function is applied row by row, so one
    malformed listing must not abort the whole scrape.

    Args:
        url: URL of the individual listing page.

    Returns:
        dict with the keys ``"Year"``, ``"Kilometrage"``,
        ``"Publication Date"``, ``"Location"``, ``"Pub Number"`` and
        ``"Created At"`` (local scrape time, ``%Y-%m-%d %H:%M:%S``).
    """
    s = get_soup_by_url(url)

    # find() returns None when nothing matches, unlike find_all(...)[0],
    # which raises IndexError.
    subtitle_tag = s.find('span', attrs={'class': 'ui-pdp-subtitle'})
    location_tag = s.find(
        'p', attrs={'class': 'ui-seller-info__status-info__subtitle'}
    )
    pub_number_tag = s.find(
        'span',
        attrs={'class': 'ui-pdp-color--BLACK ui-pdp-family--SEMIBOLD'},
    )

    year = None
    kilometrage = None
    publication_date = None
    if subtitle_tag is not None:
        # Dots are stripped from the whole subtitle (presumably thousands
        # separators in the mileage, e.g. "41.000").
        text = subtitle_tag.text.replace('.', '')
        parts = text.split(' · ')
        year_and_km = parts[0].split(' | ')
        year = year_and_km[0]
        if len(year_and_km) > 1:
            kilometrage = year_and_km[1].replace('km', '')
        if len(parts) > 1:
            publication_date = parts[1]

    location = location_tag.text if location_tag is not None else None
    pub_number = (
        pub_number_tag.text.replace('#', '')
        if pub_number_tag is not None
        else None
    )

    return {
        "Year": year,
        "Kilometrage": kilometrage,
        "Publication Date": publication_date,
        "Location": location,
        "Pub Number": pub_number,
        # Naive local timestamp of when the page was scraped.
        "Created At": datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
    }
88
+
89
 
90
  if __name__ == '__main__':
91
  data = main(product='carros')
src/transformation.py CHANGED
@@ -1,2 +1,3 @@
1
  import pandas as pd
2
- from extraction import *
 
 
1
  import pandas as pd
2
+ from extraction import *
3
+
testing.ipynb CHANGED
@@ -2,1102 +2,382 @@
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
- "execution_count": 7,
6
  "metadata": {},
7
- "outputs": [
8
- {
9
- "name": "stdout",
10
- "output_type": "stream",
11
- "text": [
12
- "The autoreload extension is already loaded. To reload it, use:\n",
13
- " %reload_ext autoreload\n"
14
- ]
15
- }
16
- ],
17
  "source": [
18
  "%load_ext autoreload\n",
19
  "%autoreload 2\n",
20
  "import requests\n",
21
  "from bs4 import BeautifulSoup\n",
22
- "import pandas as pd\n"
 
23
  ]
24
  },
25
  {
26
  "cell_type": "code",
27
- "execution_count": 8,
28
  "metadata": {},
29
  "outputs": [],
30
  "source": [
31
- "def get_data_from_li(html):\n",
32
- " li = html.find_all('li')\n",
33
- " data = [x.text for x in li]\n",
34
- " return data"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  ]
36
  },
37
  {
38
  "cell_type": "code",
39
- "execution_count": 9,
40
  "metadata": {},
41
  "outputs": [],
42
  "source": [
43
- "url = 'https://listado.mercadolibre.com.co/carro'\n",
44
- "r = requests.get(url=url)\n",
45
- "\n",
46
- "s = BeautifulSoup(r.content, 'html.parser')"
47
  ]
48
  },
49
  {
50
  "cell_type": "code",
51
  "execution_count": 11,
52
  "metadata": {},
53
- "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
54
  "source": [
55
- "def get_all_data_from_page(s):\n",
56
- "\n",
57
- " vehicle_names = s.find_all('h2', attrs= {\"class\":\"ui-search-item__title\"})\n",
58
- " vehicle_names = [v.text for v in vehicle_names]\n",
59
- " vehicle_locations = s.find_all('span', attrs= {\"class\":\"ui-search-item__group__element ui-search-item__location\"})\n",
60
- " vehicle_locations = [v.text for v in vehicle_locations]\n",
61
- "\n",
62
- " href = s.find_all('a', attrs= {\"class\":\"ui-search-item__group__element ui-search-link__title-card ui-search-link\"})\n",
63
- " href = [h.get('href') for h in href]\n",
64
- "\n",
65
- " divs = s.find_all('div', attrs= {\"class\":\"ui-search-result__wrapper\"})\n",
66
- " prices = [int(div.find_all('span', attrs= {\"class\":\"andes-money-amount__fraction\"})[0].text.replace('.','')) for div in divs]\n",
67
- " siguiente = [div.find('a')['href']\n",
68
- " for div in s.find_all('li', attrs={\"class\":\"andes-pagination__button andes-pagination__button--next\"}) \n",
69
- " if div.find('a') is not None][0]\n",
70
- "\n",
71
- "\n",
72
- " df = pd.DataFrame({'vehiculo':vehicle_names, 'seller_city': vehicle_locations, 'price':prices ,'link':href})\n",
73
- "\n",
74
- " return df, siguiente"
75
  ]
76
  },
77
  {
78
  "cell_type": "code",
79
- "execution_count": 12,
80
  "metadata": {},
81
  "outputs": [],
82
  "source": [
83
- "df, siguiente = get_all_data_from_page(s)\n"
 
 
84
  ]
85
  },
86
  {
87
  "cell_type": "code",
88
- "execution_count": 252,
89
  "metadata": {},
90
  "outputs": [
91
  {
92
- "name": "stdout",
93
- "output_type": "stream",
94
- "text": [
95
- "1\n",
96
- "https://carros.mercadolibre.com.co/carro_Desde_49_NoIndex_True\n",
97
- "2\n",
98
- "https://carros.mercadolibre.com.co/carro_Desde_97_NoIndex_True\n",
99
- "3\n",
100
- "https://carros.mercadolibre.com.co/carro_Desde_145_NoIndex_True\n",
101
- "4\n",
102
- "https://carros.mercadolibre.com.co/carro_Desde_193_NoIndex_True\n",
103
- "5\n",
104
- "https://carros.mercadolibre.com.co/carro_Desde_241_NoIndex_True\n",
105
- "6\n",
106
- "https://carros.mercadolibre.com.co/carro_Desde_289_NoIndex_True\n",
107
- "7\n",
108
- "https://carros.mercadolibre.com.co/carro_Desde_337_NoIndex_True\n",
109
- "8\n",
110
- "https://carros.mercadolibre.com.co/carro_Desde_385_NoIndex_True\n",
111
- "9\n",
112
- "https://carros.mercadolibre.com.co/carro_Desde_433_NoIndex_True\n",
113
- "10\n",
114
- "https://carros.mercadolibre.com.co/carro_Desde_481_NoIndex_True\n",
115
- "11\n",
116
- "https://carros.mercadolibre.com.co/carro_Desde_529_NoIndex_True\n",
117
- "12\n",
118
- "https://carros.mercadolibre.com.co/carro_Desde_577_NoIndex_True\n",
119
- "13\n",
120
- "https://carros.mercadolibre.com.co/carro_Desde_625_NoIndex_True\n",
121
- "14\n",
122
- "https://carros.mercadolibre.com.co/carro_Desde_673_NoIndex_True\n",
123
- "15\n",
124
- "https://carros.mercadolibre.com.co/carro_Desde_721_NoIndex_True\n",
125
- "16\n",
126
- "https://carros.mercadolibre.com.co/carro_Desde_769_NoIndex_True\n",
127
- "17\n",
128
- "https://carros.mercadolibre.com.co/carro_Desde_817_NoIndex_True\n",
129
- "18\n",
130
- "https://carros.mercadolibre.com.co/carro_Desde_865_NoIndex_True\n",
131
- "19\n",
132
- "https://carros.mercadolibre.com.co/carro_Desde_913_NoIndex_True\n",
133
- "20\n",
134
- "https://carros.mercadolibre.com.co/carro_Desde_961_NoIndex_True\n",
135
- "21\n",
136
- "https://carros.mercadolibre.com.co/carro_Desde_1009_NoIndex_True\n",
137
- "22\n",
138
- "https://carros.mercadolibre.com.co/carro_Desde_1057_NoIndex_True\n",
139
- "23\n",
140
- "https://carros.mercadolibre.com.co/carro_Desde_1105_NoIndex_True\n",
141
- "24\n",
142
- "https://carros.mercadolibre.com.co/carro_Desde_1153_NoIndex_True\n",
143
- "25\n",
144
- "https://carros.mercadolibre.com.co/carro_Desde_1201_NoIndex_True\n",
145
- "26\n",
146
- "https://carros.mercadolibre.com.co/carro_Desde_1249_NoIndex_True\n",
147
- "27\n",
148
- "https://carros.mercadolibre.com.co/carro_Desde_1297_NoIndex_True\n",
149
- "28\n",
150
- "https://carros.mercadolibre.com.co/carro_Desde_1345_NoIndex_True\n",
151
- "29\n",
152
- "https://carros.mercadolibre.com.co/carro_Desde_1393_NoIndex_True\n",
153
- "30\n",
154
- "https://carros.mercadolibre.com.co/carro_Desde_1441_NoIndex_True\n",
155
- "31\n",
156
- "https://carros.mercadolibre.com.co/carro_Desde_1489_NoIndex_True\n",
157
- "32\n",
158
- "https://carros.mercadolibre.com.co/carro_Desde_1537_NoIndex_True\n",
159
- "33\n",
160
- "https://carros.mercadolibre.com.co/carro_Desde_1585_NoIndex_True\n",
161
- "34\n",
162
- "https://carros.mercadolibre.com.co/carro_Desde_1633_NoIndex_True\n",
163
- "35\n",
164
- "https://carros.mercadolibre.com.co/carro_Desde_1681_NoIndex_True\n",
165
- "36\n",
166
- "https://carros.mercadolibre.com.co/carro_Desde_1729_NoIndex_True\n",
167
- "37\n",
168
- "https://carros.mercadolibre.com.co/carro_Desde_1777_NoIndex_True\n",
169
- "38\n",
170
- "https://carros.mercadolibre.com.co/carro_Desde_1825_NoIndex_True\n",
171
- "39\n",
172
- "https://carros.mercadolibre.com.co/carro_Desde_1873_NoIndex_True\n",
173
- "40\n",
174
- "https://carros.mercadolibre.com.co/carro_Desde_1921_NoIndex_True\n",
175
- "41\n",
176
- "https://carros.mercadolibre.com.co/carro_Desde_1969_NoIndex_True\n",
177
- "42\n"
178
- ]
179
- },
180
- {
181
- "ename": "IndexError",
182
- "evalue": "list index out of range",
183
- "output_type": "error",
184
- "traceback": [
185
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
186
- "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
187
- "Cell \u001b[0;32mIn[252], line 9\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m r\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m200\u001b[39m:\n\u001b[1;32m 8\u001b[0m s \u001b[38;5;241m=\u001b[39m BeautifulSoup(r\u001b[38;5;241m.\u001b[39mcontent, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhtml.parser\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m----> 9\u001b[0m df, siguiente \u001b[38;5;241m=\u001b[39m \u001b[43mget_all_data_from_page\u001b[49m\u001b[43m(\u001b[49m\u001b[43ms\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 10\u001b[0m list_df\u001b[38;5;241m.\u001b[39mappend(df)\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28mprint\u001b[39m(siguiente) \n",
188
- "Cell \u001b[0;32mIn[250], line 13\u001b[0m, in \u001b[0;36mget_all_data_from_page\u001b[0;34m(s)\u001b[0m\n\u001b[1;32m 11\u001b[0m divs \u001b[38;5;241m=\u001b[39m s\u001b[38;5;241m.\u001b[39mfind_all(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mdiv\u001b[39m\u001b[38;5;124m'\u001b[39m, attrs\u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mclass\u001b[39m\u001b[38;5;124m\"\u001b[39m:\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mui-search-result__wrapper\u001b[39m\u001b[38;5;124m\"\u001b[39m})\n\u001b[1;32m 12\u001b[0m prices \u001b[38;5;241m=\u001b[39m [\u001b[38;5;28mint\u001b[39m(div\u001b[38;5;241m.\u001b[39mfind_all(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mspan\u001b[39m\u001b[38;5;124m'\u001b[39m, attrs\u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mclass\u001b[39m\u001b[38;5;124m\"\u001b[39m:\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mandes-money-amount__fraction\u001b[39m\u001b[38;5;124m\"\u001b[39m})[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mtext\u001b[38;5;241m.\u001b[39mreplace(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m'\u001b[39m)) \u001b[38;5;28;01mfor\u001b[39;00m div \u001b[38;5;129;01min\u001b[39;00m divs]\n\u001b[0;32m---> 13\u001b[0m siguiente \u001b[38;5;241m=\u001b[39m \u001b[43m[\u001b[49m\u001b[43mdiv\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfind\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43ma\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mhref\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\n\u001b[1;32m 14\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mdiv\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m 
\u001b[49m\u001b[43ms\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfind_all\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mli\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mattrs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mclass\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mandes-pagination__button andes-pagination__button--next\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\n\u001b[1;32m 15\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mdiv\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfind\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43ma\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mis\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m]\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\n\u001b[1;32m 18\u001b[0m df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame({\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mvehiculo\u001b[39m\u001b[38;5;124m'\u001b[39m:vehicle_names, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mseller_city\u001b[39m\u001b[38;5;124m'\u001b[39m: vehicle_locations, \u001b[38;5;124m'\u001b[39m\u001b[38;5;124mprice\u001b[39m\u001b[38;5;124m'\u001b[39m:prices ,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mlink\u001b[39m\u001b[38;5;124m'\u001b[39m:href})\n\u001b[1;32m 20\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m df, siguiente\n",
189
- "\u001b[0;31mIndexError\u001b[0m: list index out of range"
190
- ]
191
  }
192
  ],
193
  "source": [
194
- "siguiente = url\n",
195
- "list_df = []\n",
196
- "i = 1\n",
197
- "while True:\n",
198
- " print(i)\n",
199
- " r = requests.get(url=siguiente)\n",
200
- " if r.status_code == 200:\n",
201
- " s = BeautifulSoup(r.content, 'html.parser')\n",
202
- " df, siguiente = get_all_data_from_page(s)\n",
203
- " list_df.append(df)\n",
204
- " print(siguiente)\n",
205
- " i=i+1\n",
206
- " else:\n",
207
- " break\n",
208
- " "
209
  ]
210
  },
211
  {
212
  "cell_type": "code",
213
- "execution_count": 258,
214
  "metadata": {},
215
  "outputs": [],
216
- "source": [
217
- "all_vehicles = pd.concat(list_df)"
218
- ]
219
  },
220
  {
221
  "cell_type": "code",
222
- "execution_count": 262,
223
  "metadata": {},
224
  "outputs": [
225
  {
226
  "data": {
227
- "text/html": [
228
- "<div>\n",
229
- "<style scoped>\n",
230
- " .dataframe tbody tr th:only-of-type {\n",
231
- " vertical-align: middle;\n",
232
- " }\n",
233
- "\n",
234
- " .dataframe tbody tr th {\n",
235
- " vertical-align: top;\n",
236
- " }\n",
237
- "\n",
238
- " .dataframe thead th {\n",
239
- " text-align: right;\n",
240
- " }\n",
241
- "</style>\n",
242
- "<table border=\"1\" class=\"dataframe\">\n",
243
- " <thead>\n",
244
- " <tr style=\"text-align: right;\">\n",
245
- " <th></th>\n",
246
- " <th>vehiculo</th>\n",
247
- " <th>seller_city</th>\n",
248
- " <th>price</th>\n",
249
- " <th>link</th>\n",
250
- " </tr>\n",
251
- " </thead>\n",
252
- " <tbody>\n",
253
- " <tr>\n",
254
- " <th>0</th>\n",
255
- " <td>Volkswagen Gol 1.6 Power</td>\n",
256
- " <td>Tunjuelito - Bogotá D.C.</td>\n",
257
- " <td>33800000</td>\n",
258
- " <td>https://carro.mercadolibre.com.co/MCO-13785388...</td>\n",
259
- " </tr>\n",
260
- " <tr>\n",
261
- " <th>34</th>\n",
262
- " <td>Volkswagen Gol 1.6 Power</td>\n",
263
- " <td>Kennedy - Bogotá D.C.</td>\n",
264
- " <td>25700000</td>\n",
265
- " <td>https://carro.mercadolibre.com.co/MCO-13699588...</td>\n",
266
- " </tr>\n",
267
- " </tbody>\n",
268
- "</table>\n",
269
- "</div>"
270
- ],
271
  "text/plain": [
272
- " vehiculo seller_city price \\\n",
273
- "0 Volkswagen Gol 1.6 Power Tunjuelito - Bogotá D.C. 33800000 \n",
274
- "34 Volkswagen Gol 1.6 Power Kennedy - Bogotá D.C. 25700000 \n",
275
- "\n",
276
- " link \n",
277
- "0 https://carro.mercadolibre.com.co/MCO-13785388... \n",
278
- "34 https://carro.mercadolibre.com.co/MCO-13699588... "
279
  ]
280
  },
281
- "execution_count": 262,
282
  "metadata": {},
283
  "output_type": "execute_result"
284
  }
285
  ],
286
  "source": [
287
- "all_vehicles[all_vehicles['vehiculo']=='Volkswagen Gol 1.6 Power']"
 
288
  ]
289
  },
290
  {
291
  "cell_type": "code",
292
- "execution_count": 236,
293
  "metadata": {},
294
  "outputs": [
295
  {
296
  "data": {
297
  "text/plain": [
298
- "'https://carros.mercadolibre.com.co/carro_NoIndex_True'"
 
 
 
 
 
299
  ]
300
  },
301
- "execution_count": 236,
302
  "metadata": {},
303
  "output_type": "execute_result"
304
  }
305
  ],
306
  "source": [
307
- "siguiente"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
308
  ]
309
  },
310
  {
311
  "cell_type": "code",
312
- "execution_count": 235,
 
 
 
 
 
 
 
313
  "metadata": {},
314
  "outputs": [
315
  {
316
  "data": {
317
  "text/plain": [
318
- "'https://carros.mercadolibre.com.co/carro_NoIndex_True'"
319
  ]
320
  },
321
- "execution_count": 235,
322
  "metadata": {},
323
  "output_type": "execute_result"
324
  }
325
  ],
326
- "source": [
327
- "siguiente"
328
- ]
329
  },
330
  {
331
  "cell_type": "code",
332
- "execution_count": 248,
333
  "metadata": {},
334
  "outputs": [],
335
  "source": [
336
- "siguiente = [div.find('a')['href']\n",
337
- " for div in s.find_all('li', attrs={\"class\":\"andes-pagination__button andes-pagination__button--next\"}) \n",
338
- " if div.find('a') is not None][0]"
339
  ]
340
  },
341
  {
342
  "cell_type": "code",
343
- "execution_count": 249,
344
  "metadata": {},
345
  "outputs": [
346
  {
347
  "data": {
348
  "text/plain": [
349
- "'https://carros.mercadolibre.com.co/carro_Desde_97_NoIndex_True'"
350
  ]
351
  },
352
- "execution_count": 249,
353
  "metadata": {},
354
  "output_type": "execute_result"
355
  }
356
  ],
357
  "source": [
358
- "siguiente"
359
  ]
360
  },
361
  {
362
  "cell_type": "code",
363
- "execution_count": 247,
364
  "metadata": {},
365
  "outputs": [
366
  {
367
- "data": {
368
- "text/plain": [
369
- "'https://carros.mercadolibre.com.co/carro_Desde_97_NoIndex_True'"
370
- ]
371
- },
372
- "execution_count": 247,
373
- "metadata": {},
374
- "output_type": "execute_result"
 
375
  }
376
  ],
377
  "source": [
378
- "s.find_all('li', attrs={\"class\":\"andes-pagination__button andes-pagination__button--next\"})[0].find_all('a')[0]['href']"
 
379
  ]
380
  },
381
  {
382
  "cell_type": "code",
383
- "execution_count": null,
384
  "metadata": {},
385
  "outputs": [],
386
- "source": []
 
 
 
 
 
 
387
  },
388
  {
389
  "cell_type": "code",
390
- "execution_count": 145,
391
  "metadata": {},
392
  "outputs": [
393
  {
394
  "data": {
395
  "text/plain": [
396
- "[['2020', '41.000 Km'],\n",
397
- " ['2022', '13.905 Km'],\n",
398
- " ['2019', '25.700 Km'],\n",
399
- " ['2020', '50.255 Km'],\n",
400
- " ['2023', '6.800 Km'],\n",
401
- " ['2012', '108.500 Km'],\n",
402
- " ['2021', '20.000 Km'],\n",
403
- " ['2012', '137.305 Km'],\n",
404
- " ['2020', '47.000 Km'],\n",
405
- " ['2022', '7.200 Km'],\n",
406
- " ['2022', '26.000 Km'],\n",
407
- " ['2021', '18.200 Km'],\n",
408
- " ['2011', '122.259 Km'],\n",
409
- " ['2019', '34.500 Km'],\n",
410
- " ['2019', '9.100 Km'],\n",
411
- " ['2011', '128.200 Km'],\n",
412
- " ['2010', '92.500 Km'],\n",
413
- " ['2007', '119.500 Km'],\n",
414
- " ['2022', '21.700 Km'],\n",
415
- " ['2017', '34.633 Km'],\n",
416
- " ['2016', '76.000 Km'],\n",
417
- " ['2015', '76.300 Km'],\n",
418
- " ['2013', '115.000 Km'],\n",
419
- " ['2023', '3.000 Km'],\n",
420
- " ['2015', '106.000 Km'],\n",
421
- " ['2013', '125.000 Km'],\n",
422
- " ['2018', '42.000 Km'],\n",
423
- " ['2020', '35.000 Km'],\n",
424
- " ['2023', '6.156 Km'],\n",
425
- " ['2013', '93.000 Km'],\n",
426
- " ['2022', '15.000 Km'],\n",
427
- " ['2013', '137.000 Km'],\n",
428
- " ['2020', '43.000 Km'],\n",
429
- " ['2016', '83.415 Km'],\n",
430
- " ['2015', '74.450 Km'],\n",
431
- " ['2011', '89.300 Km'],\n",
432
- " ['2020', '21.000 Km'],\n",
433
- " ['2019', '43.500 Km'],\n",
434
- " ['2022', '18.900 Km'],\n",
435
- " ['2008', '171.000 Km'],\n",
436
- " ['2021', '29.897 Km'],\n",
437
- " ['2014', '47.500 Km'],\n",
438
- " ['2017', '52.000 Km'],\n",
439
- " ['2019', '61.000 Km'],\n",
440
- " ['2018', '56.000 Km'],\n",
441
- " ['2021', '29.000 Km'],\n",
442
- " ['2023', '5.200 Km'],\n",
443
- " ['2021', '19.300 Km']]"
444
  ]
445
  },
446
- "execution_count": 145,
447
  "metadata": {},
448
  "output_type": "execute_result"
449
  }
450
  ],
451
  "source": [
452
- "model_km_list = s.find_all('ul', attrs= {\"class\":\"ui-search-card-attributes ui-search-item__group__element\"})\n",
453
- "model_km_list = [get_data_from_li(x) for x in model_km_list]\n",
454
- "model_km_list"
455
- ]
456
- },
457
- {
458
- "cell_type": "code",
459
- "execution_count": 151,
460
- "metadata": {},
461
- "outputs": [],
462
- "source": [
463
- "siguiente = [[div.find('a')['href'], \n",
464
- " int(get_data_from_li(div)[0]),\n",
465
- " int(get_data_from_li(div)[1].split(' ')[1])\n",
466
- " ] \n",
467
- " for div in s.find_all('div', attrs={\"class\":\"ui-search-pagination\"}) \n",
468
- " if div.find('a') is not None][0]"
469
  ]
470
  },
471
  {
472
  "cell_type": "code",
473
- "execution_count": 153,
474
  "metadata": {},
475
  "outputs": [
476
  {
477
- "data": {
478
- "text/plain": [
479
- "['https://carros.mercadolibre.com.co/carro_Desde_49_NoIndex_True', 1, 42]"
480
- ]
481
- },
482
- "execution_count": 153,
483
- "metadata": {},
484
- "output_type": "execute_result"
 
485
  }
486
  ],
487
  "source": [
488
- "siguiente"
489
  ]
490
  },
491
  {
492
  "cell_type": "code",
493
- "execution_count": 201,
 
 
 
 
 
 
 
494
  "metadata": {},
495
  "outputs": [],
496
  "source": [
497
- "df['page']= siguiente[1]"
498
  ]
499
  },
500
  {
501
  "cell_type": "code",
502
- "execution_count": 202,
503
  "metadata": {},
504
  "outputs": [
505
  {
506
  "data": {
507
- "text/html": [
508
- "<div>\n",
509
- "<style scoped>\n",
510
- " .dataframe tbody tr th:only-of-type {\n",
511
- " vertical-align: middle;\n",
512
- " }\n",
513
- "\n",
514
- " .dataframe tbody tr th {\n",
515
- " vertical-align: top;\n",
516
- " }\n",
517
- "\n",
518
- " .dataframe thead th {\n",
519
- " text-align: right;\n",
520
- " }\n",
521
- "</style>\n",
522
- "<table border=\"1\" class=\"dataframe\">\n",
523
- " <thead>\n",
524
- " <tr style=\"text-align: right;\">\n",
525
- " <th></th>\n",
526
- " <th>vehiculo</th>\n",
527
- " <th>seller_city</th>\n",
528
- " <th>price</th>\n",
529
- " <th>link</th>\n",
530
- " <th>page</th>\n",
531
- " </tr>\n",
532
- " </thead>\n",
533
- " <tbody>\n",
534
- " <tr>\n",
535
- " <th>0</th>\n",
536
- " <td>Mazda 3 2.0 Touring</td>\n",
537
- " <td>Usaquén - Bogotá D.C.</td>\n",
538
- " <td>75000000</td>\n",
539
- " <td>https://carro.mercadolibre.com.co/MCO-21353664...</td>\n",
540
- " <td>1</td>\n",
541
- " </tr>\n",
542
- " <tr>\n",
543
- " <th>1</th>\n",
544
- " <td>Honda Cr-v 2.4 Ex</td>\n",
545
- " <td>Suba - Bogotá D.C.</td>\n",
546
- " <td>37000000</td>\n",
547
- " <td>https://carro.mercadolibre.com.co/MCO-21353664...</td>\n",
548
- " <td>1</td>\n",
549
- " </tr>\n",
550
- " <tr>\n",
551
- " <th>2</th>\n",
552
- " <td>Chevrolet Tracker Ls</td>\n",
553
- " <td>Kennedy - Bogotá D.C.</td>\n",
554
- " <td>43000000</td>\n",
555
- " <td>https://carro.mercadolibre.com.co/MCO-21355926...</td>\n",
556
- " <td>1</td>\n",
557
- " </tr>\n",
558
- " <tr>\n",
559
- " <th>3</th>\n",
560
- " <td>Kia Rio 1.4</td>\n",
561
- " <td>Medellín - Antioquia</td>\n",
562
- " <td>41000000</td>\n",
563
- " <td>https://carro.mercadolibre.com.co/MCO-21352382...</td>\n",
564
- " <td>1</td>\n",
565
- " </tr>\n",
566
- " <tr>\n",
567
- " <th>4</th>\n",
568
- " <td>Volkswagen Gol 1.6 Trendline</td>\n",
569
- " <td>Rionegro - Antioquia</td>\n",
570
- " <td>45000000</td>\n",
571
- " <td>https://carro.mercadolibre.com.co/MCO-21351601...</td>\n",
572
- " <td>1</td>\n",
573
- " </tr>\n",
574
- " <tr>\n",
575
- " <th>5</th>\n",
576
- " <td>Toyota Fortuner 2.7 Sw4 Street</td>\n",
577
- " <td>Usaquén - Bogotá D.C.</td>\n",
578
- " <td>175000000</td>\n",
579
- " <td>https://carro.mercadolibre.com.co/MCO-13782022...</td>\n",
580
- " <td>1</td>\n",
581
- " </tr>\n",
582
- " <tr>\n",
583
- " <th>6</th>\n",
584
- " <td>Mercedes-benz Clase A 1.3 Amg Line 2022</td>\n",
585
- " <td>Fontibón - Bogotá D.C.</td>\n",
586
- " <td>138000000</td>\n",
587
- " <td>https://carro.mercadolibre.com.co/MCO-13782653...</td>\n",
588
- " <td>1</td>\n",
589
- " </tr>\n",
590
- " <tr>\n",
591
- " <th>7</th>\n",
592
- " <td>Volvo Xc40 2.0 T5 Awd Momentum 4x4 2019</td>\n",
593
- " <td>Suba - Bogotá D.C.</td>\n",
594
- " <td>136500000</td>\n",
595
- " <td>https://carro.mercadolibre.com.co/MCO-13782264...</td>\n",
596
- " <td>1</td>\n",
597
- " </tr>\n",
598
- " <tr>\n",
599
- " <th>8</th>\n",
600
- " <td>Volkswagen Amarok Trendline</td>\n",
601
- " <td>Kennedy - Bogotá D.C.</td>\n",
602
- " <td>104000000</td>\n",
603
- " <td>https://carro.mercadolibre.com.co/MCO-21339355...</td>\n",
604
- " <td>1</td>\n",
605
- " </tr>\n",
606
- " <tr>\n",
607
- " <th>9</th>\n",
608
- " <td>Mazda 2 1.5 Touring</td>\n",
609
- " <td>Medellín - Antioquia</td>\n",
610
- " <td>76900000</td>\n",
611
- " <td>https://carro.mercadolibre.com.co/MCO-21339740...</td>\n",
612
- " <td>1</td>\n",
613
- " </tr>\n",
614
- " <tr>\n",
615
- " <th>10</th>\n",
616
- " <td>Mitsubishi Montero 3.0 V13</td>\n",
617
- " <td>Medellín - Antioquia</td>\n",
618
- " <td>95000000</td>\n",
619
- " <td>https://carro.mercadolibre.com.co/MCO-21339478...</td>\n",
620
- " <td>1</td>\n",
621
- " </tr>\n",
622
- " <tr>\n",
623
- " <th>11</th>\n",
624
- " <td>Toyota Fortuner 2.7l</td>\n",
625
- " <td>Medellín - Antioquia</td>\n",
626
- " <td>192000000</td>\n",
627
- " <td>https://carro.mercadolibre.com.co/MCO-13781937...</td>\n",
628
- " <td>1</td>\n",
629
- " </tr>\n",
630
- " <tr>\n",
631
- " <th>12</th>\n",
632
- " <td>Kia Rio Ub Ex</td>\n",
633
- " <td>Engativa - Bogotá D.C.</td>\n",
634
- " <td>33500000</td>\n",
635
- " <td>https://carro.mercadolibre.com.co/MCO-21339596...</td>\n",
636
- " <td>1</td>\n",
637
- " </tr>\n",
638
- " <tr>\n",
639
- " <th>13</th>\n",
640
- " <td>Fiat Mobi Easy</td>\n",
641
- " <td>San Cristobal Sur - Bogotá D.C.</td>\n",
642
- " <td>37000000</td>\n",
643
- " <td>https://carro.mercadolibre.com.co/MCO-21340333...</td>\n",
644
- " <td>1</td>\n",
645
- " </tr>\n",
646
- " <tr>\n",
647
- " <th>14</th>\n",
648
- " <td>Renault Koleos 2.5 Intens Cvt 4x4</td>\n",
649
- " <td>Suba - Bogotá D.C.</td>\n",
650
- " <td>132990000</td>\n",
651
- " <td>https://carro.mercadolibre.com.co/MCO-21339336...</td>\n",
652
- " <td>1</td>\n",
653
- " </tr>\n",
654
- " <tr>\n",
655
- " <th>15</th>\n",
656
- " <td>Fiat Mobi 1.0 Like</td>\n",
657
- " <td>Engativa - Bogotá D.C.</td>\n",
658
- " <td>42500000</td>\n",
659
- " <td>https://carro.mercadolibre.com.co/MCO-21340195...</td>\n",
660
- " <td>1</td>\n",
661
- " </tr>\n",
662
- " <tr>\n",
663
- " <th>16</th>\n",
664
- " <td>Honda Cr-v 2.4 L 5dr 2wd Lx 2021</td>\n",
665
- " <td>Chapinero - Bogotá D.C.</td>\n",
666
- " <td>135000000</td>\n",
667
- " <td>https://carro.mercadolibre.com.co/MCO-21339453...</td>\n",
668
- " <td>1</td>\n",
669
- " </tr>\n",
670
- " <tr>\n",
671
- " <th>17</th>\n",
672
- " <td>Ford F-150 Xlt At</td>\n",
673
- " <td>Fontibón - Bogotá D.C.</td>\n",
674
- " <td>95000000</td>\n",
675
- " <td>https://carro.mercadolibre.com.co/MCO-21339448...</td>\n",
676
- " <td>1</td>\n",
677
- " </tr>\n",
678
- " <tr>\n",
679
- " <th>18</th>\n",
680
- " <td>Mazda 3 2.0 Touring</td>\n",
681
- " <td>Suba - Bogotá D.C.</td>\n",
682
- " <td>72000000</td>\n",
683
- " <td>https://carro.mercadolibre.com.co/MCO-13781890...</td>\n",
684
- " <td>1</td>\n",
685
- " </tr>\n",
686
- " <tr>\n",
687
- " <th>19</th>\n",
688
- " <td>Lexus Lx 5.7 570 Super Sport S 4x4 2019</td>\n",
689
- " <td>Suba - Bogotá D.C.</td>\n",
690
- " <td>639900000</td>\n",
691
- " <td>https://carro.mercadolibre.com.co/MCO-21339687...</td>\n",
692
- " <td>1</td>\n",
693
- " </tr>\n",
694
- " <tr>\n",
695
- " <th>20</th>\n",
696
- " <td>Nissan Frontier 2.4 Dx</td>\n",
697
- " <td>Antonio Nariño - Bogotá D.C.</td>\n",
698
- " <td>50000000</td>\n",
699
- " <td>https://carro.mercadolibre.com.co/MCO-21340053...</td>\n",
700
- " <td>1</td>\n",
701
- " </tr>\n",
702
- " <tr>\n",
703
- " <th>21</th>\n",
704
- " <td>Ford Escape 3.0 Xlt</td>\n",
705
- " <td>Usaquén - Bogotá D.C.</td>\n",
706
- " <td>41800000</td>\n",
707
- " <td>https://carro.mercadolibre.com.co/MCO-21339532...</td>\n",
708
- " <td>1</td>\n",
709
- " </tr>\n",
710
- " <tr>\n",
711
- " <th>22</th>\n",
712
- " <td>Volkswagen Jetta 2.0 2007</td>\n",
713
- " <td>Usaquén - Bogotá D.C.</td>\n",
714
- " <td>24500000</td>\n",
715
- " <td>https://carro.mercadolibre.com.co/MCO-13781632...</td>\n",
716
- " <td>1</td>\n",
717
- " </tr>\n",
718
- " <tr>\n",
719
- " <th>23</th>\n",
720
- " <td>Renault Duster Oroch 2.0 Intens Mt 4x4 2022</td>\n",
721
- " <td>Usaquén - Bogotá D.C.</td>\n",
722
- " <td>85000000</td>\n",
723
- " <td>https://carro.mercadolibre.com.co/MCO-13781407...</td>\n",
724
- " <td>1</td>\n",
725
- " </tr>\n",
726
- " <tr>\n",
727
- " <th>24</th>\n",
728
- " <td>Chevrolet Spark Gt Ltz</td>\n",
729
- " <td>Kennedy - Bogotá D.C.</td>\n",
730
- " <td>35800000</td>\n",
731
- " <td>https://carro.mercadolibre.com.co/MCO-13781006...</td>\n",
732
- " <td>1</td>\n",
733
- " </tr>\n",
734
- " <tr>\n",
735
- " <th>25</th>\n",
736
- " <td>Suzuki Grand Vitara 2.4 Sz Glx Sport 4x2</td>\n",
737
- " <td>Suba - Bogotá D.C.</td>\n",
738
- " <td>61500000</td>\n",
739
- " <td>https://carro.mercadolibre.com.co/MCO-13781494...</td>\n",
740
- " <td>1</td>\n",
741
- " </tr>\n",
742
- " <tr>\n",
743
- " <th>26</th>\n",
744
- " <td>Nissan Sentra 1.8 B17 Advance At</td>\n",
745
- " <td>Engativa - Bogotá D.C.</td>\n",
746
- " <td>43000000</td>\n",
747
- " <td>https://carro.mercadolibre.com.co/MCO-21332883...</td>\n",
748
- " <td>1</td>\n",
749
- " </tr>\n",
750
- " <tr>\n",
751
- " <th>27</th>\n",
752
- " <td>Renault Duster 1.6 Expression 2013</td>\n",
753
- " <td>Barrios Unidos - Bogotá D.C.</td>\n",
754
- " <td>37500000</td>\n",
755
- " <td>https://carro.mercadolibre.com.co/MCO-13781212...</td>\n",
756
- " <td>1</td>\n",
757
- " </tr>\n",
758
- " <tr>\n",
759
- " <th>28</th>\n",
760
- " <td>Subaru Forester 2.5 Advance 2023</td>\n",
761
- " <td>Usaquén - Bogotá D.C.</td>\n",
762
- " <td>135000000</td>\n",
763
- " <td>https://carro.mercadolibre.com.co/MCO-13780952...</td>\n",
764
- " <td>1</td>\n",
765
- " </tr>\n",
766
- " <tr>\n",
767
- " <th>29</th>\n",
768
- " <td>Mercedes-benz Clase Cla 1.6 2015</td>\n",
769
- " <td>Usaquén - Bogotá D.C.</td>\n",
770
- " <td>68000000</td>\n",
771
- " <td>https://carro.mercadolibre.com.co/MCO-13780810...</td>\n",
772
- " <td>1</td>\n",
773
- " </tr>\n",
774
- " <tr>\n",
775
- " <th>30</th>\n",
776
- " <td>Mazda 3 1.6 All New</td>\n",
777
- " <td>Medellín - Antioquia</td>\n",
778
- " <td>40900000</td>\n",
779
- " <td>https://carro.mercadolibre.com.co/MCO-21331854...</td>\n",
780
- " <td>1</td>\n",
781
- " </tr>\n",
782
- " <tr>\n",
783
- " <th>31</th>\n",
784
- " <td>Landwind Landwind X5 Plus Jx6460lmm</td>\n",
785
- " <td>Kennedy - Bogotá D.C.</td>\n",
786
- " <td>43000000</td>\n",
787
- " <td>https://carro.mercadolibre.com.co/MCO-21331695...</td>\n",
788
- " <td>1</td>\n",
789
- " </tr>\n",
790
- " <tr>\n",
791
- " <th>32</th>\n",
792
- " <td>Bmw X4 2.0 Xdrive30i</td>\n",
793
- " <td>Usaquén - Bogotá D.C.</td>\n",
794
- " <td>175000000</td>\n",
795
- " <td>https://carro.mercadolibre.com.co/MCO-13778695...</td>\n",
796
- " <td>1</td>\n",
797
- " </tr>\n",
798
- " <tr>\n",
799
- " <th>33</th>\n",
800
- " <td>Mazda 2 Touring</td>\n",
801
- " <td>Kennedy - Bogotá D.C.</td>\n",
802
- " <td>78000000</td>\n",
803
- " <td>https://carro.mercadolibre.com.co/MCO-13778682...</td>\n",
804
- " <td>1</td>\n",
805
- " </tr>\n",
806
- " <tr>\n",
807
- " <th>34</th>\n",
808
- " <td>Nissan Sentra E</td>\n",
809
- " <td>Kennedy - Bogotá D.C.</td>\n",
810
- " <td>31000000</td>\n",
811
- " <td>https://carro.mercadolibre.com.co/MCO-13778293...</td>\n",
812
- " <td>1</td>\n",
813
- " </tr>\n",
814
- " <tr>\n",
815
- " <th>35</th>\n",
816
- " <td>Renault Koleos 2.5 Zen</td>\n",
817
- " <td>Suba - Bogotá D.C.</td>\n",
818
- " <td>110000000</td>\n",
819
- " <td>https://carro.mercadolibre.com.co/MCO-13778678...</td>\n",
820
- " <td>1</td>\n",
821
- " </tr>\n",
822
- " <tr>\n",
823
- " <th>36</th>\n",
824
- " <td>Bmw X3 2.0 F25 Xdrive20i</td>\n",
825
- " <td>Usaquén - Bogotá D.C.</td>\n",
826
- " <td>74700000</td>\n",
827
- " <td>https://carro.mercadolibre.com.co/MCO-13778550...</td>\n",
828
- " <td>1</td>\n",
829
- " </tr>\n",
830
- " <tr>\n",
831
- " <th>37</th>\n",
832
- " <td>Renault Stepway 1.6 Dynamique / Intens Automática</td>\n",
833
- " <td>Suba - Bogotá D.C.</td>\n",
834
- " <td>58000000</td>\n",
835
- " <td>https://carro.mercadolibre.com.co/MCO-13779061...</td>\n",
836
- " <td>1</td>\n",
837
- " </tr>\n",
838
- " <tr>\n",
839
- " <th>38</th>\n",
840
- " <td>Volkswagen Jetta Comfortline</td>\n",
841
- " <td>Fontibón - Bogotá D.C.</td>\n",
842
- " <td>55000000</td>\n",
843
- " <td>https://carro.mercadolibre.com.co/MCO-13779179...</td>\n",
844
- " <td>1</td>\n",
845
- " </tr>\n",
846
- " <tr>\n",
847
- " <th>39</th>\n",
848
- " <td>Volkswagen Tiguan 2.0 Sport &amp; Style</td>\n",
849
- " <td>Medellín - Antioquia</td>\n",
850
- " <td>68000000</td>\n",
851
- " <td>https://carro.mercadolibre.com.co/MCO-13778801...</td>\n",
852
- " <td>1</td>\n",
853
- " </tr>\n",
854
- " <tr>\n",
855
- " <th>40</th>\n",
856
- " <td>Renault Koleos 2.0 Dynamique Plus</td>\n",
857
- " <td>Suba - Bogotá D.C.</td>\n",
858
- " <td>39900000</td>\n",
859
- " <td>https://carro.mercadolibre.com.co/MCO-13779056...</td>\n",
860
- " <td>1</td>\n",
861
- " </tr>\n",
862
- " <tr>\n",
863
- " <th>41</th>\n",
864
- " <td>Mazda 2 1.5 Touring Sedan</td>\n",
865
- " <td>Usaquén - Bogotá D.C.</td>\n",
866
- " <td>64900000</td>\n",
867
- " <td>https://carro.mercadolibre.com.co/MCO-21314615...</td>\n",
868
- " <td>1</td>\n",
869
- " </tr>\n",
870
- " <tr>\n",
871
- " <th>42</th>\n",
872
- " <td>Mazda 3 2.0 Grand Touring</td>\n",
873
- " <td>Usaquén - Bogotá D.C.</td>\n",
874
- " <td>71900000</td>\n",
875
- " <td>https://carro.mercadolibre.com.co/MCO-21314614...</td>\n",
876
- " <td>1</td>\n",
877
- " </tr>\n",
878
- " <tr>\n",
879
- " <th>43</th>\n",
880
- " <td>Zhidou D2s Electrico</td>\n",
881
- " <td>Suba - Bogotá D.C.</td>\n",
882
- " <td>49500000</td>\n",
883
- " <td>https://carro.mercadolibre.com.co/MCO-13779053...</td>\n",
884
- " <td>1</td>\n",
885
- " </tr>\n",
886
- " <tr>\n",
887
- " <th>44</th>\n",
888
- " <td>Toyota Prado 2.7 Sumo Cruiser</td>\n",
889
- " <td>Antonio Nariño - Bogotá D.C.</td>\n",
890
- " <td>77000000</td>\n",
891
- " <td>https://carro.mercadolibre.com.co/MCO-13779050...</td>\n",
892
- " <td>1</td>\n",
893
- " </tr>\n",
894
- " <tr>\n",
895
- " <th>45</th>\n",
896
- " <td>Changan Cs 15 Skyview Pro Mt</td>\n",
897
- " <td>Fontibón - Bogotá D.C.</td>\n",
898
- " <td>63000000</td>\n",
899
- " <td>https://carro.mercadolibre.com.co/MCO-13779048...</td>\n",
900
- " <td>1</td>\n",
901
- " </tr>\n",
902
- " <tr>\n",
903
- " <th>46</th>\n",
904
- " <td>Kia Rio Spice 1.4</td>\n",
905
- " <td>Suba - Bogotá D.C.</td>\n",
906
- " <td>45000000</td>\n",
907
- " <td>https://carro.mercadolibre.com.co/MCO-13778792...</td>\n",
908
- " <td>1</td>\n",
909
- " </tr>\n",
910
- " <tr>\n",
911
- " <th>47</th>\n",
912
- " <td>Dodge Journey 2.4 Se/express Fl</td>\n",
913
- " <td>Barrios Unidos - Bogotá D.C.</td>\n",
914
- " <td>58000000</td>\n",
915
- " <td>https://carro.mercadolibre.com.co/MCO-13778916...</td>\n",
916
- " <td>1</td>\n",
917
- " </tr>\n",
918
- " </tbody>\n",
919
- "</table>\n",
920
- "</div>"
921
- ],
922
  "text/plain": [
923
- " vehiculo \\\n",
924
- "0 Mazda 3 2.0 Touring \n",
925
- "1 Honda Cr-v 2.4 Ex \n",
926
- "2 Chevrolet Tracker Ls \n",
927
- "3 Kia Rio 1.4 \n",
928
- "4 Volkswagen Gol 1.6 Trendline \n",
929
- "5 Toyota Fortuner 2.7 Sw4 Street \n",
930
- "6 Mercedes-benz Clase A 1.3 Amg Line 2022 \n",
931
- "7 Volvo Xc40 2.0 T5 Awd Momentum 4x4 2019 \n",
932
- "8 Volkswagen Amarok Trendline \n",
933
- "9 Mazda 2 1.5 Touring \n",
934
- "10 Mitsubishi Montero 3.0 V13 \n",
935
- "11 Toyota Fortuner 2.7l \n",
936
- "12 Kia Rio Ub Ex \n",
937
- "13 Fiat Mobi Easy \n",
938
- "14 Renault Koleos 2.5 Intens Cvt 4x4 \n",
939
- "15 Fiat Mobi 1.0 Like \n",
940
- "16 Honda Cr-v 2.4 L 5dr 2wd Lx 2021 \n",
941
- "17 Ford F-150 Xlt At \n",
942
- "18 Mazda 3 2.0 Touring \n",
943
- "19 Lexus Lx 5.7 570 Super Sport S 4x4 2019 \n",
944
- "20 Nissan Frontier 2.4 Dx \n",
945
- "21 Ford Escape 3.0 Xlt \n",
946
- "22 Volkswagen Jetta 2.0 2007 \n",
947
- "23 Renault Duster Oroch 2.0 Intens Mt 4x4 2022 \n",
948
- "24 Chevrolet Spark Gt Ltz \n",
949
- "25 Suzuki Grand Vitara 2.4 Sz Glx Sport 4x2 \n",
950
- "26 Nissan Sentra 1.8 B17 Advance At \n",
951
- "27 Renault Duster 1.6 Expression 2013 \n",
952
- "28 Subaru Forester 2.5 Advance 2023 \n",
953
- "29 Mercedes-benz Clase Cla 1.6 2015 \n",
954
- "30 Mazda 3 1.6 All New \n",
955
- "31 Landwind Landwind X5 Plus Jx6460lmm \n",
956
- "32 Bmw X4 2.0 Xdrive30i \n",
957
- "33 Mazda 2 Touring \n",
958
- "34 Nissan Sentra E \n",
959
- "35 Renault Koleos 2.5 Zen \n",
960
- "36 Bmw X3 2.0 F25 Xdrive20i \n",
961
- "37 Renault Stepway 1.6 Dynamique / Intens Automática \n",
962
- "38 Volkswagen Jetta Comfortline \n",
963
- "39 Volkswagen Tiguan 2.0 Sport & Style \n",
964
- "40 Renault Koleos 2.0 Dynamique Plus \n",
965
- "41 Mazda 2 1.5 Touring Sedan \n",
966
- "42 Mazda 3 2.0 Grand Touring \n",
967
- "43 Zhidou D2s Electrico \n",
968
- "44 Toyota Prado 2.7 Sumo Cruiser \n",
969
- "45 Changan Cs 15 Skyview Pro Mt \n",
970
- "46 Kia Rio Spice 1.4 \n",
971
- "47 Dodge Journey 2.4 Se/express Fl \n",
972
- "\n",
973
- " seller_city price \\\n",
974
- "0 Usaquén - Bogotá D.C. 75000000 \n",
975
- "1 Suba - Bogotá D.C. 37000000 \n",
976
- "2 Kennedy - Bogotá D.C. 43000000 \n",
977
- "3 Medellín - Antioquia 41000000 \n",
978
- "4 Rionegro - Antioquia 45000000 \n",
979
- "5 Usaquén - Bogotá D.C. 175000000 \n",
980
- "6 Fontibón - Bogotá D.C. 138000000 \n",
981
- "7 Suba - Bogotá D.C. 136500000 \n",
982
- "8 Kennedy - Bogotá D.C. 104000000 \n",
983
- "9 Medellín - Antioquia 76900000 \n",
984
- "10 Medellín - Antioquia 95000000 \n",
985
- "11 Medellín - Antioquia 192000000 \n",
986
- "12 Engativa - Bogotá D.C. 33500000 \n",
987
- "13 San Cristobal Sur - Bogotá D.C. 37000000 \n",
988
- "14 Suba - Bogotá D.C. 132990000 \n",
989
- "15 Engativa - Bogotá D.C. 42500000 \n",
990
- "16 Chapinero - Bogotá D.C. 135000000 \n",
991
- "17 Fontibón - Bogotá D.C. 95000000 \n",
992
- "18 Suba - Bogotá D.C. 72000000 \n",
993
- "19 Suba - Bogotá D.C. 639900000 \n",
994
- "20 Antonio Nariño - Bogotá D.C. 50000000 \n",
995
- "21 Usaquén - Bogotá D.C. 41800000 \n",
996
- "22 Usaquén - Bogotá D.C. 24500000 \n",
997
- "23 Usaquén - Bogotá D.C. 85000000 \n",
998
- "24 Kennedy - Bogotá D.C. 35800000 \n",
999
- "25 Suba - Bogotá D.C. 61500000 \n",
1000
- "26 Engativa - Bogotá D.C. 43000000 \n",
1001
- "27 Barrios Unidos - Bogotá D.C. 37500000 \n",
1002
- "28 Usaquén - Bogotá D.C. 135000000 \n",
1003
- "29 Usaquén - Bogotá D.C. 68000000 \n",
1004
- "30 Medellín - Antioquia 40900000 \n",
1005
- "31 Kennedy - Bogotá D.C. 43000000 \n",
1006
- "32 Usaquén - Bogotá D.C. 175000000 \n",
1007
- "33 Kennedy - Bogotá D.C. 78000000 \n",
1008
- "34 Kennedy - Bogotá D.C. 31000000 \n",
1009
- "35 Suba - Bogotá D.C. 110000000 \n",
1010
- "36 Usaquén - Bogotá D.C. 74700000 \n",
1011
- "37 Suba - Bogotá D.C. 58000000 \n",
1012
- "38 Fontibón - Bogotá D.C. 55000000 \n",
1013
- "39 Medellín - Antioquia 68000000 \n",
1014
- "40 Suba - Bogotá D.C. 39900000 \n",
1015
- "41 Usaquén - Bogotá D.C. 64900000 \n",
1016
- "42 Usaquén - Bogotá D.C. 71900000 \n",
1017
- "43 Suba - Bogotá D.C. 49500000 \n",
1018
- "44 Antonio Nariño - Bogotá D.C. 77000000 \n",
1019
- "45 Fontibón - Bogotá D.C. 63000000 \n",
1020
- "46 Suba - Bogotá D.C. 45000000 \n",
1021
- "47 Barrios Unidos - Bogotá D.C. 58000000 \n",
1022
- "\n",
1023
- " link page \n",
1024
- "0 https://carro.mercadolibre.com.co/MCO-21353664... 1 \n",
1025
- "1 https://carro.mercadolibre.com.co/MCO-21353664... 1 \n",
1026
- "2 https://carro.mercadolibre.com.co/MCO-21355926... 1 \n",
1027
- "3 https://carro.mercadolibre.com.co/MCO-21352382... 1 \n",
1028
- "4 https://carro.mercadolibre.com.co/MCO-21351601... 1 \n",
1029
- "5 https://carro.mercadolibre.com.co/MCO-13782022... 1 \n",
1030
- "6 https://carro.mercadolibre.com.co/MCO-13782653... 1 \n",
1031
- "7 https://carro.mercadolibre.com.co/MCO-13782264... 1 \n",
1032
- "8 https://carro.mercadolibre.com.co/MCO-21339355... 1 \n",
1033
- "9 https://carro.mercadolibre.com.co/MCO-21339740... 1 \n",
1034
- "10 https://carro.mercadolibre.com.co/MCO-21339478... 1 \n",
1035
- "11 https://carro.mercadolibre.com.co/MCO-13781937... 1 \n",
1036
- "12 https://carro.mercadolibre.com.co/MCO-21339596... 1 \n",
1037
- "13 https://carro.mercadolibre.com.co/MCO-21340333... 1 \n",
1038
- "14 https://carro.mercadolibre.com.co/MCO-21339336... 1 \n",
1039
- "15 https://carro.mercadolibre.com.co/MCO-21340195... 1 \n",
1040
- "16 https://carro.mercadolibre.com.co/MCO-21339453... 1 \n",
1041
- "17 https://carro.mercadolibre.com.co/MCO-21339448... 1 \n",
1042
- "18 https://carro.mercadolibre.com.co/MCO-13781890... 1 \n",
1043
- "19 https://carro.mercadolibre.com.co/MCO-21339687... 1 \n",
1044
- "20 https://carro.mercadolibre.com.co/MCO-21340053... 1 \n",
1045
- "21 https://carro.mercadolibre.com.co/MCO-21339532... 1 \n",
1046
- "22 https://carro.mercadolibre.com.co/MCO-13781632... 1 \n",
1047
- "23 https://carro.mercadolibre.com.co/MCO-13781407... 1 \n",
1048
- "24 https://carro.mercadolibre.com.co/MCO-13781006... 1 \n",
1049
- "25 https://carro.mercadolibre.com.co/MCO-13781494... 1 \n",
1050
- "26 https://carro.mercadolibre.com.co/MCO-21332883... 1 \n",
1051
- "27 https://carro.mercadolibre.com.co/MCO-13781212... 1 \n",
1052
- "28 https://carro.mercadolibre.com.co/MCO-13780952... 1 \n",
1053
- "29 https://carro.mercadolibre.com.co/MCO-13780810... 1 \n",
1054
- "30 https://carro.mercadolibre.com.co/MCO-21331854... 1 \n",
1055
- "31 https://carro.mercadolibre.com.co/MCO-21331695... 1 \n",
1056
- "32 https://carro.mercadolibre.com.co/MCO-13778695... 1 \n",
1057
- "33 https://carro.mercadolibre.com.co/MCO-13778682... 1 \n",
1058
- "34 https://carro.mercadolibre.com.co/MCO-13778293... 1 \n",
1059
- "35 https://carro.mercadolibre.com.co/MCO-13778678... 1 \n",
1060
- "36 https://carro.mercadolibre.com.co/MCO-13778550... 1 \n",
1061
- "37 https://carro.mercadolibre.com.co/MCO-13779061... 1 \n",
1062
- "38 https://carro.mercadolibre.com.co/MCO-13779179... 1 \n",
1063
- "39 https://carro.mercadolibre.com.co/MCO-13778801... 1 \n",
1064
- "40 https://carro.mercadolibre.com.co/MCO-13779056... 1 \n",
1065
- "41 https://carro.mercadolibre.com.co/MCO-21314615... 1 \n",
1066
- "42 https://carro.mercadolibre.com.co/MCO-21314614... 1 \n",
1067
- "43 https://carro.mercadolibre.com.co/MCO-13779053... 1 \n",
1068
- "44 https://carro.mercadolibre.com.co/MCO-13779050... 1 \n",
1069
- "45 https://carro.mercadolibre.com.co/MCO-13779048... 1 \n",
1070
- "46 https://carro.mercadolibre.com.co/MCO-13778792... 1 \n",
1071
- "47 https://carro.mercadolibre.com.co/MCO-13778916... 1 "
1072
  ]
1073
  },
1074
- "execution_count": 202,
1075
  "metadata": {},
1076
  "output_type": "execute_result"
1077
  }
1078
  ],
1079
  "source": [
1080
- "df"
1081
  ]
1082
  },
1083
  {
1084
  "cell_type": "code",
1085
- "execution_count": 232,
1086
  "metadata": {},
1087
  "outputs": [
1088
  {
1089
  "data": {
1090
  "text/plain": [
1091
- "'https://carros.mercadolibre.com.co/carro_NoIndex_True'"
1092
  ]
1093
  },
1094
- "execution_count": 232,
1095
  "metadata": {},
1096
  "output_type": "execute_result"
1097
  }
1098
  ],
1099
  "source": [
1100
- "siguiente"
1101
  ]
1102
  },
1103
  {
 
2
  "cells": [
3
  {
4
  "cell_type": "code",
5
+ "execution_count": 1,
6
  "metadata": {},
7
+ "outputs": [],
 
 
 
 
 
 
 
 
 
8
  "source": [
9
  "%load_ext autoreload\n",
10
  "%autoreload 2\n",
11
  "import requests\n",
12
  "from bs4 import BeautifulSoup\n",
13
+ "import pandas as pd\n",
14
+ "from lxml import etree"
15
  ]
16
  },
17
  {
18
  "cell_type": "code",
19
+ "execution_count": null,
20
  "metadata": {},
21
  "outputs": [],
22
  "source": [
23
+ "def get_soup_by_url(url, product: str = None):\n",
24
+ " if product is None:\n",
25
+ " url = url\n",
26
+ " else:\n",
27
+ " url = f'{url}/{product}'\n",
28
+ " r = requests.get(url=url)\n",
29
+ " s = BeautifulSoup(r.content, 'html.parser')\n",
30
+ " return s\n",
31
+ "\n",
32
+ "def get_vehicle_info(url):\n",
33
+ " s = get_soup_by_url(url)\n",
34
+ " text = s.find_all('span', attrs= {'class':'ui-pdp-subtitle'})[0].text.replace('.', '')\n",
35
+ " parts = text.split(' · ')\n",
36
+ " location = s.find('p', attrs = {'class':'ui-seller-info__status-info__subtitle'}).text\n",
37
+ " pub_number = s.find_all('span', attrs= {'class':'ui-pdp-color--BLACK ui-pdp-family--SEMIBOLD'})[0].text.replace('#','')\n",
38
+ " year = parts[0].split(' | ')[0]\n",
39
+ " kilometrage = parts[0].split(' | ')[1].replace('km', '')\n",
40
+ " publication_date = parts[1]\n",
41
+ " output_dict = {\n",
42
+ " \"Year\": year,\n",
43
+ " \"Kilometrage\": kilometrage,\n",
44
+ " \"Publication Date\": publication_date,\n",
45
+ " \"Location\": location,\n",
46
+ " \"Pub Number\": pub_number,\n",
47
+ " \"Created At\": datetime.datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n",
48
+ " }\n",
49
+ "\n",
50
+ " return output_dict\n",
51
+ "data = pd.read_csv('final_data.csv')"
52
  ]
53
  },
54
  {
55
  "cell_type": "code",
56
+ "execution_count": 2,
57
  "metadata": {},
58
  "outputs": [],
59
  "source": [
60
+ "link = 'https://carro.mercadolibre.com.co/MCO-1379439831-mercedes-benz-gle-350-d-4matic-2017-_JM#position=1&search_layout=grid&type=item&tracking_id=79a64cb3-adcf-4d29-be31-812b12b34cef'"
 
 
 
61
  ]
62
  },
63
  {
64
  "cell_type": "code",
65
  "execution_count": 11,
66
  "metadata": {},
67
+ "outputs": [
68
+ {
69
+ "data": {
70
+ "text/plain": [
71
+ "'https://carro.mercadolibre.com.co/MCO-1379439831-mercedes-benz-gle-350-d-4matic-2017-_JM#position=1&search_layout=grid&type=item&tracking_id=79a64cb3-adcf-4d29-be31-812b12b34cef'"
72
+ ]
73
+ },
74
+ "execution_count": 11,
75
+ "metadata": {},
76
+ "output_type": "execute_result"
77
+ }
78
+ ],
79
  "source": [
80
+ "link"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
81
  ]
82
  },
83
  {
84
  "cell_type": "code",
85
+ "execution_count": 3,
86
  "metadata": {},
87
  "outputs": [],
88
  "source": [
89
+ "r = requests.get(url=link)\n",
90
+ "\n",
91
+ "s = BeautifulSoup(r.content, 'html.parser')"
92
  ]
93
  },
94
  {
95
  "cell_type": "code",
96
+ "execution_count": 4,
97
  "metadata": {},
98
  "outputs": [
99
  {
100
+ "data": {
101
+ "text/plain": [
102
+ "189000000"
103
+ ]
104
+ },
105
+ "execution_count": 4,
106
+ "metadata": {},
107
+ "output_type": "execute_result"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
108
  }
109
  ],
110
  "source": [
111
+ "# price\n",
112
+ "\n",
113
+ "int(s.find_all('span', attrs= {'class':'andes-money-amount__fraction'})[0].text.replace('.', ''))"
 
 
 
 
 
 
 
 
 
 
 
 
114
  ]
115
  },
116
  {
117
  "cell_type": "code",
118
+ "execution_count": null,
119
  "metadata": {},
120
  "outputs": [],
121
+ "source": []
 
 
122
  },
123
  {
124
  "cell_type": "code",
125
+ "execution_count": 5,
126
  "metadata": {},
127
  "outputs": [
128
  {
129
  "data": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
130
  "text/plain": [
131
+ "'1379439831'"
 
 
 
 
 
 
132
  ]
133
  },
134
+ "execution_count": 5,
135
  "metadata": {},
136
  "output_type": "execute_result"
137
  }
138
  ],
139
  "source": [
140
+ "# publication number\n",
141
+ "s.find_all('span', attrs= {'class':'ui-pdp-color--BLACK ui-pdp-family--SEMIBOLD'})[0].text.replace('#','')"
142
  ]
143
  },
144
  {
145
  "cell_type": "code",
146
+ "execution_count": 16,
147
  "metadata": {},
148
  "outputs": [
149
  {
150
  "data": {
151
  "text/plain": [
152
+ "{'Year': '2017',\n",
153
+ " 'Kilometrage': '47800 ',\n",
154
+ " 'Publication Date': 'Publicado hace 3 días',\n",
155
+ " 'Location': 'Chicó Navarra - Usaquén - Bogotá D.C.',\n",
156
+ " 'Pub Number': '1379439831',\n",
157
+ " 'Created At': '2024-01-13 13:35:22'}"
158
  ]
159
  },
160
+ "execution_count": 16,
161
  "metadata": {},
162
  "output_type": "execute_result"
163
  }
164
  ],
165
  "source": [
166
+ "import datetime\n",
167
+ "\n",
168
+ "text = s.find_all('span', attrs= {'class':'ui-pdp-subtitle'})[0].text.replace('.', '')\n",
169
+ "parts = text.split(' · ')\n",
170
+ "location = s.find('p', attrs = {'class':'ui-seller-info__status-info__subtitle'}).text\n",
171
+ "pub_number = s.find_all('span', attrs= {'class':'ui-pdp-color--BLACK ui-pdp-family--SEMIBOLD'})[0].text.replace('#','')\n",
172
+ "year = parts[0].split(' | ')[0]\n",
173
+ "kilometrage = parts[0].split(' | ')[1].replace('km', '')\n",
174
+ "publication_date = parts[1]\n",
175
+ "output_dict = {\n",
176
+ " \"Year\": year,\n",
177
+ " \"Kilometrage\": kilometrage,\n",
178
+ " \"Publication Date\": publication_date,\n",
179
+ " \"Location\": location,\n",
180
+ " \"Pub Number\": pub_number,\n",
181
+ " \"Created At\": datetime.datetime.now().strftime(\"%Y-%m-%d %H:%M:%S\")\n",
182
+ "}\n",
183
+ "\n",
184
+ "output_dict"
185
  ]
186
  },
187
  {
188
  "cell_type": "code",
189
+ "execution_count": null,
190
+ "metadata": {},
191
+ "outputs": [],
192
+ "source": []
193
+ },
194
+ {
195
+ "cell_type": "code",
196
+ "execution_count": 7,
197
  "metadata": {},
198
  "outputs": [
199
  {
200
  "data": {
201
  "text/plain": [
202
+ "'Chicó Navarra - Usaquén - Bogotá D.C.'"
203
  ]
204
  },
205
+ "execution_count": 7,
206
  "metadata": {},
207
  "output_type": "execute_result"
208
  }
209
  ],
210
+ "source": []
 
 
211
  },
212
  {
213
  "cell_type": "code",
214
+ "execution_count": 8,
215
  "metadata": {},
216
  "outputs": [],
217
  "source": [
218
+ "table = s.find_all('table', attrs= {'class': 'andes-table'})"
 
 
219
  ]
220
  },
221
  {
222
  "cell_type": "code",
223
+ "execution_count": 9,
224
  "metadata": {},
225
  "outputs": [
226
  {
227
  "data": {
228
  "text/plain": [
229
+ "[]"
230
  ]
231
  },
232
+ "execution_count": 9,
233
  "metadata": {},
234
  "output_type": "execute_result"
235
  }
236
  ],
237
  "source": [
238
+ "table"
239
  ]
240
  },
241
  {
242
  "cell_type": "code",
243
+ "execution_count": 10,
244
  "metadata": {},
245
  "outputs": [
246
  {
247
+ "ename": "IndexError",
248
+ "evalue": "list index out of range",
249
+ "output_type": "error",
250
+ "traceback": [
251
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
252
+ "\u001b[0;31mIndexError\u001b[0m Traceback (most recent call last)",
253
+ "Cell \u001b[0;32mIn[10], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m rows \u001b[38;5;241m=\u001b[39m \u001b[43mtable\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241m.\u001b[39mfind_all(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mtr\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[1;32m 2\u001b[0m rows\n",
254
+ "\u001b[0;31mIndexError\u001b[0m: list index out of range"
255
+ ]
256
  }
257
  ],
258
  "source": [
259
+ "rows = table[0].find_all('tr')\n",
260
+ "rows"
261
  ]
262
  },
263
  {
264
  "cell_type": "code",
265
+ "execution_count": 106,
266
  "metadata": {},
267
  "outputs": [],
268
+ "source": [
269
+ "data = {}\n",
270
+ "for row in rows:\n",
271
+ " cols = row.find_all(['th', 'td'])\n",
272
+ " cols = [ele.text.strip() for ele in cols]\n",
273
+ " data[cols[0]] = cols[1]\n"
274
+ ]
275
  },
276
  {
277
  "cell_type": "code",
278
+ "execution_count": 107,
279
  "metadata": {},
280
  "outputs": [
281
  {
282
  "data": {
283
  "text/plain": [
284
+ "{'Marca': 'Mercedes-Benz',\n",
285
+ " 'Modelo': 'Clase GLE',\n",
286
+ " 'Año': '2017',\n",
287
+ " 'Versión': '3.0 Coupe 4matic Diésel',\n",
288
+ " 'Color': 'Gris',\n",
289
+ " 'Tipo de combustible': 'Diésel',\n",
290
+ " 'Puertas': '5',\n",
291
+ " 'Transmisión': 'Automática',\n",
292
+ " 'Motor': '3.0',\n",
293
+ " 'Tipo de carrocería': 'Station Wagon',\n",
294
+ " 'Kilómetros': '47800 km'}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
295
  ]
296
  },
297
+ "execution_count": 107,
298
  "metadata": {},
299
  "output_type": "execute_result"
300
  }
301
  ],
302
  "source": [
303
+ "data"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
  ]
305
  },
306
  {
307
  "cell_type": "code",
308
+ "execution_count": 22,
309
  "metadata": {},
310
  "outputs": [
311
  {
312
+ "ename": "NameError",
313
+ "evalue": "name 'rows' is not defined",
314
+ "output_type": "error",
315
+ "traceback": [
316
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
317
+ "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
318
+ "Cell \u001b[0;32mIn[22], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mrows\u001b[49m\n",
319
+ "\u001b[0;31mNameError\u001b[0m: name 'rows' is not defined"
320
+ ]
321
  }
322
  ],
323
  "source": [
324
+ "rows"
325
  ]
326
  },
327
  {
328
  "cell_type": "code",
329
+ "execution_count": null,
330
+ "metadata": {},
331
+ "outputs": [],
332
+ "source": []
333
+ },
334
+ {
335
+ "cell_type": "code",
336
+ "execution_count": 49,
337
  "metadata": {},
338
  "outputs": [],
339
  "source": [
340
+ "dom = etree.HTML(str(s))"
341
  ]
342
  },
343
  {
344
  "cell_type": "code",
345
+ "execution_count": 50,
346
  "metadata": {},
347
  "outputs": [
348
  {
349
  "data": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
350
  "text/plain": [
351
+ "<Element html at 0x7baa0c6bbfc0>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
352
  ]
353
  },
354
+ "execution_count": 50,
355
  "metadata": {},
356
  "output_type": "execute_result"
357
  }
358
  ],
359
  "source": [
360
+ "dom"
361
  ]
362
  },
363
  {
364
  "cell_type": "code",
365
+ "execution_count": 51,
366
  "metadata": {},
367
  "outputs": [
368
  {
369
  "data": {
370
  "text/plain": [
371
+ "[]"
372
  ]
373
  },
374
+ "execution_count": 51,
375
  "metadata": {},
376
  "output_type": "execute_result"
377
  }
378
  ],
379
  "source": [
380
+ "dom.xpath(\"//table[@class = 'andes-table']\")"
381
  ]
382
  },
383
  {