File size: 10,112 Bytes
72a6c46 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 |
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd \n",
"import numpy as np \n",
"import re\n",
"\n",
"from time import sleep\n",
"from selenium import webdriver\n",
"from selenium.webdriver.common.by import By\n",
"from selenium.common.exceptions import NoSuchElementException\n",
"from selenium.webdriver.common.keys import Keys\n",
"from selenium.webdriver.support.select import Select\n",
"from selenium.webdriver.common.action_chains import ActionChains"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Start the Chrome session that the scraping cells below drive through `driver`.\n",
"excluded_switches = ['enable-logging']\n",
"options = webdriver.ChromeOptions()\n",
"# NOTE(review): presumably excluded to quiet ChromeDriver console logging - confirm.\n",
"options.add_experimental_option('excludeSwitches', excluded_switches)\n",
"driver = webdriver.Chrome(options=options)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Collect Data"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 https://iisma.kemdikbud.go.id/info/02-university-college-london/\n",
"1 https://iisma.kemdikbud.go.id/info/03-university-of-chicago/\n",
"2 https://iisma.kemdikbud.go.id/info/04-nanyang-technological-university/\n",
"3 https://iisma.kemdikbud.go.id/info/05-the-university-of-pennsylvania-college-of-liberal-and-professional-studies/\n",
"4 https://iisma.kemdikbud.go.id/info/06-yale-university/\n",
"5 https://iisma.kemdikbud.go.id/info/07-university-of-edinburgh/\n",
"6 https://iisma.kemdikbud.go.id/info/09-the-australian-national-university/\n",
"7 https://iisma.kemdikbud.go.id/info/10-university-of-melbourne/\n",
"8 https://iisma.kemdikbud.go.id/info/11-university-of-sydney/\n",
"9 https://iisma.kemdikbud.go.id/info/12-university-of-new-south-wales/\n",
"10 https://iisma.kemdikbud.go.id/info/13-university-of-british-columbia/\n",
"11 https://iisma.kemdikbud.go.id/info/14-the-university-of-queensland/\n",
"12 https://iisma.kemdikbud.go.id/info/15-monash-university/\n",
"13 https://iisma.kemdikbud.go.id/info/16-university-of-warwick/\n",
"14 https://iisma.kemdikbud.go.id/info/17-universiti-malaya/\n",
"15 https://iisma.kemdikbud.go.id/info/18-national-taiwan-university/\n",
"16 https://iisma.kemdikbud.go.id/info/18-osaka-university/\n",
"17 https://iisma.kemdikbud.go.id/info/20-ku-leuven/\n",
"18 https://iisma.kemdikbud.go.id/info/21-university-of-texas-at-austin/\n",
"19 https://iisma.kemdikbud.go.id/info/22-university-of-glasgow/\n",
"20 https://iisma.kemdikbud.go.id/info/23-korea-university/\n",
"21 https://iisma.kemdikbud.go.id/info/24-m-v-lomonosov-moscow-state-university/\n",
"22 https://iisma.kemdikbud.go.id/info/25-university-of-auckland/\n",
"23 https://iisma.kemdikbud.go.id/info/26-university-of-leeds/\n",
"24 https://iisma.kemdikbud.go.id/info/27-the-university-of-western-australia/\n",
"25 https://iisma.kemdikbud.go.id/info/28-university-of-birmingham/\n",
"26 https://iisma.kemdikbud.go.id/info/29-penn-state-university/\n",
"27 https://iisma.kemdikbud.go.id/info/30-university-of-california-davis/\n",
"28 https://iisma.kemdikbud.go.id/info/31-boston-university-metropolitan-college/\n",
"29 https://iisma.kemdikbud.go.id/info/32-the-university-of-adelaide/\n",
"30 https://iisma.kemdikbud.go.id/info/33-university-college-cork/\n",
"31 https://iisma.kemdikbud.go.id/info/34-queen-mary-university-of-london/\n",
"32 https://iisma.kemdikbud.go.id/info/35-uc-chile/\n",
"33 https://iisma.kemdikbud.go.id/info/36-newcastle-university/\n",
"34 https://iisma.kemdikbud.go.id/info/37-humboldt-universitat-zu-berlin/\n",
"35 https://iisma.kemdikbud.go.id/info/38-universiti-kebangsaan-malaysia/\n",
"36 https://iisma.kemdikbud.go.id/info/39-lancaster-university/\n",
"37 https://iisma.kemdikbud.go.id/info/40-universiti-sains-malaysia/\n",
"38 https://iisma.kemdikbud.go.id/info/41-grenoble-ecole-de-management/\n",
"39 https://iisma.kemdikbud.go.id/info/42-university-of-waterloo/\n",
"40 https://iisma.kemdikbud.go.id/info/43-university-of-york/\n",
"41 https://iisma.kemdikbud.go.id/info/44-hanyang-university-seoul-campus/\n",
"42 https://iisma.kemdikbud.go.id/info/45-michigan-state-university/\n",
"43 https://iisma.kemdikbud.go.id/info/46-western-university/\n",
"44 https://iisma.kemdikbud.go.id/info/47-sapienza-university-of-rome/\n",
"45 https://iisma.kemdikbud.go.id/info/48-university-college-dublin/\n",
"46 https://iisma.kemdikbud.go.id/info/49-university-of-twente/\n",
"47 https://iisma.kemdikbud.go.id/info/50-university-of-liverpool/\n",
"48 https://iisma.kemdikbud.go.id/info/51-university-of-otago/\n",
"49 https://iisma.kemdikbud.go.id/info/52-keio-university/\n",
"50 https://iisma.kemdikbud.go.id/info/53-universidad-autonoma-de-madrid/\n",
"51 https://iisma.kemdikbud.go.id/info/54-vrije-universiteit-amsterdam/\n",
"52 https://iisma.kemdikbud.go.id/info/55-chulalongkorn-university/\n",
"53 https://iisma.kemdikbud.go.id/info/56-arizona-state-university/\n",
"54 https://iisma.kemdikbud.go.id/info/57-radboud-university/\n",
"55 https://iisma.kemdikbud.go.id/info/58-university-of-sussex/\n",
"56 https://iisma.kemdikbud.go.id/info/59-maastricht-university/\n",
"57 https://iisma.kemdikbud.go.id/info/60-universitat-pompeu-fabra/\n",
"58 https://iisma.kemdikbud.go.id/info/61-university-of-leicester/\n",
"59 https://iisma.kemdikbud.go.id/info/62-victoria-university-of-wellington/\n",
"60 https://iisma.kemdikbud.go.id/info/63-university-of-padua/\n",
"61 https://iisma.kemdikbud.go.id/info/64-university-of-colorado-boulder/\n",
"62 https://iisma.kemdikbud.go.id/info/65-university-of-galway/\n",
"63 https://iisma.kemdikbud.go.id/info/66-university-of-canterbury/\n",
"64 https://iisma.kemdikbud.go.id/info/68-university-of-warsaw/\n",
"65 https://iisma.kemdikbud.go.id/info/69-university-of-tartu/\n",
"66 https://iisma.kemdikbud.go.id/info/71-national-taiwan-university-of-science-and-technology-taiwan-tech/\n",
"67 https://iisma.kemdikbud.go.id/info/72-university-of-pisa/\n",
"68 https://iisma.kemdikbud.go.id/info/73-leiden-university/\n",
"69 https://iisma.kemdikbud.go.id/info/73-middle-east-technical-university/\n",
"70 https://iisma.kemdikbud.go.id/info/74-singapore-management-university/\n",
"71 https://iisma.kemdikbud.go.id/info/75-university-of-szeged/\n",
"72 https://iisma.kemdikbud.go.id/info/76-palacky-university-olomouc/\n",
"73 https://iisma.kemdikbud.go.id/info/77-university-of-zagreb/\n",
"74 https://iisma.kemdikbud.go.id/info/78-vytautas-magnus-university/\n",
"75 https://iisma.kemdikbud.go.id/info/lolos-67-sciences-po/\n"
]
}
],
"source": [
"# Load the page links: the workbook is read without a header row and the links are taken from its first column.\n",
"# NOTE(review): absolute, machine-specific path - this cell only runs where that file exists.\n",
"link_df = pd.read_excel(\"C:/Users/mhani/Downloads/Link Kampus IISMA.xlsx\", header=None)\n",
"link_list = link_df[0].to_list()\n",
"# Print each link next to its 0-based position in the list.\n",
"for i, url in enumerate(link_list):\n",
"    print(i, url)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Empty result tables: one row per university page, and one row per course on a page.\n",
"detail_columns = [\"ID\", \"Name\", \"Location\", \"Requirements\", \"Period\", \"Statistics\"]\n",
"course_columns = [\"Univ_ID\", \"Course Name\", \"Details\"]\n",
"uni_details = pd.DataFrame(columns=detail_columns)\n",
"uni_courses = pd.DataFrame(columns=course_columns)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# Scrape every page in link_list: one summary row per university and one row\n",
"# per course toggle. Rows are buffered in plain lists and both frames are\n",
"# rebuilt once at the end, so re-running this cell replaces earlier results\n",
"# instead of appending duplicate rows to already-filled frames.\n",
"detail_rows = []\n",
"course_rows = []\n",
"try:\n",
"    # uni_id is the 1-based position of the link, the same value the rows carried as i+1.\n",
"    for uni_id, url in enumerate(link_list, start=1):\n",
"        driver.get(url)\n",
"        name = driver.find_element(By.XPATH, '//*[@class=\"elementor-heading-title elementor-size-default\"]').text\n",
"        # NOTE(review): picked by position (third widget container) - confirm every page shares this layout.\n",
"        loc = driver.find_elements(By.XPATH, '//*[@class=\"elementor-widget-container\"]')[2].text\n",
"\n",
"        # Click each tab before reading its panel; presumably .text is empty while a panel is hidden.\n",
"        # NOTE(review): the panel IDs are hard-coded - confirm they are identical on every page.\n",
"        tab_list = driver.find_elements(By.XPATH, '//*[@class=\"elementor-tab-title elementor-tab-desktop-title\"]')\n",
"        tab_list[0].click()\n",
"        req = driver.find_element(By.ID, 'elementor-tab-content-4502').text\n",
"        tab_list[1].click()\n",
"        period = driver.find_element(By.ID, 'elementor-tab-content-4503').text\n",
"        tab_list[2].click()\n",
"        stats = driver.find_element(By.ID, 'elementor-tab-content-4504').text\n",
"\n",
"        detail_rows.append([uni_id, name, loc, req, period, stats])\n",
"\n",
"        course_list = driver.find_elements(By.XPATH, '//*[@class=\"elementor-toggle-title\"]')\n",
"        for toggle in course_list:\n",
"            toggle.click()\n",
"            course_name = toggle.text\n",
"            # NOTE(review): index 1 presumably skips the still-active info tab panel at index 0 - confirm.\n",
"            detail_content = driver.find_elements(By.XPATH, '//*[@class=\"elementor-tab-content elementor-clearfix elementor-active\"]')[1]\n",
"            inner_detail = detail_content.get_attribute('innerHTML')\n",
"            # Swap every HTML tag for a single space.\n",
"            clean_detail = re.sub('<[^<]+?>', ' ', inner_detail)\n",
"            # NOTE(review): as it reads here this strips every space, including the ones the regex\n",
"            # just inserted between text fragments - confirm the intended token (it may have been a\n",
"            # different whitespace sequence before this file was exported).\n",
"            clean_detail = clean_detail.replace(\" \", \"\")\n",
"            # Second click on the same toggle, presumably collapsing it again before the next one.\n",
"            toggle.click()\n",
"            sleep(1)\n",
"            course_rows.append([uni_id, course_name, clean_detail])\n",
"finally:\n",
"    # Build the frames even when a page fails midway, so rows scraped so far are kept.\n",
"    uni_details = pd.DataFrame(detail_rows, columns=uni_details.columns)\n",
"    uni_courses = pd.DataFrame(course_rows, columns=uni_courses.columns)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# Write the scraped course rows to an Excel workbook, without the DataFrame index column.\n",
"courses_xlsx = \"uni_courses.xlsx\"\n",
"uni_courses.to_excel(courses_xlsx, index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
|