Spaces:
Sleeping
Sleeping
from bs4 import BeautifulSoup | |
import pandas as pd | |
import requests | |
headers = { | |
'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:35.0) Gecko/20100101 Firefox/35.0', | |
} | |
def get_html_document(url): | |
response = requests.get(url, headers=headers) | |
return response.text | |
def create_data(): | |
url = "https://www.vlr.gg/" | |
html_document_events = get_html_document(url + "events") | |
soup_events = BeautifulSoup(html_document_events, 'html.parser') | |
events_html = soup_events.find_all("a", {"class": "wf-card mod-flex event-item"}) | |
data = {"Tournament": [], "Match ID": [], "Team A": [], "Team B": [], "Map ID": [], "Map": [], "TA Agents": [], "TB Agents": [], "Result": []} | |
events_id = [] | |
events_name = [] | |
for event_html in events_html: | |
event_id = "" | |
i = 0 | |
for character in event_html["href"]: | |
if i > 6 and i < 11: | |
event_id += character | |
i += 1 | |
else: | |
i += 1 | |
event_name = event_html.find_all("div", {"class": "event-item-title"})[0].text.strip() | |
print("Tournament : " + event_name) | |
if "Champions Tour 2024" not in event_name: | |
continue | |
events_id.append(event_id) | |
events_name.append(event_name) | |
html_document_matches = get_html_document(url + f"event/matches/{event_id}") | |
soup_matches = BeautifulSoup(html_document_matches, 'html.parser') | |
matches_html = soup_matches.find_all("a", {"class": "wf-module-item match-item mod-color mod-left mod-bg-after-striped_purple mod-first"}) + soup_matches.find_all("a", {"class": "wf-module-item match-item mod-color mod-left mod-bg-after-striped_purple"}) | |
matches_id = [] | |
for match_html in matches_html: | |
match_id = "" | |
i = 0 | |
for character in match_html["href"]: | |
if i > 0 and i < 7: | |
match_id += character | |
i += 1 | |
else: | |
i += 1 | |
matches_id.append(match_id) | |
print("Match ID : " + match_id) | |
html_document_match = get_html_document(url + match_id) | |
soup_match = BeautifulSoup(html_document_match, 'html.parser') | |
match_header_vs = soup_match.find_all("div", {"class": "match-header-vs"}) | |
team_a_split = match_header_vs[0].find_all("div", {"class": "match-header-link-name mod-1"})[0].find_all("div", {"class": "wf-title-med"})[0].text.split() | |
team_a = "" | |
for a in team_a_split: | |
team_a = team_a + a + " " | |
print("Team A : " + team_a) | |
team_b_split = match_header_vs[0].find_all("div", {"class": "match-header-link-name mod-2"})[0].find_all("div", {"class": "wf-title-med"})[0].text.split() | |
team_b = "" | |
for b in team_b_split: | |
team_b = team_b + b + " " | |
print("Team B : " + team_b) | |
if team_a == "TBD" or team_b == "TBD": | |
print("not finished") | |
else: | |
maps_html = soup_match.find_all("div", {"class": "vm-stats-gamesnav-item js-map-switch"}) | |
maps_id = [] | |
for map_html in maps_html: | |
map_id = map_html["data-game-id"] | |
print("Map ID : " + map_id) | |
maps_id.append(map_id) | |
html_document_map = get_html_document(url + match_id + "/?game=" + map_id) | |
soup_map = BeautifulSoup(html_document_map, 'html.parser') | |
vm_stats_game = soup_map.find_all("div", {"class": "vm-stats-game mod-active"}) | |
map_html = vm_stats_game[0].find_all("div", {"class": "map"}) | |
map_text = map_html[0].find_all("span", {"style": "position: relative;"})[0].text.split() | |
map = map_text[0] | |
print("Map : " + map) | |
agents_html = vm_stats_game[0].find_all("span", {"class": "stats-sq mod-agent small"}) | |
i = 0 | |
ta_agents = [] | |
tb_agents = [] | |
for agent_html in agents_html: | |
if i <= 4: | |
agent = agent_html.find_all("img")[0]["title"] | |
ta_agents.append(agent) | |
i += 1 | |
else: | |
agent = agent_html.find_all("img")[0]["title"] | |
tb_agents.append(agent) | |
i += 1 | |
ta_agents = sorted(ta_agents) | |
tb_agents = sorted(tb_agents) | |
print("TA Agents : ", ta_agents) | |
print("TB Agents : ", tb_agents) | |
if ta_agents == [] or tb_agents == []: | |
print("not finished") | |
pass | |
else: | |
is_winner = vm_stats_game[0].find_all("div", {"class": "team"})[0].find_all("div", {"class": "score mod-win"}) | |
if is_winner == []: | |
result = "Team B win" | |
else: | |
result = "Team A win" | |
data["Tournament"].append(event_name) | |
data["Match ID"].append(match_id) | |
data["Team A"].append(team_a) | |
data["Team B"].append(team_b) | |
data["Map ID"].append(map_id) | |
data["Map"].append(map) | |
data["TA Agents"].append(ta_agents) | |
data["TB Agents"].append(tb_agents) | |
data["Result"].append(result) | |
print("All data of the map : ", data) | |
df = pd.DataFrame(data=data) | |
df.to_csv("data.csv", sep=",", index=False, encoding="utf-8") | |
print(".csv file downloaded...") |