# VCT-map-predictor / vct_data.py
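"""Scrape vlr.gg for Champions Tour 2024 match data.

For every completed map of every match in each "Champions Tour 2024"
event, collect the tournament name, match and map IDs, both team names,
each team's agent picks, and the map result, then write the rows to
data.csv.
"""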
from bs4 import BeautifulSoup
import pandas as pd
import requests
# Pretend to be a regular browser so vlr.gg does not reject the requests.
headers = {
    'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:35.0) Gecko/20100101 Firefox/35.0',
}


def get_html_document(url):
    """Fetch a page and return its HTML as text."""
    response = requests.get(url, headers=headers)
    return response.text

def create_data():
    url = "https://www.vlr.gg/"

    # List every event shown on the vlr.gg events page.
    html_document_events = get_html_document(url + "events")
    soup_events = BeautifulSoup(html_document_events, 'html.parser')
    events_html = soup_events.find_all("a", {"class": "wf-card mod-flex event-item"})

    # One entry per completed map; the lists become DataFrame columns at the end.
    data = {"Tournament": [], "Match ID": [], "Team A": [], "Team B": [], "Map ID": [], "Map": [], "TA Agents": [], "TB Agents": [], "Result": []}
    events_id = []
    events_name = []
    for event_html in events_html:
        # Event links look like "/event/<id>/<slug>"; take the id segment
        # instead of slicing fixed character positions, so ids of any length work.
        event_id = event_html["href"].split("/")[2]
        event_name = event_html.find_all("div", {"class": "event-item-title"})[0].text.strip()
        print("Tournament : " + event_name)

        # Only keep VCT 2024 events.
        if "Champions Tour 2024" not in event_name:
            continue
        events_id.append(event_id)
        events_name.append(event_name)

        # List every match of the event.
        html_document_matches = get_html_document(url + f"event/matches/{event_id}")
        soup_matches = BeautifulSoup(html_document_matches, 'html.parser')
        matches_html = soup_matches.find_all("a", {"class": "wf-module-item match-item mod-color mod-left mod-bg-after-striped_purple mod-first"}) + soup_matches.find_all("a", {"class": "wf-module-item match-item mod-color mod-left mod-bg-after-striped_purple"})
        matches_id = []
        for match_html in matches_html:
            # Match links look like "/<match id>/<slug>"; keep the id segment.
            match_id = match_html["href"].split("/")[1]
            matches_id.append(match_id)
            print("Match ID : " + match_id)

            # Open the match page itself.
            html_document_match = get_html_document(url + match_id)
            soup_match = BeautifulSoup(html_document_match, 'html.parser')
            match_header_vs = soup_match.find_all("div", {"class": "match-header-vs"})

            # Team names, with internal whitespace collapsed to single spaces.
            team_a = " ".join(match_header_vs[0].find_all("div", {"class": "match-header-link-name mod-1"})[0].find_all("div", {"class": "wf-title-med"})[0].text.split())
            print("Team A : " + team_a)
            team_b = " ".join(match_header_vs[0].find_all("div", {"class": "match-header-link-name mod-2"})[0].find_all("div", {"class": "wf-title-med"})[0].text.split())
            print("Team B : " + team_b)

            # Skip matches that have not been played yet.
            if team_a == "TBD" or team_b == "TBD":
                print("not finished")
                continue
            # Each played map of the match has its own stats tab.
            maps_html = soup_match.find_all("div", {"class": "vm-stats-gamesnav-item js-map-switch"})
            maps_id = []
            for map_html in maps_html:
                map_id = map_html["data-game-id"]
                print("Map ID : " + map_id)
                maps_id.append(map_id)

                # Load the per-map stats view.
                html_document_map = get_html_document(url + match_id + "/?game=" + map_id)
                soup_map = BeautifulSoup(html_document_map, 'html.parser')
                vm_stats_game = soup_map.find_all("div", {"class": "vm-stats-game mod-active"})

                # Map name is the first word of the map label, e.g. "Ascent".
                map_div = vm_stats_game[0].find_all("div", {"class": "map"})
                map_text = map_div[0].find_all("span", {"style": "position: relative;"})[0].text.split()
                map_name = map_text[0]
                print("Map : " + map_name)
                # The first five agent cells belong to team A, the next five to team B.
                agents_html = vm_stats_game[0].find_all("span", {"class": "stats-sq mod-agent small"})
                ta_agents = []
                tb_agents = []
                for i, agent_html in enumerate(agents_html):
                    agent = agent_html.find_all("img")[0]["title"]
                    if i <= 4:
                        ta_agents.append(agent)
                    else:
                        tb_agents.append(agent)
                ta_agents = sorted(ta_agents)
                tb_agents = sorted(tb_agents)
                print("TA Agents : ", ta_agents)
                print("TB Agents : ", tb_agents)

                # No agent data means the map has no stats yet; skip it.
                if ta_agents == [] or tb_agents == []:
                    print("not finished")
                    continue
                # Team A's score element carries the "mod-win" class only when team A won the map.
                is_winner = vm_stats_game[0].find_all("div", {"class": "team"})[0].find_all("div", {"class": "score mod-win"})
                if is_winner == []:
                    result = "Team B win"
                else:
                    result = "Team A win"

                # Record one row for this map.
                data["Tournament"].append(event_name)
                data["Match ID"].append(match_id)
                data["Team A"].append(team_a)
                data["Team B"].append(team_b)
                data["Map ID"].append(map_id)
                data["Map"].append(map_name)
                data["TA Agents"].append(ta_agents)
                data["TB Agents"].append(tb_agents)
                data["Result"].append(result)
                print("All data of the map : ", data)

    # All events processed; save everything to disk.
    df = pd.DataFrame(data=data)
    df.to_csv("data.csv", sep=",", index=False, encoding="utf-8")
    print("data.csv file saved...")