# Pokemon Attack Scraping Script
#
# Exploratory walk-through: scrape the physical-moves table from Serebii's
# Sword/Shield attackdex and turn it into a pandas DataFrame.

import pandas as pd
import requests
from bs4 import BeautifulSoup

# Serebii attackdex pages, one per move category.
physical_moves = "https://www.serebii.net/attackdex-swsh/physical.shtml"
special_moves = "https://www.serebii.net/attackdex-swsh/special.shtml"
status_moves = "https://www.serebii.net/attackdex-swsh/other.shtml"

# Fetch the physical-moves page. The timeout keeps the cell from hanging
# forever, and raise_for_status() stops us from parsing an HTTP error page.
data = requests.get(physical_moves, timeout=30)
data.raise_for_status()

# Table-parsing approach adapted from
# https://www.kite.com/python/examples/4420/beautifulsoup-parse-an-html-table-and-write-to-a-csv
html = BeautifulSoup(data.text, 'html.parser')

soup = html
# Index 1 = second <table> on the page; the first one is skipped.
table = soup.find_all("table")[1]

# One list of stripped cell texts per <tr>.
output_rows = [
    [column.text.strip() for column in table_row.find_all('td')]
    for table_row in table.find_all('tr')
]

# Keep each stage under its own name so every step can be re-run without
# compounding its effect (re-running `moves = moves[1:]` would drop one more
# row each time).
moves_raw = pd.DataFrame(output_rows)

# The first scraped row holds the column headers: promote it, then drop it.
moves_named = moves_raw.iloc[1:].copy()
moves_named.columns = moves_raw.iloc[0]

# Drop moves whose effect text says they cannot be used in this generation.
# na=False treats a missing Effect cell as "usable" instead of raising.
is_unusable = moves_named["Effect"].str.contains(
    "This move can't be used", regex=False, na=False
)
moves = moves_named[~is_unusable]

moves
def create_moves_df(url, timeout=30):
    """Scrape a Serebii attackdex page into a DataFrame of usable moves.

    Downloads ``url``, reads the second ``<table>`` on the page, uses its
    first row as the column headers, and removes every move whose ``Effect``
    text contains "This move can't be used". The number of removed moves is
    printed.

    Parameters
    ----------
    url : str
        Address of the attackdex page to scrape.
    timeout : float, optional
        Seconds to wait for the HTTP response before giving up.

    Returns
    -------
    pandas.DataFrame
        One row per remaining move, with columns named after the table's
        header row.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    IndexError
        If the page has fewer than two tables or the table has no rows.
    """
    data = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of parsing an error page.
    data.raise_for_status()
    soup = BeautifulSoup(data.text, 'html.parser')

    # first table is the table of contents for the moves, we don't want that
    # table-parsing approach adapted from
    # https://www.kite.com/python/examples/4420/beautifulsoup-parse-an-html-table-and-write-to-a-csv
    table = soup.find_all("table")[1]

    output_rows = []
    for table_row in table.find_all('tr'):
        cells = [column.text.strip() for column in table_row.find_all('td')]
        # Rows without any <td> carry no move data; skip them.
        if cells:
            output_rows.append(cells)

    moves = pd.DataFrame(output_rows)

    # move first row to column, and drop row
    moves.columns = moves.iloc[0]
    moves = moves[1:].reset_index(drop=True)

    # drop moves that don't exist in this gen, but also count them.
    # The mask must be summed: len() of the mask is the total row count,
    # not the number of unusable moves.
    is_unusable = moves["Effect"].str.contains(
        "This move can't be used", regex=False, na=False
    )
    unusable_moves = int(is_unusable.sum())
    print("Removing some old moves... Found ", unusable_moves)
    moves = moves[~is_unusable]
    return moves


physical_moves = "https://www.serebii.net/attackdex-swsh/physical.shtml"
special_moves = "https://www.serebii.net/attackdex-swsh/special.shtml"
status_moves = "https://www.serebii.net/attackdex-swsh/other.shtml"


physical_df = create_moves_df(physical_moves)
special_df = create_moves_df(special_moves)
status_df = create_moves_df(status_moves)

# ignore_index avoids duplicate row labels across the three category frames.
moves = pd.concat([physical_df, special_df, status_df], ignore_index=True)

len(moves)