{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Pokemon Attack Scraping Script" ] },
{ "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [
 "import pandas as pd\n",
 "import requests\n",
 "from bs4 import BeautifulSoup\n",
 "\n",
 "# Serebii attackdex-swsh listing pages: physical, special and status (\"other\") moves.\n",
 "physical_moves = \"https://www.serebii.net/attackdex-swsh/physical.shtml\"\n",
 "special_moves = \"https://www.serebii.net/attackdex-swsh/special.shtml\"\n",
 "status_moves = \"https://www.serebii.net/attackdex-swsh/other.shtml\"" ] },
{ "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [
 "# Fetch the physical-moves page used by the exploratory cells below.\n",
 "# The timeout keeps this cell from hanging forever on a stalled connection, and\n",
 "# raise_for_status() stops here on an HTTP error instead of parsing an error page.\n",
 "data = requests.get(physical_moves, timeout=30)\n",
 "data.raise_for_status()" ] },
{ "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [
 "# table-parsing approach adapted from\n",
 "# https://www.kite.com/python/examples/4420/beautifulsoup-parse-an-html-table-and-write-to-a-csv\n",
 "html = BeautifulSoup(data.text, 'html.parser')\n" ] },
{ "cell_type": "code", "execution_count": 83, "metadata": {}, "outputs": [], "source": [
 "soup = html\n",
 "# index 1 selects the second table element found on the page\n",
 "table = soup.find_all(\"table\")[1]\n",
 "\n",
 "# One list of stripped cell texts per table row; the first row is used as the\n",
 "# column header further down.\n",
 "output_rows = [\n",
 "    [column.text.strip() for column in table_row.find_all('td')]\n",
 "    for table_row in table.find_all('tr')\n",
 "]" ] },
{ "cell_type": "code", "execution_count": 84, "metadata": {}, "outputs": [], "source": [
 "# Keep the untouched scrape under its own name so the cells below can be re-run safely.\n",
 "moves_raw = pd.DataFrame(output_rows)" ] },
{ "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] },
{ "cell_type": "code", "execution_count": 85, "metadata": {}, "outputs": [], "source": [
 "# Promote the first scraped row to the header and drop it from the data.\n",
 "# Building from moves_raw (instead of re-slicing `moves` in place) makes this cell\n",
 "# idempotent: running it twice can no longer drop an extra data row.\n",
 "moves_all = moves_raw[1:].copy()\n",
 "moves_all.columns = moves_raw.iloc[0]" ] },
{ "cell_type": "code", "execution_count": 87, "metadata": {}, "outputs": [], "source": [
 "# Keep only moves whose Effect text does not say the move can't be used.\n",
 "# na=False treats a missing Effect cell as \"no match\" instead of raising.\n",
 "is_unusable = moves_all[\"Effect\"].str.contains(\"This move can't be used\", regex=False, na=False)\n",
 "moves = moves_all[~is_unusable]" ] },
{
"cell_type": "code", "execution_count": 88, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NameTypeCat.PPAtt.Acc.Effect
1Accelerock2040100The user smashes into the target at high speed...
3Acrobatics1555100The user nimbly strikes the target. If the use...
4Aerial Ace2060101The user confounds the target with speed, then...
6Anchor Shot2080100The user entangles the target with its anchor ...
7Aqua Jet2040100The user lunges at the target at a speed that ...
........................
381Wood Hammer15120100The user slams its rugged body into the target...
382Wrap201590A long body, vines, or the like are used to wr...
383X-Scissor1580100The user slashes at the target by crossing its...
384Zen Headbutt158090The user focuses its willpower to its head and...
385Zing Zap1080100A strong electric blast crashes down on the ta...
\n", "

322 rows × 7 columns

\n", "
" ], "text/plain": [ "0 Name Type Cat. PP Att. Acc. \\\n", "1 Accelerock 20 40 100 \n", "3 Acrobatics 15 55 100 \n", "4 Aerial Ace 20 60 101 \n", "6 Anchor Shot 20 80 100 \n", "7 Aqua Jet 20 40 100 \n", ".. ... ... ... .. ... ... \n", "381 Wood Hammer 15 120 100 \n", "382 Wrap 20 15 90 \n", "383 X-Scissor 15 80 100 \n", "384 Zen Headbutt 15 80 90 \n", "385 Zing Zap 10 80 100 \n", "\n", "0 Effect \n", "1 The user smashes into the target at high speed... \n", "3 The user nimbly strikes the target. If the use... \n", "4 The user confounds the target with speed, then... \n", "6 The user entangles the target with its anchor ... \n", "7 The user lunges at the target at a speed that ... \n", ".. ... \n", "381 The user slams its rugged body into the target... \n", "382 A long body, vines, or the like are used to wr... \n", "383 The user slashes at the target by crossing its... \n", "384 The user focuses its willpower to its head and... \n", "385 A strong electric blast crashes down on the ta... \n", "\n", "[322 rows x 7 columns]" ] }, "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ "moves" ] }, { "cell_type": "code", "execution_count": 95, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Removing some old moves... Found 386\n", "Removing some old moves... Found 237\n", "Removing some old moves... 
Found 260\n" ] } ], "source": [
 "def create_moves_df(url):\n",
 "    \"\"\"Scrape the moves table at `url` into a pandas DataFrame.\n",
 "\n",
 "    Works on Serebii's moves attackdex listing pages. The first scraped row\n",
 "    becomes the column header. Rows whose Effect text contains\n",
 "    'This move can't be used' (moves no longer usable in that generation)\n",
 "    are counted, reported with print(), and dropped. Cells equal to '--'\n",
 "    are replaced with 0.\n",
 "\n",
 "    Parameters\n",
 "    ----------\n",
 "    url : str\n",
 "        Address of the attackdex listing page to scrape.\n",
 "\n",
 "    Returns\n",
 "    -------\n",
 "    pandas.DataFrame\n",
 "        One row per remaining move. The index keeps gaps where rows were dropped.\n",
 "\n",
 "    Raises\n",
 "    ------\n",
 "    requests.HTTPError\n",
 "        If the server answers with an error status.\n",
 "    IndexError\n",
 "        If the page contains fewer than two tables.\n",
 "    \"\"\"\n",
 "    # The timeout avoids hanging forever on a stalled connection; raise_for_status()\n",
 "    # fails fast on an HTTP error instead of parsing an error page.\n",
 "    response = requests.get(url, timeout=30)\n",
 "    response.raise_for_status()\n",
 "    soup = BeautifulSoup(response.text, 'html.parser')\n",
 "\n",
 "    # first table is the table of contents for the moves, we don't want that\n",
 "    # table parsing adapted from\n",
 "    # https://www.kite.com/python/examples/4420/beautifulsoup-parse-an-html-table-and-write-to-a-csv\n",
 "    table = soup.find_all(\"table\")[1]\n",
 "\n",
 "    # one list of stripped cell texts per table row\n",
 "    output_rows = [\n",
 "        [column.text.strip() for column in table_row.find_all('td')]\n",
 "        for table_row in table.find_all('tr')\n",
 "    ]\n",
 "    moves = pd.DataFrame(output_rows)\n",
 "\n",
 "    # move first row to column, and drop row\n",
 "    moves.columns = moves.iloc[0]\n",
 "    moves = moves[1:].reset_index(drop=True)\n",
 "\n",
 "    # drop moves that don't exist in this gen, but also count them.\n",
 "    # The count is the number of True values in the mask (sum); the length of the\n",
 "    # mask would be the number of all scraped rows. na=False treats a missing\n",
 "    # Effect cell as \"no match\" instead of raising.\n",
 "    is_unusable = moves[\"Effect\"].str.contains(\"This move can't be used\", regex=False, na=False)\n",
 "    unusable_moves = int(is_unusable.sum())\n",
 "    print(\"Removing some old moves... Found \", unusable_moves)\n",
 "    moves = moves[~is_unusable]\n",
 "    moves = moves.replace(\"--\", 0)\n",
 "    return moves\n",
 "\n",
 "\n",
 "physical_moves = \"https://www.serebii.net/attackdex-swsh/physical.shtml\"\n",
 "special_moves = \"https://www.serebii.net/attackdex-swsh/special.shtml\"\n",
 "status_moves = \"https://www.serebii.net/attackdex-swsh/other.shtml\"\n",
 "\n",
 "physical_df = create_moves_df(physical_moves)\n",
 "special_df = create_moves_df(special_moves)\n",
 "status_df = create_moves_df(status_moves)\n",
 "\n",
 "# Each per-category frame carries its own row labels; ignore_index gives the combined\n",
 "# frame (and the index column written to the CSV) unique labels instead of duplicates.\n",
 "moves = pd.concat([physical_df, special_df, status_df], ignore_index=True)\n",
 "moves.to_csv(\"data/moves.csv\")" ] }, { "cell_type": "code", "execution_count": 96, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NameTypeCat.PPAtt.Acc.Effect
0Accelerock2040100The user smashes into the target at high speed...
2Acrobatics1555100The user nimbly strikes the target. If the use...
3Aerial Ace2060101The user confounds the target with speed, then...
5Anchor Shot2080100The user entangles the target with its anchor ...
6Aqua Jet2040100The user lunges at the target at a speed that ...
........................
255Withdraw400101The user withdraws its body into its hard shel...
256Wonder Room100101The user creates a bizarre area in which Pokém...
257Work Up300101The user is roused, and its Attack and Sp. Atk...
258Worry Seed100100A seed that causes worry is planted on the tar...
259Yawn100101The user lets loose a huge yawn that lulls the...
\n", "

743 rows × 7 columns

\n", "
" ], "text/plain": [ "0 Name Type Cat. PP Att. Acc. \\\n", "0 Accelerock 20 40 100 \n", "2 Acrobatics 15 55 100 \n", "3 Aerial Ace 20 60 101 \n", "5 Anchor Shot 20 80 100 \n", "6 Aqua Jet 20 40 100 \n", ".. ... ... ... .. ... ... \n", "255 Withdraw 40 0 101 \n", "256 Wonder Room 10 0 101 \n", "257 Work Up 30 0 101 \n", "258 Worry Seed 10 0 100 \n", "259 Yawn 10 0 101 \n", "\n", "0 Effect \n", "0 The user smashes into the target at high speed... \n", "2 The user nimbly strikes the target. If the use... \n", "3 The user confounds the target with speed, then... \n", "5 The user entangles the target with its anchor ... \n", "6 The user lunges at the target at a speed that ... \n", ".. ... \n", "255 The user withdraws its body into its hard shel... \n", "256 The user creates a bizarre area in which Pokém... \n", "257 The user is roused, and its Attack and Sp. Atk... \n", "258 A seed that causes worry is planted on the tar... \n", "259 The user lets loose a huge yawn that lulls the... \n", "\n", "[743 rows x 7 columns]" ] }, "execution_count": 96, "metadata": {}, "output_type": "execute_result" } ], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.11" } }, "nbformat": 4, "nbformat_minor": 4 }