Alejandro Cremades committed on
Commit
952f04c
0 Parent(s):

first commit

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ data/*
data/data_files_go_here ADDED
File without changes
middleschool-cardlist.ipynb ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "metadata": {},
6
+ "source": [
7
+ "# middleschool-cardlist\n",
8
+ "\n",
9
+ "## Prepare the data\n",
10
+ "\n",
11
+ "Download raw data from [MTGJSON](https://mtgjson.com/) (uncomment and run only once)\n"
12
+ ]
13
+ },
14
+ {
15
+ "cell_type": "code",
16
+ "execution_count": 1,
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "# %cd data\n",
21
+ "# !wget \"https://mtgjson.com/api/v5/AllPrintings.json.bz2\"\n",
22
+ "# !bunzip2 AllPrintings.json.bz2\n",
23
+ "# %cd -\n"
24
+ ]
25
+ },
26
+ {
27
+ "cell_type": "markdown",
28
+ "metadata": {},
29
+ "source": [
30
+ "The raw data is very large, so let's extract one JSON file per relevant set\n",
31
+ "\n",
32
+ "Note: this cell can take a couple of minutes to run\n"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "code",
37
+ "execution_count": 2,
38
+ "metadata": {},
39
+ "outputs": [],
40
+ "source": [
41
+ "setlist = ['4ED', 'ICE', 'CHR', 'HML', 'ALL', 'MIR', 'VIS', '5ED',\n",
42
+ "           'WTH', 'POR', 'TMP', 'STH', 'EXO', 'P02', 'USG', 'ULG',\n",
43
+ "           '6ED', 'UDS', 'PTK', 'S99', 'MMQ', 'NEM', 'PCY', 'S00',\n",
44
+ "           'INV', 'PLS', '7ED', 'APC', 'ODY', 'TOR', 'JUD', 'ONS',\n",
45
+ "           'LGN', 'SCG', 'PDRC', 'PHPR']\n",
46
+ "for set_code in setlist:  # `set` would shadow the builtin type\n",
47
+ "    command = 'cat data/AllPrintings.json | jq \\'.data.\\\"' + \\\n",
48
+ "        set_code + '\\\".cards\\' > data/set_' + set_code + '.json'\n",
49
+ "    !{command}\n"
50
+ ]
51
+ },
52
+ {
53
+ "cell_type": "markdown",
54
+ "metadata": {},
55
+ "source": [
56
+ "Concatenate all set files into `middleschool.json`\n"
57
+ ]
58
+ },
59
+ {
60
+ "cell_type": "code",
61
+ "execution_count": 9,
62
+ "metadata": {},
63
+ "outputs": [],
64
+ "source": [
65
+ "# -s slurps every per-set array; add joins them into a single array\n",
66
+ "!jq -s add data/set_* > data/middleschool.json\n"
67
+ ]
68
+ },
69
+ {
70
+ "cell_type": "markdown",
71
+ "metadata": {},
72
+ "source": [
73
+ "Create a list with each card's oracle ID, English name, and Japanese name\n"
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": 12,
79
+ "metadata": {},
80
+ "outputs": [
81
+ {
82
+ "name": "stdout",
83
+ "output_type": "stream",
84
+ "text": [
85
+ "5793 cards found\n",
86
+ " oracle_id name name_ja\n",
87
+ "0 8adbba6e-03ef-4278-aec5-8a4496b377a8 Abandon Hope 断念\n",
88
+ "0 5a70ccfa-d12d-4e62-a1a4-f05cda2fd442 Abandoned Outpost 見捨てられた前哨地\n",
89
+ "0 c208b959-d0e4-4a9a-8255-2c7cc7596767 Abbey Gargoyles 修道院のガーゴイル\n",
90
+ "0 62e3f285-886c-414e-b4ff-403a7c01c23a Abbey Matron None\n",
91
+ "0 d0e1904e-1a37-41f6-8582-b9ea794bb886 Abduction 誘拐\n",
92
+ " oracle_id name name_ja\n",
93
+ "0 ae8773a3-05f2-4074-9a53-033b0c127235 Zuo Ci, the Mocking Sage 嘲笑する仙人 左慈\n",
94
+ "0 c6eaa147-3566-43a9-999a-d58b877496f5 Zur's Weirding ズアーの運命支配\n",
95
+ "0 ee0f883f-d7c9-4acf-a78f-f733b6f268d3 Zuran Enchanter None\n",
96
+ "0 08cb8a30-9cb4-4517-bee5-8848aa60d1a2 Zuran Orb None\n",
97
+ "0 bc7b90b1-3517-4e5d-9bd8-68b4d8a259fd Zuran Spellcaster None\n"
98
+ ]
99
+ }
100
+ ],
101
+ "source": [
102
+ "import json\n",
103
+ "import pandas as pd\n",
104
+ "\n",
105
+ "# JSON text is UTF-8; do not depend on the platform default encoding\n",
106
+ "with open(\"data/middleschool.json\", encoding=\"utf-8\") as json_data:\n",
107
+ "    cards = json.load(json_data)\n",
108
+ "\n",
109
+ "\n",
110
+ "def japanese_name(card):\n",
111
+ "    \"\"\"Return the name of the card's first Japanese entry, or None.\"\"\"\n",
112
+ "    for foreign in card.get('foreignData', []):\n",
113
+ "        if foreign['language'] == 'Japanese':\n",
114
+ "            return foreign['name']\n",
115
+ "    return None\n",
116
+ "\n",
117
+ "\n",
118
+ "# Build the frame once from plain tuples; growing it with pd.concat\n",
119
+ "# per card would copy the whole frame on every iteration\n",
120
+ "column_names = ['oracle_id', 'name', 'name_ja']\n",
121
+ "records = [(card['identifiers']['scryfallOracleId'], card['name'],\n",
122
+ "            japanese_name(card)) for card in cards]\n",
123
+ "middleschool_df = pd.DataFrame(records, columns=column_names)\n",
124
+ "\n",
125
+ "# Keep one row per oracle ID (the first occurrence wins)\n",
126
+ "middleschool_df = middleschool_df.drop_duplicates(subset=['oracle_id'])\n",
127
+ "middleschool_df = middleschool_df.sort_values(by='name')\n",
128
+ "print(middleschool_df.shape[0], 'cards found')\n",
129
+ "print(middleschool_df.head())\n",
130
+ "print(middleschool_df.tail())\n"
131
+ ]
132
+ },
133
+ {
134
+ "cell_type": "markdown",
135
+ "metadata": {},
136
+ "source": [
137
+ "Exclude all cards banned in Middle School\n"
138
+ ]
139
+ },
140
+ {
141
+ "cell_type": "code",
142
+ "execution_count": 13,
143
+ "metadata": {},
144
+ "outputs": [
145
+ {
146
+ "name": "stdout",
147
+ "output_type": "stream",
148
+ "text": [
149
+ "Cards legal by set: 5793\n",
150
+ "Banned cards: 26\n",
151
+ "Cards legal by set and not banned: 5767\n"
152
+ ]
153
+ }
154
+ ],
155
+ "source": [
156
+ "banlist = [\"Amulet of Quoz\",\n",
157
+ "           \"Balance\",\n",
158
+ "           \"Brainstorm\",\n",
159
+ "           \"Bronze Tablet\",\n",
160
+ "           \"Channel\",\n",
161
+ "           \"Dark Ritual\",\n",
162
+ "           \"Demonic Consultation\",\n",
163
+ "           \"Flash\",\n",
164
+ "           \"Goblin Recruiter\",\n",
165
+ "           \"Imperial Seal\",\n",
166
+ "           \"Jeweled Bird\",\n",
167
+ "           \"Lion's Eye Diamond\",\n",
168
+ "           \"Mana Crypt\",\n",
169
+ "           \"Mana Vault\",\n",
170
+ "           \"Memory Jar\",\n",
171
+ "           \"Mind's Desire\",\n",
172
+ "           \"Mind Twist\",\n",
173
+ "           \"Rebirth\",\n",
174
+ "           \"Strip Mine\",\n",
175
+ "           \"Tempest Efreet\",\n",
176
+ "           \"Timmerian Fiends\",\n",
177
+ "           \"Tolarian Academy\",\n",
178
+ "           \"Vampiric Tutor\",\n",
179
+ "           \"Windfall\",\n",
180
+ "           \"Yawgmoth's Bargain\",\n",
181
+ "           \"Yawgmoth's Will\"]\n",
182
+ "print('Cards legal by set:', middleschool_df.shape[0])\n",
183
+ "# Positional boolean mask of banned names; Series.isin avoids\n",
184
+ "# DataFrame.any(1), whose positional axis pandas >= 2.0 rejects\n",
185
+ "is_banned = middleschool_df['name'].isin(banlist).to_numpy()\n",
186
+ "banned_df = middleschool_df[is_banned]\n",
187
+ "print('Banned cards:', banned_df.shape[0])\n",
188
+ "middleschool_df = middleschool_df[~is_banned].reset_index(drop=True)\n",
189
+ "print('Cards legal by set and not banned:', middleschool_df.shape[0])\n",
190
+ "middleschool_df = middleschool_df[['oracle_id', 'name', 'name_ja']]\n"
191
+ ]
192
+ },
193
+ {
194
+ "cell_type": "markdown",
195
+ "metadata": {},
196
+ "source": [
197
+ "Save the list to a CSV file and a JSON file\n"
198
+ ]
199
+ },
200
+ {
201
+ "cell_type": "code",
202
+ "execution_count": 14,
203
+ "metadata": {},
204
+ "outputs": [],
205
+ "source": [
206
+ "middleschool_df.to_csv(path_or_buf='output/middleschool.csv')\n",
207
+ "middleschool_df.to_json(path_or_buf='output/middleschool.json')\n"
208
+ ]
209
+ }
210
+ ],
211
+ "metadata": {
212
+ "kernelspec": {
213
+ "display_name": "Python 3.9.12 64-bit",
214
+ "language": "python",
215
+ "name": "python3"
216
+ },
217
+ "language_info": {
218
+ "codemirror_mode": {
219
+ "name": "ipython",
220
+ "version": 3
221
+ },
222
+ "file_extension": ".py",
223
+ "mimetype": "text/x-python",
224
+ "name": "python",
225
+ "nbconvert_exporter": "python",
226
+ "pygments_lexer": "ipython3",
227
+ "version": "3.9.12"
228
+ },
229
+ "orig_nbformat": 4,
230
+ "vscode": {
231
+ "interpreter": {
232
+ "hash": "b0fa6594d8f4cbf19f97940f81e996739fb7646882a419484c72d19e05852a7e"
233
+ }
234
+ }
235
+ },
236
+ "nbformat": 4,
237
+ "nbformat_minor": 2
238
+ }
output/middleschool.csv ADDED
The diff for this file is too large to render. See raw diff
 
output/middleschool.json ADDED
The diff for this file is too large to render. See raw diff