Spaces:
Running
Running
Alejandro Cremades
committed on
Commit
•
dd85b1e
1
Parent(s):
106cfa9
Bonus, comments
Browse files - middleschool-cardlist.ipynb +27 -9
middleschool-cardlist.ipynb
CHANGED
@@ -44,6 +44,7 @@
|
|
44 |
" 'INV', 'PLS', '7ED', 'APC', 'ODY', 'TOR', 'JUD', 'ONS',\n",
|
45 |
" 'LGN', 'SCG', 'PDRC', 'PHPR', 'ATH', 'BRB', 'BTD', 'DKM']\n",
|
46 |
"for set in setlist:\n",
|
|
|
47 |
" command = 'cat data/AllPrintings.json | jq \\'.data.\\\"' + \\\n",
|
48 |
" set + '\\\".cards\\' > data/set_' + set + '.json'\n",
|
49 |
" !{command}\n"
|
@@ -75,19 +76,15 @@
|
|
75 |
},
|
76 |
{
|
77 |
"cell_type": "code",
|
78 |
-
"execution_count":
|
79 |
"metadata": {},
|
80 |
"outputs": [
|
81 |
{
|
82 |
"name": "stdout",
|
83 |
"output_type": "stream",
|
84 |
"text": [
|
85 |
-
" oracle_id name name_ja\n",
|
86 |
-
"0 5acc8b39-3c3e-4012-8cfd-ac3c2c4ca982 Arc Lightning 弧状の稲妻\n",
|
87 |
-
"0 5acc8b39-3c3e-4012-8cfd-ac3c2c4ca982 Arc Lightning None\n",
|
88 |
-
" oracle_id name name_ja\n",
|
89 |
-
"0 5acc8b39-3c3e-4012-8cfd-ac3c2c4ca982 Arc Lightning 弧状の稲妻\n",
|
90 |
"5800 cards found\n",
|
|
|
91 |
" oracle_id name name_ja\n",
|
92 |
"0 8adbba6e-03ef-4278-aec5-8a4496b377a8 Abandon Hope 断念\n",
|
93 |
"0 5a70ccfa-d12d-4e62-a1a4-f05cda2fd442 Abandoned Outpost 見捨てられた前哨地\n",
|
@@ -110,6 +107,7 @@
|
|
110 |
"with open(\"data/middleschool.json\") as json_data:\n",
|
111 |
" cards = json.loads(json_data.read())\n",
|
112 |
"\n",
|
|
|
113 |
"column_names = ['oracle_id', 'name', 'name_ja']\n",
|
114 |
"middleschool_df = pd.DataFrame(columns=column_names)\n",
|
115 |
"for card in cards:\n",
|
@@ -117,6 +115,7 @@
|
|
117 |
" name = card['name']\n",
|
118 |
" lang_ja = [lang for lang in card['foreignData']\n",
|
119 |
" if lang['language'] == 'Japanese']\n",
|
|
|
120 |
" if (len(lang_ja) > 0):\n",
|
121 |
" name_ja = lang_ja[0]['name']\n",
|
122 |
" else:\n",
|
@@ -128,20 +127,22 @@
|
|
128 |
" })\n",
|
129 |
" middleschool_df = pd.concat([middleschool_df, temporary_df])\n",
|
130 |
"\n",
|
|
|
131 |
"middleschool_df = middleschool_df.sort_values(by=['name', 'name_ja'])\n",
|
132 |
-
"
|
133 |
"middleschool_df = middleschool_df.drop_duplicates(subset=['oracle_id'])\n",
|
134 |
-
"print(middleschool_df[middleschool_df['name'] == 'Arc Lightning'])\n",
|
135 |
"print(middleschool_df.shape[0], 'cards found')\n",
|
|
|
136 |
"print(middleschool_df.head())\n",
|
137 |
"print(middleschool_df.tail())\n"
|
138 |
]
|
139 |
},
|
140 |
{
|
|
|
141 |
"cell_type": "markdown",
|
142 |
"metadata": {},
|
143 |
"source": [
|
144 |
-
"
|
145 |
]
|
146 |
},
|
147 |
{
|
@@ -169,11 +170,16 @@
|
|
169 |
"def find_japanese_name(name):\n",
|
170 |
" url = 'http://whisper.wisdom-guild.net/search.php?q=' + name\n",
|
171 |
" r = session.get(url)\n",
|
|
|
172 |
" title = r.html.find('title')[0].text\n",
|
|
|
173 |
" idx = title.find(name)\n",
|
|
|
|
|
174 |
" if idx == 0:\n",
|
175 |
" return None\n",
|
176 |
" else:\n",
|
|
|
177 |
" name_ja = title[0:idx - 1]\n",
|
178 |
" return name_ja\n",
|
179 |
"\n",
|
@@ -192,6 +198,14 @@
|
|
192 |
"print(name, '→', find_japanese_name(name))\n"
|
193 |
]
|
194 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
195 |
{
|
196 |
"cell_type": "code",
|
197 |
"execution_count": 19,
|
@@ -235,9 +249,13 @@
|
|
235 |
" \"Yawgmoth's Bargain\",\n",
|
236 |
" \"Yawgmoth's Will\"]\n",
|
237 |
"print('Cards legal by set:', middleschool_df.shape[0])\n",
|
|
|
238 |
"banned_df = middleschool_df[pd.DataFrame(\n",
|
239 |
" middleschool_df.name.tolist()).isin(banlist).any(axis=1).values]\n",
|
240 |
"print('Banned cards:', banned_df.shape[0])\n",
|
|
|
|
|
|
|
241 |
"middleschool_df = pd.concat(\n",
|
242 |
" [middleschool_df, banned_df]).drop_duplicates(keep=False)\n",
|
243 |
"print('Cards legal by set and not banned:', middleschool_df.shape[0])\n",
|
|
|
44 |
" 'INV', 'PLS', '7ED', 'APC', 'ODY', 'TOR', 'JUD', 'ONS',\n",
|
45 |
" 'LGN', 'SCG', 'PDRC', 'PHPR', 'ATH', 'BRB', 'BTD', 'DKM']\n",
|
46 |
"for set in setlist:\n",
|
47 |
+
" # Write a separate JSON document for each Middle School legal set\n",
|
48 |
" command = 'cat data/AllPrintings.json | jq \\'.data.\\\"' + \\\n",
|
49 |
" set + '\\\".cards\\' > data/set_' + set + '.json'\n",
|
50 |
" !{command}\n"
|
|
|
76 |
},
|
77 |
{
|
78 |
"cell_type": "code",
|
79 |
+
"execution_count": 56,
|
80 |
"metadata": {},
|
81 |
"outputs": [
|
82 |
{
|
83 |
"name": "stdout",
|
84 |
"output_type": "stream",
|
85 |
"text": [
|
|
|
|
|
|
|
|
|
|
|
86 |
"5800 cards found\n",
|
87 |
+
"These are the first and last 5 cards\n",
|
88 |
" oracle_id name name_ja\n",
|
89 |
"0 8adbba6e-03ef-4278-aec5-8a4496b377a8 Abandon Hope 断念\n",
|
90 |
"0 5a70ccfa-d12d-4e62-a1a4-f05cda2fd442 Abandoned Outpost 見捨てられた前哨地\n",
|
|
|
107 |
"with open(\"data/middleschool.json\") as json_data:\n",
|
108 |
" cards = json.loads(json_data.read())\n",
|
109 |
"\n",
|
110 |
+
"# Create a pandas DataFrame with all cards from all legal sets\n",
|
111 |
"column_names = ['oracle_id', 'name', 'name_ja']\n",
|
112 |
"middleschool_df = pd.DataFrame(columns=column_names)\n",
|
113 |
"for card in cards:\n",
|
|
|
115 |
" name = card['name']\n",
|
116 |
" lang_ja = [lang for lang in card['foreignData']\n",
|
117 |
" if lang['language'] == 'Japanese']\n",
|
118 |
+
" # Some cards do not have a Japanese name\n",
|
119 |
" if (len(lang_ja) > 0):\n",
|
120 |
" name_ja = lang_ja[0]['name']\n",
|
121 |
" else:\n",
|
|
|
127 |
" })\n",
|
128 |
" middleschool_df = pd.concat([middleschool_df, temporary_df])\n",
|
129 |
"\n",
|
130 |
+
"# For cards with multiple occurrences, put the rows that have the Japanese name on top\n",
|
131 |
"middleschool_df = middleschool_df.sort_values(by=['name', 'name_ja'])\n",
|
132 |
+
"# For cards with multiple occurrences, delete all rows except for the top one\n",
|
133 |
"middleschool_df = middleschool_df.drop_duplicates(subset=['oracle_id'])\n",
|
|
|
134 |
"print(middleschool_df.shape[0], 'cards found')\n",
|
135 |
+
"print('These are the first and last 5 cards')\n",
|
136 |
"print(middleschool_df.head())\n",
|
137 |
"print(middleschool_df.tail())\n"
|
138 |
]
|
139 |
},
|
140 |
{
|
141 |
+
"attachments": {},
|
142 |
"cell_type": "markdown",
|
143 |
"metadata": {},
|
144 |
"source": [
|
145 |
+
"Find Japanese names for cards that were not released in Japanese in Middle School legal sets"
|
146 |
]
|
147 |
},
|
148 |
{
|
|
|
170 |
"def find_japanese_name(name):\n",
|
171 |
" url = 'http://whisper.wisdom-guild.net/search.php?q=' + name\n",
|
172 |
" r = session.get(url)\n",
|
173 |
+
" # Find the text on the <title> element in the HTML document\n",
|
174 |
" title = r.html.find('title')[0].text\n",
|
175 |
+
" # Find the position of the English card name within the title\n",
|
176 |
" idx = title.find(name)\n",
|
177 |
+
" # The Japanese name should be before the English name, \n",
|
178 |
+
" # so if idx is 0, there is no Japanese name\n",
|
179 |
" if idx == 0:\n",
|
180 |
" return None\n",
|
181 |
" else:\n",
|
182 |
+
" # Take only the Japanese name from the title\n",
|
183 |
" name_ja = title[0:idx - 1]\n",
|
184 |
" return name_ja\n",
|
185 |
"\n",
|
|
|
198 |
"print(name, '→', find_japanese_name(name))\n"
|
199 |
]
|
200 |
},
|
201 |
+
{
|
202 |
+
"attachments": {},
|
203 |
+
"cell_type": "markdown",
|
204 |
+
"metadata": {},
|
205 |
+
"source": [
|
206 |
+
"Exclude all cards banned in Middle School"
|
207 |
+
]
|
208 |
+
},
|
209 |
{
|
210 |
"cell_type": "code",
|
211 |
"execution_count": 19,
|
|
|
249 |
" \"Yawgmoth's Bargain\",\n",
|
250 |
" \"Yawgmoth's Will\"]\n",
|
251 |
"print('Cards legal by set:', middleschool_df.shape[0])\n",
|
252 |
+
"# Find the rows with the banned cards\n",
|
253 |
"banned_df = middleschool_df[pd.DataFrame(\n",
|
254 |
" middleschool_df.name.tolist()).isin(banlist).any(axis=1).values]\n",
|
255 |
"print('Banned cards:', banned_df.shape[0])\n",
|
256 |
+
"# Append the banned cards to the main Middle School DataFrame,\n",
|
257 |
+
"# then remove any rows that appear twice,\n",
|
258 |
+
"# effectively leaving only the legal cards\n",
|
259 |
"middleschool_df = pd.concat(\n",
|
260 |
" [middleschool_df, banned_df]).drop_duplicates(keep=False)\n",
|
261 |
"print('Cards legal by set and not banned:', middleschool_df.shape[0])\n",
|