Sasha committed on
Commit
9c420d3
1 Parent(s): 95b75a7

Cleaning things up a bit

Browse files
data/carbon_df.pkl ADDED
Binary file (7.32 kB). View file
hf-earth.png ADDED
notebooks/APICarbonQuery.ipynb ADDED
@@ -0,0 +1,200 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "c82eb8a8",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "import json, requests, urllib"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 2,
16
+ "id": "45a53227",
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "import pandas as pd"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 3,
26
+ "id": "5ee17bd2",
27
+ "metadata": {},
28
+ "outputs": [],
29
+ "source": [
30
+ "response = requests.get(\"https://huggingface.co/api/models?filter=co2_eq_emissions\")"
31
+ ]
32
+ },
33
+ {
34
+ "cell_type": "code",
35
+ "execution_count": 4,
36
+ "id": "805a29d7",
37
+ "metadata": {},
38
+ "outputs": [
39
+ {
40
+ "name": "stdout",
41
+ "output_type": "stream",
42
+ "text": [
43
+ "Out of 78 models, 78 of them reported carbon emissions\n"
44
+ ]
45
+ }
46
+ ],
47
+ "source": [
48
+ "modelcount=0\n",
49
+ "carboncount=0\n",
50
+ "\n",
51
+ "carbon_df = pd.DataFrame(columns=['name','task','carbon'])\n",
52
+ "\n",
53
+ "for model in response.json():\n",
54
+ " modelcount+=1\n",
55
+ " if model['private'] == False:\n",
56
+ " try:\n",
57
+ " readme = urllib.request.urlopen(\"https://huggingface.co/\"+model['modelId']+\"/raw/main/README.md\")\n",
58
+ " for line in readme:\n",
59
+ " decoded_line = line.decode(\"utf-8\")\n",
60
+ " if 'co2_eq_emissions' in decoded_line:\n",
61
+ " carboncount+=1\n",
62
+ " #print(model['modelId'], model['pipeline_tag'], decoded_line.split(\":\")[1])\n",
63
+ " try:\n",
64
+ " carbon_df.at[carboncount,'name'] = str(model['modelId'])\n",
65
+ " carbon_df.at[carboncount,'task'] = str(model['pipeline_tag'])\n",
66
+ " carbon_df.at[carboncount,'carbon'] = float(decoded_line.split(\":\")[1].replace('\\n',''))\n",
67
+ " except:\n",
68
+ " carbon_df.at[carboncount,'name'] = str(model['modelId'])\n",
69
+ " carbon_df.at[carboncount,'task'] = ''\n",
70
+ " carbon_df.at[carboncount,'carbon'] = float(decoded_line.split(\":\")[1].replace('\\n',''))\n",
71
+ " except:\n",
72
+ " continue\n",
73
+ "print(\"Out of \"+str(modelcount)+\" models, \"+str(carboncount)+ \" of them reported carbon emissions\")"
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": 5,
79
+ "id": "ce21fde5",
80
+ "metadata": {},
81
+ "outputs": [
82
+ {
83
+ "data": {
84
+ "text/html": [
85
+ "<div>\n",
86
+ "<style scoped>\n",
87
+ " .dataframe tbody tr th:only-of-type {\n",
88
+ " vertical-align: middle;\n",
89
+ " }\n",
90
+ "\n",
91
+ " .dataframe tbody tr th {\n",
92
+ " vertical-align: top;\n",
93
+ " }\n",
94
+ "\n",
95
+ " .dataframe thead th {\n",
96
+ " text-align: right;\n",
97
+ " }\n",
98
+ "</style>\n",
99
+ "<table border=\"1\" class=\"dataframe\">\n",
100
+ " <thead>\n",
101
+ " <tr style=\"text-align: right;\">\n",
102
+ " <th></th>\n",
103
+ " <th>name</th>\n",
104
+ " <th>task</th>\n",
105
+ " <th>carbon</th>\n",
106
+ " </tr>\n",
107
+ " </thead>\n",
108
+ " <tbody>\n",
109
+ " <tr>\n",
110
+ " <th>1</th>\n",
111
+ " <td>Aimendo/autonlp-triage-35248482</td>\n",
112
+ " <td>text-classification</td>\n",
113
+ " <td>7.989145</td>\n",
114
+ " </tr>\n",
115
+ " <tr>\n",
116
+ " <th>2</th>\n",
117
+ " <td>Anorak/nirvana</td>\n",
118
+ " <td>text2text-generation</td>\n",
119
+ " <td>4.214013</td>\n",
120
+ " </tr>\n",
121
+ " <tr>\n",
122
+ " <th>3</th>\n",
123
+ " <td>AryanLala/autonlp-Scientific_Title_Generator-3...</td>\n",
124
+ " <td>text2text-generation</td>\n",
125
+ " <td>137.605741</td>\n",
126
+ " </tr>\n",
127
+ " <tr>\n",
128
+ " <th>4</th>\n",
129
+ " <td>Crasher222/kaggle-comp-test</td>\n",
130
+ " <td>text-classification</td>\n",
131
+ " <td>60.744727</td>\n",
132
+ " </tr>\n",
133
+ " <tr>\n",
134
+ " <th>5</th>\n",
135
+ " <td>Emanuel/autonlp-pos-tag-bosque</td>\n",
136
+ " <td>token-classification</td>\n",
137
+ " <td>6.210727</td>\n",
138
+ " </tr>\n",
139
+ " </tbody>\n",
140
+ "</table>\n",
141
+ "</div>"
142
+ ],
143
+ "text/plain": [
144
+ " name task \\\n",
145
+ "1 Aimendo/autonlp-triage-35248482 text-classification \n",
146
+ "2 Anorak/nirvana text2text-generation \n",
147
+ "3 AryanLala/autonlp-Scientific_Title_Generator-3... text2text-generation \n",
148
+ "4 Crasher222/kaggle-comp-test text-classification \n",
149
+ "5 Emanuel/autonlp-pos-tag-bosque token-classification \n",
150
+ "\n",
151
+ " carbon \n",
152
+ "1 7.989145 \n",
153
+ "2 4.214013 \n",
154
+ "3 137.605741 \n",
155
+ "4 60.744727 \n",
156
+ "5 6.210727 "
157
+ ]
158
+ },
159
+ "execution_count": 5,
160
+ "metadata": {},
161
+ "output_type": "execute_result"
162
+ }
163
+ ],
164
+ "source": [
165
+ "carbon_df.head()"
166
+ ]
167
+ },
168
+ {
169
+ "cell_type": "code",
170
+ "execution_count": 11,
171
+ "id": "fe01a841",
172
+ "metadata": {},
173
+ "outputs": [],
174
+ "source": [
175
+ "carbon_df.to_pickle(\"./carbon_df.pkl\")"
176
+ ]
177
+ }
178
+ ],
179
+ "metadata": {
180
+ "kernelspec": {
181
+ "display_name": "datametrics",
182
+ "language": "python",
183
+ "name": "datametrics"
184
+ },
185
+ "language_info": {
186
+ "codemirror_mode": {
187
+ "name": "ipython",
188
+ "version": 3
189
+ },
190
+ "file_extension": ".py",
191
+ "mimetype": "text/x-python",
192
+ "name": "python",
193
+ "nbconvert_exporter": "python",
194
+ "pygments_lexer": "ipython3",
195
+ "version": "3.8.2"
196
+ }
197
+ },
198
+ "nbformat": 4,
199
+ "nbformat_minor": 5
200
+ }