{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "intro-md",
   "metadata": {},
   "source": [
    "# Reported carbon emissions of Hugging Face Hub models\n",
    "\n",
    "This notebook asks the Hub API for the models filtered by `co2_eq_emissions`, scans each public model's `README.md` for the first line that contains that key together with a numeric value, and stores the result as a table with the columns `name`, `task` and `carbon` in `./carbon_df.pkl`.\n",
    "\n",
    "Running it requires network access; models whose README cannot be downloaded are reported and skipped."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c82eb8a8",
   "metadata": {},
   "outputs": [],
   "source": [
    "# urllib.request is imported explicitly: a bare `import urllib` does not load the submodule.\n",
    "import http.client\n",
    "import json\n",
    "import urllib.request\n",
    "\n",
    "import pandas as pd\n",
    "import requests"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "config-cell",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hub API endpoint listing the models filtered by co2_eq_emissions.\n",
    "HUB_API_URL = \"https://huggingface.co/api/models?filter=co2_eq_emissions\"\n",
    "# Raw README location for a given model id.\n",
    "README_URL_TEMPLATE = \"https://huggingface.co/{model_id}/raw/main/README.md\"\n",
    "# Key searched for in every README line.\n",
    "EMISSIONS_KEY = \"co2_eq_emissions\"\n",
    "# Seconds to wait on a single HTTP call before giving up.\n",
    "REQUEST_TIMEOUT_S = 30\n",
    "# File the resulting table is pickled to.\n",
    "OUTPUT_PATH = \"./carbon_df.pkl\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "fetch-md",
   "metadata": {},
   "source": [
    "## Fetch the model list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "5ee17bd2",
   "metadata": {},
   "outputs": [],
   "source": [
    "response = requests.get(HUB_API_URL, timeout=REQUEST_TIMEOUT_S)\n",
    "# Fail here on an HTTP error instead of iterating over an error payload later.\n",
    "response.raise_for_status()"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "helpers-md",
   "metadata": {},
   "source": [
    "## README parsing helpers"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "helpers-cell",
   "metadata": {},
   "outputs": [],
   "source": [
    "def parse_emissions(readme_lines):\n",
    "    \"\"\"Return the first numeric co2_eq_emissions value found in README lines.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    readme_lines : iterable of bytes or str\n",
    "        Lines of a README file.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    float or None\n",
    "        The text between the first and second ':' of the first line that\n",
    "        contains EMISSIONS_KEY and parses as a float; None when no line does.\n",
    "    \"\"\"\n",
    "    for raw_line in readme_lines:\n",
    "        if isinstance(raw_line, bytes):\n",
    "            # A stray non-UTF-8 byte should not discard the whole README.\n",
    "            line = raw_line.decode(\"utf-8\", errors=\"replace\")\n",
    "        else:\n",
    "            line = raw_line\n",
    "        if EMISSIONS_KEY not in line:\n",
    "            continue\n",
    "        parts = line.split(\":\")\n",
    "        if len(parts) < 2:\n",
    "            # Key mentioned without a ':' separator; nothing to parse here.\n",
    "            continue\n",
    "        try:\n",
    "            return float(parts[1].strip())\n",
    "        except ValueError:\n",
    "            # Key present but no number on this line; keep scanning.\n",
    "            continue\n",
    "    return None\n",
    "\n",
    "\n",
    "def fetch_readme_emissions(model_id):\n",
    "    \"\"\"Download a model's README and return its reported emissions.\n",
    "\n",
    "    Returns None after printing the reason when the download fails, and\n",
    "    None without a message when the README holds no parseable value.\n",
    "    \"\"\"\n",
    "    url = README_URL_TEMPLATE.format(model_id=model_id)\n",
    "    try:\n",
    "        # The context manager closes the HTTP response, also on early return.\n",
    "        with urllib.request.urlopen(url, timeout=REQUEST_TIMEOUT_S) as readme:\n",
    "            return parse_emissions(readme)\n",
    "    except (OSError, http.client.HTTPException) as err:\n",
    "        # Best effort: one unreachable README must not abort the whole run.\n",
    "        print(f\"Skipping {model_id}: {err}\")\n",
    "        return None"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "collect-md",
   "metadata": {},
   "source": [
    "## Collect the emissions table\n",
    "\n",
    "`modelcount` counts every entry returned by the API; `carboncount` counts the models for which a numeric value was found (at most one row per model)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "805a29d7",
   "metadata": {},
   "outputs": [],
   "source": [
    "modelcount = 0\n",
    "records = []\n",
    "\n",
    "for model in response.json():\n",
    "    modelcount += 1\n",
    "    model_id = model.get(\"modelId\")\n",
    "    if model.get(\"private\") or not model_id:\n",
    "        continue\n",
    "    emissions = fetch_readme_emissions(model_id)\n",
    "    if emissions is None:\n",
    "        continue\n",
    "    records.append(\n",
    "        {\n",
    "            \"name\": str(model_id),\n",
    "            # Entries without a pipeline tag get an empty task.\n",
    "            \"task\": str(model.get(\"pipeline_tag\") or \"\"),\n",
    "            \"carbon\": emissions,\n",
    "        }\n",
    "    )\n",
    "\n",
    "# Build the frame once from the collected records instead of growing it row by row.\n",
    "carbon_df = pd.DataFrame(records, columns=[\"name\", \"task\", \"carbon\"])\n",
    "# Keep the 1-based row labels that the earlier row-by-row construction produced.\n",
    "carbon_df.index = pd.RangeIndex(1, len(carbon_df) + 1)\n",
    "carboncount = len(carbon_df)\n",
    "\n",
    "print(f\"Out of {modelcount} models, {carboncount} of them reported carbon emissions\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "save-md",
   "metadata": {},
   "source": [
    "## Inspect and save"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "ce21fde5",
   "metadata": {},
   "outputs": [],
   "source": [
    "carbon_df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "fe01a841",
   "metadata": {},
   "outputs": [],
   "source": [
    "carbon_df.to_pickle(OUTPUT_PATH)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "datametrics",
   "language": "python",
   "name": "datametrics"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}