pythia-12b / EleutherAI_pythia-12b.json

add AIBOM

56ce20c verified about 1 month ago

7.97 kB

	{
	"bomFormat": "CycloneDX",
	"specVersion": "1.6",
	"serialNumber": "urn:uuid:e33947f8-aae7-4995-adeb-d36d728818aa",
	"version": 1,
	"metadata": {
	"timestamp": "2025-10-07T08:19:30.409870+00:00",
	"component": {
	"type": "machine-learning-model",
	"bom-ref": "EleutherAI/pythia-12b-e54a9a31-ff47-5eb1-8c49-d06c73636dc7",
	"licenses": [
	{
	"license": {
	"id": "Apache-2.0",
	"url": "https://spdx.org/licenses/Apache-2.0.html"
	}
	}
	],
	"externalReferences": [
	{
	"url": "https://huggingface.co/EleutherAI/pythia-12b",
	"type": "documentation"
	}
	],
	"modelCard": {
	"modelParameters": {
	"datasets": [
	{
	"ref": "EleutherAI/pile-671dd665-c5ab-5ddc-8ee4-6f2955b81787"
	}
	],
	"task": "text-generation",
	"architectureFamily": "gpt_neox",
	"modelArchitecture": "GPTNeoXForCausalLM"
	},
	"properties": [
	{
	"name": "library_name",
	"value": "transformers"
	}
	],
	"considerations": {
	"useCases": [
	"## Uses and Limitations\n"
	]
	}
	},
	"name": "EleutherAI/pythia-12b",
	"authors": [
	{
	"name": "EleutherAI"
	}
	],
	"description": "## Model Details\n\n- Developed by: [EleutherAI](http://eleuther.ai)\n- Model type: Transformer-based Language Model\n- Language: English\n- Learn more: [Pythia's GitHub repository](https://github.com/EleutherAI/pythia)\nfor training procedure, config files, and details on how to use.\n[See paper](https://arxiv.org/pdf/2304.01373.pdf) for more evals and implementation\ndetails.\n- Library: [GPT-NeoX](https://github.com/EleutherAI/gpt-neox)\n- License: Apache 2.0\n- Contact: to ask questions about this model, join the [EleutherAI\nDiscord](https://discord.gg/zBGx3azzUn), and post them in `#release-discussion`.\nPlease read the existing Pythia documentation before asking about it in the\nEleutherAI Discord. For general correspondence: [contact@eleuther.\nai](mailto:contact@eleuther.ai).\n\n<figure>\n\n| Pythia model | Non-Embedding Params | Layers | Model Dim | Heads | Batch Size | Learning Rate | Equivalent Models |\n| -----------: | -------------------: | :----: | :-------: | :---: | :--------: | :-------------------: | :--------------------: |\n| 70M | 18,915,328 | 6 | 512 | 8 | 2M | 1.0 x 10<sup>-3</sup> | \u2014 |\n| 160M | 85,056,000 | 12 | 768 | 12 | 2M | 6.0 x 10<sup>-4</sup> | GPT-Neo 125M, OPT-125M |\n| 410M | 302,311,424 | 24 | 1024 | 16 | 2M | 3.0 x 10<sup>-4</sup> | OPT-350M |\n| 1.0B | 805,736,448 | 16 | 2048 | 8 | 2M | 3.0 x 10<sup>-4</sup> | \u2014 |\n| 1.4B | 1,208,602,624 | 24 | 2048 | 16 | 2M | 2.0 x 10<sup>-4</sup> | GPT-Neo 1.3B, OPT-1.3B |\n| 2.8B | 2,517,652,480 | 32 | 2560 | 32 | 2M | 1.6 x 10<sup>-4</sup> | GPT-Neo 2.7B, OPT-2.7B |\n| 6.9B | 6,444,163,072 | 32 | 4096 | 32 | 2M | 1.2 x 10<sup>-4</sup> | OPT-6.7B |\n| 12B | 11,327,027,200 | 36 | 5120 | 40 | 2M | 1.2 x 10<sup>-4</sup> | \u2014 |\n<figcaption>Engineering details for the <i>Pythia Suite</i>. Deduped and\nnon-deduped models of a given size have the same hyperparameters. \u201cEquivalent\u201d\nmodels have <b>exactly</b> the same architecture, and the same number of\nnon-embedding parameters.</figcaption>\n</figure>\n",
	"tags": [
	"transformers",
	"pytorch",
	"safetensors",
	"gpt_neox",
	"text-generation",
	"causal-lm",
	"pythia",
	"en",
	"dataset:EleutherAI/pile",
	"arxiv:2304.01373",
	"arxiv:2101.00027",
	"arxiv:2201.07311",
	"license:apache-2.0",
	"autotrain_compatible",
	"text-generation-inference",
	"endpoints_compatible",
	"region:us"
	]
	}
	},
	"components": [
	{
	"type": "data",
	"bom-ref": "EleutherAI/pile-671dd665-c5ab-5ddc-8ee4-6f2955b81787",
	"name": "EleutherAI/pile",
	"data": [
	{
	"type": "dataset",
	"bom-ref": "EleutherAI/pile-671dd665-c5ab-5ddc-8ee4-6f2955b81787-dataset",
	"name": "EleutherAI/pile",
	"contents": {
	"url": "https://huggingface.co/datasets/EleutherAI/pile",
	"properties": [
	{
	"name": "task_categories",
	"value": "text-generation, fill-mask"
	},
	{
	"name": "task_ids",
	"value": "language-modeling, masked-language-modeling"
	},
	{
	"name": "language",
	"value": "en"
	},
	{
	"name": "size_categories",
	"value": "100B<n<1T"
	},
	{
	"name": "annotations_creators",
	"value": "no-annotation"
	},
	{
	"name": "language_creators",
	"value": "found"
	},
	{
	"name": "pretty_name",
	"value": "the Pile"
	},
	{
	"name": "source_datasets",
	"value": "original"
	},
	{
	"name": "paperswithcode_id",
	"value": "the-pile"
	},
	{
	"name": "license",
	"value": "other"
	}
	]
	},
	"description": "The Pile is a 825 GiB diverse, open source language modelling data set that consists of 22 smaller, high-quality\ndatasets combined together.",
	"governance": {
	"owners": [
	{
	"organization": {
	"name": "EleutherAI",
	"url": ["https://huggingface.co/EleutherAI"]
	}
	}
	]
	}
	}
	]
	}
	]
	}