| import os |
| import re |
| from pathlib import Path |
| from typing import Any, Literal, Optional, Union |
|
|
| import yaml |
|
|
| from huggingface_hub.file_download import hf_hub_download |
| from huggingface_hub.hf_api import upload_file |
| from huggingface_hub.repocard_data import ( |
| CardData, |
| DatasetCardData, |
| EvalResult, |
| ModelCardData, |
| SpaceCardData, |
| eval_results_to_model_index, |
| model_index_to_eval_results, |
| ) |
| from huggingface_hub.utils import HfHubHTTPError, get_session, hf_raise_for_status, is_jinja_available, yaml_dump |
|
|
| from . import constants |
| from .errors import EntryNotFoundError |
| from .utils import SoftTemporaryDirectory, logging, validate_hf_hub_args |
|
|
|
|
# Module-level logger, named after this module.
logger = logging.get_logger(__name__)


# Default Jinja templates shipped next to this module, under `templates/`.
TEMPLATE_MODELCARD_PATH = Path(__file__).parent / "templates" / "modelcard_template.md"
TEMPLATE_DATASETCARD_PATH = Path(__file__).parent / "templates" / "datasetcard_template.md"


# Matches the YAML metadata block at the very beginning of a card. The pattern is anchored with `^`
# and compiled without `re.MULTILINE`, so it only matches at the start of the string.
#   group 1: optional leading whitespace, the opening `---` and the line break(s) after it
#   group 2: the YAML payload (lazy match, so it stops at the first closing marker)
#   group 3: the line break(s) before the closing `---`, the marker itself, then CRLF, LF or end of string
REGEX_YAML_BLOCK = re.compile(r"^(\s*---[\r\n]+)([\S\s]*?)([\r\n]+---(\r\n|\n|$))")
|
|
|
|
class RepoCard:
    """A repo card: a Markdown document starting with a YAML metadata block.

    Subclasses customise the behaviour through the three class attributes below.
    """

    # Class instantiated with the parsed YAML metadata (see the `content` setter).
    card_data_class = CardData
    # Template rendered by `from_template` when neither a path nor a string is given.
    default_template_path = TEMPLATE_MODELCARD_PATH
    # Repo type used by `load`, `validate` and `push_to_hub` when none is passed explicitly.
    repo_type = "model"

    def __init__(self, content: str, ignore_metadata_errors: bool = False):
        """Initialize a RepoCard from string content. The content should be a
        Markdown file with a YAML block at the beginning and a Markdown body.

        Args:
            content (`str`): The content of the Markdown file.
            ignore_metadata_errors (`bool`, *optional*, defaults to `False`):
                Forwarded as-is to the card data class when the metadata is parsed.

        Example:
            ```python
            >>> from huggingface_hub.repocard import RepoCard
            >>> text = '''
            ... ---
            ... language: en
            ... license: mit
            ... ---
            ...
            ... # My repo
            ... '''
            >>> card = RepoCard(text)
            >>> card.data.to_dict()
            {'language': 'en', 'license': 'mit'}
            >>> card.text
            '\\n# My repo\\n'

            ```
        > [!TIP]
        > Raises the following error:
        >
        >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
        >       when the content of the repo card metadata is not a dictionary.
        """
        # `ignore_metadata_errors` must be set first: the `content` setter reads it
        # while building `.data`. The setter also populates `.text`.
        self.ignore_metadata_errors = ignore_metadata_errors
        self.content = content

    @property
    def content(self):
        """The content of the RepoCard, including the YAML block and the Markdown body."""
        # Reuse the line ending detected in the original content; fall back to "\n".
        line_break = _detect_line_ending(self._content) or "\n"
        yaml_block = self.data.to_yaml(line_break=line_break, original_order=self._original_order)
        return f"---{line_break}{yaml_block}{line_break}---{line_break}{self.text}"

    @content.setter
    def content(self, content: str):
        """Set the content of the RepoCard.

        Splits `content` into the YAML metadata (stored in `self.data`) and the
        Markdown body (stored in `self.text`).

        Raises:
            `ValueError`: if the YAML block does not parse to a dictionary.
        """
        self._content = content

        match = REGEX_YAML_BLOCK.search(content)
        if match:
            # Metadata found in the YAML block
            yaml_block = match.group(2)
            self.text = content[match.end() :]
            data_dict = yaml.safe_load(yaml_block)

            # An empty YAML block parses to None
            if data_dict is None:
                data_dict = {}

            # The YAML block's data should be a dictionary
            if not isinstance(data_dict, dict):
                raise ValueError("repo card metadata block should be a dict")
        else:
            # Model card without metadata: the whole content is the Markdown body
            logger.warning("Repo card metadata block was not found. Setting CardData to empty.")
            data_dict = {}
            self.text = content

        self.data = self.card_data_class(**data_dict, ignore_metadata_errors=self.ignore_metadata_errors)
        # Remember the key order of the parsed metadata; it is passed back to `to_yaml`.
        self._original_order = list(data_dict.keys())

    def __str__(self):
        """Return the full card content (YAML block followed by the Markdown body)."""
        return self.content

    def save(self, filepath: Union[Path, str]):
        r"""Save a RepoCard to a file.

        Args:
            filepath (`Union[Path, str]`): Filepath to the markdown file to save.

        Example:
            ```python
            >>> from huggingface_hub.repocard import RepoCard
            >>> card = RepoCard("---\nlanguage: en\n---\n# This is a test repo card")
            >>> card.save("/tmp/test.md")

            ```
        """
        filepath = Path(filepath)
        filepath.parent.mkdir(parents=True, exist_ok=True)
        # newline="" disables newline translation so the card's line endings are written unchanged.
        with open(filepath, mode="w", newline="", encoding="utf-8") as f:
            f.write(str(self))

    @classmethod
    def load(
        cls,
        repo_id_or_path: Union[str, Path],
        repo_type: Optional[str] = None,
        token: Optional[str] = None,
        ignore_metadata_errors: bool = False,
    ):
        """Initialize a RepoCard from a Hugging Face Hub repo's README.md or a local filepath.

        Args:
            repo_id_or_path (`Union[str, Path]`):
                The repo ID associated with a Hugging Face Hub repo or a local filepath.
            repo_type (`str`, *optional*):
                The type of Hugging Face repo to load from. Defaults to None, which will use "model". Other options
                are "dataset" and "space". Not used when loading from a local filepath. If this is called from a child
                class, the default value will be the child class's `repo_type`.
            token (`str`, *optional*):
                Authentication token, obtained with `huggingface_hub.HfApi.login` method. Will default to the stored token.
            ignore_metadata_errors (`bool`, *optional*, defaults to `False`):
                If True, errors while parsing the metadata section will be ignored. Some information might be lost during
                the process. Use it at your own risk.

        Returns:
            [`huggingface_hub.repocard.RepoCard`]: The RepoCard (or subclass) initialized from the repo's
                README.md file or filepath.

        Raises:
            `ValueError`: if `repo_id_or_path` is neither an existing file nor a `str`.

        Example:
            ```python
            >>> from huggingface_hub.repocard import RepoCard
            >>> card = RepoCard.load("nateraw/food")
            >>> assert card.data.tags == ["generated_from_trainer", "image-classification", "pytorch"]

            ```
        """
        # An existing local file takes precedence; any other string is treated as a repo ID.
        if Path(repo_id_or_path).is_file():
            card_path = Path(repo_id_or_path)
        elif isinstance(repo_id_or_path, str):
            card_path = Path(
                hf_hub_download(
                    repo_id_or_path,
                    constants.REPOCARD_NAME,
                    repo_type=repo_type or cls.repo_type,
                    token=token,
                )
            )
        else:
            raise ValueError(f"Cannot load RepoCard: path not found on disk ({repo_id_or_path}).")

        # newline="" keeps the original line endings (no translation on read).
        with card_path.open(mode="r", newline="", encoding="utf-8") as f:
            return cls(f.read(), ignore_metadata_errors=ignore_metadata_errors)

    def validate(self, repo_type: Optional[str] = None):
        """Validates card against Hugging Face Hub's card validation logic.
        Using this function requires access to the internet, so it is only called
        internally by [`huggingface_hub.repocard.RepoCard.push_to_hub`].

        Args:
            repo_type (`str`, *optional*, defaults to "model"):
                The type of Hugging Face repo to push to. Options are "model", "dataset", and "space".
                If this function is called from a child class, the default will be the child class's `repo_type`.

        > [!TIP]
        > Raises the following errors:
        >
        >     - [`ValueError`](https://docs.python.org/3/library/exceptions.html#ValueError)
        >       if the Hub answers with status code 400, i.e. the card fails validation checks.
        >     - `HfHubHTTPError`
        >       if the request to the Hub API fails for any other reason.
        """
        # If repo type is provided, otherwise, use the repo type of the card.
        repo_type = repo_type or self.repo_type

        body = {
            "repoType": repo_type,
            "content": str(self),
        }
        headers = {"Accept": "text/plain"}

        # The request is sent outside the `try` so that `response` is always bound
        # when the handler below inspects it.
        response = get_session().post("https://huggingface.co/api/validate-yaml", json=body, headers=headers)
        try:
            hf_raise_for_status(response)
        except HfHubHTTPError as exc:
            if response.status_code == 400:
                # The response body is used as the error message.
                raise ValueError(response.text) from exc
            raise

    def push_to_hub(
        self,
        repo_id: str,
        token: Optional[str] = None,
        repo_type: Optional[str] = None,
        commit_message: Optional[str] = None,
        commit_description: Optional[str] = None,
        revision: Optional[str] = None,
        create_pr: Optional[bool] = None,
        parent_commit: Optional[str] = None,
    ):
        """Push a RepoCard to a Hugging Face Hub repo.

        Args:
            repo_id (`str`):
                The repo ID of the Hugging Face Hub repo to push to. Example: "nateraw/food".
            token (`str`, *optional*):
                Authentication token, obtained with `huggingface_hub.HfApi.login` method. Will default to
                the stored token.
            repo_type (`str`, *optional*, defaults to "model"):
                The type of Hugging Face repo to push to. Options are "model", "dataset", and "space". If this
                function is called by a child class, it will default to the child class's `repo_type`.
            commit_message (`str`, *optional*):
                The summary / title / first line of the generated commit.
            commit_description (`str`, *optional*):
                The description of the generated commit.
            revision (`str`, *optional*):
                The git revision to commit from. Defaults to the head of the `"main"` branch.
            create_pr (`bool`, *optional*):
                Whether or not to create a Pull Request with this commit. Defaults to `False`.
            parent_commit (`str`, *optional*):
                The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported.
                If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
                If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`.
                Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be
                especially useful if the repo is updated / committed too concurrently.
        Returns:
            `str`: URL of the commit which updated the card metadata.
        """
        # If repo type is provided, otherwise, use the repo type of the card.
        repo_type = repo_type or self.repo_type

        # Validate card before pushing to hub
        self.validate(repo_type=repo_type)

        with SoftTemporaryDirectory() as tmpdir:
            tmp_path = Path(tmpdir) / constants.REPOCARD_NAME
            # Write bytes rather than text: text mode would translate "\n" to the platform
            # line separator, so the uploaded file could differ from the content that was
            # just validated (and from what `save` writes with newline="").
            tmp_path.write_bytes(str(self).encode("utf-8"))
            url = upload_file(
                path_or_fileobj=str(tmp_path),
                path_in_repo=constants.REPOCARD_NAME,
                repo_id=repo_id,
                token=token,
                repo_type=repo_type,
                commit_message=commit_message,
                commit_description=commit_description,
                create_pr=create_pr,
                revision=revision,
                parent_commit=parent_commit,
            )
        return url

    @classmethod
    def from_template(
        cls,
        card_data: CardData,
        template_path: Optional[str] = None,
        template_str: Optional[str] = None,
        **template_kwargs,
    ):
        """Initialize a RepoCard from a template. By default, it uses the default template.

        Templates are Jinja2 templates that can be customized by passing keyword arguments.

        Args:
            card_data (`huggingface_hub.CardData`):
                A huggingface_hub.CardData instance containing the metadata you want to include in the YAML
                header of the repo card on the Hugging Face Hub.
            template_path (`str`, *optional*):
                A path to a markdown file with optional Jinja template variables that can be filled
                in with `template_kwargs`. Defaults to the default template. When given, it takes
                precedence over `template_str`.
            template_str (`str`, *optional*):
                The template itself, as a string. Only used when `template_path` is not given.
            template_kwargs:
                Extra variables made available to the template. They override the entries of
                `card_data.to_dict()` that share the same name.

        Returns:
            [`huggingface_hub.repocard.RepoCard`]: A RepoCard instance with the specified card data and content from the
            template.

        Raises:
            `ImportError`: if Jinja2 is not installed.
        """
        if not is_jinja_available():
            raise ImportError(
                "Using RepoCard.from_template requires Jinja2 to be installed. Please"
                " install it with `pip install Jinja2`."
            )
        import jinja2

        # Template variables: the card metadata first, then the explicit overrides.
        kwargs = card_data.to_dict().copy()
        kwargs.update(template_kwargs)

        # Templates are read as UTF-8 explicitly, like every other file handled by this class,
        # instead of relying on the platform's default encoding.
        if template_path is not None:
            template_str = Path(template_path).read_text(encoding="utf-8")
        if template_str is None:
            template_str = Path(cls.default_template_path).read_text(encoding="utf-8")
        template = jinja2.Template(template_str)
        # The serialized metadata is exposed to the template as `card_data`.
        content = template.render(card_data=card_data.to_yaml(), **kwargs)
        return cls(content)
|
|
|
|
class ModelCard(RepoCard):
    """Repo card for model repositories, with metadata held in a `ModelCardData`."""

    repo_type = "model"
    card_data_class = ModelCardData
    default_template_path = TEMPLATE_MODELCARD_PATH

    @classmethod
    def from_template(
        cls,
        card_data: ModelCardData,
        template_path: Optional[str] = None,
        template_str: Optional[str] = None,
        **template_kwargs,
    ):
        """Build a ModelCard by rendering a Jinja2 template. Unless told otherwise, the default template is used:
        https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/modelcard_template.md

        Any extra keyword argument is made available to the template as a variable.

        Args:
            card_data (`huggingface_hub.ModelCardData`):
                A huggingface_hub.ModelCardData instance containing the metadata you want to include in the YAML
                header of the model card on the Hugging Face Hub.
            template_path (`str`, *optional*):
                A path to a markdown file with optional Jinja template variables that can be filled
                in with `template_kwargs`. Defaults to the default template.
            template_str (`str`, *optional*):
                The template given directly as a string, forwarded unchanged to the parent implementation.

        Returns:
            [`huggingface_hub.ModelCard`]: A ModelCard instance with the specified card data and content from the
            template.

        Example:
            ```python
            >>> from huggingface_hub import ModelCard, ModelCardData, EvalResult

            >>> # Using the Default Template
            >>> card_data = ModelCardData(
            ...     language='en',
            ...     license='mit',
            ...     library_name='timm',
            ...     tags=['image-classification', 'resnet'],
            ...     datasets=['beans'],
            ...     metrics=['accuracy'],
            ... )
            >>> card = ModelCard.from_template(
            ...     card_data,
            ...     model_description='This model does x + y...'
            ... )

            >>> # Including Evaluation Results
            >>> card_data = ModelCardData(
            ...     language='en',
            ...     tags=['image-classification', 'resnet'],
            ...     eval_results=[
            ...         EvalResult(
            ...             task_type='image-classification',
            ...             dataset_type='beans',
            ...             dataset_name='Beans',
            ...             metric_type='accuracy',
            ...             metric_value=0.9,
            ...         ),
            ...     ],
            ...     model_name='my-cool-model',
            ... )
            >>> card = ModelCard.from_template(card_data)

            >>> # Using a Custom Template
            >>> card_data = ModelCardData(
            ...     language='en',
            ...     tags=['image-classification', 'resnet']
            ... )
            >>> card = ModelCard.from_template(
            ...     card_data=card_data,
            ...     template_path='./src/huggingface_hub/templates/modelcard_template.md',
            ...     custom_template_var='custom value',  # will be replaced in template if it exists
            ... )

            ```
        """
        # Pure delegation: the override only narrows the type hint and the documentation.
        return super().from_template(
            card_data=card_data,
            template_path=template_path,
            template_str=template_str,
            **template_kwargs,
        )
|
|
|
|
class DatasetCard(RepoCard):
    """Repo card for dataset repositories, with metadata held in a `DatasetCardData`."""

    repo_type = "dataset"
    card_data_class = DatasetCardData
    default_template_path = TEMPLATE_DATASETCARD_PATH

    @classmethod
    def from_template(
        cls,
        card_data: DatasetCardData,
        template_path: Optional[str] = None,
        template_str: Optional[str] = None,
        **template_kwargs,
    ):
        """Build a DatasetCard by rendering a Jinja2 template. Unless told otherwise, the default template is used:
        https://github.com/huggingface/huggingface_hub/blob/main/src/huggingface_hub/templates/datasetcard_template.md

        Any extra keyword argument is made available to the template as a variable.

        Args:
            card_data (`huggingface_hub.DatasetCardData`):
                A huggingface_hub.DatasetCardData instance containing the metadata you want to include in the YAML
                header of the dataset card on the Hugging Face Hub.
            template_path (`str`, *optional*):
                A path to a markdown file with optional Jinja template variables that can be filled
                in with `template_kwargs`. Defaults to the default template.
            template_str (`str`, *optional*):
                The template given directly as a string, forwarded unchanged to the parent implementation.

        Returns:
            [`huggingface_hub.DatasetCard`]: A DatasetCard instance with the specified card data and content from the
            template.

        Example:
            ```python
            >>> from huggingface_hub import DatasetCard, DatasetCardData

            >>> # Using the Default Template
            >>> card_data = DatasetCardData(
            ...     language='en',
            ...     license='mit',
            ...     annotations_creators='crowdsourced',
            ...     task_categories=['text-classification'],
            ...     task_ids=['sentiment-classification', 'text-scoring'],
            ...     multilinguality='monolingual',
            ...     pretty_name='My Text Classification Dataset',
            ... )
            >>> card = DatasetCard.from_template(
            ...     card_data,
            ...     pretty_name=card_data.pretty_name,
            ... )

            >>> # Using a Custom Template
            >>> card_data = DatasetCardData(
            ...     language='en',
            ...     license='mit',
            ... )
            >>> card = DatasetCard.from_template(
            ...     card_data=card_data,
            ...     template_path='./src/huggingface_hub/templates/datasetcard_template.md',
            ...     custom_template_var='custom value',  # will be replaced in template if it exists
            ... )

            ```
        """
        # Pure delegation: the override only narrows the type hint and the documentation.
        return super().from_template(
            card_data=card_data,
            template_path=template_path,
            template_str=template_str,
            **template_kwargs,
        )
|
|
|
|
class SpaceCard(RepoCard):
    """Repo card for Space repositories, with metadata held in a `SpaceCardData`."""

    repo_type = "space"
    card_data_class = SpaceCardData
    # Spaces reuse the model card template as their default template.
    default_template_path = TEMPLATE_MODELCARD_PATH
|
|
|
|
| def _detect_line_ending(content: str) -> Literal["\r", "\n", "\r\n", None]: |
| """Detect the line ending of a string. Used by RepoCard to avoid making huge diff on newlines. |
| |
| Uses same implementation as in Hub server, keep it in sync. |
| |
| Returns: |
| str: The detected line ending of the string. |
| """ |
| cr = content.count("\r") |
| lf = content.count("\n") |
| crlf = content.count("\r\n") |
| if cr + lf == 0: |
| return None |
| if crlf == cr and crlf == lf: |
| return "\r\n" |
| if cr > lf: |
| return "\r" |
| else: |
| return "\n" |
|
|
|
|
def metadata_load(local_path: Union[str, Path]) -> Optional[dict]:
    """Load the YAML metadata block found at the beginning of a local file.

    Args:
        local_path (`Union[str, Path]`): Path to the file to read.

    Returns:
        `Optional[dict]`: The parsed metadata. `None` is returned when the file has no
        YAML block or when the block is empty.

    Raises:
        `ValueError`: if the YAML block parses to something other than a dictionary.
    """
    # Read as UTF-8 explicitly: `metadata_save` writes the file with that encoding, so
    # relying on the platform default here could fail or mis-decode non-ASCII metadata.
    content = Path(local_path).read_text(encoding="utf-8")
    match = REGEX_YAML_BLOCK.search(content)
    if match is None:
        return None

    data = yaml.safe_load(match.group(2))
    if data is None or isinstance(data, dict):
        return data
    raise ValueError("repo card metadata block should be a dict")
|
|
|
|
def metadata_save(local_path: Union[str, Path], data: dict) -> None:
    """Save the metadata dict in the upper YAML part of a file.

    Tries to preserve the newlines of the existing file: the file is read with
    `newline=""` so that its line endings are reported untranslated, and the first one
    reported is reused for the YAML block. Docs about `open()` with the `newline=""`
    parameter: https://docs.python.org/3/library/functions.html?highlight=open#open

    If the file already starts with a YAML block, that block is replaced; otherwise the
    new block is prepended to the existing content. A missing file is created.

    NOTE(review): an earlier version of this docstring stated that bare CR ("^M") line
    breaks are not preserved and end up replaced by LF - not verified against this code.

    Args:
        local_path (`Union[str, Path]`): Path to the file to update.
        data (`dict`): The metadata to serialize into the YAML block.
    """
    line_break = "\n"
    content = ""
    # try to detect existing newline character
    if os.path.exists(local_path):
        with open(local_path, "r", newline="", encoding="utf8") as readme:
            content = readme.read()
            # `newlines` is a tuple when several kinds of line endings were seen.
            if isinstance(readme.newlines, tuple):
                line_break = readme.newlines[0]
            elif isinstance(readme.newlines, str):
                line_break = readme.newlines

    # Build the complete output BEFORE opening the file for writing: opening in "w" mode
    # truncates it, so a serialization error afterwards would wipe the existing file.
    data_yaml = yaml_dump(data, sort_keys=False, line_break=line_break)
    match = REGEX_YAML_BLOCK.search(content)
    if match:
        # Swap the existing YAML block for the new one and keep everything around it.
        output = content[: match.start()] + f"---{line_break}{data_yaml}---{line_break}" + content[match.end() :]
    else:
        output = f"---{line_break}{data_yaml}---{line_break}{content}"

    # newline="" writes the line endings exactly as they appear in `output`.
    with open(local_path, "w", newline="", encoding="utf8") as readme:
        readme.write(output)
|
|
|
|
def metadata_eval_result(
    *,
    model_pretty_name: str,
    task_pretty_name: str,
    task_id: str,
    metrics_pretty_name: str,
    metrics_id: str,
    metrics_value: Any,
    dataset_pretty_name: str,
    dataset_id: str,
    metrics_config: Optional[str] = None,
    metrics_verified: bool = False,
    dataset_config: Optional[str] = None,
    dataset_split: Optional[str] = None,
    dataset_revision: Optional[str] = None,
    metrics_verification_token: Optional[str] = None,
) -> dict:
    """
    Build a metadata dict describing a single evaluation result of a model on a dataset.

    Args:
        model_pretty_name (`str`):
            The name of the model in natural language.
        task_pretty_name (`str`):
            The name of a task in natural language.
        task_id (`str`):
            Example: automatic-speech-recognition. A task id.
        metrics_pretty_name (`str`):
            A name for the metric in natural language. Example: Test WER.
        metrics_id (`str`):
            Example: wer. A metric id from https://hf.co/metrics.
        metrics_value (`Any`):
            The value from the metric. Example: 20.0 or "20.0 ± 1.2".
        dataset_pretty_name (`str`):
            The name of the dataset in natural language.
        dataset_id (`str`):
            Example: common_voice. A dataset id from https://hf.co/datasets.
        metrics_config (`str`, *optional*):
            The name of the metric configuration used in `load_metric()`.
            Example: bleurt-large-512 in `load_metric("bleurt", "bleurt-large-512")`.
        metrics_verified (`bool`, *optional*, defaults to `False`):
            Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set.
        dataset_config (`str`, *optional*):
            Example: fr. The name of the dataset configuration used in `load_dataset()`.
        dataset_split (`str`, *optional*):
            Example: test. The name of the dataset split used in `load_dataset()`.
        dataset_revision (`str`, *optional*):
            Example: 5503434ddd753f426f4b38109466949a1217c2bb. The name of the dataset revision
            used in `load_dataset()`.
        metrics_verification_token (`str`, *optional*):
            A JSON Web Token that is used to verify whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not.

    Returns:
        `dict`: a metadata dict with the result from a model evaluated on a dataset.

    Example:
        ```python
        >>> from huggingface_hub import metadata_eval_result
        >>> results = metadata_eval_result(
        ...         model_pretty_name="RoBERTa fine-tuned on ReactionGIF",
        ...         task_pretty_name="Text Classification",
        ...         task_id="text-classification",
        ...         metrics_pretty_name="Accuracy",
        ...         metrics_id="accuracy",
        ...         metrics_value=0.2662102282047272,
        ...         dataset_pretty_name="ReactionJPEG",
        ...         dataset_id="julien-c/reactionjpeg",
        ...         dataset_config="default",
        ...         dataset_split="test",
        ... )
        >>> results == {
        ...     'model-index': [
        ...         {
        ...             'name': 'RoBERTa fine-tuned on ReactionGIF',
        ...             'results': [
        ...                 {
        ...                     'task': {
        ...                         'type': 'text-classification',
        ...                         'name': 'Text Classification'
        ...                     },
        ...                     'dataset': {
        ...                         'name': 'ReactionJPEG',
        ...                         'type': 'julien-c/reactionjpeg',
        ...                         'config': 'default',
        ...                         'split': 'test'
        ...                     },
        ...                     'metrics': [
        ...                         {
        ...                             'type': 'accuracy',
        ...                             'value': 0.2662102282047272,
        ...                             'name': 'Accuracy',
        ...                             'verified': False
        ...                         }
        ...                     ]
        ...                 }
        ...             ]
        ...         }
        ...     ]
        ... }
        True

        ```
    """
    # Map this function's "pretty name / id" vocabulary onto the EvalResult fields.
    single_result = EvalResult(
        task_type=task_id,
        task_name=task_pretty_name,
        dataset_type=dataset_id,
        dataset_name=dataset_pretty_name,
        dataset_config=dataset_config,
        dataset_split=dataset_split,
        dataset_revision=dataset_revision,
        metric_type=metrics_id,
        metric_name=metrics_pretty_name,
        metric_value=metrics_value,
        metric_config=metrics_config,
        verified=metrics_verified,
        verify_token=metrics_verification_token,
    )
    model_index = eval_results_to_model_index(
        model_name=model_pretty_name,
        eval_results=[single_result],
    )
    return {"model-index": model_index}
|
|
|
|
@validate_hf_hub_args
def metadata_update(
    repo_id: str,
    metadata: dict,
    *,
    repo_type: Optional[str] = None,
    overwrite: bool = False,
    token: Optional[str] = None,
    commit_message: Optional[str] = None,
    commit_description: Optional[str] = None,
    revision: Optional[str] = None,
    create_pr: bool = False,
    parent_commit: Optional[str] = None,
) -> str:
    """
    Updates the metadata in the README.md of a repository on the Hugging Face Hub.
    If the README.md file doesn't exist yet, a new one is created with metadata and
    the default ModelCard or DatasetCard template. For `space` repo, an error is thrown
    as a Space cannot exist without a `README.md` file.

    The `metadata` argument itself is left untouched by this function.

    Args:
        repo_id (`str`):
            The name of the repository.
        metadata (`dict`):
            A dictionary containing the metadata to be updated.
        repo_type (`str`, *optional*):
            Set to `"dataset"` or `"space"` if updating to a dataset or space,
            `None` or `"model"` if updating to a model. Default is `None`.
        overwrite (`bool`, *optional*, defaults to `False`):
            If set to `True` an existing field can be overwritten, otherwise
            attempting to overwrite an existing field will cause an error.
        token (`str`, *optional*):
            The Hugging Face authentication token.
        commit_message (`str`, *optional*):
            The summary / title / first line of the generated commit. Defaults to
            `f"Update metadata with huggingface_hub"`
        commit_description (`str`, *optional*):
            The description of the generated commit
        revision (`str`, *optional*):
            The git revision to commit from. Defaults to the head of the
            `"main"` branch.
        create_pr (`boolean`, *optional*):
            Whether or not to create a Pull Request from `revision` with that commit.
            Defaults to `False`.
        parent_commit (`str`, *optional*):
            The OID / SHA of the parent commit, as a hexadecimal string. Shorthands (7 first characters) are also supported.
            If specified and `create_pr` is `False`, the commit will fail if `revision` does not point to `parent_commit`.
            If specified and `create_pr` is `True`, the pull request will be created from `parent_commit`.
            Specifying `parent_commit` ensures the repo has not changed before committing the changes, and can be
            especially useful if the repo is updated / committed too concurrently.
    Returns:
        `str`: URL of the commit which updated the card metadata.

    Raises:
        `ValueError`: if `repo_type` is unknown, if the repo is a Space without a `README.md`,
            or if an existing field / metric would get a new value while `overwrite` is `False`.

    Example:
        ```python
        >>> from huggingface_hub import metadata_update
        >>> metadata = {'model-index': [{'name': 'RoBERTa fine-tuned on ReactionGIF',
        ...             'results': [{'dataset': {'name': 'ReactionGIF',
        ...                                      'type': 'julien-c/reactiongif'},
        ...                           'metrics': [{'name': 'Recall',
        ...                                        'type': 'recall',
        ...                                        'value': 0.7762102282047272}],
        ...                          'task': {'name': 'Text Classification',
        ...                                   'type': 'text-classification'}}]}]}
        >>> url = metadata_update("hf-internal-testing/reactiongif-roberta-card", metadata)

        ```
    """
    commit_message = commit_message if commit_message is not None else "Update metadata with huggingface_hub"

    # Card class given repo_type
    card_class: type[RepoCard]
    if repo_type is None or repo_type == "model":
        card_class = ModelCard
    elif repo_type == "dataset":
        card_class = DatasetCard
    elif repo_type == "space":
        card_class = RepoCard
    else:
        raise ValueError(f"Unknown repo_type: {repo_type}")

    # Either load repo_card from the Hub or create an empty one.
    # NOTE: Will not create the repo if it doesn't exist.
    try:
        card = card_class.load(repo_id, token=token, repo_type=repo_type)
    except EntryNotFoundError:
        if repo_type == "space":
            raise ValueError("Cannot update metadata on a Space that doesn't contain a `README.md` file.")

        # Initialize a ModelCard or DatasetCard from default template and no data.
        card = card_class.from_template(CardData())

    for key, value in metadata.items():
        if key == "model-index":
            # If the model name is missing, fill it in on a copy of the first entry so that
            # the caller's `metadata` is not modified in place.
            # NOTE(review): none of the card classes in this file define a `model_name`
            # attribute on the card itself, so this lookup resolves to `repo_id`;
            # `card.data.model_name` may have been intended - confirm before changing.
            model_index = value
            if "name" not in value[0]:
                model_index = [{**value[0], "name": getattr(card, "model_name", repo_id)}, *value[1:]]
            model_name, new_results = model_index_to_eval_results(model_index)
            if card.data.eval_results is None:
                card.data.eval_results = new_results
                card.data.model_name = model_name
            else:
                existing_results = card.data.eval_results

                # Iterate over new results
                #   Iterate over existing results
                #       If both results describe the same metric but value is different:
                #           If overwrite=True: overwrite the metric value
                #           Else: raise ValueError
                #       Else: append new result to existing ones.
                for new_result in new_results:
                    result_found = False
                    # No `break` on purpose: every matching existing result gets updated.
                    for existing_result in existing_results:
                        if new_result.is_equal_except_value(existing_result):
                            if new_result != existing_result and not overwrite:
                                raise ValueError(
                                    "You passed a new value for the existing metric"
                                    f" 'name: {new_result.metric_name}, type: "
                                    f"{new_result.metric_type}'. Set `overwrite=True`"
                                    " to overwrite existing metrics."
                                )
                            result_found = True
                            existing_result.metric_value = new_result.metric_value
                            if existing_result.verified is True:
                                existing_result.verify_token = new_result.verify_token
                    if not result_found:
                        # `existing_results` is the same list, so later new results are
                        # also compared against the ones appended here.
                        card.data.eval_results.append(new_result)
        else:
            # Any other metadata field: refuse to silently replace a different existing value.
            current_value = card.data.get(key)
            if current_value is not None and not overwrite and current_value != value:
                raise ValueError(
                    f"You passed a new value for the existing meta data field '{key}'."
                    " Set `overwrite=True` to overwrite existing metadata."
                )
            card.data[key] = value

    return card.push_to_hub(
        repo_id,
        token=token,
        repo_type=repo_type,
        commit_message=commit_message,
        commit_description=commit_description,
        create_pr=create_pr,
        revision=revision,
        parent_commit=parent_commit,
    )
|
|