diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/INSTALLER b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/LICENSE b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..7082a2d5b9047bfc09589f387053e24ea490bc54 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/LICENSE @@ -0,0 +1,201 @@ +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2013-2019 Nikolay Kim and Andrew Svetlov + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/METADATA b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..6839bf9c50f5862f05e98ce66b7fd96740c3b4e6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/METADATA @@ -0,0 +1,123 @@ +Metadata-Version: 2.1 +Name: aiosignal +Version: 1.3.2 +Summary: aiosignal: a list of registered asynchronous callbacks +Home-page: https://github.com/aio-libs/aiosignal +Maintainer: aiohttp team +Maintainer-email: team@aiohttp.org +License: Apache 2.0 +Project-URL: Chat: Gitter, https://gitter.im/aio-libs/Lobby +Project-URL: CI: GitHub Actions, https://github.com/aio-libs/aiosignal/actions +Project-URL: Coverage: codecov, https://codecov.io/github/aio-libs/aiosignal +Project-URL: Docs: RTD, https://docs.aiosignal.org +Project-URL: GitHub: issues, https://github.com/aio-libs/aiosignal/issues +Project-URL: GitHub: repo, https://github.com/aio-libs/aiosignal +Classifier: License :: OSI Approved :: Apache Software License +Classifier: Intended Audience :: Developers +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3 :: Only +Classifier: Development Status :: 5 - Production/Stable +Classifier: Operating System :: POSIX +Classifier: Operating System :: MacOS :: MacOS X +Classifier: Operating System :: Microsoft :: Windows +Classifier: Framework :: AsyncIO +Requires-Python: >=3.9 +Description-Content-Type: text/x-rst +License-File: LICENSE +Requires-Dist: frozenlist>=1.1.0 + +========= +aiosignal +========= + +.. image:: https://github.com/aio-libs/aiosignal/workflows/CI/badge.svg + :target: https://github.com/aio-libs/aiosignal/actions?query=workflow%3ACI + :alt: GitHub status for master branch + +.. image:: https://codecov.io/gh/aio-libs/aiosignal/branch/master/graph/badge.svg + :target: https://codecov.io/gh/aio-libs/aiosignal + :alt: codecov.io status for master branch + +.. image:: https://badge.fury.io/py/aiosignal.svg + :target: https://pypi.org/project/aiosignal + :alt: Latest PyPI package version + +.. 
image:: https://readthedocs.org/projects/aiosignal/badge/?version=latest + :target: https://aiosignal.readthedocs.io/ + :alt: Latest Read The Docs + +.. image:: https://img.shields.io/discourse/topics?server=https%3A%2F%2Faio-libs.discourse.group%2F + :target: https://aio-libs.discourse.group/ + :alt: Discourse group for io-libs + +.. image:: https://badges.gitter.im/Join%20Chat.svg + :target: https://gitter.im/aio-libs/Lobby + :alt: Chat on Gitter + +Introduction +============ + +A project to manage callbacks in `asyncio` projects. + +``Signal`` is a list of registered asynchronous callbacks. + +The signal's life-cycle has two stages: after creation its content +could be filled by using standard list operations: ``sig.append()`` +etc. + +After you call ``sig.freeze()`` the signal is *frozen*: adding, removing +and dropping callbacks is forbidden. + +The only available operation is calling the previously registered +callbacks by using ``await sig.send(data)``. + +For concrete usage examples see the `Signals + +section of the `Web Server Advanced +` chapter of the `aiohttp +documentation`_. + + +Installation +------------ + +:: + + $ pip install aiosignal + +The library requires Python 3.8 or newer. + + +Documentation +============= + +https://aiosignal.readthedocs.io/ + +Communication channels +====================== + +*gitter chat* https://gitter.im/aio-libs/Lobby + +Requirements +============ + +- Python >= 3.8 +- frozenlist >= 1.0.0 + +License +======= + +``aiosignal`` is offered under the Apache 2 license. + +Source code +=========== + +The project is hosted on GitHub_ + +Please file an issue in the `bug tracker +`_ if you have found a bug +or have some suggestions to improve the library. + +.. _GitHub: https://github.com/aio-libs/aiosignal +.. 
_aiohttp documentation: https://docs.aiohttp.org/ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/RECORD b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..d0905865b34c573fb8033b30fa1a2633525b2fa3 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/RECORD @@ -0,0 +1,10 @@ +aiosignal-1.3.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +aiosignal-1.3.2.dist-info/LICENSE,sha256=b9UkPpLdf5jsacesN3co50kFcJ_1J6W_mNbQJjwE9bY,11332 +aiosignal-1.3.2.dist-info/METADATA,sha256=TeI_xgZ191qgx37rviEnpMWC0QnYsg_j9EGVivNqqjc,3753 +aiosignal-1.3.2.dist-info/RECORD,, +aiosignal-1.3.2.dist-info/WHEEL,sha256=pxeNX5JdtCe58PUSYP9upmc7jdRPgvT0Gm9kb1SHlVw,109 +aiosignal-1.3.2.dist-info/top_level.txt,sha256=z45aNOKGDdrI1roqZY3BGXQ22kJFPHBmVdwtLYLtXC0,10 +aiosignal/__init__.py,sha256=1oIrRl6kNpqFh32e7HfMFbMV_35v8sqJJFfnuKgmtEU,867 +aiosignal/__init__.pyi,sha256=xeCddYSS8fZAkz8S4HuKSR2IDe3N7RW_LKcXDPPA1Xk,311 +aiosignal/__pycache__/__init__.cpython-312.pyc,, +aiosignal/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/WHEEL b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..104f3874635f24f0d2918dfeaf6a59652274460c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/WHEEL @@ -0,0 +1,6 @@ +Wheel-Version: 1.0 +Generator: setuptools (75.6.0) +Root-Is-Purelib: true +Tag: py2-none-any +Tag: py3-none-any + diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/top_level.txt b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..ac6df3afe74a5fd43afc7ab7f8393571a495fdc5 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/top_level.txt @@ -0,0 +1 @@ +aiosignal diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/__init__.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e8023ff6c5783ab98e4c689c6be8c5321eae0b05 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/__init__.py @@ -0,0 +1,69 @@ +# SPDX-License-Identifier: MIT + +from attr import ( + NOTHING, + Attribute, + AttrsInstance, + Converter, + Factory, + NothingType, + _make_getattr, + assoc, + cmp_using, + define, + evolve, + field, + fields, + fields_dict, + frozen, + has, + make_class, + mutable, + resolve_types, + validate, +) +from attr._next_gen import asdict, astuple + +from . 
import converters, exceptions, filters, setters, validators + + +__all__ = [ + "NOTHING", + "Attribute", + "AttrsInstance", + "Converter", + "Factory", + "NothingType", + "__author__", + "__copyright__", + "__description__", + "__doc__", + "__email__", + "__license__", + "__title__", + "__url__", + "__version__", + "__version_info__", + "asdict", + "assoc", + "astuple", + "cmp_using", + "converters", + "define", + "evolve", + "exceptions", + "field", + "fields", + "fields_dict", + "filters", + "frozen", + "has", + "make_class", + "mutable", + "resolve_types", + "setters", + "validate", + "validators", +] + +__getattr__ = _make_getattr(__name__) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/__init__.pyi b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/__init__.pyi new file mode 100644 index 0000000000000000000000000000000000000000..648fa7a344433df00fbdc2852da2281b7178bb3c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/__init__.pyi @@ -0,0 +1,263 @@ +import sys + +from typing import ( + Any, + Callable, + Mapping, + Sequence, + overload, + TypeVar, +) + +# Because we need to type our own stuff, we have to make everything from +# attr explicitly public too. +from attr import __author__ as __author__ +from attr import __copyright__ as __copyright__ +from attr import __description__ as __description__ +from attr import __email__ as __email__ +from attr import __license__ as __license__ +from attr import __title__ as __title__ +from attr import __url__ as __url__ +from attr import __version__ as __version__ +from attr import __version_info__ as __version_info__ +from attr import assoc as assoc +from attr import Attribute as Attribute +from attr import AttrsInstance as AttrsInstance +from attr import cmp_using as cmp_using +from attr import converters as converters +from attr import Converter as Converter +from attr import evolve as evolve +from attr import exceptions as exceptions +from attr import Factory as Factory +from attr import fields as fields +from attr import fields_dict as fields_dict +from attr import filters as filters +from attr import has as has +from attr import make_class as make_class +from attr import NOTHING as NOTHING +from attr import resolve_types as resolve_types +from attr import setters as setters +from attr import validate as validate +from attr import validators as validators +from attr import attrib, asdict as asdict, astuple as astuple +from attr import NothingType as NothingType + +if sys.version_info >= (3, 11): + from typing import dataclass_transform +else: + from typing_extensions import dataclass_transform + +_T = TypeVar("_T") +_C = TypeVar("_C", bound=type) + +_EqOrderType = bool | Callable[[Any], Any] +_ValidatorType = Callable[[Any, "Attribute[_T]", _T], Any] +_CallableConverterType = Callable[[Any], Any] +_ConverterType = _CallableConverterType | Converter[Any, Any] +_ReprType = Callable[[Any], str] +_ReprArgType = bool | _ReprType +_OnSetAttrType = Callable[[Any, "Attribute[Any]", Any], Any] +_OnSetAttrArgType = _OnSetAttrType | list[_OnSetAttrType] | setters._NoOpType +_FieldTransformer = Callable[ + [type, list["Attribute[Any]"]], list["Attribute[Any]"] +] +# FIXME: in reality, if multiple validators are passed they must be in a list +# or tuple, but those are invariant and so would prevent subtypes of +# _ValidatorType from working when passed in a list or tuple. 
+_ValidatorArgType = _ValidatorType[_T] | Sequence[_ValidatorType[_T]] + +@overload +def field( + *, + default: None = ..., + validator: None = ..., + repr: _ReprArgType = ..., + hash: bool | None = ..., + init: bool = ..., + metadata: Mapping[Any, Any] | None = ..., + converter: None = ..., + factory: None = ..., + kw_only: bool = ..., + eq: bool | None = ..., + order: bool | None = ..., + on_setattr: _OnSetAttrArgType | None = ..., + alias: str | None = ..., + type: type | None = ..., +) -> Any: ... + +# This form catches an explicit None or no default and infers the type from the +# other arguments. +@overload +def field( + *, + default: None = ..., + validator: _ValidatorArgType[_T] | None = ..., + repr: _ReprArgType = ..., + hash: bool | None = ..., + init: bool = ..., + metadata: Mapping[Any, Any] | None = ..., + converter: _ConverterType + | list[_ConverterType] + | tuple[_ConverterType] + | None = ..., + factory: Callable[[], _T] | None = ..., + kw_only: bool = ..., + eq: _EqOrderType | None = ..., + order: _EqOrderType | None = ..., + on_setattr: _OnSetAttrArgType | None = ..., + alias: str | None = ..., + type: type | None = ..., +) -> _T: ... + +# This form catches an explicit default argument. +@overload +def field( + *, + default: _T, + validator: _ValidatorArgType[_T] | None = ..., + repr: _ReprArgType = ..., + hash: bool | None = ..., + init: bool = ..., + metadata: Mapping[Any, Any] | None = ..., + converter: _ConverterType + | list[_ConverterType] + | tuple[_ConverterType] + | None = ..., + factory: Callable[[], _T] | None = ..., + kw_only: bool = ..., + eq: _EqOrderType | None = ..., + order: _EqOrderType | None = ..., + on_setattr: _OnSetAttrArgType | None = ..., + alias: str | None = ..., + type: type | None = ..., +) -> _T: ... + +# This form covers type=non-Type: e.g. forward references (str), Any +@overload +def field( + *, + default: _T | None = ..., + validator: _ValidatorArgType[_T] | None = ..., + repr: _ReprArgType = ..., + hash: bool | None = ..., + init: bool = ..., + metadata: Mapping[Any, Any] | None = ..., + converter: _ConverterType + | list[_ConverterType] + | tuple[_ConverterType] + | None = ..., + factory: Callable[[], _T] | None = ..., + kw_only: bool = ..., + eq: _EqOrderType | None = ..., + order: _EqOrderType | None = ..., + on_setattr: _OnSetAttrArgType | None = ..., + alias: str | None = ..., + type: type | None = ..., +) -> Any: ... +@overload +@dataclass_transform(field_specifiers=(attrib, field)) +def define( + maybe_cls: _C, + *, + these: dict[str, Any] | None = ..., + repr: bool = ..., + unsafe_hash: bool | None = ..., + hash: bool | None = ..., + init: bool = ..., + slots: bool = ..., + frozen: bool = ..., + weakref_slot: bool = ..., + str: bool = ..., + auto_attribs: bool = ..., + kw_only: bool = ..., + cache_hash: bool = ..., + auto_exc: bool = ..., + eq: bool | None = ..., + order: bool | None = ..., + auto_detect: bool = ..., + getstate_setstate: bool | None = ..., + on_setattr: _OnSetAttrArgType | None = ..., + field_transformer: _FieldTransformer | None = ..., + match_args: bool = ..., +) -> _C: ... 
+@overload +@dataclass_transform(field_specifiers=(attrib, field)) +def define( + maybe_cls: None = ..., + *, + these: dict[str, Any] | None = ..., + repr: bool = ..., + unsafe_hash: bool | None = ..., + hash: bool | None = ..., + init: bool = ..., + slots: bool = ..., + frozen: bool = ..., + weakref_slot: bool = ..., + str: bool = ..., + auto_attribs: bool = ..., + kw_only: bool = ..., + cache_hash: bool = ..., + auto_exc: bool = ..., + eq: bool | None = ..., + order: bool | None = ..., + auto_detect: bool = ..., + getstate_setstate: bool | None = ..., + on_setattr: _OnSetAttrArgType | None = ..., + field_transformer: _FieldTransformer | None = ..., + match_args: bool = ..., +) -> Callable[[_C], _C]: ... + +mutable = define + +@overload +@dataclass_transform(frozen_default=True, field_specifiers=(attrib, field)) +def frozen( + maybe_cls: _C, + *, + these: dict[str, Any] | None = ..., + repr: bool = ..., + unsafe_hash: bool | None = ..., + hash: bool | None = ..., + init: bool = ..., + slots: bool = ..., + frozen: bool = ..., + weakref_slot: bool = ..., + str: bool = ..., + auto_attribs: bool = ..., + kw_only: bool = ..., + cache_hash: bool = ..., + auto_exc: bool = ..., + eq: bool | None = ..., + order: bool | None = ..., + auto_detect: bool = ..., + getstate_setstate: bool | None = ..., + on_setattr: _OnSetAttrArgType | None = ..., + field_transformer: _FieldTransformer | None = ..., + match_args: bool = ..., +) -> _C: ... +@overload +@dataclass_transform(frozen_default=True, field_specifiers=(attrib, field)) +def frozen( + maybe_cls: None = ..., + *, + these: dict[str, Any] | None = ..., + repr: bool = ..., + unsafe_hash: bool | None = ..., + hash: bool | None = ..., + init: bool = ..., + slots: bool = ..., + frozen: bool = ..., + weakref_slot: bool = ..., + str: bool = ..., + auto_attribs: bool = ..., + kw_only: bool = ..., + cache_hash: bool = ..., + auto_exc: bool = ..., + eq: bool | None = ..., + order: bool | None = ..., + auto_detect: bool = ..., + getstate_setstate: bool | None = ..., + on_setattr: _OnSetAttrArgType | None = ..., + field_transformer: _FieldTransformer | None = ..., + match_args: bool = ..., +) -> Callable[[_C], _C]: ... 
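The attrs/__init__.pyi stub above types the `define`, `frozen`, and `field` decorators via `dataclass_transform`. A rough sketch of how that API is typically used follows; the class and attribute names are illustrative only and are not part of any vendored package in this diff.

import attrs

@attrs.define
class Point:
    # mutable attrs class; __init__, __eq__ and __repr__ are generated
    x: int
    y: int = attrs.field(default=0)

@attrs.frozen
class Config:
    # frozen variant: instances are immutable after __init__
    name: str
    retries: int = 3

p = Point(1)             # y falls back to its default of 0
assert p == Point(1, 0)  # generated __eq__ compares field values

cfg = Config("demo")
# cfg.retries = 5        # would raise attrs.exceptions.FrozenInstanceError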
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/converters.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/converters.py new file mode 100644 index 0000000000000000000000000000000000000000..7821f6c02cca81277d1ecc87b6bdafad886d8b70 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/converters.py @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: MIT + +from attr.converters import * # noqa: F403 diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/exceptions.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/exceptions.py new file mode 100644 index 0000000000000000000000000000000000000000..3323f9d2112c54b203763d45b455bd5abbe020f6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/exceptions.py @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: MIT + +from attr.exceptions import * # noqa: F403 diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/filters.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/filters.py new file mode 100644 index 0000000000000000000000000000000000000000..3080f48398e5ed8d3428ca3efeb7500633b0cb0f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/filters.py @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: MIT + +from attr.filters import * # noqa: F403 diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/py.typed b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/setters.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/setters.py new file mode 100644 index 0000000000000000000000000000000000000000..f3d73bb793dd49c138950961f41943bb26c57fde --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/setters.py @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: MIT + +from attr.setters import * # noqa: F403 diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/validators.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/validators.py new file mode 100644 index 0000000000000000000000000000000000000000..037e124f29f32d37c1642d159bf828de44f7c349 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/validators.py @@ -0,0 +1,3 @@ +# SPDX-License-Identifier: MIT + +from attr.validators import * # noqa: F403 diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/__init__.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..de2806a563a40c546258bcc60539244962d45007 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/__init__.py @@ -0,0 +1,79 @@ +# Copyright 2020 The HuggingFace Datasets Authors and the TensorFlow Datasets Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +__version__ = "2.20.0" + +from .arrow_dataset import Dataset +from .arrow_reader import ReadInstruction +from .builder import ArrowBasedBuilder, BeamBasedBuilder, BuilderConfig, DatasetBuilder, GeneratorBasedBuilder +from .combine import concatenate_datasets, interleave_datasets +from .dataset_dict import DatasetDict, IterableDatasetDict +from .download import * +from .features import * +from .fingerprint import disable_caching, enable_caching, is_caching_enabled, set_caching_enabled +from .info import DatasetInfo, MetricInfo +from .inspect import ( + get_dataset_config_info, + get_dataset_config_names, + get_dataset_default_config_name, + get_dataset_infos, + get_dataset_split_names, + inspect_dataset, + inspect_metric, + list_datasets, + list_metrics, +) +from .iterable_dataset import IterableDataset +from .load import load_dataset, load_dataset_builder, load_from_disk, load_metric +from .metric import Metric +from .splits import ( + NamedSplit, + NamedSplitAll, + Split, + SplitBase, + SplitDict, + SplitGenerator, + SplitInfo, + SubSplitInfo, + percent, +) +from .tasks import * +from .utils import * +from .utils import logging + + +# isort: split + +# Deprecated modules +from . import arrow_dataset as _arrow_dataset +from . import utils as _utils +from .exceptions import ExpectedMoreDownloadedFiles, ExpectedMoreSplits, UnexpectedDownloadedFile, UnexpectedSplits +from .utils import download_manager as _deprecated_download_manager +from .utils import info_utils as _deprecated_info_utils + + +_arrow_dataset.concatenate_datasets = concatenate_datasets +_utils.DownloadConfig = DownloadConfig +_utils.DownloadManager = DownloadManager +_utils.DownloadMode = DownloadMode +_deprecated_download_manager.DownloadConfig = DownloadConfig +_deprecated_download_manager.DownloadMode = DownloadMode +_deprecated_download_manager.DownloadManager = DownloadManager +_deprecated_info_utils.ExpectedMoreDownloadedFiles = ExpectedMoreDownloadedFiles +_deprecated_info_utils.ExpectedMoreSplits = ExpectedMoreSplits +_deprecated_info_utils.UnexpectedDownloadedFile = UnexpectedDownloadedFile +_deprecated_info_utils.UnexpectedSplits = UnexpectedSplits + +del _arrow_dataset, _utils, _deprecated_download_manager +del _deprecated_info_utils, ExpectedMoreDownloadedFiles, ExpectedMoreSplits, UnexpectedDownloadedFile, UnexpectedSplits diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/arrow_writer.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/arrow_writer.py new file mode 100644 index 0000000000000000000000000000000000000000..ea12d094e6316c8537e6f3cb556b5fc20b5cd92d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/arrow_writer.py @@ -0,0 +1,746 @@ +# Copyright 2020 The HuggingFace Datasets Authors and the TensorFlow Datasets Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""To write records into Parquet files.""" + +import errno +import json +import os +import sys +from pathlib import Path +from typing import Any, Dict, Iterable, List, Optional, Tuple, Union + +import fsspec +import numpy as np +import pyarrow as pa +import pyarrow.parquet as pq +from fsspec.core import url_to_fs + +from . import config +from .features import Features, Image, Value +from .features.features import ( + FeatureType, + _ArrayXDExtensionType, + cast_to_python_objects, + generate_from_arrow_type, + get_nested_type, + list_of_np_array_to_pyarrow_listarray, + numpy_to_pyarrow_listarray, + to_pyarrow_listarray, +) +from .filesystems import is_remote_filesystem +from .info import DatasetInfo +from .keyhash import DuplicatedKeysError, KeyHasher +from .table import array_cast, cast_array_to_feature, embed_table_storage, table_cast +from .utils import logging +from .utils import tqdm as hf_tqdm +from .utils.file_utils import hash_url_to_filename +from .utils.py_utils import asdict, first_non_null_value + + +logger = logging.get_logger(__name__) + +type_ = type # keep python's type function + + +class SchemaInferenceError(ValueError): + pass + + +class TypedSequence: + """ + This data container generalizes the typing when instantiating pyarrow arrays, tables or batches. + + More specifically it adds several features: + - Support extension types like ``datasets.features.Array2DExtensionType``: + By default pyarrow arrays don't return extension arrays. One has to call + ``pa.ExtensionArray.from_storage(type, pa.array(data, type.storage_type))`` + in order to get an extension array. + - Support for ``try_type`` parameter that can be used instead of ``type``: + When an array is transformed, we like to keep the same type as before if possible. + For example when calling :func:`datasets.Dataset.map`, we don't want to change the type + of each column by default. + - Better error message when a pyarrow array overflows. 
+ + Example:: + + from datasets.features import Array2D, Array2DExtensionType, Value + from datasets.arrow_writer import TypedSequence + import pyarrow as pa + + arr = pa.array(TypedSequence([1, 2, 3], type=Value("int32"))) + assert arr.type == pa.int32() + + arr = pa.array(TypedSequence([1, 2, 3], try_type=Value("int32"))) + assert arr.type == pa.int32() + + arr = pa.array(TypedSequence(["foo", "bar"], try_type=Value("int32"))) + assert arr.type == pa.string() + + arr = pa.array(TypedSequence([[[1, 2, 3]]], type=Array2D((1, 3), "int64"))) + assert arr.type == Array2DExtensionType((1, 3), "int64") + + table = pa.Table.from_pydict({ + "image": TypedSequence([[[1, 2, 3]]], type=Array2D((1, 3), "int64")) + }) + assert table["image"].type == Array2DExtensionType((1, 3), "int64") + + """ + + def __init__( + self, + data: Iterable, + type: Optional[FeatureType] = None, + try_type: Optional[FeatureType] = None, + optimized_int_type: Optional[FeatureType] = None, + ): + # assert type is None or try_type is None, + if type is not None and try_type is not None: + raise ValueError("You cannot specify both type and try_type") + # set attributes + self.data = data + self.type = type + self.try_type = try_type # is ignored if it doesn't match the data + self.optimized_int_type = optimized_int_type + # when trying a type (is ignored if data is not compatible) + self.trying_type = self.try_type is not None + self.trying_int_optimization = optimized_int_type is not None and type is None and try_type is None + # used to get back the inferred type after __arrow_array__() is called once + self._inferred_type = None + + def get_inferred_type(self) -> FeatureType: + """Return the inferred feature type. + This is done by converting the sequence to an Arrow array, and getting the corresponding + feature type. + + Since building the Arrow array can be expensive, the value of the inferred type is cached + as soon as pa.array is called on the typed sequence. + + Returns: + FeatureType: inferred feature type of the sequence. + """ + if self._inferred_type is None: + self._inferred_type = generate_from_arrow_type(pa.array(self).type) + return self._inferred_type + + @staticmethod + def _infer_custom_type_and_encode(data: Iterable) -> Tuple[Iterable, Optional[FeatureType]]: + """Implement type inference for custom objects like PIL.Image.Image -> Image type. + + This function is only used for custom python objects that can't be direclty passed to build + an Arrow array. In such cases is infers the feature type to use, and it encodes the data so + that they can be passed to an Arrow array. + + Args: + data (Iterable): array of data to infer the type, e.g. a list of PIL images. + + Returns: + Tuple[Iterable, Optional[FeatureType]]: a tuple with: + - the (possibly encoded) array, if the inferred feature type requires encoding + - the inferred feature type if the array is made of supported custom objects like + PIL images, else None. 
+ """ + if config.PIL_AVAILABLE and "PIL" in sys.modules: + import PIL.Image + + non_null_idx, non_null_value = first_non_null_value(data) + if isinstance(non_null_value, PIL.Image.Image): + return [Image().encode_example(value) if value is not None else None for value in data], Image() + return data, None + + def __arrow_array__(self, type: Optional[pa.DataType] = None): + """This function is called when calling pa.array(typed_sequence)""" + + if type is not None: + raise ValueError("TypedSequence is supposed to be used with pa.array(typed_sequence, type=None)") + del type # make sure we don't use it + data = self.data + # automatic type inference for custom objects + if self.type is None and self.try_type is None: + data, self._inferred_type = self._infer_custom_type_and_encode(data) + if self._inferred_type is None: + type = self.try_type if self.trying_type else self.type + else: + type = self._inferred_type + pa_type = get_nested_type(type) if type is not None else None + optimized_int_pa_type = ( + get_nested_type(self.optimized_int_type) if self.optimized_int_type is not None else None + ) + trying_cast_to_python_objects = False + try: + # custom pyarrow types + if isinstance(pa_type, _ArrayXDExtensionType): + storage = to_pyarrow_listarray(data, pa_type) + return pa.ExtensionArray.from_storage(pa_type, storage) + + # efficient np array to pyarrow array + if isinstance(data, np.ndarray): + out = numpy_to_pyarrow_listarray(data) + elif isinstance(data, list) and data and isinstance(first_non_null_value(data)[1], np.ndarray): + out = list_of_np_array_to_pyarrow_listarray(data) + else: + trying_cast_to_python_objects = True + out = pa.array(cast_to_python_objects(data, only_1d_for_numpy=True)) + # use smaller integer precisions if possible + if self.trying_int_optimization: + if pa.types.is_int64(out.type): + out = out.cast(optimized_int_pa_type) + elif pa.types.is_list(out.type): + if pa.types.is_int64(out.type.value_type): + out = array_cast(out, pa.list_(optimized_int_pa_type)) + elif pa.types.is_list(out.type.value_type) and pa.types.is_int64(out.type.value_type.value_type): + out = array_cast(out, pa.list_(pa.list_(optimized_int_pa_type))) + # otherwise we can finally use the user's type + elif type is not None: + # We use cast_array_to_feature to support casting to custom types like Audio and Image + # Also, when trying type "string", we don't want to convert integers or floats to "string". + # We only do it if trying_type is False - since this is what the user asks for. + out = cast_array_to_feature( + out, type, allow_primitive_to_str=not self.trying_type, allow_decimal_to_str=not self.trying_type + ) + return out + except ( + TypeError, + pa.lib.ArrowInvalid, + pa.lib.ArrowNotImplementedError, + ) as e: # handle type errors and overflows + # Ignore ArrowNotImplementedError caused by trying type, otherwise re-raise + if not self.trying_type and isinstance(e, pa.lib.ArrowNotImplementedError): + raise + + if self.trying_type: + try: # second chance + if isinstance(data, np.ndarray): + return numpy_to_pyarrow_listarray(data) + elif isinstance(data, list) and data and any(isinstance(value, np.ndarray) for value in data): + return list_of_np_array_to_pyarrow_listarray(data) + else: + trying_cast_to_python_objects = True + return pa.array(cast_to_python_objects(data, only_1d_for_numpy=True)) + except pa.lib.ArrowInvalid as e: + if "overflow" in str(e): + raise OverflowError( + f"There was an overflow with type {type_(data)}. 
Try to reduce writer_batch_size to have batches smaller than 2GB.\n({e})" + ) from None + elif self.trying_int_optimization and "not in range" in str(e): + optimized_int_pa_type_str = np.dtype(optimized_int_pa_type.to_pandas_dtype()).name + logger.info( + f"Failed to cast a sequence to {optimized_int_pa_type_str}. Falling back to int64." + ) + return out + elif trying_cast_to_python_objects and "Could not convert" in str(e): + out = pa.array( + cast_to_python_objects(data, only_1d_for_numpy=True, optimize_list_casting=False) + ) + if type is not None: + out = cast_array_to_feature( + out, type, allow_primitive_to_str=True, allow_decimal_to_str=True + ) + return out + else: + raise + elif "overflow" in str(e): + raise OverflowError( + f"There was an overflow with type {type_(data)}. Try to reduce writer_batch_size to have batches smaller than 2GB.\n({e})" + ) from None + elif self.trying_int_optimization and "not in range" in str(e): + optimized_int_pa_type_str = np.dtype(optimized_int_pa_type.to_pandas_dtype()).name + logger.info(f"Failed to cast a sequence to {optimized_int_pa_type_str}. Falling back to int64.") + return out + elif trying_cast_to_python_objects and "Could not convert" in str(e): + out = pa.array(cast_to_python_objects(data, only_1d_for_numpy=True, optimize_list_casting=False)) + if type is not None: + out = cast_array_to_feature(out, type, allow_primitive_to_str=True, allow_decimal_to_str=True) + return out + else: + raise + + +class OptimizedTypedSequence(TypedSequence): + def __init__( + self, + data, + type: Optional[FeatureType] = None, + try_type: Optional[FeatureType] = None, + col: Optional[str] = None, + optimized_int_type: Optional[FeatureType] = None, + ): + optimized_int_type_by_col = { + "attention_mask": Value("int8"), # binary tensor + "special_tokens_mask": Value("int8"), + "input_ids": Value("int32"), # typical vocab size: 0-50k (max ~500k, never > 1M) + "token_type_ids": Value( + "int8" + ), # binary mask; some (XLNetModel) use an additional token represented by a 2 + } + if type is None and try_type is None: + optimized_int_type = optimized_int_type_by_col.get(col, None) + super().__init__(data, type=type, try_type=try_type, optimized_int_type=optimized_int_type) + + +class ArrowWriter: + """Shuffles and writes Examples to Arrow files.""" + + _WRITER_CLASS = pa.RecordBatchStreamWriter + + def __init__( + self, + schema: Optional[pa.Schema] = None, + features: Optional[Features] = None, + path: Optional[str] = None, + stream: Optional[pa.NativeFile] = None, + fingerprint: Optional[str] = None, + writer_batch_size: Optional[int] = None, + hash_salt: Optional[str] = None, + check_duplicates: Optional[bool] = False, + disable_nullable: bool = False, + update_features: bool = False, + with_metadata: bool = True, + unit: str = "examples", + embed_local_files: bool = False, + storage_options: Optional[dict] = None, + ): + if path is None and stream is None: + raise ValueError("At least one of path and stream must be provided.") + if features is not None: + self._features = features + self._schema = None + elif schema is not None: + self._schema: pa.Schema = schema + self._features = Features.from_arrow_schema(self._schema) + else: + self._features = None + self._schema = None + + if hash_salt is not None: + # Create KeyHasher instance using split name as hash salt + self._hasher = KeyHasher(hash_salt) + else: + self._hasher = KeyHasher("") + + self._check_duplicates = check_duplicates + self._disable_nullable = disable_nullable + + if stream is None: + fs, path 
= url_to_fs(path, **(storage_options or {})) + self._fs: fsspec.AbstractFileSystem = fs + self._path = path if not is_remote_filesystem(self._fs) else self._fs.unstrip_protocol(path) + self.stream = self._fs.open(path, "wb") + self._closable_stream = True + else: + self._fs = None + self._path = None + self.stream = stream + self._closable_stream = False + + self.fingerprint = fingerprint + self.disable_nullable = disable_nullable + self.writer_batch_size = writer_batch_size or config.DEFAULT_MAX_BATCH_SIZE + self.update_features = update_features + self.with_metadata = with_metadata + self.unit = unit + self.embed_local_files = embed_local_files + + self._num_examples = 0 + self._num_bytes = 0 + self.current_examples: List[Tuple[Dict[str, Any], str]] = [] + self.current_rows: List[pa.Table] = [] + self.pa_writer: Optional[pa.RecordBatchStreamWriter] = None + self.hkey_record = [] + + def __len__(self): + """Return the number of writed and staged examples""" + return self._num_examples + len(self.current_examples) + len(self.current_rows) + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + self.close() + + def close(self): + # Try closing if opened; if closed: pyarrow.lib.ArrowInvalid: Invalid operation on closed file + if self.pa_writer: # it might be None + try: + self.pa_writer.close() + except Exception: # pyarrow.lib.ArrowInvalid, OSError + pass + if self._closable_stream and not self.stream.closed: + self.stream.close() # This also closes self.pa_writer if it is opened + + def _build_writer(self, inferred_schema: pa.Schema): + schema = self.schema + inferred_features = Features.from_arrow_schema(inferred_schema) + if self._features is not None: + if self.update_features: # keep original features it they match, or update them + fields = {field.name: field for field in self._features.type} + for inferred_field in inferred_features.type: + name = inferred_field.name + if name in fields: + if inferred_field == fields[name]: + inferred_features[name] = self._features[name] + self._features = inferred_features + schema: pa.Schema = inferred_schema + else: + self._features = inferred_features + schema: pa.Schema = inferred_features.arrow_schema + if self.disable_nullable: + schema = pa.schema(pa.field(field.name, field.type, nullable=False) for field in schema) + if self.with_metadata: + schema = schema.with_metadata(self._build_metadata(DatasetInfo(features=self._features), self.fingerprint)) + else: + schema = schema.with_metadata({}) + self._schema = schema + self.pa_writer = self._WRITER_CLASS(self.stream, schema) + + @property + def schema(self): + _schema = ( + self._schema + if self._schema is not None + else (pa.schema(self._features.type) if self._features is not None else None) + ) + if self._disable_nullable and _schema is not None: + _schema = pa.schema(pa.field(field.name, field.type, nullable=False) for field in _schema) + return _schema if _schema is not None else [] + + @staticmethod + def _build_metadata(info: DatasetInfo, fingerprint: Optional[str] = None) -> Dict[str, str]: + info_keys = ["features"] # we can add support for more DatasetInfo keys in the future + info_as_dict = asdict(info) + metadata = {} + metadata["info"] = {key: info_as_dict[key] for key in info_keys} + if fingerprint is not None: + metadata["fingerprint"] = fingerprint + return {"huggingface": json.dumps(metadata)} + + def write_examples_on_file(self): + """Write stored examples from the write-pool of examples. 
It makes a table out of the examples and write it.""" + if not self.current_examples: + return + # preserve the order the columns + if self.schema: + schema_cols = set(self.schema.names) + examples_cols = self.current_examples[0][0].keys() # .keys() preserves the order (unlike set) + common_cols = [col for col in self.schema.names if col in examples_cols] + extra_cols = [col for col in examples_cols if col not in schema_cols] + cols = common_cols + extra_cols + else: + cols = list(self.current_examples[0][0]) + batch_examples = {} + for col in cols: + # We use row[0][col] since current_examples contains (example, key) tuples. + # Morever, examples could be Arrow arrays of 1 element. + # This can happen in `.map()` when we want to re-write the same Arrow data + if all(isinstance(row[0][col], (pa.Array, pa.ChunkedArray)) for row in self.current_examples): + arrays = [row[0][col] for row in self.current_examples] + arrays = [ + chunk + for array in arrays + for chunk in (array.chunks if isinstance(array, pa.ChunkedArray) else [array]) + ] + batch_examples[col] = pa.concat_arrays(arrays) + else: + batch_examples[col] = [ + row[0][col].to_pylist()[0] if isinstance(row[0][col], (pa.Array, pa.ChunkedArray)) else row[0][col] + for row in self.current_examples + ] + self.write_batch(batch_examples=batch_examples) + self.current_examples = [] + + def write_rows_on_file(self): + """Write stored rows from the write-pool of rows. It concatenates the single-row tables and it writes the resulting table.""" + if not self.current_rows: + return + table = pa.concat_tables(self.current_rows) + self.write_table(table) + self.current_rows = [] + + def write( + self, + example: Dict[str, Any], + key: Optional[Union[str, int, bytes]] = None, + writer_batch_size: Optional[int] = None, + ): + """Add a given (Example,Key) pair to the write-pool of examples which is written to file. + + Args: + example: the Example to add. + key: Optional, a unique identifier(str, int or bytes) associated with each example + """ + # Utilize the keys and duplicate checking when `self._check_duplicates` is passed True + if self._check_duplicates: + # Create unique hash from key and store as (key, example) pairs + hash = self._hasher.hash(key) + self.current_examples.append((example, hash)) + # Maintain record of keys and their respective hashes for checking duplicates + self.hkey_record.append((hash, key)) + else: + # Store example as a tuple so as to keep the structure of `self.current_examples` uniform + self.current_examples.append((example, "")) + + if writer_batch_size is None: + writer_batch_size = self.writer_batch_size + if writer_batch_size is not None and len(self.current_examples) >= writer_batch_size: + if self._check_duplicates: + self.check_duplicate_keys() + # Re-intializing to empty list for next batch + self.hkey_record = [] + + self.write_examples_on_file() + + def check_duplicate_keys(self): + """Raises error if duplicates found in a batch""" + tmp_record = set() + for hash, key in self.hkey_record: + if hash in tmp_record: + duplicate_key_indices = [ + str(self._num_examples + index) + for index, (duplicate_hash, _) in enumerate(self.hkey_record) + if duplicate_hash == hash + ] + + raise DuplicatedKeysError(key, duplicate_key_indices) + else: + tmp_record.add(hash) + + def write_row(self, row: pa.Table, writer_batch_size: Optional[int] = None): + """Add a given single-row Table to the write-pool of rows which is written to file. + + Args: + row: the row to add. 
+ """ + if len(row) != 1: + raise ValueError(f"Only single-row pyarrow tables are allowed but got table with {len(row)} rows.") + self.current_rows.append(row) + if writer_batch_size is None: + writer_batch_size = self.writer_batch_size + if writer_batch_size is not None and len(self.current_rows) >= writer_batch_size: + self.write_rows_on_file() + + def write_batch( + self, + batch_examples: Dict[str, List], + writer_batch_size: Optional[int] = None, + ): + """Write a batch of Example to file. + Ignores the batch if it appears to be empty, + preventing a potential schema update of unknown types. + + Args: + batch_examples: the batch of examples to add. + """ + if batch_examples and len(next(iter(batch_examples.values()))) == 0: + return + features = None if self.pa_writer is None and self.update_features else self._features + try_features = self._features if self.pa_writer is None and self.update_features else None + arrays = [] + inferred_features = Features() + # preserve the order the columns + if self.schema: + schema_cols = set(self.schema.names) + batch_cols = batch_examples.keys() # .keys() preserves the order (unlike set) + common_cols = [col for col in self.schema.names if col in batch_cols] + extra_cols = [col for col in batch_cols if col not in schema_cols] + cols = common_cols + extra_cols + else: + cols = list(batch_examples) + for col in cols: + col_values = batch_examples[col] + col_type = features[col] if features else None + if isinstance(col_values, (pa.Array, pa.ChunkedArray)): + array = cast_array_to_feature(col_values, col_type) if col_type is not None else col_values + arrays.append(array) + inferred_features[col] = generate_from_arrow_type(col_values.type) + else: + col_try_type = try_features[col] if try_features is not None and col in try_features else None + typed_sequence = OptimizedTypedSequence(col_values, type=col_type, try_type=col_try_type, col=col) + arrays.append(pa.array(typed_sequence)) + inferred_features[col] = typed_sequence.get_inferred_type() + schema = inferred_features.arrow_schema if self.pa_writer is None else self.schema + pa_table = pa.Table.from_arrays(arrays, schema=schema) + self.write_table(pa_table, writer_batch_size) + + def write_table(self, pa_table: pa.Table, writer_batch_size: Optional[int] = None): + """Write a Table to file. + + Args: + example: the Table to add. 
+ """ + if writer_batch_size is None: + writer_batch_size = self.writer_batch_size + if self.pa_writer is None: + self._build_writer(inferred_schema=pa_table.schema) + pa_table = pa_table.combine_chunks() + pa_table = table_cast(pa_table, self._schema) + if self.embed_local_files: + pa_table = embed_table_storage(pa_table) + self._num_bytes += pa_table.nbytes + self._num_examples += pa_table.num_rows + self.pa_writer.write_table(pa_table, writer_batch_size) + + def finalize(self, close_stream=True): + self.write_rows_on_file() + # In case current_examples < writer_batch_size, but user uses finalize() + if self._check_duplicates: + self.check_duplicate_keys() + # Re-intializing to empty list for next batch + self.hkey_record = [] + self.write_examples_on_file() + # If schema is known, infer features even if no examples were written + if self.pa_writer is None and self.schema: + self._build_writer(self.schema) + if self.pa_writer is not None: + self.pa_writer.close() + self.pa_writer = None + if close_stream: + self.stream.close() + else: + if close_stream: + self.stream.close() + raise SchemaInferenceError("Please pass `features` or at least one example when writing data") + logger.debug( + f"Done writing {self._num_examples} {self.unit} in {self._num_bytes} bytes {self._path if self._path else ''}." + ) + return self._num_examples, self._num_bytes + + +class ParquetWriter(ArrowWriter): + _WRITER_CLASS = pq.ParquetWriter + + +class BeamWriter: + """ + Shuffles and writes Examples to Arrow files. + The Arrow files are converted from Parquet files that are the output of Apache Beam pipelines. + """ + + def __init__( + self, + features: Optional[Features] = None, + schema: Optional[pa.Schema] = None, + path: Optional[str] = None, + namespace: Optional[str] = None, + cache_dir: Optional[str] = None, + ): + if features is None and schema is None: + raise ValueError("At least one of features and schema must be provided.") + if path is None: + raise ValueError("Path must be provided.") + + if features is not None: + self._features: Features = features + self._schema: pa.Schema = features.arrow_schema + else: + self._schema: pa.Schema = schema + self._features: Features = Features.from_arrow_schema(schema) + + self._path = path + self._parquet_path = os.path.splitext(path)[0] # remove extension + self._namespace = namespace or "default" + self._num_examples = None + self._cache_dir = cache_dir or config.HF_DATASETS_CACHE + + def write_from_pcollection(self, pcoll_examples): + """Add the final steps of the beam pipeline: write to parquet files.""" + import apache_beam as beam + + def inc_num_examples(example): + beam.metrics.Metrics.counter(self._namespace, "num_examples").inc() + + # count examples + _ = pcoll_examples | "Count N. Examples" >> beam.Map(inc_num_examples) + + # save dataset + return ( + pcoll_examples + | "Get values" >> beam.Values() + | "Save to parquet" + >> beam.io.parquetio.WriteToParquet( + self._parquet_path, self._schema, shard_name_template="-SSSSS-of-NNNNN.parquet" + ) + ) + + def finalize(self, metrics_query_result: dict): + """ + Run after the pipeline has finished. + It converts the resulting parquet files to arrow and it completes the info from the pipeline metrics. + + Args: + metrics_query_result: `dict` obtained from pipeline_results.metrics().query(m_filter). Make sure + that the filter keeps only the metrics for the considered split, under the namespace `split_name`. 
+ """ + + # Beam FileSystems require the system's path separator in the older versions + fs, parquet_path = url_to_fs(self._parquet_path) + parquet_path = str(Path(parquet_path)) if not is_remote_filesystem(fs) else fs.unstrip_protocol(parquet_path) + + shards = fs.glob(parquet_path + "*.parquet") + num_bytes = sum(fs.sizes(shards)) + shard_lengths = get_parquet_lengths(shards) + + # Convert to arrow + if self._path.endswith(".arrow"): + logger.info(f"Converting parquet files {self._parquet_path} to arrow {self._path}") + try: # stream conversion + num_bytes = 0 + for shard in hf_tqdm(shards, unit="shards"): + with fs.open(shard, "rb") as source: + with fs.open(shard.replace(".parquet", ".arrow"), "wb") as destination: + shard_num_bytes, _ = parquet_to_arrow(source, destination) + num_bytes += shard_num_bytes + except OSError as e: # broken pipe can happen if the connection is unstable, do local conversion instead + if e.errno != errno.EPIPE: # not a broken pipe + raise + logger.warning( + "Broken Pipe during stream conversion from parquet to arrow. Using local convert instead" + ) + local_convert_dir = os.path.join(self._cache_dir, "beam_convert") + os.makedirs(local_convert_dir, exist_ok=True) + num_bytes = 0 + for shard in hf_tqdm(shards, unit="shards"): + local_parquet_path = os.path.join(local_convert_dir, hash_url_to_filename(shard) + ".parquet") + fs.download(shard, local_parquet_path) + local_arrow_path = local_parquet_path.replace(".parquet", ".arrow") + shard_num_bytes, _ = parquet_to_arrow(local_parquet_path, local_arrow_path) + num_bytes += shard_num_bytes + remote_arrow_path = shard.replace(".parquet", ".arrow") + fs.upload(local_arrow_path, remote_arrow_path) + + # Save metrics + counters_dict = {metric.key.metric.name: metric.result for metric in metrics_query_result["counters"]} + self._num_examples = counters_dict["num_examples"] + self._num_bytes = num_bytes + self._shard_lengths = shard_lengths + return self._num_examples, self._num_bytes + + +def get_parquet_lengths(sources) -> List[int]: + shard_lengths = [] + for source in hf_tqdm(sources, unit="parquet files"): + parquet_file = pa.parquet.ParquetFile(source) + shard_lengths.append(parquet_file.metadata.num_rows) + return shard_lengths + + +def parquet_to_arrow(source, destination) -> List[int]: + """Convert parquet file to arrow file. 
Inputs can be str paths or file-like objects""" + stream = None if isinstance(destination, str) else destination + parquet_file = pa.parquet.ParquetFile(source) + # Beam can create empty Parquet files, so we need to pass the source Parquet file's schema + with ArrowWriter(schema=parquet_file.schema_arrow, path=destination, stream=stream) as writer: + for record_batch in parquet_file.iter_batches(): + pa_table = pa.Table.from_batches([record_batch]) + writer.write_table(pa_table) + num_bytes, num_examples = writer.finalize() + return num_bytes, num_examples diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/combine.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/combine.py new file mode 100644 index 0000000000000000000000000000000000000000..d2aad87f0cc9278626d0be5111f91b6de49ef935 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/combine.py @@ -0,0 +1,215 @@ +from typing import List, Optional, TypeVar + +from .arrow_dataset import Dataset, _concatenate_map_style_datasets, _interleave_map_style_datasets +from .dataset_dict import DatasetDict, IterableDatasetDict +from .info import DatasetInfo +from .iterable_dataset import IterableDataset, _concatenate_iterable_datasets, _interleave_iterable_datasets +from .splits import NamedSplit +from .utils import logging +from .utils.py_utils import Literal + + +logger = logging.get_logger(__name__) + + +DatasetType = TypeVar("DatasetType", Dataset, IterableDataset) + + +def interleave_datasets( + datasets: List[DatasetType], + probabilities: Optional[List[float]] = None, + seed: Optional[int] = None, + info: Optional[DatasetInfo] = None, + split: Optional[NamedSplit] = None, + stopping_strategy: Literal["first_exhausted", "all_exhausted"] = "first_exhausted", +) -> DatasetType: + """ + Interleave several datasets (sources) into a single dataset. + The new dataset is constructed by alternating between the sources to get the examples. + + You can use this function on a list of [`Dataset`] objects, or on a list of [`IterableDataset`] objects. + + - If `probabilities` is `None` (default) the new dataset is constructed by cycling between each source to get the examples. + - If `probabilities` is not `None`, the new dataset is constructed by getting examples from a random source at a time according to the provided probabilities. + + The resulting dataset ends when one of the source datasets runs out of examples except when `oversampling` is `True`, + in which case, the resulting dataset ends when all datasets have ran out of examples at least one time. + + Note for iterable datasets: + + In a distributed setup or in PyTorch DataLoader workers, the stopping strategy is applied per process. + Therefore the "first_exhausted" strategy on an sharded iterable dataset can generate less samples in total (up to 1 missing sample per subdataset per worker). + + Args: + datasets (`List[Dataset]` or `List[IterableDataset]`): + List of datasets to interleave. + probabilities (`List[float]`, *optional*, defaults to `None`): + If specified, the new dataset is constructed by sampling + examples from one source at a time according to these probabilities. + seed (`int`, *optional*, defaults to `None`): + The random seed used to choose a source for each example. + info ([`DatasetInfo`], *optional*): + Dataset information, like description, citation, etc. + + split ([`NamedSplit`], *optional*): + Name of the dataset split. 
+ + stopping_strategy (`str`, defaults to `first_exhausted`): + Two strategies are proposed right now, `first_exhausted` and `all_exhausted`. + By default, `first_exhausted` is an undersampling strategy, i.e the dataset construction is stopped as soon as one dataset has ran out of samples. + If the strategy is `all_exhausted`, we use an oversampling strategy, i.e the dataset construction is stopped as soon as every samples of every dataset has been added at least once. + Note that if the strategy is `all_exhausted`, the interleaved dataset size can get enormous: + - with no probabilities, the resulting dataset will have `max_length_datasets*nb_dataset` samples. + - with given probabilities, the resulting dataset will have more samples if some datasets have really low probability of visiting. + Returns: + [`Dataset`] or [`IterableDataset`]: Return type depends on the input `datasets` + parameter. `Dataset` if the input is a list of `Dataset`, `IterableDataset` if the input is a list of + `IterableDataset`. + + Example: + + For regular datasets (map-style): + + ```python + >>> from datasets import Dataset, interleave_datasets + >>> d1 = Dataset.from_dict({"a": [0, 1, 2]}) + >>> d2 = Dataset.from_dict({"a": [10, 11, 12]}) + >>> d3 = Dataset.from_dict({"a": [20, 21, 22]}) + >>> dataset = interleave_datasets([d1, d2, d3], probabilities=[0.7, 0.2, 0.1], seed=42, stopping_strategy="all_exhausted") + >>> dataset["a"] + [10, 0, 11, 1, 2, 20, 12, 10, 0, 1, 2, 21, 0, 11, 1, 2, 0, 1, 12, 2, 10, 0, 22] + >>> dataset = interleave_datasets([d1, d2, d3], probabilities=[0.7, 0.2, 0.1], seed=42) + >>> dataset["a"] + [10, 0, 11, 1, 2] + >>> dataset = interleave_datasets([d1, d2, d3]) + >>> dataset["a"] + [0, 10, 20, 1, 11, 21, 2, 12, 22] + >>> dataset = interleave_datasets([d1, d2, d3], stopping_strategy="all_exhausted") + >>> dataset["a"] + [0, 10, 20, 1, 11, 21, 2, 12, 22] + >>> d1 = Dataset.from_dict({"a": [0, 1, 2]}) + >>> d2 = Dataset.from_dict({"a": [10, 11, 12, 13]}) + >>> d3 = Dataset.from_dict({"a": [20, 21, 22, 23, 24]}) + >>> dataset = interleave_datasets([d1, d2, d3]) + >>> dataset["a"] + [0, 10, 20, 1, 11, 21, 2, 12, 22] + >>> dataset = interleave_datasets([d1, d2, d3], stopping_strategy="all_exhausted") + >>> dataset["a"] + [0, 10, 20, 1, 11, 21, 2, 12, 22, 0, 13, 23, 1, 10, 24] + >>> dataset = interleave_datasets([d1, d2, d3], probabilities=[0.7, 0.2, 0.1], seed=42) + >>> dataset["a"] + [10, 0, 11, 1, 2] + >>> dataset = interleave_datasets([d1, d2, d3], probabilities=[0.7, 0.2, 0.1], seed=42, stopping_strategy="all_exhausted") + >>> dataset["a"] + [10, 0, 11, 1, 2, 20, 12, 13, ..., 0, 1, 2, 0, 24] + For datasets in streaming mode (iterable): + + >>> from datasets import load_dataset, interleave_datasets + >>> d1 = load_dataset("oscar", "unshuffled_deduplicated_en", split="train", streaming=True) + >>> d2 = load_dataset("oscar", "unshuffled_deduplicated_fr", split="train", streaming=True) + >>> dataset = interleave_datasets([d1, d2]) + >>> iterator = iter(dataset) + >>> next(iterator) + {'text': 'Mtendere Village was inspired by the vision...} + >>> next(iterator) + {'text': "Média de débat d'idées, de culture...} + ``` + """ + from .arrow_dataset import Dataset + from .iterable_dataset import IterableDataset + + if not datasets: + raise ValueError("Unable to interleave an empty list of datasets.") + for i, dataset in enumerate(datasets): + if not isinstance(dataset, (Dataset, IterableDataset)): + if isinstance(dataset, (DatasetDict, IterableDatasetDict)): + if not dataset: + raise ValueError( 
+ f"Expected a list of Dataset objects or a list of IterableDataset objects, but element at position {i} " + "is an empty dataset dictionary." + ) + raise ValueError( + f"Dataset at position {i} has at least one split: {list(dataset)}\n" + f"Please pick one to interleave with the other datasets, for example: dataset['{next(iter(dataset))}']" + ) + raise ValueError( + f"Expected a list of Dataset objects or a list of IterableDataset objects, but element at position {i} is a {type(dataset).__name__}." + ) + if i == 0: + dataset_type, other_type = ( + (Dataset, IterableDataset) if isinstance(dataset, Dataset) else (IterableDataset, Dataset) + ) + elif not isinstance(dataset, dataset_type): + raise ValueError( + f"Unable to interleave a {dataset_type.__name__} (at position 0) with a {other_type.__name__} (at position {i}). Expected a list of Dataset objects or a list of IterableDataset objects." + ) + if stopping_strategy not in ["first_exhausted", "all_exhausted"]: + raise ValueError(f"{stopping_strategy} is not supported. Please enter a valid stopping_strategy.") + if dataset_type is Dataset: + return _interleave_map_style_datasets( + datasets, probabilities, seed, info=info, split=split, stopping_strategy=stopping_strategy + ) + else: + return _interleave_iterable_datasets( + datasets, probabilities, seed, info=info, split=split, stopping_strategy=stopping_strategy + ) + + +def concatenate_datasets( + dsets: List[DatasetType], + info: Optional[DatasetInfo] = None, + split: Optional[NamedSplit] = None, + axis: int = 0, +) -> DatasetType: + """ + Converts a list of [`Dataset`] with the same schema into a single [`Dataset`]. + + Args: + dsets (`List[datasets.Dataset]`): + List of Datasets to concatenate. + info (`DatasetInfo`, *optional*): + Dataset information, like description, citation, etc. + split (`NamedSplit`, *optional*): + Name of the dataset split. + axis (`{0, 1}`, defaults to `0`): + Axis to concatenate over, where `0` means over rows (vertically) and `1` means over columns + (horizontally). + + + + Example: + + ```py + >>> ds3 = concatenate_datasets([ds1, ds2]) + ``` + """ + + if not dsets: + raise ValueError("Unable to concatenate an empty list of datasets.") + for i, dataset in enumerate(dsets): + if not isinstance(dataset, (Dataset, IterableDataset)): + if isinstance(dataset, (DatasetDict, IterableDatasetDict)): + if not dataset: + raise ValueError( + f"Expected a list of Dataset objects or a list of IterableDataset objects, but element at position {i} " + "is an empty dataset dictionary." + ) + raise ValueError( + f"Dataset at position {i} has at least one split: {list(dataset)}\n" + f"Please pick one to interleave with the other datasets, for example: dataset['{next(iter(dataset))}']" + ) + raise ValueError( + f"Expected a list of Dataset objects or a list of IterableDataset objects, but element at position {i} is a {type(dataset).__name__}." + ) + if i == 0: + dataset_type, other_type = ( + (Dataset, IterableDataset) if isinstance(dataset, Dataset) else (IterableDataset, Dataset) + ) + elif not isinstance(dataset, dataset_type): + raise ValueError( + f"Unable to interleave a {dataset_type.__name__} (at position 0) with a {other_type.__name__} (at position {i}). Expected a list of Dataset objects or a list of IterableDataset objects." 
+ ) + if dataset_type is Dataset: + return _concatenate_map_style_datasets(dsets, info=info, split=split, axis=axis) + else: + return _concatenate_iterable_datasets(dsets, info=info, split=split, axis=axis) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/data_files.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/data_files.py new file mode 100644 index 0000000000000000000000000000000000000000..793c6ed81154b93a80ff4e3601f01e7a5d6d1e09 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/data_files.py @@ -0,0 +1,825 @@ +import os +import re +from functools import partial +from glob import has_magic +from pathlib import Path, PurePath +from typing import Callable, Dict, List, Optional, Set, Tuple, Union + +import huggingface_hub +from fsspec.core import url_to_fs +from fsspec.implementations.http import HTTPFileSystem +from huggingface_hub import HfFileSystem +from packaging import version +from tqdm.contrib.concurrent import thread_map + +from . import config +from .download import DownloadConfig +from .naming import _split_re +from .splits import Split +from .utils import logging +from .utils import tqdm as hf_tqdm +from .utils.file_utils import _prepare_path_and_storage_options, is_local_path, is_relative_path, xbasename, xjoin +from .utils.py_utils import glob_pattern_to_regex, string_to_dict + + +SingleOriginMetadata = Union[Tuple[str, str], Tuple[str], Tuple[()]] + + +SANITIZED_DEFAULT_SPLIT = str(Split.TRAIN) + + +logger = logging.get_logger(__name__) + + +class Url(str): + pass + + +class EmptyDatasetError(FileNotFoundError): + pass + + +SPLIT_PATTERN_SHARDED = "data/{split}-[0-9][0-9][0-9][0-9][0-9]-of-[0-9][0-9][0-9][0-9][0-9]*.*" + +SPLIT_KEYWORDS = { + Split.TRAIN: ["train", "training"], + Split.VALIDATION: ["validation", "valid", "dev", "val"], + Split.TEST: ["test", "testing", "eval", "evaluation"], +} +NON_WORDS_CHARS = "-._ 0-9" +if config.FSSPEC_VERSION < version.parse("2023.9.0"): + KEYWORDS_IN_FILENAME_BASE_PATTERNS = ["**[{sep}/]{keyword}[{sep}]*", "{keyword}[{sep}]*"] + KEYWORDS_IN_DIR_NAME_BASE_PATTERNS = [ + "{keyword}/**", + "{keyword}[{sep}]*/**", + "**[{sep}/]{keyword}/**", + "**[{sep}/]{keyword}[{sep}]*/**", + ] +elif config.FSSPEC_VERSION < version.parse("2023.12.0"): + KEYWORDS_IN_FILENAME_BASE_PATTERNS = ["**/*[{sep}/]{keyword}[{sep}]*", "{keyword}[{sep}]*"] + KEYWORDS_IN_DIR_NAME_BASE_PATTERNS = [ + "{keyword}/**/*", + "{keyword}[{sep}]*/**/*", + "**/*[{sep}/]{keyword}/**/*", + "**/*[{sep}/]{keyword}[{sep}]*/**/*", + ] +else: + KEYWORDS_IN_FILENAME_BASE_PATTERNS = ["**/{keyword}[{sep}]*", "**/*[{sep}]{keyword}[{sep}]*"] + KEYWORDS_IN_DIR_NAME_BASE_PATTERNS = [ + "**/{keyword}/**", + "**/{keyword}[{sep}]*/**", + "**/*[{sep}]{keyword}/**", + "**/*[{sep}]{keyword}[{sep}]*/**", + ] + +DEFAULT_SPLITS = [Split.TRAIN, Split.VALIDATION, Split.TEST] +DEFAULT_PATTERNS_SPLIT_IN_FILENAME = { + split: [ + pattern.format(keyword=keyword, sep=NON_WORDS_CHARS) + for keyword in SPLIT_KEYWORDS[split] + for pattern in KEYWORDS_IN_FILENAME_BASE_PATTERNS + ] + for split in DEFAULT_SPLITS +} +DEFAULT_PATTERNS_SPLIT_IN_DIR_NAME = { + split: [ + pattern.format(keyword=keyword, sep=NON_WORDS_CHARS) + for keyword in SPLIT_KEYWORDS[split] + for pattern in KEYWORDS_IN_DIR_NAME_BASE_PATTERNS + ] + for split in DEFAULT_SPLITS +} + + +DEFAULT_PATTERNS_ALL = { + Split.TRAIN: ["**"], +} + +ALL_SPLIT_PATTERNS = [SPLIT_PATTERN_SHARDED] +ALL_DEFAULT_PATTERNS = [ + DEFAULT_PATTERNS_SPLIT_IN_DIR_NAME, + 
DEFAULT_PATTERNS_SPLIT_IN_FILENAME, + DEFAULT_PATTERNS_ALL, +] +if config.FSSPEC_VERSION < version.parse("2023.9.0"): + METADATA_PATTERNS = [ + "metadata.csv", + "**/metadata.csv", + "metadata.jsonl", + "**/metadata.jsonl", + ] # metadata file for ImageFolder and AudioFolder +else: + METADATA_PATTERNS = [ + "**/metadata.csv", + "**/metadata.jsonl", + ] # metadata file for ImageFolder and AudioFolder +WILDCARD_CHARACTERS = "*[]" +FILES_TO_IGNORE = [ + "README.md", + "config.json", + "dataset_info.json", + "dataset_infos.json", + "dummy_data.zip", + "dataset_dict.json", +] + + +def contains_wildcards(pattern: str) -> bool: + return any(wilcard_character in pattern for wilcard_character in WILDCARD_CHARACTERS) + + +def sanitize_patterns(patterns: Union[Dict, List, str]) -> Dict[str, Union[List[str], "DataFilesList"]]: + """ + Take the data_files patterns from the user, and format them into a dictionary. + Each key is the name of the split, and each value is a list of data files patterns (paths or urls). + The default split is "train". + + Returns: + patterns: dictionary of split_name -> list of patterns + """ + if isinstance(patterns, dict): + return {str(key): value if isinstance(value, list) else [value] for key, value in patterns.items()} + elif isinstance(patterns, str): + return {SANITIZED_DEFAULT_SPLIT: [patterns]} + elif isinstance(patterns, list): + if any(isinstance(pattern, dict) for pattern in patterns): + for pattern in patterns: + if not ( + isinstance(pattern, dict) + and len(pattern) == 2 + and "split" in pattern + and isinstance(pattern.get("path"), (str, list)) + ): + raise ValueError( + f"Expected each split to have a 'path' key which can be a string or a list of strings, but got {pattern}" + ) + splits = [pattern["split"] for pattern in patterns] + if len(set(splits)) != len(splits): + raise ValueError(f"Some splits are duplicated in data_files: {splits}") + return { + str(pattern["split"]): pattern["path"] if isinstance(pattern["path"], list) else [pattern["path"]] + for pattern in patterns + } + else: + return {SANITIZED_DEFAULT_SPLIT: patterns} + else: + return sanitize_patterns(list(patterns)) + + +def _is_inside_unrequested_special_dir(matched_rel_path: str, pattern: str) -> bool: + """ + When a path matches a pattern, we additionnally check if it's inside a special directory + we ignore by default (if it starts with a double underscore). + + Users can still explicitly request a filepath inside such a directory if "__pycache__" is + mentioned explicitly in the requested pattern. + + Some examples: + + base directory: + + ./ + └── __pycache__ + └── b.txt + + >>> _is_inside_unrequested_special_dir("__pycache__/b.txt", "**") + True + >>> _is_inside_unrequested_special_dir("__pycache__/b.txt", "*/b.txt") + True + >>> _is_inside_unrequested_special_dir("__pycache__/b.txt", "__pycache__/*") + False + >>> _is_inside_unrequested_special_dir("__pycache__/b.txt", "__*/*") + False + """ + # We just need to check if every special directories from the path is present explicly in the pattern. + # Since we assume that the path matches the pattern, it's equivalent to counting that both + # the parent path and the parent pattern have the same number of special directories. 
+ data_dirs_to_ignore_in_path = [part for part in PurePath(matched_rel_path).parent.parts if part.startswith("__")] + data_dirs_to_ignore_in_pattern = [part for part in PurePath(pattern).parent.parts if part.startswith("__")] + return len(data_dirs_to_ignore_in_path) != len(data_dirs_to_ignore_in_pattern) + + +def _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(matched_rel_path: str, pattern: str) -> bool: + """ + When a path matches a pattern, we additionnally check if it's a hidden file or if it's inside + a hidden directory we ignore by default, i.e. if the file name or a parent directory name starts with a dot. + + Users can still explicitly request a filepath that is hidden or is inside a hidden directory + if the hidden part is mentioned explicitly in the requested pattern. + + Some examples: + + base directory: + + ./ + └── .hidden_file.txt + + >>> _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(".hidden_file.txt", "**") + True + >>> _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(".hidden_file.txt", ".*") + False + + base directory: + + ./ + └── .hidden_dir + └── a.txt + + >>> _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(".hidden_dir/a.txt", "**") + True + >>> _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(".hidden_dir/a.txt", ".*/*") + False + >>> _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(".hidden_dir/a.txt", ".hidden_dir/*") + False + + base directory: + + ./ + └── .hidden_dir + └── .hidden_file.txt + + >>> _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(".hidden_dir/.hidden_file.txt", "**") + True + >>> _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(".hidden_dir/.hidden_file.txt", ".*/*") + True + >>> _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(".hidden_dir/.hidden_file.txt", ".*/.*") + False + >>> _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(".hidden_dir/.hidden_file.txt", ".hidden_dir/*") + True + >>> _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(".hidden_dir/.hidden_file.txt", ".hidden_dir/.*") + False + """ + # We just need to check if every hidden part from the path is present explicly in the pattern. + # Since we assume that the path matches the pattern, it's equivalent to counting that both + # the path and the pattern have the same number of hidden parts. + hidden_directories_in_path = [ + part for part in PurePath(matched_rel_path).parts if part.startswith(".") and not set(part) == {"."} + ] + hidden_directories_in_pattern = [ + part for part in PurePath(pattern).parts if part.startswith(".") and not set(part) == {"."} + ] + return len(hidden_directories_in_path) != len(hidden_directories_in_pattern) + + +def _get_data_files_patterns(pattern_resolver: Callable[[str], List[str]]) -> Dict[str, List[str]]: + """ + Get the default pattern from a directory or repository by testing all the supported patterns. + The first patterns to return a non-empty list of data files is returned. + + In order, it first tests if SPLIT_PATTERN_SHARDED works, otherwise it tests the patterns in ALL_DEFAULT_PATTERNS. 
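+
+    Example (illustrative sketch; `./my_dataset` is a placeholder directory):
+
+    ```python
+    >>> from functools import partial
+    >>> from datasets.data_files import resolve_pattern, _get_data_files_patterns
+    >>> resolver = partial(resolve_pattern, base_path="./my_dataset")
+    >>> patterns_per_split = _get_data_files_patterns(resolver)  # e.g. {"train": [...], "test": [...]}
+    ```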
+ """ + # first check the split patterns like data/{split}-00000-of-00001.parquet + for split_pattern in ALL_SPLIT_PATTERNS: + pattern = split_pattern.replace("{split}", "*") + try: + data_files = pattern_resolver(pattern) + except FileNotFoundError: + continue + if len(data_files) > 0: + splits: Set[str] = { + string_to_dict(xbasename(p), glob_pattern_to_regex(xbasename(split_pattern)))["split"] + for p in data_files + } + if any(not re.match(_split_re, split) for split in splits): + raise ValueError(f"Split name should match '{_split_re}'' but got '{splits}'.") + sorted_splits = [str(split) for split in DEFAULT_SPLITS if split in splits] + sorted( + splits - set(DEFAULT_SPLITS) + ) + return {split: [split_pattern.format(split=split)] for split in sorted_splits} + # then check the default patterns based on train/valid/test splits + for patterns_dict in ALL_DEFAULT_PATTERNS: + non_empty_splits = [] + for split, patterns in patterns_dict.items(): + for pattern in patterns: + try: + data_files = pattern_resolver(pattern) + except FileNotFoundError: + continue + if len(data_files) > 0: + non_empty_splits.append(split) + break + if non_empty_splits: + return {split: patterns_dict[split] for split in non_empty_splits} + raise FileNotFoundError(f"Couldn't resolve pattern {pattern} with resolver {pattern_resolver}") + + +def _get_metadata_files_patterns(pattern_resolver: Callable[[str], List[str]]) -> List[str]: + """ + Get the supported metadata patterns from a directory or repository. + """ + non_empty_patterns = [] + for pattern in METADATA_PATTERNS: + try: + metadata_files = pattern_resolver(pattern) + if len(metadata_files) > 0: + non_empty_patterns.append(pattern) + except FileNotFoundError: + pass + if non_empty_patterns: + return non_empty_patterns + raise FileNotFoundError(f"Couldn't resolve pattern {pattern} with resolver {pattern_resolver}") + + +def resolve_pattern( + pattern: str, + base_path: str, + allowed_extensions: Optional[List[str]] = None, + download_config: Optional[DownloadConfig] = None, +) -> List[str]: + """ + Resolve the paths and URLs of the data files from the pattern passed by the user. + + You can use patterns to resolve multiple local files. Here are a few examples: + - *.csv to match all the CSV files at the first level + - **.csv to match all the CSV files at any level + - data/* to match all the files inside "data" + - data/** to match all the files inside "data" and its subdirectories + + The patterns are resolved using the fsspec glob. In fsspec>=2023.12.0 this is equivalent to + Python's glob.glob, Path.glob, Path.match and fnmatch where ** is unsupported with a prefix/suffix + other than a forward slash /. + + More generally: + - '*' matches any character except a forward-slash (to match just the file or directory name) + - '**' matches any character including a forward-slash / + + Hidden files and directories (i.e. whose names start with a dot) are ignored, unless they are explicitly requested. + The same applies to special directories that start with a double underscore like "__pycache__". + You can still include one if the pattern explicilty mentions it: + - to include a hidden file: "*/.hidden.txt" or "*/.*" + - to include a hidden directory: ".hidden/*" or ".*/*" + - to include a special directory: "__special__/*" or "__*/*" + + Example:: + + >>> from datasets.data_files import resolve_pattern + >>> base_path = "." 
+ >>> resolve_pattern("docs/**/*.py", base_path) + [/Users/mariosasko/Desktop/projects/datasets/docs/source/_config.py'] + + Args: + pattern (str): Unix pattern or paths or URLs of the data files to resolve. + The paths can be absolute or relative to base_path. + Remote filesystems using fsspec are supported, e.g. with the hf:// protocol. + base_path (str): Base path to use when resolving relative paths. + allowed_extensions (Optional[list], optional): White-list of file extensions to use. Defaults to None (all extensions). + For example: allowed_extensions=[".csv", ".json", ".txt", ".parquet"] + download_config ([`DownloadConfig`], *optional*): Specific download configuration parameters. + Returns: + List[str]: List of paths or URLs to the local or remote files that match the patterns. + """ + if is_relative_path(pattern): + pattern = xjoin(base_path, pattern) + elif is_local_path(pattern): + base_path = os.path.splitdrive(pattern)[0] + os.sep + else: + base_path = "" + pattern, storage_options = _prepare_path_and_storage_options(pattern, download_config=download_config) + fs, fs_pattern = url_to_fs(pattern, **storage_options) + files_to_ignore = set(FILES_TO_IGNORE) - {xbasename(pattern)} + protocol = fs.protocol if isinstance(fs.protocol, str) else fs.protocol[0] + protocol_prefix = protocol + "://" if protocol != "file" else "" + glob_kwargs = {} + if protocol == "hf" and config.HF_HUB_VERSION >= version.parse("0.20.0"): + # 10 times faster glob with detail=True (ignores costly info like lastCommit) + glob_kwargs["expand_info"] = False + matched_paths = [ + filepath if filepath.startswith(protocol_prefix) else protocol_prefix + filepath + for filepath, info in fs.glob(pattern, detail=True, **glob_kwargs).items() + if info["type"] == "file" + and (xbasename(filepath) not in files_to_ignore) + and not _is_inside_unrequested_special_dir(filepath, fs_pattern) + and not _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(filepath, fs_pattern) + ] # ignore .ipynb and __pycache__, but keep /../ + if allowed_extensions is not None: + out = [ + filepath + for filepath in matched_paths + if any("." + suffix in allowed_extensions for suffix in xbasename(filepath).split(".")[1:]) + ] + if len(out) < len(matched_paths): + invalid_matched_files = list(set(matched_paths) - set(out)) + logger.info( + f"Some files matched the pattern '{pattern}' but don't have valid data file extensions: {invalid_matched_files}" + ) + else: + out = matched_paths + if not out: + error_msg = f"Unable to find '{pattern}'" + if allowed_extensions is not None: + error_msg += f" with any supported extension {list(allowed_extensions)}" + raise FileNotFoundError(error_msg) + return out + + +def get_data_patterns(base_path: str, download_config: Optional[DownloadConfig] = None) -> Dict[str, List[str]]: + """ + Get the default pattern from a directory testing all the supported patterns. + The first patterns to return a non-empty list of data files is returned. 
+ + Some examples of supported patterns: + + Input: + + my_dataset_repository/ + ├── README.md + └── dataset.csv + + Output: + + {'train': ['**']} + + Input: + + my_dataset_repository/ + ├── README.md + ├── train.csv + └── test.csv + + my_dataset_repository/ + ├── README.md + └── data/ + ├── train.csv + └── test.csv + + my_dataset_repository/ + ├── README.md + ├── train_0.csv + ├── train_1.csv + ├── train_2.csv + ├── train_3.csv + ├── test_0.csv + └── test_1.csv + + Output: + + {'train': ['**/train[-._ 0-9]*', '**/*[-._ 0-9]train[-._ 0-9]*', '**/training[-._ 0-9]*', '**/*[-._ 0-9]training[-._ 0-9]*'], + 'test': ['**/test[-._ 0-9]*', '**/*[-._ 0-9]test[-._ 0-9]*', '**/testing[-._ 0-9]*', '**/*[-._ 0-9]testing[-._ 0-9]*', ...]} + + Input: + + my_dataset_repository/ + ├── README.md + └── data/ + ├── train/ + │ ├── shard_0.csv + │ ├── shard_1.csv + │ ├── shard_2.csv + │ └── shard_3.csv + └── test/ + ├── shard_0.csv + └── shard_1.csv + + Output: + + {'train': ['**/train/**', '**/train[-._ 0-9]*/**', '**/*[-._ 0-9]train/**', '**/*[-._ 0-9]train[-._ 0-9]*/**', ...], + 'test': ['**/test/**', '**/test[-._ 0-9]*/**', '**/*[-._ 0-9]test/**', '**/*[-._ 0-9]test[-._ 0-9]*/**', ...]} + + Input: + + my_dataset_repository/ + ├── README.md + └── data/ + ├── train-00000-of-00003.csv + ├── train-00001-of-00003.csv + ├── train-00002-of-00003.csv + ├── test-00000-of-00001.csv + ├── random-00000-of-00003.csv + ├── random-00001-of-00003.csv + └── random-00002-of-00003.csv + + Output: + + {'train': ['data/train-[0-9][0-9][0-9][0-9][0-9]-of-[0-9][0-9][0-9][0-9][0-9]*.*'], + 'test': ['data/test-[0-9][0-9][0-9][0-9][0-9]-of-[0-9][0-9][0-9][0-9][0-9]*.*'], + 'random': ['data/random-[0-9][0-9][0-9][0-9][0-9]-of-[0-9][0-9][0-9][0-9][0-9]*.*']} + + In order, it first tests if SPLIT_PATTERN_SHARDED works, otherwise it tests the patterns in ALL_DEFAULT_PATTERNS. + """ + resolver = partial(resolve_pattern, base_path=base_path, download_config=download_config) + try: + return _get_data_files_patterns(resolver) + except FileNotFoundError: + raise EmptyDatasetError(f"The directory at {base_path} doesn't contain any data files") from None + + +def get_metadata_patterns( + base_path: str, + download_config: Optional[DownloadConfig] = None, +) -> List[str]: + """ + Get the supported metadata patterns from a local directory. 
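+
+    Example (illustrative sketch; `./my_image_folder` is a placeholder directory assumed to contain a `metadata.csv` or `metadata.jsonl` file):
+
+    ```python
+    >>> from datasets.data_files import get_metadata_patterns
+    >>> get_metadata_patterns("./my_image_folder")  # e.g. ['**/metadata.csv']
+    ```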
+ """ + resolver = partial(resolve_pattern, base_path=base_path, download_config=download_config) + try: + return _get_metadata_files_patterns(resolver) + except FileNotFoundError: + raise FileNotFoundError(f"The directory at {base_path} doesn't contain any metadata file") from None + + +def _get_single_origin_metadata( + data_file: str, + download_config: Optional[DownloadConfig] = None, +) -> SingleOriginMetadata: + data_file, storage_options = _prepare_path_and_storage_options(data_file, download_config=download_config) + fs, *_ = url_to_fs(data_file, **storage_options) + if isinstance(fs, HfFileSystem): + resolved_path = fs.resolve_path(data_file) + return resolved_path.repo_id, resolved_path.revision + elif isinstance(fs, HTTPFileSystem) and data_file.startswith(config.HF_ENDPOINT): + hffs = HfFileSystem(endpoint=config.HF_ENDPOINT, token=download_config.token) + data_file = "hf://" + data_file[len(config.HF_ENDPOINT) + 1 :].replace("/resolve/", "@", 1) + resolved_path = hffs.resolve_path(data_file) + return resolved_path.repo_id, resolved_path.revision + info = fs.info(data_file) + # s3fs uses "ETag", gcsfs uses "etag", and for local we simply check mtime + for key in ["ETag", "etag", "mtime"]: + if key in info: + return (str(info[key]),) + return () + + +def _get_origin_metadata( + data_files: List[str], + download_config: Optional[DownloadConfig] = None, + max_workers: Optional[int] = None, +) -> List[SingleOriginMetadata]: + max_workers = max_workers if max_workers is not None else config.HF_DATASETS_MULTITHREADING_MAX_WORKERS + return thread_map( + partial(_get_single_origin_metadata, download_config=download_config), + data_files, + max_workers=max_workers, + tqdm_class=hf_tqdm, + desc="Resolving data files", + # set `disable=None` rather than `disable=False` by default to disable progress bar when no TTY attached + disable=len(data_files) <= 16 or None, + ) + + +class DataFilesList(List[str]): + """ + List of data files (absolute local paths or URLs). + It has two construction methods given the user's data files patterns: + - ``from_hf_repo``: resolve patterns inside a dataset repository + - ``from_local_or_remote``: resolve patterns from a local path + + Moreover, DataFilesList has an additional attribute ``origin_metadata``. + It can store: + - the last modified time of local files + - ETag of remote files + - commit sha of a dataset repository + + Thanks to this additional attribute, it is possible to hash the list + and get a different hash if and only if at least one file changed. + This is useful for caching Dataset objects that are obtained from a list of data files. 
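+
+    Example (illustrative sketch; `./my_dataset` and the pattern are placeholders):
+
+    ```python
+    >>> from datasets.data_files import DataFilesList
+    >>> data_files = DataFilesList.from_local_or_remote(["data/*.csv"], base_path="./my_dataset")
+    >>> data_files.origin_metadata  # one tuple per resolved file, e.g. local last-modified times
+    ```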
+ """ + + def __init__(self, data_files: List[str], origin_metadata: List[SingleOriginMetadata]) -> None: + super().__init__(data_files) + self.origin_metadata = origin_metadata + + def __add__(self, other: "DataFilesList") -> "DataFilesList": + return DataFilesList([*self, *other], self.origin_metadata + other.origin_metadata) + + @classmethod + def from_hf_repo( + cls, + patterns: List[str], + dataset_info: huggingface_hub.hf_api.DatasetInfo, + base_path: Optional[str] = None, + allowed_extensions: Optional[List[str]] = None, + download_config: Optional[DownloadConfig] = None, + ) -> "DataFilesList": + base_path = f"hf://datasets/{dataset_info.id}@{dataset_info.sha}/{base_path or ''}".rstrip("/") + return cls.from_patterns( + patterns, base_path=base_path, allowed_extensions=allowed_extensions, download_config=download_config + ) + + @classmethod + def from_local_or_remote( + cls, + patterns: List[str], + base_path: Optional[str] = None, + allowed_extensions: Optional[List[str]] = None, + download_config: Optional[DownloadConfig] = None, + ) -> "DataFilesList": + base_path = base_path if base_path is not None else Path().resolve().as_posix() + return cls.from_patterns( + patterns, base_path=base_path, allowed_extensions=allowed_extensions, download_config=download_config + ) + + @classmethod + def from_patterns( + cls, + patterns: List[str], + base_path: Optional[str] = None, + allowed_extensions: Optional[List[str]] = None, + download_config: Optional[DownloadConfig] = None, + ) -> "DataFilesList": + base_path = base_path if base_path is not None else Path().resolve().as_posix() + data_files = [] + for pattern in patterns: + try: + data_files.extend( + resolve_pattern( + pattern, + base_path=base_path, + allowed_extensions=allowed_extensions, + download_config=download_config, + ) + ) + except FileNotFoundError: + if not has_magic(pattern): + raise + origin_metadata = _get_origin_metadata(data_files, download_config=download_config) + return cls(data_files, origin_metadata) + + def filter_extensions(self, extensions: List[str]) -> "DataFilesList": + pattern = "|".join("\\" + ext for ext in extensions) + pattern = re.compile(f".*({pattern})(\\..+)?$") + return DataFilesList( + [data_file for data_file in self if pattern.match(data_file)], + origin_metadata=self.origin_metadata, + ) + + +class DataFilesDict(Dict[str, DataFilesList]): + """ + Dict of split_name -> list of data files (absolute local paths or URLs). + It has two construction methods given the user's data files patterns : + - ``from_hf_repo``: resolve patterns inside a dataset repository + - ``from_local_or_remote``: resolve patterns from a local path + + Moreover, each list is a DataFilesList. It is possible to hash the dictionary + and get a different hash if and only if at least one file changed. + For more info, see [`DataFilesList`]. + + This is useful for caching Dataset objects that are obtained from a list of data files. + + Changing the order of the keys of this dictionary also doesn't change its hash. 
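+
+    Example (illustrative sketch; the paths and patterns are placeholders):
+
+    ```python
+    >>> from datasets.data_files import DataFilesDict
+    >>> data_files = DataFilesDict.from_local_or_remote(
+    ...     {"train": ["train/*.csv"], "test": ["test/*.csv"]}, base_path="./my_dataset"
+    ... )
+    >>> list(data_files)
+    ['train', 'test']
+    ```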
+ """ + + @classmethod + def from_local_or_remote( + cls, + patterns: Dict[str, Union[List[str], DataFilesList]], + base_path: Optional[str] = None, + allowed_extensions: Optional[List[str]] = None, + download_config: Optional[DownloadConfig] = None, + ) -> "DataFilesDict": + out = cls() + for key, patterns_for_key in patterns.items(): + out[key] = ( + patterns_for_key + if isinstance(patterns_for_key, DataFilesList) + else DataFilesList.from_local_or_remote( + patterns_for_key, + base_path=base_path, + allowed_extensions=allowed_extensions, + download_config=download_config, + ) + ) + return out + + @classmethod + def from_hf_repo( + cls, + patterns: Dict[str, Union[List[str], DataFilesList]], + dataset_info: huggingface_hub.hf_api.DatasetInfo, + base_path: Optional[str] = None, + allowed_extensions: Optional[List[str]] = None, + download_config: Optional[DownloadConfig] = None, + ) -> "DataFilesDict": + out = cls() + for key, patterns_for_key in patterns.items(): + out[key] = ( + patterns_for_key + if isinstance(patterns_for_key, DataFilesList) + else DataFilesList.from_hf_repo( + patterns_for_key, + dataset_info=dataset_info, + base_path=base_path, + allowed_extensions=allowed_extensions, + download_config=download_config, + ) + ) + return out + + @classmethod + def from_patterns( + cls, + patterns: Dict[str, Union[List[str], DataFilesList]], + base_path: Optional[str] = None, + allowed_extensions: Optional[List[str]] = None, + download_config: Optional[DownloadConfig] = None, + ) -> "DataFilesDict": + out = cls() + for key, patterns_for_key in patterns.items(): + out[key] = ( + patterns_for_key + if isinstance(patterns_for_key, DataFilesList) + else DataFilesList.from_patterns( + patterns_for_key, + base_path=base_path, + allowed_extensions=allowed_extensions, + download_config=download_config, + ) + ) + return out + + def filter_extensions(self, extensions: List[str]) -> "DataFilesDict": + out = type(self)() + for key, data_files_list in self.items(): + out[key] = data_files_list.filter_extensions(extensions) + return out + + +class DataFilesPatternsList(List[str]): + """ + List of data files patterns (absolute local paths or URLs). + For each pattern there should also be a list of allowed extensions + to keep, or a None ot keep all the files for the pattern. 
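+
+    Example (illustrative sketch; `./my_dataset` is a placeholder directory):
+
+    ```python
+    >>> from datasets.data_files import DataFilesPatternsList
+    >>> patterns = DataFilesPatternsList.from_patterns(["**/*.csv"], allowed_extensions=[".csv"])
+    >>> data_files = patterns.resolve(base_path="./my_dataset")  # lazily resolved into a DataFilesList
+    ```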
+ """ + + def __init__( + self, + patterns: List[str], + allowed_extensions: List[Optional[List[str]]], + ): + super().__init__(patterns) + self.allowed_extensions = allowed_extensions + + def __add__(self, other): + return DataFilesList([*self, *other], self.allowed_extensions + other.allowed_extensions) + + @classmethod + def from_patterns( + cls, patterns: List[str], allowed_extensions: Optional[List[str]] = None + ) -> "DataFilesPatternsList": + return cls(patterns, [allowed_extensions] * len(patterns)) + + def resolve( + self, + base_path: str, + download_config: Optional[DownloadConfig] = None, + ) -> "DataFilesList": + base_path = base_path if base_path is not None else Path().resolve().as_posix() + data_files = [] + for pattern, allowed_extensions in zip(self, self.allowed_extensions): + try: + data_files.extend( + resolve_pattern( + pattern, + base_path=base_path, + allowed_extensions=allowed_extensions, + download_config=download_config, + ) + ) + except FileNotFoundError: + if not has_magic(pattern): + raise + origin_metadata = _get_origin_metadata(data_files, download_config=download_config) + return DataFilesList(data_files, origin_metadata) + + def filter_extensions(self, extensions: List[str]) -> "DataFilesPatternsList": + return DataFilesPatternsList( + self, [allowed_extensions + extensions for allowed_extensions in self.allowed_extensions] + ) + + +class DataFilesPatternsDict(Dict[str, DataFilesPatternsList]): + """ + Dict of split_name -> list of data files patterns (absolute local paths or URLs). + """ + + @classmethod + def from_patterns( + cls, patterns: Dict[str, List[str]], allowed_extensions: Optional[List[str]] = None + ) -> "DataFilesPatternsDict": + out = cls() + for key, patterns_for_key in patterns.items(): + out[key] = ( + patterns_for_key + if isinstance(patterns_for_key, DataFilesPatternsList) + else DataFilesPatternsList.from_patterns( + patterns_for_key, + allowed_extensions=allowed_extensions, + ) + ) + return out + + def resolve( + self, + base_path: str, + download_config: Optional[DownloadConfig] = None, + ) -> "DataFilesDict": + out = DataFilesDict() + for key, data_files_patterns_list in self.items(): + out[key] = data_files_patterns_list.resolve(base_path, download_config) + return out + + def filter_extensions(self, extensions: List[str]) -> "DataFilesPatternsDict": + out = type(self)() + for key, data_files_patterns_list in self.items(): + out[key] = data_files_patterns_list.filter_extensions(extensions) + return out diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/fingerprint.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/fingerprint.py new file mode 100644 index 0000000000000000000000000000000000000000..b26caff328bd799c508641fd7289c8c01a28d5f8 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/fingerprint.py @@ -0,0 +1,494 @@ +import inspect +import os +import random +import shutil +import tempfile +import weakref +from functools import wraps +from pathlib import Path +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union + +import numpy as np +import xxhash + +from . 
import config +from .naming import INVALID_WINDOWS_CHARACTERS_IN_PATH +from .utils._dill import dumps +from .utils.deprecation_utils import deprecated +from .utils.logging import get_logger + + +if TYPE_CHECKING: + from .arrow_dataset import Dataset + + +logger = get_logger(__name__) + + +# Fingerprinting allows to have one deterministic fingerprint per dataset state. +# A dataset fingerprint is updated after each transform. +# Re-running the same transforms on a dataset in a different session results in the same fingerprint. +# This is possible thanks to a custom hashing function that works with most python objects. + +# Fingerprinting is the main mechanism that enables caching. +# The caching mechanism allows to reload an existing cache file if it's already been computed. + + +################# +# Caching +################# + +_CACHING_ENABLED = True +_TEMP_DIR_FOR_TEMP_CACHE_FILES: Optional["_TempCacheDir"] = None +_DATASETS_WITH_TABLE_IN_TEMP_DIR: Optional[weakref.WeakSet] = None + + +class _TempCacheDir: + """ + A temporary directory for storing cached Arrow files with a cleanup that frees references to the Arrow files + before deleting the directory itself to avoid permission errors on Windows. + """ + + def __init__(self): + self.name = tempfile.mkdtemp(prefix=config.TEMP_CACHE_DIR_PREFIX) + self._finalizer = weakref.finalize(self, self._cleanup) + + def _cleanup(self): + for dset in get_datasets_with_cache_file_in_temp_dir(): + dset.__del__() + if os.path.exists(self.name): + try: + shutil.rmtree(self.name) + except Exception as e: + raise OSError( + f"An error occured while trying to delete temporary cache directory {self.name}. Please delete it manually." + ) from e + + def cleanup(self): + if self._finalizer.detach(): + self._cleanup() + + +def maybe_register_dataset_for_temp_dir_deletion(dataset): + """ + This function registers the datasets that have cache files in _TEMP_DIR_FOR_TEMP_CACHE_FILES in order + to properly delete them before deleting the temporary directory. + The temporary directory _TEMP_DIR_FOR_TEMP_CACHE_FILES is used when caching is disabled. + """ + if _TEMP_DIR_FOR_TEMP_CACHE_FILES is None: + return + + global _DATASETS_WITH_TABLE_IN_TEMP_DIR + if _DATASETS_WITH_TABLE_IN_TEMP_DIR is None: + _DATASETS_WITH_TABLE_IN_TEMP_DIR = weakref.WeakSet() + if any( + Path(_TEMP_DIR_FOR_TEMP_CACHE_FILES.name) in Path(cache_file["filename"]).parents + for cache_file in dataset.cache_files + ): + _DATASETS_WITH_TABLE_IN_TEMP_DIR.add(dataset) + + +def get_datasets_with_cache_file_in_temp_dir(): + return list(_DATASETS_WITH_TABLE_IN_TEMP_DIR) if _DATASETS_WITH_TABLE_IN_TEMP_DIR is not None else [] + + +def enable_caching(): + """ + When applying transforms on a dataset, the data are stored in cache files. + The caching mechanism allows to reload an existing cache file if it's already been computed. + + Reloading a dataset is possible since the cache files are named using the dataset fingerprint, which is updated + after each transform. + + If disabled, the library will no longer reload cached datasets files when applying transforms to the datasets. + More precisely, if the caching is disabled: + - cache files are always recreated + - cache files are written to a temporary directory that is deleted when session closes + - cache files are named using a random hash instead of the dataset fingerprint + - use [`~datasets.Dataset.save_to_disk`] to save a transformed dataset or it will be deleted when session closes + - caching doesn't affect [`~datasets.load_dataset`]. 
If you want to regenerate a dataset from scratch you should use + the `download_mode` parameter in [`~datasets.load_dataset`]. + """ + global _CACHING_ENABLED + _CACHING_ENABLED = True + + +def disable_caching(): + """ + When applying transforms on a dataset, the data are stored in cache files. + The caching mechanism allows to reload an existing cache file if it's already been computed. + + Reloading a dataset is possible since the cache files are named using the dataset fingerprint, which is updated + after each transform. + + If disabled, the library will no longer reload cached datasets files when applying transforms to the datasets. + More precisely, if the caching is disabled: + - cache files are always recreated + - cache files are written to a temporary directory that is deleted when session closes + - cache files are named using a random hash instead of the dataset fingerprint + - use [`~datasets.Dataset.save_to_disk`] to save a transformed dataset or it will be deleted when session closes + - caching doesn't affect [`~datasets.load_dataset`]. If you want to regenerate a dataset from scratch you should use + the `download_mode` parameter in [`~datasets.load_dataset`]. + """ + global _CACHING_ENABLED + _CACHING_ENABLED = False + + +@deprecated( + "Use datasets.enable_caching() or datasets.disable_caching() instead. This function will be removed in a future version of datasets." +) +def set_caching_enabled(boolean: bool): + """ + When applying transforms on a dataset, the data are stored in cache files. + The caching mechanism allows to reload an existing cache file if it's already been computed. + + Reloading a dataset is possible since the cache files are named using the dataset fingerprint, which is updated + after each transform. + + If disabled, the library will no longer reload cached datasets files when applying transforms to the datasets. + More precisely, if the caching is disabled: + - cache files are always recreated + - cache files are written to a temporary directory that is deleted when session closes + - cache files are named using a random hash instead of the dataset fingerprint + - use :func:`datasets.Dataset.save_to_disk` to save a transformed dataset or it will be deleted when session closes + - caching doesn't affect :func:`datasets.load_dataset`. If you want to regenerate a dataset from scratch you should use + the ``download_mode`` parameter in :func:`datasets.load_dataset`. + """ + global _CACHING_ENABLED + _CACHING_ENABLED = bool(boolean) + + +def is_caching_enabled() -> bool: + """ + When applying transforms on a dataset, the data are stored in cache files. + The caching mechanism allows to reload an existing cache file if it's already been computed. + + Reloading a dataset is possible since the cache files are named using the dataset fingerprint, which is updated + after each transform. + + If disabled, the library will no longer reload cached datasets files when applying transforms to the datasets. + More precisely, if the caching is disabled: + - cache files are always recreated + - cache files are written to a temporary directory that is deleted when session closes + - cache files are named using a random hash instead of the dataset fingerprint + - use [`~datasets.Dataset.save_to_disk`]] to save a transformed dataset or it will be deleted when session closes + - caching doesn't affect [`~datasets.load_dataset`]. If you want to regenerate a dataset from scratch you should use + the `download_mode` parameter in [`~datasets.load_dataset`]. 
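+
+    Example (illustrative sketch):
+
+    ```python
+    >>> import datasets
+    >>> datasets.disable_caching()
+    >>> datasets.is_caching_enabled()
+    False
+    >>> datasets.enable_caching()
+    >>> datasets.is_caching_enabled()
+    True
+    ```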
+ """ + global _CACHING_ENABLED + return bool(_CACHING_ENABLED) + + +def get_temporary_cache_files_directory() -> str: + """Return a directory that is deleted when session closes.""" + global _TEMP_DIR_FOR_TEMP_CACHE_FILES + if _TEMP_DIR_FOR_TEMP_CACHE_FILES is None: + _TEMP_DIR_FOR_TEMP_CACHE_FILES = _TempCacheDir() + return _TEMP_DIR_FOR_TEMP_CACHE_FILES.name + + +################# +# Hashing +################# + + +@deprecated("Use `copyreg.pickle` to register a custom reducer.") +def hashregister(*types): + def proxy(func): + for t in types: + Hasher.dispatch[t] = func + return func + + return proxy + + +class Hasher: + """Hasher that accepts python objects as inputs.""" + + dispatch: Dict = {} + + def __init__(self): + self.m = xxhash.xxh64() + + @classmethod + def hash_bytes(cls, value: Union[bytes, List[bytes]]) -> str: + value = [value] if isinstance(value, bytes) else value + m = xxhash.xxh64() + for x in value: + m.update(x) + return m.hexdigest() + + @classmethod + @deprecated("Use `Hasher.hash` instead.") + def hash_default(cls, value: Any) -> str: + return cls.hash(value) + + @classmethod + def hash(cls, value: Any) -> str: + return cls.hash_bytes(dumps(value)) + + def update(self, value: Any) -> None: + header_for_update = f"=={type(value)}==" + value_for_update = self.hash(value) + self.m.update(header_for_update.encode("utf8")) + self.m.update(value_for_update.encode("utf-8")) + + def hexdigest(self) -> str: + return self.m.hexdigest() + + +################# +# Fingerprinting +################# + +fingerprint_rng = random.Random() +# we show a warning only once when fingerprinting fails to avoid spam +fingerprint_warnings: Dict[str, bool] = {} + + +def generate_fingerprint(dataset: "Dataset") -> str: + state = dataset.__dict__ + hasher = Hasher() + for key in sorted(state): + if key == "_fingerprint": + continue + hasher.update(key) + hasher.update(state[key]) + # hash data files last modification timestamps as well + for cache_file in dataset.cache_files: + hasher.update(os.path.getmtime(cache_file["filename"])) + return hasher.hexdigest() + + +def generate_random_fingerprint(nbits: int = 64) -> str: + return f"{fingerprint_rng.getrandbits(nbits):0{nbits//4}x}" + + +def update_fingerprint(fingerprint, transform, transform_args): + global fingerprint_warnings + hasher = Hasher() + hasher.update(fingerprint) + try: + hasher.update(transform) + except: # noqa various errors might raise here from pickle or dill + if _CACHING_ENABLED: + if not fingerprint_warnings.get("update_fingerprint_transform_hash_failed", False): + logger.warning( + f"Transform {transform} couldn't be hashed properly, a random hash was used instead. " + "Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. " + "If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. " + "This warning is only showed once. Subsequent hashing failures won't be showed." + ) + fingerprint_warnings["update_fingerprint_transform_hash_failed"] = True + else: + logger.info(f"Transform {transform} couldn't be hashed properly, a random hash was used instead.") + else: + logger.info( + f"Transform {transform} couldn't be hashed properly, a random hash was used instead. This doesn't affect caching since it's disabled." 
+ ) + + return generate_random_fingerprint() + for key in sorted(transform_args): + hasher.update(key) + try: + hasher.update(transform_args[key]) + except: # noqa various errors might raise here from pickle or dill + if _CACHING_ENABLED: + if not fingerprint_warnings.get("update_fingerprint_transform_hash_failed", False): + logger.warning( + f"Parameter '{key}'={transform_args[key]} of the transform {transform} couldn't be hashed properly, a random hash was used instead. " + "Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. " + "If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. " + "This warning is only showed once. Subsequent hashing failures won't be showed." + ) + fingerprint_warnings["update_fingerprint_transform_hash_failed"] = True + else: + logger.info( + f"Parameter '{key}'={transform_args[key]} of the transform {transform} couldn't be hashed properly, a random hash was used instead." + ) + else: + logger.info( + f"Parameter '{key}'={transform_args[key]} of the transform {transform} couldn't be hashed properly, a random hash was used instead. This doesn't affect caching since it's disabled." + ) + return generate_random_fingerprint() + return hasher.hexdigest() + + +def validate_fingerprint(fingerprint: str, max_length=64): + """ + Make sure the fingerprint is a non-empty string that is not longer that max_length=64 by default, + so that the fingerprint can be used to name cache files without issues. + """ + if not isinstance(fingerprint, str) or not fingerprint: + raise ValueError(f"Invalid fingerprint '{fingerprint}': it should be a non-empty string.") + for invalid_char in INVALID_WINDOWS_CHARACTERS_IN_PATH: + if invalid_char in fingerprint: + raise ValueError( + f"Invalid fingerprint. Bad characters from black list '{INVALID_WINDOWS_CHARACTERS_IN_PATH}' found in '{fingerprint}'. " + f"They could create issues when creating cache files." + ) + if len(fingerprint) > max_length: + raise ValueError( + f"Invalid fingerprint. Maximum lenth is {max_length} but '{fingerprint}' has length {len(fingerprint)}." + "It could create issues when creating cache files." + ) + + +def format_transform_for_fingerprint(func: Callable, version: Optional[str] = None) -> str: + """ + Format a transform to the format that will be used to update the fingerprint. + """ + transform = f"{func.__module__}.{func.__qualname__}" + if version is not None: + transform += f"@{version}" + return transform + + +def format_kwargs_for_fingerprint( + func: Callable, + args: Tuple, + kwargs: Dict[str, Any], + use_kwargs: Optional[List[str]] = None, + ignore_kwargs: Optional[List[str]] = None, + randomized_function: bool = False, +) -> Dict[str, Any]: + """ + Format the kwargs of a transform to the format that will be used to update the fingerprint. 
+ """ + kwargs_for_fingerprint = kwargs.copy() + if args: + params = [p.name for p in inspect.signature(func).parameters.values() if p != p.VAR_KEYWORD] + args = args[1:] # assume the first argument is the dataset + params = params[1:] + kwargs_for_fingerprint.update(zip(params, args)) + else: + del kwargs_for_fingerprint[ + next(iter(inspect.signature(func).parameters)) + ] # assume the first key is the dataset + + # keep the right kwargs to be hashed to generate the fingerprint + + if use_kwargs: + kwargs_for_fingerprint = {k: v for k, v in kwargs_for_fingerprint.items() if k in use_kwargs} + if ignore_kwargs: + kwargs_for_fingerprint = {k: v for k, v in kwargs_for_fingerprint.items() if k not in ignore_kwargs} + if randomized_function: # randomized functions have `seed` and `generator` parameters + if kwargs_for_fingerprint.get("seed") is None and kwargs_for_fingerprint.get("generator") is None: + _, seed, pos, *_ = np.random.get_state() + seed = seed[pos] if pos < 624 else seed[0] + kwargs_for_fingerprint["generator"] = np.random.default_rng(seed) + + # remove kwargs that are the default values + + default_values = { + p.name: p.default for p in inspect.signature(func).parameters.values() if p.default != inspect._empty + } + for default_varname, default_value in default_values.items(): + if default_varname in kwargs_for_fingerprint and kwargs_for_fingerprint[default_varname] == default_value: + kwargs_for_fingerprint.pop(default_varname) + return kwargs_for_fingerprint + + +def fingerprint_transform( + inplace: bool, + use_kwargs: Optional[List[str]] = None, + ignore_kwargs: Optional[List[str]] = None, + fingerprint_names: Optional[List[str]] = None, + randomized_function: bool = False, + version: Optional[str] = None, +): + """ + Wrapper for dataset transforms to update the dataset fingerprint using ``update_fingerprint`` + Args: + inplace (:obj:`bool`): If inplace is True, the fingerprint of the dataset is updated inplace. + Otherwise, a parameter "new_fingerprint" is passed to the wrapped method that should take care of + setting the fingerprint of the returned Dataset. + use_kwargs (:obj:`List[str]`, optional): optional white list of argument names to take into account + to update the fingerprint to the wrapped method that should take care of + setting the fingerprint of the returned Dataset. By default all the arguments are used. + ignore_kwargs (:obj:`List[str]`, optional): optional black list of argument names to take into account + to update the fingerprint. Note that ignore_kwargs prevails on use_kwargs. + fingerprint_names (:obj:`List[str]`, optional, defaults to ["new_fingerprint"]): + If the dataset transforms is not inplace and returns a DatasetDict, then it can require + several fingerprints (one per dataset in the DatasetDict). By specifying fingerprint_names, + one fingerprint named after each element of fingerprint_names is going to be passed. + randomized_function (:obj:`bool`, defaults to False): If the dataset transform is random and has + optional parameters "seed" and "generator", then you can set randomized_function to True. + This way, even if users set "seed" and "generator" to None, then the fingerprint is + going to be randomly generated depending on numpy's current state. In this case, the + generator is set to np.random.default_rng(np.random.get_state()[1][0]). + version (:obj:`str`, optional): version of the transform. The version is taken into account when + computing the fingerprint. 
If a datase transform changes (or at least if the output data + that are cached changes), then one should increase the version. If the version stays the + same, then old cached data could be reused that are not compatible with the new transform. + It should be in the format "MAJOR.MINOR.PATCH". + """ + + if use_kwargs is not None and not isinstance(use_kwargs, list): + raise ValueError(f"use_kwargs is supposed to be a list, not {type(use_kwargs)}") + + if ignore_kwargs is not None and not isinstance(ignore_kwargs, list): + raise ValueError(f"ignore_kwargs is supposed to be a list, not {type(use_kwargs)}") + + if inplace and fingerprint_names: + raise ValueError("fingerprint_names are only used when inplace is False") + + fingerprint_names = fingerprint_names if fingerprint_names is not None else ["new_fingerprint"] + + def _fingerprint(func): + if not inplace and not all(name in func.__code__.co_varnames for name in fingerprint_names): + raise ValueError(f"function {func} is missing parameters {fingerprint_names} in signature") + + if randomized_function: # randomized function have seed and generator parameters + if "seed" not in func.__code__.co_varnames: + raise ValueError(f"'seed' must be in {func}'s signature") + if "generator" not in func.__code__.co_varnames: + raise ValueError(f"'generator' must be in {func}'s signature") + # this call has to be outside the wrapper or since __qualname__ changes in multiprocessing + transform = format_transform_for_fingerprint(func, version=version) + + @wraps(func) + def wrapper(*args, **kwargs): + kwargs_for_fingerprint = format_kwargs_for_fingerprint( + func, + args, + kwargs, + use_kwargs=use_kwargs, + ignore_kwargs=ignore_kwargs, + randomized_function=randomized_function, + ) + + if args: + dataset: Dataset = args[0] + args = args[1:] + else: + dataset: Dataset = kwargs.pop(next(iter(inspect.signature(func).parameters))) + + # compute new_fingerprint and add it to the args of not in-place transforms + if inplace: + new_fingerprint = update_fingerprint(dataset._fingerprint, transform, kwargs_for_fingerprint) + else: + for fingerprint_name in fingerprint_names: # transforms like `train_test_split` have several hashes + if kwargs.get(fingerprint_name) is None: + kwargs_for_fingerprint["fingerprint_name"] = fingerprint_name + kwargs[fingerprint_name] = update_fingerprint( + dataset._fingerprint, transform, kwargs_for_fingerprint + ) + else: + validate_fingerprint(kwargs[fingerprint_name]) + + # Call actual function + + out = func(dataset, *args, **kwargs) + + # Update fingerprint of in-place transforms + update in-place history of transforms + + if inplace: # update after calling func so that the fingerprint doesn't change if the function fails + dataset._fingerprint = new_fingerprint + + return out + + wrapper._decorator_name_ = "fingerprint" + return wrapper + + return _fingerprint diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/hub.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/hub.py new file mode 100644 index 0000000000000000000000000000000000000000..2d8b60c9feaa3149f3d4b40bc13968b624cb960f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/hub.py @@ -0,0 +1,230 @@ +import time +from itertools import chain +from typing import Optional, Union + +from huggingface_hub import ( + CommitInfo, + CommitOperationAdd, + CommitOperationDelete, + DatasetCard, + DatasetCardData, + HfApi, + HfFileSystem, +) +from huggingface_hub.utils import HfHubHTTPError + 
+import datasets.config +from datasets.info import DatasetInfosDict +from datasets.inspect import get_dataset_config_names, get_dataset_default_config_name +from datasets.load import load_dataset, load_dataset_builder +from datasets.utils.metadata import MetadataConfigs + + +def convert_to_parquet( + repo_id: str, + revision: Optional[str] = None, + token: Optional[Union[bool, str]] = None, + trust_remote_code: Optional[bool] = None, +) -> CommitInfo: + """Convert Hub [script-based dataset](dataset_script) to Parquet [data-only dataset](repository_structure), so that + the dataset viewer will be supported. + + This function: + - makes a copy of the script on the "main" branch into a dedicated branch called "script" (if it does not already exist) + - creates a pull request to the Hub dataset to convert it to Parquet files (and deletes the script from the main branch) + + If in the future you need to recreate the Parquet files from the "script" branch, pass the `revision="script"` argument. + + Note that you should pass the `trust_remote_code=True` argument only if you trust the remote code to be executed locally on your machine. + + Args: + repo_id (`str`): ID of the source Hub dataset repository, in the following format: `/` or + `/`. + revision (`str`, *optional*): Branch of the source Hub dataset repository. Defaults to the `"main"` branch. + token (`bool` or `str`, *optional*): Authentication token for the Hugging Face Hub. + trust_remote_code (`bool`, defaults to `True`): Whether you trust the remote code of the Hub script-based + dataset to be executed locally on your machine. This option should only be set to `True` for repositories + where you have read the code and which you trust. + + + + `trust_remote_code` will default to False in the next major release. 
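+
+    Example (a minimal sketch; the repository ID is a placeholder):
+
+    ```py
+    >>> from datasets.hub import convert_to_parquet
+    >>> commit_info = convert_to_parquet("USERNAME/dataset_name", trust_remote_code=True)
+    ```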
+ + + + Returns: + `huggingface_hub.CommitInfo` + """ + print(f"{repo_id}") + configs = get_dataset_config_names(repo_id, token=token, revision=revision, trust_remote_code=trust_remote_code) + print(f"{configs = }") + default_config = get_dataset_default_config_name( + repo_id, token=token, revision=revision, trust_remote_code=trust_remote_code + ) + print(f"{default_config = }") + if default_config: + config = default_config + configs.remove(default_config) + else: + config = configs.pop(0) + print(f"{config = }") + dataset = load_dataset(repo_id, config, revision=revision, trust_remote_code=trust_remote_code) + commit_info = dataset.push_to_hub( + repo_id, + config_name=config, + commit_message="Convert dataset to Parquet", + commit_description="Convert dataset to Parquet.", + create_pr=True, + token=token, + set_default=default_config is not None, + ) + time.sleep(5) + pr_revision, pr_url = commit_info.pr_revision, commit_info.pr_url + for config in configs: + print(f"{config = }") + dataset = load_dataset(repo_id, config, revision=revision, trust_remote_code=trust_remote_code) + dataset.push_to_hub( + repo_id, + config_name=config, + commit_message=f"Add '{config}' config data files", + revision=pr_revision, + token=token, + ) + time.sleep(5) + _delete_files(repo_id, revision=pr_revision, token=token) + if not revision: + api = HfApi(endpoint=datasets.config.HF_ENDPOINT, token=token) + try: + api.create_branch(repo_id, branch="script", repo_type="dataset", token=token, exist_ok=True) + except HfHubHTTPError: + pass + print(f"You can find your PR to convert the dataset to Parquet at: {pr_url}") + return commit_info + + +def delete_from_hub( + repo_id: str, + config_name: str, + revision: Optional[str] = None, + token: Optional[Union[bool, str]] = None, +) -> CommitInfo: + """Delete a dataset configuration from a [data-only dataset](repository_structure) on the Hub. + + Args: + repo_id (`str`): ID of the Hub dataset repository, in the following format: `/` or + `/`. + config_name (`str`): Name of the dataset configuration. + revision (`str`, *optional*): Branch to delete the configuration from. Defaults to the `"main"` branch. + token (`bool` or `str`, *optional*): Authentication token for the Hugging Face Hub. 
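+
+    Example (a minimal sketch; the repository and config names are placeholders):
+
+    ```py
+    >>> from datasets.hub import delete_from_hub
+    >>> commit_info = delete_from_hub("USERNAME/dataset_name", "config_name")
+    ```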
+ + Returns: + `huggingface_hub.CommitInfo` + """ + operations = [] + # data_files + fs = HfFileSystem(endpoint=datasets.config.HF_ENDPOINT, token=token) + builder = load_dataset_builder(repo_id, config_name, revision=revision, token=token, trust_remote_code=False) + for data_file in chain(*builder.config.data_files.values()): + data_file_resolved_path = fs.resolve_path(data_file) + if data_file_resolved_path.repo_id == repo_id: + operations.append(CommitOperationDelete(path_in_repo=data_file_resolved_path.path_in_repo)) + # README.md + dataset_card = DatasetCard.load(repo_id) + # config_names + if dataset_card.data.get("config_names", None) and config_name in dataset_card.data["config_names"]: + dataset_card.data["config_names"].remove(config_name) + # metadata_configs + metadata_configs = MetadataConfigs.from_dataset_card_data(dataset_card.data) + if metadata_configs: + _ = metadata_configs.pop(config_name, None) + dataset_card_data = DatasetCardData() + metadata_configs.to_dataset_card_data(dataset_card_data) + if datasets.config.METADATA_CONFIGS_FIELD in dataset_card_data: + dataset_card.data[datasets.config.METADATA_CONFIGS_FIELD] = dataset_card_data[ + datasets.config.METADATA_CONFIGS_FIELD + ] + else: + _ = dataset_card.data.pop(datasets.config.METADATA_CONFIGS_FIELD, None) + # dataset_info + dataset_infos: DatasetInfosDict = DatasetInfosDict.from_dataset_card_data(dataset_card.data) + if dataset_infos: + _ = dataset_infos.pop(config_name, None) + dataset_card_data = DatasetCardData() + dataset_infos.to_dataset_card_data(dataset_card_data) + if "dataset_info" in dataset_card_data: + dataset_card.data["dataset_info"] = dataset_card_data["dataset_info"] + else: + _ = dataset_card.data.pop("dataset_info", None) + # Commit + operations.append( + CommitOperationAdd(path_in_repo=datasets.config.REPOCARD_FILENAME, path_or_fileobj=str(dataset_card).encode()) + ) + api = HfApi(endpoint=datasets.config.HF_ENDPOINT, token=token) + commit_info = api.create_commit( + repo_id, + operations=operations, + commit_message=f"Delete '{config_name}' config", + commit_description=f"Delete '{config_name}' config.", + token=token, + repo_type="dataset", + revision=revision, + create_pr=True, + ) + print(f"You can find your PR to delete the dataset config at: {commit_info.pr_url}") + return commit_info + + +def _delete_files(dataset_id, revision=None, token=None): + dataset_name = dataset_id.split("/")[-1] + hf_api = HfApi(endpoint=datasets.config.HF_ENDPOINT, token=token) + repo_files = hf_api.list_repo_files( + dataset_id, + repo_type="dataset", + ) + if repo_files: + legacy_json_file = [] + python_files = [] + data_files = [] + for filename in repo_files: + if filename in {".gitattributes", "README.md"}: + continue + elif filename == f"{dataset_name}.py": + hf_api.delete_file( + filename, + dataset_id, + repo_type="dataset", + revision=revision, + commit_message="Delete loading script", + ) + elif filename == "dataset_infos.json": + legacy_json_file.append(filename) + elif filename.endswith(".py"): + python_files.append(filename) + else: + data_files.append(filename) + if legacy_json_file: + hf_api.delete_file( + "dataset_infos.json", + dataset_id, + repo_type="dataset", + revision=revision, + commit_message="Delete legacy dataset_infos.json", + ) + if python_files: + for filename in python_files: + hf_api.delete_file( + filename, + dataset_id, + repo_type="dataset", + revision=revision, + commit_message="Delete loading script auxiliary file", + ) + if data_files: + for filename in data_files: + 
hf_api.delete_file( + filename, + dataset_id, + repo_type="dataset", + revision=revision, + commit_message="Delete data file", + ) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/info.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/info.py new file mode 100644 index 0000000000000000000000000000000000000000..557f5b77d3f7ff754e4a9482dada99842511a160 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/info.py @@ -0,0 +1,593 @@ +# Copyright 2020 The HuggingFace Datasets Authors and the TensorFlow Datasets Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""DatasetInfo and MetricInfo record information we know about a dataset and a metric. + +This includes things that we know about the dataset statically, i.e.: + - description + - canonical location + - does it have validation and tests splits + - size + - etc. + +This also includes the things that can and should be computed once we've +processed the dataset as well: + - number of examples (in each split) + - etc. +""" + +import copy +import dataclasses +import json +import os +import posixpath +import warnings +from dataclasses import dataclass +from pathlib import Path +from typing import ClassVar, Dict, List, Optional, Union + +import fsspec +from fsspec.core import url_to_fs +from huggingface_hub import DatasetCard, DatasetCardData + +from . import config +from .features import Features, Value +from .splits import SplitDict +from .tasks import TaskTemplate, task_template_from_dict +from .utils import Version +from .utils.logging import get_logger +from .utils.py_utils import asdict, unique_values + + +logger = get_logger(__name__) + + +@dataclass +class SupervisedKeysData: + input: str = "" + output: str = "" + + +@dataclass +class DownloadChecksumsEntryData: + key: str = "" + value: str = "" + + +class MissingCachedSizesConfigError(Exception): + """The expected cached sizes of the download file are missing.""" + + +class NonMatchingCachedSizesError(Exception): + """The prepared split doesn't have expected sizes.""" + + +@dataclass +class PostProcessedInfo: + features: Optional[Features] = None + resources_checksums: Optional[dict] = None + + def __post_init__(self): + # Convert back to the correct classes when we reload from dict + if self.features is not None and not isinstance(self.features, Features): + self.features = Features.from_dict(self.features) + + @classmethod + def from_dict(cls, post_processed_info_dict: dict) -> "PostProcessedInfo": + field_names = {f.name for f in dataclasses.fields(cls)} + return cls(**{k: v for k, v in post_processed_info_dict.items() if k in field_names}) + + +@dataclass +class DatasetInfo: + """Information about a dataset. + + `DatasetInfo` documents datasets, including its name, version, and features. + See the constructor arguments and properties for a full list. + + Not all fields are known on construction and may be updated later. 
+ + Attributes: + description (`str`): + A description of the dataset. + citation (`str`): + A BibTeX citation of the dataset. + homepage (`str`): + A URL to the official homepage for the dataset. + license (`str`): + The dataset's license. It can be the name of the license or a paragraph containing the terms of the license. + features ([`Features`], *optional*): + The features used to specify the dataset's column types. + post_processed (`PostProcessedInfo`, *optional*): + Information regarding the resources of a possible post-processing of a dataset. For example, it can contain the information of an index. + supervised_keys (`SupervisedKeysData`, *optional*): + Specifies the input feature and the label for supervised learning if applicable for the dataset (legacy from TFDS). + builder_name (`str`, *optional*): + The name of the `GeneratorBasedBuilder` subclass used to create the dataset. Usually matched to the corresponding script name. It is also the snake_case version of the dataset builder class name. + config_name (`str`, *optional*): + The name of the configuration derived from [`BuilderConfig`]. + version (`str` or [`Version`], *optional*): + The version of the dataset. + splits (`dict`, *optional*): + The mapping between split name and metadata. + download_checksums (`dict`, *optional*): + The mapping between the URL to download the dataset's checksums and corresponding metadata. + download_size (`int`, *optional*): + The size of the files to download to generate the dataset, in bytes. + post_processing_size (`int`, *optional*): + Size of the dataset in bytes after post-processing, if any. + dataset_size (`int`, *optional*): + The combined size in bytes of the Arrow tables for all splits. + size_in_bytes (`int`, *optional*): + The combined size in bytes of all files associated with the dataset (downloaded files + Arrow files). + task_templates (`List[TaskTemplate]`, *optional*): + The task templates to prepare the dataset for during training and evaluation. Each template casts the dataset's [`Features`] to standardized column names and types as detailed in `datasets.tasks`. + **config_kwargs (additional keyword arguments): + Keyword arguments to be passed to the [`BuilderConfig`] and used in the [`DatasetBuilder`]. 
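+
+    Example (a minimal sketch of constructing the dataclass directly):
+
+    ```py
+    >>> from datasets import DatasetInfo
+    >>> info = DatasetInfo(description="A toy dataset", license="mit")
+    >>> info.license
+    'mit'
+    ```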
+ """ + + # Set in the dataset scripts + description: str = dataclasses.field(default_factory=str) + citation: str = dataclasses.field(default_factory=str) + homepage: str = dataclasses.field(default_factory=str) + license: str = dataclasses.field(default_factory=str) + features: Optional[Features] = None + post_processed: Optional[PostProcessedInfo] = None + supervised_keys: Optional[SupervisedKeysData] = None + task_templates: Optional[List[TaskTemplate]] = None + + # Set later by the builder + builder_name: Optional[str] = None + dataset_name: Optional[str] = None # for packaged builders, to be different from builder_name + config_name: Optional[str] = None + version: Optional[Union[str, Version]] = None + # Set later by `download_and_prepare` + splits: Optional[dict] = None + download_checksums: Optional[dict] = None + download_size: Optional[int] = None + post_processing_size: Optional[int] = None + dataset_size: Optional[int] = None + size_in_bytes: Optional[int] = None + + _INCLUDED_INFO_IN_YAML: ClassVar[List[str]] = [ + "config_name", + "download_size", + "dataset_size", + "features", + "splits", + ] + + def __post_init__(self): + # Convert back to the correct classes when we reload from dict + if self.features is not None and not isinstance(self.features, Features): + self.features = Features.from_dict(self.features) + if self.post_processed is not None and not isinstance(self.post_processed, PostProcessedInfo): + self.post_processed = PostProcessedInfo.from_dict(self.post_processed) + if self.version is not None and not isinstance(self.version, Version): + if isinstance(self.version, str): + self.version = Version(self.version) + else: + self.version = Version.from_dict(self.version) + if self.splits is not None and not isinstance(self.splits, SplitDict): + self.splits = SplitDict.from_split_dict(self.splits) + if self.supervised_keys is not None and not isinstance(self.supervised_keys, SupervisedKeysData): + if isinstance(self.supervised_keys, (tuple, list)): + self.supervised_keys = SupervisedKeysData(*self.supervised_keys) + else: + self.supervised_keys = SupervisedKeysData(**self.supervised_keys) + + # Parse and make a list of templates + if self.task_templates is not None: + if isinstance(self.task_templates, (list, tuple)): + templates = [ + template if isinstance(template, TaskTemplate) else task_template_from_dict(template) + for template in self.task_templates + ] + self.task_templates = [template for template in templates if template is not None] + elif isinstance(self.task_templates, TaskTemplate): + self.task_templates = [self.task_templates] + else: + template = task_template_from_dict(self.task_templates) + self.task_templates = [template] if template is not None else [] + + # Align task templates with features + if self.task_templates is not None: + self.task_templates = list(self.task_templates) + if self.features is not None: + self.task_templates = [ + template.align_with_features(self.features) for template in (self.task_templates) + ] + + def write_to_directory( + self, dataset_info_dir, pretty_print=False, fs="deprecated", storage_options: Optional[dict] = None + ): + """Write `DatasetInfo` and license (if present) as JSON files to `dataset_info_dir`. + + Args: + dataset_info_dir (`str`): + Destination directory. + pretty_print (`bool`, defaults to `False`): + If `True`, the JSON will be pretty-printed with the indent level of 4. + fs (`fsspec.spec.AbstractFileSystem`, *optional*): + Instance of the remote filesystem used to download the files from. 
+ + + + `fs` was deprecated in version 2.9.0 and will be removed in 3.0.0. + Please use `storage_options` instead, e.g. `storage_options=fs.storage_options`. + + + + storage_options (`dict`, *optional*): + Key/value pairs to be passed on to the file-system backend, if any. + + + + Example: + + ```py + >>> from datasets import load_dataset + >>> ds = load_dataset("rotten_tomatoes", split="validation") + >>> ds.info.write_to_directory("/path/to/directory/") + ``` + """ + if fs != "deprecated": + warnings.warn( + "'fs' was deprecated in favor of 'storage_options' in version 2.9.0 and will be removed in 3.0.0.\n" + "You can remove this warning by passing 'storage_options=fs.storage_options' instead.", + FutureWarning, + ) + storage_options = fs.storage_options + + fs: fsspec.AbstractFileSystem + fs, *_ = url_to_fs(dataset_info_dir, **(storage_options or {})) + with fs.open(posixpath.join(dataset_info_dir, config.DATASET_INFO_FILENAME), "wb") as f: + self._dump_info(f, pretty_print=pretty_print) + if self.license: + with fs.open(posixpath.join(dataset_info_dir, config.LICENSE_FILENAME), "wb") as f: + self._dump_license(f) + + def _dump_info(self, file, pretty_print=False): + """Dump info in `file` file-like object open in bytes mode (to support remote files)""" + file.write(json.dumps(asdict(self), indent=4 if pretty_print else None).encode("utf-8")) + + def _dump_license(self, file): + """Dump license in `file` file-like object open in bytes mode (to support remote files)""" + file.write(self.license.encode("utf-8")) + + @classmethod + def from_merge(cls, dataset_infos: List["DatasetInfo"]): + dataset_infos = [dset_info.copy() for dset_info in dataset_infos if dset_info is not None] + + if len(dataset_infos) > 0 and all(dataset_infos[0] == dset_info for dset_info in dataset_infos): + # if all dataset_infos are equal we don't need to merge. Just return the first. + return dataset_infos[0] + + description = "\n\n".join(unique_values(info.description for info in dataset_infos)).strip() + citation = "\n\n".join(unique_values(info.citation for info in dataset_infos)).strip() + homepage = "\n\n".join(unique_values(info.homepage for info in dataset_infos)).strip() + license = "\n\n".join(unique_values(info.license for info in dataset_infos)).strip() + features = None + supervised_keys = None + task_templates = None + + # Find common task templates across all dataset infos + all_task_templates = [info.task_templates for info in dataset_infos if info.task_templates is not None] + if len(all_task_templates) > 1: + task_templates = list(set(all_task_templates[0]).intersection(*all_task_templates[1:])) + elif len(all_task_templates): + task_templates = list(set(all_task_templates[0])) + # If no common task templates found, replace empty list with None + task_templates = task_templates if task_templates else None + + return cls( + description=description, + citation=citation, + homepage=homepage, + license=license, + features=features, + supervised_keys=supervised_keys, + task_templates=task_templates, + ) + + @classmethod + def from_directory( + cls, dataset_info_dir: str, fs="deprecated", storage_options: Optional[dict] = None + ) -> "DatasetInfo": + """Create [`DatasetInfo`] from the JSON file in `dataset_info_dir`. + + This function updates all the dynamically generated fields (num_examples, + hash, time of creation,...) of the [`DatasetInfo`]. + + This will overwrite all previous metadata. + + Args: + dataset_info_dir (`str`): + The directory containing the metadata file. 
This + should be the root directory of a specific dataset version. + fs (`fsspec.spec.AbstractFileSystem`, *optional*): + Instance of the remote filesystem used to download the files from. + + + + `fs` was deprecated in version 2.9.0 and will be removed in 3.0.0. + Please use `storage_options` instead, e.g. `storage_options=fs.storage_options`. + + + + storage_options (`dict`, *optional*): + Key/value pairs to be passed on to the file-system backend, if any. + + + + Example: + + ```py + >>> from datasets import DatasetInfo + >>> ds_info = DatasetInfo.from_directory("/path/to/directory/") + ``` + """ + if fs != "deprecated": + warnings.warn( + "'fs' was deprecated in favor of 'storage_options' in version 2.9.0 and will be removed in 3.0.0.\n" + "You can remove this warning by passing 'storage_options=fs.storage_options' instead.", + FutureWarning, + ) + storage_options = fs.storage_options + + fs: fsspec.AbstractFileSystem + fs, *_ = url_to_fs(dataset_info_dir, **(storage_options or {})) + logger.info(f"Loading Dataset info from {dataset_info_dir}") + if not dataset_info_dir: + raise ValueError("Calling DatasetInfo.from_directory() with undefined dataset_info_dir.") + with fs.open(posixpath.join(dataset_info_dir, config.DATASET_INFO_FILENAME), "r", encoding="utf-8") as f: + dataset_info_dict = json.load(f) + return cls.from_dict(dataset_info_dict) + + @classmethod + def from_dict(cls, dataset_info_dict: dict) -> "DatasetInfo": + field_names = {f.name for f in dataclasses.fields(cls)} + return cls(**{k: v for k, v in dataset_info_dict.items() if k in field_names}) + + def update(self, other_dataset_info: "DatasetInfo", ignore_none=True): + self_dict = self.__dict__ + self_dict.update( + **{ + k: copy.deepcopy(v) + for k, v in other_dataset_info.__dict__.items() + if (v is not None or not ignore_none) + } + ) + + def copy(self) -> "DatasetInfo": + return self.__class__(**{k: copy.deepcopy(v) for k, v in self.__dict__.items()}) + + def _to_yaml_dict(self) -> dict: + yaml_dict = {} + dataset_info_dict = asdict(self) + for key in dataset_info_dict: + if key in self._INCLUDED_INFO_IN_YAML: + value = getattr(self, key) + if hasattr(value, "_to_yaml_list"): # Features, SplitDict + yaml_dict[key] = value._to_yaml_list() + elif hasattr(value, "_to_yaml_string"): # Version + yaml_dict[key] = value._to_yaml_string() + else: + yaml_dict[key] = value + return yaml_dict + + @classmethod + def _from_yaml_dict(cls, yaml_data: dict) -> "DatasetInfo": + yaml_data = copy.deepcopy(yaml_data) + if yaml_data.get("features") is not None: + yaml_data["features"] = Features._from_yaml_list(yaml_data["features"]) + if yaml_data.get("splits") is not None: + yaml_data["splits"] = SplitDict._from_yaml_list(yaml_data["splits"]) + field_names = {f.name for f in dataclasses.fields(cls)} + return cls(**{k: v for k, v in yaml_data.items() if k in field_names}) + + +class DatasetInfosDict(Dict[str, DatasetInfo]): + def write_to_directory(self, dataset_infos_dir, overwrite=False, pretty_print=False) -> None: + total_dataset_infos = {} + dataset_infos_path = os.path.join(dataset_infos_dir, config.DATASETDICT_INFOS_FILENAME) + dataset_readme_path = os.path.join(dataset_infos_dir, config.REPOCARD_FILENAME) + if not overwrite: + total_dataset_infos = self.from_directory(dataset_infos_dir) + total_dataset_infos.update(self) + if os.path.exists(dataset_infos_path): + # for backward compatibility, let's update the JSON file if it exists + with open(dataset_infos_path, "w", encoding="utf-8") as f: + dataset_infos_dict = { + 
config_name: asdict(dset_info) for config_name, dset_info in total_dataset_infos.items() + } + json.dump(dataset_infos_dict, f, indent=4 if pretty_print else None) + # Dump the infos in the YAML part of the README.md file + if os.path.exists(dataset_readme_path): + dataset_card = DatasetCard.load(dataset_readme_path) + dataset_card_data = dataset_card.data + else: + dataset_card = None + dataset_card_data = DatasetCardData() + if total_dataset_infos: + total_dataset_infos.to_dataset_card_data(dataset_card_data) + dataset_card = ( + DatasetCard("---\n" + str(dataset_card_data) + "\n---\n") if dataset_card is None else dataset_card + ) + dataset_card.save(Path(dataset_readme_path)) + + @classmethod + def from_directory(cls, dataset_infos_dir) -> "DatasetInfosDict": + logger.info(f"Loading Dataset Infos from {dataset_infos_dir}") + # Load the info from the YAML part of README.md + if os.path.exists(os.path.join(dataset_infos_dir, config.REPOCARD_FILENAME)): + dataset_card_data = DatasetCard.load(Path(dataset_infos_dir) / config.REPOCARD_FILENAME).data + if "dataset_info" in dataset_card_data: + return cls.from_dataset_card_data(dataset_card_data) + if os.path.exists(os.path.join(dataset_infos_dir, config.DATASETDICT_INFOS_FILENAME)): + # this is just to have backward compatibility with dataset_infos.json files + with open(os.path.join(dataset_infos_dir, config.DATASETDICT_INFOS_FILENAME), encoding="utf-8") as f: + return cls( + { + config_name: DatasetInfo.from_dict(dataset_info_dict) + for config_name, dataset_info_dict in json.load(f).items() + } + ) + else: + return cls() + + @classmethod + def from_dataset_card_data(cls, dataset_card_data: DatasetCardData) -> "DatasetInfosDict": + if isinstance(dataset_card_data.get("dataset_info"), (list, dict)): + if isinstance(dataset_card_data["dataset_info"], list): + return cls( + { + dataset_info_yaml_dict.get("config_name", "default"): DatasetInfo._from_yaml_dict( + dataset_info_yaml_dict + ) + for dataset_info_yaml_dict in dataset_card_data["dataset_info"] + } + ) + else: + dataset_info = DatasetInfo._from_yaml_dict(dataset_card_data["dataset_info"]) + dataset_info.config_name = dataset_card_data["dataset_info"].get("config_name", "default") + return cls({dataset_info.config_name: dataset_info}) + else: + return cls() + + def to_dataset_card_data(self, dataset_card_data: DatasetCardData) -> None: + if self: + # first get existing metadata info + if "dataset_info" in dataset_card_data and isinstance(dataset_card_data["dataset_info"], dict): + dataset_metadata_infos = { + dataset_card_data["dataset_info"].get("config_name", "default"): dataset_card_data["dataset_info"] + } + elif "dataset_info" in dataset_card_data and isinstance(dataset_card_data["dataset_info"], list): + dataset_metadata_infos = { + config_metadata["config_name"]: config_metadata + for config_metadata in dataset_card_data["dataset_info"] + } + else: + dataset_metadata_infos = {} + # update/rewrite existing metadata info with the one to dump + total_dataset_infos = { + **dataset_metadata_infos, + **{config_name: dset_info._to_yaml_dict() for config_name, dset_info in self.items()}, + } + # the config_name from the dataset_infos_dict takes over the config_name of the DatasetInfo + for config_name, dset_info_yaml_dict in total_dataset_infos.items(): + dset_info_yaml_dict["config_name"] = config_name + if len(total_dataset_infos) == 1: + # use a struct instead of a list of configurations, since there's only one + dataset_card_data["dataset_info"] = 
next(iter(total_dataset_infos.values())) + config_name = dataset_card_data["dataset_info"].pop("config_name", None) + if config_name != "default": + # if config_name is not "default" preserve it and put at the first position + dataset_card_data["dataset_info"] = { + "config_name": config_name, + **dataset_card_data["dataset_info"], + } + else: + dataset_card_data["dataset_info"] = [] + for config_name, dataset_info_yaml_dict in sorted(total_dataset_infos.items()): + # add the config_name field in first position + dataset_info_yaml_dict.pop("config_name", None) + dataset_info_yaml_dict = {"config_name": config_name, **dataset_info_yaml_dict} + dataset_card_data["dataset_info"].append(dataset_info_yaml_dict) + + +@dataclass +class MetricInfo: + """Information about a metric. + + `MetricInfo` documents a metric, including its name, version, and features. + See the constructor arguments and properties for a full list. + + Note: Not all fields are known on construction and may be updated later. + """ + + # Set in the dataset scripts + description: str + citation: str + features: Features + inputs_description: str = dataclasses.field(default_factory=str) + homepage: str = dataclasses.field(default_factory=str) + license: str = dataclasses.field(default_factory=str) + codebase_urls: List[str] = dataclasses.field(default_factory=list) + reference_urls: List[str] = dataclasses.field(default_factory=list) + streamable: bool = False + format: Optional[str] = None + + # Set later by the builder + metric_name: Optional[str] = None + config_name: Optional[str] = None + experiment_id: Optional[str] = None + + def __post_init__(self): + if self.format is not None: + for key, value in self.features.items(): + if not isinstance(value, Value): + raise ValueError( + f"When using 'numpy' format, all features should be a `datasets.Value` feature. " + f"Here {key} is an instance of {value.__class__.__name__}" + ) + + def write_to_directory(self, metric_info_dir, pretty_print=False): + """Write `MetricInfo` as JSON to `metric_info_dir`. + Also save the license separately in LICENCE. + If `pretty_print` is True, the JSON will be pretty-printed with the indent level of 4. + + Example: + + ```py + >>> from datasets import load_metric + >>> metric = load_metric("accuracy") + >>> metric.info.write_to_directory("/path/to/directory/") + ``` + """ + with open(os.path.join(metric_info_dir, config.METRIC_INFO_FILENAME), "w", encoding="utf-8") as f: + json.dump(asdict(self), f, indent=4 if pretty_print else None) + + if self.license: + with open(os.path.join(metric_info_dir, config.LICENSE_FILENAME), "w", encoding="utf-8") as f: + f.write(self.license) + + @classmethod + def from_directory(cls, metric_info_dir) -> "MetricInfo": + """Create MetricInfo from the JSON file in `metric_info_dir`. + + Args: + metric_info_dir: `str` The directory containing the metadata file. This + should be the root directory of a specific dataset version. 
+ + Example: + + ```py + >>> from datasets import MetricInfo + >>> metric_info = MetricInfo.from_directory("/path/to/directory/") + ``` + """ + logger.info(f"Loading Metric info from {metric_info_dir}") + if not metric_info_dir: + raise ValueError("Calling MetricInfo.from_directory() with undefined metric_info_dir.") + + with open(os.path.join(metric_info_dir, config.METRIC_INFO_FILENAME), encoding="utf-8") as f: + metric_info_dict = json.load(f) + return cls.from_dict(metric_info_dict) + + @classmethod + def from_dict(cls, metric_info_dict: dict) -> "MetricInfo": + field_names = {f.name for f in dataclasses.fields(cls)} + return cls(**{k: v for k, v in metric_info_dict.items() if k in field_names}) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py new file mode 100644 index 0000000000000000000000000000000000000000..d49ffd47a498a8583d048143f9f9cc401d3253f2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py @@ -0,0 +1,2816 @@ +import copy +import itertools +import sys +import warnings +from collections import Counter +from copy import deepcopy +from dataclasses import dataclass +from functools import partial +from itertools import cycle, islice +from typing import Any, Callable, Dict, Iterable, Iterator, List, Optional, Tuple, Union + +import fsspec.asyn +import numpy as np +import pyarrow as pa + +from . import config +from .arrow_dataset import Dataset, DatasetInfoMixin +from .features import Features +from .features.features import FeatureType, _align_features, _check_if_features_can_be_aligned, cast_to_python_objects +from .formatting import PythonFormatter, TensorFormatter, get_format_type_from_alias, get_formatter +from .info import DatasetInfo +from .splits import NamedSplit +from .table import cast_table_to_features, read_schema_from_file, table_cast +from .utils.logging import get_logger +from .utils.py_utils import Literal +from .utils.sharding import _merge_gen_kwargs, _number_of_shards_in_gen_kwargs, _shuffle_gen_kwargs, _split_gen_kwargs + + +logger = get_logger(__name__) + +Key = Union[int, str] + + +def identity_func(x): + return x + + +def _rename_columns_fn(example: Dict, column_mapping: Dict[str, str]): + if any(col not in example for col in column_mapping): + raise ValueError( + f"Error when renaming {list(column_mapping)} to {list(column_mapping.values())}: columns {set(column_mapping) - set(example)} are not in the dataset." + ) + if any(col in example for col in column_mapping.values()): + raise ValueError( + f"Error when renaming {list(column_mapping)} to {list(column_mapping.values())}: columns {set(example) - set(column_mapping.values())} are already in the dataset." 
+ ) + return { + new_column_name: example[original_column_name] + for original_column_name, new_column_name in column_mapping.items() + } + + +def add_column_fn(example: Dict, idx: int, name: str, column: List[Dict]): + if name in example: + raise ValueError(f"Error when adding {name}: column {name} is already in the dataset.") + return {name: column[idx]} + + +def _infer_features_from_batch(batch: Dict[str, list], try_features: Optional[Features] = None) -> Features: + pa_table = pa.Table.from_pydict(batch) + if try_features is not None: + try: + pa_table = table_cast(pa_table, pa.schema(try_features.type)) + except (TypeError, pa.ArrowInvalid, pa.ArrowNotImplementedError): + pass + return Features.from_arrow_schema(pa_table.schema) + + +def _examples_to_batch(examples: List[Dict[str, Any]]) -> Dict[str, list]: + # we order the columns by order of appearance + # to do so, we use a dict as an ordered set + cols = {col: None for example in examples for col in example} + # when an example is missing a column, we set the value to None with .get() + arrays = [[example.get(col) for example in examples] for col in cols] + return dict(zip(cols, arrays)) + + +def _batch_to_examples(batch: Dict[str, list]) -> Iterator[Dict[str, Any]]: + """Convert a batch (dict of examples) to examples list""" + n_examples = len(batch[next(iter(batch))]) + for i in range(n_examples): + yield {col: array[i] for col, array in batch.items()} + + +def _convert_to_arrow( + iterable: Iterable[Tuple[Key, dict]], + batch_size: int, + drop_last_batch: bool = False, +) -> Iterator[Tuple[Key, pa.Table]]: + """Convert and group examples in Arrow tables of size `batch_size`. + + Args: + iterable (`Iterable[Tuple[Key, dict]]`): + An examples iterable containing tuples (example_key, example) of type (int/str, dict) + batch_size (`Optional[int]`): + Size of each sub-table to yield. If None or <= 0, yields the full table. + drop_last_batch (`bool`, defaults to `False`): + Drop the last batch if it is smaller than `batch_size`. + """ + if batch_size is None or batch_size <= 0: + yield ( + "all", + pa.Table.from_pylist(cast_to_python_objects([example for _, example in iterable], only_1d_for_numpy=True)), + ) + return + iterator = iter(iterable) + for key, example in iterator: + iterator_batch = islice(iterator, batch_size - 1) + key_examples_list = [(key, example)] + list(iterator_batch) + if len(key_examples_list) < batch_size and drop_last_batch: + return + keys, examples = zip(*key_examples_list) + new_key = "_".join(str(key) for key in keys) + yield new_key, pa.Table.from_pylist(cast_to_python_objects(examples, only_1d_for_numpy=True)) + + +def _batch_arrow_tables( + iterable: Iterable[Tuple[Key, pa.Table]], + batch_size: Optional[int], + drop_last_batch: bool = False, +) -> Iterator[Tuple[Key, pa.Table]]: + """Iterate over sub-tables of size `batch_size`. + + Args: + iterable (`Iterable[Tuple[Key, pa.Table]]`): + A tables iterable containing tuples (table_key, table) of type (int/str, pa.Table) + batch_size (`Optional[int]`): + Size of each sub-table to yield. If None or <= 0, yields the full table. + drop_last_batch (`bool`, defaults to `False`): + Drop the last batch if it is smaller than `batch_size`. 
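+
+    Example (a minimal sketch; the tiny in-memory tables are placeholders):
+
+    ```py
+    >>> import pyarrow as pa
+    >>> from datasets.iterable_dataset import _batch_arrow_tables
+    >>> tables = [("0", pa.table({"a": [1, 2, 3]})), ("1", pa.table({"a": [4, 5]}))]
+    >>> [len(t) for _, t in _batch_arrow_tables(tables, batch_size=2)]
+    [2, 2, 1]
+    ```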
+ """ + if batch_size is None or batch_size <= 0: + yield "all", pa.concat_tables([pa_table for _, pa_table in iterable]) + return + keys_buffer = [] + chunks_buffer = [] + chunks_buffer_size = 0 + for key, pa_table in iterable: + for chunk in pa_table.to_reader(max_chunksize=batch_size): + if len(chunk) == 0: + continue + elif chunks_buffer_size + len(chunk) < batch_size: + keys_buffer.append(key) + chunks_buffer.append(chunk) + chunks_buffer_size += len(chunk) + continue + elif chunks_buffer_size + len(chunk) == batch_size: + keys_buffer.append(key) + chunks_buffer.append(chunk) + new_key = "_".join(str(_key) for _key in keys_buffer) + yield new_key, pa.Table.from_batches(chunks_buffer) + keys_buffer = [] + chunks_buffer = [] + chunks_buffer_size = 0 + else: + cropped_chunk_length = batch_size - chunks_buffer_size + keys_buffer.append(f"{key}[:{cropped_chunk_length}]") + chunks_buffer.append(chunk.slice(0, cropped_chunk_length)) + new_key = "_".join(str(_key) for _key in keys_buffer) + yield new_key, pa.Table.from_batches(chunks_buffer) + keys_buffer = [f"{key}[{cropped_chunk_length}:]"] + chunks_buffer = [chunk.slice(cropped_chunk_length, len(chunk) - cropped_chunk_length)] + chunks_buffer_size = len(chunk) - cropped_chunk_length + if not drop_last_batch and chunks_buffer: + new_key = "_".join(str(_key) for _key in keys_buffer) + yield new_key, pa.Table.from_batches(chunks_buffer) + + +class _BaseExamplesIterable: + """Base class for the examples iterable used by an IterableDataset""" + + def __init__(self) -> None: + self._state_dict: Optional[Union[list, dict]] = None + + def __iter__(self) -> Iterator[Tuple[Key, dict]]: + """An examples iterable should yield tuples (example_key, example) of type (int/str, dict)""" + raise NotImplementedError(f"{type(self)} doesn't implement __iter__ yet") + + @property + def iter_arrow(self) -> Optional[Callable[[], Iterator[Tuple[Key, pa.Table]]]]: + return None + + def shuffle_data_sources(self, generator: np.random.Generator) -> "_BaseExamplesIterable": + """ + Either shuffle the shards/sources of the dataset, or propagate the shuffling to the underlying iterable. + If the order of the shards must stay fixed (when using .skip or .take for example), then this method returns self. 
+ """ + raise NotImplementedError(f"{type(self)} doesn't implement shuffle_data_sources yet") + + def shard_data_sources(self, worker_id: int, num_workers: int) -> "_BaseExamplesIterable": + """Either keep only the requested shard, or propagate the request to the underlying iterable.""" + raise NotImplementedError(f"{type(self)} doesn't implement shard_data_sources yet") + + def split_shard_indices_by_worker(self, worker_id: int, num_workers: int) -> List[int]: + return list(range(worker_id, self.n_shards, num_workers)) + + @property + def n_shards(self) -> int: + raise NotImplementedError(f"{type(self)} doesn't implement n_shards yet") + + def _init_state_dict(self) -> dict: + raise NotImplementedError(f"{type(self)} doesn't implement _init_state_dict yet") + + def load_state_dict(self, state_dict: dict) -> dict: + def _inner_load_state_dict(state, new_state): + if new_state is not None and isinstance(state, dict): + for key in state: + state[key] = _inner_load_state_dict(state[key], new_state[key]) + return state + elif new_state is not None and isinstance(state, list): + for i in range(len(state)): + state[i] = _inner_load_state_dict(state[i], new_state[i]) + return state + return new_state + + return _inner_load_state_dict(self._state_dict, state_dict) + + def state_dict(self) -> dict: + if self._state_dict: + return copy.deepcopy(self._state_dict) + raise RuntimeError("State dict is not initialized, please call ex_iterable._init_state_dict() first.") + + +class ExamplesIterable(_BaseExamplesIterable): + def __init__(self, generate_examples_fn: Callable[..., Tuple[Key, dict]], kwargs: dict): + super().__init__() + self.generate_examples_fn = generate_examples_fn + self.kwargs = kwargs + + def _init_state_dict(self) -> dict: + self._state_dict = {"shard_idx": 0, "shard_example_idx": 0} + return self._state_dict + + def __iter__(self): + shard_idx_start = self._state_dict["shard_idx"] if self._state_dict else 0 + for gen_kwags in islice(_split_gen_kwargs(self.kwargs, max_num_jobs=self.n_shards), shard_idx_start, None): + shard_example_idx_start = self._state_dict["shard_example_idx"] if self._state_dict else 0 + for key_example in islice(self.generate_examples_fn(**gen_kwags), shard_example_idx_start, None): + if self._state_dict: + self._state_dict["shard_example_idx"] += 1 + yield key_example + if self._state_dict: + self._state_dict["shard_idx"] += 1 + self._state_dict["shard_example_idx"] = 0 + + def shuffle_data_sources(self, generator: np.random.Generator) -> "ExamplesIterable": + return ShuffledDataSourcesExamplesIterable(self.generate_examples_fn, self.kwargs, generator) + + def shard_data_sources(self, worker_id: int, num_workers: int) -> "ExamplesIterable": + """Keep only the requested shard.""" + gen_kwargs_list = _split_gen_kwargs(self.kwargs, max_num_jobs=self.n_shards) + shard_indices = self.split_shard_indices_by_worker(worker_id, num_workers) + requested_gen_kwargs = _merge_gen_kwargs([gen_kwargs_list[i] for i in shard_indices]) + return ExamplesIterable(self.generate_examples_fn, requested_gen_kwargs) + + @property + def n_shards(self) -> int: + return _number_of_shards_in_gen_kwargs(self.kwargs) + + +class ShuffledDataSourcesExamplesIterable(ExamplesIterable): + def __init__( + self, generate_examples_fn: Callable[..., Tuple[Key, dict]], kwargs: dict, generator: np.random.Generator + ): + super().__init__(generate_examples_fn, kwargs) + self.generator = deepcopy(generator) + + def _init_state_dict(self) -> dict: + self._state_dict = {"shard_idx": 0, "shard_example_idx": 
0} + return self._state_dict + + def __iter__(self): + """Shuffle the kwargs order to shuffle shards""" + rng = deepcopy(self.generator) + kwargs_with_shuffled_shards = _shuffle_gen_kwargs(rng, self.kwargs) + shard_idx_start = self._state_dict["shard_idx"] if self._state_dict else 0 + for gen_kwags in islice( + _split_gen_kwargs(kwargs_with_shuffled_shards, max_num_jobs=self.n_shards), shard_idx_start, None + ): + shard_example_idx_start = self._state_dict["shard_example_idx"] if self._state_dict else 0 + for key_example in islice(self.generate_examples_fn(**gen_kwags), shard_example_idx_start, None): + if self._state_dict: + self._state_dict["shard_example_idx"] += 1 + yield key_example + if self._state_dict: + self._state_dict["shard_idx"] += 1 + self._state_dict["shard_example_idx"] = 0 + + def shard_data_sources(self, worker_id: int, num_workers: int) -> "ExamplesIterable": + """Keep only the requested shard.""" + rng = deepcopy(self.generator) + kwargs_with_shuffled_shards = _shuffle_gen_kwargs(rng, self.kwargs) + return ExamplesIterable(self.generate_examples_fn, kwargs_with_shuffled_shards).shard_data_sources( + worker_id, num_workers + ) + + +class ArrowExamplesIterable(_BaseExamplesIterable): + def __init__(self, generate_tables_fn: Callable[..., Tuple[Key, pa.Table]], kwargs: dict): + super().__init__() + self.generate_tables_fn = generate_tables_fn + self.kwargs = kwargs + + @property + def iter_arrow(self): + return self._iter_arrow + + def _init_state_dict(self) -> dict: + self._state_dict = {"shard_idx": 0, "shard_example_idx": 0} + return self._state_dict + + def __iter__(self): + formatter = PythonFormatter() + shard_idx_start = self._state_dict["shard_idx"] if self._state_dict else 0 + for gen_kwags in islice(_split_gen_kwargs(self.kwargs, max_num_jobs=self.n_shards), shard_idx_start, None): + shard_example_idx_start = self._state_dict["shard_example_idx"] if self._state_dict else 0 + shard_example_idx = 0 + for key, pa_table in self.generate_tables_fn(**gen_kwags): + if shard_example_idx + len(pa_table) <= shard_example_idx_start: + shard_example_idx += len(pa_table) + continue + for pa_subtable in pa_table.to_reader(max_chunksize=config.ARROW_READER_BATCH_SIZE_IN_DATASET_ITER): + formatted_batch = formatter.format_batch(pa_subtable) + for example in _batch_to_examples(formatted_batch): + if shard_example_idx >= shard_example_idx_start: + if self._state_dict: + self._state_dict["shard_example_idx"] += 1 + yield key, example + shard_example_idx += 1 + if self._state_dict: + self._state_dict["shard_idx"] += 1 + self._state_dict["shard_example_idx"] = 0 + + def _iter_arrow(self): + shard_idx_start = self._state_dict["shard_idx"] if self._state_dict else 0 + for gen_kwags in islice(_split_gen_kwargs(self.kwargs, max_num_jobs=self.n_shards), shard_idx_start, None): + shard_example_idx_start = self._state_dict["shard_example_idx"] if self._state_dict else 0 + shard_example_idx = 0 + for key, pa_table in self.generate_tables_fn(**gen_kwags): + shard_example_idx += len(pa_table) + if shard_example_idx <= shard_example_idx_start: + continue + if self._state_dict: + self._state_dict["shard_example_idx"] += len(pa_table) + yield key, pa_table + if self._state_dict: + self._state_dict["shard_idx"] += 1 + self._state_dict["shard_example_idx"] = 0 + + def shuffle_data_sources(self, generator: np.random.Generator) -> "ArrowExamplesIterable": + return ShuffledDataSourcesArrowExamplesIterable(self.generate_tables_fn, self.kwargs, generator) + + def shard_data_sources(self, worker_id: int, 
num_workers: int) -> "ArrowExamplesIterable": + """Keep only the requested shard.""" + gen_kwargs_list = _split_gen_kwargs(self.kwargs, max_num_jobs=self.n_shards) + shard_indices = self.split_shard_indices_by_worker(worker_id, num_workers) + requested_gen_kwargs = _merge_gen_kwargs([gen_kwargs_list[i] for i in shard_indices]) + return ArrowExamplesIterable(self.generate_tables_fn, requested_gen_kwargs) + + @property + def n_shards(self) -> int: + return _number_of_shards_in_gen_kwargs(self.kwargs) + + +class ShuffledDataSourcesArrowExamplesIterable(ArrowExamplesIterable): + def __init__( + self, + generate_tables_fn: Callable[..., Tuple[Key, pa.Table]], + kwargs: dict, + generator: np.random.Generator, + ): + super().__init__(generate_tables_fn, kwargs) + self.generator = deepcopy(generator) + + def _init_state_dict(self) -> dict: + self._state_dict = {"shard_idx": 0, "shard_example_idx": 0} + return self._state_dict + + def __iter__(self): + """Shuffle the kwargs order to shuffle shards""" + rng = deepcopy(self.generator) + kwargs_with_shuffled_shards = _shuffle_gen_kwargs(rng, self.kwargs) + formatter = PythonFormatter() + shard_idx_start = self._state_dict["shard_idx"] if self._state_dict else 0 + for gen_kwags in islice( + _split_gen_kwargs(kwargs_with_shuffled_shards, max_num_jobs=self.n_shards), shard_idx_start, None + ): + shard_example_idx_start = self._state_dict["shard_example_idx"] if self._state_dict else 0 + shard_example_idx = 0 + for key, pa_table in self.generate_tables_fn(**gen_kwags): + if shard_example_idx + len(pa_table) <= shard_example_idx_start: + shard_example_idx += len(pa_table) + continue + for pa_subtable in pa_table.to_reader(max_chunksize=config.ARROW_READER_BATCH_SIZE_IN_DATASET_ITER): + formatted_batch = formatter.format_batch(pa_subtable) + for example in _batch_to_examples(formatted_batch): + if shard_example_idx >= shard_example_idx_start: + if self._state_dict: + self._state_dict["shard_example_idx"] += 1 + yield key, example + shard_example_idx += 1 + if self._state_dict: + self._state_dict["shard_idx"] += 1 + self._state_dict["shard_example_idx"] = 0 + + def _iter_arrow(self): + rng = deepcopy(self.generator) + kwargs_with_shuffled_shards = _shuffle_gen_kwargs(rng, self.kwargs) + shard_idx_start = self._state_dict["shard_idx"] if self._state_dict else 0 + for gen_kwags in islice( + _split_gen_kwargs(kwargs_with_shuffled_shards, max_num_jobs=self.n_shards), shard_idx_start, None + ): + shard_example_idx_start = self._state_dict["shard_example_idx"] if self._state_dict else 0 + shard_example_idx = 0 + for key, pa_table in self.generate_tables_fn(**gen_kwags): + shard_example_idx += len(pa_table) + if shard_example_idx <= shard_example_idx_start: + continue + if self._state_dict: + self._state_dict["shard_example_idx"] += len(pa_table) + yield key, pa_table + if self._state_dict: + self._state_dict["shard_idx"] += 1 + self._state_dict["shard_example_idx"] = 0 + + def shard_data_sources(self, worker_id: int, num_workers: int) -> "ArrowExamplesIterable": + """Keep only the requested shard.""" + rng = deepcopy(self.generator) + kwargs_with_shuffled_shards = _shuffle_gen_kwargs(rng, self.kwargs) + return ArrowExamplesIterable(self.generate_tables_fn, kwargs_with_shuffled_shards).shard_data_sources( + worker_id, num_workers + ) + + +class SelectColumnsIterable(_BaseExamplesIterable): + def __init__(self, ex_iterable: _BaseExamplesIterable, column_names: List[str]): + super().__init__() + self.ex_iterable = ex_iterable + self.column_names = column_names + + 
@property + def iter_arrow(self): + if self.ex_iterable.iter_arrow: + return self._iter_arrow + + def _init_state_dict(self) -> dict: + self._state_dict = self.ex_iterable._init_state_dict() + return self._state_dict + + def __iter__(self): + for idx, row in self.ex_iterable: + yield idx, {c: row[c] for c in self.column_names} + + def _iter_arrow(self) -> Iterator[Tuple[Key, pa.Table]]: + for idx, pa_table in self.ex_iterable.iter_arrow(): + if len(pa_table) > 0: # empty tables have no schema + yield idx, pa_table.select(self.column_names) + + def shuffle_data_sources(self, generator: np.random.Generator) -> "SelectColumnsIterable": + return SelectColumnsIterable(self.ex_iterable.shuffle_data_sources(generator), self.column_names) + + def shard_data_sources(self, worker_id: int, num_workers: int) -> "SelectColumnsIterable": + return SelectColumnsIterable(self.ex_iterable.shard_data_sources(worker_id, num_workers), self.column_names) + + @property + def n_shards(self) -> int: + return self.ex_iterable.n_shards + + +class StepExamplesIterable(_BaseExamplesIterable): + def __init__(self, ex_iterable: _BaseExamplesIterable, step: int, offset: int): + super().__init__() + self.ex_iterable = ex_iterable + self.step = step + self.offset = offset + # TODO(QL): implement iter_arrow + + def _init_state_dict(self) -> dict: + self._state_dict = self.ex_iterable._init_state_dict() + return self._state_dict + + def __iter__(self): + ex_iterator = iter(self.ex_iterable) + while True: + batch = list(islice(ex_iterator, self.step)) + if len(batch) > self.offset: + yield batch[self.offset] + else: + break + + def shuffle_data_sources(self, generator: np.random.Generator) -> "StepExamplesIterable": + return StepExamplesIterable( + self.ex_iterable.shuffle_data_sources(generator), step=self.step, offset=self.offset + ) + + def shard_data_sources(self, worker_id: int, num_workers: int) -> "StepExamplesIterable": + return StepExamplesIterable( + self.ex_iterable.shard_data_sources(worker_id, num_workers), step=self.step, offset=self.offset + ) + + @property + def n_shards(self) -> int: + return self.ex_iterable.n_shards + + +class CyclingMultiSourcesExamplesIterable(_BaseExamplesIterable): + def __init__( + self, + ex_iterables: List[_BaseExamplesIterable], + stopping_strategy: Literal["first_exhausted", "all_exhausted"] = "first_exhausted", + ): + super().__init__() + self.ex_iterables = ex_iterables + self.stopping_strategy = stopping_strategy + + # if undersampling ("first_exhausted"), we stop as soon as one dataset is exhausted + # if oversampling ("all_exhausted"), we stop as soons as every dataset is exhausted, i.e as soon as every samples of every dataset has been visited at least once + self.bool_strategy_func = np.all if (stopping_strategy == "all_exhausted") else np.any + # TODO(QL): implement iter_arrow + + def _get_indices_iterator(self): + # this is an infinite iterator to keep track of which iterator we want to pick examples from + ex_iterable_idx = self._state_dict["ex_iterable_idx"] if self._state_dict else 0 + for next_ex_iterable_idx in islice(cycle(range(len(self.ex_iterables))), ex_iterable_idx + 1, None): + if self._state_dict: + self._state_dict["ex_iterable_idx"] = next_ex_iterable_idx + yield ex_iterable_idx + ex_iterable_idx = next_ex_iterable_idx + + def _init_state_dict(self) -> dict: + self._state_dict = { + "ex_iterable_idx": 0, + "ex_iterables": [ex_iterable._init_state_dict() for ex_iterable in self.ex_iterables], + "previous_states": [None] * len(self.ex_iterables), + 
"is_exhausted": [False] * len(self.ex_iterables), + } + return self._state_dict + + def __iter__(self): + # we use this to buffer one example of each iterator to know if an iterator is exhausted + nexts = [None] * len(self.ex_iterables) + # because of that, we need to rewind 1 example when reloading the state dict + if self._state_dict: + for i in range(len(self.ex_iterables)): + if self._state_dict["previous_states"][i] is not None: + self.ex_iterables[i].load_state_dict(self._state_dict["previous_states"][i]) + iterators = [iter(ex_iterable) for ex_iterable in self.ex_iterables] + + indices_iterator = self._get_indices_iterator() + + is_exhausted = ( + np.array(self._state_dict["is_exhausted"]) if self._state_dict else np.full(len(self.ex_iterables), False) + ) + for i in indices_iterator: + # if the stopping criteria is met, break the main for loop + if self.bool_strategy_func(is_exhausted): + break + # let's pick one example from the iterator at index i + if nexts[i] is None: + nexts[i] = next(iterators[i], False) + result = nexts[i] + if self._state_dict: + self._state_dict["previous_states"][i] = deepcopy(self._state_dict["ex_iterables"][i]) + nexts[i] = next(iterators[i], False) + + # the iterator is exhausted + if nexts[i] is False: + is_exhausted[i] = True + if self._state_dict: + self._state_dict["is_exhausted"][i] = True + # we reset it in case the stopping crtieria isn't met yet + nexts[i] = None + if self._state_dict: + self._state_dict["ex_iterables"][i] = self.ex_iterables[i]._init_state_dict() + self._state_dict["previous_states"][i] = None + iterators[i] = iter(self.ex_iterables[i]) + + if result is not False: + yield result + + def shuffle_data_sources(self, generator: np.random.Generator) -> "CyclingMultiSourcesExamplesIterable": + """Shuffle each underlying examples iterable.""" + ex_iterables = [ex_iterable.shuffle_data_sources(generator) for ex_iterable in self.ex_iterables] + return CyclingMultiSourcesExamplesIterable(ex_iterables, self.stopping_strategy) + + @property + def n_shards(self) -> int: + return min(ex_iterable.n_shards for ex_iterable in self.ex_iterables) + + def shard_data_sources(self, worker_id: int, num_workers: int) -> "CyclingMultiSourcesExamplesIterable": + """Either keep only the requested shard, or propagate the request to the underlying iterable.""" + return CyclingMultiSourcesExamplesIterable( + [iterable.shard_data_sources(worker_id, num_workers) for iterable in self.ex_iterables], + stopping_strategy=self.stopping_strategy, + ) + + +class VerticallyConcatenatedMultiSourcesExamplesIterable(_BaseExamplesIterable): + """ + VerticallyConcatenatedMultiSourcesExamplesIterable simply chains the input iterables. + It doesn't require the examples iterables to always yield the same columns. + Instead, this is handled by the `IterableDataset` class or `TypedExamplesIterable`. + + For information, `IterableDataset` merges the features of all the datasets to concatenate into one. + We use `IterableDataset._resolve_features` to obtain the features of all the datasets to concatenate. + + Then for each example, `IterableDataset` and `TypedExamplesIterable` automatically fill missing columns with None. + This is done with `_apply_feature_types_on_example`. 
+ """ + + def __init__(self, ex_iterables: List[_BaseExamplesIterable]): + super().__init__() + self.ex_iterables = ex_iterables + + @property + def iter_arrow(self): + if all(ex_iterable.iter_arrow is not None for ex_iterable in self.ex_iterables): + return self._iter_arrow + + def _init_state_dict(self) -> dict: + self._state_dict = { + "ex_iterable_idx": 0, + "ex_iterables": [ex_iterable._init_state_dict() for ex_iterable in self.ex_iterables], + } + return self._state_dict + + def __iter__(self): + ex_iterable_idx_start = self._state_dict["ex_iterable_idx"] if self._state_dict else 0 + for ex_iterable in islice(self.ex_iterables, ex_iterable_idx_start, None): + yield from ex_iterable + if self._state_dict: + self._state_dict["ex_iterable_idx"] += 1 + + def _iter_arrow(self): + ex_iterable_idx_start = self._state_dict["ex_iterable_idx"] if self._state_dict else 0 + for ex_iterable in islice(self.ex_iterables, ex_iterable_idx_start, None): + yield from ex_iterable.iter_arrow() + if self._state_dict: + self._state_dict["ex_iterable_idx"] += 1 + + def shuffle_data_sources( + self, generator: np.random.Generator + ) -> "VerticallyConcatenatedMultiSourcesExamplesIterable": + """Shuffle the list of examples iterable, as well as each underlying examples iterable.""" + rng = deepcopy(generator) + ex_iterables = list(self.ex_iterables) + rng.shuffle(ex_iterables) + ex_iterables = [ex_iterable.shuffle_data_sources(generator) for ex_iterable in ex_iterables] + return VerticallyConcatenatedMultiSourcesExamplesIterable(ex_iterables) + + @property + def n_shards(self) -> int: + return min(ex_iterable.n_shards for ex_iterable in self.ex_iterables) + + def shard_data_sources( + self, worker_id: int, num_workers: int + ) -> "VerticallyConcatenatedMultiSourcesExamplesIterable": + """Either keep only the requested shard, or propagate the request to the underlying iterable.""" + return VerticallyConcatenatedMultiSourcesExamplesIterable( + [iterable.shard_data_sources(worker_id, num_workers) for iterable in self.ex_iterables] + ) + + +def _check_column_names(column_names: List[str]): + """Check the column names to make sure they don't contain duplicates.""" + counter = Counter(column_names) + if not all(count == 1 for count in counter.values()): + duplicated_columns = [col for col in counter if counter[col] > 1] + raise ValueError( + f"The examples iterables can't have duplicated columns but columns {duplicated_columns} are duplicated." + ) + + +class HorizontallyConcatenatedMultiSourcesExamplesIterable(_BaseExamplesIterable): + """ + HorizontallyConcatenatedMultiSourcesExamplesIterable merges examples together for the input list of iterables. + It also checks that there are no duplicate columns (otherwise we don't know which one to keep). + This check is done once when yielding the first example. + + However it doesn't fill missing columns with None. + Instead, this is handled by the `IterableDataset` class or `TypedExamplesIterable`. + + For information, `IterableDataset` merges the features of all the datasets to concatenate into one. + We use `IterableDataset._resolve_features` to obtain the features of all the datasets to concatenate. + + Then for each example, `IterableDataset` and `TypedExamplesIterable` automatically fill missing columns with None. + This is done with `_apply_feature_types_on_example`. 
+ """ + + def __init__(self, ex_iterables: List[_BaseExamplesIterable]): + super().__init__() + self.ex_iterables = ex_iterables + # TODO(QL): implement iter_arrow + + def _init_state_dict(self) -> dict: + self._state_dict = {"ex_iterables": [ex_iterable._init_state_dict() for ex_iterable in self.ex_iterables]} + return self._state_dict + + def __iter__(self): + ex_iterators = [iter(ex_iterable) for ex_iterable in self.ex_iterables] + for i in itertools.count(): + keys = [] + examples = [] + for ex_iterator in list(ex_iterators): + try: + key, example = next(ex_iterator) + keys.append(key) + examples.append(example) + except StopIteration: + ex_iterators.remove(ex_iterator) + if ex_iterators: + if i == 0: + _check_column_names([column_name for example in examples for column_name in example]) + new_example = {} + for example in examples: + new_example.update(example) + new_key = "_".join(str(key) for key in keys) + yield new_key, new_example + else: + break + + def shuffle_data_sources( + self, generator: np.random.Generator + ) -> "HorizontallyConcatenatedMultiSourcesExamplesIterable": + """Doesn't shuffle the wrapped examples iterable since it would break the alignment between them.""" + return self + + @property + def n_shards(self) -> int: + return 1 + + def shard_data_sources( + self, worker_id: int, num_workers: int + ) -> "HorizontallyConcatenatedMultiSourcesExamplesIterable": + """Either keep only the requested shard, or propagate the request to the underlying iterable.""" + return HorizontallyConcatenatedMultiSourcesExamplesIterable( + [iterable.shard_data_sources(worker_id, num_workers) for iterable in self.ex_iterables] + ) + + +class RandomlyCyclingMultiSourcesExamplesIterable(CyclingMultiSourcesExamplesIterable): + def __init__( + self, + ex_iterables: List[_BaseExamplesIterable], + generator: np.random.Generator, + probabilities: Optional[List[float]] = None, + stopping_strategy: Literal["first_exhausted", "all_exhausted"] = "first_exhausted", + ): + super().__init__(ex_iterables, stopping_strategy) + self.generator = deepcopy(generator) + self.probabilities = probabilities + # TODO(QL): implement iter_arrow + + def _get_indices_iterator(self): + rng = deepcopy(self.generator) + num_sources = len(self.ex_iterables) + random_batch_size = 1000 + # this is an infinite iterator that randomly samples the index of the source to pick examples from + index_offset = self._state_dict["bit_generator_index_offset"] if self._state_dict else 0 + if self._state_dict: + rng.bit_generator.state = self._state_dict["bit_generator_state"] + if self.probabilities is None: + while True: + for i in islice(rng.integers(0, num_sources, size=random_batch_size), index_offset, None): + index_offset = (index_offset + 1) % random_batch_size + if self._state_dict: + self._state_dict["bit_generator_index_offset"] = index_offset + if index_offset == 0: + self._state_dict["bit_generator_state"] = rng.bit_generator.state + yield int(i) + else: + while True: + for i in islice( + rng.choice(num_sources, size=random_batch_size, p=self.probabilities), index_offset, None + ): + index_offset = (index_offset + 1) % random_batch_size + if self._state_dict: + self._state_dict["bit_generator_index_offset"] = index_offset + if index_offset == 0: + self._state_dict["bit_generator_state"] = rng.bit_generator.state + yield int(i) + + def _init_state_dict(self) -> dict: + self._state_dict = { + "bit_generator_state": self.generator.bit_generator.state, + "bit_generator_index_offset": 0, + "ex_iterables": 
[ex_iterable._init_state_dict() for ex_iterable in self.ex_iterables], + "previous_states": [None] * len(self.ex_iterables), + "is_exhausted": [False] * len(self.ex_iterables), + } + return self._state_dict + + def shuffle_data_sources(self, generator: np.random.Generator) -> "RandomlyCyclingMultiSourcesExamplesIterable": + """Shuffle the data sources of each wrapped examples iterable.""" + ex_iterables = [ex_iterable.shuffle_data_sources(generator) for ex_iterable in self.ex_iterables] + return RandomlyCyclingMultiSourcesExamplesIterable( + ex_iterables, + generator=generator, + probabilities=self.probabilities, + stopping_strategy=self.stopping_strategy, + ) + + def shard_data_sources(self, worker_id: int, num_workers: int) -> "RandomlyCyclingMultiSourcesExamplesIterable": + """Either keep only the requested shard, or propagate the request to the underlying iterable.""" + return RandomlyCyclingMultiSourcesExamplesIterable( + [iterable.shard_data_sources(worker_id, num_workers) for iterable in self.ex_iterables], + self.generator, + self.probabilities, + self.stopping_strategy, + ) + + +class MappedExamplesIterable(_BaseExamplesIterable): + def __init__( + self, + ex_iterable: _BaseExamplesIterable, + function: Callable, + with_indices: bool = False, + input_columns: Optional[List[str]] = None, + batched: bool = False, + batch_size: Optional[int] = 1000, + drop_last_batch: bool = False, + remove_columns: Optional[List[str]] = None, + fn_kwargs: Optional[dict] = None, + formatting: Optional["FormattingConfig"] = None, + format_type="deprecated", + ): + if format_type != "deprecated": + warning_msg = "'format_type' is deprecated and will be removed in the next major version of datasets. " + help_message = "Please use 'formatting=FormattingConfig(format_type=format_type)' instead." 
+ warnings.warn(warning_msg + help_message, category=FutureWarning, stacklevel=2) + formatting = FormattingConfig(format_type=format_type) + super().__init__() + self.ex_iterable = ex_iterable + self.function = function + self.batched = batched + self.batch_size = batch_size + self.drop_last_batch = drop_last_batch + self.remove_columns = remove_columns + self.with_indices = with_indices + self.input_columns = input_columns + self.fn_kwargs = fn_kwargs or {} + self.formatting = formatting + + @property + def iter_arrow(self): + if self.formatting and self.formatting.format_type == "arrow": + return self._iter_arrow + + def _init_state_dict(self) -> dict: + self._state_dict = { + "ex_iterable": self.ex_iterable._init_state_dict(), + "previous_state": None, + "num_examples_since_previous_state": 0, + "previous_state_example_idx": 0, + } + return self._state_dict + + def __iter__(self): + if self.formatting and self.formatting.format_type == "arrow": + yield from ArrowExamplesIterable(self._iter_arrow, {}) + else: + yield from self._iter() + + def _iter(self): + current_idx = self._state_dict["previous_state_example_idx"] if self._state_dict else 0 + if self._state_dict and self._state_dict["previous_state"]: + self.ex_iterable.load_state_dict(self._state_dict["previous_state"]) + num_examples_to_skip = self._state_dict["num_examples_since_previous_state"] + else: + num_examples_to_skip = 0 + iterator = iter(self.ex_iterable) + + if self.formatting: + formatter = get_formatter(self.formatting.format_type) + format_dict = ( + formatter.recursive_tensorize if isinstance(formatter, TensorFormatter) else cast_to_python_objects + ) + else: + format_dict = None + + if self.batched: + if self._state_dict: + self._state_dict["previous_state"] = self.ex_iterable.state_dict() + self._state_dict["num_examples_since_previous_state"] = 0 + self._state_dict["previous_state_example_idx"] = current_idx + for key, example in iterator: + # If `batched`, first build the batch, if `batch_size` is None or <=0, then the batch is the whole dataset + iterator_batch = ( + iterator + if self.batch_size is None or self.batch_size <= 0 + else islice(iterator, self.batch_size - 1) + ) + key_examples_list = [(key, example)] + list(iterator_batch) + keys, examples = zip(*key_examples_list) + if ( + self.drop_last_batch + and self.batch_size is not None + and self.batch_size > 0 + and len(examples) < self.batch_size + ): # ignore last batch + return + batch = _examples_to_batch(examples) + batch = format_dict(batch) if format_dict else batch + # then apply the transform + inputs = batch + function_args = [inputs] if self.input_columns is None else [inputs[col] for col in self.input_columns] + if self.with_indices: + function_args.append([current_idx + i for i in range(len(key_examples_list))]) + transformed_batch = dict(batch) # this will be updated with the function output + transformed_batch.update(self.function(*function_args, **self.fn_kwargs)) + # then remove the unwanted columns + if self.remove_columns: + for c in self.remove_columns: + del transformed_batch[c] + if transformed_batch: + first_col = next(iter(transformed_batch)) + bad_cols = [ + col + for col in transformed_batch + if len(transformed_batch[col]) != len(transformed_batch[first_col]) + ] + if bad_cols: + raise ValueError( + f"Column lengths mismatch: columns {bad_cols} have length {[len(transformed_batch[col]) for col in bad_cols]} while {first_col} has length {len(transformed_batch[first_col])}." 
+ ) + # the new key is the concatenation of the examples keys from the batch + new_key = "_".join(str(key) for key in keys) + # yield one example at a time from the transformed batch + for example in _batch_to_examples(transformed_batch): + current_idx += 1 + if self._state_dict: + self._state_dict["num_examples_since_previous_state"] += 1 + if num_examples_to_skip > 0: + num_examples_to_skip -= 1 + continue + yield new_key, example + if self._state_dict: + self._state_dict["previous_state"] = self.ex_iterable.state_dict() + self._state_dict["num_examples_since_previous_state"] = 0 + self._state_dict["previous_state_example_idx"] = current_idx + else: + for key, example in iterator: + # If not batched, we can apply the transform and yield the example directly + # first copy the example, since we might drop some keys + example = dict(example) + example = format_dict(example) if format_dict else example + # then apply the transform + inputs = example + function_args = [inputs] if self.input_columns is None else [inputs[col] for col in self.input_columns] + if self.with_indices: + function_args.append(current_idx) + transformed_example = dict(example) # this will be updated with the function output + transformed_example.update(self.function(*function_args, **self.fn_kwargs)) + # then we remove the unwanted columns + if self.remove_columns: + for c in self.remove_columns: + del transformed_example[c] + current_idx += 1 + if self._state_dict: + self._state_dict["previous_state_example_idx"] += 1 + yield key, transformed_example + + def _iter_arrow(self) -> Iterator[Tuple[Key, pa.Table]]: + if self.ex_iterable.iter_arrow: + iterator = _batch_arrow_tables( + self.ex_iterable.iter_arrow(), + batch_size=self.batch_size if self.batched else 1, + drop_last_batch=self.drop_last_batch, + ) + else: + iterator = _convert_to_arrow( + self.ex_iterable, + batch_size=self.batch_size if self.batched else 1, + drop_last_batch=self.drop_last_batch, + ) + + current_idx = self._state_dict["previous_state_example_idx"] if self._state_dict else 0 + if self._state_dict and self._state_dict["previous_state"]: + self.ex_iterable.load_state_dict(self._state_dict["previous_state"]) + num_examples_to_skip = self._state_dict["num_examples_since_previous_state"] + else: + num_examples_to_skip = 0 + if self._state_dict: + self._state_dict["previous_state"] = self.ex_iterable.state_dict() + self._state_dict["num_examples_since_previous_state"] = 0 + self._state_dict["previous_state_example_idx"] = current_idx + + for key, pa_table in iterator: + # first build the batch + function_args = [pa_table] if self.input_columns is None else [pa_table[col] for col in self.input_columns] + if self.with_indices: + if self.batched: + function_args.append([current_idx + i for i in range(len(pa_table))]) + else: + function_args.append(current_idx) + # then apply the transform + output_table = self.function(*function_args, **self.fn_kwargs) + if not isinstance(output_table, pa.Table): + raise TypeError( + f"Provided `function` which is applied to pyarrow tables returns a variable of type {type(output_table)}. Make sure provided `function` returns a a pyarrow table to update the dataset." 
+ ) + # we don't need to merge results for consistency with Dataset.map which merges iif both input and output are dicts + # then remove the unwanted columns + if self.remove_columns: + for column in self.remove_columns: + if column in output_table.column_names: + output_table = output_table.remove_column(output_table.column_names.index(column)) + # return output + current_idx += len(pa_table) + if self._state_dict: + self._state_dict["num_examples_since_previous_state"] += len(pa_table) + if num_examples_to_skip > 0: + num_examples_to_skip -= len(pa_table) + continue + yield key, output_table + if self._state_dict: + self._state_dict["previous_state"] = self.ex_iterable.state_dict() + self._state_dict["num_examples_since_previous_state"] = 0 + self._state_dict["previous_state_example_idx"] = current_idx + + def shuffle_data_sources(self, generator: np.random.Generator) -> "MappedExamplesIterable": + """Shuffle the wrapped examples iterable.""" + return MappedExamplesIterable( + self.ex_iterable.shuffle_data_sources(generator), + function=self.function, + with_indices=self.with_indices, + input_columns=self.input_columns, + batched=self.batched, + batch_size=self.batch_size, + drop_last_batch=self.drop_last_batch, + remove_columns=self.remove_columns, + fn_kwargs=self.fn_kwargs, + formatting=self.formatting, + ) + + def shard_data_sources(self, worker_id: int, num_workers: int) -> "MappedExamplesIterable": + """Keep only the requested shard.""" + return MappedExamplesIterable( + self.ex_iterable.shard_data_sources(worker_id, num_workers), + function=self.function, + with_indices=self.with_indices, + input_columns=self.input_columns, + batched=self.batched, + batch_size=self.batch_size, + drop_last_batch=self.drop_last_batch, + remove_columns=self.remove_columns, + fn_kwargs=self.fn_kwargs, + formatting=self.formatting, + ) + + @property + def n_shards(self) -> int: + return self.ex_iterable.n_shards + + +class FilteredExamplesIterable(_BaseExamplesIterable): + def __init__( + self, + ex_iterable: _BaseExamplesIterable, + function: Callable, + with_indices: bool = False, + input_columns: Optional[List[str]] = None, + batched: bool = False, + batch_size: Optional[int] = 1000, + fn_kwargs: Optional[dict] = None, + formatting: Optional["FormattingConfig"] = None, + format_type="deprecated", + ): + if format_type != "deprecated": + warning_msg = "'format_type' is deprecated and will be removed in the next major version of datasets. " + help_message = "Please use 'formatting=FormattingConfig(format_type=format_type)' instead." 
+ warnings.warn(warning_msg + help_message, category=FutureWarning, stacklevel=2) + formatting = FormattingConfig(format_type=format_type) + super().__init__() + self.ex_iterable = ex_iterable + self.function = function + self.batched = batched + self.batch_size = batch_size + self.with_indices = with_indices + self.input_columns = input_columns + self.fn_kwargs = fn_kwargs or {} + self.formatting = formatting + + @property + def iter_arrow(self): + if self.formatting and self.formatting.format_type == "arrow": + return self._iter_arrow + + def _init_state_dict(self) -> dict: + self._state_dict = { + "ex_iterable": self.ex_iterable._init_state_dict(), + "previous_state": None, + "num_examples_since_previous_state": 0, + "previous_state_example_idx": 0, + } + return self._state_dict + + def __iter__(self): + if self.formatting and self.formatting.format_type == "arrow": + yield from ArrowExamplesIterable(self._iter_arrow, {}) + else: + yield from self._iter() + + def _iter(self): + current_idx = self._state_dict["previous_state_example_idx"] if self._state_dict else 0 + if self._state_dict and self._state_dict["previous_state"]: + self.ex_iterable.load_state_dict(self._state_dict["previous_state"]) + num_examples_to_skip = self._state_dict["num_examples_since_previous_state"] + else: + num_examples_to_skip = 0 + iterator = iter(self.ex_iterable) + + if self.formatting: + formatter = get_formatter(self.formatting.format_type) + format_dict = ( + formatter.recursive_tensorize if isinstance(formatter, TensorFormatter) else cast_to_python_objects + ) + else: + format_dict = None + + if self.batched: + if self._state_dict: + self._state_dict["previous_state"] = self.ex_iterable.state_dict() + self._state_dict["num_examples_since_previous_state"] = 0 + self._state_dict["previous_state_example_idx"] = current_idx + for key, example in iterator: + # If `batched`, first build the batch, if `batch_size` is None or <=0, then the batch is the whole dataset + iterator_batch = ( + iterator + if self.batch_size is None or self.batch_size <= 0 + else islice(iterator, self.batch_size - 1) + ) + key_examples_list = [(key, example)] + list(iterator_batch) + keys, examples = zip(*key_examples_list) + batch = _examples_to_batch(examples) + batch = format_dict(batch) if format_dict else batch + # then compute the mask for the batch + inputs = batch + function_args = [inputs] if self.input_columns is None else [inputs[col] for col in self.input_columns] + if self.with_indices: + function_args.append([current_idx + i for i in range(len(key_examples_list))]) + mask = self.function(*function_args, **self.fn_kwargs) + # yield one example at a time from the batch + for key_example, to_keep in zip(key_examples_list, mask): + current_idx += 1 + if self._state_dict: + self._state_dict["num_examples_since_previous_state"] += 1 + if num_examples_to_skip > 0: + num_examples_to_skip -= 1 + continue + if to_keep: + yield key_example + if self._state_dict: + self._state_dict["previous_state"] = self.ex_iterable.state_dict() + self._state_dict["num_examples_since_previous_state"] = 0 + self._state_dict["previous_state_example_idx"] = current_idx + else: + for key, example in iterator: + # If not batched, we can apply the filtering function direcly + example = dict(example) + inputs = format_dict(example) if format_dict else example + function_args = [inputs] if self.input_columns is None else [inputs[col] for col in self.input_columns] + if self.with_indices: + function_args.append(current_idx) + to_keep = 
self.function(*function_args, **self.fn_kwargs) + current_idx += 1 + if self._state_dict: + self._state_dict["previous_state_example_idx"] += 1 + if to_keep: + yield key, example + + def _iter_arrow(self): + if self.ex_iterable.iter_arrow: + iterator = _batch_arrow_tables( + self.ex_iterable.iter_arrow(), batch_size=self.batch_size if self.batched else 1 + ) + else: + iterator = _convert_to_arrow(self.ex_iterable, batch_size=self.batch_size if self.batched else 1) + + current_idx = self._state_dict["previous_state_example_idx"] if self._state_dict else 0 + if self._state_dict and self._state_dict["previous_state"]: + self.ex_iterable.load_state_dict(self._state_dict["previous_state"]) + num_examples_to_skip = self._state_dict["num_examples_since_previous_state"] + else: + num_examples_to_skip = 0 + if self._state_dict: + self._state_dict["previous_state"] = self.ex_iterable.state_dict() + self._state_dict["num_examples_since_previous_state"] = 0 + self._state_dict["previous_state_example_idx"] = current_idx + + for key, pa_table in iterator: + # first build the batch + function_args = [pa_table] if self.input_columns is None else [pa_table[col] for col in self.input_columns] + if self.with_indices: + if self.batched: + function_args.append([current_idx + i for i in range(len(pa_table))]) + else: + function_args.append(current_idx) + # then apply the transform + mask = self.function(*function_args, **self.fn_kwargs) + # yield the filtered table + current_idx += len(pa_table) + if self._state_dict: + self._state_dict["num_examples_since_previous_state"] += len(pa_table) + if num_examples_to_skip > 0: + num_examples_to_skip -= len(pa_table) + continue + if self.batched: + yield key, pa_table.filter(mask) + elif mask.as_py() if isinstance(mask, pa.BooleanScalar) else mask: + yield key, pa_table + if self._state_dict: + self._state_dict["previous_state"] = self.ex_iterable.state_dict() + self._state_dict["num_examples_since_previous_state"] = 0 + self._state_dict["previous_state_example_idx"] = current_idx + + def shuffle_data_sources(self, seed: Optional[int]) -> "FilteredExamplesIterable": + """Shuffle the wrapped examples iterable.""" + return FilteredExamplesIterable( + self.ex_iterable.shuffle_data_sources(seed), + function=self.function, + with_indices=self.with_indices, + input_columns=self.input_columns, + batched=self.batched, + batch_size=self.batch_size, + ) + + def shard_data_sources(self, worker_id: int, num_workers: int) -> "FilteredExamplesIterable": + """Keep only the requested shard.""" + return FilteredExamplesIterable( + self.ex_iterable.shard_data_sources(worker_id, num_workers), + function=self.function, + with_indices=self.with_indices, + input_columns=self.input_columns, + batched=self.batched, + batch_size=self.batch_size, + ) + + @property + def n_shards(self) -> int: + return self.ex_iterable.n_shards + + +class BufferShuffledExamplesIterable(_BaseExamplesIterable): + def __init__(self, ex_iterable: _BaseExamplesIterable, buffer_size: int, generator: np.random.Generator): + super().__init__() + self.ex_iterable = ex_iterable + self.buffer_size = buffer_size + self.generator = generator + # TODO(QL): implement iter_arrow + + def _init_state_dict(self) -> dict: + self._state_dict = self.ex_iterable._init_state_dict() + self._original_state_dict = self.state_dict() + return self._state_dict + + def load_state_dict(self, state_dict: dict) -> dict: + if self._state_dict: + if state_dict != self._original_state_dict: + logger.warning( + "Loading a state dict of a shuffle 
buffer of a dataset without the buffer content." + "The shuffle buffer will be refilled before starting to yield new examples." + ) + return super().load_state_dict(state_dict) + + @staticmethod + def _iter_random_indices(rng: np.random.Generator, buffer_size: int, random_batch_size=1000) -> Iterator[int]: + while True: + yield from (int(i) for i in rng.integers(0, buffer_size, size=random_batch_size)) + + def __iter__(self): + buffer_size = self.buffer_size + rng = deepcopy(self.generator) + indices_iterator = self._iter_random_indices(rng, buffer_size) + # this is the shuffle buffer that we keep in memory + mem_buffer = [] + for x in self.ex_iterable: + if len(mem_buffer) == buffer_size: # if the buffer is full, pick and example from it + i = next(indices_iterator) + yield mem_buffer[i] + mem_buffer[i] = x # replace the picked example by a new one + else: # otherwise, keep filling the buffer + mem_buffer.append(x) + # when we run out of examples, we shuffle the remaining examples in the buffer and yield them + rng.shuffle(mem_buffer) + yield from mem_buffer + + def shuffle_data_sources(self, generator: np.random.Generator) -> "BufferShuffledExamplesIterable": + """Shuffle the wrapped examples iterable as well as the shuffling buffer.""" + return BufferShuffledExamplesIterable( + self.ex_iterable.shuffle_data_sources(generator), buffer_size=self.buffer_size, generator=generator + ) + + def shard_data_sources(self, worker_id: int, num_workers: int) -> "BufferShuffledExamplesIterable": + """Keep only the requested shard.""" + return BufferShuffledExamplesIterable( + self.ex_iterable.shard_data_sources(worker_id, num_workers), + buffer_size=self.buffer_size, + generator=self.generator, + ) + + @property + def n_shards(self) -> int: + return self.ex_iterable.n_shards + + +class SkipExamplesIterable(_BaseExamplesIterable): + def __init__(self, ex_iterable: _BaseExamplesIterable, n: int): + super().__init__() + self.ex_iterable = ex_iterable + self.n = n + # TODO(QL): implement iter_arrow + + def _init_state_dict(self) -> dict: + self._state_dict = {"skipped": False, "ex_iterable": self.ex_iterable._init_state_dict()} + return self._state_dict + + def __iter__(self): + ex_iterable_idx_start = 0 if self._state_dict and self._state_dict["skipped"] else self.n + if self._state_dict: + self._state_dict["skipped"] = True + yield from islice(self.ex_iterable, ex_iterable_idx_start, None) + + def shuffle_data_sources(self, generator: np.random.Generator) -> "SkipExamplesIterable": + """Doesn't shuffle the wrapped examples iterable since it would skip examples from other shards instead.""" + return self + + @property + def n_shards(self) -> int: + return self.ex_iterable.n_shards + + +class TakeExamplesIterable(_BaseExamplesIterable): + def __init__(self, ex_iterable: _BaseExamplesIterable, n: int): + super().__init__() + self.ex_iterable = ex_iterable + self.n = n + # TODO(QL): implement iter_arrow + + def _init_state_dict(self) -> dict: + self._state_dict = {"num_taken": 0, "ex_iterable": self.ex_iterable._init_state_dict()} + return self._state_dict + + def __iter__(self): + ex_iterable_num_taken = self._state_dict["num_taken"] if self._state_dict else 0 + for key_example in islice(self.ex_iterable, self.n - ex_iterable_num_taken): + if self._state_dict: + self._state_dict["num_taken"] += 1 + yield key_example + + def shuffle_data_sources(self, generator: np.random.Generator) -> "TakeExamplesIterable": + """Doesn't shuffle the wrapped examples iterable since it would take examples from other shards 
instead.""" + return self + + @staticmethod + def split_number(num, n): + quotient = num // n + remainder = num % n + result = [quotient] * n + for i in range(remainder): + result[i] += 1 + return result + + def shard_data_sources(self, worker_id: int, num_workers: int) -> "TakeExamplesIterable": + """Keep only the requested shard.""" + return TakeExamplesIterable( + self.ex_iterable.shard_data_sources(worker_id, num_workers), + n=self.split_number(self.n, num_workers)[worker_id], + ) + + @property + def n_shards(self) -> int: + return self.ex_iterable.n_shards + + +def _apply_feature_types_on_example( + example: dict, features: Features, token_per_repo_id: Dict[str, Union[str, bool, None]] +) -> dict: + example = dict(example) + # add missing columns + for column_name in features: + if column_name not in example: + example[column_name] = None + # we encode the example for ClassLabel feature types for example + encoded_example = features.encode_example(example) + # Decode example for Audio feature, e.g. + decoded_example = features.decode_example(encoded_example, token_per_repo_id=token_per_repo_id) + return decoded_example + + +def _apply_feature_types_on_batch( + batch: dict, features: Features, token_per_repo_id: Dict[str, Union[str, bool, None]] +) -> dict: + batch = dict(batch) + # add missing columns + n_examples = len(batch[next(iter(batch))]) + for column_name in features: + if column_name not in batch: + batch[column_name] = [None] * n_examples + # we encode the batch for ClassLabel feature types for example + encoded_batch = features.encode_batch(batch) + # Decode batch for Audio feature, e.g. + decoded_batch = features.decode_batch(encoded_batch, token_per_repo_id=token_per_repo_id) + return decoded_batch + + +class TypedExamplesIterable(_BaseExamplesIterable): + def __init__( + self, + ex_iterable: _BaseExamplesIterable, + features: Features, + token_per_repo_id: Dict[str, Union[str, bool, None]], + ): + super().__init__() + self.ex_iterable = ex_iterable + self.features = features + self.token_per_repo_id = token_per_repo_id + + @property + def iter_arrow(self): + if self.ex_iterable.iter_arrow is not None: + return self._iter_arrow + + def _init_state_dict(self) -> dict: + if not self._state_dict: + self._state_dict = self.ex_iterable._init_state_dict() + return self._state_dict + + def __iter__(self): + # Then for each example, `TypedExamplesIterable` automatically fills missing columns with None. + # This is done with `_apply_feature_types_on_example`. 
+ for key, example in self.ex_iterable: + yield ( + key, + _apply_feature_types_on_example(example, self.features, token_per_repo_id=self.token_per_repo_id), + ) + + def _iter_arrow(self) -> Iterator[Tuple[Key, pa.Table]]: + schema = self.features.arrow_schema + for key, pa_table in self.ex_iterable.iter_arrow(): + columns = set(pa_table.column_names) + # add missing columns + for column_name in self.features: + if column_name not in columns: + col = pa.NullArray.from_buffers(pa.null(), len(pa_table), [None]) + pa_table = pa_table.append_column(column_name, col) + if pa_table.schema != schema: + pa_table = cast_table_to_features(pa_table, self.features) + yield key, pa_table + + def shuffle_data_sources(self, generator: np.random.Generator) -> "TypedExamplesIterable": + """Shuffle the wrapped examples iterable.""" + return TypedExamplesIterable( + self.ex_iterable.shuffle_data_sources(generator), + features=self.features, + token_per_repo_id=self.token_per_repo_id, + ) + + def shard_data_sources(self, worker_id: int, num_workers: int) -> "TypedExamplesIterable": + """Keep only the requested shard.""" + return TypedExamplesIterable( + self.ex_iterable.shard_data_sources(worker_id, num_workers), + features=self.features, + token_per_repo_id=self.token_per_repo_id, + ) + + @property + def n_shards(self) -> int: + return self.ex_iterable.n_shards + + +@dataclass +class FormattingConfig: + format_type: Optional[str] + + def __post_init__(self): + if self.format_type == "pandas": + raise NotImplementedError( + "The 'pandas' formatting is not implemented for iterable datasets. You can use 'numpy' or 'arrow' instead." + ) + + +@dataclass +class ShufflingConfig: + generator: np.random.Generator + _original_seed: Optional[int] = None + + +@dataclass +class DistributedConfig: + rank: int + world_size: int + + +def _maybe_add_torch_iterable_dataset_parent_class(cls): + """Add torch.utils.data.IterableDataset as a parent class if 'torch' is available""" + if config.TORCH_AVAILABLE: + import torch.utils.data + + if torch.utils.data.IterableDataset not in cls.__bases__: + cls.__bases__ += (torch.utils.data.IterableDataset,) + + +class IterableDataset(DatasetInfoMixin): + """A Dataset backed by an iterable.""" + + def __init__( + self, + ex_iterable: _BaseExamplesIterable, + info: Optional[DatasetInfo] = None, + split: Optional[NamedSplit] = None, + formatting: Optional[FormattingConfig] = None, + shuffling: Optional[ShufflingConfig] = None, + distributed: Optional[DistributedConfig] = None, + token_per_repo_id: Optional[Dict[str, Union[str, bool, None]]] = None, + format_type="deprecated", + ): + if distributed and distributed.world_size > 1 and shuffling and shuffling._original_seed is None: + raise RuntimeError( + "The dataset doesn't have a fixed random seed across nodes to shuffle and split the list of dataset shards by node. " + "Please pass e.g. `seed=42` in `.shuffle()` to make all the nodes use the same seed. " + ) + if format_type != "deprecated": + warning_msg = "'format_type' is deprecated and will be removed in the next major version of datasets. " + help_message = "Please use 'formatting=FormattingConfig(format_type=format_type)' instead." 
+ warnings.warn(warning_msg + help_message, category=FutureWarning, stacklevel=2) + formatting = FormattingConfig(format_type=format_type) + + info = info.copy() if info is not None else DatasetInfo() + DatasetInfoMixin.__init__(self, info=info, split=split) + + self._ex_iterable = copy.copy(ex_iterable) + self._formatting = formatting + self._shuffling = shuffling + self._distributed = distributed + self._epoch = 0 + self._token_per_repo_id: Dict[str, Union[str, bool, None]] = token_per_repo_id or {} + self._state_dict = ex_iterable._init_state_dict() + self._starting_state_dict: Optional[dict] = None + _maybe_add_torch_iterable_dataset_parent_class(self.__class__) + + def state_dict(self) -> dict: + """Get the current state_dict of the dataset. + It corresponds to the state at the latest example it yielded. + + Resuming returns exactly where the checkpoint was saved except in two cases: + + 1. examples from shuffle buffers are lost when resuming and the buffers are refilled with new data + 2. combinations of `.with_format(arrow)` and batched `.map()` may skip one batch. + + Returns: + `dict` + + Example: + + ```py + >>> from datasets import Dataset, concatenate_datasets + >>> ds = Dataset.from_dict({"a": range(6)}).to_iterable_dataset(num_shards=3) + >>> for idx, example in enumerate(ds): + ... print(example) + ... if idx == 2: + ... state_dict = ds.state_dict() + ... print("checkpoint") + ... break + >>> ds.load_state_dict(state_dict) + >>> print(f"restart from checkpoint") + >>> for example in ds: + ... print(example) + ``` + + which returns: + ``` + {'a': 0} + {'a': 1} + {'a': 2} + checkpoint + restart from checkpoint + {'a': 3} + {'a': 4} + {'a': 5} + ``` + + ```py + >>> from torchdata.stateful_dataloader import StatefulDataLoader + >>> ds = load_dataset("deepmind/code_contests", streaming=True, split="train") + >>> dataloader = StatefulDataLoader(ds, batch_size=32, num_workers=4) + >>> # checkpoint + >>> state_dict = dataloader.state_dict() # uses ds.state_dict() under the hood + >>> # resume from checkpoint + >>> dataloader.load_state_dict(state_dict) # uses ds.load_state_dict() under the hood + ``` + """ + return copy.deepcopy(self._state_dict) + + def load_state_dict(self, state_dict: dict) -> None: + """Load the state_dict of the dataset. + The iteration will restart at the next example from when the state was saved. + + Resuming returns exactly where the checkpoint was saved except in two cases: + + 1. examples from shuffle buffers are lost when resuming and the buffers are refilled with new data + 2. combinations of `.with_format(arrow)` and batched `.map()` may skip one batch. + + Example: + + ```py + >>> from datasets import Dataset, concatenate_datasets + >>> ds = Dataset.from_dict({"a": range(6)}).to_iterable_dataset(num_shards=3) + >>> for idx, example in enumerate(ds): + ... print(example) + ... if idx == 2: + ... state_dict = ds.state_dict() + ... print("checkpoint") + ... break + >>> ds.load_state_dict(state_dict) + >>> print(f"restart from checkpoint") + >>> for example in ds: + ... 
print(example) + ``` + + which returns: + ``` + {'a': 0} + {'a': 1} + {'a': 2} + checkpoint + restart from checkpoint + {'a': 3} + {'a': 4} + {'a': 5} + ``` + + ```py + >>> from torchdata.stateful_dataloader import StatefulDataLoader + >>> ds = load_dataset("deepmind/code_contests", streaming=True, split="train") + >>> dataloader = StatefulDataLoader(ds, batch_size=32, num_workers=4) + >>> # checkpoint + >>> state_dict = dataloader.state_dict() # uses ds.state_dict() under the hood + >>> # resume from checkpoint + >>> dataloader.load_state_dict(state_dict) # uses ds.load_state_dict() under the hood + ``` + """ + self._ex_iterable.load_state_dict(state_dict) + self._starting_state_dict = state_dict + + def __repr__(self): + return f"IterableDataset({{\n features: {list(self._info.features.keys()) if self._info.features is not None else 'Unknown'},\n n_shards: {self.n_shards}\n}})" + + def __getstate__(self): + return self.__dict__ + + def __setstate__(self, d): + self.__dict__ = d + # Re-add torch iterable dataset as a parent class, since dynamically added parent classes are not kept when pickling + _maybe_add_torch_iterable_dataset_parent_class(self.__class__) + + def _head(self, n=5): + return _examples_to_batch(list(self.take(n))) + + def _effective_generator(self): + if self._shuffling and self._epoch == 0: + return self._shuffling.generator + elif self._shuffling: + # Create effective seed using self._epoch (we subtract in order to avoir overflow in long_scalars) + effective_seed = deepcopy(self._shuffling.generator).integers(0, 1 << 63) - self._epoch + effective_seed = (1 << 63) + effective_seed if effective_seed < 0 else effective_seed + return np.random.default_rng(effective_seed) + else: + raise ValueError("This dataset is not shuffled") + + @property + def n_shards(self) -> int: + if self._distributed and self._ex_iterable.n_shards % self._distributed.world_size == 0: + return self._ex_iterable.n_shards // self._distributed.world_size + return self._ex_iterable.n_shards + + def _iter_pytorch(self): + ex_iterable = self._prepare_ex_iterable_for_iteration() + # Fix for fsspec when using multiprocess to avoid hanging in the ML training loop. (only required for fsspec >= 0.9.0) + # See https://github.com/fsspec/gcsfs/issues/379 + fsspec.asyn.reset_lock() + # check if there aren't too many workers + import torch.utils.data + + worker_info = torch.utils.data.get_worker_info() + if self._is_main_process() and ex_iterable.n_shards < worker_info.num_workers: + logger.warning( + f"Too many dataloader workers: {worker_info.num_workers} (max is dataset.n_shards={ex_iterable.n_shards}). " + f"Stopping {worker_info.num_workers - ex_iterable.n_shards} dataloader workers." + ) + logger.info( + f"To parallelize data loading, we give each process some shards (or data sources) to process. " + f"Therefore it's unnecessary to have a number of workers greater than dataset.n_shards={ex_iterable.n_shards}. " + f"To enable more parallelism, please split the dataset in more files than {ex_iterable.n_shards}." + ) + # split workload + _log_prefix = f"node#{self._distributed.rank} " if self._distributed else "" + shards_indices = ex_iterable.split_shard_indices_by_worker(worker_info.id, worker_info.num_workers) + if shards_indices: + logger.debug( + f"{_log_prefix}dataloader worker#{worker_info.id}, ': Starting to iterate over {len(shards_indices)}/{ex_iterable.n_shards} shards." 
+ ) + ex_iterable = ex_iterable.shard_data_sources(worker_id=worker_info.id, num_workers=worker_info.num_workers) + self._state_dict = ex_iterable._init_state_dict() + if self._starting_state_dict: + ex_iterable.load_state_dict(self._starting_state_dict) + + if self._formatting: + formatter = get_formatter(self._formatting.format_type, features=self.features) + format_dict = ( + formatter.recursive_tensorize if isinstance(formatter, TensorFormatter) else cast_to_python_objects + ) + else: + format_dict = None + + if self._formatting and (ex_iterable.iter_arrow or self._formatting == "arrow"): + if ex_iterable.iter_arrow: + iterator = _batch_arrow_tables(ex_iterable.iter_arrow(), batch_size=1) + else: + iterator = _convert_to_arrow(ex_iterable, batch_size=1) + for key, pa_table in iterator: + yield formatter.format_row(pa_table) + return + else: + for key, example in ex_iterable: + if self.features: + # `IterableDataset` automatically fills missing columns with None. + # This is done with `_apply_feature_types_on_example`. + example = _apply_feature_types_on_example( + example, self.features, token_per_repo_id=self._token_per_repo_id + ) + yield format_dict(example) if format_dict else example + logger.debug( + f"{_log_prefix}dataloader worker#{worker_info.id}, ': Finished iterating over {len(shards_indices)}/{ex_iterable.n_shards} shards." + ) + else: + logger.debug( + f"{_log_prefix}dataloader worker#{worker_info.id}, ': Stopping... Number of dataset shards < num_workers ({ex_iterable.n_shards}<{worker_info.num_workers})." + ) + + def _is_main_process(self): + if self._distributed and self._distributed.rank > 0: + return False + if "torch" in sys.modules: + import torch.utils.data + + worker_info = torch.utils.data.get_worker_info() + if worker_info is not None and worker_info.id > 0: + return False + return True + + def _prepare_ex_iterable_for_iteration(self) -> _BaseExamplesIterable: + if self._shuffling: + ex_iterable = self._ex_iterable.shuffle_data_sources(self._effective_generator()) + else: + ex_iterable = self._ex_iterable + + if self._distributed: + rank = self._distributed.rank + world_size = self._distributed.world_size + if ex_iterable.n_shards % world_size == 0: + if self._is_main_process(): + n_shards_per_node = ex_iterable.n_shards // world_size + plural = "s" if n_shards_per_node > 1 else "" + logger.info( + f"Assigning {n_shards_per_node} shard{plural} (or data source{plural}) of the dataset to each node." + ) + ex_iterable = ex_iterable.shard_data_sources(rank, world_size) + else: + if self._is_main_process(): + logger.info( + f"Assigning 1 out of {world_size} examples of the dataset to each node. The others are skipped during the iteration." + ) + logger.info( + f"It is more optimized to distribute the dataset shards (or data sources) across nodes. " + f"You can do that by using a dataset with number of shards that is a factor of world_size={world_size}. 
" + f"The current dataset has {ex_iterable.n_shards} which is not a factor of {world_size}" + ) + ex_iterable = StepExamplesIterable(ex_iterable, step=world_size, offset=rank) + + return ex_iterable + + def __iter__(self): + if "torch" in sys.modules: + import torch.utils.data + + worker_info = torch.utils.data.get_worker_info() + if isinstance(self, torch.utils.data.IterableDataset) and worker_info is not None: + # We're a torch.utils.data.IterableDataset in a PyTorch worker process + yield from self._iter_pytorch() + return + + ex_iterable = self._prepare_ex_iterable_for_iteration() + self._state_dict = ex_iterable._init_state_dict() + if self._starting_state_dict: + ex_iterable.load_state_dict(self._starting_state_dict) + if self._formatting: + formatter = get_formatter(self._formatting.format_type, features=self.features) + format_dict = ( + formatter.recursive_tensorize if isinstance(formatter, TensorFormatter) else cast_to_python_objects + ) + else: + format_dict = None + + if self._formatting and (ex_iterable.iter_arrow or self._formatting.format_type == "arrow"): + assert self._state_dict is ex_iterable._state_dict + if ex_iterable.iter_arrow: + iterator = _batch_arrow_tables(ex_iterable.iter_arrow(), batch_size=1) + else: + iterator = _convert_to_arrow(ex_iterable, batch_size=1) + for key, pa_table in iterator: + yield formatter.format_row(pa_table) + return + + for key, example in ex_iterable: + if self.features: + # `IterableDataset` automatically fills missing columns with None. + # This is done with `_apply_feature_types_on_example`. + example = _apply_feature_types_on_example( + example, self.features, token_per_repo_id=self._token_per_repo_id + ) + yield format_dict(example) if format_dict else example + + def iter(self, batch_size: int, drop_last_batch: bool = False): + """Iterate through the batches of size `batch_size`. + + Args: + batch_size (:obj:`int`): size of each batch to yield. + drop_last_batch (:obj:`bool`, default `False`): Whether a last batch smaller than the batch_size should be + dropped + """ + + if self._formatting: + formatter = get_formatter(self._formatting.format_type, features=self.features) + format_dict = ( + formatter.recursive_tensorize if isinstance(formatter, TensorFormatter) else cast_to_python_objects + ) + else: + format_dict = None + + ex_iterable = self._prepare_ex_iterable_for_iteration() + if self._formatting and (ex_iterable.iter_arrow or self._formatting == "arrow"): + if ex_iterable.iter_arrow: + iterator = _batch_arrow_tables( + ex_iterable.iter_arrow(), batch_size=batch_size, drop_last_batch=drop_last_batch + ) + else: + iterator = _convert_to_arrow(ex_iterable, batch_size=batch_size, drop_last_batch=drop_last_batch) + for key, pa_table in iterator: + yield formatter.format_batch(pa_table) + return + + iterator = iter(ex_iterable) + for key, example in iterator: + # If batched, first build the batch + examples = [example] + [example for key, example in islice(iterator, batch_size - 1)] + if drop_last_batch and len(examples) < batch_size: # ignore last batch + return + batch = _examples_to_batch(examples) + if self.features: + # `IterableDataset` automatically fills missing columns with None. + # This is done with `_apply_feature_types_on_batch`. 
+ batch = _apply_feature_types_on_batch(batch, self.features, token_per_repo_id=self._token_per_repo_id) + yield format_dict(batch) if format_dict else batch + + @staticmethod + def from_generator( + generator: Callable, + features: Optional[Features] = None, + gen_kwargs: Optional[dict] = None, + ) -> "IterableDataset": + """Create an Iterable Dataset from a generator. + + Args: + generator (`Callable`): + A generator function that `yields` examples. + features (`Features`, *optional*): + Dataset features. + gen_kwargs(`dict`, *optional*): + Keyword arguments to be passed to the `generator` callable. + You can define a sharded iterable dataset by passing the list of shards in `gen_kwargs`. + This can be used to improve shuffling and when iterating over the dataset with multiple workers. + + Returns: + `IterableDataset` + + Example: + + ```py + >>> def gen(): + ... yield {"text": "Good", "label": 0} + ... yield {"text": "Bad", "label": 1} + ... + >>> ds = IterableDataset.from_generator(gen) + ``` + + ```py + >>> def gen(shards): + ... for shard in shards: + ... with open(shard) as f: + ... for line in f: + ... yield {"line": line} + ... + >>> shards = [f"data{i}.txt" for i in range(32)] + >>> ds = IterableDataset.from_generator(gen, gen_kwargs={"shards": shards}) + >>> ds = ds.shuffle(seed=42, buffer_size=10_000) # shuffles the shards order + uses a shuffle buffer + >>> from torch.utils.data import DataLoader + >>> dataloader = DataLoader(ds.with_format("torch"), num_workers=4) # give each worker a subset of 32/4=8 shards + ``` + """ + from .io.generator import GeneratorDatasetInputStream + + return GeneratorDatasetInputStream( + generator=generator, + features=features, + gen_kwargs=gen_kwargs, + streaming=True, + ).read() + + @staticmethod + def from_spark( + df: "pyspark.sql.DataFrame", + split: Optional[NamedSplit] = None, + features: Optional[Features] = None, + **kwargs, + ) -> "IterableDataset": + """Create an IterableDataset from Spark DataFrame. The dataset is streamed to the driver in batches. + + Args: + df (`pyspark.sql.DataFrame`): + The DataFrame containing the desired data. + split (`NamedSplit`, *optional*): + Split name to be assigned to the dataset. + features (`Features`, *optional*): + Dataset features. + + Returns: + [`IterableDataset`] + + Example: + + ```py + >>> df = spark.createDataFrame( + >>> data=[[1, "Elia"], [2, "Teo"], [3, "Fang"]], + >>> columns=["id", "name"], + >>> ) + >>> ds = IterableDataset.from_spark(df) + ``` + """ + from .io.spark import SparkDatasetReader + + if sys.platform == "win32": + raise EnvironmentError("IterableDataset.from_spark is not currently supported on Windows") + + return SparkDatasetReader( + df, + split=split, + features=features, + streaming=True, + **kwargs, + ).read() + + @staticmethod + def from_file(filename: str) -> "IterableDataset": + """Instantiate a IterableDataset from Arrow table at filename. + + Args: + filename (`str`): + File name of the dataset. + + Returns: + [`IterableDataset`] + """ + pa_table_schema = read_schema_from_file(filename) + inferred_features = Features.from_arrow_schema(pa_table_schema) + ex_iterable = ArrowExamplesIterable(Dataset._generate_tables_from_cache_file, kwargs={"filename": filename}) + return IterableDataset(ex_iterable=ex_iterable, info=DatasetInfo(features=inferred_features)) + + def with_format( + self, + type: Optional[str] = None, + ) -> "IterableDataset": + """ + Return a dataset with the specified format. + Supported formats: "arrow", or None for regular python objects. 
+ The other formats are currently not implemented. + + Args: + + type (`str`, optional, default None): if set to "torch", the returned dataset + will be a subclass of torch.utils.data.IterableDataset to be used in a DataLoader + """ + type = get_format_type_from_alias(type) + # TODO(QL): add format_kwargs + # TODO(QL): add format_columns and return_all_columns + # TODO(QL): add pandas format + return IterableDataset( + ex_iterable=self._ex_iterable, + info=self._info.copy(), + split=self._split, + formatting=FormattingConfig(format_type=type), + shuffling=copy.deepcopy(self._shuffling), + distributed=copy.deepcopy(self._distributed), + token_per_repo_id=self._token_per_repo_id, + ) + + def map( + self, + function: Optional[Callable] = None, + with_indices: bool = False, + input_columns: Optional[Union[str, List[str]]] = None, + batched: bool = False, + batch_size: Optional[int] = 1000, + drop_last_batch: bool = False, + remove_columns: Optional[Union[str, List[str]]] = None, + features: Optional[Features] = None, + fn_kwargs: Optional[dict] = None, + ) -> "IterableDataset": + """ + Apply a function to all the examples in the iterable dataset (individually or in batches) and update them. + If your function returns a column that already exists, then it overwrites it. + The function is applied on-the-fly on the examples when iterating over the dataset. + + You can specify whether the function should be batched or not with the `batched` parameter: + + - If batched is `False`, then the function takes 1 example in and should return 1 example. + An example is a dictionary, e.g. `{"text": "Hello there !"}`. + - If batched is `True` and `batch_size` is 1, then the function takes a batch of 1 example as input and can return a batch with 1 or more examples. + A batch is a dictionary, e.g. a batch of 1 example is {"text": ["Hello there !"]}. + - If batched is `True` and `batch_size` is `n` > 1, then the function takes a batch of `n` examples as input and can return a batch with `n` examples, or with an arbitrary number of examples. + Note that the last batch may have less than `n` examples. + A batch is a dictionary, e.g. a batch of `n` examples is `{"text": ["Hello there !"] * n}`. + + Args: + function (`Callable`, *optional*, defaults to `None`): + Function applied on-the-fly on the examples when you iterate on the dataset. + It must have one of the following signatures: + + - `function(example: Dict[str, Any]) -> Dict[str, Any]` if `batched=False` and `with_indices=False` + - `function(example: Dict[str, Any], idx: int) -> Dict[str, Any]` if `batched=False` and `with_indices=True` + - `function(batch: Dict[str, List]) -> Dict[str, List]` if `batched=True` and `with_indices=False` + - `function(batch: Dict[str, List], indices: List[int]) -> Dict[str, List]` if `batched=True` and `with_indices=True` + + For advanced usage, the function can also return a `pyarrow.Table`. + Moreover if your function returns nothing (`None`), then `map` will run your function and return the dataset unchanged. + If no function is provided, default to identity function: `lambda x: x`. + with_indices (`bool`, defaults to `False`): + Provide example indices to `function`. Note that in this case the signature of `function` should be `def function(example, idx[, rank]): ...`. + input_columns (`Optional[Union[str, List[str]]]`, defaults to `None`): + The columns to be passed into `function` + as positional arguments. If `None`, a dict mapping to all formatted columns is passed as one argument. 
+ batched (`bool`, defaults to `False`): + Provide batch of examples to `function`. + batch_size (`int`, *optional*, defaults to `1000`): + Number of examples per batch provided to `function` if `batched=True`. + `batch_size <= 0` or `batch_size == None` then provide the full dataset as a single batch to `function`. + drop_last_batch (`bool`, defaults to `False`): + Whether a last batch smaller than the batch_size should be + dropped instead of being processed by the function. + remove_columns (`[List[str]]`, *optional*, defaults to `None`): + Remove a selection of columns while doing the mapping. + Columns will be removed before updating the examples with the output of `function`, i.e. if `function` is adding + columns with names in `remove_columns`, these columns will be kept. + features (`[Features]`, *optional*, defaults to `None`): + Feature types of the resulting dataset. + fn_kwargs (`Dict`, *optional*, default `None`): + Keyword arguments to be passed to `function`. + + Example: + + ```py + >>> from datasets import load_dataset + >>> ds = load_dataset("rotten_tomatoes", split="train", streaming=True) + >>> def add_prefix(example): + ... example["text"] = "Review: " + example["text"] + ... return example + >>> ds = ds.map(add_prefix) + >>> list(ds.take(3)) + [{'label': 1, + 'text': 'Review: the rock is destined to be the 21st century\'s new " conan " and that he\'s going to make a splash even greater than arnold schwarzenegger , jean-claud van damme or steven segal .'}, + {'label': 1, + 'text': 'Review: the gorgeously elaborate continuation of " the lord of the rings " trilogy is so huge that a column of words cannot adequately describe co-writer/director peter jackson\'s expanded vision of j . r . r . tolkien\'s middle-earth .'}, + {'label': 1, 'text': 'Review: effective but too-tepid biopic'}] + ``` + """ + if isinstance(input_columns, str): + input_columns = [input_columns] + if isinstance(remove_columns, str): + remove_columns = [remove_columns] + if function is None: + function = identity_func + if fn_kwargs is None: + fn_kwargs = {} + ex_iterable = MappedExamplesIterable( + TypedExamplesIterable(self._ex_iterable, self._info.features, token_per_repo_id=self._token_per_repo_id) + if self._info.features is not None + else self._ex_iterable, + function=function, + with_indices=with_indices, + input_columns=input_columns, + batched=batched, + batch_size=batch_size, + drop_last_batch=drop_last_batch, + remove_columns=remove_columns, + fn_kwargs=fn_kwargs, + formatting=self._formatting, + ) + info = self.info.copy() + info.features = features + return IterableDataset( + ex_iterable=ex_iterable, + info=info, + split=self._split, + formatting=self._formatting, + shuffling=copy.deepcopy(self._shuffling), + distributed=copy.deepcopy(self._distributed), + token_per_repo_id=self._token_per_repo_id, + ) + + def filter( + self, + function: Optional[Callable] = None, + with_indices=False, + input_columns: Optional[Union[str, List[str]]] = None, + batched: bool = False, + batch_size: Optional[int] = 1000, + fn_kwargs: Optional[dict] = None, + ) -> "IterableDataset": + """Apply a filter function to all the elements so that the dataset only includes examples according to the filter function. + The filtering is done on-the-fly when iterating over the dataset. 
+ + Args: + function (`Callable`): + Callable with one of the following signatures: + + - `function(example: Dict[str, Any]) -> bool` if `with_indices=False, batched=False` + - `function(example: Dict[str, Any], indices: int) -> bool` if `with_indices=True, batched=False` + - `function(example: Dict[str, List]) -> List[bool]` if `with_indices=False, batched=True` + - `function(example: Dict[str, List], indices: List[int]) -> List[bool]` if `with_indices=True, batched=True` + + If no function is provided, defaults to an always True function: `lambda x: True`. + with_indices (`bool`, defaults to `False`): + Provide example indices to `function`. Note that in this case the signature of `function` should be `def function(example, idx): ...`. + input_columns (`str` or `List[str]`, *optional*): + The columns to be passed into `function` as + positional arguments. If `None`, a dict mapping to all formatted columns is passed as one argument. + batched (`bool`, defaults to `False`): + Provide batch of examples to `function`. + batch_size (`int`, *optional*, default `1000`): + Number of examples per batch provided to `function` if `batched=True`. + fn_kwargs (`Dict`, *optional*, default `None`): + Keyword arguments to be passed to `function`. + + Example: + + ```py + >>> from datasets import load_dataset + >>> ds = load_dataset("rotten_tomatoes", split="train", streaming=True) + >>> ds = ds.filter(lambda x: x["label"] == 0) + >>> list(ds.take(3)) + [{'label': 0, 'movie_review': 'simplistic , silly and tedious .'}, + {'label': 0, + 'movie_review': "it's so laddish and juvenile , only teenage boys could possibly find it funny ."}, + {'label': 0, + 'movie_review': 'exploitative and largely devoid of the depth or sophistication that would make watching such a graphic treatment of the crimes bearable .'}] + ``` + """ + if isinstance(input_columns, str): + input_columns = [input_columns] + + # TODO(QL): keep the features (right now if we keep it it would call decode_example again on an already decoded example) + info = copy.deepcopy(self._info) + info.features = None + + # We need the examples to be decoded for certain feature types like Image or Audio, so we use TypedExamplesIterable here + ex_iterable = FilteredExamplesIterable( + TypedExamplesIterable(self._ex_iterable, self._info.features, token_per_repo_id=self._token_per_repo_id) + if self._info.features is not None + else self._ex_iterable, + function=function, + with_indices=with_indices, + input_columns=input_columns, + batched=batched, + batch_size=batch_size, + fn_kwargs=fn_kwargs, + formatting=self._formatting, + ) + return IterableDataset( + ex_iterable=ex_iterable, + info=info, + split=self._split, + formatting=self._formatting, + shuffling=copy.deepcopy(self._shuffling), + distributed=copy.deepcopy(self._distributed), + token_per_repo_id=self._token_per_repo_id, + ) + + def shuffle( + self, seed=None, generator: Optional[np.random.Generator] = None, buffer_size: int = 1000 + ) -> "IterableDataset": + """ + Randomly shuffles the elements of this dataset. + + This dataset fills a buffer with `buffer_size` elements, then randomly samples elements from this buffer, + replacing the selected elements with new elements. For perfect shuffling, a buffer size greater than or + equal to the full size of the dataset is required. + + For instance, if your dataset contains 10,000 elements but `buffer_size` is set to 1000, then `shuffle` will + initially select a random element from only the first 1000 elements in the buffer. 
Once an element is + selected, its space in the buffer is replaced by the next (i.e. 1,001-st) element, + maintaining the 1000 element buffer. + + If the dataset is made of several shards, it also does shuffle the order of the shards. + However if the order has been fixed by using [`~datasets.IterableDataset.skip`] or [`~datasets.IterableDataset.take`] + then the order of the shards is kept unchanged. + + Args: + seed (`int`, *optional*, defaults to `None`): + Random seed that will be used to shuffle the dataset. + It is used to sample from the shuffle buffer and also to shuffle the data shards. + generator (`numpy.random.Generator`, *optional*): + Numpy random Generator to use to compute the permutation of the dataset rows. + If `generator=None` (default), uses `np.random.default_rng` (the default BitGenerator (PCG64) of NumPy). + buffer_size (`int`, defaults to `1000`): + Size of the buffer. + + Example: + + ```py + >>> from datasets import load_dataset + >>> ds = load_dataset("rotten_tomatoes", split="train", streaming=True) + >>> list(ds.take(3)) + [{'label': 1, + 'text': 'the rock is destined to be the 21st century\'s new " conan " and that he\'s going to make a splash even greater than arnold schwarzenegger , jean-claud van damme or steven segal .'}, + {'label': 1, + 'text': 'the gorgeously elaborate continuation of " the lord of the rings " trilogy is so huge that a column of words cannot adequately describe co-writer/director peter jackson\'s expanded vision of j . r . r . tolkien\'s middle-earth .'}, + {'label': 1, 'text': 'effective but too-tepid biopic'}] + >>> shuffled_ds = ds.shuffle(seed=42) + >>> list(shuffled_ds.take(3)) + [{'label': 1, + 'text': "a sports movie with action that's exciting on the field and a story you care about off it ."}, + {'label': 1, + 'text': 'at its best , the good girl is a refreshingly adult take on adultery . . .'}, + {'label': 1, + 'text': "sam jones became a very lucky filmmaker the day wilco got dropped from their record label , proving that one man's ruin may be another's fortune ."}] + ``` + """ + if generator is None: + generator = np.random.default_rng(seed) + else: + generator = deepcopy(generator) + shuffling = ShufflingConfig(generator=generator, _original_seed=seed) + return IterableDataset( + ex_iterable=BufferShuffledExamplesIterable( + self._ex_iterable, buffer_size=buffer_size, generator=generator + ).shuffle_data_sources(generator), + info=self._info.copy(), + split=self._split, + formatting=self._formatting, + shuffling=shuffling, + distributed=copy.deepcopy(self._distributed), + token_per_repo_id=self._token_per_repo_id, + ) + + def set_epoch(self, epoch: int): + self._epoch = epoch + + def skip(self, n: int) -> "IterableDataset": + """ + Create a new [`IterableDataset`] that skips the first `n` elements. + + Args: + n (`int`): + Number of elements to skip. + + Example: + + ```py + >>> from datasets import load_dataset + >>> ds = load_dataset("rotten_tomatoes", split="train", streaming=True) + >>> list(ds.take(3)) + [{'label': 1, + 'text': 'the rock is destined to be the 21st century\'s new " conan " and that he\'s going to make a splash even greater than arnold schwarzenegger , jean-claud van damme or steven segal .'}, + {'label': 1, + 'text': 'the gorgeously elaborate continuation of " the lord of the rings " trilogy is so huge that a column of words cannot adequately describe co-writer/director peter jackson\'s expanded vision of j . r . r . 
tolkien\'s middle-earth .'}, + {'label': 1, 'text': 'effective but too-tepid biopic'}] + >>> ds = ds.skip(1) + >>> list(ds.take(3)) + [{'label': 1, + 'text': 'the gorgeously elaborate continuation of " the lord of the rings " trilogy is so huge that a column of words cannot adequately describe co-writer/director peter jackson\'s expanded vision of j . r . r . tolkien\'s middle-earth .'}, + {'label': 1, 'text': 'effective but too-tepid biopic'}, + {'label': 1, + 'text': 'if you sometimes like to go to the movies to have fun , wasabi is a good place to start .'}] + ``` + """ + ex_iterable = SkipExamplesIterable(self._ex_iterable, n) + return IterableDataset( + ex_iterable=ex_iterable, + info=self._info.copy(), + split=self._split, + formatting=self._formatting, + shuffling=copy.deepcopy(self._shuffling), + distributed=copy.deepcopy(self._distributed), + token_per_repo_id=self._token_per_repo_id, + ) + + def take(self, n: int) -> "IterableDataset": + """ + Create a new [`IterableDataset`] with only the first `n` elements. + + Args: + n (`int`): + Number of elements to take. + + Example: + + ```py + >>> from datasets import load_dataset + >>> ds = load_dataset("rotten_tomatoes", split="train", streaming=True) + >>> small_ds = ds.take(2) + >>> list(small_ds) + [{'label': 1, + 'text': 'the rock is destined to be the 21st century\'s new " conan " and that he\'s going to make a splash even greater than arnold schwarzenegger , jean-claud van damme or steven segal .'}, + {'label': 1, + 'text': 'the gorgeously elaborate continuation of " the lord of the rings " trilogy is so huge that a column of words cannot adequately describe co-writer/director peter jackson\'s expanded vision of j . r . r . tolkien\'s middle-earth .'}] + ``` + """ + ex_iterable = TakeExamplesIterable(self._ex_iterable, n) + return IterableDataset( + ex_iterable=ex_iterable, + info=self._info.copy(), + split=self._split, + formatting=self._formatting, + shuffling=copy.deepcopy(self._shuffling), + distributed=copy.deepcopy(self._distributed), + token_per_repo_id=self._token_per_repo_id, + ) + + @property + def column_names(self) -> Optional[List[str]]: + """Names of the columns in the dataset. + + Example: + + ```py + >>> from datasets import load_dataset + >>> ds = load_dataset("rotten_tomatoes", split="validation", streaming=True) + >>> ds.column_names + ['text', 'label'] + ``` + """ + return list(self._info.features.keys()) if self._info.features is not None else None + + def add_column(self, name: str, column: Union[list, np.array]) -> "IterableDataset": + """Add column to Dataset. + + Args: + name (str): Column name. + column (list or np.array): Column data to be added. + + Returns: + `IterableDataset` + """ + return self.map(partial(add_column_fn, name=name, column=column), with_indices=True) + + def rename_column(self, original_column_name: str, new_column_name: str) -> "IterableDataset": + """ + Rename a column in the dataset, and move the features associated to the original column under the new column + name. + + Args: + original_column_name (`str`): + Name of the column to rename. + new_column_name (`str`): + New name for the column. + + Returns: + `IterableDataset`: A copy of the dataset with a renamed column. 
+ + Example: + + ```py + >>> from datasets import load_dataset + >>> ds = load_dataset("rotten_tomatoes", split="train", streaming=True) + >>> next(iter(ds)) + {'label': 1, + 'text': 'the rock is destined to be the 21st century\'s new " conan " and that he\'s going to make a splash even greater than arnold schwarzenegger , jean-claud van damme or steven segal .'} + >>> ds = ds.rename_column("text", "movie_review") + >>> next(iter(ds)) + {'label': 1, + 'movie_review': 'the rock is destined to be the 21st century\'s new " conan " and that he\'s going to make a splash even greater than arnold schwarzenegger , jean-claud van damme or steven segal .'} + ``` + """ + return self.rename_columns({original_column_name: new_column_name}) + + def rename_columns(self, column_mapping: Dict[str, str]) -> "IterableDataset": + """ + Rename several columns in the dataset, and move the features associated to the original columns under + the new column names. + + Args: + column_mapping (`Dict[str, str]`): A mapping of columns to rename to their new names + + Returns: + `IterableDataset`: A copy of the dataset with renamed columns + """ + + original_features = self._info.features.copy() if self._info.features else None + ds_iterable = self.map( + partial(_rename_columns_fn, column_mapping=column_mapping), remove_columns=list(column_mapping) + ) + if original_features is not None: + ds_iterable._info.features = Features( + { + column_mapping[col] if col in column_mapping.keys() else col: feature + for col, feature in original_features.items() + } + ) + # check that it's still valid, especially with regard to task templates + try: + ds_iterable._info.copy() + except ValueError: + ds_iterable._info.task_templates = None + return ds_iterable + + def remove_columns(self, column_names: Union[str, List[str]]) -> "IterableDataset": + """ + Remove one or several column(s) in the dataset and the features associated to them. + The removal is done on-the-fly on the examples when iterating over the dataset. + + + Args: + column_names (`Union[str, List[str]]`): + Name of the column(s) to remove. + + Returns: + `IterableDataset`: A copy of the dataset object without the columns to remove. + + Example: + + ```py + >>> from datasets import load_dataset + >>> ds = load_dataset("rotten_tomatoes", split="train", streaming=True) + >>> next(iter(ds)) + {'text': 'the rock is destined to be the 21st century\'s new " conan " and that he\'s going to make a splash even greater than arnold schwarzenegger , jean-claud van damme or steven segal .', 'label': 1} + >>> ds = ds.remove_columns("label") + >>> next(iter(ds)) + {'text': 'the rock is destined to be the 21st century\'s new " conan " and that he\'s going to make a splash even greater than arnold schwarzenegger , jean-claud van damme or steven segal .'} + ``` + """ + original_features = self._info.features.copy() if self._info.features else None + ds_iterable = self.map(remove_columns=column_names) + if original_features is not None: + ds_iterable._info.features = original_features.copy() + for col, _ in original_features.items(): + if col in column_names: + del ds_iterable._info.features[col] + # check that it's still valid, especially with regard to task templates + try: + ds_iterable._info.copy() + except ValueError: + ds_iterable._info.task_templates = None + + return ds_iterable + + def select_columns(self, column_names: Union[str, List[str]]) -> "IterableDataset": + """Select one or several column(s) in the dataset and the features + associated to them. 
The selection is done on-the-fly on the examples + when iterating over the dataset. + + + Args: + column_names (`Union[str, List[str]]`): + Name of the column(s) to select. + + Returns: + `IterableDataset`: A copy of the dataset object with selected columns. + + Example: + + ```py + >>> from datasets import load_dataset + >>> ds = load_dataset("rotten_tomatoes", split="train", streaming=True) + >>> next(iter(ds)) + {'text': 'the rock is destined to be the 21st century\'s new " conan " and that he\'s going to make a splash even greater than arnold schwarzenegger , jean-claud van damme or steven segal .', 'label': 1} + >>> ds = ds.select_columns("text") + >>> next(iter(ds)) + {'text': 'the rock is destined to be the 21st century\'s new " conan " and that he\'s going to make a splash even greater than arnold schwarzenegger , jean-claud van damme or steven segal .'} + ``` + """ + if isinstance(column_names, str): + column_names = [column_names] + + if self._info: + info = copy.deepcopy(self._info) + if self._info.features is not None: + missing_columns = set(column_names) - set(self._info.features.keys()) + if missing_columns: + raise ValueError( + f"Column name {list(missing_columns)} not in the " + "dataset. Columns in the dataset: " + f"{list(self._info.features.keys())}." + ) + info.features = Features({c: info.features[c] for c in column_names}) + # check that it's still valid, especially with regard to task templates + try: + info.copy() + except ValueError: + info.task_templates = None + + ex_iterable = SelectColumnsIterable(self._ex_iterable, column_names) + return IterableDataset( + ex_iterable=ex_iterable, + info=info, + split=self._split, + formatting=self._formatting, + shuffling=self._shuffling, + distributed=self._distributed, + token_per_repo_id=self._token_per_repo_id, + ) + + def cast_column(self, column: str, feature: FeatureType) -> "IterableDataset": + """Cast column to feature for decoding. + + Args: + column (`str`): + Column name. + feature (`Feature`): + Target feature. 
+ + Returns: + `IterableDataset` + + Example: + + ```py + >>> from datasets import load_dataset, Audio + >>> ds = load_dataset("PolyAI/minds14", name="en-US", split="train", streaming=True) + >>> ds.features + {'audio': Audio(sampling_rate=8000, mono=True, decode=True, id=None), + 'english_transcription': Value(dtype='string', id=None), + 'intent_class': ClassLabel(num_classes=14, names=['abroad', 'address', 'app_error', 'atm_limit', 'balance', 'business_loan', 'card_issues', 'cash_deposit', 'direct_debit', 'freeze', 'high_value_payment', 'joint_account', 'latest_transactions', 'pay_bill'], id=None), + 'lang_id': ClassLabel(num_classes=14, names=['cs-CZ', 'de-DE', 'en-AU', 'en-GB', 'en-US', 'es-ES', 'fr-FR', 'it-IT', 'ko-KR', 'nl-NL', 'pl-PL', 'pt-PT', 'ru-RU', 'zh-CN'], id=None), + 'path': Value(dtype='string', id=None), + 'transcription': Value(dtype='string', id=None)} + >>> ds = ds.cast_column("audio", Audio(sampling_rate=16000)) + >>> ds.features + {'audio': Audio(sampling_rate=16000, mono=True, decode=True, id=None), + 'english_transcription': Value(dtype='string', id=None), + 'intent_class': ClassLabel(num_classes=14, names=['abroad', 'address', 'app_error', 'atm_limit', 'balance', 'business_loan', 'card_issues', 'cash_deposit', 'direct_debit', 'freeze', 'high_value_payment', 'joint_account', 'latest_transactions', 'pay_bill'], id=None), + 'lang_id': ClassLabel(num_classes=14, names=['cs-CZ', 'de-DE', 'en-AU', 'en-GB', 'en-US', 'es-ES', 'fr-FR', 'it-IT', 'ko-KR', 'nl-NL', 'pl-PL', 'pt-PT', 'ru-RU', 'zh-CN'], id=None), + 'path': Value(dtype='string', id=None), + 'transcription': Value(dtype='string', id=None)} + ``` + """ + info = self._info.copy() + info.features[column] = feature + # check that it's still valid, especially with regard to task templates + try: + info.copy() + except ValueError: + info.task_templates = None + return IterableDataset( + ex_iterable=self._ex_iterable, + info=info, + split=self._split, + formatting=self._formatting, + shuffling=copy.deepcopy(self._shuffling), + distributed=copy.deepcopy(self._distributed), + token_per_repo_id=self._token_per_repo_id, + ) + + def cast( + self, + features: Features, + ) -> "IterableDataset": + """ + Cast the dataset to a new set of features. + + Args: + features ([`Features`]): + New features to cast the dataset to. + The name of the fields in the features must match the current column names. + The type of the data must also be convertible from one type to the other. + For non-trivial conversion, e.g. `string` <-> `ClassLabel` you should use [`~Dataset.map`] to update the Dataset. + + Returns: + `IterableDataset`: A copy of the dataset with casted features. 
+ + Example: + + ```py + >>> from datasets import load_dataset + >>> ds = load_dataset("rotten_tomatoes", split="train", streaming=True) + >>> ds.features + {'label': ClassLabel(num_classes=2, names=['neg', 'pos'], id=None), + 'text': Value(dtype='string', id=None)} + >>> new_features = ds.features.copy() + >>> new_features["label"] = ClassLabel(names=["bad", "good"]) + >>> new_features["text"] = Value("large_string") + >>> ds = ds.cast(new_features) + >>> ds.features + {'label': ClassLabel(num_classes=2, names=['bad', 'good'], id=None), + 'text': Value(dtype='large_string', id=None)} + ``` + """ + info = self._info.copy() + info.features = features + # check that it's still valid, especially with regard to task templates + try: + info.copy() + except ValueError: + info.task_templates = None + return IterableDataset( + ex_iterable=self._ex_iterable, + info=info, + split=self._split, + formatting=self._formatting, + shuffling=copy.deepcopy(self._shuffling), + distributed=copy.deepcopy(self._distributed), + token_per_repo_id=self._token_per_repo_id, + ) + + def _step(self, step: int, offset: int) -> "IterableDataset": + ex_iterable = StepExamplesIterable(self._ex_iterable, step=step, offset=offset) + return IterableDataset( + ex_iterable=ex_iterable, + info=self._info.copy(), + split=self._split, + formatting=self._formatting, + shuffling=copy.deepcopy(self._shuffling), + distributed=copy.deepcopy(self._distributed), + token_per_repo_id=self._token_per_repo_id, + ) + + def _resolve_features(self): + if self.features is not None: + return self + elif isinstance(self._ex_iterable, TypedExamplesIterable): + features = self._ex_iterable.features + else: + features = _infer_features_from_batch(self.with_format(None)._head()) + info = self.info.copy() + info.features = features + return IterableDataset( + ex_iterable=self._ex_iterable, + info=info, + split=self._split, + formatting=self._formatting, + shuffling=copy.deepcopy(self._shuffling), + distributed=copy.deepcopy(self._distributed), + token_per_repo_id=self._token_per_repo_id, + ) + + +def _concatenate_iterable_datasets( + dsets: List[IterableDataset], + info: Optional[DatasetInfo] = None, + split: Optional[NamedSplit] = None, + axis: int = 0, +) -> IterableDataset: + """ + Converts a list of `IterableDataset` with the same schema into a single `IterableDataset`. + Missing data are filled with None values. + + + + Args: + dsets (`List[datasets.IterableDataset]`): List of Datasets to concatenate. + info (`DatasetInfo`, optional): Dataset information, like description, citation, etc. + split (`NamedSplit`, optional): Name of the dataset split. + axis (``{0, 1}``, default ``0``, meaning over rows): + Axis to concatenate over, where ``0`` means over rows (vertically) and ``1`` means over columns + (horizontally). 
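+
+ In practice this helper is usually reached through the public `datasets.concatenate_datasets` function when all inputs are iterable datasets. A minimal sketch, assuming two streaming splits that share the same columns:
+
+ ```py
+ >>> from datasets import concatenate_datasets, load_dataset
+ >>> ds1 = load_dataset("rotten_tomatoes", split="train", streaming=True)
+ >>> ds2 = load_dataset("rotten_tomatoes", split="validation", streaming=True)
+ >>> combined = concatenate_datasets([ds1, ds2])  # yields the rows of ds1, then the rows of ds2
+ ```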
+ + *New in version 1.6.0* + + Example: + + ```py + >>> ds3 = _concatenate_iterable_datasets([ds1, ds2]) + ``` + """ + dsets = [d._resolve_features() for d in dsets] + + # Perform checks (and a potentional cast if axis=0) + if axis == 0: + _check_if_features_can_be_aligned([dset.features for dset in dsets]) + else: + _check_column_names([col_name for dset in dsets for col_name in dset.features]) + + # TODO: improve this to account for a mix of ClassLabel and Value for example + # right now it would keep the type of the first dataset in the list + features = Features( + {k: v for features in _align_features([dset.features for dset in dsets]) for k, v in features.items()} + ) + + ex_iterables = [copy.deepcopy(d._ex_iterable) for d in dsets] + if axis == 0: + ex_iterable = VerticallyConcatenatedMultiSourcesExamplesIterable(ex_iterables) + else: + ex_iterable = HorizontallyConcatenatedMultiSourcesExamplesIterable(ex_iterables) + # Set new info - we update the features + # setting the features also ensures to fill missing columns with None + if info is None: + info = DatasetInfo.from_merge([d.info for d in dsets]) + else: + info = info.copy() + info.features = features + # Get all the auth tokens per repository - in case the datasets come from different private repositories + token_per_repo_id = {repo_id: token for dataset in dsets for repo_id, token in dataset._token_per_repo_id.items()} + # Return new daset + return IterableDataset(ex_iterable=ex_iterable, info=info, split=split, token_per_repo_id=token_per_repo_id) + + +def _interleave_iterable_datasets( + datasets: List[IterableDataset], + probabilities: Optional[List[float]] = None, + seed: Optional[int] = None, + info: Optional[DatasetInfo] = None, + split: Optional[NamedSplit] = None, + stopping_strategy: Literal["first_exhausted", "all_exhausted"] = "first_exhausted", +) -> IterableDataset: + """ + Interleave several iterable datasets (sources) into a single iterable dataset. + The new iterable dataset alternates between the sources to yield examples. + If `probabilities = None` (default) the iterable dataset will cycles through the sources in order for each next example in the iteration. + If `probabilities` is not `None, the iterable dataset will sample a random source according to the provided probabilities for each next examples in the iteration. + + + + Args: + datasets (`List[IterableDataset]`): list of datasets to interleave + probabilities (`List[float]`, optional, default None): If specified, the new iterable dataset samples + examples from one source at a time according to these probabilities. + seed (`int`, optional, default None): The random seed used to choose a source for each example. + stopping_strategy (`str`, defaults to `first_exhausted`): + Two strategies are proposed right now. + By default, `first_exhausted` is an undersampling strategy, i.e the dataset construction is stopped as soon as one dataset has ran out of samples. + If the strategy is `all_exhausted`, we use an oversampling strategy, i.e the dataset construction is stopped as soon as every samples of every dataset has been added at least once. + Note that if the strategy is `all_exhausted`, the interleaved dataset size can get enormous: + - with no probabilities, the resulting dataset will have max_length_datasets*nb_dataset samples. + - with given probabilities, the resulting dataset will have more samples if some datasets have really low probability of visiting. 
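+
+ This helper is normally reached through the public `datasets.interleave_datasets` function. A minimal sketch, assuming two streaming sources with compatible features:
+
+ ```py
+ >>> from datasets import interleave_datasets, load_dataset
+ >>> ds1 = load_dataset("rotten_tomatoes", split="train", streaming=True)
+ >>> ds2 = load_dataset("rotten_tomatoes", split="validation", streaming=True)
+ >>> mixed = interleave_datasets(
+ ...     [ds1, ds2],
+ ...     probabilities=[0.8, 0.2],  # sample from ds1 about 80% of the time
+ ...     seed=42,
+ ...     stopping_strategy="all_exhausted",
+ ... )
+ >>> next(iter(mixed))
+ ```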
+ + Output: + `datasets.IterableDataset` + """ + datasets = [d._resolve_features() for d in datasets] + + # Perform checks + _check_if_features_can_be_aligned([dset.features for dset in datasets]) + + # TODO: improve this to account for a mix of ClassLabel and Value for example + # right now it would keep the type of the first dataset in the list + features = Features( + {k: v for features in _align_features([dset.features for dset in datasets]) for k, v in features.items()} + ) + + ex_iterables = [copy.deepcopy(d._ex_iterable) for d in datasets] + + # Use cycling or random cycling of sources + if probabilities is None: + ex_iterable = CyclingMultiSourcesExamplesIterable(ex_iterables, stopping_strategy=stopping_strategy) + else: + generator = np.random.default_rng(seed) + ex_iterable = RandomlyCyclingMultiSourcesExamplesIterable( + ex_iterables, generator=generator, probabilities=probabilities, stopping_strategy=stopping_strategy + ) + # Set new info - we update the features + # setting the features also ensures to fill missing columns with None + if info is None: + info = DatasetInfo.from_merge([d.info for d in datasets]) + else: + info = info.copy() + info.features = features + # Get all the auth tokens per repository - in case the datasets come from different private repositories + token_per_repo_id = { + repo_id: token for dataset in datasets for repo_id, token in dataset._token_per_repo_id.items() + } + # Return new daset + return IterableDataset(ex_iterable=ex_iterable, info=info, split=split, token_per_repo_id=token_per_repo_id) + + +def _split_by_node_iterable_dataset(dataset: IterableDataset, rank: int, world_size: int) -> IterableDataset: + """ + Split an iterable dataset for the node at rank `rank` in a pool of nodes of size `world_size`. + + If the dataset has a number of shards that is a factor of `world_size` (i.e. if `dataset.n_shards % world_size == 0`), + then the shards are evenly assigned across the nodes, which is the most optimized. + Otherwise, each node keeps 1 example out of `world_size`, skipping the other examples. + + Args: + dataset ([`IterableDataset`]): + The iterable dataset to split by node. + rank (`int`): + Rank of the current node. + world_size (`int`): + Total number of nodes. + + Returns: + [`IterableDataset`]: The iterable dataset to be used on the node at rank `rank`. + """ + if dataset._distributed: + world_size = world_size * dataset._distributed.world_size + rank = world_size * dataset._distributed.rank + rank + distributed = DistributedConfig(rank=rank, world_size=world_size) + return IterableDataset( + ex_iterable=dataset._ex_iterable, + info=dataset._info.copy(), + split=dataset._split, + formatting=dataset._formatting, + shuffling=copy.deepcopy(dataset._shuffling), + distributed=distributed, + token_per_repo_id=dataset._token_per_repo_id, + ) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/metric.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/metric.py new file mode 100644 index 0000000000000000000000000000000000000000..187c5e5c925b71b26ca83021523dd55c28989d28 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/metric.py @@ -0,0 +1,652 @@ +# Copyright 2020 The HuggingFace Datasets Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Lint as: python3 +"""Metrics base class.""" + +import os +import types +import uuid +from typing import Any, Dict, List, Optional, Tuple, Union + +import numpy as np +import pyarrow as pa +from filelock import BaseFileLock, Timeout + +from . import config +from .arrow_dataset import Dataset +from .arrow_reader import ArrowReader +from .arrow_writer import ArrowWriter +from .download.download_config import DownloadConfig +from .download.download_manager import DownloadManager +from .features import Features +from .info import DatasetInfo, MetricInfo +from .naming import camelcase_to_snakecase +from .utils._filelock import FileLock +from .utils.deprecation_utils import deprecated +from .utils.logging import get_logger +from .utils.py_utils import copyfunc, temp_seed + + +logger = get_logger(__name__) + + +class FileFreeLock(BaseFileLock): + """Thread lock until a file **cannot** be locked""" + + def __init__(self, lock_file, *args, **kwargs): + self.filelock = FileLock(lock_file) + super().__init__(self.filelock.lock_file, *args, **kwargs) + + def _acquire(self): + try: + self.filelock.acquire(timeout=0.01, poll_intervall=0.02) # Try to lock once + except Timeout: + # We couldn't acquire the lock, the file is locked! + self._context.lock_file_fd = self.filelock.lock_file + else: + # We were able to acquire the lock, the file is not yet locked! + self.filelock.release() + self._context.lock_file_fd = None + + def _release(self): + self._context.lock_file_fd = None + + +# lists - summarize long lists similarly to NumPy +# arrays/tensors - let the frameworks control formatting +def summarize_if_long_list(obj): + if not type(obj) == list or len(obj) <= 6: # noqa: E721 + return f"{obj}" + + def format_chunk(chunk): + return ", ".join(repr(x) for x in chunk) + + return f"[{format_chunk(obj[:3])}, ..., {format_chunk(obj[-3:])}]" + + +class MetricInfoMixin: + """This base class exposes some attributes of MetricInfo + at the base level of the Metric for easy access. 
+ + + + Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate + + + + """ + + def __init__(self, info: MetricInfo): + self._metric_info = info + + @property + def info(self): + """:class:`datasets.MetricInfo` object containing all the metadata in the metric.""" + return self._metric_info + + @property + def name(self) -> str: + return self._metric_info.metric_name + + @property + def experiment_id(self) -> Optional[str]: + return self._metric_info.experiment_id + + @property + def description(self) -> str: + return self._metric_info.description + + @property + def citation(self) -> str: + return self._metric_info.citation + + @property + def features(self) -> Features: + return self._metric_info.features + + @property + def inputs_description(self) -> str: + return self._metric_info.inputs_description + + @property + def homepage(self) -> Optional[str]: + return self._metric_info.homepage + + @property + def license(self) -> str: + return self._metric_info.license + + @property + def codebase_urls(self) -> Optional[List[str]]: + return self._metric_info.codebase_urls + + @property + def reference_urls(self) -> Optional[List[str]]: + return self._metric_info.reference_urls + + @property + def streamable(self) -> bool: + return self._metric_info.streamable + + @property + def format(self) -> Optional[str]: + return self._metric_info.format + + +class Metric(MetricInfoMixin): + """A Metric is the base class and common API for all metrics. + + + + Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate + + + + Args: + config_name (``str``): This is used to define a hash specific to a metrics computation script and prevents the metric's data + to be overridden when the metric loading script is modified. + keep_in_memory (:obj:`bool`): keep all predictions and references in memory. Not possible in distributed settings. + cache_dir (``str``): Path to a directory in which temporary prediction/references data will be stored. + The data directory should be located on a shared file-system in distributed setups. + num_process (``int``): specify the total number of nodes in a distributed settings. + This is useful to compute metrics in distributed setups (in particular non-additive metrics like F1). + process_id (``int``): specify the id of the current process in a distributed setup (between 0 and num_process-1) + This is useful to compute metrics in distributed setups (in particular non-additive metrics like F1). + seed (:obj:`int`, optional): If specified, this will temporarily set numpy's random seed when :func:`datasets.Metric.compute` is run. + experiment_id (``str``): A specific experiment id. This is used if several distributed evaluations share the same file system. + This is useful to compute metrics in distributed setups (in particular non-additive metrics like F1). + max_concurrent_cache_files (``int``): Max number of concurrent metrics cache files (default 10000). + timeout (``Union[int, float]``): Timeout in second for distributed setting synchronization. 
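+
+ Example (a sketch of the distributed pattern described above; "accuracy", the two-process setup and the dummy data are placeholders):
+
+ ```py
+ >>> from datasets import load_metric
+ >>> metric = load_metric("accuracy", num_process=2, process_id=0, experiment_id="my_exp")
+ >>> for model_predictions, labels in [([0, 1], [0, 1]), ([1, 1], [0, 1])]:
+ ...     metric.add_batch(predictions=model_predictions, references=labels)
+ >>> metric.compute()  # returns the result on process 0 and None on the other processes
+ ```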
+ """ + + @deprecated("Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate") + def __init__( + self, + config_name: Optional[str] = None, + keep_in_memory: bool = False, + cache_dir: Optional[str] = None, + num_process: int = 1, + process_id: int = 0, + seed: Optional[int] = None, + experiment_id: Optional[str] = None, + max_concurrent_cache_files: int = 10000, + timeout: Union[int, float] = 100, + **kwargs, + ): + # prepare info + self.config_name = config_name or "default" + info = self._info() + info.metric_name = camelcase_to_snakecase(self.__class__.__name__) + info.config_name = self.config_name + info.experiment_id = experiment_id or "default_experiment" + MetricInfoMixin.__init__(self, info) # For easy access on low level + + # Safety checks on num_process and process_id + if not isinstance(process_id, int) or process_id < 0: + raise ValueError("'process_id' should be a number greater than 0") + if not isinstance(num_process, int) or num_process <= process_id: + raise ValueError("'num_process' should be a number greater than process_id") + if keep_in_memory and num_process != 1: + raise ValueError("Using 'keep_in_memory' is not possible in distributed setting (num_process > 1).") + + self.num_process = num_process + self.process_id = process_id + self.max_concurrent_cache_files = max_concurrent_cache_files + + self.keep_in_memory = keep_in_memory + self._data_dir_root = os.path.expanduser(cache_dir or config.HF_METRICS_CACHE) + self.data_dir = self._build_data_dir() + if seed is None: + _, seed, pos, *_ = np.random.get_state() + self.seed: int = seed[pos] if pos < 624 else seed[0] + else: + self.seed: int = seed + self.timeout: Union[int, float] = timeout + + # Update 'compute' and 'add' docstring + # methods need to be copied otherwise it changes the docstrings of every instance + self.compute = types.MethodType(copyfunc(self.compute), self) + self.add_batch = types.MethodType(copyfunc(self.add_batch), self) + self.add = types.MethodType(copyfunc(self.add), self) + self.compute.__func__.__doc__ += self.info.inputs_description + self.add_batch.__func__.__doc__ += self.info.inputs_description + self.add.__func__.__doc__ += self.info.inputs_description + + # self.arrow_schema = pa.schema(field for field in self.info.features.type) + self.buf_writer = None + self.writer = None + self.writer_batch_size = None + self.data = None + + # This is the cache file we store our predictions/references in + # Keep it None for now so we can (cloud)pickle the object + self.cache_file_name = None + self.filelock = None + self.rendez_vous_lock = None + + # This is all the cache files on which we have a lock when we are in a distributed setting + self.file_paths = None + self.filelocks = None + + def __len__(self): + """Return the number of examples (predictions or predictions/references pair) + currently stored in the metric's cache. + """ + return 0 if self.writer is None else len(self.writer) + + def __repr__(self): + return ( + f'Metric(name: "{self.name}", features: {self.features}, ' + f'usage: """{self.inputs_description}""", ' + f"stored examples: {len(self)})" + ) + + def _build_data_dir(self): + """Path of this metric in cache_dir: + Will be: + self._data_dir_root/self.name/self.config_name/self.hash (if not none)/ + If any of these element is missing or if ``with_version=False`` the corresponding subfolders are dropped. 
+ """ + builder_data_dir = self._data_dir_root + builder_data_dir = os.path.join(builder_data_dir, self.name, self.config_name) + os.makedirs(builder_data_dir, exist_ok=True) + return builder_data_dir + + def _create_cache_file(self, timeout=1) -> Tuple[str, FileLock]: + """Create a new cache file. If the default cache file is used, we generated a new hash.""" + file_path = os.path.join(self.data_dir, f"{self.experiment_id}-{self.num_process}-{self.process_id}.arrow") + filelock = None + for i in range(self.max_concurrent_cache_files): + filelock = FileLock(file_path + ".lock") + try: + filelock.acquire(timeout=timeout) + except Timeout: + # If we have reached the max number of attempts or we are not allow to find a free name (distributed setup) + # We raise an error + if self.num_process != 1: + raise ValueError( + f"Error in _create_cache_file: another metric instance is already using the local cache file at {file_path}. " + f"Please specify an experiment_id (currently: {self.experiment_id}) to avoid collision " + f"between distributed metric instances." + ) from None + if i == self.max_concurrent_cache_files - 1: + raise ValueError( + f"Cannot acquire lock, too many metric instance are operating concurrently on this file system." + f"You should set a larger value of max_concurrent_cache_files when creating the metric " + f"(current value is {self.max_concurrent_cache_files})." + ) from None + # In other cases (allow to find new file name + not yet at max num of attempts) we can try to sample a new hashing name. + file_uuid = str(uuid.uuid4()) + file_path = os.path.join( + self.data_dir, f"{self.experiment_id}-{file_uuid}-{self.num_process}-{self.process_id}.arrow" + ) + else: + break + + return file_path, filelock + + def _get_all_cache_files(self) -> Tuple[List[str], List[FileLock]]: + """Get a lock on all the cache files in a distributed setup. + We wait for timeout second to let all the distributed node finish their tasks (default is 100 seconds). + """ + if self.num_process == 1: + if self.cache_file_name is None: + raise ValueError( + "Metric cache file doesn't exist. Please make sure that you call `add` or `add_batch` " + "at least once before calling `compute`." + ) + file_paths = [self.cache_file_name] + else: + file_paths = [ + os.path.join(self.data_dir, f"{self.experiment_id}-{self.num_process}-{process_id}.arrow") + for process_id in range(self.num_process) + ] + + # Let's acquire a lock on each process files to be sure they are finished writing + filelocks = [] + for process_id, file_path in enumerate(file_paths): + if process_id == 0: # process 0 already has its lock file + filelocks.append(self.filelock) + else: + filelock = FileLock(file_path + ".lock") + try: + filelock.acquire(timeout=self.timeout) + except Timeout: + raise ValueError( + f"Cannot acquire lock on cached file {file_path} for process {process_id}." + ) from None + else: + filelocks.append(filelock) + + return file_paths, filelocks + + def _check_all_processes_locks(self): + expected_lock_file_names = [ + os.path.join(self.data_dir, f"{self.experiment_id}-{self.num_process}-{process_id}.arrow.lock") + for process_id in range(self.num_process) + ] + for expected_lock_file_name in expected_lock_file_names: + nofilelock = FileFreeLock(expected_lock_file_name) + try: + nofilelock.acquire(timeout=self.timeout) + except Timeout: + raise ValueError( + f"Expected to find locked file {expected_lock_file_name} from process {self.process_id} but it doesn't exist." 
+ ) from None + else: + nofilelock.release() + + def _check_rendez_vous(self): + expected_lock_file_name = os.path.join(self.data_dir, f"{self.experiment_id}-{self.num_process}-0.arrow.lock") + nofilelock = FileFreeLock(expected_lock_file_name) + try: + nofilelock.acquire(timeout=self.timeout) + except Timeout: + raise ValueError( + f"Expected to find locked file {expected_lock_file_name} from process {self.process_id} but it doesn't exist." + ) from None + else: + nofilelock.release() + lock_file_name = os.path.join(self.data_dir, f"{self.experiment_id}-{self.num_process}-rdv.lock") + rendez_vous_lock = FileLock(lock_file_name) + try: + rendez_vous_lock.acquire(timeout=self.timeout) + except Timeout: + raise ValueError(f"Couldn't acquire lock on {lock_file_name} from process {self.process_id}.") from None + else: + rendez_vous_lock.release() + + def _finalize(self): + """Close all the writing process and load/gather the data + from all the nodes if main node or all_process is True. + """ + if self.writer is not None: + self.writer.finalize() + self.writer = None + # release the locks of the processes > 0 so that process 0 can lock them to read + delete the data + if self.filelock is not None and self.process_id > 0: + self.filelock.release() + + if self.keep_in_memory: + # Read the predictions and references + reader = ArrowReader(path=self.data_dir, info=DatasetInfo(features=self.features)) + self.data = Dataset.from_buffer(self.buf_writer.getvalue()) + + elif self.process_id == 0: + # Let's acquire a lock on each node files to be sure they are finished writing + file_paths, filelocks = self._get_all_cache_files() + + # Read the predictions and references + try: + reader = ArrowReader(path="", info=DatasetInfo(features=self.features)) + self.data = Dataset(**reader.read_files([{"filename": f} for f in file_paths])) + except FileNotFoundError: + raise ValueError( + "Error in finalize: another metric instance is already using the local cache file. " + "Please specify an experiment_id to avoid collision between distributed metric instances." + ) from None + + # Store file paths and locks and we will release/delete them after the computation. + self.file_paths = file_paths + self.filelocks = filelocks + + def compute(self, *, predictions=None, references=None, **kwargs) -> Optional[dict]: + """Compute the metrics. + + Usage of positional arguments is not allowed to prevent mistakes. + + Args: + predictions (list/array/tensor, optional): Predictions. + references (list/array/tensor, optional): References. + **kwargs (optional): Keyword arguments that will be forwarded to the metrics :meth:`_compute` + method (see details in the docstring). + + Return: + dict or None + + - Dictionary with the metrics if this metric is run on the main process (``process_id == 0``). + - None if the metric is not run on the main process (``process_id != 0``). + + Example: + + ```py + >>> from datasets import load_metric + >>> metric = load_metric("accuracy") + >>> accuracy = metric.compute(predictions=model_prediction, references=labels) + ``` + """ + all_kwargs = {"predictions": predictions, "references": references, **kwargs} + if predictions is None and references is None: + missing_kwargs = {k: None for k in self.features if k not in all_kwargs} + all_kwargs.update(missing_kwargs) + else: + missing_inputs = [k for k in self.features if k not in all_kwargs] + if missing_inputs: + raise ValueError( + f"Metric inputs are missing: {missing_inputs}. 
All required inputs are {list(self.features)}" + ) + inputs = {input_name: all_kwargs[input_name] for input_name in self.features} + compute_kwargs = {k: kwargs[k] for k in kwargs if k not in self.features} + + if any(v is not None for v in inputs.values()): + self.add_batch(**inputs) + self._finalize() + + self.cache_file_name = None + self.filelock = None + + if self.process_id == 0: + self.data.set_format(type=self.info.format) + + inputs = {input_name: self.data[input_name] for input_name in self.features} + with temp_seed(self.seed): + output = self._compute(**inputs, **compute_kwargs) + + if self.buf_writer is not None: + self.buf_writer = None + del self.data + self.data = None + else: + # Release locks and delete all the cache files. Process 0 is released last. + for filelock, file_path in reversed(list(zip(self.filelocks, self.file_paths))): + logger.info(f"Removing {file_path}") + del self.data + self.data = None + del self.writer + self.writer = None + os.remove(file_path) + filelock.release() + + return output + else: + return None + + def add_batch(self, *, predictions=None, references=None, **kwargs): + """Add a batch of predictions and references for the metric's stack. + + Args: + predictions (list/array/tensor, optional): Predictions. + references (list/array/tensor, optional): References. + + Example: + + ```py + >>> from datasets import load_metric + >>> metric = load_metric("accuracy") + >>> metric.add_batch(predictions=model_prediction, references=labels) + ``` + """ + bad_inputs = [input_name for input_name in kwargs if input_name not in self.features] + if bad_inputs: + raise ValueError(f"Bad inputs for metric: {bad_inputs}. All required inputs are {list(self.features)}") + batch = {"predictions": predictions, "references": references, **kwargs} + batch = {intput_name: batch[intput_name] for intput_name in self.features} + batch = self.info.features.encode_batch(batch) + if self.writer is None: + self._init_writer() + try: + self.writer.write_batch(batch) + except pa.ArrowInvalid: + if any(len(batch[c]) != len(next(iter(batch.values()))) for c in batch): + col0 = next(iter(batch)) + bad_col = [c for c in batch if len(batch[c]) != len(batch[col0])][0] + error_msg = ( + f"Mismatch in the number of {col0} ({len(batch[col0])}) and {bad_col} ({len(batch[bad_col])})" + ) + elif sorted(self.features) != ["references", "predictions"]: + error_msg = f"Metric inputs don't match the expected format.\n" f"Expected format: {self.features},\n" + error_msg_inputs = ",\n".join( + f"Input {input_name}: {summarize_if_long_list(batch[input_name])}" for input_name in self.features + ) + error_msg += error_msg_inputs + else: + error_msg = ( + f"Predictions and/or references don't match the expected format.\n" + f"Expected format: {self.features},\n" + f"Input predictions: {summarize_if_long_list(predictions)},\n" + f"Input references: {summarize_if_long_list(references)}" + ) + raise ValueError(error_msg) from None + + def add(self, *, prediction=None, reference=None, **kwargs): + """Add one prediction and reference for the metric's stack. + + Args: + prediction (list/array/tensor, optional): Predictions. + reference (list/array/tensor, optional): References. 
+ + Example: + + ```py + >>> from datasets import load_metric + >>> metric = load_metric("accuracy") + >>> metric.add(predictions=model_predictions, references=labels) + ``` + """ + bad_inputs = [input_name for input_name in kwargs if input_name not in self.features] + if bad_inputs: + raise ValueError(f"Bad inputs for metric: {bad_inputs}. All required inputs are {list(self.features)}") + example = {"predictions": prediction, "references": reference, **kwargs} + example = {intput_name: example[intput_name] for intput_name in self.features} + example = self.info.features.encode_example(example) + if self.writer is None: + self._init_writer() + try: + self.writer.write(example) + except pa.ArrowInvalid: + error_msg = f"Metric inputs don't match the expected format.\n" f"Expected format: {self.features},\n" + error_msg_inputs = ",\n".join( + f"Input {input_name}: {summarize_if_long_list(example[input_name])}" for input_name in self.features + ) + error_msg += error_msg_inputs + raise ValueError(error_msg) from None + + def _init_writer(self, timeout=1): + if self.num_process > 1: + if self.process_id == 0: + file_path = os.path.join(self.data_dir, f"{self.experiment_id}-{self.num_process}-rdv.lock") + self.rendez_vous_lock = FileLock(file_path) + try: + self.rendez_vous_lock.acquire(timeout=timeout) + except TimeoutError: + raise ValueError( + f"Error in _init_writer: another metric instance is already using the local cache file at {file_path}. " + f"Please specify an experiment_id (currently: {self.experiment_id}) to avoid collision " + f"between distributed metric instances." + ) from None + + if self.keep_in_memory: + self.buf_writer = pa.BufferOutputStream() + self.writer = ArrowWriter( + features=self.info.features, stream=self.buf_writer, writer_batch_size=self.writer_batch_size + ) + else: + self.buf_writer = None + + # Get cache file name and lock it + if self.cache_file_name is None or self.filelock is None: + cache_file_name, filelock = self._create_cache_file() # get ready + self.cache_file_name = cache_file_name + self.filelock = filelock + + self.writer = ArrowWriter( + features=self.info.features, path=self.cache_file_name, writer_batch_size=self.writer_batch_size + ) + # Setup rendez-vous here if + if self.num_process > 1: + if self.process_id == 0: + self._check_all_processes_locks() # wait for everyone to be ready + self.rendez_vous_lock.release() # let everyone go + else: + self._check_rendez_vous() # wait for master to be ready and to let everyone go + + def _info(self) -> MetricInfo: + """Construct the MetricInfo object. See `MetricInfo` for details. + + Warning: This function is only called once and the result is cached for all + following .info() calls. + + Returns: + info: (MetricInfo) The metrics information + """ + raise NotImplementedError + + def download_and_prepare( + self, + download_config: Optional[DownloadConfig] = None, + dl_manager: Optional[DownloadManager] = None, + ): + """Downloads and prepares dataset for reading. + + Args: + download_config (:class:`DownloadConfig`, optional): Specific download configuration parameters. + dl_manager (:class:`DownloadManager`, optional): Specific download manager to use. 
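+
+ Example (sketch only; whether a metric actually downloads anything depends on its script, and "bleurt" is just an illustration of one that does):
+
+ ```py
+ >>> from datasets import load_metric
+ >>> metric = load_metric("bleurt")
+ >>> metric.download_and_prepare()
+ ```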
+ """ + if dl_manager is None: + if download_config is None: + download_config = DownloadConfig() + download_config.cache_dir = os.path.join(self.data_dir, "downloads") + download_config.force_download = False + + dl_manager = DownloadManager( + dataset_name=self.name, download_config=download_config, data_dir=self.data_dir + ) + + self._download_and_prepare(dl_manager) + + def _download_and_prepare(self, dl_manager): + """Downloads and prepares resources for the metric. + + This is the internal implementation to overwrite called when user calls + `download_and_prepare`. It should download all required resources for the metric. + + Args: + dl_manager (:class:`DownloadManager`): `DownloadManager` used to download and cache data. + """ + return None + + def _compute(self, *, predictions=None, references=None, **kwargs) -> Dict[str, Any]: + """This method defines the common API for all the metrics in the library""" + raise NotImplementedError + + def __del__(self): + if hasattr(self, "filelock") and self.filelock is not None: + self.filelock.release() + if hasattr(self, "rendez_vous_lock") and self.rendez_vous_lock is not None: + self.rendez_vous_lock.release() + if hasattr(self, "writer"): # in case it was already deleted + del self.writer + if hasattr(self, "data"): # in case it was already deleted + del self.data diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/search.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/search.py new file mode 100644 index 0000000000000000000000000000000000000000..4f76f9b671fda755b4b7a53822edf7c6f73b56aa --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/search.py @@ -0,0 +1,785 @@ +import importlib.util +import os +import tempfile +from pathlib import PurePath +from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Union + +import fsspec +import numpy as np + +from .features import Sequence +from .utils import logging +from .utils import tqdm as hf_tqdm + + +if TYPE_CHECKING: + from .arrow_dataset import Dataset # noqa: F401 + + try: + from elasticsearch import Elasticsearch # noqa: F401 + + except ImportError: + pass + try: + import faiss # noqa: F401 + + except ImportError: + pass + +_has_elasticsearch = importlib.util.find_spec("elasticsearch") is not None +_has_faiss = importlib.util.find_spec("faiss") is not None + + +logger = logging.get_logger(__name__) + + +class MissingIndex(Exception): + pass + + +class SearchResults(NamedTuple): + scores: List[float] + indices: List[int] + + +class BatchedSearchResults(NamedTuple): + total_scores: List[List[float]] + total_indices: List[List[int]] + + +class NearestExamplesResults(NamedTuple): + scores: List[float] + examples: dict + + +class BatchedNearestExamplesResults(NamedTuple): + total_scores: List[List[float]] + total_examples: List[dict] + + +class BaseIndex: + """Base class for indexing""" + + def search(self, query, k: int = 10, **kwargs) -> SearchResults: + """ + To implement. + This method has to return the scores and the indices of the retrieved examples given a certain query. + """ + raise NotImplementedError + + def search_batch(self, queries, k: int = 10, **kwargs) -> BatchedSearchResults: + """Find the nearest examples indices to the query. + + Args: + queries (`Union[List[str], np.ndarray]`): The queries as a list of strings if `column` is a text index or as a numpy array if `column` is a vector index. + k (`int`): The number of examples to retrieve per query. 
+ + Ouput: + total_scores (`List[List[float]`): The retrieval scores of the retrieved examples per query. + total_indices (`List[List[int]]`): The indices of the retrieved examples per query. + """ + total_scores, total_indices = [], [] + for query in queries: + scores, indices = self.search(query, k) + total_scores.append(scores) + total_indices.append(indices) + return BatchedSearchResults(total_scores, total_indices) + + def save(self, file: Union[str, PurePath]): + """Serialize the index on disk""" + raise NotImplementedError + + @classmethod + def load(cls, file: Union[str, PurePath]) -> "BaseIndex": + """Deserialize the index from disk""" + raise NotImplementedError + + +class ElasticSearchIndex(BaseIndex): + """ + Sparse index using Elasticsearch. It is used to index text and run queries based on BM25 similarity. + An Elasticsearch server needs to be accessible, and a python client is declared with + ``` + es_client = Elasticsearch([{'host': 'localhost', 'port': '9200'}]) + ``` + for example. + """ + + def __init__( + self, + host: Optional[str] = None, + port: Optional[int] = None, + es_client: Optional["Elasticsearch"] = None, + es_index_name: Optional[str] = None, + es_index_config: Optional[dict] = None, + ): + if not _has_elasticsearch: + raise ImportError( + "You must install ElasticSearch to use ElasticSearchIndex. To do so you can run `pip install elasticsearch==7.7.1 for example`" + ) + if es_client is not None and (host is not None or port is not None): + raise ValueError("Please specify either `es_client` or `(host, port)`, but not both.") + host = host or "localhost" + port = port or 9200 + + import elasticsearch.helpers # noqa: F401 - need this to properly load all the es features + from elasticsearch import Elasticsearch # noqa: F811 + + self.es_client = es_client if es_client is not None else Elasticsearch([{"host": host, "port": str(port)}]) + self.es_index_name = ( + es_index_name + if es_index_name is not None + else "huggingface_datasets_" + os.path.basename(tempfile.NamedTemporaryFile().name) + ) + self.es_index_config = ( + es_index_config + if es_index_config is not None + else { + "settings": { + "number_of_shards": 1, + "analysis": {"analyzer": {"stop_standard": {"type": "standard", " stopwords": "_english_"}}}, + }, + "mappings": {"properties": {"text": {"type": "text", "analyzer": "standard", "similarity": "BM25"}}}, + } + ) + + def add_documents(self, documents: Union[List[str], "Dataset"], column: Optional[str] = None): + """ + Add documents to the index. + If the documents are inside a certain column, you can specify it using the `column` argument. + """ + index_name = self.es_index_name + index_config = self.es_index_config + self.es_client.indices.create(index=index_name, body=index_config) + number_of_docs = len(documents) + progress = hf_tqdm(unit="docs", total=number_of_docs) + successes = 0 + + def passage_generator(): + if column is not None: + for i, example in enumerate(documents): + yield {"text": example[column], "_id": i} + else: + for i, example in enumerate(documents): + yield {"text": example, "_id": i} + + # create the ES index + import elasticsearch as es + + for ok, action in es.helpers.streaming_bulk( + client=self.es_client, + index=index_name, + actions=passage_generator(), + ): + progress.update(1) + successes += ok + if successes != len(documents): + logger.warning( + f"Some documents failed to be added to ElasticSearch. 
Failures: {len(documents)-successes}/{len(documents)}" + ) + logger.info(f"Indexed {successes:d} documents") + + def search(self, query: str, k=10, **kwargs) -> SearchResults: + """Find the nearest examples indices to the query. + + Args: + query (`str`): The query as a string. + k (`int`): The number of examples to retrieve. + + Ouput: + scores (`List[List[float]`): The retrieval scores of the retrieved examples. + indices (`List[List[int]]`): The indices of the retrieved examples. + """ + response = self.es_client.search( + index=self.es_index_name, + body={"query": {"multi_match": {"query": query, "fields": ["text"], "type": "cross_fields"}}, "size": k}, + **kwargs, + ) + hits = response["hits"]["hits"] + return SearchResults([hit["_score"] for hit in hits], [int(hit["_id"]) for hit in hits]) + + def search_batch(self, queries, k: int = 10, max_workers=10, **kwargs) -> BatchedSearchResults: + import concurrent.futures + + total_scores, total_indices = [None] * len(queries), [None] * len(queries) + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: + future_to_index = {executor.submit(self.search, query, k, **kwargs): i for i, query in enumerate(queries)} + for future in concurrent.futures.as_completed(future_to_index): + index = future_to_index[future] + results: SearchResults = future.result() + total_scores[index] = results.scores + total_indices[index] = results.indices + return BatchedSearchResults(total_indices=total_indices, total_scores=total_scores) + + +class FaissIndex(BaseIndex): + """ + Dense index using Faiss. It is used to index vectors. + Faiss is a library for efficient similarity search and clustering of dense vectors. + It contains algorithms that search in sets of vectors of any size, up to ones that possibly do not fit in RAM. + You can find more information about Faiss here: + - For index types and the string factory: https://github.com/facebookresearch/faiss/wiki/The-index-factory + - For GPU settings: https://github.com/facebookresearch/faiss/wiki/Faiss-on-the-GPU + """ + + def __init__( + self, + device: Optional[Union[int, List[int]]] = None, + string_factory: Optional[str] = None, + metric_type: Optional[int] = None, + custom_index: Optional["faiss.Index"] = None, + ): + """ + Create a Dense index using Faiss. You can specify `device` if you want to run it on GPU (`device` must be the GPU index). + You can find more information about Faiss here: + - For `string factory`: https://github.com/facebookresearch/faiss/wiki/The-index-factory + """ + if string_factory is not None and custom_index is not None: + raise ValueError("Please specify either `string_factory` or `custom_index` but not both.") + if device is not None and custom_index is not None: + raise ValueError( + "Cannot pass both 'custom_index' and 'device'. " + "Pass 'custom_index' already transferred to the target device instead." + ) + self.device = device + self.string_factory = string_factory + self.metric_type = metric_type + self.faiss_index = custom_index + if not _has_faiss: + raise ImportError( + "You must install Faiss to use FaissIndex. To do so you can run `conda install -c pytorch faiss-cpu` or `conda install -c pytorch faiss-gpu`. " + "A community supported package is also available on pypi: `pip install faiss-cpu` or `pip install faiss-gpu`. " + "Note that pip may not have the latest version of FAISS, and thus, some of the latest features and bug fixes may not be available." 
+ ) + + def add_vectors( + self, + vectors: Union[np.array, "Dataset"], + column: Optional[str] = None, + batch_size: int = 1000, + train_size: Optional[int] = None, + faiss_verbose: Optional[bool] = None, + ): + """ + Add vectors to the index. + If the arrays are inside a certain column, you can specify it using the `column` argument. + """ + import faiss # noqa: F811 + + if column and not isinstance(vectors.features[column], Sequence): + raise ValueError( + f"Wrong feature type for column '{column}'. Expected 1d array, got {vectors.features[column]}" + ) + + # Create index + if self.faiss_index is None: + size = len(vectors[0]) if column is None else len(vectors[0][column]) + if self.string_factory is not None: + if self.metric_type is None: + index = faiss.index_factory(size, self.string_factory) + else: + index = faiss.index_factory(size, self.string_factory, self.metric_type) + else: + if self.metric_type is None: + index = faiss.IndexFlat(size) + else: + index = faiss.IndexFlat(size, self.metric_type) + + self.faiss_index = self._faiss_index_to_device(index, self.device) + logger.info(f"Created faiss index of type {type(self.faiss_index)}") + + # Set verbosity level + if faiss_verbose is not None: + self.faiss_index.verbose = faiss_verbose + if hasattr(self.faiss_index, "index") and self.faiss_index.index is not None: + self.faiss_index.index.verbose = faiss_verbose + if hasattr(self.faiss_index, "quantizer") and self.faiss_index.quantizer is not None: + self.faiss_index.quantizer.verbose = faiss_verbose + if hasattr(self.faiss_index, "clustering_index") and self.faiss_index.clustering_index is not None: + self.faiss_index.clustering_index.verbose = faiss_verbose + + # Train + if train_size is not None: + train_vecs = vectors[:train_size] if column is None else vectors[:train_size][column] + logger.info(f"Training the index with the first {len(train_vecs)} vectors") + self.faiss_index.train(train_vecs) + else: + logger.info("Ignored the training step of the faiss index as `train_size` is None.") + + # Add vectors + logger.info(f"Adding {len(vectors)} vectors to the faiss index") + for i in hf_tqdm(range(0, len(vectors), batch_size)): + vecs = vectors[i : i + batch_size] if column is None else vectors[i : i + batch_size][column] + self.faiss_index.add(vecs) + + @staticmethod + def _faiss_index_to_device(index: "faiss.Index", device: Optional[Union[int, List[int]]] = None) -> "faiss.Index": + """ + Sends a faiss index to a device. + A device can either be a positive integer (GPU id), a negative integer (all GPUs), + or a list of positive integers (select GPUs to use), or `None` for CPU. + """ + + # If device is not specified, then it runs on CPU. + if device is None: + return index + + import faiss # noqa: F811 + + # If the device id is given as an integer + if isinstance(device, int): + # Positive integers are directly mapped to GPU ids + if device > -1: + faiss_res = faiss.StandardGpuResources() + index = faiss.index_cpu_to_gpu(faiss_res, device, index) + # And negative integers mean using all GPUs + else: + index = faiss.index_cpu_to_all_gpus(index) + # Device ids given as a list mean mapping to those devices specified. + elif isinstance(device, (list, tuple)): + index = faiss.index_cpu_to_gpus_list(index, gpus=list(device)) + else: + raise TypeError( + f"The argument type: {type(device)} is not expected. " + + "Please pass in either nothing, a positive int, a negative int, or a list of positive ints." 
+ ) + + return index + + def search(self, query: np.array, k=10, **kwargs) -> SearchResults: + """Find the nearest examples indices to the query. + + Args: + query (`np.array`): The query as a numpy array. + k (`int`): The number of examples to retrieve. + + Ouput: + scores (`List[List[float]`): The retrieval scores of the retrieved examples. + indices (`List[List[int]]`): The indices of the retrieved examples. + """ + if len(query.shape) != 1 and (len(query.shape) != 2 or query.shape[0] != 1): + raise ValueError("Shape of query is incorrect, it has to be either a 1D array or 2D (1, N)") + + queries = query.reshape(1, -1) + if not queries.flags.c_contiguous: + queries = np.asarray(queries, order="C") + scores, indices = self.faiss_index.search(queries, k, **kwargs) + return SearchResults(scores[0], indices[0].astype(int)) + + def search_batch(self, queries: np.array, k=10, **kwargs) -> BatchedSearchResults: + """Find the nearest examples indices to the queries. + + Args: + queries (`np.array`): The queries as a numpy array. + k (`int`): The number of examples to retrieve. + + Ouput: + total_scores (`List[List[float]`): The retrieval scores of the retrieved examples per query. + total_indices (`List[List[int]]`): The indices of the retrieved examples per query. + """ + if len(queries.shape) != 2: + raise ValueError("Shape of query must be 2D") + if not queries.flags.c_contiguous: + queries = np.asarray(queries, order="C") + scores, indices = self.faiss_index.search(queries, k, **kwargs) + return BatchedSearchResults(scores, indices.astype(int)) + + def save(self, file: Union[str, PurePath], storage_options: Optional[Dict] = None): + """Serialize the FaissIndex on disk""" + import faiss # noqa: F811 + + if self.device is not None and isinstance(self.device, (int, list, tuple)): + index = faiss.index_gpu_to_cpu(self.faiss_index) + else: + index = self.faiss_index + + with fsspec.open(str(file), "wb", **(storage_options or {})) as f: + faiss.write_index(index, faiss.BufferedIOWriter(faiss.PyCallbackIOWriter(f.write))) + + @classmethod + def load( + cls, + file: Union[str, PurePath], + device: Optional[Union[int, List[int]]] = None, + storage_options: Optional[Dict] = None, + ) -> "FaissIndex": + """Deserialize the FaissIndex from disk""" + import faiss # noqa: F811 + + # Instances of FaissIndex is essentially just a wrapper for faiss indices. + faiss_index = cls(device=device) + with fsspec.open(str(file), "rb", **(storage_options or {})) as f: + index = faiss.read_index(faiss.BufferedIOReader(faiss.PyCallbackIOReader(f.read))) + faiss_index.faiss_index = faiss_index._faiss_index_to_device(index, faiss_index.device) + return faiss_index + + +class IndexableMixin: + """Add indexing features to `datasets.Dataset`""" + + def __init__(self): + self._indexes: Dict[str, BaseIndex] = {} + + def __len__(self): + raise NotImplementedError + + def __getitem__(self, key): + raise NotImplementedError + + def is_index_initialized(self, index_name: str) -> bool: + return index_name in self._indexes + + def _check_index_is_initialized(self, index_name: str): + if not self.is_index_initialized(index_name): + raise MissingIndex( + f"Index with index_name '{index_name}' not initialized yet. Please make sure that you call `add_faiss_index` or `add_elasticsearch_index` first." 
+ )
+
+ def list_indexes(self) -> List[str]:
+ """List the `index_name`/identifiers of all the attached indexes."""
+ return list(self._indexes)
+
+ def get_index(self, index_name: str) -> BaseIndex:
+ """Get the index with the given `index_name`.
+
+ Args:
+ index_name (`str`): Index name.
+
+ Returns:
+ [`BaseIndex`]
+ """
+ self._check_index_is_initialized(index_name)
+ return self._indexes[index_name]
+
+ def add_faiss_index(
+ self,
+ column: str,
+ index_name: Optional[str] = None,
+ device: Optional[Union[int, List[int]]] = None,
+ string_factory: Optional[str] = None,
+ metric_type: Optional[int] = None,
+ custom_index: Optional["faiss.Index"] = None,
+ batch_size: int = 1000,
+ train_size: Optional[int] = None,
+ faiss_verbose: bool = False,
+ ):
+ """Add a dense index using Faiss for fast retrieval.
+ The index is created using the vectors of the specified column.
+ You can specify `device` if you want to run it on GPU (`device` must be the GPU index, see more below).
+ You can find more information about Faiss here:
+ - For `string factory`: https://github.com/facebookresearch/faiss/wiki/The-index-factory
+
+ Args:
+ column (`str`): The column of the vectors to add to the index.
+ index_name (Optional `str`): The index_name/identifier of the index. This is the index_name that is used to call `.get_nearest` or `.search`.
+ By default it corresponds to `column`.
+ device (Optional `Union[int, List[int]]`): If positive integer, this is the index of the GPU to use. If negative integer, use all GPUs.
+ If a list of positive integers is passed in, run only on those GPUs. By default it uses the CPU.
+ string_factory (Optional `str`): This is passed to the index factory of Faiss to create the index. Default index class is IndexFlatIP.
+ metric_type (Optional `int`): Type of metric. Ex: `faiss.METRIC_INNER_PRODUCT` or `faiss.METRIC_L2`.
+ custom_index (Optional `faiss.Index`): Custom Faiss index that you already have instantiated and configured for your needs.
+ batch_size (Optional `int`): Size of the batch to use while adding vectors to the FaissIndex. Default value is 1000.
+
+ train_size (Optional `int`): If the index needs a training step, specifies how many vectors will be used to train the index.
+ faiss_verbose (`bool`, defaults to False): Enable the verbosity of the Faiss index.
+ """
+ index_name = index_name if index_name is not None else column
+ faiss_index = FaissIndex(
+ device=device, string_factory=string_factory, metric_type=metric_type, custom_index=custom_index
+ )
+ faiss_index.add_vectors(
+ self, column=column, batch_size=batch_size, train_size=train_size, faiss_verbose=faiss_verbose
+ )
+ self._indexes[index_name] = faiss_index
+
+ def add_faiss_index_from_external_arrays(
+ self,
+ external_arrays: np.array,
+ index_name: str,
+ device: Optional[Union[int, List[int]]] = None,
+ string_factory: Optional[str] = None,
+ metric_type: Optional[int] = None,
+ custom_index: Optional["faiss.Index"] = None,
+ batch_size: int = 1000,
+ train_size: Optional[int] = None,
+ faiss_verbose: bool = False,
+ ):
+ """Add a dense index using Faiss for fast retrieval.
+ The index is created using the vectors of `external_arrays`.
+ You can specify `device` if you want to run it on GPU (`device` must be the GPU index).
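A usage sketch of `add_faiss_index` and the retrieval call it enables; the dataset, column name, and vector values below are made up, and `faiss` must be installed:

```python
import numpy as np
from datasets import Dataset

# Toy dataset with a precomputed vector column (values are illustrative).
ds = Dataset.from_dict(
    {
        "text": ["first doc", "second doc", "third doc"],
        "embeddings": [[0.1, 0.9], [0.8, 0.2], [0.5, 0.5]],
    }
)

ds.add_faiss_index(column="embeddings")  # defaults to a flat index over the column

query = np.array([0.1, 0.8], dtype=np.float32)
scores, examples = ds.get_nearest_examples("embeddings", query, k=2)
print(scores, examples["text"])

# The index can be persisted and re-attached later (file name is hypothetical):
# ds.save_faiss_index("embeddings", "my_index.faiss")
# ds.load_faiss_index("embeddings", "my_index.faiss")
```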
+ You can find more information about Faiss here: + - For `string factory`: https://github.com/facebookresearch/faiss/wiki/The-index-factory + + Args: + external_arrays (`np.array`): If you want to use arrays from outside the lib for the index, you can set `external_arrays`. + It will use `external_arrays` to create the Faiss index instead of the arrays in the given `column`. + index_name (`str`): The index_name/identifier of the index. This is the index_name that is used to call `.get_nearest` or `.search`. + device (Optional `Union[int, List[int]]`): If positive integer, this is the index of the GPU to use. If negative integer, use all GPUs. + If a list of positive integers is passed in, run only on those GPUs. By default it uses the CPU. + string_factory (Optional `str`): This is passed to the index factory of Faiss to create the index. Default index class is IndexFlatIP. + metric_type (Optional `int`): Type of metric. Ex: `faiss.METRIC_INNER_PRODUCT` or `faiss.METRIC_L2`. + custom_index (Optional `faiss.Index`): Custom Faiss index that you already have instantiated and configured for your needs. + batch_size (Optional `int`): Size of the batch to use while adding vectors to the FaissIndex. Default value is 1000. + + train_size (Optional `int`): If the index needs a training step, specifies how many vectors will be used to train the index. + faiss_verbose (`bool`, defaults to False): Enable the verbosity of the Faiss index. + """ + faiss_index = FaissIndex( + device=device, string_factory=string_factory, metric_type=metric_type, custom_index=custom_index + ) + faiss_index.add_vectors( + external_arrays, column=None, batch_size=batch_size, train_size=train_size, faiss_verbose=faiss_verbose + ) + self._indexes[index_name] = faiss_index + + def save_faiss_index(self, index_name: str, file: Union[str, PurePath], storage_options: Optional[Dict] = None): + """Save a FaissIndex on disk. + + Args: + index_name (`str`): The index_name/identifier of the index. This is the index_name that is used to call `.get_nearest` or `.search`. + file (`str`): The path to the serialized faiss index on disk or remote URI (e.g. `"s3://my-bucket/index.faiss"`). + storage_options (`dict`, *optional*): + Key/value pairs to be passed on to the file-system backend, if any. + + + + """ + index = self.get_index(index_name) + if not isinstance(index, FaissIndex): + raise ValueError(f"Index '{index_name}' is not a FaissIndex but a '{type(index)}'") + index.save(file, storage_options=storage_options) + logger.info(f"Saved FaissIndex {index_name} at {file}") + + def load_faiss_index( + self, + index_name: str, + file: Union[str, PurePath], + device: Optional[Union[int, List[int]]] = None, + storage_options: Optional[Dict] = None, + ): + """Load a FaissIndex from disk. + + If you want to do additional configurations, you can have access to the faiss index object by doing + `.get_index(index_name).faiss_index` to make it fit your needs. + + Args: + index_name (`str`): The index_name/identifier of the index. This is the index_name that is used to + call `.get_nearest` or `.search`. + file (`str`): The path to the serialized faiss index on disk or remote URI (e.g. `"s3://my-bucket/index.faiss"`). + device (Optional `Union[int, List[int]]`): If positive integer, this is the index of the GPU to use. If negative integer, use all GPUs. + If a list of positive integers is passed in, run only on those GPUs. By default it uses the CPU. 
+ storage_options (`dict`, *optional*): + Key/value pairs to be passed on to the file-system backend, if any. + + + + """ + index = FaissIndex.load(file, device=device, storage_options=storage_options) + if index.faiss_index.ntotal != len(self): + raise ValueError( + f"Index size should match Dataset size, but Index '{index_name}' at {file} has {index.faiss_index.ntotal} elements while the dataset has {len(self)} examples." + ) + self._indexes[index_name] = index + logger.info(f"Loaded FaissIndex {index_name} from {file}") + + def add_elasticsearch_index( + self, + column: str, + index_name: Optional[str] = None, + host: Optional[str] = None, + port: Optional[int] = None, + es_client: Optional["Elasticsearch"] = None, + es_index_name: Optional[str] = None, + es_index_config: Optional[dict] = None, + ): + """Add a text index using ElasticSearch for fast retrieval. + + Args: + column (`str`): The column of the documents to add to the index. + index_name (Optional `str`): The index_name/identifier of the index. This is the index name that is used to call `.get_nearest` or `.search`. + By default it corresponds to `column`. + host (Optional `str`, defaults to localhost): + host of where ElasticSearch is running + port (Optional `str`, defaults to 9200): + port of where ElasticSearch is running + es_client (Optional `elasticsearch.Elasticsearch`): + The elasticsearch client used to create the index if host and port are None. + es_index_name (Optional `str`): The elasticsearch index name used to create the index. + es_index_config (Optional `dict`): + The configuration of the elasticsearch index. + Default config is: + + Config:: + + { + "settings": { + "number_of_shards": 1, + "analysis": {"analyzer": {"stop_standard": {"type": "standard", " stopwords": "_english_"}}}, + }, + "mappings": { + "properties": { + "text": { + "type": "text", + "analyzer": "standard", + "similarity": "BM25" + }, + } + }, + } + """ + index_name = index_name if index_name is not None else column + es_index = ElasticSearchIndex( + host=host, port=port, es_client=es_client, es_index_name=es_index_name, es_index_config=es_index_config + ) + es_index.add_documents(self, column=column) + self._indexes[index_name] = es_index + + def load_elasticsearch_index( + self, + index_name: str, + es_index_name: str, + host: Optional[str] = None, + port: Optional[int] = None, + es_client: Optional["Elasticsearch"] = None, + es_index_config: Optional[dict] = None, + ): + """Load an existing text index using ElasticSearch for fast retrieval. + + Args: + index_name (`str`): + The `index_name`/identifier of the index. This is the index name that is used to call `get_nearest` or `search`. + es_index_name (`str`): + The name of elasticsearch index to load. + host (`str`, *optional*, defaults to `localhost`): + Host of where ElasticSearch is running. + port (`str`, *optional*, defaults to `9200`): + Port of where ElasticSearch is running. + es_client (`elasticsearch.Elasticsearch`, *optional*): + The elasticsearch client used to create the index if host and port are `None`. + es_index_config (`dict`, *optional*): + The configuration of the elasticsearch index. 
+ Default config is: + ``` + { + "settings": { + "number_of_shards": 1, + "analysis": {"analyzer": {"stop_standard": {"type": "standard", " stopwords": "_english_"}}}, + }, + "mappings": { + "properties": { + "text": { + "type": "text", + "analyzer": "standard", + "similarity": "BM25" + }, + } + }, + } + ``` + """ + self._indexes[index_name] = ElasticSearchIndex( + host=host, port=port, es_client=es_client, es_index_name=es_index_name, es_index_config=es_index_config + ) + + def drop_index(self, index_name: str): + """Drop the index with the specified column. + + Args: + index_name (`str`): + The `index_name`/identifier of the index. + """ + del self._indexes[index_name] + + def search(self, index_name: str, query: Union[str, np.array], k: int = 10, **kwargs) -> SearchResults: + """Find the nearest examples indices in the dataset to the query. + + Args: + index_name (`str`): + The name/identifier of the index. + query (`Union[str, np.ndarray]`): + The query as a string if `index_name` is a text index or as a numpy array if `index_name` is a vector index. + k (`int`): + The number of examples to retrieve. + + Returns: + `(scores, indices)`: + A tuple of `(scores, indices)` where: + - **scores** (`List[List[float]`): the retrieval scores from either FAISS (`IndexFlatL2` by default) or ElasticSearch of the retrieved examples + - **indices** (`List[List[int]]`): the indices of the retrieved examples + """ + self._check_index_is_initialized(index_name) + return self._indexes[index_name].search(query, k, **kwargs) + + def search_batch( + self, index_name: str, queries: Union[List[str], np.array], k: int = 10, **kwargs + ) -> BatchedSearchResults: + """Find the nearest examples indices in the dataset to the query. + + Args: + index_name (`str`): + The `index_name`/identifier of the index. + queries (`Union[List[str], np.ndarray]`): + The queries as a list of strings if `index_name` is a text index or as a numpy array if `index_name` is a vector index. + k (`int`): + The number of examples to retrieve per query. + + Returns: + `(total_scores, total_indices)`: + A tuple of `(total_scores, total_indices)` where: + - **total_scores** (`List[List[float]`): the retrieval scores from either FAISS (`IndexFlatL2` by default) or ElasticSearch of the retrieved examples per query + - **total_indices** (`List[List[int]]`): the indices of the retrieved examples per query + """ + self._check_index_is_initialized(index_name) + return self._indexes[index_name].search_batch(queries, k, **kwargs) + + def get_nearest_examples( + self, index_name: str, query: Union[str, np.array], k: int = 10, **kwargs + ) -> NearestExamplesResults: + """Find the nearest examples in the dataset to the query. + + Args: + index_name (`str`): + The index_name/identifier of the index. + query (`Union[str, np.ndarray]`): + The query as a string if `index_name` is a text index or as a numpy array if `index_name` is a vector index. + k (`int`): + The number of examples to retrieve. 
+ + Returns: + `(scores, examples)`: + A tuple of `(scores, examples)` where: + - **scores** (`List[float]`): the retrieval scores from either FAISS (`IndexFlatL2` by default) or ElasticSearch of the retrieved examples + - **examples** (`dict`): the retrieved examples + """ + self._check_index_is_initialized(index_name) + scores, indices = self.search(index_name, query, k, **kwargs) + top_indices = [i for i in indices if i >= 0] + return NearestExamplesResults(scores[: len(top_indices)], self[top_indices]) + + def get_nearest_examples_batch( + self, index_name: str, queries: Union[List[str], np.array], k: int = 10, **kwargs + ) -> BatchedNearestExamplesResults: + """Find the nearest examples in the dataset to the query. + + Args: + index_name (`str`): + The `index_name`/identifier of the index. + queries (`Union[List[str], np.ndarray]`): + The queries as a list of strings if `index_name` is a text index or as a numpy array if `index_name` is a vector index. + k (`int`): + The number of examples to retrieve per query. + + Returns: + `(total_scores, total_examples)`: + A tuple of `(total_scores, total_examples)` where: + - **total_scores** (`List[List[float]`): the retrieval scores from either FAISS (`IndexFlatL2` by default) or ElasticSearch of the retrieved examples per query + - **total_examples** (`List[dict]`): the retrieved examples per query + """ + self._check_index_is_initialized(index_name) + total_scores, total_indices = self.search_batch(index_name, queries, k, **kwargs) + total_scores = [ + scores_i[: len([i for i in indices_i if i >= 0])] + for scores_i, indices_i in zip(total_scores, total_indices) + ] + total_samples = [self[[i for i in indices if i >= 0]] for indices in total_indices] + return BatchedNearestExamplesResults(total_scores, total_samples) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/splits.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/splits.py new file mode 100644 index 0000000000000000000000000000000000000000..fd4966cb4007adc9f47fd78cf2b0a1732913aaef --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/splits.py @@ -0,0 +1,635 @@ +# Copyright 2020 The HuggingFace Datasets Authors and the TensorFlow Datasets Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
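For the Elasticsearch-backed methods above, a usage sketch; the dataset, host, and index names are made up, and a reachable Elasticsearch server is assumed:

```python
from datasets import Dataset

ds = Dataset.from_dict({"line": ["the cat sat", "a dog barked", "cats and dogs"]})

# Ingest the "line" column into a BM25 index (requires a running Elasticsearch server).
ds.add_elasticsearch_index(column="line", host="localhost", port=9200)

scores, examples = ds.get_nearest_examples("line", "cat", k=2)
print(scores, examples["line"])

# An index that already exists on the server can be attached without re-ingesting:
# ds.load_elasticsearch_index("line", es_index_name="my_existing_index", host="localhost", port=9200)
```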
+ +# Lint as: python3 +"""Splits related API.""" + +import abc +import collections +import copy +import dataclasses +import re +from dataclasses import dataclass +from typing import Dict, List, Optional, Union + +from .arrow_reader import FileInstructions, make_file_instructions +from .naming import _split_re +from .utils.py_utils import NonMutableDict, asdict + + +@dataclass +class SplitInfo: + name: str = dataclasses.field(default="", metadata={"include_in_asdict_even_if_is_default": True}) + num_bytes: int = dataclasses.field(default=0, metadata={"include_in_asdict_even_if_is_default": True}) + num_examples: int = dataclasses.field(default=0, metadata={"include_in_asdict_even_if_is_default": True}) + shard_lengths: Optional[List[int]] = None + + # Deprecated + # For backward compatibility, this field needs to always be included in files like + # dataset_infos.json and dataset_info.json files + # To do so, we always include it in the output of datasets.utils.py_utils.asdict(split_info) + dataset_name: Optional[str] = dataclasses.field( + default=None, metadata={"include_in_asdict_even_if_is_default": True} + ) + + @property + def file_instructions(self): + """Returns the list of dict(filename, take, skip).""" + # `self.dataset_name` is assigned in `SplitDict.add()`. + instructions = make_file_instructions( + name=self.dataset_name, + split_infos=[self], + instruction=str(self.name), + ) + return instructions.file_instructions + + +@dataclass +class SubSplitInfo: + """Wrapper around a sub split info. + This class expose info on the subsplit: + ``` + ds, info = datasets.load_dataset(..., split='train[75%:]', with_info=True) + info.splits['train[75%:]'].num_examples + ``` + """ + + instructions: FileInstructions + + @property + def num_examples(self): + """Returns the number of example in the subsplit.""" + return self.instructions.num_examples + + @property + def file_instructions(self): + """Returns the list of dict(filename, take, skip).""" + return self.instructions.file_instructions + + +class SplitBase(metaclass=abc.ABCMeta): + # pylint: disable=line-too-long + """Abstract base class for Split compositionality. + + See the + [guide on splits](../loading#slice-splits) + for more information. + + There are three parts to the composition: + 1) The splits are composed (defined, merged, split,...) together before + calling the `.as_dataset()` function. This is done with the `__add__`, + `__getitem__`, which return a tree of `SplitBase` (whose leaf + are the `NamedSplit` objects) + + ``` + split = datasets.Split.TRAIN + datasets.Split.TEST.subsplit(datasets.percent[:50]) + ``` + + 2) The `SplitBase` is forwarded to the `.as_dataset()` function + to be resolved into actual read instruction. This is done by the + `.get_read_instruction()` method which takes the real dataset splits + (name, number of shards,...) and parse the tree to return a + `SplitReadInstruction()` object + + ``` + read_instruction = split.get_read_instruction(self.info.splits) + ``` + + 3) The `SplitReadInstruction` is then used in the `tf.data.Dataset` pipeline + to define which files to read and how to skip examples within file. + + """ + + # pylint: enable=line-too-long + + @abc.abstractmethod + def get_read_instruction(self, split_dict): + """Parse the descriptor tree and compile all read instructions together. 
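The split composition described above surfaces on the user side as split instruction strings passed to `load_dataset`; a short sketch, with an illustrative dataset name:

```python
from datasets import load_dataset

# Percent slices and split unions, the user-facing counterpart of SplitBase composition.
last_quarter = load_dataset("imdb", split="train[75%:]")
train_plus_test = load_dataset("imdb", split="train+test")
first_100 = load_dataset("imdb", split="train[:100]")
```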
+ + Args: + split_dict: `dict`, The `dict[split_name, SplitInfo]` of the dataset + + Returns: + split_read_instruction: `SplitReadInstruction` + """ + raise NotImplementedError("Abstract method") + + def __eq__(self, other): + """Equality: datasets.Split.TRAIN == 'train'.""" + if isinstance(other, (NamedSplit, str)): + return False + raise NotImplementedError("Equality is not implemented between merged/sub splits.") + + def __ne__(self, other): + """InEquality: datasets.Split.TRAIN != 'test'.""" + return not self.__eq__(other) + + def __add__(self, other): + """Merging: datasets.Split.TRAIN + datasets.Split.TEST.""" + return _SplitMerged(self, other) + + def subsplit(self, arg=None, k=None, percent=None, weighted=None): # pylint: disable=redefined-outer-name + """Divides this split into subsplits. + + There are 3 ways to define subsplits, which correspond to the 3 + arguments `k` (get `k` even subsplits), `percent` (get a slice of the + dataset with `datasets.percent`), and `weighted` (get subsplits with proportions + specified by `weighted`). + + Example:: + + ``` + # 50% train, 50% test + train, test = split.subsplit(k=2) + # 50% train, 25% test, 25% validation + train, test, validation = split.subsplit(weighted=[2, 1, 1]) + # Extract last 20% + subsplit = split.subsplit(datasets.percent[-20:]) + ``` + + Warning: k and weighted will be converted into percent which mean that + values below the percent will be rounded up or down. The final split may be + bigger to deal with remainders. For instance: + + ``` + train, test, valid = split.subsplit(k=3) # 33%, 33%, 34% + s1, s2, s3, s4 = split.subsplit(weighted=[2, 2, 1, 1]) # 33%, 33%, 16%, 18% + ``` + + Args: + arg: If no kwargs are given, `arg` will be interpreted as one of + `k`, `percent`, or `weighted` depending on the type. + For example: + ``` + split.subsplit(10) # Equivalent to split.subsplit(k=10) + split.subsplit(datasets.percent[:-20]) # percent=datasets.percent[:-20] + split.subsplit([1, 1, 2]) # weighted=[1, 1, 2] + ``` + k: `int` If set, subdivide the split into `k` equal parts. + percent: `datasets.percent slice`, return a single subsplit corresponding to + a slice of the original split. For example: + `split.subsplit(datasets.percent[-20:]) # Last 20% of the dataset`. + weighted: `list[int]`, return a list of subsplits whose proportions match + the normalized sum of the list. For example: + `split.subsplit(weighted=[1, 1, 2]) # 25%, 25%, 50%`. + + Returns: + A subsplit or list of subsplits extracted from this split object. + """ + # Note that the percent kwargs redefine the outer name datasets.percent. This + # is done for consistency (.subsplit(percent=datasets.percent[:40])) + if sum(bool(x) for x in (arg, k, percent, weighted)) != 1: + raise ValueError("Only one argument of subsplit should be set.") + + # Auto deduce k + if isinstance(arg, int): + k = arg + elif isinstance(arg, slice): + percent = arg + elif isinstance(arg, list): + weighted = arg + + if not (k or percent or weighted): + raise ValueError( + f"Invalid split argument {arg}. Only list, slice and int supported. " + "One of k, weighted or percent should be set to a non empty value." + ) + + def assert_slices_coverage(slices): + # Ensure that the expended slices cover all percents. 
+ assert sum((list(range(*s.indices(100))) for s in slices), []) == list(range(100)) + + if k: + if not 0 < k <= 100: + raise ValueError(f"Subsplit k should be between 0 and 100, got {k}") + shift = 100 // k + slices = [slice(i * shift, (i + 1) * shift) for i in range(k)] + # Round up last element to ensure all elements are taken + slices[-1] = slice(slices[-1].start, 100) + # Internal check to ensure full coverage + assert_slices_coverage(slices) + return tuple(_SubSplit(self, s) for s in slices) + elif percent: + return _SubSplit(self, percent) + elif weighted: + # Normalize the weighted sum + total = sum(weighted) + weighted = [100 * x // total for x in weighted] + # Create the slice for each of the elements + start = 0 + stop = 0 + slices = [] + for v in weighted: + stop += v + slices.append(slice(start, stop)) + start = stop + # Round up last element to ensure all elements are taken + slices[-1] = slice(slices[-1].start, 100) + # Internal check to ensure full coverage + assert_slices_coverage(slices) + return tuple(_SubSplit(self, s) for s in slices) + else: + # Should not be possible + raise ValueError("Could not determine the split") + + +# 2 requirements: +# 1. datasets.percent be sliceable +# 2. datasets.percent be documented +# +# Instances are not documented, so we want datasets.percent to be a class, but to +# have it be sliceable, we need this metaclass. +class PercentSliceMeta(type): + def __getitem__(cls, slice_value): + if not isinstance(slice_value, slice): + raise ValueError(f"datasets.percent should only be called with slice, not {slice_value}") + return slice_value + + +class PercentSlice(metaclass=PercentSliceMeta): + # pylint: disable=line-too-long + """Syntactic sugar for defining slice subsplits: `datasets.percent[75:-5]`. + + See the + [guide on splits](../loading#slice-splits) + for more information. + """ + + # pylint: enable=line-too-long + pass + + +percent = PercentSlice # pylint: disable=invalid-name + + +class _SplitMerged(SplitBase): + """Represent two split descriptors merged together.""" + + def __init__(self, split1, split2): + self._split1 = split1 + self._split2 = split2 + + def get_read_instruction(self, split_dict): + read_instruction1 = self._split1.get_read_instruction(split_dict) + read_instruction2 = self._split2.get_read_instruction(split_dict) + return read_instruction1 + read_instruction2 + + def __repr__(self): + return f"({repr(self._split1)} + {repr(self._split2)})" + + +class _SubSplit(SplitBase): + """Represent a sub split of a split descriptor.""" + + def __init__(self, split, slice_value): + self._split = split + self._slice_value = slice_value + + def get_read_instruction(self, split_dict): + return self._split.get_read_instruction(split_dict)[self._slice_value] + + def __repr__(self): + slice_str = "{start}:{stop}" + if self._slice_value.step is not None: + slice_str += ":{step}" + slice_str = slice_str.format( + start="" if self._slice_value.start is None else self._slice_value.start, + stop="" if self._slice_value.stop is None else self._slice_value.stop, + step=self._slice_value.step, + ) + return f"{repr(self._split)}(datasets.percent[{slice_str}])" + + +class NamedSplit(SplitBase): + """Descriptor corresponding to a named split (train, test, ...). 
+ + Example: + Each descriptor can be composed with other using addition or slice: + + ```py + split = datasets.Split.TRAIN.subsplit(datasets.percent[0:25]) + datasets.Split.TEST + ``` + + The resulting split will correspond to 25% of the train split merged with + 100% of the test split. + + A split cannot be added twice, so the following will fail: + + ```py + split = ( + datasets.Split.TRAIN.subsplit(datasets.percent[:25]) + + datasets.Split.TRAIN.subsplit(datasets.percent[75:]) + ) # Error + split = datasets.Split.TEST + datasets.Split.ALL # Error + ``` + + The slices can be applied only one time. So the following are valid: + + ```py + split = ( + datasets.Split.TRAIN.subsplit(datasets.percent[:25]) + + datasets.Split.TEST.subsplit(datasets.percent[:50]) + ) + split = (datasets.Split.TRAIN + datasets.Split.TEST).subsplit(datasets.percent[:50]) + ``` + + But this is not valid: + + ```py + train = datasets.Split.TRAIN + test = datasets.Split.TEST + split = train.subsplit(datasets.percent[:25]).subsplit(datasets.percent[:25]) + split = (train.subsplit(datasets.percent[:25]) + test).subsplit(datasets.percent[:50]) + ``` + """ + + def __init__(self, name): + self._name = name + split_names_from_instruction = [split_instruction.split("[")[0] for split_instruction in name.split("+")] + for split_name in split_names_from_instruction: + if not re.match(_split_re, split_name): + raise ValueError(f"Split name should match '{_split_re}' but got '{split_name}'.") + + def __str__(self): + return self._name + + def __repr__(self): + return f"NamedSplit({self._name!r})" + + def __eq__(self, other): + """Equality: datasets.Split.TRAIN == 'train'.""" + if isinstance(other, NamedSplit): + return self._name == other._name # pylint: disable=protected-access + elif isinstance(other, SplitBase): + return False + elif isinstance(other, str): # Other should be string + return self._name == other + else: + raise ValueError(f"Equality not supported between split {self} and {other}") + + def __lt__(self, other): + return self._name < other._name # pylint: disable=protected-access + + def __hash__(self): + return hash(self._name) + + def get_read_instruction(self, split_dict): + return SplitReadInstruction(split_dict[self._name]) + + +class NamedSplitAll(NamedSplit): + """Split corresponding to the union of all defined dataset splits.""" + + def __init__(self): + super().__init__("all") + + def __repr__(self): + return "NamedSplitAll()" + + def get_read_instruction(self, split_dict): + # Merge all dataset split together + read_instructions = [SplitReadInstruction(s) for s in split_dict.values()] + return sum(read_instructions, SplitReadInstruction()) + + +class Split: + # pylint: disable=line-too-long + """`Enum` for dataset splits. + + Datasets are typically split into different subsets to be used at various + stages of training and evaluation. + + - `TRAIN`: the training data. + - `VALIDATION`: the validation data. If present, this is typically used as + evaluation data while iterating on a model (e.g. changing hyperparameters, + model architecture, etc.). + - `TEST`: the testing data. This is the data to report metrics on. Typically + you do not want to use this during model iteration as you may overfit to it. + - `ALL`: the union of all defined dataset splits. + + All splits, including compositions inherit from `datasets.SplitBase`. + + See the [guide](../load_hub#splits) on splits for more information. + + Example: + + ```py + >>> datasets.SplitGenerator( + ... name=datasets.Split.TRAIN, + ... 
gen_kwargs={"split_key": "train", "files": dl_manager.download_and_extract(url)},
+ ... ),
+ ... datasets.SplitGenerator(
+ ... name=datasets.Split.VALIDATION,
+ ... gen_kwargs={"split_key": "validation", "files": dl_manager.download_and_extract(url)},
+ ... ),
+ ... datasets.SplitGenerator(
+ ... name=datasets.Split.TEST,
+ ... gen_kwargs={"split_key": "test", "files": dl_manager.download_and_extract(url)},
+ ... )
+ ```
+ """
+
+ # pylint: enable=line-too-long
+ TRAIN = NamedSplit("train")
+ TEST = NamedSplit("test")
+ VALIDATION = NamedSplit("validation")
+ ALL = NamedSplitAll()
+
+ def __new__(cls, name):
+ """Create a custom split with datasets.Split('custom_name')."""
+ return NamedSplitAll() if name == "all" else NamedSplit(name)
+
+
+# Similar to SplitInfo, but contains an additional slice info
+SlicedSplitInfo = collections.namedtuple(
+ "SlicedSplitInfo",
+ [
+ "split_info",
+ "slice_value",
+ ],
+) # noqa: E231
+
+
+class SplitReadInstruction:
+ """Object containing the reading instruction for the dataset.
+
+ Similarly to `SplitDescriptor` nodes, this object can be composed with itself,
+ but the resolution happens instantaneously, instead of keeping track of the
+ tree, such that all instructions are compiled and flattened in a single
+ SplitReadInstruction object containing the list of files and slice to use.
+
+ Once resolved, the instructions can be accessed with:
+
+ ```
+ read_instructions.get_list_sliced_split_info() # List of splits to use
+ ```
+
+ """
+
+ def __init__(self, split_info=None):
+ self._splits = NonMutableDict(error_msg="Overlap between splits. Split {key} has been added with " "itself.")
+
+ if split_info:
+ self.add(SlicedSplitInfo(split_info=split_info, slice_value=None))
+
+ def add(self, sliced_split):
+ """Add a SlicedSplitInfo to the read instructions."""
+ # TODO(epot): Check that the number of examples per shard % 100 == 0
+ # Otherwise the slice values may be unbalanced and not exactly reflect the
+ # requested slice.
+ self._splits[sliced_split.split_info.name] = sliced_split + + def __add__(self, other): + """Merging split together.""" + # Will raise error if a split has already be added (NonMutableDict) + # TODO(epot): If a split is already added but there is no overlap between + # the slices, should merge the slices (ex: [:10] + [80:]) + split_instruction = SplitReadInstruction() + split_instruction._splits.update(self._splits) # pylint: disable=protected-access + split_instruction._splits.update(other._splits) # pylint: disable=protected-access + return split_instruction + + def __getitem__(self, slice_value): + """Sub-splits.""" + # Will raise an error if a split has already been sliced + split_instruction = SplitReadInstruction() + for v in self._splits.values(): + if v.slice_value is not None: + raise ValueError(f"Trying to slice Split {v.split_info.name} which has already been sliced") + v = v._asdict() + v["slice_value"] = slice_value + split_instruction.add(SlicedSplitInfo(**v)) + return split_instruction + + def get_list_sliced_split_info(self): + return list(self._splits.values()) + + +class SplitDict(dict): + """Split info object.""" + + def __init__(self, *args, dataset_name=None, **kwargs): + super().__init__(*args, **kwargs) + self.dataset_name = dataset_name + + def __getitem__(self, key: Union[SplitBase, str]): + # 1st case: The key exists: `info.splits['train']` + if str(key) in self: + return super().__getitem__(str(key)) + # 2nd case: Uses instructions: `info.splits['train[50%]']` + else: + instructions = make_file_instructions( + name=self.dataset_name, + split_infos=self.values(), + instruction=key, + ) + return SubSplitInfo(instructions) + + def __setitem__(self, key: Union[SplitBase, str], value: SplitInfo): + if key != value.name: + raise ValueError(f"Cannot add elem. 
(key mismatch: '{key}' != '{value.name}')") + super().__setitem__(key, value) + + def add(self, split_info: SplitInfo): + """Add the split info.""" + if split_info.name in self: + raise ValueError(f"Split {split_info.name} already present") + split_info.dataset_name = self.dataset_name + super().__setitem__(split_info.name, split_info) + + @property + def total_num_examples(self): + """Return the total number of examples.""" + return sum(s.num_examples for s in self.values()) + + @classmethod + def from_split_dict(cls, split_infos: Union[List, Dict], dataset_name: Optional[str] = None): + """Returns a new SplitDict initialized from a Dict or List of `split_infos`.""" + if isinstance(split_infos, dict): + split_infos = list(split_infos.values()) + + if dataset_name is None: + dataset_name = split_infos[0].get("dataset_name") if split_infos else None + + split_dict = cls(dataset_name=dataset_name) + + for split_info in split_infos: + if isinstance(split_info, dict): + split_info = SplitInfo(**split_info) + split_dict.add(split_info) + + return split_dict + + def to_split_dict(self): + """Returns a list of SplitInfo protos that we have.""" + out = [] + for split_name, split_info in self.items(): + split_info = copy.deepcopy(split_info) + split_info.name = split_name + out.append(split_info) + return out + + def copy(self): + return SplitDict.from_split_dict(self.to_split_dict(), self.dataset_name) + + def _to_yaml_list(self) -> list: + out = [asdict(s) for s in self.to_split_dict()] + # we don't need the shard lengths in YAML, since it depends on max_shard_size and num_proc + for split_info_dict in out: + split_info_dict.pop("shard_lengths", None) + # we don't need the dataset_name attribute that is deprecated + for split_info_dict in out: + split_info_dict.pop("dataset_name", None) + return out + + @classmethod + def _from_yaml_list(cls, yaml_data: list) -> "SplitDict": + return cls.from_split_dict(yaml_data) + + +@dataclass +class SplitGenerator: + """Defines the split information for the generator. + + This should be used as returned value of + `GeneratorBasedBuilder._split_generators`. + See `GeneratorBasedBuilder._split_generators` for more info and example + of usage. + + Args: + name (`str`): + Name of the `Split` for which the generator will + create the examples. + **gen_kwargs (additional keyword arguments): + Keyword arguments to forward to the `DatasetBuilder._generate_examples` method + of the builder. + + Example: + + ```py + >>> datasets.SplitGenerator( + ... name=datasets.Split.TRAIN, + ... gen_kwargs={"split_key": "train", "files": dl_manager.download_and_extract(url)}, + ... 
) + ``` + """ + + name: str + gen_kwargs: Dict = dataclasses.field(default_factory=dict) + split_info: SplitInfo = dataclasses.field(init=False) + + def __post_init__(self): + self.name = str(self.name) # Make sure we convert NamedSplits in strings + NamedSplit(self.name) # check that it's a valid split name + self.split_info = SplitInfo(name=self.name) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/table.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/table.py new file mode 100644 index 0000000000000000000000000000000000000000..ec1a88c3d3db614e69ff6387dc419d38784e1d3b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/table.py @@ -0,0 +1,2422 @@ +import copy +import os +from functools import partial +from itertools import groupby +from typing import TYPE_CHECKING, Any, Callable, Iterator, List, Optional, Tuple, TypeVar, Union + +import numpy as np +import pyarrow as pa +import pyarrow.compute as pc +import pyarrow.types + +from . import config +from .utils.logging import get_logger + + +if TYPE_CHECKING: + from .features.features import Features, FeatureType + + +logger = get_logger(__name__) + + +def inject_arrow_table_documentation(arrow_table_method): + def wrapper(fn): + fn.__doc__ = arrow_table_method.__doc__ + (fn.__doc__ if fn.__doc__ is not None else "") + fn.__doc__ = fn.__doc__.replace("pyarrow.Table", "Table") + if hasattr(arrow_table_method, "__annotations__"): + fn.__annotations__ = arrow_table_method.__annotations__ + return fn + + return wrapper + + +def _in_memory_arrow_table_from_file(filename: str) -> pa.Table: + in_memory_stream = pa.input_stream(filename) + opened_stream = pa.ipc.open_stream(in_memory_stream) + pa_table = opened_stream.read_all() + return pa_table + + +def _in_memory_arrow_table_from_buffer(buffer: pa.Buffer) -> pa.Table: + stream = pa.BufferReader(buffer) + opened_stream = pa.ipc.open_stream(stream) + table = opened_stream.read_all() + return table + + +def _memory_mapped_record_batch_reader_from_file(filename: str) -> pa.RecordBatchStreamReader: + memory_mapped_stream = pa.memory_map(filename) + return pa.ipc.open_stream(memory_mapped_stream) + + +def read_schema_from_file(filename: str) -> pa.Schema: + """ + Infer arrow table schema from file without loading whole file into memory. + Usefull especially while having very big files. 
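A small sketch of how these helpers might be exercised; the file name is hypothetical, and the table is a stand-in written in pyarrow's IPC stream format, which is what these readers expect:

```python
import pyarrow as pa

from datasets.table import _memory_mapped_arrow_table_from_file, read_schema_from_file

# Write a tiny Arrow IPC stream file so there is something to inspect.
tbl = pa.table({"id": [1, 2, 3], "text": ["a", "b", "c"]})
with pa.OSFile("example.arrow", "wb") as sink:
    with pa.ipc.new_stream(sink, tbl.schema) as writer:
        writer.write_table(tbl)

# Read the schema without materializing the table, then memory-map the full table.
print(read_schema_from_file("example.arrow"))
mapped = _memory_mapped_arrow_table_from_file("example.arrow")
print(mapped.num_rows)
```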
+ """ + with pa.memory_map(filename) as memory_mapped_stream: + schema = pa.ipc.open_stream(memory_mapped_stream).schema + return schema + + +def _memory_mapped_arrow_table_from_file(filename: str) -> pa.Table: + opened_stream = _memory_mapped_record_batch_reader_from_file(filename) + pa_table = opened_stream.read_all() + return pa_table + + +def _deepcopy(x, memo: dict): + """deepcopy a regular class instance""" + cls = x.__class__ + result = cls.__new__(cls) + memo[id(x)] = result + for k, v in x.__dict__.items(): + setattr(result, k, copy.deepcopy(v, memo)) + return result + + +def _interpolation_search(arr: List[int], x: int) -> int: + """ + Return the position i of a sorted array so that arr[i] <= x < arr[i+1] + + Args: + arr (`List[int]`): non-empty sorted list of integers + x (`int`): query + + Returns: + `int`: the position i so that arr[i] <= x < arr[i+1] + + Raises: + `IndexError`: if the array is empty or if the query is outside the array values + """ + i, j = 0, len(arr) - 1 + while i < j and arr[i] <= x < arr[j]: + k = i + ((j - i) * (x - arr[i]) // (arr[j] - arr[i])) + if arr[k] <= x < arr[k + 1]: + return k + elif arr[k] < x: + i, j = k + 1, j + else: + i, j = i, k + raise IndexError(f"Invalid query '{x}' for size {arr[-1] if len(arr) else 'none'}.") + + +class IndexedTableMixin: + def __init__(self, table: pa.Table): + self._schema: pa.Schema = table.schema + self._batches: List[pa.RecordBatch] = [ + recordbatch for recordbatch in table.to_batches() if len(recordbatch) > 0 + ] + self._offsets: np.ndarray = np.cumsum([0] + [len(b) for b in self._batches], dtype=np.int64) + + def fast_gather(self, indices: Union[List[int], np.ndarray]) -> pa.Table: + """ + Create a pa.Table by gathering the records at the records at the specified indices. Should be faster + than pa.concat_tables(table.fast_slice(int(i) % table.num_rows, 1) for i in indices) since NumPy can compute + the binary searches in parallel, highly optimized C + """ + if not len(indices): + raise ValueError("Indices must be non-empty") + batch_indices = np.searchsorted(self._offsets, indices, side="right") - 1 + return pa.Table.from_batches( + [ + self._batches[batch_idx].slice(i - self._offsets[batch_idx], 1) + for batch_idx, i in zip(batch_indices, indices) + ], + schema=self._schema, + ) + + def fast_slice(self, offset=0, length=None) -> pa.Table: + """ + Slice the Table using interpolation search. + The behavior is the same as `pyarrow.Table.slice` but it's significantly faster. + + Interpolation search is used to find the start and end indexes of the batches we want to keep. + The batches to keep are then concatenated to form the sliced Table. + """ + if offset < 0: + raise IndexError("Offset must be non-negative") + elif offset >= self._offsets[-1] or (length is not None and length <= 0): + return pa.Table.from_batches([], schema=self._schema) + i = _interpolation_search(self._offsets, offset) + if length is None or length + offset >= self._offsets[-1]: + batches = self._batches[i:] + batches[0] = batches[0].slice(offset - self._offsets[i]) + else: + j = _interpolation_search(self._offsets, offset + length - 1) + batches = self._batches[i : j + 1] + batches[-1] = batches[-1].slice(0, offset + length - self._offsets[j]) + batches[0] = batches[0].slice(offset - self._offsets[i]) + return pa.Table.from_batches(batches, schema=self._schema) + + +class Table(IndexedTableMixin): + """ + Wraps a pyarrow Table by using composition. 
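To make the `_interpolation_search` contract concrete, a tiny check over made-up batch offsets (cumulative batch lengths, as `IndexedTableMixin` builds them):

```python
from datasets.table import _interpolation_search

# Three record batches of sizes 3, 4 and 5 give cumulative offsets [0, 3, 7, 12].
offsets = [0, 3, 7, 12]
assert _interpolation_search(offsets, 0) == 0   # row 0 lives in batch 0
assert _interpolation_search(offsets, 6) == 1   # row 6 lives in batch 1
assert _interpolation_search(offsets, 11) == 2  # row 11 lives in batch 2
```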
+ This is the base class for `InMemoryTable`, `MemoryMappedTable` and `ConcatenationTable`. + + It implements all the basic attributes/methods of the pyarrow Table class except + the Table transforms: `slice, filter, flatten, combine_chunks, cast, add_column, + append_column, remove_column, set_column, rename_columns` and `drop`. + + The implementation of these methods differs for the subclasses. + """ + + def __init__(self, table: pa.Table): + super().__init__(table) + self.table = table + + def __deepcopy__(self, memo: dict): + # arrow tables are immutable, so there's no need to copy self.table + # moreover calling deepcopy on a pyarrow table seems to make pa.total_allocated_bytes() decrease for some reason + # by adding it to the memo, self.table won't be copied + memo[id(self.table)] = self.table + # same for the recordbatches used by the index + memo[id(self._batches)] = list(self._batches) + return _deepcopy(self, memo) + + def validate(self, *args, **kwargs): + """ + Perform validation checks. An exception is raised if validation fails. + + By default only cheap validation checks are run. Pass `full=True` + for thorough validation checks (potentially `O(n)`). + + Args: + full (`bool`, defaults to `False`): + If `True`, run expensive checks, otherwise cheap checks only. + + Raises: + `pa.lib.ArrowInvalid`: if validation fails + """ + return self.table.validate(*args, **kwargs) + + def equals(self, *args, **kwargs): + """ + Check if contents of two tables are equal. + + Args: + other ([`~datasets.table.Table`]): + Table to compare against. + check_metadata `bool`, defaults to `False`): + Whether schema metadata equality should be checked as well. + + Returns: + `bool` + """ + args = tuple(arg.table if isinstance(arg, Table) else arg for arg in args) + kwargs = {k: v.table if isinstance(v, Table) else v for k, v in kwargs} + return self.table.equals(*args, **kwargs) + + def to_batches(self, *args, **kwargs): + """ + Convert Table to list of (contiguous) `RecordBatch` objects. + + Args: + max_chunksize (`int`, defaults to `None`): + Maximum size for `RecordBatch` chunks. Individual chunks may be + smaller depending on the chunk layout of individual columns. + + Returns: + `List[pyarrow.RecordBatch]` + """ + return self.table.to_batches(*args, **kwargs) + + def to_pydict(self, *args, **kwargs): + """ + Convert the Table to a `dict` or `OrderedDict`. + + Returns: + `dict` + """ + return self.table.to_pydict(*args, **kwargs) + + def to_pylist(self, *args, **kwargs): + """ + Convert the Table to a list + + Returns: + `list` + """ + return self.table.to_pylist(*args, **kwargs) + + def to_pandas(self, *args, **kwargs): + """ + Convert to a pandas-compatible NumPy array or DataFrame, as appropriate. + + Args: + memory_pool (`MemoryPool`, defaults to `None`): + Arrow MemoryPool to use for allocations. Uses the default memory + pool is not passed. + strings_to_categorical (`bool`, defaults to `False`): + Encode string (UTF8) and binary types to `pandas.Categorical`. + categories (`list`, defaults to `empty`): + List of fields that should be returned as `pandas.Categorical`. Only + applies to table-like data structures. + zero_copy_only (`bool`, defaults to `False`): + Raise an `ArrowException` if this function call would require copying + the underlying data. + integer_object_nulls (`bool`, defaults to `False`): + Cast integers with nulls to objects. + date_as_object (`bool`, defaults to `True`): + Cast dates to objects. If `False`, convert to `datetime64[ns]` dtype. 
+ timestamp_as_object (`bool`, defaults to `False`): + Cast non-nanosecond timestamps (`np.datetime64`) to objects. This is + useful if you have timestamps that don't fit in the normal date + range of nanosecond timestamps (1678 CE-2262 CE). + If `False`, all timestamps are converted to `datetime64[ns]` dtype. + use_threads (`bool`, defaults to `True`): + Whether to parallelize the conversion using multiple threads. + deduplicate_objects (`bool`, defaults to `False`): + Do not create multiple copies Python objects when created, to save + on memory use. Conversion will be slower. + ignore_metadata (`bool`, defaults to `False`): + If `True`, do not use the 'pandas' metadata to reconstruct the + DataFrame index, if present. + safe (`bool`, defaults to `True`): + For certain data types, a cast is needed in order to store the + data in a pandas DataFrame or Series (e.g. timestamps are always + stored as nanoseconds in pandas). This option controls whether it + is a safe cast or not. + split_blocks (`bool`, defaults to `False`): + If `True`, generate one internal "block" for each column when + creating a pandas.DataFrame from a `RecordBatch` or `Table`. While this + can temporarily reduce memory note that various pandas operations + can trigger "consolidation" which may balloon memory use. + self_destruct (`bool`, defaults to `False`): + EXPERIMENTAL: If `True`, attempt to deallocate the originating Arrow + memory while converting the Arrow object to pandas. If you use the + object after calling `to_pandas` with this option it will crash your + program. + types_mapper (`function`, defaults to `None`): + A function mapping a pyarrow DataType to a pandas `ExtensionDtype`. + This can be used to override the default pandas type for conversion + of built-in pyarrow types or in absence of `pandas_metadata` in the + Table schema. The function receives a pyarrow DataType and is + expected to return a pandas `ExtensionDtype` or `None` if the + default conversion should be used for that type. If you have + a dictionary mapping, you can pass `dict.get` as function. + + Returns: + `pandas.Series` or `pandas.DataFrame`: `pandas.Series` or `pandas.DataFrame` depending on type of object + """ + return self.table.to_pandas(*args, **kwargs) + + def to_string(self, *args, **kwargs): + return self.table.to_string(*args, **kwargs) + + def to_reader(self, max_chunksize: Optional[int] = None): + """ + Convert the Table to a RecordBatchReader. + + Note that this method is zero-copy, it merely exposes the same data under a different API. + + Args: + max_chunksize (`int`, defaults to `None`) + Maximum size for RecordBatch chunks. Individual chunks may be smaller depending + on the chunk layout of individual columns. + + Returns: + `pyarrow.RecordBatchReader` + """ + return self.table.to_reader(max_chunksize=max_chunksize) + + def field(self, *args, **kwargs): + """ + Select a schema field by its column name or numeric index. + + Args: + i (`Union[int, str]`): + The index or name of the field to retrieve. + + Returns: + `pyarrow.Field` + """ + return self.table.field(*args, **kwargs) + + def column(self, *args, **kwargs): + """ + Select a column by its column name, or numeric index. + + Args: + i (`Union[int, str]`): + The index or name of the column to retrieve. + + Returns: + `pyarrow.ChunkedArray` + """ + return self.table.column(*args, **kwargs) + + def itercolumns(self, *args, **kwargs): + """ + Iterator over all columns in their numerical order. 
+ + Yields: + `pyarrow.ChunkedArray` + """ + return self.table.itercolumns(*args, **kwargs) + + @property + def schema(self): + """ + Schema of the table and its columns. + + Returns: + `pyarrow.Schema` + """ + return self.table.schema + + @property + def columns(self): + """ + List of all columns in numerical order. + + Returns: + `List[pa.ChunkedArray]` + """ + return self.table.columns + + @property + def num_columns(self): + """ + Number of columns in this table. + + Returns: + int + """ + return self.table.num_columns + + @property + def num_rows(self): + """ + Number of rows in this table. + + Due to the definition of a table, all columns have the same number of + rows. + + Returns: + int + """ + return self.table.num_rows + + @property + def shape(self): + """ + Dimensions of the table: (#rows, #columns). + + Returns: + `(int, int)`: Number of rows and number of columns. + """ + return self.table.shape + + @property + def nbytes(self): + """ + Total number of bytes consumed by the elements of the table. + """ + return self.table.nbytes + + @property + def column_names(self): + """ + Names of the table's columns. + """ + return self.table.column_names + + def __eq__(self, other): + return self.equals(other) + + def __getitem__(self, i): + return self.table[i] + + def __len__(self): + return len(self.table) + + def __repr__(self): + return self.table.__repr__().replace("pyarrow.Table", self.__class__.__name__) + + def __str__(self): + return self.table.__str__().replace("pyarrow.Table", self.__class__.__name__) + + def slice(self, *args, **kwargs): + """ + Compute zero-copy slice of this Table. + + Args: + offset (`int`, defaults to `0`): + Offset from start of table to slice. + length (`int`, defaults to `None`): + Length of slice (default is until end of table starting from + offset). + + Returns: + `datasets.table.Table` + """ + raise NotImplementedError() + + def filter(self, *args, **kwargs): + """ + Select records from a Table. See `pyarrow.compute.filter` for full usage. + """ + raise NotImplementedError() + + def flatten(self, *args, **kwargs): + """ + Flatten this Table. Each column with a struct type is flattened + into one column per struct field. Other columns are left unchanged. + + Args: + memory_pool (`MemoryPool`, defaults to `None`): + For memory allocations, if required, otherwise use default pool. + + Returns: + `datasets.table.Table` + """ + raise NotImplementedError() + + def combine_chunks(self, *args, **kwargs): + """ + Make a new table by combining the chunks this table has. + + All the underlying chunks in the `ChunkedArray` of each column are + concatenated into zero or one chunk. + + Args: + memory_pool (`MemoryPool`, defaults to `None`): + For memory allocations, if required, otherwise use default pool. + + Returns: + `datasets.table.Table` + """ + raise NotImplementedError() + + def cast(self, *args, **kwargs): + """ + Cast table values to another schema. + + Args: + target_schema (`Schema`): + Schema to cast to, the names and order of fields must match. + safe (`bool`, defaults to `True`): + Check for overflows or other unsafe conversions. 
+ + Returns: + `datasets.table.Table` + """ + raise NotImplementedError() + + def replace_schema_metadata(self, *args, **kwargs): + """ + EXPERIMENTAL: Create shallow copy of table by replacing schema + key-value metadata with the indicated new metadata (which may be None, + which deletes any existing metadata + + Args: + metadata (`dict`, defaults to `None`): + + Returns: + `datasets.table.Table`: shallow_copy + """ + raise NotImplementedError() + + def add_column(self, *args, **kwargs): + """ + Add column to Table at position. + + A new table is returned with the column added, the original table + object is left unchanged. + + Args: + i (`int`): + Index to place the column at. + field_ (`Union[str, pyarrow.Field]`): + If a string is passed then the type is deduced from the column + data. + column (`Union[pyarrow.Array, List[pyarrow.Array]]`): + Column data. + + Returns: + `datasets.table.Table`: New table with the passed column added. + """ + raise NotImplementedError() + + def append_column(self, *args, **kwargs): + """ + Append column at end of columns. + + Args: + field_ (`Union[str, pyarrow.Field]`): + If a string is passed then the type is deduced from the column + data. + column (`Union[pyarrow.Array, List[pyarrow.Array]]`): + Column data. + + Returns: + `datasets.table.Table`: New table with the passed column added. + """ + raise NotImplementedError() + + def remove_column(self, *args, **kwargs): + """ + Create new Table with the indicated column removed. + + Args: + i (`int`): + Index of column to remove. + + Returns: + `datasets.table.Table`: New table without the column. + """ + raise NotImplementedError() + + def set_column(self, *args, **kwargs): + """ + Replace column in Table at position. + + Args: + i (`int`): + Index to place the column at. + field_ (`Union[str, pyarrow.Field]`): + If a string is passed then the type is deduced from the column + data. + column (`Union[pyarrow.Array, List[pyarrow.Array]]`): + Column data. + + Returns: + `datasets.table.Table`: New table with the passed column set. + """ + raise NotImplementedError() + + def rename_columns(self, *args, **kwargs): + """ + Create new table with columns renamed to provided names. + """ + raise NotImplementedError() + + def drop(self, *args, **kwargs): + """ + Drop one or more columns and return a new table. + + Args: + columns (`List[str]`): + List of field names referencing existing columns. + + Raises: + `KeyError` : if any of the passed columns name are not existing. + + Returns: + `datasets.table.Table`: New table without the columns. + """ + raise NotImplementedError() + + def select(self, *args, **kwargs): + """ + Select columns of the table. + + Returns a new table with the specified columns, and metadata preserved. + + Args: + columns (:obj:`Union[List[str], List[int]]`): + The column names or integer indices to select. + + Returns: + `datasets.table.Table`: table with only a subset of the columns + """ + raise NotImplementedError() + + +class TableBlock(Table): + """ + `TableBlock` is the allowed class inside a `ConcanetationTable`. + Only `MemoryMappedTable` and `InMemoryTable` are `TableBlock`. + This is because we don't want a `ConcanetationTable` made out of other `ConcanetationTables`. + """ + + pass + + +class InMemoryTable(TableBlock): + """ + The table is said in-memory when it is loaded into the user's RAM. + + Pickling it does copy all the data using memory. + Its implementation is simple and uses the underlying pyarrow Table methods directly. 
+ + This is different from the `MemoryMapped` table, for which pickling doesn't copy all the + data in memory. For a `MemoryMapped`, unpickling instead reloads the table from the disk. + + `InMemoryTable` must be used when data fit in memory, while `MemoryMapped` are reserved for + data bigger than memory or when you want the memory footprint of your application to + stay low. + """ + + @classmethod + def from_file(cls, filename: str): + table = _in_memory_arrow_table_from_file(filename) + return cls(table) + + @classmethod + def from_buffer(cls, buffer: pa.Buffer): + table = _in_memory_arrow_table_from_buffer(buffer) + return cls(table) + + @classmethod + def from_pandas(cls, *args, **kwargs): + """ + Convert pandas.DataFrame to an Arrow Table. + + The column types in the resulting Arrow Table are inferred from the + dtypes of the pandas.Series in the DataFrame. In the case of non-object + Series, the NumPy dtype is translated to its Arrow equivalent. In the + case of `object`, we need to guess the datatype by looking at the + Python objects in this Series. + + Be aware that Series of the `object` dtype don't carry enough + information to always lead to a meaningful Arrow type. In the case that + we cannot infer a type, e.g. because the DataFrame is of length 0 or + the Series only contains `None/nan` objects, the type is set to + null. This behavior can be avoided by constructing an explicit schema + and passing it to this function. + + Args: + df (`pandas.DataFrame`): + schema (`pyarrow.Schema`, *optional*): + The expected schema of the Arrow Table. This can be used to + indicate the type of columns if we cannot infer it automatically. + If passed, the output will have exactly this schema. Columns + specified in the schema that are not found in the DataFrame columns + or its index will raise an error. Additional columns or index + levels in the DataFrame which are not specified in the schema will + be ignored. + preserve_index (`bool`, *optional*): + Whether to store the index as an additional column in the resulting + `Table`. The default of None will store the index as a column, + except for RangeIndex which is stored as metadata only. Use + `preserve_index=True` to force it to be stored as a column. + nthreads (`int`, defaults to `None` (may use up to system CPU count threads)) + If greater than 1, convert columns to Arrow in parallel using + indicated number of threads. + columns (`List[str]`, *optional*): + List of column to be converted. If `None`, use all columns. + safe (`bool`, defaults to `True`): + Check for overflows or other unsafe conversions, + + Returns: + `datasets.table.Table`: + + Examples: + ```python + >>> import pandas as pd + >>> import pyarrow as pa + >>> df = pd.DataFrame({ + ... 'int': [1, 2], + ... 'str': ['a', 'b'] + ... }) + >>> pa.Table.from_pandas(df) + + ``` + """ + return cls(pa.Table.from_pandas(*args, **kwargs)) + + @classmethod + def from_arrays(cls, *args, **kwargs): + """ + Construct a Table from Arrow arrays. + + Args: + arrays (`List[Union[pyarrow.Array, pyarrow.ChunkedArray]]`): + Equal-length arrays that should form the table. + names (`List[str]`, *optional*): + Names for the table columns. If not passed, schema must be passed. + schema (`Schema`, defaults to `None`): + Schema for the created table. If not passed, names must be passed. + metadata (`Union[dict, Mapping]`, defaults to `None`): + Optional metadata for the schema (if inferred). 
+ + Returns: + `datasets.table.Table` + """ + return cls(pa.Table.from_arrays(*args, **kwargs)) + + @classmethod + def from_pydict(cls, *args, **kwargs): + """ + Construct a Table from Arrow arrays or columns. + + Args: + mapping (`Union[dict, Mapping]`): + A mapping of strings to Arrays or Python lists. + schema (`Schema`, defaults to `None`): + If not passed, will be inferred from the Mapping values + metadata (`Union[dict, Mapping]`, defaults to `None`): + Optional metadata for the schema (if inferred). + + Returns: + `datasets.table.Table` + """ + return cls(pa.Table.from_pydict(*args, **kwargs)) + + @classmethod + def from_pylist(cls, mapping, *args, **kwargs): + """ + Construct a Table from list of rows / dictionaries. + + Args: + mapping (`List[dict]`): + A mapping of strings to row values. + schema (`Schema`, defaults to `None`): + If not passed, will be inferred from the Mapping values + metadata (`Union[dict, Mapping]`, defaults to `None`): + Optional metadata for the schema (if inferred). + + Returns: + `datasets.table.Table` + """ + return cls(pa.Table.from_pylist(mapping, *args, **kwargs)) + + @classmethod + def from_batches(cls, *args, **kwargs): + """ + Construct a Table from a sequence or iterator of Arrow `RecordBatches`. + + Args: + batches (`Union[Sequence[pyarrow.RecordBatch], Iterator[pyarrow.RecordBatch]]`): + Sequence of `RecordBatch` to be converted, all schemas must be equal. + schema (`Schema`, defaults to `None`): + If not passed, will be inferred from the first `RecordBatch`. + + Returns: + `datasets.table.Table`: + """ + return cls(pa.Table.from_batches(*args, **kwargs)) + + def slice(self, offset=0, length=None): + """ + Compute zero-copy slice of this Table. + + Args: + offset (`int`, defaults to `0`): + Offset from start of table to slice. + length (`int`, defaults to `None`): + Length of slice (default is until end of table starting from + offset). + + Returns: + `datasets.table.Table` + """ + # Use fast slicing here + return InMemoryTable(self.fast_slice(offset=offset, length=length)) + + def filter(self, *args, **kwargs): + """ + Select records from a Table. See `pyarrow.compute.filter` for full usage. + """ + return InMemoryTable(self.table.filter(*args, **kwargs)) + + def flatten(self, *args, **kwargs): + """ + Flatten this Table. Each column with a struct type is flattened + into one column per struct field. Other columns are left unchanged. + + Args: + memory_pool (`MemoryPool`, defaults to `None`): + For memory allocations, if required, otherwise use default pool. + + Returns: + `datasets.table.Table` + """ + return InMemoryTable(table_flatten(self.table, *args, **kwargs)) + + def combine_chunks(self, *args, **kwargs): + """ + Make a new table by combining the chunks this table has. + + All the underlying chunks in the `ChunkedArray` of each column are + concatenated into zero or one chunk. + + Args: + memory_pool (`MemoryPool`, defaults to `None`): + For memory allocations, if required, otherwise use default pool. + + Returns: + `datasets.table.Table` + """ + return InMemoryTable(self.table.combine_chunks(*args, **kwargs)) + + def cast(self, *args, **kwargs): + """ + Cast table values to another schema. + + Args: + target_schema (`Schema`): + Schema to cast to, the names and order of fields must match. + safe (`bool`, defaults to `True`): + Check for overflows or other unsafe conversions. 
+ + Returns: + `datasets.table.Table` + """ + return InMemoryTable(table_cast(self.table, *args, **kwargs)) + + def replace_schema_metadata(self, *args, **kwargs): + """ + EXPERIMENTAL: Create shallow copy of table by replacing schema + key-value metadata with the indicated new metadata (which may be `None`, + which deletes any existing metadata). + + Args: + metadata (`dict`, defaults to `None`): + + Returns: + `datasets.table.Table`: shallow_copy + """ + return InMemoryTable(self.table.replace_schema_metadata(*args, **kwargs)) + + def add_column(self, *args, **kwargs): + """ + Add column to Table at position. + + A new table is returned with the column added, the original table + object is left unchanged. + + Args: + i (`int`): + Index to place the column at. + field_ (`Union[str, pyarrow.Field]`): + If a string is passed then the type is deduced from the column + data. + column (`Union[pyarrow.Array, List[pyarrow.Array]]`): + Column data. + + Returns: + `datasets.table.Table`: New table with the passed column added. + """ + return InMemoryTable(self.table.add_column(*args, **kwargs)) + + def append_column(self, *args, **kwargs): + """ + Append column at end of columns. + + Args: + field_ (`Union[str, pyarrow.Field]`): + If a string is passed then the type is deduced from the column + data. + column (`Union[pyarrow.Array, List[pyarrow.Array]]`): + Column data. + + Returns: + `datasets.table.Table`: + New table with the passed column added. + """ + return InMemoryTable(self.table.append_column(*args, **kwargs)) + + def remove_column(self, *args, **kwargs): + """ + Create new Table with the indicated column removed. + + Args: + i (`int`): + Index of column to remove. + + Returns: + `datasets.table.Table`: + New table without the column. + """ + return InMemoryTable(self.table.remove_column(*args, **kwargs)) + + def set_column(self, *args, **kwargs): + """ + Replace column in Table at position. + + Args: + i (`int`): + Index to place the column at. + field_ (`Union[str, pyarrow.Field]`): + If a string is passed then the type is deduced from the column + data. + column (`Union[pyarrow.Array, List[pyarrow.Array]]`): + Column data. + + Returns: + `datasets.table.Table`: + New table with the passed column set. + """ + return InMemoryTable(self.table.set_column(*args, **kwargs)) + + def rename_columns(self, *args, **kwargs): + """ + Create new table with columns renamed to provided names. + """ + return InMemoryTable(self.table.rename_columns(*args, **kwargs)) + + def drop(self, *args, **kwargs): + """ + Drop one or more columns and return a new table. + + Args: + columns (`List[str]`): + List of field names referencing existing columns. + + Raises: + `KeyError` : if any of the passed columns name are not existing. + + Returns: + `datasets.table.Table`: + New table without the columns. + """ + return InMemoryTable(self.table.drop(*args, **kwargs)) + + def select(self, *args, **kwargs): + """ + Select columns of the table. + + Returns a new table with the specified columns, and metadata preserved. + + Args: + columns (:obj:`Union[List[str], List[int]]`): + The column names or integer indices to select. + + Returns: + :class:`datasets.table.Table`: New table with the specified columns, and metadata preserved. 
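+
+        Example (a minimal sketch; the table and column names below are illustrative, not part of the library):
+
+        ```python
+        >>> import pyarrow as pa
+        >>> from datasets.table import InMemoryTable
+        >>> t = InMemoryTable(pa.table({"a": [1, 2], "b": ["x", "y"]}))
+        >>> t.select(["a"]).column_names
+        ['a']
+        ```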
+ """ + return InMemoryTable(self.table.select(*args, **kwargs)) + + +# The MemoryMappedTable needs replays to properly reload tables from the disk +Replay = Tuple[str, tuple, dict] + + +class MemoryMappedTable(TableBlock): + """ + The table is said memory mapped when it doesn't use the user's RAM but loads the data + from the disk instead. + + Pickling it doesn't copy the data into memory. + Instead, only the path to the memory mapped arrow file is pickled, as well as the list + of transforms to "replay" when reloading the table from the disk. + + Its implementation requires to store an history of all the transforms that were applied + to the underlying pyarrow Table, so that they can be "replayed" when reloading the Table + from the disk. + + This is different from the `InMemoryTable` table, for which pickling does copy all the + data in memory. + + `InMemoryTable` must be used when data fit in memory, while `MemoryMapped` are reserved for + data bigger than memory or when you want the memory footprint of your application to + stay low. + """ + + def __init__(self, table: pa.Table, path: str, replays: Optional[List[Replay]] = None): + super().__init__(table) + self.path = os.path.abspath(path) + self.replays: List[Replay] = replays if replays is not None else [] + + @classmethod + def from_file(cls, filename: str, replays=None): + table = _memory_mapped_arrow_table_from_file(filename) + table = cls._apply_replays(table, replays) + return cls(table, filename, replays) + + def __getstate__(self): + return {"path": self.path, "replays": self.replays} + + def __setstate__(self, state): + path = state["path"] + replays = state["replays"] + table = _memory_mapped_arrow_table_from_file(path) + table = self._apply_replays(table, replays) + MemoryMappedTable.__init__(self, table, path=path, replays=replays) + + @staticmethod + def _apply_replays(table: pa.Table, replays: Optional[List[Replay]] = None) -> pa.Table: + if replays is not None: + for name, args, kwargs in replays: + if name == "cast": + table = table_cast(table, *args, **kwargs) + elif name == "flatten": + table = table_flatten(table, *args, **kwargs) + else: + table = getattr(table, name)(*args, **kwargs) + return table + + def _append_replay(self, replay: Replay) -> List[Replay]: + replays = copy.deepcopy(self.replays) + replays.append(replay) + return replays + + def slice(self, offset=0, length=None): + """ + Compute zero-copy slice of this Table. + + Args: + offset (`int`, defaults to `0`): + Offset from start of table to slice. + length (`int`, defaults to `None`): + Length of slice (default is until end of table starting from + offset). + + Returns: + `datasets.table.Table` + """ + replay = ("slice", (offset, length), {}) + replays = self._append_replay(replay) + # Use fast slicing here + return MemoryMappedTable(self.fast_slice(offset=offset, length=length), self.path, replays) + + def filter(self, *args, **kwargs): + """ + Select records from a Table. See `pyarrow.compute.filter` for full usage. + """ + replay = ("filter", copy.deepcopy(args), copy.deepcopy(kwargs)) + replays = self._append_replay(replay) + return MemoryMappedTable(self.table.filter(*args, **kwargs), self.path, replays) + + def flatten(self, *args, **kwargs): + """ + Flatten this Table. Each column with a struct type is flattened + into one column per struct field. Other columns are left unchanged. + + Args: + memory_pool (`MemoryPool`, defaults to `None`): + For memory allocations, if required, otherwise use default pool. 
+ + Returns: + `datasets.table.Table` + """ + replay = ("flatten", copy.deepcopy(args), copy.deepcopy(kwargs)) + replays = self._append_replay(replay) + return MemoryMappedTable(table_flatten(self.table, *args, **kwargs), self.path, replays) + + def combine_chunks(self, *args, **kwargs): + """ + Make a new table by combining the chunks this table has. + + All the underlying chunks in the ChunkedArray of each column are + concatenated into zero or one chunk. + + Args: + memory_pool (`MemoryPool`, defaults to `None`): + For memory allocations, if required, otherwise use default pool. + + Returns: + `datasets.table.Table` + """ + replay = ("combine_chunks", copy.deepcopy(args), copy.deepcopy(kwargs)) + replays = self._append_replay(replay) + return MemoryMappedTable(self.table.combine_chunks(*args, **kwargs), self.path, replays) + + def cast(self, *args, **kwargs): + """ + Cast table values to another schema + + Args: + target_schema (`Schema`): + Schema to cast to, the names and order of fields must match. + safe (`bool`, defaults to `True`): + Check for overflows or other unsafe conversions. + + Returns: + `datasets.table.Table` + """ + replay = ("cast", copy.deepcopy(args), copy.deepcopy(kwargs)) + replays = self._append_replay(replay) + return MemoryMappedTable(table_cast(self.table, *args, **kwargs), self.path, replays) + + def replace_schema_metadata(self, *args, **kwargs): + """ + EXPERIMENTAL: Create shallow copy of table by replacing schema + key-value metadata with the indicated new metadata (which may be None, + which deletes any existing metadata. + + Args: + metadata (`dict`, defaults to `None`): + + Returns: + `datasets.table.Table`: shallow_copy + """ + replay = ("replace_schema_metadata", copy.deepcopy(args), copy.deepcopy(kwargs)) + replays = self._append_replay(replay) + return MemoryMappedTable(self.table.replace_schema_metadata(*args, **kwargs), self.path, replays) + + def add_column(self, *args, **kwargs): + """ + Add column to Table at position. + + A new table is returned with the column added, the original table + object is left unchanged. + + Args: + i (`int`): + Index to place the column at. + field_ (`Union[str, pyarrow.Field]`): + If a string is passed then the type is deduced from the column + data. + column (`Union[pyarrow.Array, List[pyarrow.Array]]`): + Column data. + + Returns: + `datasets.table.Table`: New table with the passed column added. + """ + replay = ("add_column", copy.deepcopy(args), copy.deepcopy(kwargs)) + replays = self._append_replay(replay) + return MemoryMappedTable(self.table.add_column(*args, **kwargs), self.path, replays) + + def append_column(self, *args, **kwargs): + """ + Append column at end of columns. + + Args: + field_ (`Union[str, pyarrow.Field]`): + If a string is passed then the type is deduced from the column + data. + column (`Union[pyarrow.Array, List[pyarrow.Array]]`): + Column data. + + Returns: + `datasets.table.Table`: + New table with the passed column added. + """ + replay = ("append_column", copy.deepcopy(args), copy.deepcopy(kwargs)) + replays = self._append_replay(replay) + return MemoryMappedTable(self.table.append_column(*args, **kwargs), self.path, replays) + + def remove_column(self, *args, **kwargs): + """ + Create new Table with the indicated column removed. + + Args: + i (`int`): + Index of column to remove. + + Returns: + `datasets.table.Table`: + New table without the column. 
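+
+        Example (an illustrative sketch; `data.arrow` stands for any existing Arrow IPC file on disk):
+
+        ```python
+        >>> from datasets.table import MemoryMappedTable
+        >>> t = MemoryMappedTable.from_file("data.arrow")
+        >>> t2 = t.remove_column(0)
+        >>> t2.replays  # the transform is recorded so it can be replayed when the table is reloaded from disk
+        [('remove_column', (0,), {})]
+        ```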
+ """ + replay = ("remove_column", copy.deepcopy(args), copy.deepcopy(kwargs)) + replays = self._append_replay(replay) + return MemoryMappedTable(self.table.remove_column(*args, **kwargs), self.path, replays) + + def set_column(self, *args, **kwargs): + """ + Replace column in Table at position. + + Args: + i (`int`): + Index to place the column at. + field_ (`Union[str, pyarrow.Field]`): + If a string is passed then the type is deduced from the column + data. + column (`Union[pyarrow.Array, List[pyarrow.Array]]`): + Column data. + + Returns: + `datasets.table.Table`: + New table with the passed column set. + """ + replay = ("set_column", copy.deepcopy(args), copy.deepcopy(kwargs)) + replays = self._append_replay(replay) + return MemoryMappedTable(self.table.set_column(*args, **kwargs), self.path, replays) + + def rename_columns(self, *args, **kwargs): + """ + Create new table with columns renamed to provided names. + """ + replay = ("rename_columns", copy.deepcopy(args), copy.deepcopy(kwargs)) + replays = self._append_replay(replay) + return MemoryMappedTable(self.table.rename_columns(*args, **kwargs), self.path, replays) + + def drop(self, *args, **kwargs): + """ + Drop one or more columns and return a new table. + + Args: + columns (`List[str]`): + List of field names referencing existing columns. + + Raises: + `KeyError` : if any of the passed columns name are not existing. + + Returns: + `datasets.table.Table`: + New table without the columns. + """ + replay = ("drop", copy.deepcopy(args), copy.deepcopy(kwargs)) + replays = self._append_replay(replay) + return MemoryMappedTable(self.table.drop(*args, **kwargs), self.path, replays) + + def select(self, *args, **kwargs): + """ + Select columns of the table. + + Returns a new table with the specified columns, and metadata preserved. + + Args: + columns (:obj:`Union[List[str], List[int]]`): + The column names or integer indices to select. + + Returns: + :class:`datasets.table.Table`: New table with the specified columns, and metadata preserved. + """ + replay = ("select", copy.deepcopy(args), copy.deepcopy(kwargs)) + replays = self._append_replay(replay) + return MemoryMappedTable(self.table.select(*args, **kwargs), self.path, replays) + + +# A ConcatenationTable is the concatenation of several tables. +# The ``blocks`` attributes stores a list of list of blocks. +# The first axis concatenates the tables along the axis 0 (it appends rows), +# while the second axis concatenates tables along the axis 1 (it appends columns). +TableBlockContainer = TypeVar("TableBlockContainer", TableBlock, List[TableBlock], List[List[TableBlock]]) + + +class ConcatenationTable(Table): + """ + The table comes from the concatenation of several tables called blocks. + It enables concatenation on both axis 0 (append rows) and axis 1 (append columns). + + The underlying tables are called "blocks" and can be either `InMemoryTable` + or `MemoryMappedTable` objects. + This allows to combine tables that come from memory or that are memory mapped. + When a `ConcatenationTable` is pickled, then each block is pickled: + - the `InMemoryTable` objects are pickled by copying all the data in memory. + - the MemoryMappedTable objects are pickled without copying the data into memory. + Instead, only the path to the memory mapped arrow file is pickled, as well as the list + of transforms to "replays" when reloading the table from the disk. + + Its implementation requires to store each block separately. + The `blocks` attributes stores a list of list of blocks. 
+ The first axis concatenates the tables along the axis 0 (it appends rows), + while the second axis concatenates tables along the axis 1 (it appends columns). + + If some columns are missing when concatenating on axis 0, they are filled with null values. + This is done using `pyarrow.concat_tables(tables, promote=True)`. + + You can access the fully combined table by accessing the `ConcatenationTable.table` attribute, + and the blocks by accessing the `ConcatenationTable.blocks` attribute. + """ + + def __init__(self, table: pa.Table, blocks: List[List[TableBlock]]): + super().__init__(table) + self.blocks = blocks + # Check that all the blocks have the right type. + # Only InMemoryTable and MemoryMappedTable are allowed. + for subtables in blocks: + for subtable in subtables: + if not isinstance(subtable, TableBlock): + raise TypeError( + "The blocks of a ConcatenationTable must be InMemoryTable or MemoryMappedTable objects" + f", but got {_short_str(subtable)}." + ) + + def __getstate__(self): + return {"blocks": self.blocks, "schema": self.table.schema} + + def __setstate__(self, state): + blocks = state["blocks"] + schema = state["schema"] + table = self._concat_blocks_horizontally_and_vertically(blocks) + if schema is not None and table.schema != schema: + # We fix the columns by concatenating with an empty table with the right columns + empty_table = pa.Table.from_batches([], schema=schema) + # we set promote=True to fill missing columns with null values + if config.PYARROW_VERSION.major < 14: + table = pa.concat_tables([table, empty_table], promote=True) + else: + table = pa.concat_tables([table, empty_table], promote_options="default") + ConcatenationTable.__init__(self, table, blocks=blocks) + + @staticmethod + def _concat_blocks(blocks: List[Union[TableBlock, pa.Table]], axis: int = 0) -> pa.Table: + pa_tables = [table.table if hasattr(table, "table") else table for table in blocks] + if axis == 0: + # we set promote=True to fill missing columns with null values + if config.PYARROW_VERSION.major < 14: + return pa.concat_tables(pa_tables, promote=True) + else: + return pa.concat_tables(pa_tables, promote_options="default") + elif axis == 1: + for i, table in enumerate(pa_tables): + if i == 0: + pa_table = table + else: + for name, col in zip(table.column_names, table.columns): + pa_table = pa_table.append_column(name, col) + return pa_table + else: + raise ValueError("'axis' must be either 0 or 1") + + @classmethod + def _concat_blocks_horizontally_and_vertically(cls, blocks: List[List[TableBlock]]) -> pa.Table: + pa_tables_to_concat_vertically = [] + for i, tables in enumerate(blocks): + if not tables: + continue + pa_table_horizontally_concatenated = cls._concat_blocks(tables, axis=1) + pa_tables_to_concat_vertically.append(pa_table_horizontally_concatenated) + return cls._concat_blocks(pa_tables_to_concat_vertically, axis=0) + + @classmethod + def _merge_blocks(cls, blocks: TableBlockContainer, axis: Optional[int] = None) -> TableBlockContainer: + if axis is not None: + merged_blocks = [] + for is_in_memory, block_group in groupby(blocks, key=lambda x: isinstance(x, InMemoryTable)): + if is_in_memory: + block_group = [InMemoryTable(cls._concat_blocks(list(block_group), axis=axis))] + merged_blocks += list(block_group) + else: # both + merged_blocks = [cls._merge_blocks(row_block, axis=1) for row_block in blocks] + if all(len(row_block) == 1 for row_block in merged_blocks): + merged_blocks = cls._merge_blocks( + [block for row_block in merged_blocks for block in row_block], 
axis=0 + ) + return merged_blocks + + @classmethod + def _consolidate_blocks(cls, blocks: TableBlockContainer) -> TableBlockContainer: + if isinstance(blocks, TableBlock): + return blocks + elif isinstance(blocks[0], TableBlock): + return cls._merge_blocks(blocks, axis=0) + else: + return cls._merge_blocks(blocks) + + @classmethod + def from_blocks(cls, blocks: TableBlockContainer) -> "ConcatenationTable": + blocks = cls._consolidate_blocks(blocks) + if isinstance(blocks, TableBlock): + table = blocks + return cls(table.table, [[table]]) + elif isinstance(blocks[0], TableBlock): + table = cls._concat_blocks(blocks, axis=0) + blocks = [[t] for t in blocks] + return cls(table, blocks) + else: + table = cls._concat_blocks_horizontally_and_vertically(blocks) + return cls(table, blocks) + + @classmethod + def from_tables(cls, tables: List[Union[pa.Table, Table]], axis: int = 0) -> "ConcatenationTable": + """Create `ConcatenationTable` from list of tables. + + Args: + tables (list of `Table` or list of `pyarrow.Table`): + List of tables. + axis (`{0, 1}`, defaults to `0`, meaning over rows): + Axis to concatenate over, where `0` means over rows (vertically) and `1` means over columns + (horizontally). + + + """ + + def to_blocks(table: Union[pa.Table, Table]) -> List[List[TableBlock]]: + if isinstance(table, pa.Table): + return [[InMemoryTable(table)]] + elif isinstance(table, ConcatenationTable): + return copy.deepcopy(table.blocks) + else: + return [[table]] + + def _slice_row_block(row_block: List[TableBlock], length: int) -> Tuple[List[TableBlock], List[TableBlock]]: + sliced = [table.slice(0, length) for table in row_block] + remainder = [table.slice(length, len(row_block[0]) - length) for table in row_block] + return sliced, remainder + + def _split_both_like( + result: List[List[TableBlock]], blocks: List[List[TableBlock]] + ) -> Tuple[List[List[TableBlock]], List[List[TableBlock]]]: + """ + Make sure each row_block contain the same num_rows to be able to concatenate them on axis=1. + + To do so, we modify both blocks sets to have the same row_blocks boundaries. 
+ For example, if `result` has 2 row_blocks of 3 rows and `blocks` has 3 row_blocks of 2 rows, + we modify both to have 4 row_blocks of size 2, 1, 1 and 2: + + [ x x x | x x x ] + + [ y y | y y | y y ] + ----------------------------- + = [ x x | x | x | x x ] + [ y y | y | y | y y ] + + """ + result, blocks = list(result), list(blocks) + new_result, new_blocks = [], [] + while result and blocks: + # we slice the longest row block to save two row blocks of same length + # and we replace the long row block by its remainder if necessary + if len(result[0][0]) > len(blocks[0][0]): + new_blocks.append(blocks[0]) + sliced, result[0] = _slice_row_block(result[0], len(blocks.pop(0)[0])) + new_result.append(sliced) + elif len(result[0][0]) < len(blocks[0][0]): + new_result.append(result[0]) + sliced, blocks[0] = _slice_row_block(blocks[0], len(result.pop(0)[0])) + new_blocks.append(sliced) + else: + new_result.append(result.pop(0)) + new_blocks.append(blocks.pop(0)) + if result or blocks: + raise ValueError("Failed to concatenate on axis=1 because tables don't have the same number of rows") + return new_result, new_blocks + + def _extend_blocks( + result: List[List[TableBlock]], blocks: List[List[TableBlock]], axis: int = 0 + ) -> List[List[TableBlock]]: + if axis == 0: + result.extend(blocks) + elif axis == 1: + # We make sure each row_block have the same num_rows + result, blocks = _split_both_like(result, blocks) + for i, row_block in enumerate(blocks): + result[i].extend(row_block) + return result + + blocks = to_blocks(tables[0]) + for table in tables[1:]: + table_blocks = to_blocks(table) + blocks = _extend_blocks(blocks, table_blocks, axis=axis) + return cls.from_blocks(blocks) + + @property + def _slices(self): + offset = 0 + for tables in self.blocks: + length = len(tables[0]) + yield (offset, length) + offset += length + + def slice(self, offset=0, length=None): + """ + Compute zero-copy slice of this Table. + + Args: + offset (`int`, defaults to `0`): + Offset from start of table to slice. + length (`int`, defaults to `None`): + Length of slice (default is until end of table starting from + offset). + + Returns: + `datasets.table.Table` + """ + table = self.table.slice(offset, length=length) + length = length if length is not None else self.num_rows - offset + blocks = [] + for tables in self.blocks: + n_rows = len(tables[0]) + if length == 0: + break + elif n_rows <= offset: + offset = offset - n_rows + elif n_rows <= offset + length: + blocks.append([t.slice(offset) for t in tables]) + length, offset = length + offset - n_rows, 0 + else: + blocks.append([t.slice(offset, length) for t in tables]) + length, offset = 0, 0 + return ConcatenationTable(table, blocks) + + def filter(self, mask, *args, **kwargs): + """ + Select records from a Table. See `pyarrow.compute.filter` for full usage. + """ + table = self.table.filter(mask, *args, **kwargs) + blocks = [] + for (offset, length), tables in zip(self._slices, self.blocks): + submask = mask.slice(offset, length) + blocks.append([t.filter(submask, *args, **kwargs) for t in tables]) + return ConcatenationTable(table, blocks) + + def flatten(self, *args, **kwargs): + """ + Flatten this Table. Each column with a struct type is flattened + into one column per struct field. Other columns are left unchanged. + + Args: + memory_pool (`MemoryPool`, defaults to `None`): + For memory allocations, if required, otherwise use default pool. 
+ + Returns: + `datasets.table.Table` + """ + table = table_flatten(self.table, *args, **kwargs) + blocks = [] + for tables in self.blocks: + blocks.append([t.flatten(*args, **kwargs) for t in tables]) + return ConcatenationTable(table, blocks) + + def combine_chunks(self, *args, **kwargs): + """ + Make a new table by combining the chunks this table has. + + All the underlying chunks in the `ChunkedArray` of each column are + concatenated into zero or one chunk. + + Args: + memory_pool (`MemoryPool`, defaults to `None`): + For memory allocations, if required, otherwise use default pool. + + Returns: + `datasets.table.Table` + """ + table = self.table.combine_chunks(*args, **kwargs) + blocks = [] + for tables in self.blocks: + blocks.append([t.combine_chunks(*args, **kwargs) for t in tables]) + return ConcatenationTable(table, blocks) + + def cast(self, target_schema, *args, **kwargs): + """ + Cast table values to another schema. + + Args: + target_schema (`Schema`): + Schema to cast to, the names and order of fields must match. + safe (`bool`, defaults to `True`): + Check for overflows or other unsafe conversions. + + Returns: + `datasets.table.Table` + """ + from .features import Features + + table = table_cast(self.table, target_schema, *args, **kwargs) + target_features = Features.from_arrow_schema(target_schema) + blocks = [] + for subtables in self.blocks: + new_tables = [] + fields = list(target_schema) + for subtable in subtables: + subfields = [] + for name in subtable.column_names: + subfields.append(fields.pop(next(i for i, field in enumerate(fields) if field.name == name))) + subfeatures = Features({subfield.name: target_features[subfield.name] for subfield in subfields}) + subschema = subfeatures.arrow_schema + new_tables.append(subtable.cast(subschema, *args, **kwargs)) + blocks.append(new_tables) + return ConcatenationTable(table, blocks) + + def replace_schema_metadata(self, *args, **kwargs): + """ + EXPERIMENTAL: Create shallow copy of table by replacing schema + key-value metadata with the indicated new metadata (which may be `None`, + which deletes any existing metadata). + + Args: + metadata (`dict`, defaults to `None`): + + Returns: + `datasets.table.Table`: shallow_copy + """ + table = self.table.replace_schema_metadata(*args, **kwargs) + blocks = [] + for tables in self.blocks: + blocks.append([t.replace_schema_metadata(*args, **kwargs) for t in tables]) + return ConcatenationTable(table, self.blocks) + + def add_column(self, *args, **kwargs): + """ + Add column to Table at position. + + A new table is returned with the column added, the original table + object is left unchanged. + + Args: + i (`int`): + Index to place the column at. + field_ (`Union[str, pyarrow.Field]`): + If a string is passed then the type is deduced from the column + data. + column (`Union[pyarrow.Array, List[pyarrow.Array]]`): + Column data. + + Returns: + `datasets.table.Table`: New table with the passed column added. + """ + raise NotImplementedError() + + def append_column(self, *args, **kwargs): + """ + Append column at end of columns. + + Args: + field_ (`Union[str, pyarrow.Field]`): + If a string is passed then the type is deduced from the column + data. + column (`Union[pyarrow.Array, List[pyarrow.Array]]`): + Column data. + + Returns: + `datasets.table.Table`: + New table with the passed column added. + """ + raise NotImplementedError() + + def remove_column(self, i, *args, **kwargs): + """ + Create new Table with the indicated column removed. 
+ + Args: + i (`int`): + Index of column to remove. + + Returns: + `datasets.table.Table`: + New table without the column. + """ + table = self.table.remove_column(i, *args, **kwargs) + name = self.table.column_names[i] + blocks = [] + for tables in self.blocks: + blocks.append( + [ + t.remove_column(t.column_names.index(name), *args, **kwargs) if name in t.column_names else t + for t in tables + ] + ) + return ConcatenationTable(table, blocks) + + def set_column(self, *args, **kwargs): + """ + Replace column in Table at position. + + Args: + i (`int`): + Index to place the column at. + field_ (`Union[str, pyarrow.Field]`): + If a string is passed then the type is deduced from the column + data. + column (`Union[pyarrow.Array, List[pyarrow.Array]]`): + Column data. + + Returns: + `datasets.table.Table`: + New table with the passed column set. + """ + raise NotImplementedError() + + def rename_columns(self, names, *args, **kwargs): + """ + Create new table with columns renamed to provided names. + """ + table = self.table.rename_columns(names, *args, **kwargs) + names = dict(zip(self.table.column_names, names)) + blocks = [] + for tables in self.blocks: + blocks.append( + [t.rename_columns([names[name] for name in t.column_names], *args, **kwargs) for t in tables] + ) + return ConcatenationTable(table, blocks) + + def drop(self, columns, *args, **kwargs): + """ + Drop one or more columns and return a new table. + + Args: + columns (`List[str]`): + List of field names referencing existing columns. + + Raises: + `KeyError` : if any of the passed columns name are not existing. + + Returns: + `datasets.table.Table`: + New table without the columns. + """ + table = self.table.drop(columns, *args, **kwargs) + blocks = [] + for tables in self.blocks: + blocks.append([t.drop([c for c in columns if c in t.column_names], *args, **kwargs) for t in tables]) + return ConcatenationTable(table, blocks) + + def select(self, columns, *args, **kwargs): + """ + Select columns of the table. + + Returns a new table with the specified columns, and metadata preserved. + + Args: + columns (:obj:`Union[List[str], List[int]]`): + The column names or integer indices to select. + + Returns: + :class:`datasets.table.Table`: New table with the specified columns, and metadata preserved. + """ + table = self.table.select(columns, *args, **kwargs) + blocks = [] + for tables in self.blocks: + blocks.append([t.select([c for c in columns if c in t.column_names], *args, **kwargs) for t in tables]) + return ConcatenationTable(table, blocks) + + +def concat_tables(tables: List[Table], axis: int = 0) -> Table: + """ + Concatenate tables. + + Args: + tables (list of `Table`): + List of tables to be concatenated. + axis (`{0, 1}`, defaults to `0`, meaning over rows): + Axis to concatenate over, where `0` means over rows (vertically) and `1` means over columns + (horizontally). + + + Returns: + `datasets.table.Table`: + If the number of input tables is > 1, then the returned table is a `datasets.table.ConcatenationTable`. + Otherwise if there's only one table, it is returned as is. + """ + tables = list(tables) + if len(tables) == 1: + return tables[0] + return ConcatenationTable.from_tables(tables, axis=axis) + + +def list_table_cache_files(table: Table) -> List[str]: + """ + Get the cache files that are loaded by the table. + Cache file are used when parts of the table come from the disk via memory mapping. + + Returns: + `List[str]`: + A list of paths to the cache files loaded by the table. 
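+
+    Example (an illustrative sketch; `data.arrow` stands for any memory-mapped Arrow file, and the printed path is a placeholder):
+
+    ```python
+    >>> from datasets.table import InMemoryTable, MemoryMappedTable, list_table_cache_files
+    >>> mmap_table = MemoryMappedTable.from_file("data.arrow")
+    >>> list_table_cache_files(mmap_table)  # absolute path of the memory-mapped file
+    ['/.../data.arrow']
+    >>> list_table_cache_files(InMemoryTable.from_pydict({"a": [1]}))  # in-memory tables have no cache files
+    []
+    ```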
+ """ + if isinstance(table, ConcatenationTable): + cache_files = [] + for subtables in table.blocks: + for subtable in subtables: + cache_files += list_table_cache_files(subtable) + return cache_files + elif isinstance(table, MemoryMappedTable): + return [table.path] + else: + return [] + + +def _wrap_for_chunked_arrays(func): + """Apply the function on each chunk of a `pyarrow.ChunkedArray`, or on the array directly""" + + def wrapper(array, *args, **kwargs): + if isinstance(array, pa.ChunkedArray): + return pa.chunked_array([func(chunk, *args, **kwargs) for chunk in array.chunks]) + else: + return func(array, *args, **kwargs) + + return wrapper + + +def _are_list_values_of_length(array: pa.ListArray, length: int) -> bool: + """Check if all the sub-lists of a `pa.ListArray` have the specified length.""" + return pc.all(pc.equal(array.value_lengths(), length)).as_py() or array.null_count == len(array) + + +def _combine_list_array_offsets_with_mask(array: pa.ListArray) -> pa.Array: + """Add the null bitmap to the offsets of a `pa.ListArray`.""" + offsets = array.offsets + if array.null_count > 0: + offsets = pa.concat_arrays( + [ + pc.replace_with_mask(offsets[:-1], array.is_null(), pa.nulls(len(array), pa.int32())), + offsets[-1:], + ] + ) + return offsets + + +def _storage_type(type: pa.DataType) -> pa.DataType: + """Convert a (possibly nested) `pa.ExtensionType` to its storage type.""" + if isinstance(type, pa.ExtensionType): + return _storage_type(type.storage_type) + elif isinstance(type, pa.StructType): + return pa.struct([pa.field(field.name, _storage_type(field.type)) for field in type]) + elif isinstance(type, pa.ListType): + return pa.list_(_storage_type(type.value_type)) + elif isinstance(type, pa.FixedSizeListType): + return pa.list_(_storage_type(type.value_type), type.list_size) + return type + + +def _short_str(value: Any) -> str: + out = str(value) + if len(out) > 3000: + out = out[:1500] + "\n...\n" + out[-1500:] + return out + + +@_wrap_for_chunked_arrays +def array_cast( + array: pa.Array, pa_type: pa.DataType, allow_primitive_to_str: bool = True, allow_decimal_to_str: bool = True +) -> Union[pa.Array, pa.FixedSizeListArray, pa.ListArray, pa.StructArray, pa.ExtensionArray]: + """Improved version of `pa.Array.cast` + + It supports casting `pa.StructArray` objects to re-order the fields. + It also let you control certain aspects of the casting, e.g. whether + to disable casting primitives (`booleans`, `floats` or `ints`) or + disable casting decimals to strings. + + Args: + array (`pa.Array`): + PyArrow array to cast + pa_type (`pa.DataType`): + Target PyArrow type + allow_primitive_to_str (`bool`, defaults to `True`): + Whether to allow casting primitives to strings. + Defaults to `True`. + allow_decimal_to_str (`bool`, defaults to `True`): + Whether to allow casting decimals to strings. + Defaults to `True`. + + Raises: + `pa.ArrowInvalidError`: if the arrow data casting fails + `TypeError`: if the target type is not supported according, e.g. 
+ + - if a field is missing + - if casting from primitives to strings and `allow_primitive_to_str` is `False` + - if casting from decimals to strings and `allow_decimal_to_str` is `False` + + Returns: + `List[pyarrow.Array]`: the casted array + """ + _c = partial(array_cast, allow_primitive_to_str=allow_primitive_to_str, allow_decimal_to_str=allow_decimal_to_str) + if isinstance(array, pa.ExtensionArray): + array = array.storage + if isinstance(pa_type, pa.ExtensionType): + return pa_type.wrap_array(_c(array, pa_type.storage_type)) + elif array.type == pa_type: + return array + elif pa.types.is_struct(array.type): + if pa.types.is_struct(pa_type) and ({field.name for field in pa_type} == {field.name for field in array.type}): + if array.type.num_fields == 0: + return array + arrays = [_c(array.field(field.name), field.type) for field in pa_type] + return pa.StructArray.from_arrays(arrays, fields=list(pa_type), mask=array.is_null()) + elif pa.types.is_list(array.type): + if pa.types.is_fixed_size_list(pa_type): + if _are_list_values_of_length(array, pa_type.list_size): + if array.null_count > 0: + # Ensure each null value in the array translates to [null] * pa_type.list_size in the array's values array + array_type = array.type + storage_type = _storage_type(array_type) + if array_type != storage_type: + # Temporarily convert to the storage type to support extension types in the slice operation + array = _c(array, storage_type) + array = pc.list_slice(array, 0, pa_type.list_size, return_fixed_size_list=True) + array = _c(array, array_type) + else: + array = pc.list_slice(array, 0, pa_type.list_size, return_fixed_size_list=True) + array_values = array.values + if config.PYARROW_VERSION.major < 15: + return pa.Array.from_buffers( + pa_type, + len(array), + [array.is_valid().buffers()[1]], + children=[_c(array_values, pa_type.value_type)], + ) + else: + return pa.FixedSizeListArray.from_arrays( + _c(array_values, pa_type.value_type), pa_type.list_size, mask=array.is_null() + ) + else: + array_values = array.values[ + array.offset * pa_type.length : (array.offset + len(array)) * pa_type.length + ] + return pa.FixedSizeListArray.from_arrays(_c(array_values, pa_type.value_type), pa_type.list_size) + elif pa.types.is_list(pa_type): + # Merge offsets with the null bitmap to avoid the "Null bitmap with offsets slice not supported" ArrowNotImplementedError + array_offsets = _combine_list_array_offsets_with_mask(array) + return pa.ListArray.from_arrays(array_offsets, _c(array.values, pa_type.value_type)) + elif pa.types.is_fixed_size_list(array.type): + if pa.types.is_fixed_size_list(pa_type): + if pa_type.list_size == array.type.list_size: + array_values = array.values[ + array.offset * array.type.list_size : (array.offset + len(array)) * array.type.list_size + ] + if config.PYARROW_VERSION.major < 15: + return pa.Array.from_buffers( + pa_type, + len(array), + [array.is_valid().buffers()[1]], + children=[_c(array_values, pa_type.value_type)], + ) + else: + return pa.FixedSizeListArray.from_arrays( + _c(array_values, pa_type.value_type), pa_type.list_size, mask=array.is_null() + ) + elif pa.types.is_list(pa_type): + array_offsets = (np.arange(len(array) + 1) + array.offset) * array.type.list_size + return pa.ListArray.from_arrays(array_offsets, _c(array.values, pa_type.value_type), mask=array.is_null()) + else: + if pa.types.is_string(pa_type): + if not allow_primitive_to_str and pa.types.is_primitive(array.type): + raise TypeError( + f"Couldn't cast array of type {_short_str(array.type)} to 
{_short_str(pa_type)} " + f"since allow_primitive_to_str is set to {allow_primitive_to_str} " + ) + if not allow_decimal_to_str and pa.types.is_decimal(array.type): + raise TypeError( + f"Couldn't cast array of type {_short_str(array.type)} to {_short_str(pa_type)} " + f"and allow_decimal_to_str is set to {allow_decimal_to_str}" + ) + if pa.types.is_null(pa_type) and not pa.types.is_null(array.type): + raise TypeError(f"Couldn't cast array of type {_short_str(array.type)} to {_short_str(pa_type)}") + return array.cast(pa_type) + raise TypeError(f"Couldn't cast array of type {_short_str(array.type)} to {_short_str(pa_type)}") + + +@_wrap_for_chunked_arrays +def cast_array_to_feature( + array: pa.Array, feature: "FeatureType", allow_primitive_to_str: bool = True, allow_decimal_to_str: bool = True +) -> pa.Array: + """Cast an array to the arrow type that corresponds to the requested feature type. + For custom features like [`Audio`] or [`Image`], it takes into account the "cast_storage" methods + they defined to enable casting from other arrow types. + + Args: + array (`pa.Array`): + The PyArrow array to cast. + feature (`datasets.features.FeatureType`): + The target feature type. + allow_primitive_to_str (`bool`, defaults to `True`): + Whether to allow casting primitives to strings. + Defaults to `True`. + allow_decimal_to_str (`bool`, defaults to `True`): + Whether to allow casting decimals to strings. + Defaults to `True`. + + Raises: + `pa.ArrowInvalidError`: if the arrow data casting fails + `TypeError`: if the target type is not supported according, e.g. + + - if a field is missing + - if casting from primitives and `allow_primitive_to_str` is `False` + - if casting from decimals and `allow_decimal_to_str` is `False` + + Returns: + array (`pyarrow.Array`): the casted array + """ + from .features.features import Sequence, get_nested_type + + _c = partial( + cast_array_to_feature, + allow_primitive_to_str=allow_primitive_to_str, + allow_decimal_to_str=allow_decimal_to_str, + ) + + if isinstance(array, pa.ExtensionArray): + array = array.storage + if hasattr(feature, "cast_storage"): + return feature.cast_storage(array) + + elif pa.types.is_struct(array.type): + # feature must be a dict or Sequence(subfeatures_dict) + if isinstance(feature, Sequence) and isinstance(feature.feature, dict): + feature = { + name: Sequence(subfeature, length=feature.length) for name, subfeature in feature.feature.items() + } + if isinstance(feature, dict) and {field.name for field in array.type} == set(feature): + if array.type.num_fields == 0: + return array + arrays = [_c(array.field(name), subfeature) for name, subfeature in feature.items()] + return pa.StructArray.from_arrays(arrays, names=list(feature), mask=array.is_null()) + elif pa.types.is_list(array.type): + # feature must be either [subfeature] or Sequence(subfeature) + if isinstance(feature, list): + casted_array_values = _c(array.values, feature[0]) + if casted_array_values.type == array.values.type: + return array + else: + # Merge offsets with the null bitmap to avoid the "Null bitmap with offsets slice not supported" ArrowNotImplementedError + array_offsets = _combine_list_array_offsets_with_mask(array) + return pa.ListArray.from_arrays(array_offsets, casted_array_values) + elif isinstance(feature, Sequence): + if feature.length > -1: + if _are_list_values_of_length(array, feature.length): + if array.null_count > 0: + # Ensure each null value in the array translates to [null] * pa_type.list_size in the array's values array + array_type = 
array.type + storage_type = _storage_type(array_type) + if array_type != storage_type: + # Temporarily convert to the storage type to support extension types in the slice operation + array = array_cast( + array, + storage_type, + allow_primitive_to_str=allow_primitive_to_str, + allow_decimal_to_str=allow_decimal_to_str, + ) + array = pc.list_slice(array, 0, feature.length, return_fixed_size_list=True) + array = array_cast( + array, + array_type, + allow_primitive_to_str=allow_primitive_to_str, + allow_decimal_to_str=allow_decimal_to_str, + ) + else: + array = pc.list_slice(array, 0, feature.length, return_fixed_size_list=True) + array_values = array.values + casted_array_values = _c(array_values, feature.feature) + if config.PYARROW_VERSION.major < 15: + return pa.Array.from_buffers( + pa.list_(casted_array_values.type, feature.length), + len(array), + [array.is_valid().buffers()[1]], + children=[casted_array_values], + ) + else: + return pa.FixedSizeListArray.from_arrays( + casted_array_values, feature.length, mask=array.is_null() + ) + else: + array_values = array.values[ + array.offset * feature.length : (array.offset + len(array)) * feature.length + ] + return pa.FixedSizeListArray.from_arrays(_c(array_values, feature.feature), feature.length) + else: + casted_array_values = _c(array.values, feature.feature) + if casted_array_values.type == array.values.type: + return array + else: + # Merge offsets with the null bitmap to avoid the "Null bitmap with offsets slice not supported" ArrowNotImplementedError + array_offsets = _combine_list_array_offsets_with_mask(array) + return pa.ListArray.from_arrays(array_offsets, casted_array_values) + elif pa.types.is_fixed_size_list(array.type): + # feature must be either [subfeature] or Sequence(subfeature) + if isinstance(feature, list): + array_offsets = (np.arange(len(array) + 1) + array.offset) * array.type.list_size + return pa.ListArray.from_arrays(array_offsets, _c(array.values, feature[0]), mask=array.is_null()) + elif isinstance(feature, Sequence): + if feature.length > -1: + if feature.length == array.type.list_size: + array_values = array.values[ + array.offset * array.type.list_size : (array.offset + len(array)) * array.type.list_size + ] + casted_array_values = _c(array_values, feature.feature) + if config.PYARROW_VERSION.major < 15: + return pa.Array.from_buffers( + pa.list_(casted_array_values.type, feature.length), + len(array), + [array.is_valid().buffers()[1]], + children=[casted_array_values], + ) + else: + return pa.FixedSizeListArray.from_arrays( + casted_array_values, feature.length, mask=array.is_null() + ) + else: + array_offsets = (np.arange(len(array) + 1) + array.offset) * array.type.list_size + return pa.ListArray.from_arrays(array_offsets, _c(array.values, feature.feature), mask=array.is_null()) + if pa.types.is_null(array.type): + return array_cast( + array, + get_nested_type(feature), + allow_primitive_to_str=allow_primitive_to_str, + allow_decimal_to_str=allow_decimal_to_str, + ) + elif not isinstance(feature, (Sequence, dict, list, tuple)): + return array_cast( + array, + feature(), + allow_primitive_to_str=allow_primitive_to_str, + allow_decimal_to_str=allow_decimal_to_str, + ) + raise TypeError(f"Couldn't cast array of type\n{_short_str(array.type)}\nto\n{_short_str(feature)}") + + +@_wrap_for_chunked_arrays +def embed_array_storage(array: pa.Array, feature: "FeatureType"): + """Embed data into an arrays's storage. 
+ For custom features like Audio or Image, it takes into account the "embed_storage" methods + they define to embed external data (e.g. an image file) into an array. + + + + Args: + array (`pa.Array`): + The PyArrow array in which to embed data. + feature (`datasets.features.FeatureType`): + Array features. + + Raises: + `TypeError`: if the target type is not supported according, e.g. + + - if a field is missing + + Returns: + array (`pyarrow.Array`): the casted array + """ + from .features import Sequence + + _e = embed_array_storage + + if isinstance(array, pa.ExtensionArray): + array = array.storage + if hasattr(feature, "embed_storage"): + return feature.embed_storage(array) + elif pa.types.is_struct(array.type): + # feature must be a dict or Sequence(subfeatures_dict) + if isinstance(feature, Sequence) and isinstance(feature.feature, dict): + feature = { + name: Sequence(subfeature, length=feature.length) for name, subfeature in feature.feature.items() + } + if isinstance(feature, dict): + arrays = [_e(array.field(name), subfeature) for name, subfeature in feature.items()] + return pa.StructArray.from_arrays(arrays, names=list(feature), mask=array.is_null()) + elif pa.types.is_list(array.type): + # feature must be either [subfeature] or Sequence(subfeature) + # Merge offsets with the null bitmap to avoid the "Null bitmap with offsets slice not supported" ArrowNotImplementedError + array_offsets = _combine_list_array_offsets_with_mask(array) + if isinstance(feature, list): + return pa.ListArray.from_arrays(array_offsets, _e(array.values, feature[0])) + if isinstance(feature, Sequence) and feature.length == -1: + return pa.ListArray.from_arrays(array_offsets, _e(array.values, feature.feature)) + elif pa.types.is_fixed_size_list(array.type): + # feature must be Sequence(subfeature) + if isinstance(feature, Sequence) and feature.length > -1: + array_values = array.values[ + array.offset * array.type.list_size : (array.offset + len(array)) * array.type.list_size + ] + embedded_array_values = _e(array_values, feature.feature) + if config.PYARROW_VERSION.major < 15: + return pa.Array.from_buffers( + pa.list_(array_values.type, feature.length), + len(array), + [array.is_valid().buffers()[1]], + children=[embedded_array_values], + ) + else: + return pa.FixedSizeListArray.from_arrays(embedded_array_values, feature.length, mask=array.is_null()) + if not isinstance(feature, (Sequence, dict, list, tuple)): + return array + raise TypeError(f"Couldn't embed array of type\n{_short_str(array.type)}\nwith\n{_short_str(feature)}") + + +class CastError(ValueError): + """When it's not possible to cast an Arrow table to a specific schema or set of features""" + + def __init__(self, *args, table_column_names: List[str], requested_column_names: List[str]) -> None: + super().__init__(*args) + self.table_column_names = table_column_names + self.requested_column_names = requested_column_names + + def __reduce__(self): + # Fix unpickling: TypeError: __init__() missing 2 required keyword-only arguments: 'table_column_names' and 'requested_column_names' + return partial( + CastError, table_column_names=self.table_column_names, requested_column_names=self.requested_column_names + ), () + + def details(self): + new_columns = set(self.table_column_names) - set(self.requested_column_names) + missing_columns = set(self.requested_column_names) - set(self.table_column_names) + if new_columns and missing_columns: + return f"there are {len(new_columns)} new columns ({_short_str(new_columns)}) and {len(missing_columns)} 
missing columns ({_short_str(missing_columns)})." + elif new_columns: + return f"there are {len(new_columns)} new columns ({_short_str(new_columns)})" + else: + return f"there are {len(missing_columns)} missing columns ({_short_str(missing_columns)})" + + +def cast_table_to_features(table: pa.Table, features: "Features"): + """Cast a table to the arrow schema that corresponds to the requested features. + + Args: + table (`pyarrow.Table`): + PyArrow table to cast. + features ([`Features`]): + Target features. + + Returns: + table (`pyarrow.Table`): the casted table + """ + if sorted(table.column_names) != sorted(features): + raise CastError( + f"Couldn't cast\n{_short_str(table.schema)}\nto\n{_short_str(features)}\nbecause column names don't match", + table_column_names=table.column_names, + requested_column_names=list(features), + ) + arrays = [cast_array_to_feature(table[name], feature) for name, feature in features.items()] + return pa.Table.from_arrays(arrays, schema=features.arrow_schema) + + +def cast_table_to_schema(table: pa.Table, schema: pa.Schema): + """Cast a table to the arrow schema. Different from `cast_table_to_features`, this method can preserve nullability. + + Args: + table (`pa.Table`): + PyArrow table to cast. + features ([`Features`]): + Target features. + + Returns: + `pa.Table`: the casted table + """ + from .features import Features + + features = Features.from_arrow_schema(schema) + if sorted(table.column_names) != sorted(features): + raise CastError( + f"Couldn't cast\n{_short_str(table.schema)}\nto\n{_short_str(features)}\nbecause column names don't match", + table_column_names=table.column_names, + requested_column_names=list(features), + ) + arrays = [cast_array_to_feature(table[name], feature) for name, feature in features.items()] + return pa.Table.from_arrays(arrays, schema=schema) + + +def embed_table_storage(table: pa.Table): + """Embed external data into a table's storage. + + + + Args: + table (`pyarrow.Table`): + PyArrow table in which to embed data. + + Returns: + table (`pyarrow.Table`): the table with embedded data + """ + from .features.features import Features, require_storage_embed + + features = Features.from_arrow_schema(table.schema) + arrays = [ + embed_array_storage(table[name], feature) if require_storage_embed(feature) else table[name] + for name, feature in features.items() + ] + return pa.Table.from_arrays(arrays, schema=features.arrow_schema) + + +def table_cast(table: pa.Table, schema: pa.Schema): + """Improved version of `pa.Table.cast`. + + It supports casting to feature types stored in the schema metadata. + + Args: + table (`pyarrow.Table`): + PyArrow table to cast. + schema (`pyarrow.Schema`): + Target PyArrow schema. + + Returns: + table (`pyarrow.Table`): the casted table + """ + if table.schema != schema: + return cast_table_to_schema(table, schema) + elif table.schema.metadata != schema.metadata: + return table.replace_schema_metadata(schema.metadata) + else: + return table + + +def table_flatten(table: pa.Table): + """Improved version of `pa.Table.flatten`. + + It behaves as `pa.Table.flatten` in a sense it does 1-step flatten of the columns with a struct type into one column per struct field, + but updates the metadata and skips decodable features unless the `decode` attribute of these features is set to False. + + Args: + table (`pa.Table`): + PyArrow table to flatten. 
+ + Returns: + `Table`: the flattened table + """ + from .features import Features + + features = Features.from_arrow_schema(table.schema) + if any(hasattr(subfeature, "flatten") and subfeature.flatten() == subfeature for subfeature in features.values()): + flat_arrays = [] + flat_column_names = [] + for field in table.schema: + array = table.column(field.name) + subfeature = features[field.name] + if pa.types.is_struct(field.type) and ( + not hasattr(subfeature, "flatten") or subfeature.flatten() != subfeature + ): + flat_arrays.extend(array.flatten()) + flat_column_names.extend([f"{field.name}.{subfield.name}" for subfield in field.type]) + else: + flat_arrays.append(array) + flat_column_names.append(field.name) + flat_table = pa.Table.from_arrays( + flat_arrays, + names=flat_column_names, + ) + else: + flat_table = table.flatten() + # Preserve complex types in the metadata + flat_features = features.flatten(max_depth=2) + flat_features = Features({column_name: flat_features[column_name] for column_name in flat_table.column_names}) + return flat_table.replace_schema_metadata(flat_features.arrow_schema.metadata) + + +def table_visitor(table: pa.Table, function: Callable[[pa.Array], None]): + """Visit all arrays in a table and apply a function to them. + + Args: + table (`pyarrow.Table`): + PyArrow table to visit. + function (`Callable[[pa.Array], None]`): + Function to apply to each array. + """ + from .features import Features, Sequence + + features = Features.from_arrow_schema(table.schema) + + def _visit(array, feature): + if isinstance(array, pa.ChunkedArray): + for chunk in array.chunks: + _visit(chunk, feature) + else: + if isinstance(array, pa.ExtensionArray): + array = array.storage + function(array, feature) + if pa.types.is_struct(array.type) and not hasattr(feature, "cast_storage"): + if isinstance(feature, Sequence) and isinstance(feature.feature, dict): + feature = { + name: Sequence(subfeature, length=feature.length) + for name, subfeature in feature.feature.items() + } + for name, subfeature in feature.items(): + _visit(array.field(name), subfeature) + elif pa.types.is_list(array.type): + if isinstance(feature, list): + _visit(array.values, feature[0]) + elif isinstance(feature, Sequence): + _visit(array.values, feature.feature) + + for name, feature in features.items(): + _visit(table[name], feature) + + +def table_iter(table: Table, batch_size: int, drop_last_batch=False) -> Iterator[pa.Table]: + """Iterate over sub-tables of size `batch_size`. + + Args: + table (`pyarrow.Table`): + PyArrow table to iterate over. + batch_size (`int`): + Size of each sub-table to yield. + drop_last_batch (`bool`, defaults to `False`): + Drop the last batch if it is smaller than `batch_size`. 
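+
+    Example (a minimal illustrative sketch, assuming `pa_table` is a small in-memory `pyarrow.Table`):
+
+    ```python
+    import pyarrow as pa
+
+    pa_table = pa.table({"x": list(range(10))})
+    # Yields sub-tables of at most 3 rows; the final, shorter batch is kept
+    # because `drop_last_batch` defaults to False.
+    for subtable in table_iter(pa_table, batch_size=3):
+        print(len(subtable))  # 3, 3, 3, 1
+    ```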
+ """ + chunks_buffer = [] + chunks_buffer_size = 0 + for chunk in table.to_reader(max_chunksize=batch_size): + if len(chunk) == 0: + continue + elif chunks_buffer_size + len(chunk) < batch_size: + chunks_buffer.append(chunk) + chunks_buffer_size += len(chunk) + continue + elif chunks_buffer_size + len(chunk) == batch_size: + chunks_buffer.append(chunk) + yield pa.Table.from_batches(chunks_buffer) + chunks_buffer = [] + chunks_buffer_size = 0 + else: + cropped_chunk_length = batch_size - chunks_buffer_size + chunks_buffer.append(chunk.slice(0, cropped_chunk_length)) + yield pa.Table.from_batches(chunks_buffer) + chunks_buffer = [chunk.slice(cropped_chunk_length, len(chunk) - cropped_chunk_length)] + chunks_buffer_size = len(chunk) - cropped_chunk_length + if not drop_last_batch and chunks_buffer: + yield pa.Table.from_batches(chunks_buffer) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/frozenlist/__init__.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/frozenlist/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4022db505cf433332c121564683ac235a3ce0447 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/frozenlist/__init__.py @@ -0,0 +1,98 @@ +import os +import sys +import types +from collections.abc import MutableSequence +from functools import total_ordering +from typing import Any, Type + +__version__ = "1.5.0" + +__all__ = ("FrozenList", "PyFrozenList") # type: Tuple[str, ...] + + +NO_EXTENSIONS = bool(os.environ.get("FROZENLIST_NO_EXTENSIONS")) # type: bool + + +@total_ordering +class FrozenList(MutableSequence): + __slots__ = ("_frozen", "_items") + + if sys.version_info >= (3, 9): + __class_getitem__ = classmethod(types.GenericAlias) + else: + + @classmethod + def __class_getitem__( + cls: Type["FrozenList"], + cls_item: Any, + ) -> Type["FrozenList"]: + return cls + + def __init__(self, items=None): + self._frozen = False + if items is not None: + items = list(items) + else: + items = [] + self._items = items + + @property + def frozen(self): + return self._frozen + + def freeze(self): + self._frozen = True + + def __getitem__(self, index): + return self._items[index] + + def __setitem__(self, index, value): + if self._frozen: + raise RuntimeError("Cannot modify frozen list.") + self._items[index] = value + + def __delitem__(self, index): + if self._frozen: + raise RuntimeError("Cannot modify frozen list.") + del self._items[index] + + def __len__(self): + return self._items.__len__() + + def __iter__(self): + return self._items.__iter__() + + def __reversed__(self): + return self._items.__reversed__() + + def __eq__(self, other): + return list(self) == other + + def __le__(self, other): + return list(self) <= other + + def insert(self, pos, item): + if self._frozen: + raise RuntimeError("Cannot modify frozen list.") + self._items.insert(pos, item) + + def __repr__(self): + return f"" + + def __hash__(self): + if self._frozen: + return hash(tuple(self)) + else: + raise RuntimeError("Cannot hash unfrozen list.") + + +PyFrozenList = FrozenList + + +if not NO_EXTENSIONS: + try: + from ._frozenlist import FrozenList as CFrozenList # type: ignore + except ImportError: # pragma: no cover + pass + else: + FrozenList = CFrozenList # type: ignore diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/frozenlist/__init__.pyi b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/frozenlist/__init__.pyi new file mode 100644 index 
0000000000000000000000000000000000000000..ae803ef6aad72f57e7379db5a2044a95f214df7b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/frozenlist/__init__.pyi @@ -0,0 +1,47 @@ +from typing import ( + Generic, + Iterable, + Iterator, + List, + MutableSequence, + Optional, + TypeVar, + Union, + overload, +) + +_T = TypeVar("_T") +_Arg = Union[List[_T], Iterable[_T]] + +class FrozenList(MutableSequence[_T], Generic[_T]): + def __init__(self, items: Optional[_Arg[_T]] = None) -> None: ... + @property + def frozen(self) -> bool: ... + def freeze(self) -> None: ... + @overload + def __getitem__(self, i: int) -> _T: ... + @overload + def __getitem__(self, s: slice) -> FrozenList[_T]: ... + @overload + def __setitem__(self, i: int, o: _T) -> None: ... + @overload + def __setitem__(self, s: slice, o: Iterable[_T]) -> None: ... + @overload + def __delitem__(self, i: int) -> None: ... + @overload + def __delitem__(self, i: slice) -> None: ... + def __len__(self) -> int: ... + def __iter__(self) -> Iterator[_T]: ... + def __reversed__(self) -> Iterator[_T]: ... + def __eq__(self, other: object) -> bool: ... + def __le__(self, other: FrozenList[_T]) -> bool: ... + def __ne__(self, other: object) -> bool: ... + def __lt__(self, other: FrozenList[_T]) -> bool: ... + def __ge__(self, other: FrozenList[_T]) -> bool: ... + def __gt__(self, other: FrozenList[_T]) -> bool: ... + def insert(self, pos: int, item: _T) -> None: ... + def __repr__(self) -> str: ... + def __hash__(self) -> int: ... + +# types for C accelerators are the same +CFrozenList = PyFrozenList = FrozenList diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/frozenlist/_frozenlist.pyx b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/frozenlist/_frozenlist.pyx new file mode 100644 index 0000000000000000000000000000000000000000..45d11de13264d426c35f754dfbffbf049af84abf --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/frozenlist/_frozenlist.pyx @@ -0,0 +1,123 @@ +import sys +import types +from collections.abc import MutableSequence + + +cdef class FrozenList: + + if sys.version_info >= (3, 9): + __class_getitem__ = classmethod(types.GenericAlias) + else: + @classmethod + def __class_getitem__(cls, cls_item): + return cls + + cdef readonly bint frozen + cdef list _items + + def __init__(self, items=None): + self.frozen = False + if items is not None: + items = list(items) + else: + items = [] + self._items = items + + cdef object _check_frozen(self): + if self.frozen: + raise RuntimeError("Cannot modify frozen list.") + + cdef inline object _fast_len(self): + return len(self._items) + + def freeze(self): + self.frozen = True + + def __getitem__(self, index): + return self._items[index] + + def __setitem__(self, index, value): + self._check_frozen() + self._items[index] = value + + def __delitem__(self, index): + self._check_frozen() + del self._items[index] + + def __len__(self): + return self._fast_len() + + def __iter__(self): + return self._items.__iter__() + + def __reversed__(self): + return self._items.__reversed__() + + def __richcmp__(self, other, op): + if op == 0: # < + return list(self) < other + if op == 1: # <= + return list(self) <= other + if op == 2: # == + return list(self) == other + if op == 3: # != + return list(self) != other + if op == 4: # > + return list(self) > other + if op == 5: # => + return list(self) >= other + + def insert(self, pos, item): + self._check_frozen() + self._items.insert(pos, item) + + def __contains__(self, item): 
+ return item in self._items + + def __iadd__(self, items): + self._check_frozen() + self._items += list(items) + return self + + def index(self, item): + return self._items.index(item) + + def remove(self, item): + self._check_frozen() + self._items.remove(item) + + def clear(self): + self._check_frozen() + self._items.clear() + + def extend(self, items): + self._check_frozen() + self._items += list(items) + + def reverse(self): + self._check_frozen() + self._items.reverse() + + def pop(self, index=-1): + self._check_frozen() + return self._items.pop(index) + + def append(self, item): + self._check_frozen() + return self._items.append(item) + + def count(self, item): + return self._items.count(item) + + def __repr__(self): + return '<FrozenList(frozen={}, {!r})>'.format(self.frozen, + self._items) + + def __hash__(self): + if self.frozen: + return hash(tuple(self._items)) + else: + raise RuntimeError("Cannot hash unfrozen list.") + + +MutableSequence.register(FrozenList) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/frozenlist/py.typed b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/frozenlist/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..f5642f79f21d872f010979dcf6f0c4a415acc19d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/frozenlist/py.typed @@ -0,0 +1 @@ +Marker diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/httpcore/_models.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/httpcore/_models.py new file mode 100644 index 0000000000000000000000000000000000000000..8a65f13347d6621289a166d08123cbc8e1ad0157 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/httpcore/_models.py @@ -0,0 +1,516 @@ +from __future__ import annotations + +import base64 +import ssl +import typing +import urllib.parse + +# Functions for typechecking... + + +ByteOrStr = typing.Union[bytes, str] +HeadersAsSequence = typing.Sequence[typing.Tuple[ByteOrStr, ByteOrStr]] +HeadersAsMapping = typing.Mapping[ByteOrStr, ByteOrStr] +HeaderTypes = typing.Union[HeadersAsSequence, HeadersAsMapping, None] + +Extensions = typing.MutableMapping[str, typing.Any] + + +def enforce_bytes(value: bytes | str, *, name: str) -> bytes: + """ + Any arguments that are ultimately represented as bytes can be specified + either as bytes or as strings. + + However we enforce that any string arguments must only contain characters in + the plain ASCII range. chr(0)...chr(127). If you need to use characters + outside that range then be precise, and use a byte-wise argument. + """ + if isinstance(value, str): + try: + return value.encode("ascii") + except UnicodeEncodeError: + raise TypeError(f"{name} strings may not include unicode characters.") + elif isinstance(value, bytes): + return value + + seen_type = type(value).__name__ + raise TypeError(f"{name} must be bytes or str, but got {seen_type}.") + + +def enforce_url(value: URL | bytes | str, *, name: str) -> URL: + """ + Type check for URL parameters. + """ + if isinstance(value, (bytes, str)): + return URL(value) + elif isinstance(value, URL): + return value + + seen_type = type(value).__name__ + raise TypeError(f"{name} must be a URL, bytes, or str, but got {seen_type}.") + + +def enforce_headers( + value: HeadersAsMapping | HeadersAsSequence | None = None, *, name: str +) -> list[tuple[bytes, bytes]]: + """ + Convenience function that ensures all items in request or response headers + are either bytes or strings in the plain ASCII range. 
+ """ + if value is None: + return [] + elif isinstance(value, typing.Mapping): + return [ + ( + enforce_bytes(k, name="header name"), + enforce_bytes(v, name="header value"), + ) + for k, v in value.items() + ] + elif isinstance(value, typing.Sequence): + return [ + ( + enforce_bytes(k, name="header name"), + enforce_bytes(v, name="header value"), + ) + for k, v in value + ] + + seen_type = type(value).__name__ + raise TypeError( + f"{name} must be a mapping or sequence of two-tuples, but got {seen_type}." + ) + + +def enforce_stream( + value: bytes | typing.Iterable[bytes] | typing.AsyncIterable[bytes] | None, + *, + name: str, +) -> typing.Iterable[bytes] | typing.AsyncIterable[bytes]: + if value is None: + return ByteStream(b"") + elif isinstance(value, bytes): + return ByteStream(value) + return value + + +# * https://tools.ietf.org/html/rfc3986#section-3.2.3 +# * https://url.spec.whatwg.org/#url-miscellaneous +# * https://url.spec.whatwg.org/#scheme-state +DEFAULT_PORTS = { + b"ftp": 21, + b"http": 80, + b"https": 443, + b"ws": 80, + b"wss": 443, +} + + +def include_request_headers( + headers: list[tuple[bytes, bytes]], + *, + url: "URL", + content: None | bytes | typing.Iterable[bytes] | typing.AsyncIterable[bytes], +) -> list[tuple[bytes, bytes]]: + headers_set = set(k.lower() for k, v in headers) + + if b"host" not in headers_set: + default_port = DEFAULT_PORTS.get(url.scheme) + if url.port is None or url.port == default_port: + header_value = url.host + else: + header_value = b"%b:%d" % (url.host, url.port) + headers = [(b"Host", header_value)] + headers + + if ( + content is not None + and b"content-length" not in headers_set + and b"transfer-encoding" not in headers_set + ): + if isinstance(content, bytes): + content_length = str(len(content)).encode("ascii") + headers += [(b"Content-Length", content_length)] + else: + headers += [(b"Transfer-Encoding", b"chunked")] # pragma: nocover + + return headers + + +# Interfaces for byte streams... + + +class ByteStream: + """ + A container for non-streaming content, and that supports both sync and async + stream iteration. + """ + + def __init__(self, content: bytes) -> None: + self._content = content + + def __iter__(self) -> typing.Iterator[bytes]: + yield self._content + + async def __aiter__(self) -> typing.AsyncIterator[bytes]: + yield self._content + + def __repr__(self) -> str: + return f"<{self.__class__.__name__} [{len(self._content)} bytes]>" + + +class Origin: + def __init__(self, scheme: bytes, host: bytes, port: int) -> None: + self.scheme = scheme + self.host = host + self.port = port + + def __eq__(self, other: typing.Any) -> bool: + return ( + isinstance(other, Origin) + and self.scheme == other.scheme + and self.host == other.host + and self.port == other.port + ) + + def __str__(self) -> str: + scheme = self.scheme.decode("ascii") + host = self.host.decode("ascii") + port = str(self.port) + return f"{scheme}://{host}:{port}" + + +class URL: + """ + Represents the URL against which an HTTP request may be made. 
+ + The URL may either be specified as a plain string, for convienence: + + ```python + url = httpcore.URL("https://www.example.com/") + ``` + + Or be constructed with explicitily pre-parsed components: + + ```python + url = httpcore.URL(scheme=b'https', host=b'www.example.com', port=None, target=b'/') + ``` + + Using this second more explicit style allows integrations that are using + `httpcore` to pass through URLs that have already been parsed in order to use + libraries such as `rfc-3986` rather than relying on the stdlib. It also ensures + that URL parsing is treated identically at both the networking level and at any + higher layers of abstraction. + + The four components are important here, as they allow the URL to be precisely + specified in a pre-parsed format. They also allow certain types of request to + be created that could not otherwise be expressed. + + For example, an HTTP request to `http://www.example.com/` forwarded via a proxy + at `http://localhost:8080`... + + ```python + # Constructs an HTTP request with a complete URL as the target: + # GET https://www.example.com/ HTTP/1.1 + url = httpcore.URL( + scheme=b'http', + host=b'localhost', + port=8080, + target=b'https://www.example.com/' + ) + request = httpcore.Request( + method="GET", + url=url + ) + ``` + + Another example is constructing an `OPTIONS *` request... + + ```python + # Constructs an 'OPTIONS *' HTTP request: + # OPTIONS * HTTP/1.1 + url = httpcore.URL(scheme=b'https', host=b'www.example.com', target=b'*') + request = httpcore.Request(method="OPTIONS", url=url) + ``` + + This kind of request is not possible to formulate with a URL string, + because the `/` delimiter is always used to demark the target from the + host/port portion of the URL. + + For convenience, string-like arguments may be specified either as strings or + as bytes. However, once a request is being issue over-the-wire, the URL + components are always ultimately required to be a bytewise representation. + + In order to avoid any ambiguity over character encodings, when strings are used + as arguments, they must be strictly limited to the ASCII range `chr(0)`-`chr(127)`. + If you require a bytewise representation that is outside this range you must + handle the character encoding directly, and pass a bytes instance. + """ + + def __init__( + self, + url: bytes | str = "", + *, + scheme: bytes | str = b"", + host: bytes | str = b"", + port: int | None = None, + target: bytes | str = b"", + ) -> None: + """ + Parameters: + url: The complete URL as a string or bytes. + scheme: The URL scheme as a string or bytes. + Typically either `"http"` or `"https"`. + host: The URL host as a string or bytes. Such as `"www.example.com"`. + port: The port to connect to. Either an integer or `None`. + target: The target of the HTTP request. Such as `"/items?search=red"`. + """ + if url: + parsed = urllib.parse.urlparse(enforce_bytes(url, name="url")) + self.scheme = parsed.scheme + self.host = parsed.hostname or b"" + self.port = parsed.port + self.target = (parsed.path or b"/") + ( + b"?" 
+ parsed.query if parsed.query else b"" + ) + else: + self.scheme = enforce_bytes(scheme, name="scheme") + self.host = enforce_bytes(host, name="host") + self.port = port + self.target = enforce_bytes(target, name="target") + + @property + def origin(self) -> Origin: + default_port = { + b"http": 80, + b"https": 443, + b"ws": 80, + b"wss": 443, + b"socks5": 1080, + b"socks5h": 1080, + }[self.scheme] + return Origin( + scheme=self.scheme, host=self.host, port=self.port or default_port + ) + + def __eq__(self, other: typing.Any) -> bool: + return ( + isinstance(other, URL) + and other.scheme == self.scheme + and other.host == self.host + and other.port == self.port + and other.target == self.target + ) + + def __bytes__(self) -> bytes: + if self.port is None: + return b"%b://%b%b" % (self.scheme, self.host, self.target) + return b"%b://%b:%d%b" % (self.scheme, self.host, self.port, self.target) + + def __repr__(self) -> str: + return ( + f"{self.__class__.__name__}(scheme={self.scheme!r}, " + f"host={self.host!r}, port={self.port!r}, target={self.target!r})" + ) + + +class Request: + """ + An HTTP request. + """ + + def __init__( + self, + method: bytes | str, + url: URL | bytes | str, + *, + headers: HeaderTypes = None, + content: bytes + | typing.Iterable[bytes] + | typing.AsyncIterable[bytes] + | None = None, + extensions: Extensions | None = None, + ) -> None: + """ + Parameters: + method: The HTTP request method, either as a string or bytes. + For example: `GET`. + url: The request URL, either as a `URL` instance, or as a string or bytes. + For example: `"https://www.example.com".` + headers: The HTTP request headers. + content: The content of the request body. + extensions: A dictionary of optional extra information included on + the request. Possible keys include `"timeout"`, and `"trace"`. + """ + self.method: bytes = enforce_bytes(method, name="method") + self.url: URL = enforce_url(url, name="url") + self.headers: list[tuple[bytes, bytes]] = enforce_headers( + headers, name="headers" + ) + self.stream: typing.Iterable[bytes] | typing.AsyncIterable[bytes] = ( + enforce_stream(content, name="content") + ) + self.extensions = {} if extensions is None else extensions + + if "target" in self.extensions: + self.url = URL( + scheme=self.url.scheme, + host=self.url.host, + port=self.url.port, + target=self.extensions["target"], + ) + + def __repr__(self) -> str: + return f"<{self.__class__.__name__} [{self.method!r}]>" + + +class Response: + """ + An HTTP response. + """ + + def __init__( + self, + status: int, + *, + headers: HeaderTypes = None, + content: bytes + | typing.Iterable[bytes] + | typing.AsyncIterable[bytes] + | None = None, + extensions: Extensions | None = None, + ) -> None: + """ + Parameters: + status: The HTTP status code of the response. For example `200`. + headers: The HTTP response headers. + content: The content of the response body. + extensions: A dictionary of optional extra information included on + the responseself.Possible keys include `"http_version"`, + `"reason_phrase"`, and `"network_stream"`. 
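+
+        A minimal illustrative sketch using the top-level `httpcore.Response` export
+        with non-streaming content:
+
+        ```python
+        import httpcore
+
+        # Construct a fixed-content response and read its body.
+        response = httpcore.Response(200, content=b"Hello, world!")
+        response.read()
+        print(response.status, response.content)
+        ```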
+ """ + self.status: int = status + self.headers: list[tuple[bytes, bytes]] = enforce_headers( + headers, name="headers" + ) + self.stream: typing.Iterable[bytes] | typing.AsyncIterable[bytes] = ( + enforce_stream(content, name="content") + ) + self.extensions = {} if extensions is None else extensions + + self._stream_consumed = False + + @property + def content(self) -> bytes: + if not hasattr(self, "_content"): + if isinstance(self.stream, typing.Iterable): + raise RuntimeError( + "Attempted to access 'response.content' on a streaming response. " + "Call 'response.read()' first." + ) + else: + raise RuntimeError( + "Attempted to access 'response.content' on a streaming response. " + "Call 'await response.aread()' first." + ) + return self._content + + def __repr__(self) -> str: + return f"<{self.__class__.__name__} [{self.status}]>" + + # Sync interface... + + def read(self) -> bytes: + if not isinstance(self.stream, typing.Iterable): # pragma: nocover + raise RuntimeError( + "Attempted to read an asynchronous response using 'response.read()'. " + "You should use 'await response.aread()' instead." + ) + if not hasattr(self, "_content"): + self._content = b"".join([part for part in self.iter_stream()]) + return self._content + + def iter_stream(self) -> typing.Iterator[bytes]: + if not isinstance(self.stream, typing.Iterable): # pragma: nocover + raise RuntimeError( + "Attempted to stream an asynchronous response using 'for ... in " + "response.iter_stream()'. " + "You should use 'async for ... in response.aiter_stream()' instead." + ) + if self._stream_consumed: + raise RuntimeError( + "Attempted to call 'for ... in response.iter_stream()' more than once." + ) + self._stream_consumed = True + for chunk in self.stream: + yield chunk + + def close(self) -> None: + if not isinstance(self.stream, typing.Iterable): # pragma: nocover + raise RuntimeError( + "Attempted to close an asynchronous response using 'response.close()'. " + "You should use 'await response.aclose()' instead." + ) + if hasattr(self.stream, "close"): + self.stream.close() + + # Async interface... + + async def aread(self) -> bytes: + if not isinstance(self.stream, typing.AsyncIterable): # pragma: nocover + raise RuntimeError( + "Attempted to read an synchronous response using " + "'await response.aread()'. " + "You should use 'response.read()' instead." + ) + if not hasattr(self, "_content"): + self._content = b"".join([part async for part in self.aiter_stream()]) + return self._content + + async def aiter_stream(self) -> typing.AsyncIterator[bytes]: + if not isinstance(self.stream, typing.AsyncIterable): # pragma: nocover + raise RuntimeError( + "Attempted to stream an synchronous response using 'async for ... in " + "response.aiter_stream()'. " + "You should use 'for ... in response.iter_stream()' instead." + ) + if self._stream_consumed: + raise RuntimeError( + "Attempted to call 'async for ... in response.aiter_stream()' " + "more than once." + ) + self._stream_consumed = True + async for chunk in self.stream: + yield chunk + + async def aclose(self) -> None: + if not isinstance(self.stream, typing.AsyncIterable): # pragma: nocover + raise RuntimeError( + "Attempted to close a synchronous response using " + "'await response.aclose()'. " + "You should use 'response.close()' instead." 
+ ) + if hasattr(self.stream, "aclose"): + await self.stream.aclose() + + +class Proxy: + def __init__( + self, + url: URL | bytes | str, + auth: tuple[bytes | str, bytes | str] | None = None, + headers: HeadersAsMapping | HeadersAsSequence | None = None, + ssl_context: ssl.SSLContext | None = None, + ): + self.url = enforce_url(url, name="url") + self.headers = enforce_headers(headers, name="headers") + self.ssl_context = ssl_context + + if auth is not None: + username = enforce_bytes(auth[0], name="auth") + password = enforce_bytes(auth[1], name="auth") + userpass = username + b":" + password + authorization = b"Basic " + base64.b64encode(userpass) + self.auth: tuple[bytes, bytes] | None = (username, password) + self.headers = [(b"Proxy-Authorization", authorization)] + self.headers + else: + self.auth = None diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/httpcore/_ssl.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/httpcore/_ssl.py new file mode 100644 index 0000000000000000000000000000000000000000..c99c5a67945b8a3a3544d481e979c791ab45fe23 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/httpcore/_ssl.py @@ -0,0 +1,9 @@ +import ssl + +import certifi + + +def default_ssl_context() -> ssl.SSLContext: + context = ssl.create_default_context() + context.load_verify_locations(certifi.where()) + return context diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/httpcore/_utils.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/httpcore/_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..c44ff93cb2f572afc6e679308024b744b65c3b0a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/httpcore/_utils.py @@ -0,0 +1,37 @@ +from __future__ import annotations + +import select +import socket +import sys + + +def is_socket_readable(sock: socket.socket | None) -> bool: + """ + Return whether a socket, as identifed by its file descriptor, is readable. + "A socket is readable" means that the read buffer isn't empty, i.e. that calling + .recv() on it would immediately return some data. + """ + # NOTE: we want check for readability without actually attempting to read, because + # we don't want to block forever if it's not readable. + + # In the case that the socket no longer exists, or cannot return a file + # descriptor, we treat it as being readable, as if it the next read operation + # on it is ready to return the terminating `b""`. + sock_fd = None if sock is None else sock.fileno() + if sock_fd is None or sock_fd < 0: # pragma: nocover + return True + + # The implementation below was stolen from: + # https://github.com/python-trio/trio/blob/20ee2b1b7376db637435d80e266212a35837ddcc/trio/_socket.py#L471-L478 + # See also: https://github.com/encode/httpcore/pull/193#issuecomment-703129316 + + # Use select.select on Windows, and when poll is unavailable and select.poll + # everywhere else. (E.g. When eventlet is in use. 
See #327) + if ( + sys.platform == "win32" or getattr(select, "poll", None) is None + ): # pragma: nocover + rready, _, _ = select.select([sock_fd], [], [], 0) + return bool(rready) + p = select.poll() + p.register(sock_fd, select.POLLIN) + return bool(p.poll(0)) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/httpcore/py.typed b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/httpcore/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/jinja2-3.1.6.dist-info/INSTALLER b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/jinja2-3.1.6.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/jinja2-3.1.6.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/jinja2-3.1.6.dist-info/METADATA b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/jinja2-3.1.6.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..ffef2ff3bfa0c42b6e6e3eefda700391d181c9a0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/jinja2-3.1.6.dist-info/METADATA @@ -0,0 +1,84 @@ +Metadata-Version: 2.4 +Name: Jinja2 +Version: 3.1.6 +Summary: A very fast and expressive template engine. +Maintainer-email: Pallets +Requires-Python: >=3.7 +Description-Content-Type: text/markdown +Classifier: Development Status :: 5 - Production/Stable +Classifier: Environment :: Web Environment +Classifier: Intended Audience :: Developers +Classifier: License :: OSI Approved :: BSD License +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content +Classifier: Topic :: Text Processing :: Markup :: HTML +Classifier: Typing :: Typed +License-File: LICENSE.txt +Requires-Dist: MarkupSafe>=2.0 +Requires-Dist: Babel>=2.7 ; extra == "i18n" +Project-URL: Changes, https://jinja.palletsprojects.com/changes/ +Project-URL: Chat, https://discord.gg/pallets +Project-URL: Documentation, https://jinja.palletsprojects.com/ +Project-URL: Donate, https://palletsprojects.com/donate +Project-URL: Source, https://github.com/pallets/jinja/ +Provides-Extra: i18n + +# Jinja + +Jinja is a fast, expressive, extensible templating engine. Special +placeholders in the template allow writing code similar to Python +syntax. Then the template is passed data to render the final document. + +It includes: + +- Template inheritance and inclusion. +- Define and import macros within templates. +- HTML templates can use autoescaping to prevent XSS from untrusted + user input. +- A sandboxed environment can safely render untrusted templates. +- AsyncIO support for generating templates and calling async + functions. +- I18N support with Babel. +- Templates are compiled to optimized Python code just-in-time and + cached, or can be compiled ahead-of-time. +- Exceptions point to the correct line in templates to make debugging + easier. +- Extensible filters, tests, functions, and even syntax. + +Jinja's philosophy is that while application logic belongs in Python if +possible, it shouldn't make the template designer's job difficult by +restricting functionality too much. 
+ + +## In A Nutshell + +```jinja +{% extends "base.html" %} +{% block title %}Members{% endblock %} +{% block content %} + +{% endblock %} +``` + +## Donate + +The Pallets organization develops and supports Jinja and other popular +packages. In order to grow the community of contributors and users, and +allow the maintainers to devote more time to the projects, [please +donate today][]. + +[please donate today]: https://palletsprojects.com/donate + +## Contributing + +See our [detailed contributing documentation][contrib] for many ways to +contribute, including reporting issues, requesting features, asking or answering +questions, and making PRs. + +[contrib]: https://palletsprojects.com/contributing/ + diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/jinja2-3.1.6.dist-info/RECORD b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/jinja2-3.1.6.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..ffa3866a426ed6111af469ba4369ad65438342ac --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/jinja2-3.1.6.dist-info/RECORD @@ -0,0 +1,57 @@ +jinja2-3.1.6.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +jinja2-3.1.6.dist-info/METADATA,sha256=aMVUj7Z8QTKhOJjZsx7FDGvqKr3ZFdkh8hQ1XDpkmcg,2871 +jinja2-3.1.6.dist-info/RECORD,, +jinja2-3.1.6.dist-info/WHEEL,sha256=_2ozNFCLWc93bK4WKHCO-eDUENDlo-dgc9cU3qokYO4,82 +jinja2-3.1.6.dist-info/entry_points.txt,sha256=OL85gYU1eD8cuPlikifFngXpeBjaxl6rIJ8KkC_3r-I,58 +jinja2-3.1.6.dist-info/licenses/LICENSE.txt,sha256=O0nc7kEF6ze6wQ-vG-JgQI_oXSUrjp3y4JefweCUQ3s,1475 +jinja2/__init__.py,sha256=xxepO9i7DHsqkQrgBEduLtfoz2QCuT6_gbL4XSN1hbU,1928 +jinja2/__pycache__/__init__.cpython-312.pyc,, +jinja2/__pycache__/_identifier.cpython-312.pyc,, +jinja2/__pycache__/async_utils.cpython-312.pyc,, +jinja2/__pycache__/bccache.cpython-312.pyc,, +jinja2/__pycache__/compiler.cpython-312.pyc,, +jinja2/__pycache__/constants.cpython-312.pyc,, +jinja2/__pycache__/debug.cpython-312.pyc,, +jinja2/__pycache__/defaults.cpython-312.pyc,, +jinja2/__pycache__/environment.cpython-312.pyc,, +jinja2/__pycache__/exceptions.cpython-312.pyc,, +jinja2/__pycache__/ext.cpython-312.pyc,, +jinja2/__pycache__/filters.cpython-312.pyc,, +jinja2/__pycache__/idtracking.cpython-312.pyc,, +jinja2/__pycache__/lexer.cpython-312.pyc,, +jinja2/__pycache__/loaders.cpython-312.pyc,, +jinja2/__pycache__/meta.cpython-312.pyc,, +jinja2/__pycache__/nativetypes.cpython-312.pyc,, +jinja2/__pycache__/nodes.cpython-312.pyc,, +jinja2/__pycache__/optimizer.cpython-312.pyc,, +jinja2/__pycache__/parser.cpython-312.pyc,, +jinja2/__pycache__/runtime.cpython-312.pyc,, +jinja2/__pycache__/sandbox.cpython-312.pyc,, +jinja2/__pycache__/tests.cpython-312.pyc,, +jinja2/__pycache__/utils.cpython-312.pyc,, +jinja2/__pycache__/visitor.cpython-312.pyc,, +jinja2/_identifier.py,sha256=_zYctNKzRqlk_murTNlzrju1FFJL7Va_Ijqqd7ii2lU,1958 +jinja2/async_utils.py,sha256=vK-PdsuorOMnWSnEkT3iUJRIkTnYgO2T6MnGxDgHI5o,2834 +jinja2/bccache.py,sha256=gh0qs9rulnXo0PhX5jTJy2UHzI8wFnQ63o_vw7nhzRg,14061 +jinja2/compiler.py,sha256=9RpCQl5X88BHllJiPsHPh295Hh0uApvwFJNQuutULeM,74131 +jinja2/constants.py,sha256=GMoFydBF_kdpaRKPoM5cl5MviquVRLVyZtfp5-16jg0,1433 +jinja2/debug.py,sha256=CnHqCDHd-BVGvti_8ZsTolnXNhA3ECsY-6n_2pwU8Hw,6297 +jinja2/defaults.py,sha256=boBcSw78h-lp20YbaXSJsqkAI2uN_mD_TtCydpeq5wU,1267 +jinja2/environment.py,sha256=9nhrP7Ch-NbGX00wvyr4yy-uhNHq2OCc60ggGrni_fk,61513 
+jinja2/exceptions.py,sha256=ioHeHrWwCWNaXX1inHmHVblvc4haO7AXsjCp3GfWvx0,5071 +jinja2/ext.py,sha256=5PF5eHfh8mXAIxXHHRB2xXbXohi8pE3nHSOxa66uS7E,31875 +jinja2/filters.py,sha256=PQ_Egd9n9jSgtnGQYyF4K5j2nYwhUIulhPnyimkdr-k,55212 +jinja2/idtracking.py,sha256=-ll5lIp73pML3ErUYiIJj7tdmWxcH_IlDv3yA_hiZYo,10555 +jinja2/lexer.py,sha256=LYiYio6br-Tep9nPcupWXsPEtjluw3p1mU-lNBVRUfk,29786 +jinja2/loaders.py,sha256=wIrnxjvcbqh5VwW28NSkfotiDq8qNCxIOSFbGUiSLB4,24055 +jinja2/meta.py,sha256=OTDPkaFvU2Hgvx-6akz7154F8BIWaRmvJcBFvwopHww,4397 +jinja2/nativetypes.py,sha256=7GIGALVJgdyL80oZJdQUaUfwSt5q2lSSZbXt0dNf_M4,4210 +jinja2/nodes.py,sha256=m1Duzcr6qhZI8JQ6VyJgUNinjAf5bQzijSmDnMsvUx8,34579 +jinja2/optimizer.py,sha256=rJnCRlQ7pZsEEmMhsQDgC_pKyDHxP5TPS6zVPGsgcu8,1651 +jinja2/parser.py,sha256=lLOFy3sEmHc5IaEHRiH1sQVnId2moUQzhyeJZTtdY30,40383 +jinja2/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +jinja2/runtime.py,sha256=gDk-GvdriJXqgsGbHgrcKTP0Yp6zPXzhzrIpCFH3jAU,34249 +jinja2/sandbox.py,sha256=Mw2aitlY2I8la7FYhcX2YG9BtUYcLnD0Gh3d29cDWrY,15009 +jinja2/tests.py,sha256=VLsBhVFnWg-PxSBz1MhRnNWgP1ovXk3neO1FLQMeC9Q,5926 +jinja2/utils.py,sha256=rRp3o9e7ZKS4fyrWRbELyLcpuGVTFcnooaOa1qx_FIk,24129 +jinja2/visitor.py,sha256=EcnL1PIwf_4RVCOMxsRNuR8AXHbS1qfAdMOE2ngKJz4,3557 diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/jinja2-3.1.6.dist-info/WHEEL b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/jinja2-3.1.6.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..23d2d7e9a5d381ef8a375db09f82052144d1fd96 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/jinja2-3.1.6.dist-info/WHEEL @@ -0,0 +1,4 @@ +Wheel-Version: 1.0 +Generator: flit 3.11.0 +Root-Is-Purelib: true +Tag: py3-none-any diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/jinja2-3.1.6.dist-info/entry_points.txt b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/jinja2-3.1.6.dist-info/entry_points.txt new file mode 100644 index 0000000000000000000000000000000000000000..abc3eae3b3bc573957cf7401711948799b3465c0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/jinja2-3.1.6.dist-info/entry_points.txt @@ -0,0 +1,3 @@ +[babel.extractors] +jinja2=jinja2.ext:babel_extract[i18n] + diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml-6.0.2.dist-info/INSTALLER b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml-6.0.2.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml-6.0.2.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml-6.0.2.dist-info/METADATA b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml-6.0.2.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..f1d57c56836aa1e8fe40a16105209bcdb92e5436 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml-6.0.2.dist-info/METADATA @@ -0,0 +1,103 @@ +Metadata-Version: 2.4 +Name: lxml +Version: 6.0.2 +Summary: Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API. 
+Home-page: https://lxml.de/ +Author: lxml dev team +Author-email: lxml@lxml.de +Maintainer: lxml dev team +Maintainer-email: lxml@lxml.de +License: BSD-3-Clause +Project-URL: Source, https://github.com/lxml/lxml +Project-URL: Bug Tracker, https://bugs.launchpad.net/lxml +Classifier: Development Status :: 5 - Production/Stable +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: Information Technology +Classifier: Programming Language :: Cython +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 +Classifier: Programming Language :: Python :: 3.14 +Classifier: Programming Language :: C +Classifier: Operating System :: OS Independent +Classifier: Topic :: Text Processing :: Markup :: HTML +Classifier: Topic :: Text Processing :: Markup :: XML +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Requires-Python: >=3.8 +License-File: LICENSE.txt +License-File: LICENSES.txt +Provides-Extra: source +Provides-Extra: cssselect +Requires-Dist: cssselect>=0.7; extra == "cssselect" +Provides-Extra: html5 +Requires-Dist: html5lib; extra == "html5" +Provides-Extra: htmlsoup +Requires-Dist: BeautifulSoup4; extra == "htmlsoup" +Provides-Extra: html-clean +Requires-Dist: lxml_html_clean; extra == "html-clean" +Dynamic: author +Dynamic: author-email +Dynamic: classifier +Dynamic: description +Dynamic: home-page +Dynamic: license +Dynamic: license-file +Dynamic: maintainer +Dynamic: maintainer-email +Dynamic: project-url +Dynamic: provides-extra +Dynamic: requires-python +Dynamic: summary + +lxml is a Pythonic, mature binding for the libxml2 and libxslt libraries. +It provides safe and convenient access to these libraries using the +ElementTree API. + +It extends the ElementTree API significantly to offer support for XPath, +RelaxNG, XML Schema, XSLT, C14N and much more. + +To contact the project, go to the `project home page <https://lxml.de/>`_ +or see our bug tracker at https://launchpad.net/lxml + +In case you want to use the current in-development version of lxml, +you can get it from the github repository at +https://github.com/lxml/lxml . Note that this requires Cython to +build the sources, see the build instructions on the project home page. + + +After an official release of a new stable series, bug fixes may become available at +https://github.com/lxml/lxml/tree/lxml-6.0 . +Running ``pip install https://github.com/lxml/lxml/archive/refs/heads/lxml-6.0.tar.gz`` +will install the unreleased branch state as soon as a maintenance branch has been established. +Note that this requires Cython to be installed at an appropriate version for the build. + +6.0.2 (2025-09-21) +================== + +Bugs fixed +---------- + +* LP#2125278: Compilation with libxml2 2.15.0 failed. + Original patch by Xi Ruoyao. + +* Setting ``decompress=True`` in the parser had no effect in libxml2 2.15. + +* Binary wheels on Linux and macOS use the library version libxml2 2.14.6. + See https://gitlab.gnome.org/GNOME/libxml2/-/releases/v2.14.6 + +* Test failures in libxml2 2.15.0 were fixed. + +Other changes +------------- + +* Binary wheels for Py3.9-3.11 on the ``riscv64`` architecture were added. + +* Error constants were updated to match libxml2 2.15.0. + +* Built using Cython 3.1.4. 
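+
+A minimal usage sketch (illustrative only), relying on the standard
+``lxml.etree`` parsing and XPath APIs described above::
+
+    from lxml import etree
+
+    root = etree.fromstring("<root><item id='1'/><item id='2'/></root>")
+    # XPath support is one of lxml's extensions over the stdlib ElementTree API.
+    print([el.get("id") for el in root.xpath("//item")])  # ['1', '2']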
+ + diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml-6.0.2.dist-info/RECORD b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml-6.0.2.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..6a4df99696d36a86bad307cd7527634e57b321fc --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml-6.0.2.dist-info/RECORD @@ -0,0 +1,204 @@ +lxml-6.0.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +lxml-6.0.2.dist-info/METADATA,sha256=0qIHkwlNTTMz4-c5e8ZnbbGgt_vpYZHCEoqXyckR95Q,3622 +lxml-6.0.2.dist-info/RECORD,, +lxml-6.0.2.dist-info/WHEEL,sha256=1rk9WkINO5IYd_dGyocTHV6htge3I27wu_Vax8WCadA,152 +lxml-6.0.2.dist-info/licenses/LICENSE.txt,sha256=j8K1aBM1FuRoRdIUeRet7uFkjnCumrXtbFQXr-9M6FU,1507 +lxml-6.0.2.dist-info/licenses/LICENSES.txt,sha256=QdSd1AaqDhVIptXyGjDWv2OLPNlutyid00jYPtLkA5I,1514 +lxml-6.0.2.dist-info/top_level.txt,sha256=NjD988wqaKq512nshNdLt-uDxsjkp4Bh51m6N-dhUrk,5 +lxml/ElementInclude.py,sha256=PSLeZFvCa76WHJulPLxcZXJtCI2-4dK2CtqPRiYOAQg,8560 +lxml/__init__.py,sha256=rgOcPyZUNBFL30ylxIxd8fHHWi6TwyIUCi8Av84XWwo,574 +lxml/__pycache__/ElementInclude.cpython-312.pyc,, +lxml/__pycache__/__init__.cpython-312.pyc,, +lxml/__pycache__/_elementpath.cpython-312.pyc,, +lxml/__pycache__/builder.cpython-312.pyc,, +lxml/__pycache__/cssselect.cpython-312.pyc,, +lxml/__pycache__/doctestcompare.cpython-312.pyc,, +lxml/__pycache__/pyclasslookup.cpython-312.pyc,, +lxml/__pycache__/sax.cpython-312.pyc,, +lxml/__pycache__/usedoctest.cpython-312.pyc,, +lxml/_elementpath.cpython-312-x86_64-linux-gnu.so,sha256=1mB7tnIOx_08TqlYHQQSYJX5SXE4lQZrrnexJZBuvi8,217352 +lxml/_elementpath.py,sha256=b80hM3ndAkTtRX6v54za3LkkAqCcd0700BbMPZHnTBU,10959 +lxml/apihelpers.pxi,sha256=9S6bzp-VKCUPZv0f6-el5PsbPFN4FJqSnMCIYilS0eU,63881 +lxml/builder.cpython-312-x86_64-linux-gnu.so,sha256=iSov_1syOR8dCLyAPsAlfGOkc67Yl1GX7I93Af993ZI,129080 +lxml/builder.py,sha256=KI1HxHTd4wJqqVfmTRtSbXBQdl2T-P36ih4hT-J3MNw,8485 +lxml/classlookup.pxi,sha256=Tax8Vhbm5C6UCjgmRFsYjW0pFHxIuTthH1MOgASDLgc,22435 +lxml/cleanup.pxi,sha256=ZNEpbv7qx_ICPzsxhCaMUHCOfiznOoZ_u3jlYXHAuh4,8454 +lxml/cssselect.py,sha256=_wZdX-B9p5MeIYABmENIYRWEkwXwX-7jO8Dkf-1rUZU,3306 +lxml/debug.pxi,sha256=KTcpR8-slUYvmIPbE35GoHDNTb-gjTEvD7bw6LltM4c,1125 +lxml/docloader.pxi,sha256=bYSZAxxbBEfVzfLXTUWFRfOyUTfV23L7i9hR2dgtSNY,5772 +lxml/doctestcompare.py,sha256=40EDnkwpcvW86qNa86990OXF42xdHaosSZoiBsEjkzU,17731 +lxml/dtd.pxi,sha256=IAKkmA4ZoC68sqAWcTqoS8jEGYcPQrVMCZgn4iLBYko,15281 +lxml/etree.cpython-312-x86_64-linux-gnu.so,sha256=4SybuGGBSJ2dF8AZo5PSuo8BaiLbT3eF8sofIH2RT_U,5395056 +lxml/etree.h,sha256=_NkGkD3C_jpE4UegvQ6Y32_ycTbUCLyOBz9xfWRPkug,9792 +lxml/etree.pyx,sha256=2qCb8ZNjsdoB0fUELYwAM4ldLQZWS5_gt-OxKEUM-vs,138014 +lxml/etree_api.h,sha256=dNCm28ubaVS8SbhLuxs9JvYWg41NoR_yD3qTRr7hliA,17372 +lxml/extensions.pxi,sha256=xKLad35EQgpsDhs07tw31aKJBBMWIK9rMc0JTXETAUA,32022 +lxml/html/ElementSoup.py,sha256=s_dLobLMuKn2DhexR-iDXdZrMFg1RjLy1feHsIeZMpw,320 +lxml/html/__init__.py,sha256=CC5WdsvSptZhr9MZya1qsL6JKVbviYdrHOhXrGhmORg,64425 +lxml/html/__pycache__/ElementSoup.cpython-312.pyc,, +lxml/html/__pycache__/__init__.cpython-312.pyc,, +lxml/html/__pycache__/_diffcommand.cpython-312.pyc,, +lxml/html/__pycache__/_difflib.cpython-312.pyc,, +lxml/html/__pycache__/_html5builder.cpython-312.pyc,, +lxml/html/__pycache__/_setmixin.cpython-312.pyc,, +lxml/html/__pycache__/builder.cpython-312.pyc,, +lxml/html/__pycache__/clean.cpython-312.pyc,, 
+lxml/html/__pycache__/defs.cpython-312.pyc,, +lxml/html/__pycache__/diff.cpython-312.pyc,, +lxml/html/__pycache__/formfill.cpython-312.pyc,, +lxml/html/__pycache__/html5parser.cpython-312.pyc,, +lxml/html/__pycache__/soupparser.cpython-312.pyc,, +lxml/html/__pycache__/usedoctest.cpython-312.pyc,, +lxml/html/_diffcommand.py,sha256=kz_7EP9PmYWuczlZcGiw74_rG0eTKvQ2lrO0rkiwlYE,2081 +lxml/html/_difflib.cpython-312-x86_64-linux-gnu.so,sha256=XuPeciCf-4e7FpclT9B1viDjUaTJVJg4zkeEW_zXauo,570296 +lxml/html/_difflib.py,sha256=GgH_jVrZQC8tI8WV_lFZQsXFJ3mOTAPup1zjBJNvkPo,84954 +lxml/html/_html5builder.py,sha256=NLaT-Ev-aBgJpeQl-6ZbJChLZK5GV-znDkHOJD5VQC4,3230 +lxml/html/_setmixin.py,sha256=8IFIOLmVz0G-XzsD2tCEkSFWO-dgPBHgvHufC8ni67s,1188 +lxml/html/builder.py,sha256=Uz3r5uiuCdoN0UPa7ngoLMwAadVIhslzGvlRPGigY_M,6187 +lxml/html/clean.py,sha256=FghSJy4jt2RaBy6dgusowkU18hxpZ4XLE5ceCK9qxyA,503 +lxml/html/defs.py,sha256=l_6nh4DHvrsVyWVqWCUUx14QiahRyZv4Melqy_thf6Q,4250 +lxml/html/diff.cpython-312-x86_64-linux-gnu.so,sha256=iWcPoTRaf2StqEyPKB6xz1j15rvZDLvW_a-KwYLJLyY,377848 +lxml/html/diff.py,sha256=Za0By-yeYlQEjUu7m7xKB288kKiy8VBS5gT0RPOaFY0,32989 +lxml/html/formfill.py,sha256=umgk0BbkAI1W6q9musFbL-cDnI_aap2NsLBJqk0UmVI,9681 +lxml/html/html5parser.py,sha256=dnyC4cqHxywjZSzk0mu2L7THTZjxhg4yF4pncjusa_w,8634 +lxml/html/soupparser.py,sha256=xo8VvNeOEb-SChuXLKCRECh8J7HBiJLE9sAbEskoUUQ,10197 +lxml/html/usedoctest.py,sha256=tPlmVz4KK1GRKV5DJLrdVECeqsT9PlDzSqqTodVi5s0,249 +lxml/includes/__init__.pxd,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +lxml/includes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +lxml/includes/__pycache__/__init__.cpython-312.pyc,, +lxml/includes/c14n.pxd,sha256=DBQcOJ0c_YS245ohMb8fmuEC1kFyv1LrNY_8Mf-syZg,1110 +lxml/includes/config.pxd,sha256=H6Mrl8It21hzRI2hzMId9W48QqkYYkoLT4dniLNmdTw,96 +lxml/includes/dtdvalid.pxd,sha256=Nv0OykjYehv2lO-Zj--q6jS3TAC_dvQVPSgPMuse1NM,689 +lxml/includes/etree_defs.h,sha256=h_UjJTmNUqPyKNNrWB9hxmt6v4CF7_83XVY8dOfxqW0,14524 +lxml/includes/etreepublic.pxd,sha256=Bn4d3JkWPqXputXqI-eJ0xmPrwNFPTfDCa7axgjB7FM,10184 +lxml/includes/extlibs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +lxml/includes/extlibs/__pycache__/__init__.cpython-312.pyc,, +lxml/includes/extlibs/libcharset.h,sha256=GA0FumrbNI4VDGlzq3lf5CLaCwXgn4unw2l0btGQFwI,1510 +lxml/includes/extlibs/localcharset.h,sha256=Z_AagaQeq0aDE7NPsVOqEf4nO4KcUp46ggo4d0ONIOQ,6338 +lxml/includes/extlibs/zconf.h,sha256=ROVD_0UUx6mgHWSAGcLJqB0RBcv6PHfx-vbNhur6ir0,16464 +lxml/includes/extlibs/zlib.h,sha256=ilV5r3LqT0J_8ApBUPDMs_xcHkN59ybhARM7Grn8YAw,96829 +lxml/includes/htmlparser.pxd,sha256=9uASkP5dU7OE2lCOLT-z2e01qSbFlp4ehgwdostF_qk,2802 +lxml/includes/libexslt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +lxml/includes/libexslt/__pycache__/__init__.cpython-312.pyc,, +lxml/includes/libexslt/exslt.h,sha256=eSW5tMJAewSUANLqk7AGEiU8b2BbCNRyauHnez7nKSU,3114 +lxml/includes/libexslt/exsltconfig.h,sha256=QHxzEbRlv_h0USBvpr0Zrl0Muzlc71VCrvgR6lqnLEY,1172 +lxml/includes/libexslt/exsltexports.h,sha256=1Jm9KTXm2FUUJIZ6V6-Uw55yG0BMULX3_goyxDd2LL8,1077 +lxml/includes/libxml/HTMLparser.h,sha256=sU4xGqj-vBtEvzlxA3hBPWJboifvkc4F1hynKXmsl3k,9569 +lxml/includes/libxml/HTMLtree.h,sha256=Q7UBKFbQ8fx4d_dMnmR6ay8JmfOhopFkDp2B63YkLDU,3517 +lxml/includes/libxml/SAX.h,sha256=SFnG27EFrYGUB9HDL_xSIGBwEns5pl07rApXWThFZFM,386 +lxml/includes/libxml/SAX2.h,sha256=RfFP5o3le-Rg8bnA2GW7L7L9_pfXCs3TieODcv1DTWY,4240 
+lxml/includes/libxml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +lxml/includes/libxml/__pycache__/__init__.cpython-312.pyc,, +lxml/includes/libxml/c14n.h,sha256=BSBXw6nIZutC8mWvbRrLLmoWjw3wRt-nM93vjXGMCm8,2742 +lxml/includes/libxml/catalog.h,sha256=H9ssTCaBjtDqc-AZqCk1R7h8F2iD9szqLjJyHpaczXg,4633 +lxml/includes/libxml/chvalid.h,sha256=TZcceNp6Cw0QlYwIqK9GxyYqL5UiAjpQyjt_yrZGTQE,5087 +lxml/includes/libxml/debugXML.h,sha256=XXRNI39gJW7bGRC4SzE4ad-SJ906BsUGz3AwOtkKuS4,1667 +lxml/includes/libxml/dict.h,sha256=SweaPGMtTTf4je6dNTIoEzcfEvpsAT9_PhR7FC0K-rQ,1770 +lxml/includes/libxml/encoding.h,sha256=haL7ratww2wkIERGmtwUqU2BbTVe52FZFU7MmrOpsPk,9623 +lxml/includes/libxml/entities.h,sha256=LEOCA826-0f8dhRJzC_2hvUVsSH7lKQjrea9hSTdBbo,4419 +lxml/includes/libxml/globals.h,sha256=NH8zyRI5cXJJGp5k2aLxOm-reJEGOFX6LYP82GBXRlY,583 +lxml/includes/libxml/hash.h,sha256=KIIpAYKBfGUU3ydWhGehUyfuauZz_Ps0gyambzQo_rc,7017 +lxml/includes/libxml/list.h,sha256=oh7iJNQajRA_cHsNk9CcFPYkaW2smf4J_MpedPPjC4k,3128 +lxml/includes/libxml/nanoftp.h,sha256=22PBtWhJueYLFvwukt4oFooRct_xJA83hbluHRBNXUM,302 +lxml/includes/libxml/nanohttp.h,sha256=bLbzYjAyAKmP3ComMOPH6XaUImu6bNAESF1HrVtRve0,2124 +lxml/includes/libxml/parser.h,sha256=Uq7-ce55UUAsvo4n6CiBlNQpmowewvWhOsQtgGM1UQ8,48498 +lxml/includes/libxml/parserInternals.h,sha256=8_Wr6UgRzm8BRn1RPLxyBkw6BagAdDvVqMA_e181_EI,14539 +lxml/includes/libxml/relaxng.h,sha256=VXZ74r5Yja06KqypdBHc8neDwPxQ2aMrsWHSdRt5oi4,5991 +lxml/includes/libxml/schemasInternals.h,sha256=V8M4In3zf24EX55Yt4dcfxwp7NpHGYViKnLKwtyrPJ4,26233 +lxml/includes/libxml/schematron.h,sha256=8EhPDhvtlMxl9e0C5rSbEruOvzJS5BC_OOFbq9RXZnY,4255 +lxml/includes/libxml/threads.h,sha256=mT3CgK4lXK7-NDnUOFXqYuCK6fyY70S3BsHF-TnT45k,1619 +lxml/includes/libxml/tree.h,sha256=zTRLt6h5x6ApyeXgs90CKQZSAl2hKm7b5NxtPKUQFAE,36106 +lxml/includes/libxml/uri.h,sha256=J9teJHme5z883c4twF5oImEYY-E3xSvhdSGpyRVtvIg,2855 +lxml/includes/libxml/valid.h,sha256=By61IbPvk_eLux7a8x0mOaly7oclFaSGaFE8b2xZcUE,13226 +lxml/includes/libxml/xinclude.h,sha256=K3I5jhw2zAMj26LuRNZc15Bwv2JE2hWxwVn4TCqv2b4,3258 +lxml/includes/libxml/xlink.h,sha256=TVLOkISrcKDelo9n_XIUyPiStDYa8NxuF2dz70aBFCI,5062 +lxml/includes/libxml/xmlIO.h,sha256=FvbuMYTy1-S5PScabE03wz0oWKf626pmXvOPZNuLm-w,11948 +lxml/includes/libxml/xmlautomata.h,sha256=7Sc3YgPz1ZIBKCHPSxs5oAwJEZWQ1RT2kyUw85pUtmU,4004 +lxml/includes/libxml/xmlerror.h,sha256=mMfltMxUza6kiSBfP2QfnY3UlMP_rEXKfX0wruBLl4A,37561 +lxml/includes/libxml/xmlexports.h,sha256=IyV3AMeQVbOl0wkjlnNX4B8WUZ-5GNKQmxZc6-maWUU,2025 +lxml/includes/libxml/xmlmemory.h,sha256=m7wGvVMxNzZiuOAo3vkjxaVWstc8aQLzb6obbjPsebE,4658 +lxml/includes/libxml/xmlmodule.h,sha256=ERUHUmDdZRmh6NjLYWUpse51rLWR8qNjPHOtdgmlLF0,1198 +lxml/includes/libxml/xmlreader.h,sha256=BAHinlSOTXX3DEax9BniaIIPAXJyLGfzym9R-27LCcU,12387 +lxml/includes/libxml/xmlregexp.h,sha256=_q6C1XRy8DS3kSmLbEKpvkKQciTgjTJgGc_zUQ6m22M,2632 +lxml/includes/libxml/xmlsave.h,sha256=zcEQr9sO5CsFrnoOLshhdsqMEr8k4AeFhbkYyNfO9Fs,2934 +lxml/includes/libxml/xmlschemas.h,sha256=5AfLnYUcfmxHRzg0dVpdHig--4ui1-XDwDgpKGDKCiU,7067 +lxml/includes/libxml/xmlschemastypes.h,sha256=MYwlGmoKAo3lHRaaKgnCXiLmPT9KRjdxyCJ7TEyZ6jM,4583 +lxml/includes/libxml/xmlstring.h,sha256=d5PpqxP1I1sfmCUHvVJtjoC9h7hLHcAAQ5ok_Rtf50I,5271 +lxml/includes/libxml/xmlunicode.h,sha256=8sq3wEW2AiyTCuc3ZceOEkce7lfrI7VnkRfwEQgc6pU,278 +lxml/includes/libxml/xmlversion.h,sha256=oVpaE_xbttaeZNFKSuSfcLOceWz7LQgKP71Z1msXZNo,5112 +lxml/includes/libxml/xmlwriter.h,sha256=BEUwYNKx3xymDE9vepksEK7yVq9SXYm1d2pQnzlPy90,20688 
+lxml/includes/libxml/xpath.h,sha256=CQv6X_pRhuXoCVpqoDXYB7FfusLK7AuPxCNigwhNYAA,16156 +lxml/includes/libxml/xpathInternals.h,sha256=mc9B5tdpfssyz_NPUzww6dKuWCtBybBiBRJkTe4AE4U,18504 +lxml/includes/libxml/xpointer.h,sha256=DAxMsfPp2SSZgXFrPbxBA84RwTMRf35Qg_LBbUzPQhA,1026 +lxml/includes/libxslt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +lxml/includes/libxslt/__pycache__/__init__.cpython-312.pyc,, +lxml/includes/libxslt/attributes.h,sha256=qKwzfGf7r89esLC65s96iYJWRA-s-Ezss2_V6Mmo1hk,957 +lxml/includes/libxslt/documents.h,sha256=kBihgH5pqRvFalhm_fOFHtJTFhTpBcm681yT5dxgwfw,2704 +lxml/includes/libxslt/extensions.h,sha256=W5UMyJqUP_1zt6sXZ0mgc0gAIwDJrZ8gjByhyrWqvd8,6899 +lxml/includes/libxslt/extra.h,sha256=6X3Wu3NdPtrlqz-Koo7dB-rccnnszi6j3zg599gTByg,1640 +lxml/includes/libxslt/functions.h,sha256=fc4CZj-9KeBHzO9-WWU_bNqmaEZAz3n7NNwClIBXk14,1972 +lxml/includes/libxslt/imports.h,sha256=18kIjoGqdFXR63Ce3ZtzxsTiYV3XGKpchYakMUPDuUI,1840 +lxml/includes/libxslt/keys.h,sha256=16v25VEluS7jYhgg6gYFwVxgGMn-1ctnlhhWWT4RcBY,1155 +lxml/includes/libxslt/namespaces.h,sha256=VofSn2Kkn-a5JyRKCmY3jPp7amQy3n09vzy0KUQt4q0,1666 +lxml/includes/libxslt/numbersInternals.h,sha256=Eg5gYZ5p3h0_e5wyI61S-0E6_ArVJzv0yr63j6BU2fc,2019 +lxml/includes/libxslt/pattern.h,sha256=tJ-BPfs9UYgiZMMoQZbhij3g7xVppYq7TrrOu25eR7Q,2110 +lxml/includes/libxslt/preproc.h,sha256=D_LjEdHhsdyBnEAvflnwFgoR4hGUb72kgEhXkkmPRsw,896 +lxml/includes/libxslt/security.h,sha256=fUD1cy_WxFCTvTNAF0WOQIU4p5CNWn1LHFyZJd-Fx5U,2652 +lxml/includes/libxslt/templates.h,sha256=bnt6Jqui6KU5pNUdMNPbQZkZ5d-VTWqC0TMGkOlVoIo,2268 +lxml/includes/libxslt/transform.h,sha256=ICT7meUV0OTAx27WaKVrKj-aUmR9LSpTNaOAJd2UStg,6311 +lxml/includes/libxslt/variables.h,sha256=cQAgPe4QCcK2uKbWg7Iz-9peM9xWGm7m3M6jQm0sjIA,3143 +lxml/includes/libxslt/xslt.h,sha256=wmFx2Q31Pd8Iq2phAQpY9J3QQatb8lWg3gABtqKFgEw,1964 +lxml/includes/libxslt/xsltInternals.h,sha256=2EbEKYmnYZq0HjGnUMAlpqnqZJurRXzjlgk5Js1WYaY,57949 +lxml/includes/libxslt/xsltconfig.h,sha256=cV5scdRK6xmOHeOg3OCw6hBfcQ_nrtNs_tKefX67304,2910 +lxml/includes/libxslt/xsltexports.h,sha256=1-luH-0bCIgBAlKAXhV-dqHBfwOAQNDamiYbxIlTf0k,1124 +lxml/includes/libxslt/xsltlocale.h,sha256=ppxGEmJfZIJgwRQzCM0_77p9WNekEWq1NrdYZrQl4IE,942 +lxml/includes/libxslt/xsltutils.h,sha256=1eguYgR9-jeNOVlBUktHboaq-VLX6JXraO80TfbARKM,9085 +lxml/includes/lxml-version.h,sha256=KZfk_lJnXSnxkyRdUV5taHsWJe4xbC6UEYfYldlfouI,71 +lxml/includes/relaxng.pxd,sha256=HzHlQ6mCcf_tj_JZ9NAVJTVAv8ScCkE8Ifq15y3bS0c,2615 +lxml/includes/schematron.pxd,sha256=Hob7xh-K-MKqp7WiG8thMagf5EkQzmgfi4ds0EF91JA,1604 +lxml/includes/tree.pxd,sha256=XApzMRy_LSqCtQ-OTS-vNSW7CT_OWstybfIT2H84LsA,20179 +lxml/includes/uri.pxd,sha256=3vOXw6AbSPxAM9uo71T1qnfx-wd9ezXLDQtWsb2zX0I,145 +lxml/includes/xinclude.pxd,sha256=CuO_XZNB6E2JK1qXXWn11APrjFQV5kA6SMyb77WZn0A,804 +lxml/includes/xmlerror.pxd,sha256=OQqayytkV0NigAPbsQCCcvmy7luRe0XhVzpTdzJjP3g,58837 +lxml/includes/xmlparser.pxd,sha256=eDGyU5kZyNVksK0dUhMIi7rnE-LSevXsqyl72v99Ess,13730 +lxml/includes/xmlschema.pxd,sha256=OLZPd2WDJyopiXJJyo-dAyyYHaeSYFiMAI4tqIiv-Ik,1702 +lxml/includes/xpath.pxd,sha256=e8-ZYUbRG7N1mHETAlknJ_QqAteOosrYLRgpH-OsTkg,5603 +lxml/includes/xslt.pxd,sha256=4yl3pOu7pAvsx5Tc-W4IWCoB8wgtSSR62HI1jqu6jko,8241 +lxml/isoschematron/__init__.py,sha256=uauerYeKTlWFCJSqieIHhF5l6rYV2myeEJ0Imd1LzRc,13274 +lxml/isoschematron/__pycache__/__init__.cpython-312.pyc,, +lxml/isoschematron/resources/rng/iso-schematron.rng,sha256=VsWxPyi3iViJDDbjJJw0wWkEHkLrz9zoCA8zJLor9N4,18337 
+lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl,sha256=ObebsB8Wt-d3uIA_U5NU85TpnQ3PxPX38TdOAqosMac,3172 +lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl,sha256=QweRrIIM-zFcgg98GXA2CaWfIbgVE0XKEeYSfvv67A0,4563 +lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl,sha256=xSZ_Ekq_I-62ZpiE5AqYYHwFW_qh855zt9V4_s7rbkY,11703 +lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl,sha256=x42QJ-dxQ1waPzydsCoQnp2Xj15y53nW43O7BuoDRHk,39957 +lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl,sha256=Tr9BnO6pzjVWwhqJfm10UlvAy95EgfSCz2iMlrVGT6Q,2015 +lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl,sha256=ue8q_88X4e_jsJizo31GRNBxNhdxkEE9fY20oq0Iqwk,71764 +lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl,sha256=BBAdsVSi5zAzeGepuN6gS1saQINDqITXKplmmj4dTWg,20382 +lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt,sha256=OGLiFswuLJEW5EPYKOeoauuCJFEtVa6jyzBE1OcJI98,3310 +lxml/iterparse.pxi,sha256=JXvYhSOCaRjT_hYbRGMlJt2rlqx0TiRpN4FE1jQc63w,16521 +lxml/lxml.etree.h,sha256=_NkGkD3C_jpE4UegvQ6Y32_ycTbUCLyOBz9xfWRPkug,9792 +lxml/lxml.etree_api.h,sha256=dAbJPd53D_9CIGzePAUB3otgyhG4o2cSdA4-6apdzRA,17377 +lxml/nsclasses.pxi,sha256=5pzNBhBtlqObPdThL9QIGRs1Dxj1qnr0PyXuTCURqTg,9129 +lxml/objectify.cpython-312-x86_64-linux-gnu.so,sha256=TYF3CoGF-cenIwFh_1nY0sr2UI2wdsS8tZO2Wi0evyg,2933112 +lxml/objectify.pyx,sha256=I4bQQXmQssBtk5bTrid-eVURBLKRTM5iQZiviugIrts,75823 +lxml/objectpath.pxi,sha256=s5TNG2-EbaWWKLFAiX303B95zK_Ui8ausB__3QvFFGw,11450 +lxml/parser.pxi,sha256=VZfychEJ3-XPE3x6oGOEzn6HVAr74R7lXfDSVF-hq-U,85411 +lxml/parsertarget.pxi,sha256=v1PidxRaG5giwXcTDkpBI7PDFmsZuOcK0y9LdkQaY8M,6326 +lxml/proxy.pxi,sha256=8IVvYF2KTuzl7Hb3XGHEmcxfSLbUZkA2Q1Y50hLsyzE,23929 +lxml/public-api.pxi,sha256=XoP6_cJOEoQIItvE1RiYCKYD1ry4AobaOr4XLo0KSE4,6666 +lxml/pyclasslookup.py,sha256=gLD1HM2HtITYYiGzjEOewSwbB7XkVx_NZv_quCt79Oc,92 +lxml/readonlytree.pxi,sha256=ddRYczhHieJ4XUvWvTPW9N9oQ8vuKtv7lC1mtE1qvH8,18976 +lxml/relaxng.pxi,sha256=3OQ-fZMzP-KF5vM6HTozT_9ee3J0DJnpj9RcHC8LoMw,6339 +lxml/sax.cpython-312-x86_64-linux-gnu.so,sha256=UQn-l56AOOT5UUJ395Fil7It-Im_brnlsMYfmUpwQe0,190272 +lxml/sax.py,sha256=yrNvKD6rlon48jrR-1qpFXER8j4psYC2R5yt0u6TWLs,9706 +lxml/saxparser.pxi,sha256=TmkdM5h9xII9iKRaBk_1NGk2KTfeesl5Ha8bpFQGqLc,33529 +lxml/schematron.pxi,sha256=F2OHKZUl57-byBk_wWtPTnHZ1fwlj0FtwG3VuGtG-UY,6064 +lxml/serializer.pxi,sha256=iIXfechFHfvFs2sTk7wMIy3sDJxmaMPbNO33mkLLBUE,68063 +lxml/usedoctest.py,sha256=qRgZKQVcAZcl-zN0AIXVJnOsETUXz2nPXkxuzs1lGgk,230 +lxml/xinclude.pxi,sha256=7eBrI_OK47mmrHQ0ixbixRI8pKqQ1nwkMV-OmKUVlD4,2456 +lxml/xmlerror.pxi,sha256=i1kR42WB2BAxtrmh7m2ADlH-jffVQ-blW3pW0Ps4s-g,50061 +lxml/xmlid.pxi,sha256=5zf9oR6bsCtavGiOmilNyHqYwgG_bnrIabSd2SURtm0,6073 +lxml/xmlschema.pxi,sha256=mumNoHni5S3BQPtcmOHRd61KRaVWu4eOie2wQeB0e6E,8490 +lxml/xpath.pxi,sha256=aqW24V817dUxps4Gnc8h7Tm3QVlITKvxU5_9WgJUIFg,19132 +lxml/xslt.pxi,sha256=wxdbuvNFVA8eP57tHmBYWER__ceFhf6HGdsbBHbx_0A,36315 +lxml/xsltext.pxi,sha256=TImDiAPlAezC07P7RY1N9YChA7AuKFH-G53hXdel9yc,11088 diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml-6.0.2.dist-info/WHEEL b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml-6.0.2.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..7ec38e9cfde4b6cba61ce79b6423605719654196 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml-6.0.2.dist-info/WHEEL @@ -0,0 +1,6 @@ 
+Wheel-Version: 1.0 +Generator: setuptools (80.9.0) +Root-Is-Purelib: false +Tag: cp312-cp312-manylinux_2_26_x86_64 +Tag: cp312-cp312-manylinux_2_28_x86_64 + diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml-6.0.2.dist-info/top_level.txt b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml-6.0.2.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..ab90481d5d75200bdc779014d93d69ff85bf9742 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml-6.0.2.dist-info/top_level.txt @@ -0,0 +1 @@ +lxml diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/INSTALLER b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/License.txt b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/License.txt new file mode 100644 index 0000000000000000000000000000000000000000..b491c70e0aef319022ded661e111ddbd45b8a17f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/License.txt @@ -0,0 +1,1568 @@ +End User License Agreement +-------------------------- + + +Preface +------- + +The Software License Agreement in Chapter 1 and the Supplement +in Chapter 2 contain license terms and conditions that govern +the use of NVIDIA software. By accepting this agreement, you +agree to comply with all the terms and conditions applicable +to the product(s) included herein. + + +NVIDIA Driver + + +Description + +This package contains the operating system driver and +fundamental system software components for NVIDIA GPUs. + + +NVIDIA CUDA Toolkit + + +Description + +The NVIDIA CUDA Toolkit provides command-line and graphical +tools for building, debugging and optimizing the performance +of applications accelerated by NVIDIA GPUs, runtime and math +libraries, and documentation including programming guides, +user manuals, and API references. + + +Default Install Location of CUDA Toolkit + +Windows platform: + +%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v#.# + +Linux platform: + +/usr/local/cuda-#.# + +Mac platform: + +/Developer/NVIDIA/CUDA-#.# + + +NVIDIA CUDA Samples + + +Description + +This package includes over 100+ CUDA examples that demonstrate +various CUDA programming principles, and efficient CUDA +implementation of algorithms in specific application domains. + + +Default Install Location of CUDA Samples + +Windows platform: + +%ProgramData%\NVIDIA Corporation\CUDA Samples\v#.# + +Linux platform: + +/usr/local/cuda-#.#/samples + +and + +$HOME/NVIDIA_CUDA-#.#_Samples + +Mac platform: + +/Developer/NVIDIA/CUDA-#.#/samples + + +NVIDIA Nsight Visual Studio Edition (Windows only) + + +Description + +NVIDIA Nsight Development Platform, Visual Studio Edition is a +development environment integrated into Microsoft Visual +Studio that provides tools for debugging, profiling, analyzing +and optimizing your GPU computing and graphics applications. 
+ + +Default Install Location of Nsight Visual Studio Edition + +Windows platform: + +%ProgramFiles(x86)%\NVIDIA Corporation\Nsight Visual Studio Edition #.# + + +1. License Agreement for NVIDIA Software Development Kits +--------------------------------------------------------- + + +Release Date: July 26, 2018 +--------------------------- + + +Important NoticeRead before downloading, installing, +copying or using the licensed software: +------------------------------------------------------- + +This license agreement, including exhibits attached +("Agreement”) is a legal agreement between you and NVIDIA +Corporation ("NVIDIA") and governs your use of a NVIDIA +software development kit (“SDK”). + +Each SDK has its own set of software and materials, but here +is a description of the types of items that may be included in +a SDK: source code, header files, APIs, data sets and assets +(examples include images, textures, models, scenes, videos, +native API input/output files), binary software, sample code, +libraries, utility programs, programming code and +documentation. + +This Agreement can be accepted only by an adult of legal age +of majority in the country in which the SDK is used. + +If you are entering into this Agreement on behalf of a company +or other legal entity, you represent that you have the legal +authority to bind the entity to this Agreement, in which case +“you” will mean the entity you represent. + +If you don’t have the required age or authority to accept +this Agreement, or if you don’t accept all the terms and +conditions of this Agreement, do not download, install or use +the SDK. + +You agree to use the SDK only for purposes that are permitted +by (a) this Agreement, and (b) any applicable law, regulation +or generally accepted practices or guidelines in the relevant +jurisdictions. + + +1.1. License + + +1.1.1. License Grant + +Subject to the terms of this Agreement, NVIDIA hereby grants +you a non-exclusive, non-transferable license, without the +right to sublicense (except as expressly provided in this +Agreement) to: + + 1. Install and use the SDK, + + 2. Modify and create derivative works of sample source code + delivered in the SDK, and + + 3. Distribute those portions of the SDK that are identified + in this Agreement as distributable, as incorporated in + object code format into a software application that meets + the distribution requirements indicated in this Agreement. + + +1.1.2. Distribution Requirements + +These are the distribution requirements for you to exercise +the distribution grant: + + 1. Your application must have material additional + functionality, beyond the included portions of the SDK. + + 2. The distributable portions of the SDK shall only be + accessed by your application. + + 3. The following notice shall be included in modifications + and derivative works of sample source code distributed: + “This software contains source code provided by NVIDIA + Corporation.” + + 4. Unless a developer tool is identified in this Agreement + as distributable, it is delivered for your internal use + only. + + 5. The terms under which you distribute your application + must be consistent with the terms of this Agreement, + including (without limitation) terms relating to the + license grant and license restrictions and protection of + NVIDIA’s intellectual property rights. Additionally, you + agree that you will protect the privacy, security and + legal rights of your application users. + + 6. 
You agree to notify NVIDIA in writing of any known or + suspected distribution or use of the SDK not in compliance + with the requirements of this Agreement, and to enforce + the terms of your agreements with respect to distributed + SDK. + + +1.1.3. Authorized Users + +You may allow employees and contractors of your entity or of +your subsidiary(ies) to access and use the SDK from your +secure network to perform work on your behalf. + +If you are an academic institution you may allow users +enrolled or employed by the academic institution to access and +use the SDK from your secure network. + +You are responsible for the compliance with the terms of this +Agreement by your authorized users. If you become aware that +your authorized users didn’t follow the terms of this +Agreement, you agree to take reasonable steps to resolve the +non-compliance and prevent new occurrences. + + +1.1.4. Pre-Release SDK + +The SDK versions identified as alpha, beta, preview or +otherwise as pre-release, may not be fully functional, may +contain errors or design flaws, and may have reduced or +different security, privacy, accessibility, availability, and +reliability standards relative to commercial versions of +NVIDIA software and materials. Use of a pre-release SDK may +result in unexpected results, loss of data, project delays or +other unpredictable damage or loss. + +You may use a pre-release SDK at your own risk, understanding +that pre-release SDKs are not intended for use in production +or business-critical systems. + +NVIDIA may choose not to make available a commercial version +of any pre-release SDK. NVIDIA may also choose to abandon +development and terminate the availability of a pre-release +SDK at any time without liability. + + +1.1.5. Updates + +NVIDIA may, at its option, make available patches, workarounds +or other updates to this SDK. Unless the updates are provided +with their separate governing terms, they are deemed part of +the SDK licensed to you as provided in this Agreement. You +agree that the form and content of the SDK that NVIDIA +provides may change without prior notice to you. While NVIDIA +generally maintains compatibility between versions, NVIDIA may +in some cases make changes that introduce incompatibilities in +future versions of the SDK. + + +1.1.6. Third Party Licenses + +The SDK may come bundled with, or otherwise include or be +distributed with, third party software licensed by a NVIDIA +supplier and/or open source software provided under an open +source license. Use of third party software is subject to the +third-party license terms, or in the absence of third party +terms, the terms of this Agreement. Copyright to third party +software is held by the copyright holders indicated in the +third-party software or license. + + +1.1.7. Reservation of Rights + +NVIDIA reserves all rights, title, and interest in and to the +SDK, not expressly granted to you under this Agreement. + + +1.2. Limitations + +The following license limitations apply to your use of the +SDK: + + 1. You may not reverse engineer, decompile or disassemble, + or remove copyright or other proprietary notices from any + portion of the SDK or copies of the SDK. + + 2. Except as expressly provided in this Agreement, you may + not copy, sell, rent, sublicense, transfer, distribute, + modify, or create derivative works of any portion of the + SDK. For clarity, you may not distribute or sublicense the + SDK as a stand-alone product. + + 3. 
Unless you have an agreement with NVIDIA for this + purpose, you may not indicate that an application created + with the SDK is sponsored or endorsed by NVIDIA. + + 4. You may not bypass, disable, or circumvent any + encryption, security, digital rights management or + authentication mechanism in the SDK. + + 5. You may not use the SDK in any manner that would cause it + to become subject to an open source software license. As + examples, licenses that require as a condition of use, + modification, and/or distribution that the SDK be: + + a. Disclosed or distributed in source code form; + + b. Licensed for the purpose of making derivative works; + or + + c. Redistributable at no charge. + + 6. Unless you have an agreement with NVIDIA for this + purpose, you may not use the SDK with any system or + application where the use or failure of the system or + application can reasonably be expected to threaten or + result in personal injury, death, or catastrophic loss. + Examples include use in avionics, navigation, military, + medical, life support or other life critical applications. + NVIDIA does not design, test or manufacture the SDK for + these critical uses and NVIDIA shall not be liable to you + or any third party, in whole or in part, for any claims or + damages arising from such uses. + + 7. You agree to defend, indemnify and hold harmless NVIDIA + and its affiliates, and their respective employees, + contractors, agents, officers and directors, from and + against any and all claims, damages, obligations, losses, + liabilities, costs or debt, fines, restitutions and + expenses (including but not limited to attorney’s fees + and costs incident to establishing the right of + indemnification) arising out of or related to your use of + the SDK outside of the scope of this Agreement, or not in + compliance with its terms. + + +1.3. Ownership + + 1. NVIDIA or its licensors hold all rights, title and + interest in and to the SDK and its modifications and + derivative works, including their respective intellectual + property rights, subject to your rights described in this + section. This SDK may include software and materials from + NVIDIA’s licensors, and these licensors are intended + third party beneficiaries that may enforce this Agreement + with respect to their intellectual property rights. + + 2. You hold all rights, title and interest in and to your + applications and your derivative works of the sample + source code delivered in the SDK, including their + respective intellectual property rights, subject to + NVIDIA’s rights described in this section. + + 3. You may, but don’t have to, provide to NVIDIA + suggestions, feature requests or other feedback regarding + the SDK, including possible enhancements or modifications + to the SDK. For any feedback that you voluntarily provide, + you hereby grant NVIDIA and its affiliates a perpetual, + non-exclusive, worldwide, irrevocable license to use, + reproduce, modify, license, sublicense (through multiple + tiers of sublicensees), and distribute (through multiple + tiers of distributors) it without the payment of any + royalties or fees to you. NVIDIA will use feedback at its + choice. NVIDIA is constantly looking for ways to improve + its products, so you may send feedback to NVIDIA through + the developer portal at https://developer.nvidia.com. + + +1.4. 
No Warranties + +THE SDK IS PROVIDED BY NVIDIA “AS IS” AND “WITH ALL +FAULTS.” TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND +ITS AFFILIATES EXPRESSLY DISCLAIM ALL WARRANTIES OF ANY KIND +OR NATURE, WHETHER EXPRESS, IMPLIED OR STATUTORY, INCLUDING, +BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE, TITLE, NON-INFRINGEMENT, OR THE +ABSENCE OF ANY DEFECTS THEREIN, WHETHER LATENT OR PATENT. NO +WARRANTY IS MADE ON THE BASIS OF TRADE USAGE, COURSE OF +DEALING OR COURSE OF TRADE. + + +1.5. Limitation of Liability + +TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS +AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL, +PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, LOSS +OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF +PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION +WITH THIS AGREEMENT OR THE USE OR PERFORMANCE OF THE SDK, +WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH +OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE), +PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF +LIABILITY. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES +TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS +AGREEMENT EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE +NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS +LIMIT. + +These exclusions and limitations of liability shall apply +regardless if NVIDIA or its affiliates have been advised of +the possibility of such damages, and regardless of whether a +remedy fails its essential purpose. These exclusions and +limitations of liability form an essential basis of the +bargain between the parties, and, absent any of these +exclusions or limitations of liability, the provisions of this +Agreement, including, without limitation, the economic terms, +would be substantially different. + + +1.6. Termination + + 1. This Agreement will continue to apply until terminated by + either you or NVIDIA as described below. + + 2. If you want to terminate this Agreement, you may do so by + stopping to use the SDK. + + 3. NVIDIA may, at any time, terminate this Agreement if: + + a. (i) you fail to comply with any term of this + Agreement and the non-compliance is not fixed within + thirty (30) days following notice from NVIDIA (or + immediately if you violate NVIDIA’s intellectual + property rights); + + b. (ii) you commence or participate in any legal + proceeding against NVIDIA with respect to the SDK; or + + c. (iii) NVIDIA decides to no longer provide the SDK in + a country or, in NVIDIA’s sole discretion, the + continued use of it is no longer commercially viable. + + 4. Upon any termination of this Agreement, you agree to + promptly discontinue use of the SDK and destroy all copies + in your possession or control. Your prior distributions in + accordance with this Agreement are not affected by the + termination of this Agreement. Upon written request, you + will certify in writing that you have complied with your + commitments under this section. Upon any termination of + this Agreement all provisions survive except for the + license grant provisions. + + +1.7. General + +If you wish to assign this Agreement or your rights and +obligations, including by merger, consolidation, dissolution +or operation of law, contact NVIDIA to ask for permission. Any +attempted assignment not approved by NVIDIA in writing shall +be void and of no effect. 
NVIDIA may assign, delegate or +transfer this Agreement and its rights and obligations, and if +to a non-affiliate you will be notified. + +You agree to cooperate with NVIDIA and provide reasonably +requested information to verify your compliance with this +Agreement. + +This Agreement will be governed in all respects by the laws of +the United States and of the State of Delaware as those laws +are applied to contracts entered into and performed entirely +within Delaware by Delaware residents, without regard to the +conflicts of laws principles. The United Nations Convention on +Contracts for the International Sale of Goods is specifically +disclaimed. You agree to all terms of this Agreement in the +English language. + +The state or federal courts residing in Santa Clara County, +California shall have exclusive jurisdiction over any dispute +or claim arising out of this Agreement. Notwithstanding this, +you agree that NVIDIA shall still be allowed to apply for +injunctive remedies or an equivalent type of urgent legal +relief in any jurisdiction. + +If any court of competent jurisdiction determines that any +provision of this Agreement is illegal, invalid or +unenforceable, such provision will be construed as limited to +the extent necessary to be consistent with and fully +enforceable under the law and the remaining provisions will +remain in full force and effect. Unless otherwise specified, +remedies are cumulative. + +Each party acknowledges and agrees that the other is an +independent contractor in the performance of this Agreement. + +The SDK has been developed entirely at private expense and is +“commercial items” consisting of “commercial computer +software” and “commercial computer software +documentation” provided with RESTRICTED RIGHTS. Use, +duplication or disclosure by the U.S. Government or a U.S. +Government subcontractor is subject to the restrictions in +this Agreement pursuant to DFARS 227.7202-3(a) or as set forth +in subparagraphs (c)(1) and (2) of the Commercial Computer +Software - Restricted Rights clause at FAR 52.227-19, as +applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas +Expressway, Santa Clara, CA 95051. + +The SDK is subject to United States export laws and +regulations. You agree that you will not ship, transfer or +export the SDK into any country, or use the SDK in any manner, +prohibited by the United States Bureau of Industry and +Security or economic sanctions regulations administered by the +U.S. Department of Treasury’s Office of Foreign Assets +Control (OFAC), or any applicable export laws, restrictions or +regulations. These laws include restrictions on destinations, +end users and end use. By accepting this Agreement, you +confirm that you are not a resident or citizen of any country +currently embargoed by the U.S. and that you are not otherwise +prohibited from receiving the SDK. + +Any notice delivered by NVIDIA to you under this Agreement +will be delivered via mail, email or fax. You agree that any +notices that NVIDIA sends you electronically will satisfy any +legal communication requirements. Please direct your legal +notices or other correspondence to NVIDIA Corporation, 2788 +San Tomas Expressway, Santa Clara, California 95051, United +States of America, Attention: Legal Department. 
+ +This Agreement and any exhibits incorporated into this +Agreement constitute the entire agreement of the parties with +respect to the subject matter of this Agreement and supersede +all prior negotiations or documentation exchanged between the +parties relating to this SDK license. Any additional and/or +conflicting terms on documents issued by you are null, void, +and invalid. Any amendment or waiver under this Agreement +shall be in writing and signed by representatives of both +parties. + + +2. CUDA Toolkit Supplement to Software License Agreement for +NVIDIA Software Development Kits +------------------------------------------------------------ + + +Release date: August 16, 2018 +----------------------------- + +The terms in this supplement govern your use of the NVIDIA +CUDA Toolkit SDK under the terms of your license agreement +(“Agreement”) as modified by this supplement. Capitalized +terms used but not defined below have the meaning assigned to +them in the Agreement. + +This supplement is an exhibit to the Agreement and is +incorporated as an integral part of the Agreement. In the +event of conflict between the terms in this supplement and the +terms in the Agreement, the terms in this supplement govern. + + +2.1. License Scope + +The SDK is licensed for you to develop applications only for +use in systems with NVIDIA GPUs. + + +2.2. Distribution + +The portions of the SDK that are distributable under the +Agreement are listed in Attachment A. + + +2.3. Operating Systems + +Those portions of the SDK designed exclusively for use on the +Linux or FreeBSD operating systems, or other operating systems +derived from the source code to these operating systems, may +be copied and redistributed for use in accordance with this +Agreement, provided that the object code files are not +modified in any way (except for unzipping of compressed +files). + + +2.4. Audio and Video Encoders and Decoders + +You acknowledge and agree that it is your sole responsibility +to obtain any additional third-party licenses required to +make, have made, use, have used, sell, import, and offer for +sale your products or services that include or incorporate any +third-party software and content relating to audio and/or +video encoders and decoders from, including but not limited +to, Microsoft, Thomson, Fraunhofer IIS, Sisvel S.p.A., +MPEG-LA, and Coding Technologies. NVIDIA does not grant to you +under this Agreement any necessary patent or other rights with +respect to any audio and/or video encoders and decoders. + + +2.5. Licensing + +If the distribution terms in this Agreement are not suitable +for your organization, or for any questions regarding this +Agreement, please contact NVIDIA at +nvidia-compute-license-questions@nvidia.com. + + +2.6. 
Attachment A + +The following portions of the SDK are distributable under the +Agreement: + +Component + +CUDA Runtime + +Windows + +cudart.dll, cudart_static.lib, cudadevrt.lib + +Mac OSX + +libcudart.dylib, libcudart_static.a, libcudadevrt.a + +Linux + +libcudart.so, libcudart_static.a, libcudadevrt.a + +Android + +libcudart.so, libcudart_static.a, libcudadevrt.a + +Component + +CUDA FFT Library + +Windows + +cufft.dll, cufftw.dll, cufft.lib, cufftw.lib + +Mac OSX + +libcufft.dylib, libcufft_static.a, libcufftw.dylib, +libcufftw_static.a + +Linux + +libcufft.so, libcufft_static.a, libcufftw.so, +libcufftw_static.a + +Android + +libcufft.so, libcufft_static.a, libcufftw.so, +libcufftw_static.a + +Component + +CUDA BLAS Library + +Windows + +cublas.dll, cublasLt.dll + +Mac OSX + +libcublas.dylib, libcublasLt.dylib, libcublas_static.a, +libcublasLt_static.a + +Linux + +libcublas.so, libcublasLt.so, libcublas_static.a, +libcublasLt_static.a + +Android + +libcublas.so, libcublasLt.so, libcublas_static.a, +libcublasLt_static.a + +Component + +NVIDIA "Drop-in" BLAS Library + +Windows + +nvblas.dll + +Mac OSX + +libnvblas.dylib + +Linux + +libnvblas.so + +Component + +CUDA Sparse Matrix Library + +Windows + +cusparse.dll, cusparse.lib + +Mac OSX + +libcusparse.dylib, libcusparse_static.a + +Linux + +libcusparse.so, libcusparse_static.a + +Android + +libcusparse.so, libcusparse_static.a + +Component + +CUDA Linear Solver Library + +Windows + +cusolver.dll, cusolver.lib + +Mac OSX + +libcusolver.dylib, libcusolver_static.a + +Linux + +libcusolver.so, libcusolver_static.a + +Android + +libcusolver.so, libcusolver_static.a + +Component + +CUDA Random Number Generation Library + +Windows + +curand.dll, curand.lib + +Mac OSX + +libcurand.dylib, libcurand_static.a + +Linux + +libcurand.so, libcurand_static.a + +Android + +libcurand.so, libcurand_static.a + +Component + +CUDA Accelerated Graph Library + +Component + +NVIDIA Performance Primitives Library + +Windows + +nppc.dll, nppc.lib, nppial.dll, nppial.lib, nppicc.dll, +nppicc.lib, nppicom.dll, nppicom.lib, nppidei.dll, +nppidei.lib, nppif.dll, nppif.lib, nppig.dll, nppig.lib, +nppim.dll, nppim.lib, nppist.dll, nppist.lib, nppisu.dll, +nppisu.lib, nppitc.dll, nppitc.lib, npps.dll, npps.lib + +Mac OSX + +libnppc.dylib, libnppc_static.a, libnppial.dylib, +libnppial_static.a, libnppicc.dylib, libnppicc_static.a, +libnppicom.dylib, libnppicom_static.a, libnppidei.dylib, +libnppidei_static.a, libnppif.dylib, libnppif_static.a, +libnppig.dylib, libnppig_static.a, libnppim.dylib, +libnppisu_static.a, libnppitc.dylib, libnppitc_static.a, +libnpps.dylib, libnpps_static.a + +Linux + +libnppc.so, libnppc_static.a, libnppial.so, +libnppial_static.a, libnppicc.so, libnppicc_static.a, +libnppicom.so, libnppicom_static.a, libnppidei.so, +libnppidei_static.a, libnppif.so, libnppif_static.a +libnppig.so, libnppig_static.a, libnppim.so, +libnppim_static.a, libnppist.so, libnppist_static.a, +libnppisu.so, libnppisu_static.a, libnppitc.so +libnppitc_static.a, libnpps.so, libnpps_static.a + +Android + +libnppc.so, libnppc_static.a, libnppial.so, +libnppial_static.a, libnppicc.so, libnppicc_static.a, +libnppicom.so, libnppicom_static.a, libnppidei.so, +libnppidei_static.a, libnppif.so, libnppif_static.a +libnppig.so, libnppig_static.a, libnppim.so, +libnppim_static.a, libnppist.so, libnppist_static.a, +libnppisu.so, libnppisu_static.a, libnppitc.so +libnppitc_static.a, libnpps.so, libnpps_static.a + +Component + +NVIDIA JPEG Library + +Linux + +libnvjpeg.so, 
libnvjpeg_static.a + +Component + +Internal common library required for statically linking to +cuBLAS, cuSPARSE, cuFFT, cuRAND, nvJPEG and NPP + +Mac OSX + +libculibos.a + +Linux + +libculibos.a + +Component + +NVIDIA Runtime Compilation Library and Header + +All + +nvrtc.h + +Windows + +nvrtc.dll, nvrtc-builtins.dll + +Mac OSX + +libnvrtc.dylib, libnvrtc-builtins.dylib + +Linux + +libnvrtc.so, libnvrtc-builtins.so + +Component + +NVIDIA Optimizing Compiler Library + +Windows + +nvvm.dll + +Mac OSX + +libnvvm.dylib + +Linux + +libnvvm.so + +Component + +NVIDIA Common Device Math Functions Library + +Windows + +libdevice.10.bc + +Mac OSX + +libdevice.10.bc + +Linux + +libdevice.10.bc + +Component + +CUDA Occupancy Calculation Header Library + +All + +cuda_occupancy.h + +Component + +CUDA Half Precision Headers + +All + +cuda_fp16.h, cuda_fp16.hpp + +Component + +CUDA Profiling Tools Interface (CUPTI) Library + +Windows + +cupti.dll + +Mac OSX + +libcupti.dylib + +Linux + +libcupti.so + +Component + +NVIDIA Tools Extension Library + +Windows + +nvToolsExt.dll, nvToolsExt.lib + +Mac OSX + +libnvToolsExt.dylib + +Linux + +libnvToolsExt.so + +Component + +NVIDIA CUDA Driver Libraries + +Linux + +libcuda.so, libnvidia-fatbinaryloader.so, +libnvidia-ptxjitcompiler.so + +The NVIDIA CUDA Driver Libraries are only distributable in +applications that meet this criteria: + + 1. The application was developed starting from a NVIDIA CUDA + container obtained from Docker Hub or the NVIDIA GPU + Cloud, and + + 2. The resulting application is packaged as a Docker + container and distributed to users on Docker Hub or the + NVIDIA GPU Cloud only. + + +2.7. Attachment B + + +Additional Licensing Obligations + +The following third party components included in the SOFTWARE +are licensed to Licensee pursuant to the following terms and +conditions: + + 1. Licensee's use of the GDB third party component is + subject to the terms and conditions of GNU GPL v3: + + This product includes copyrighted third-party software licensed + under the terms of the GNU General Public License v3 ("GPL v3"). + All third-party software packages are copyright by their respective + authors. GPL v3 terms and conditions are hereby incorporated into + the Agreement by this reference: http://www.gnu.org/licenses/gpl.txt + + Consistent with these licensing requirements, the software + listed below is provided under the terms of the specified + open source software licenses. To obtain source code for + software provided under licenses that require + redistribution of source code, including the GNU General + Public License (GPL) and GNU Lesser General Public License + (LGPL), contact oss-requests@nvidia.com. This offer is + valid for a period of three (3) years from the date of the + distribution of this product by NVIDIA CORPORATION. + + Component License + CUDA-GDB GPL v3 + + 2. Licensee represents and warrants that any and all third + party licensing and/or royalty payment obligations in + connection with Licensee's use of the H.264 video codecs + are solely the responsibility of Licensee. + + 3. Licensee's use of the Thrust library is subject to the + terms and conditions of the Apache License Version 2.0. + All third-party software packages are copyright by their + respective authors. Apache License Version 2.0 terms and + conditions are hereby incorporated into the Agreement by + this reference. 
+ http://www.apache.org/licenses/LICENSE-2.0.html + + In addition, Licensee acknowledges the following notice: + Thrust includes source code from the Boost Iterator, + Tuple, System, and Random Number libraries. + + Boost Software License - Version 1.0 - August 17th, 2003 + . . . . + + Permission is hereby granted, free of charge, to any person or + organization obtaining a copy of the software and accompanying + documentation covered by this license (the "Software") to use, + reproduce, display, distribute, execute, and transmit the Software, + and to prepare derivative works of the Software, and to permit + third-parties to whom the Software is furnished to do so, all + subject to the following: + + The copyright notices in the Software and this entire statement, + including the above license grant, this restriction and the following + disclaimer, must be included in all copies of the Software, in whole + or in part, and all derivative works of the Software, unless such + copies or derivative works are solely in the form of machine-executable + object code generated by a source language processor. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND + NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR + ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR + OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + 4. Licensee's use of the LLVM third party component is + subject to the following terms and conditions: + + ====================================================== + LLVM Release License + ====================================================== + University of Illinois/NCSA + Open Source License + + Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign. + All rights reserved. + + Developed by: + + LLVM Team + + University of Illinois at Urbana-Champaign + + http://llvm.org + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to + deal with the Software without restriction, including without limitation the + rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + sell copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimers. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimers in the + documentation and/or other materials provided with the distribution. + + * Neither the names of the LLVM Team, University of Illinois at Urbana- + Champaign, nor the names of its contributors may be used to endorse or + promote products derived from this Software without specific prior + written permission. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR + OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + DEALINGS WITH THE SOFTWARE. + + 5. Licensee's use (e.g. nvprof) of the PCRE third party + component is subject to the following terms and + conditions: + + ------------ + PCRE LICENCE + ------------ + PCRE is a library of functions to support regular expressions whose syntax + and semantics are as close as possible to those of the Perl 5 language. + Release 8 of PCRE is distributed under the terms of the "BSD" licence, as + specified below. The documentation for PCRE, supplied in the "doc" + directory, is distributed under the same terms as the software itself. The + basic library functions are written in C and are freestanding. Also + included in the distribution is a set of C++ wrapper functions, and a just- + in-time compiler that can be used to optimize pattern matching. These are + both optional features that can be omitted when the library is built. + + THE BASIC LIBRARY FUNCTIONS + --------------------------- + Written by: Philip Hazel + Email local part: ph10 + Email domain: cam.ac.uk + University of Cambridge Computing Service, + Cambridge, England. + Copyright (c) 1997-2012 University of Cambridge + All rights reserved. + + PCRE JUST-IN-TIME COMPILATION SUPPORT + ------------------------------------- + Written by: Zoltan Herczeg + Email local part: hzmester + Emain domain: freemail.hu + Copyright(c) 2010-2012 Zoltan Herczeg + All rights reserved. + + STACK-LESS JUST-IN-TIME COMPILER + -------------------------------- + Written by: Zoltan Herczeg + Email local part: hzmester + Emain domain: freemail.hu + Copyright(c) 2009-2012 Zoltan Herczeg + All rights reserved. + + THE C++ WRAPPER FUNCTIONS + ------------------------- + Contributed by: Google Inc. + Copyright (c) 2007-2012, Google Inc. + All rights reserved. + + THE "BSD" LICENCE + ----------------- + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of the University of Cambridge nor the name of Google + Inc. nor the names of their contributors may be used to endorse or + promote products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + + 6. 
Some of the cuBLAS library routines were written by or + derived from code written by Vasily Volkov and are subject + to the Modified Berkeley Software Distribution License as + follows: + + Copyright (c) 2007-2009, Regents of the University of California + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of the University of California, Berkeley nor + the names of its contributors may be used to endorse or promote + products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + + 7. Some of the cuBLAS library routines were written by or + derived from code written by Davide Barbieri and are + subject to the Modified Berkeley Software Distribution + License as follows: + + Copyright (c) 2008-2009 Davide Barbieri @ University of Rome Tor Vergata. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * The name of the author may not be used to endorse or promote + products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR + IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING + IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + POSSIBILITY OF SUCH DAMAGE. + + 8. 
Some of the cuBLAS library routines were derived from + code developed by the University of Tennessee and are + subject to the Modified Berkeley Software Distribution + License as follows: + + Copyright (c) 2010 The University of Tennessee. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer listed in this license in the documentation and/or + other materials provided with the distribution. + * Neither the name of the copyright holders nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 9. Some of the cuBLAS library routines were written by or + derived from code written by Jonathan Hogg and are subject + to the Modified Berkeley Software Distribution License as + follows: + + Copyright (c) 2012, The Science and Technology Facilities Council (STFC). + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of the STFC nor the names of its contributors + may be used to endorse or promote products derived from this + software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE STFC BE + LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR + BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE + OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN + IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 10. 
Some of the cuBLAS library routines were written by or + derived from code written by Ahmad M. Abdelfattah, David + Keyes, and Hatem Ltaief, and are subject to the Apache + License, Version 2.0, as follows: + + -- (C) Copyright 2013 King Abdullah University of Science and Technology + Authors: + Ahmad Abdelfattah (ahmad.ahmad@kaust.edu.sa) + David Keyes (david.keyes@kaust.edu.sa) + Hatem Ltaief (hatem.ltaief@kaust.edu.sa) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of the King Abdullah University of Science and + Technology nor the names of its contributors may be used to endorse + or promote products derived from this software without specific prior + written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE + + 11. Some of the cuSPARSE library routines were written by or + derived from code written by Li-Wen Chang and are subject + to the NCSA Open Source License as follows: + + Copyright (c) 2012, University of Illinois. + + All rights reserved. + + Developed by: IMPACT Group, University of Illinois, http://impact.crhc.illinois.edu + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal with the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimers in the documentation and/or other materials provided + with the distribution. + * Neither the names of IMPACT Group, University of Illinois, nor + the names of its contributors may be used to endorse or promote + products derived from this Software without specific prior + written permission. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + NONINFRINGEMENT. 
IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT + HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR + IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE + SOFTWARE. + + 12. Some of the cuRAND library routines were written by or + derived from code written by Mutsuo Saito and Makoto + Matsumoto and are subject to the following license: + + Copyright (c) 2009, 2010 Mutsuo Saito, Makoto Matsumoto and Hiroshima + University. All rights reserved. + + Copyright (c) 2011 Mutsuo Saito, Makoto Matsumoto, Hiroshima + University and University of Tokyo. All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of the Hiroshima University nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 13. Some of the cuRAND library routines were derived from + code developed by D. E. Shaw Research and are subject to + the following license: + + Copyright 2010-2011, D. E. Shaw Research. + + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions, and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + * Neither the name of D. E. Shaw Research nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 14. Some of the Math library routines were written by or + derived from code developed by Norbert Juffa and are + subject to the following license: + + Copyright (c) 2015-2017, Norbert Juffa + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 15. Licensee's use of the lz4 third party component is + subject to the following terms and conditions: + + Copyright (C) 2011-2013, Yann Collet. + BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following disclaimer + in the documentation and/or other materials provided with the + distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + 16. 
The NPP library uses code from the Boost Math Toolkit, + and is subject to the following license: + + Boost Software License - Version 1.0 - August 17th, 2003 + . . . . + + Permission is hereby granted, free of charge, to any person or + organization obtaining a copy of the software and accompanying + documentation covered by this license (the "Software") to use, + reproduce, display, distribute, execute, and transmit the Software, + and to prepare derivative works of the Software, and to permit + third-parties to whom the Software is furnished to do so, all + subject to the following: + + The copyright notices in the Software and this entire statement, + including the above license grant, this restriction and the following + disclaimer, must be included in all copies of the Software, in whole + or in part, and all derivative works of the Software, unless such + copies or derivative works are solely in the form of machine-executable + object code generated by a source language processor. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND + NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR + ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR + OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING + FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + OTHER DEALINGS IN THE SOFTWARE. + + 17. Portions of the Nsight Eclipse Edition is subject to the + following license: + + The Eclipse Foundation makes available all content in this plug-in + ("Content"). Unless otherwise indicated below, the Content is provided + to you under the terms and conditions of the Eclipse Public License + Version 1.0 ("EPL"). A copy of the EPL is available at http:// + www.eclipse.org/legal/epl-v10.html. For purposes of the EPL, "Program" + will mean the Content. + + If you did not receive this Content directly from the Eclipse + Foundation, the Content is being redistributed by another party + ("Redistributor") and different terms and conditions may apply to your + use of any object code in the Content. Check the Redistributor's + license that was provided with the Content. If no such license exists, + contact the Redistributor. Unless otherwise indicated below, the terms + and conditions of the EPL still apply to any source code in the + Content and such source code may be obtained at http://www.eclipse.org. + + 18. Some of the cuBLAS library routines uses code from + OpenAI, which is subject to the following license: + + License URL + https://github.com/openai/openai-gemm/blob/master/LICENSE + + License Text + The MIT License + + Copyright (c) 2016 OpenAI (http://openai.com), 2016 Google Inc. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in + all copies or substantial portions of the Software. 
+ + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + THE SOFTWARE. + + 19. Licensee's use of the Visual Studio Setup Configuration + Samples is subject to the following license: + + The MIT License (MIT) + Copyright (C) Microsoft Corporation. All rights reserved. + + Permission is hereby granted, free of charge, to any person + obtaining a copy of this software and associated documentation + files (the "Software"), to deal in the Software without restriction, + including without limitation the rights to use, copy, modify, merge, + publish, distribute, sublicense, and/or sell copies of the Software, + and to permit persons to whom the Software is furnished to do so, + subject to the following conditions: + + The above copyright notice and this permission notice shall be included + in all copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + 20. Licensee's use of linmath.h header for CPU functions for + GL vector/matrix operations from lunarG is subject to the + Apache License Version 2.0. + + 21. The DX12-CUDA sample uses the d3dx12.h header, which is + subject to the MIT license . 
+ +----------------- diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/METADATA b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..7617880958e7c1f9e79db788e5ab17cad840dca2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/METADATA @@ -0,0 +1,44 @@ +Metadata-Version: 2.2 +Name: nvidia-curand-cu12 +Version: 10.3.9.90 +Summary: CURAND native runtime libraries +Home-page: https://developer.nvidia.com/cuda-zone +Author: Nvidia CUDA Installer Team +Author-email: compute_installer@nvidia.com +License: NVIDIA Proprietary Software +Keywords: cuda,nvidia,runtime,machine learning,deep learning +Classifier: Development Status :: 4 - Beta +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: Education +Classifier: Intended Audience :: Science/Research +Classifier: License :: Other/Proprietary License +Classifier: Natural Language :: English +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3 :: Only +Classifier: Topic :: Scientific/Engineering +Classifier: Topic :: Scientific/Engineering :: Mathematics +Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence +Classifier: Topic :: Software Development +Classifier: Topic :: Software Development :: Libraries +Classifier: Operating System :: Microsoft :: Windows +Classifier: Operating System :: POSIX :: Linux +Requires-Python: >=3 +License-File: License.txt +Dynamic: author +Dynamic: author-email +Dynamic: classifier +Dynamic: description +Dynamic: home-page +Dynamic: keywords +Dynamic: license +Dynamic: requires-python +Dynamic: summary + +CURAND native runtime libraries diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/RECORD b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..a67e810e231244231dc17956cfc4bf36432c8ab9 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/RECORD @@ -0,0 +1,32 @@ +nvidia/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +nvidia/__pycache__/__init__.cpython-312.pyc,, +nvidia/curand/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +nvidia/curand/__pycache__/__init__.cpython-312.pyc,, +nvidia/curand/include/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +nvidia/curand/include/__pycache__/__init__.cpython-312.pyc,, +nvidia/curand/include/curand.h,sha256=strQ9idlRTQoBJy_hAbAT4pgkW6BKYg8p_nUjbb8BVw,44075 +nvidia/curand/include/curand_discrete.h,sha256=2qD3BkI622XEu0444wVP7HeYkKAx0Rjr2HDhqU4SA7E,3486 +nvidia/curand/include/curand_discrete2.h,sha256=ZrQTO5R9x83AMX88uq7M8M94DLSC5VEz0PAkfcwtQeg,10883 +nvidia/curand/include/curand_globals.h,sha256=bES1Kx0NrATXk1DReMMkqWrB062nOnaAp39y22wViXU,3717 
+nvidia/curand/include/curand_kernel.h,sha256=SjfAeh13ybXIxiekcgczzua02kIAqETopJKRhYvCat8,53133 +nvidia/curand/include/curand_lognormal.h,sha256=-X-iNkJSzWpAYYjogm689EJTZfzore9sxU7ObddljLk,28142 +nvidia/curand/include/curand_mrg32k3a.h,sha256=ZVVREjGNsJQJ-3IzZZ_LKGtGteslicb8E0Aly49BKPs,170296 +nvidia/curand/include/curand_mtgp32.h,sha256=Qhrmx0pHWF-P2Uu5bKwYE9ymEWq3c7qBzCITVMaKMfI,7845 +nvidia/curand/include/curand_mtgp32_host.h,sha256=SXqzmSQkzTLSRJ4pojTg_TNCC3T-G89HdBK-boSDqr4,18274 +nvidia/curand/include/curand_mtgp32_kernel.h,sha256=ajZnXr5ZXnQExElf6LPpigrrKPTmMIZbRyTEnJ-BDhw,13731 +nvidia/curand/include/curand_mtgp32dc_p_11213.h,sha256=7_gGYUH47UugIAEt60vYH5nFa-QUwTpDwSEgLg9cZts,276889 +nvidia/curand/include/curand_normal.h,sha256=lnmYVk2fn0oEVWOytdKhXrHL36GLCjMnB8OnZeCaYcA,26953 +nvidia/curand/include/curand_normal_static.h,sha256=5K4iTC9AuSWCe1LVxuj_0y3BVjtp0bxO6hndv2rbmiw,4727 +nvidia/curand/include/curand_philox4x32_x.h,sha256=T21IP-Rdg3_tSVU9Je4dLKuwEqE4ovfwi7r1hOY92Dw,7166 +nvidia/curand/include/curand_poisson.h,sha256=KrhXOmO_D7aclnj8geIyHqdpSQwWHurS9V_pVtgzodM,25461 +nvidia/curand/include/curand_precalc.h,sha256=I6NZdgT42fMm9qSCtP-rlOAqt4Zsqgal0ajktcPmEak,1392393 +nvidia/curand/include/curand_uniform.h,sha256=gpmRgQu5r6ppgLTg60NXoDdVJS6wMUy6jC5bh8l04e8,17472 +nvidia/curand/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +nvidia/curand/lib/__pycache__/__init__.cpython-312.pyc,, +nvidia/curand/lib/libcurand.so.10,sha256=-b6gOKJwO3IVcf1FopmomBQf2MsmSlkSY1yVEW9ZYP4,136749240 +nvidia_curand_cu12-10.3.9.90.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +nvidia_curand_cu12-10.3.9.90.dist-info/License.txt,sha256=rW9YU_ugyg0VnQ9Y1JrkmDDC-Mk_epJki5zpCttMbM0,59262 +nvidia_curand_cu12-10.3.9.90.dist-info/METADATA,sha256=fU3xSITD3i7JIsVG2ZXO5i-aDlIls-ry2JUVICEsv28,1684 +nvidia_curand_cu12-10.3.9.90.dist-info/RECORD,, +nvidia_curand_cu12-10.3.9.90.dist-info/WHEEL,sha256=VtFLEVB-VX8niQT4kQ5pcQOOqiKvUvqfZe5V14HmU88,109 +nvidia_curand_cu12-10.3.9.90.dist-info/top_level.txt,sha256=fTkAtiFuL16nUrB9ytDDtpytz2t0B4NvYTnRzwAhO14,7 diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/WHEEL b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..476a64f798fcad2101388098d22bc98258e64990 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: setuptools (75.8.0) +Root-Is-Purelib: true +Tag: py3-none-manylinux_2_27_x86_64 + diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/top_level.txt b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..862f7abf232cdfbb928609856247292e81c9decb --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/top_level.txt @@ -0,0 +1 @@ +nvidia diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/__init__.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..f1b3d6243d800c28469efad4bb452e40c76e6703 --- /dev/null +++ 
b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/__init__.py @@ -0,0 +1,81 @@ +""" +.. codeauthor:: Tsuyoshi Hombashi +""" + +from .__version__ import __author__, __copyright__, __email__, __license__, __version__ +from ._base import AbstractSanitizer, AbstractValidator +from ._common import ( + ascii_symbols, + normalize_platform, + replace_ansi_escape, + replace_unprintable_char, + unprintable_ascii_chars, + validate_pathtype, + validate_unprintable_char, +) +from ._const import Platform +from ._filename import ( + FileNameSanitizer, + FileNameValidator, + is_valid_filename, + sanitize_filename, + validate_filename, +) +from ._filepath import ( + FilePathSanitizer, + FilePathValidator, + is_valid_filepath, + sanitize_filepath, + validate_filepath, +) +from ._ltsv import sanitize_ltsv_label, validate_ltsv_label +from ._symbol import replace_symbol, validate_symbol +from .error import ( + ErrorReason, + InvalidCharError, + InvalidReservedNameError, + NullNameError, + ReservedNameError, + ValidationError, + ValidReservedNameError, +) + + +__all__ = ( + "__author__", + "__copyright__", + "__email__", + "__license__", + "__version__", + "AbstractSanitizer", + "AbstractValidator", + "Platform", + "ascii_symbols", + "normalize_platform", + "replace_ansi_escape", + "replace_unprintable_char", + "unprintable_ascii_chars", + "validate_pathtype", + "validate_unprintable_char", + "FileNameSanitizer", + "FileNameValidator", + "is_valid_filename", + "sanitize_filename", + "validate_filename", + "FilePathSanitizer", + "FilePathValidator", + "is_valid_filepath", + "sanitize_filepath", + "validate_filepath", + "sanitize_ltsv_label", + "validate_ltsv_label", + "replace_symbol", + "validate_symbol", + "ErrorReason", + "InvalidCharError", + "InvalidReservedNameError", + "NullNameError", + "ReservedNameError", + "ValidationError", + "ValidReservedNameError", +) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/__version__.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/__version__.py new file mode 100644 index 0000000000000000000000000000000000000000..f6e32a6ccba9ee9e711229bf8af8026a84d4651c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/__version__.py @@ -0,0 +1,9 @@ +from typing import Final + + +__author__: Final = "Tsuyoshi Hombashi" +__copyright__: Final = f"Copyright 2016-2025, {__author__}" +__license__: Final = "MIT License" +__version__ = "3.3.1" +__maintainer__: Final = __author__ +__email__: Final = "tsuyoshi.hombashi@gmail.com" diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/_base.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/_base.py new file mode 100644 index 0000000000000000000000000000000000000000..c74e842ee63ec9b744ffc9c9b6bc54906fc86a9f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/_base.py @@ -0,0 +1,252 @@ +""" +.. 
codeauthor:: Tsuyoshi Hombashi +""" + +import abc +import os +import re +import sys +from collections.abc import Sequence +from typing import Final, Optional + +from ._common import normalize_platform, unprintable_ascii_chars +from ._const import DEFAULT_MIN_LEN, Platform +from ._types import PathType, PlatformType +from .error import ReservedNameError, ValidationError +from .handler import NullValueHandler, ReservedNameHandler, ValidationErrorHandler + + +class BaseFile: + _INVALID_PATH_CHARS: Final[str] = "".join(unprintable_ascii_chars) + _INVALID_FILENAME_CHARS: Final[str] = _INVALID_PATH_CHARS + "/" + _INVALID_WIN_PATH_CHARS: Final[str] = _INVALID_PATH_CHARS + ':*?"<>|\t\n\r\x0b\x0c' + _INVALID_WIN_FILENAME_CHARS: Final[str] = ( + _INVALID_FILENAME_CHARS + _INVALID_WIN_PATH_CHARS + "\\" + ) + + @property + def platform(self) -> Platform: + return self.__platform + + @property + def reserved_keywords(self) -> tuple[str, ...]: + return self._additional_reserved_names + + @property + def max_len(self) -> int: + return self._max_len + + def __init__( + self, + max_len: int, + fs_encoding: Optional[str], + additional_reserved_names: Optional[Sequence[str]] = None, + platform_max_len: Optional[int] = None, + platform: Optional[PlatformType] = None, + ) -> None: + if additional_reserved_names is None: + additional_reserved_names = tuple() + self._additional_reserved_names = tuple(n.upper() for n in additional_reserved_names) + + self.__platform = normalize_platform(platform) + + if platform_max_len is None: + platform_max_len = self._get_default_max_path_len() + + if max_len <= 0: + self._max_len = platform_max_len + else: + self._max_len = max_len + + self._max_len = min(self._max_len, platform_max_len) + + if fs_encoding: + self._fs_encoding = fs_encoding + else: + self._fs_encoding = sys.getfilesystemencoding() + + def _is_posix(self) -> bool: + return self.platform == Platform.POSIX + + def _is_universal(self) -> bool: + return self.platform == Platform.UNIVERSAL + + def _is_linux(self, include_universal: bool = False) -> bool: + if include_universal: + return self.platform in (Platform.UNIVERSAL, Platform.LINUX) + + return self.platform == Platform.LINUX + + def _is_windows(self, include_universal: bool = False) -> bool: + if include_universal: + return self.platform in (Platform.UNIVERSAL, Platform.WINDOWS) + + return self.platform == Platform.WINDOWS + + def _is_macos(self, include_universal: bool = False) -> bool: + if include_universal: + return self.platform in (Platform.UNIVERSAL, Platform.MACOS) + + return self.platform == Platform.MACOS + + def _get_default_max_path_len(self) -> int: + if self._is_linux(): + return 4096 + + if self._is_windows(): + return 260 + + if self._is_posix() or self._is_macos(): + return 1024 + + return 260 # universal + + +class AbstractValidator(BaseFile, metaclass=abc.ABCMeta): + def __init__( + self, + max_len: int, + fs_encoding: Optional[str], + check_reserved: bool, + additional_reserved_names: Optional[Sequence[str]] = None, + platform_max_len: Optional[int] = None, + platform: Optional[PlatformType] = None, + ) -> None: + self._check_reserved = check_reserved + + super().__init__( + max_len, + fs_encoding, + additional_reserved_names=additional_reserved_names, + platform_max_len=platform_max_len, + platform=platform, + ) + + @property + @abc.abstractmethod + def min_len(self) -> int: # pragma: no cover + pass + + @abc.abstractmethod + def validate(self, value: PathType) -> None: # pragma: no cover + pass + + def is_valid(self, value: PathType) 
-> bool: + try: + self.validate(value) + except (TypeError, ValidationError): + return False + + return True + + def _is_reserved_keyword(self, value: str) -> bool: + return value.upper() in self.reserved_keywords + + +class AbstractSanitizer(BaseFile, metaclass=abc.ABCMeta): + def __init__( + self, + validator: AbstractValidator, + max_len: int, + fs_encoding: Optional[str], + validate_after_sanitize: bool, + null_value_handler: Optional[ValidationErrorHandler] = None, + reserved_name_handler: Optional[ValidationErrorHandler] = None, + additional_reserved_names: Optional[Sequence[str]] = None, + platform_max_len: Optional[int] = None, + platform: Optional[PlatformType] = None, + ) -> None: + super().__init__( + max_len=max_len, + fs_encoding=fs_encoding, + additional_reserved_names=additional_reserved_names, + platform_max_len=platform_max_len, + platform=platform, + ) + + if null_value_handler is None: + null_value_handler = NullValueHandler.return_null_string + self._null_value_handler = null_value_handler + + if reserved_name_handler is None: + reserved_name_handler = ReservedNameHandler.add_trailing_underscore + self._reserved_name_handler = reserved_name_handler + + self._validate_after_sanitize = validate_after_sanitize + + self._validator = validator + + @abc.abstractmethod + def sanitize(self, value: PathType, replacement_text: str = "") -> PathType: # pragma: no cover + pass + + +class BaseValidator(AbstractValidator): + __RE_ROOT_NAME: Final = re.compile(r"([^\.]+)") + __RE_REPEAD_DOT: Final = re.compile(r"^\.{3,}") + + @property + def min_len(self) -> int: + return self._min_len + + def __init__( + self, + min_len: int, + max_len: int, + fs_encoding: Optional[str], + check_reserved: bool, + additional_reserved_names: Optional[Sequence[str]] = None, + platform_max_len: Optional[int] = None, + platform: Optional[PlatformType] = None, + ) -> None: + if min_len <= 0: + min_len = DEFAULT_MIN_LEN + self._min_len = max(min_len, 1) + + super().__init__( + max_len=max_len, + fs_encoding=fs_encoding, + check_reserved=check_reserved, + additional_reserved_names=additional_reserved_names, + platform_max_len=platform_max_len, + platform=platform, + ) + + self._validate_max_len() + + def _validate_reserved_keywords(self, name: str) -> None: + if not self._check_reserved: + return + + root_name = self.__extract_root_name(name) + base_name = os.path.basename(name) + + for name in (root_name, base_name): + if self._is_reserved_keyword(name): + raise ReservedNameError( + f"'{root_name}' is a reserved name", + reusable_name=False, + reserved_name=root_name, + platform=self.platform, + ) + + def _validate_max_len(self) -> None: + if self.max_len < 1: + raise ValueError("max_len must be greater or equal to one") + + if self.min_len > self.max_len: + raise ValueError("min_len must be lower than max_len") + + @classmethod + def __extract_root_name(cls, path: str) -> str: + if path in (".", ".."): + return path + + if cls.__RE_REPEAD_DOT.search(path): + return path + + match = cls.__RE_ROOT_NAME.match(os.path.basename(path)) + if match is None: + return "" + + return match.group(1) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/_common.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/_common.py new file mode 100644 index 0000000000000000000000000000000000000000..440c81ebbade0d1daa2f0bd72dd7dac0b10ea823 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/_common.py @@ -0,0 +1,163 @@ +""" +.. 
codeauthor:: Tsuyoshi Hombashi +""" + +import ntpath +import platform +import re +import string +import sys +from pathlib import PurePath +from typing import Any, Final, Optional + +from ._const import Platform +from ._types import PathType, PlatformType + + +_re_whitespaces: Final = re.compile(r"^[\s]+$") + + +def validate_pathtype( + text: PathType, allow_whitespaces: bool = False, error_msg: Optional[str] = None +) -> None: + from .error import ErrorReason, ValidationError + + if _is_not_null_string(text) or isinstance(text, PurePath): + return + + if allow_whitespaces and _re_whitespaces.search(str(text)): + return + + if is_null_string(text): + raise ValidationError(reason=ErrorReason.NULL_NAME) + + raise TypeError(f"text must be a string: actual={type(text)}") + + +def to_str(name: PathType) -> str: + if isinstance(name, PurePath): + return str(name) + + return name + + +def is_nt_abspath(value: str) -> bool: + ver_info = sys.version_info[:2] + if ver_info <= (3, 10): + if value.startswith("\\\\"): + return True + elif ver_info >= (3, 13): + return ntpath.isabs(value) + + drive, _tail = ntpath.splitdrive(value) + + return ntpath.isabs(value) and len(drive) > 0 + + +def is_null_string(value: Any) -> bool: + if value is None: + return True + + try: + return len(value.strip()) == 0 + except AttributeError: + return False + + +def _is_not_null_string(value: Any) -> bool: + try: + return len(value.strip()) > 0 + except AttributeError: + return False + + +def _get_unprintable_ascii_chars() -> list[str]: + return [chr(c) for c in range(128) if chr(c) not in string.printable] + + +unprintable_ascii_chars: Final = tuple(_get_unprintable_ascii_chars()) + + +def _get_ascii_symbols() -> list[str]: + symbol_list: list[str] = [] + + for i in range(128): + c = chr(i) + + if c in unprintable_ascii_chars or c in string.digits + string.ascii_letters: + continue + + symbol_list.append(c) + + return symbol_list + + +ascii_symbols: Final = tuple(_get_ascii_symbols()) + +__RE_UNPRINTABLE_CHARS: Final = re.compile( + "[{}]".format(re.escape("".join(unprintable_ascii_chars))), re.UNICODE +) +__RE_ANSI_ESCAPE: Final = re.compile( + r"(?:\x1B[@-Z\\-_]|[\x80-\x9A\x9C-\x9F]|(?:\x1B\[|\x9B)[0-?]*[ -/]*[@-~])" +) + + +def validate_unprintable_char(text: str) -> None: + from .error import InvalidCharError + + match_list = __RE_UNPRINTABLE_CHARS.findall(to_str(text)) + if match_list: + raise InvalidCharError(f"unprintable character found: {match_list}") + + +def replace_unprintable_char(text: str, replacement_text: str = "") -> str: + try: + return __RE_UNPRINTABLE_CHARS.sub(replacement_text, text) + except (TypeError, AttributeError): + raise TypeError("text must be a string") + + +def replace_ansi_escape(text: str, replacement_text: str = "") -> str: + try: + return __RE_ANSI_ESCAPE.sub(replacement_text, text) + except (TypeError, AttributeError): + raise TypeError("text must be a string") + + +def normalize_platform(name: Optional[PlatformType]) -> Platform: + if isinstance(name, Platform): + return name + + if not name: + return Platform.UNIVERSAL + + platform_str = name.strip().casefold() + + if platform_str == "posix": + return Platform.POSIX + + if platform_str == "auto": + platform_str = platform.system().casefold() + + if platform_str in ["linux"]: + return Platform.LINUX + + if platform_str and platform_str.startswith("win"): + return Platform.WINDOWS + + if platform_str in ["mac", "macos", "darwin"]: + return Platform.MACOS + + return Platform.UNIVERSAL + + +def findall_to_str(match: list[Any]) -> str: + 
uniq_list = {repr(text) for text in match} + return ", ".join(uniq_list) + + +def truncate_str(text: str, encoding: str, max_bytes: int) -> str: + str_bytes = text.encode(encoding) + str_bytes = str_bytes[:max_bytes] + # last char might be malformed, ignore it + return str_bytes.decode(encoding, "ignore") diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/_const.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/_const.py new file mode 100644 index 0000000000000000000000000000000000000000..4e8518ba9d9eead2401fcb1c8ecb3ab1d283bc8c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/_const.py @@ -0,0 +1,41 @@ +import enum +from typing import Final + + +DEFAULT_MIN_LEN: Final = 1 +INVALID_CHAR_ERR_MSG_TMPL: Final = "invalids=({invalid})" + + +_NTFS_RESERVED_FILE_NAMES: Final = ( + "$Mft", + "$MftMirr", + "$LogFile", + "$Volume", + "$AttrDef", + "$Bitmap", + "$Boot", + "$BadClus", + "$Secure", + "$Upcase", + "$Extend", + "$Quota", + "$ObjId", + "$Reparse", +) # Only in root directory + + +@enum.unique +class Platform(enum.Enum): + """ + Platform specifier enumeration. + """ + + #: POSIX compatible platform. + POSIX = "POSIX" + + #: platform independent. note that absolute paths cannot specify this. + UNIVERSAL = "universal" + + LINUX = "Linux" + WINDOWS = "Windows" + MACOS = "macOS" diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/_filename.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/_filename.py new file mode 100644 index 0000000000000000000000000000000000000000..f2651eddb8d97c2a2b78dee5103acca88d616d33 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/_filename.py @@ -0,0 +1,478 @@ +""" +.. 
codeauthor:: Tsuyoshi Hombashi +""" + +import itertools +import posixpath +import re +import warnings +from collections.abc import Sequence +from pathlib import Path, PurePath +from re import Pattern +from typing import Final, Optional + +from ._base import AbstractSanitizer, AbstractValidator, BaseFile, BaseValidator +from ._common import findall_to_str, is_nt_abspath, to_str, truncate_str, validate_pathtype +from ._const import DEFAULT_MIN_LEN, INVALID_CHAR_ERR_MSG_TMPL, Platform +from ._types import PathType, PlatformType +from .error import ErrorAttrKey, ErrorReason, InvalidCharError, ValidationError +from .handler import ReservedNameHandler, ValidationErrorHandler + + +_DEFAULT_MAX_FILENAME_LEN: Final = 255 +_RE_INVALID_FILENAME: Final = re.compile( + f"[{re.escape(BaseFile._INVALID_FILENAME_CHARS):s}]", re.UNICODE +) +_RE_INVALID_WIN_FILENAME: Final = re.compile( + f"[{re.escape(BaseFile._INVALID_WIN_FILENAME_CHARS):s}]", re.UNICODE +) + + +class FileNameSanitizer(AbstractSanitizer): + def __init__( + self, + max_len: int = _DEFAULT_MAX_FILENAME_LEN, + fs_encoding: Optional[str] = None, + platform: Optional[PlatformType] = None, + null_value_handler: Optional[ValidationErrorHandler] = None, + reserved_name_handler: Optional[ValidationErrorHandler] = None, + additional_reserved_names: Optional[Sequence[str]] = None, + validate_after_sanitize: bool = False, + validator: Optional[AbstractValidator] = None, + ) -> None: + if validator: + fname_validator = validator + else: + fname_validator = FileNameValidator( + min_len=DEFAULT_MIN_LEN, + max_len=max_len, + fs_encoding=fs_encoding, + check_reserved=True, + additional_reserved_names=additional_reserved_names, + platform=platform, + ) + + super().__init__( + max_len=max_len, + fs_encoding=fs_encoding, + null_value_handler=null_value_handler, + reserved_name_handler=reserved_name_handler, + additional_reserved_names=additional_reserved_names, + platform=platform, + validate_after_sanitize=validate_after_sanitize, + validator=fname_validator, + ) + + self._sanitize_regexp = self._get_sanitize_regexp() + + def sanitize(self, value: PathType, replacement_text: str = "") -> PathType: + try: + validate_pathtype(value, allow_whitespaces=not self._is_windows(include_universal=True)) + except ValidationError as e: + if e.reason == ErrorReason.NULL_NAME: + if isinstance(value, PurePath): + raise + + return self._null_value_handler(e) # type: ignore + raise + + sanitized_filename = self._sanitize_regexp.sub(replacement_text, str(value)) + sanitized_filename = truncate_str(sanitized_filename, self._fs_encoding, self.max_len) + + try: + self._validator.validate(sanitized_filename) + except ValidationError as e: + if e.reason == ErrorReason.RESERVED_NAME: + replacement_word = self._reserved_name_handler(e) + if e.reserved_name != replacement_word: + sanitized_filename = re.sub( + re.escape(e.reserved_name), replacement_word, sanitized_filename + ) + elif e.reason == ErrorReason.INVALID_CHARACTER and self._is_windows( + include_universal=True + ): + # Do not start a file or directory name with a space + sanitized_filename = sanitized_filename.lstrip(" ") + + # Do not end a file or directory name with a space or a period + sanitized_filename = sanitized_filename.rstrip(" ") + if sanitized_filename not in (".", ".."): + sanitized_filename = sanitized_filename.rstrip(" .") + elif e.reason == ErrorReason.NULL_NAME: + sanitized_filename = self._null_value_handler(e) + + if self._validate_after_sanitize: + try: + self._validator.validate(sanitized_filename) + 
except ValidationError as e: + raise ValidationError( + description=str(e), + reason=ErrorReason.INVALID_AFTER_SANITIZE, + platform=self.platform, + ) + + if isinstance(value, PurePath): + return Path(sanitized_filename) # type: ignore + + return sanitized_filename # type: ignore + + def _get_sanitize_regexp(self) -> Pattern[str]: + if self._is_windows(include_universal=True): + return _RE_INVALID_WIN_FILENAME + + return _RE_INVALID_FILENAME + + +class FileNameValidator(BaseValidator): + _WINDOWS_RESERVED_FILE_NAMES: Final = ( + ("CON", "PRN", "AUX", "CLOCK$", "NUL") + + tuple(f"{name:s}{num:d}" for name, num in itertools.product(("COM", "LPT"), range(0, 10))) + + tuple( + f"{name:s}{ssd:s}" + for name, ssd in itertools.product( + ("COM", "LPT"), + ("\N{SUPERSCRIPT ONE}", "\N{SUPERSCRIPT TWO}", "\N{SUPERSCRIPT THREE}"), + ) + ) + ) + _MACOS_RESERVED_FILE_NAMES: Final = (":",) + + @property + def reserved_keywords(self) -> tuple[str, ...]: + common_keywords = super().reserved_keywords + + if self._is_universal(): + word_set = set( + common_keywords + + self._WINDOWS_RESERVED_FILE_NAMES + + self._MACOS_RESERVED_FILE_NAMES + ) + elif self._is_windows(): + word_set = set(common_keywords + self._WINDOWS_RESERVED_FILE_NAMES) + elif self._is_posix() or self._is_macos(): + word_set = set(common_keywords + self._MACOS_RESERVED_FILE_NAMES) + else: + word_set = set(common_keywords) + + return tuple(sorted(word_set)) + + def __init__( + self, + min_len: int = DEFAULT_MIN_LEN, + max_len: int = _DEFAULT_MAX_FILENAME_LEN, + fs_encoding: Optional[str] = None, + platform: Optional[PlatformType] = None, + check_reserved: bool = True, + additional_reserved_names: Optional[Sequence[str]] = None, + ) -> None: + super().__init__( + min_len=min_len, + max_len=max_len, + fs_encoding=fs_encoding, + check_reserved=check_reserved, + additional_reserved_names=additional_reserved_names, + platform=platform, + ) + + def validate(self, value: PathType) -> None: + validate_pathtype(value, allow_whitespaces=not self._is_windows(include_universal=True)) + + unicode_filename = to_str(value) + byte_ct = len(unicode_filename.encode(self._fs_encoding)) + + self.validate_abspath(unicode_filename) + + err_kwargs = { + ErrorAttrKey.REASON: ErrorReason.INVALID_LENGTH, + ErrorAttrKey.PLATFORM: self.platform, + ErrorAttrKey.FS_ENCODING: self._fs_encoding, + ErrorAttrKey.BYTE_COUNT: byte_ct, + ErrorAttrKey.VALUE: unicode_filename, + } + if byte_ct > self.max_len: + raise ValidationError( + [ + f"filename is too long: expected<={self.max_len:d} bytes, actual={byte_ct:d} bytes" + ], + **err_kwargs, + ) + if byte_ct < self.min_len: + raise ValidationError( + [ + f"filename is too short: expected>={self.min_len:d} bytes, actual={byte_ct:d} bytes" + ], + **err_kwargs, + ) + + self._validate_reserved_keywords(unicode_filename) + self.__validate_universal_filename(unicode_filename) + + if self._is_windows(include_universal=True): + self.__validate_win_filename(unicode_filename) + + def validate_abspath(self, value: str) -> None: + err = ValidationError( + description=f"found an absolute path ({value!r}), expected a filename", + platform=self.platform, + reason=ErrorReason.FOUND_ABS_PATH, + ) + + if self._is_windows(include_universal=True): + if is_nt_abspath(value): + raise err + + if posixpath.isabs(value): + raise err + + def __validate_universal_filename(self, unicode_filename: str) -> None: + match = _RE_INVALID_FILENAME.findall(unicode_filename) + if match: + raise InvalidCharError( + INVALID_CHAR_ERR_MSG_TMPL.format( + 
invalid=findall_to_str(match), + ), + platform=Platform.UNIVERSAL, + value=unicode_filename, + ) + + def __validate_win_filename(self, unicode_filename: str) -> None: + match = _RE_INVALID_WIN_FILENAME.findall(unicode_filename) + if match: + raise InvalidCharError( + INVALID_CHAR_ERR_MSG_TMPL.format( + invalid=findall_to_str(match), + ), + platform=Platform.WINDOWS, + value=unicode_filename, + ) + + if unicode_filename in (".", ".."): + return + + KB2829981_err_tmpl = "{}. Refer: https://learn.microsoft.com/en-us/troubleshoot/windows-client/shell-experience/file-folder-name-whitespace-characters" # noqa: E501 + err_kwargs = { + ErrorAttrKey.PLATFORM: Platform.WINDOWS, + ErrorAttrKey.VALUE: unicode_filename, + } + + if unicode_filename[-1] in (" ", "."): + raise InvalidCharError( + INVALID_CHAR_ERR_MSG_TMPL.format(invalid=re.escape(unicode_filename[-1])), + description=KB2829981_err_tmpl.format( + "Do not end a file or directory name with a space or a period" + ), + **err_kwargs, + ) + + if unicode_filename[0] in (" "): + raise InvalidCharError( + INVALID_CHAR_ERR_MSG_TMPL.format(invalid=re.escape(unicode_filename[0])), + description=KB2829981_err_tmpl.format( + "Do not start a file or directory name with a space" + ), + **err_kwargs, + ) + + +def validate_filename( + filename: PathType, + platform: Optional[PlatformType] = None, + min_len: int = DEFAULT_MIN_LEN, + max_len: int = _DEFAULT_MAX_FILENAME_LEN, + fs_encoding: Optional[str] = None, + check_reserved: bool = True, + additional_reserved_names: Optional[Sequence[str]] = None, +) -> None: + """Verifying whether the ``filename`` is a valid file name or not. + + Args: + filename: + Filename to validate. + platform: + Target platform name of the filename. + + .. include:: platform.txt + min_len: + Minimum byte length of the ``filename``. The value must be greater or equal to one. + Defaults to ``1``. + max_len: + Maximum byte length of the ``filename``. The value must be lower than: + + - ``Linux``: 4096 + - ``macOS``: 1024 + - ``Windows``: 260 + - ``universal``: 260 + + Defaults to ``255``. + fs_encoding: + Filesystem encoding that is used to calculate the byte length of the filename. + If |None|, get the encoding from the execution environment. + check_reserved: + If |True|, check the reserved names of the ``platform``. + additional_reserved_names: + Additional reserved names to check. + Case insensitive. + + Raises: + ValidationError (ErrorReason.INVALID_LENGTH): + If the ``filename`` is longer than ``max_len`` characters. + ValidationError (ErrorReason.INVALID_CHARACTER): + If the ``filename`` includes invalid character(s) for a filename: + |invalid_filename_chars|. + The following characters are also invalid for Windows platforms: + |invalid_win_filename_chars|. + ValidationError (ErrorReason.RESERVED_NAME): + If the ``filename`` equals the reserved name by OS. + Windows reserved name is as follows: + ``"CON"``, ``"PRN"``, ``"AUX"``, ``"NUL"``, ``"COM[1-9]"``, ``"LPT[1-9]"``. 
+ + Example: + :ref:`example-validate-filename` + + See Also: + `Naming Files, Paths, and Namespaces - Win32 apps | Microsoft Docs + `__ + """ + + FileNameValidator( + platform=platform, + min_len=min_len, + max_len=max_len, + fs_encoding=fs_encoding, + check_reserved=check_reserved, + additional_reserved_names=additional_reserved_names, + ).validate(filename) + + +def is_valid_filename( + filename: PathType, + platform: Optional[PlatformType] = None, + min_len: int = DEFAULT_MIN_LEN, + max_len: Optional[int] = None, + fs_encoding: Optional[str] = None, + check_reserved: bool = True, + additional_reserved_names: Optional[Sequence[str]] = None, +) -> bool: + """Check whether the ``filename`` is a valid name or not. + + Args: + filename: + A filename to be checked. + platform: + Target platform name of the filename. + + Example: + :ref:`example-is-valid-filename` + + See Also: + :py:func:`.validate_filename()` + """ + + return FileNameValidator( + platform=platform, + min_len=min_len, + max_len=-1 if max_len is None else max_len, + fs_encoding=fs_encoding, + check_reserved=check_reserved, + additional_reserved_names=additional_reserved_names, + ).is_valid(filename) + + +def sanitize_filename( + filename: PathType, + replacement_text: str = "", + platform: Optional[PlatformType] = None, + max_len: Optional[int] = _DEFAULT_MAX_FILENAME_LEN, + fs_encoding: Optional[str] = None, + check_reserved: Optional[bool] = None, + null_value_handler: Optional[ValidationErrorHandler] = None, + reserved_name_handler: Optional[ValidationErrorHandler] = None, + additional_reserved_names: Optional[Sequence[str]] = None, + validate_after_sanitize: bool = False, +) -> PathType: + """Make a valid filename from a string. + + To make a valid filename, the function does the following: + + - Replace invalid characters as file names included in the ``filename`` + with the ``replacement_text``. Invalid characters are: + + - unprintable characters + - |invalid_filename_chars| + - for Windows (or universal) only: |invalid_win_filename_chars| + + - Replace a value if a sanitized value is a reserved name by operating systems + with a specified handler by ``reserved_name_handler``. + + Args: + filename: Filename to sanitize. + replacement_text: + Replacement text for invalid characters. Defaults to ``""``. + platform: + Target platform name of the filename. + + .. include:: platform.txt + max_len: + Maximum byte length of the ``filename``. + Truncate the name length if the ``filename`` length exceeds this value. + Defaults to ``255``. + fs_encoding: + Filesystem encoding that is used to calculate the byte length of the filename. + If |None|, get the encoding from the execution environment. + check_reserved: + [Deprecated] Use 'reserved_name_handler' instead. + null_value_handler: + Function called when a value after sanitization is an empty string. + You can specify predefined handlers: + + - :py:func:`~.handler.NullValueHandler.return_null_string` + - :py:func:`~.handler.NullValueHandler.return_timestamp` + - :py:func:`~.handler.raise_error` + + Defaults to :py:func:`.handler.NullValueHandler.return_null_string` that just return ``""``. + reserved_name_handler: + Function called when a value after sanitization is a reserved name. 
+ You can specify predefined handlers: + + - :py:meth:`~.handler.ReservedNameHandler.add_leading_underscore` + - :py:meth:`~.handler.ReservedNameHandler.add_trailing_underscore` + - :py:meth:`~.handler.ReservedNameHandler.as_is` + - :py:func:`~.handler.raise_error` + + Defaults to :py:func:`.handler.add_trailing_underscore`. + additional_reserved_names: + Additional reserved names to sanitize. + Case insensitive. + validate_after_sanitize: + Execute validation after sanitization to the file name. + + Returns: + Same type as the ``filename`` (str or PathLike object): + Sanitized filename. + + Raises: + ValueError: + If the ``filename`` is an invalid filename. + + Example: + :ref:`example-sanitize-filename` + """ + + if check_reserved is not None: + warnings.warn( + "'check_reserved' is deprecated. Use 'reserved_name_handler' instead.", + DeprecationWarning, + ) + + if check_reserved is False: + reserved_name_handler = ReservedNameHandler.as_is + + return FileNameSanitizer( + platform=platform, + max_len=-1 if max_len is None else max_len, + fs_encoding=fs_encoding, + null_value_handler=null_value_handler, + reserved_name_handler=reserved_name_handler, + additional_reserved_names=additional_reserved_names, + validate_after_sanitize=validate_after_sanitize, + ).sanitize(filename, replacement_text) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/_filepath.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/_filepath.py new file mode 100644 index 0000000000000000000000000000000000000000..df49ed437b13c2aff27ea72978bea4ccaee0c729 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/_filepath.py @@ -0,0 +1,516 @@ +""" +.. codeauthor:: Tsuyoshi Hombashi +""" + +import ntpath +import os.path +import posixpath +import re +import warnings +from collections.abc import Sequence +from pathlib import Path, PurePath +from re import Pattern +from typing import Final, Optional + +from ._base import AbstractSanitizer, AbstractValidator, BaseFile, BaseValidator +from ._common import findall_to_str, is_nt_abspath, to_str, validate_pathtype +from ._const import _NTFS_RESERVED_FILE_NAMES, DEFAULT_MIN_LEN, INVALID_CHAR_ERR_MSG_TMPL, Platform +from ._filename import FileNameSanitizer, FileNameValidator +from ._types import PathType, PlatformType +from .error import ErrorAttrKey, ErrorReason, InvalidCharError, ReservedNameError, ValidationError +from .handler import ReservedNameHandler, ValidationErrorHandler + + +_RE_INVALID_PATH: Final = re.compile(f"[{re.escape(BaseFile._INVALID_PATH_CHARS):s}]", re.UNICODE) +_RE_INVALID_WIN_PATH: Final = re.compile( + f"[{re.escape(BaseFile._INVALID_WIN_PATH_CHARS):s}]", re.UNICODE +) + + +class FilePathSanitizer(AbstractSanitizer): + def __init__( + self, + max_len: int = -1, + fs_encoding: Optional[str] = None, + platform: Optional[PlatformType] = None, + null_value_handler: Optional[ValidationErrorHandler] = None, + reserved_name_handler: Optional[ValidationErrorHandler] = None, + additional_reserved_names: Optional[Sequence[str]] = None, + normalize: bool = True, + validate_after_sanitize: bool = False, + validator: Optional[AbstractValidator] = None, + ) -> None: + if validator: + fpath_validator = validator + else: + fpath_validator = FilePathValidator( + min_len=DEFAULT_MIN_LEN, + max_len=max_len, + fs_encoding=fs_encoding, + check_reserved=True, + additional_reserved_names=additional_reserved_names, + platform=platform, + ) + super().__init__( + max_len=max_len, + 
fs_encoding=fs_encoding, + validator=fpath_validator, + null_value_handler=null_value_handler, + reserved_name_handler=reserved_name_handler, + additional_reserved_names=additional_reserved_names, + platform=platform, + validate_after_sanitize=validate_after_sanitize, + ) + + self._sanitize_regexp = self._get_sanitize_regexp() + self.__fname_sanitizer = FileNameSanitizer( + max_len=self.max_len, + fs_encoding=fs_encoding, + null_value_handler=null_value_handler, + reserved_name_handler=reserved_name_handler, + additional_reserved_names=additional_reserved_names, + platform=self.platform, + validate_after_sanitize=validate_after_sanitize, + ) + self.__normalize = normalize + + if self._is_windows(include_universal=True): + self.__split_drive = ntpath.splitdrive + else: + self.__split_drive = posixpath.splitdrive + + def sanitize(self, value: PathType, replacement_text: str = "") -> PathType: + try: + validate_pathtype(value, allow_whitespaces=not self._is_windows(include_universal=True)) + except ValidationError as e: + if e.reason == ErrorReason.NULL_NAME: + if isinstance(value, PurePath): + raise + + return self._null_value_handler(e) # type: ignore + raise + + unicode_filepath = to_str(value) + drive, unicode_filepath = self.__split_drive(unicode_filepath) + unicode_filepath = self._sanitize_regexp.sub(replacement_text, unicode_filepath) + if self.__normalize and unicode_filepath: + unicode_filepath = os.path.normpath(unicode_filepath) + sanitized_path = unicode_filepath + + sanitized_entries: list[str] = [] + if drive: + sanitized_entries.append(drive) + for entry in sanitized_path.replace("\\", "/").split("/"): + if entry in _NTFS_RESERVED_FILE_NAMES: + sanitized_entries.append(f"{entry}_") + continue + + sanitized_entry = str( + self.__fname_sanitizer.sanitize(entry, replacement_text=replacement_text) + ) + if not sanitized_entry: + if not sanitized_entries: + sanitized_entries.append("") + continue + + sanitized_entries.append(sanitized_entry) + + sanitized_path = self.__get_path_separator().join(sanitized_entries) + try: + self._validator.validate(sanitized_path) + except ValidationError as e: + if e.reason == ErrorReason.NULL_NAME: + sanitized_path = self._null_value_handler(e) + + if self._validate_after_sanitize: + self._validator.validate(sanitized_path) + + if isinstance(value, PurePath): + return Path(sanitized_path) # type: ignore + + return sanitized_path # type: ignore + + def _get_sanitize_regexp(self) -> Pattern[str]: + if self._is_windows(include_universal=True): + return _RE_INVALID_WIN_PATH + + return _RE_INVALID_PATH + + def __get_path_separator(self) -> str: + if self._is_windows(): + return "\\" + + return "/" + + +class FilePathValidator(BaseValidator): + _RE_NTFS_RESERVED: Final = re.compile( + "|".join(f"^/{re.escape(pattern)}$" for pattern in _NTFS_RESERVED_FILE_NAMES), + re.IGNORECASE, + ) + _MACOS_RESERVED_FILE_PATHS: Final = ("/", ":") + + @property + def reserved_keywords(self) -> tuple[str, ...]: + common_keywords = super().reserved_keywords + + if any([self._is_universal(), self._is_posix(), self._is_macos()]): + return common_keywords + self._MACOS_RESERVED_FILE_PATHS + + if self._is_linux(): + return common_keywords + ("/",) + + return common_keywords + + def __init__( + self, + min_len: int = DEFAULT_MIN_LEN, + max_len: int = -1, + fs_encoding: Optional[str] = None, + platform: Optional[PlatformType] = None, + check_reserved: bool = True, + additional_reserved_names: Optional[Sequence[str]] = None, + ) -> None: + super().__init__( + min_len=min_len, + 
max_len=max_len, + fs_encoding=fs_encoding, + check_reserved=check_reserved, + additional_reserved_names=additional_reserved_names, + platform=platform, + ) + + self.__fname_validator = FileNameValidator( + min_len=min_len, + max_len=self.max_len, + fs_encoding=fs_encoding, + check_reserved=check_reserved, + additional_reserved_names=additional_reserved_names, + platform=platform, + ) + + if self._is_windows(include_universal=True): + self.__split_drive = ntpath.splitdrive + else: + self.__split_drive = posixpath.splitdrive + + def validate(self, value: PathType) -> None: + validate_pathtype(value, allow_whitespaces=not self._is_windows(include_universal=True)) + self.validate_abspath(value) + + _drive, tail = self.__split_drive(value) + if not tail: + return + + unicode_filepath = to_str(tail) + byte_ct = len(unicode_filepath.encode(self._fs_encoding)) + err_kwargs = { + ErrorAttrKey.REASON: ErrorReason.INVALID_LENGTH, + ErrorAttrKey.PLATFORM: self.platform, + ErrorAttrKey.FS_ENCODING: self._fs_encoding, + ErrorAttrKey.BYTE_COUNT: byte_ct, + ErrorAttrKey.VALUE: unicode_filepath, + } + + if byte_ct > self.max_len: + raise ValidationError( + [ + f"file path is too long: expected<={self.max_len:d} bytes, actual={byte_ct:d} bytes" + ], + **err_kwargs, + ) + if byte_ct < self.min_len: + raise ValidationError( + [ + "file path is too short: expected>={:d} bytes, actual={:d} bytes".format( + self.min_len, byte_ct + ) + ], + **err_kwargs, + ) + + self._validate_reserved_keywords(unicode_filepath) + unicode_filepath = unicode_filepath.replace("\\", "/") + for entry in unicode_filepath.split("/"): + if not entry or entry in (".", ".."): + continue + + self.__fname_validator.validate(entry) + + if self._is_windows(include_universal=True): + self.__validate_win_filepath(unicode_filepath) + else: + self.__validate_unix_filepath(unicode_filepath) + + def validate_abspath(self, value: PathType) -> None: + is_posix_abs = posixpath.isabs(value) + is_nt_abs = is_nt_abspath(to_str(value)) + + if any([self._is_windows() and is_nt_abs, self._is_posix() and is_posix_abs]): + return + + if self._is_universal() and any([is_nt_abs, is_posix_abs]): + ValidationError( + "platform-independent absolute file path is not supported", + platform=self.platform, + reason=ErrorReason.MALFORMED_ABS_PATH, + ) + + err_object = ValidationError( + description=( + f"an invalid absolute file path ({value!r}) for the platform ({self.platform.value})." + + " to avoid the error, specify an appropriate platform corresponding to" + + " the path format or 'auto'." 
+ ), + platform=self.platform, + reason=ErrorReason.MALFORMED_ABS_PATH, + ) + + if self._is_windows(include_universal=True) and is_posix_abs: + raise err_object + + if not self._is_windows(): + drive, _tail = ntpath.splitdrive(value) + if drive and is_nt_abs: + raise err_object + + def __validate_unix_filepath(self, unicode_filepath: str) -> None: + match = _RE_INVALID_PATH.findall(unicode_filepath) + if match: + raise InvalidCharError( + INVALID_CHAR_ERR_MSG_TMPL.format(invalid=findall_to_str(match)), + value=unicode_filepath, + ) + + def __validate_win_filepath(self, unicode_filepath: str) -> None: + match = _RE_INVALID_WIN_PATH.findall(unicode_filepath) + if match: + raise InvalidCharError( + INVALID_CHAR_ERR_MSG_TMPL.format(invalid=findall_to_str(match)), + platform=Platform.WINDOWS, + value=unicode_filepath, + ) + + _drive, value = self.__split_drive(unicode_filepath) + if value: + match_reserved = self._RE_NTFS_RESERVED.search(value) + if match_reserved: + reserved_name = match_reserved.group() + raise ReservedNameError( + f"'{reserved_name}' is a reserved name", + reusable_name=False, + reserved_name=reserved_name, + platform=self.platform, + ) + + +def validate_filepath( + file_path: PathType, + platform: Optional[PlatformType] = None, + min_len: int = DEFAULT_MIN_LEN, + max_len: Optional[int] = None, + fs_encoding: Optional[str] = None, + check_reserved: bool = True, + additional_reserved_names: Optional[Sequence[str]] = None, +) -> None: + """Verifying whether the ``file_path`` is a valid file path or not. + + Args: + file_path (PathType): + File path to be validated. + platform (Optional[PlatformType], optional): + Target platform name of the file path. + + .. include:: platform.txt + min_len (int, optional): + Minimum byte length of the ``file_path``. The value must be greater or equal to one. + Defaults to ``1``. + max_len (Optional[int], optional): + Maximum byte length of the ``file_path``. If the value is |None| or minus, + automatically determined by the ``platform``: + + - ``Linux``: 4096 + - ``macOS``: 1024 + - ``Windows``: 260 + - ``universal``: 260 + fs_encoding (Optional[str], optional): + Filesystem encoding that is used to calculate the byte length of the file path. + If |None|, get the encoding from the execution environment. + check_reserved (bool, optional): + If |True|, check the reserved names of the ``platform``. + Defaults to |True|. + additional_reserved_names (Optional[Sequence[str]], optional): + Additional reserved names to check. + + Raises: + ValidationError (ErrorReason.INVALID_CHARACTER): + If the ``file_path`` includes invalid char(s): + |invalid_file_path_chars|. + The following characters are also invalid for Windows platforms: + |invalid_win_file_path_chars| + ValidationError (ErrorReason.INVALID_LENGTH): + If the ``file_path`` is longer than ``max_len`` characters. + ValidationError: + If ``file_path`` includes invalid values. 
+ + Example: + :ref:`example-validate-file-path` + + See Also: + `Naming Files, Paths, and Namespaces - Win32 apps | Microsoft Docs + `__ + """ + + FilePathValidator( + platform=platform, + min_len=min_len, + max_len=-1 if max_len is None else max_len, + fs_encoding=fs_encoding, + check_reserved=check_reserved, + additional_reserved_names=additional_reserved_names, + ).validate(file_path) + + +def is_valid_filepath( + file_path: PathType, + platform: Optional[PlatformType] = None, + min_len: int = DEFAULT_MIN_LEN, + max_len: Optional[int] = None, + fs_encoding: Optional[str] = None, + check_reserved: bool = True, + additional_reserved_names: Optional[Sequence[str]] = None, +) -> bool: + """Check whether the ``file_path`` is a valid name or not. + + Args: + file_path: + A filepath to be checked. + platform: + Target platform name of the file path. + + Example: + :ref:`example-is-valid-filepath` + + See Also: + :py:func:`.validate_filepath()` + """ + + return FilePathValidator( + platform=platform, + min_len=min_len, + max_len=-1 if max_len is None else max_len, + fs_encoding=fs_encoding, + check_reserved=check_reserved, + additional_reserved_names=additional_reserved_names, + ).is_valid(file_path) + + +def sanitize_filepath( + file_path: PathType, + replacement_text: str = "", + platform: Optional[PlatformType] = None, + max_len: Optional[int] = None, + fs_encoding: Optional[str] = None, + check_reserved: Optional[bool] = None, + null_value_handler: Optional[ValidationErrorHandler] = None, + reserved_name_handler: Optional[ValidationErrorHandler] = None, + additional_reserved_names: Optional[Sequence[str]] = None, + normalize: bool = True, + validate_after_sanitize: bool = False, +) -> PathType: + """Make a valid file path from a string. + + To make a valid file path, the function does the following: + + - Replace invalid characters for a file path within the ``file_path`` + with the ``replacement_text``. Invalid characters are as follows: + + - unprintable characters + - |invalid_file_path_chars| + - for Windows (or universal) only: |invalid_win_file_path_chars| + + - Replace a value if a sanitized value is a reserved name by operating systems + with a specified handler by ``reserved_name_handler``. + + Args: + file_path: + File path to sanitize. + replacement_text: + Replacement text for invalid characters. + Defaults to ``""``. + platform: + Target platform name of the file path. + + .. include:: platform.txt + max_len: + Maximum byte length of the file path. + Truncate the path if the value length exceeds the `max_len`. + If the value is |None| or minus, ``max_len`` will automatically determined by the ``platform``: + + - ``Linux``: 4096 + - ``macOS``: 1024 + - ``Windows``: 260 + - ``universal``: 260 + fs_encoding: + Filesystem encoding that is used to calculate the byte length of the file path. + If |None|, get the encoding from the execution environment. + check_reserved: + [Deprecated] Use 'reserved_name_handler' instead. + null_value_handler: + Function called when a value after sanitization is an empty string. + You can specify predefined handlers: + + - :py:func:`.handler.NullValueHandler.return_null_string` + - :py:func:`.handler.NullValueHandler.return_timestamp` + - :py:func:`.handler.raise_error` + + Defaults to :py:func:`.handler.NullValueHandler.return_null_string` that just return ``""``. + reserved_name_handler: + Function called when a value after sanitization is one of the reserved names. 
+ You can specify predefined handlers: + + - :py:meth:`~.handler.ReservedNameHandler.add_leading_underscore` + - :py:meth:`~.handler.ReservedNameHandler.add_trailing_underscore` + - :py:meth:`~.handler.ReservedNameHandler.as_is` + - :py:func:`~.handler.raise_error` + + Defaults to :py:func:`.handler.add_trailing_underscore`. + additional_reserved_names: + Additional reserved names to sanitize. + Case insensitive. + normalize: + If |True|, normalize the the file path. + validate_after_sanitize: + Execute validation after sanitization to the file path. + + Returns: + Same type as the argument (str or PathLike object): + Sanitized filepath. + + Raises: + ValueError: + If the ``file_path`` is an invalid file path. + + Example: + :ref:`example-sanitize-file-path` + """ + + if check_reserved is not None: + warnings.warn( + "'check_reserved' is deprecated. Use 'reserved_name_handler' instead.", + DeprecationWarning, + ) + + if check_reserved is False: + reserved_name_handler = ReservedNameHandler.as_is + + return FilePathSanitizer( + platform=platform, + max_len=-1 if max_len is None else max_len, + fs_encoding=fs_encoding, + normalize=normalize, + null_value_handler=null_value_handler, + reserved_name_handler=reserved_name_handler, + additional_reserved_names=additional_reserved_names, + validate_after_sanitize=validate_after_sanitize, + ).sanitize(file_path, replacement_text) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/_ltsv.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/_ltsv.py new file mode 100644 index 0000000000000000000000000000000000000000..134224a52a3ed8e9a8422c230f58e095a0104bd1 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/_ltsv.py @@ -0,0 +1,44 @@ +""" +.. codeauthor:: Tsuyoshi Hombashi +""" + +import re +from typing import Final + +from ._common import to_str, validate_pathtype +from .error import InvalidCharError + + +__RE_INVALID_LTSV_LABEL: Final = re.compile("[^0-9A-Za-z_.-]", re.UNICODE) + + +def validate_ltsv_label(label: str) -> None: + """ + Verifying whether ``label`` is a valid + `Labeled Tab-separated Values (LTSV) `__ label or not. + + :param label: Label to validate. + :raises pathvalidate.ValidationError: + If invalid character(s) found in the ``label`` for a LTSV format label. + """ + + validate_pathtype(label, allow_whitespaces=False) + + match_list = __RE_INVALID_LTSV_LABEL.findall(to_str(label)) + if match_list: + raise InvalidCharError(f"invalid character found for a LTSV format label: {match_list}") + + +def sanitize_ltsv_label(label: str, replacement_text: str = "") -> str: + """ + Replace all of the symbols in text. + + :param label: Input text. + :param replacement_text: Replacement text. + :return: A replacement string. + :rtype: str + """ + + validate_pathtype(label, allow_whitespaces=False) + + return __RE_INVALID_LTSV_LABEL.sub(replacement_text, to_str(label)) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/_symbol.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/_symbol.py new file mode 100644 index 0000000000000000000000000000000000000000..29cf608552ac4234143d5fa35992719a2ceded39 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/_symbol.py @@ -0,0 +1,93 @@ +""" +.. 
codeauthor:: Tsuyoshi Hombashi +""" + +import re +from collections.abc import Sequence +from typing import Final + +from ._common import ascii_symbols, to_str, unprintable_ascii_chars +from .error import InvalidCharError + + +__RE_SYMBOL: Final = re.compile( + "[{}]".format(re.escape("".join(ascii_symbols + unprintable_ascii_chars))), re.UNICODE +) + + +def validate_symbol(text: str) -> None: + """ + Verifying whether symbol(s) included in the ``text`` or not. + + Args: + text: + Input text to validate. + + Raises: + ValidationError (ErrorReason.INVALID_CHARACTER): + If symbol(s) included in the ``text``. + """ + + match_list = __RE_SYMBOL.findall(to_str(text)) + if match_list: + raise InvalidCharError(f"invalid symbols found: {match_list}") + + +def replace_symbol( + text: str, + replacement_text: str = "", + exclude_symbols: Sequence[str] = [], + is_replace_consecutive_chars: bool = False, + is_strip: bool = False, +) -> str: + """ + Replace all of the symbols in the ``text``. + + Args: + text: + Input text. + replacement_text: + Replacement text. + exclude_symbols: + Symbols that were excluded from the replacement. + is_replace_consecutive_chars: + If |True|, replace consecutive multiple ``replacement_text`` characters + to a single character. + is_strip: + If |True|, strip ``replacement_text`` from the beginning/end of the replacement text. + + Returns: + A replacement string. + + Example: + + :ref:`example-sanitize-symbol` + """ + + if exclude_symbols: + regexp = re.compile( + "[{}]".format( + re.escape( + "".join(set(ascii_symbols + unprintable_ascii_chars) - set(exclude_symbols)) + ) + ), + re.UNICODE, + ) + else: + regexp = __RE_SYMBOL + + try: + new_text = regexp.sub(replacement_text, to_str(text)) + except TypeError: + raise TypeError("text must be a string") + + if not replacement_text: + return new_text + + if is_replace_consecutive_chars: + new_text = re.sub(f"{re.escape(replacement_text)}+", replacement_text, new_text) + + if is_strip: + new_text = new_text.strip(replacement_text) + + return new_text diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/_types.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/_types.py new file mode 100644 index 0000000000000000000000000000000000000000..c946842489cb1998c4e9a2dbcb95ec9286e0a528 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/_types.py @@ -0,0 +1,8 @@ +from pathlib import Path +from typing import TypeVar + +from ._const import Platform + + +PathType = TypeVar("PathType", str, Path) +PlatformType = TypeVar("PlatformType", str, Platform) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/argparse.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/argparse.py new file mode 100644 index 0000000000000000000000000000000000000000..baeafe2751f007276ed14fa273f4fe206673d744 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/argparse.py @@ -0,0 +1,47 @@ +""" +.. 
codeauthor:: Tsuyoshi Hombashi +""" + +from argparse import ArgumentTypeError + +from ._filename import sanitize_filename, validate_filename +from ._filepath import sanitize_filepath, validate_filepath +from .error import ValidationError + + +def validate_filename_arg(value: str) -> str: + if not value: + return "" + + try: + validate_filename(value) + except ValidationError as e: + raise ArgumentTypeError(e) + + return value + + +def validate_filepath_arg(value: str) -> str: + if not value: + return "" + + try: + validate_filepath(value, platform="auto") + except ValidationError as e: + raise ArgumentTypeError(e) + + return value + + +def sanitize_filename_arg(value: str) -> str: + if not value: + return "" + + return sanitize_filename(value) + + +def sanitize_filepath_arg(value: str) -> str: + if not value: + return "" + + return sanitize_filepath(value, platform="auto") diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/click.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/click.py new file mode 100644 index 0000000000000000000000000000000000000000..9de84ebe2280a1f13f5eed5c929c5b2a182dfa61 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/click.py @@ -0,0 +1,50 @@ +""" +.. codeauthor:: Tsuyoshi Hombashi +""" + +from typing import Union + +import click +from click import Context, Option, Parameter + +from ._filename import sanitize_filename, validate_filename +from ._filepath import sanitize_filepath, validate_filepath +from .error import ValidationError + + +def validate_filename_arg(ctx: Context, param: Union[Option, Parameter], value: str) -> str: + if not value: + return "" + + try: + validate_filename(value) + except ValidationError as e: + raise click.BadParameter(str(e)) + + return value + + +def validate_filepath_arg(ctx: Context, param: Union[Option, Parameter], value: str) -> str: + if not value: + return "" + + try: + validate_filepath(value) + except ValidationError as e: + raise click.BadParameter(str(e)) + + return value + + +def sanitize_filename_arg(ctx: Context, param: Union[Option, Parameter], value: str) -> str: + if not value: + return "" + + return sanitize_filename(value) + + +def sanitize_filepath_arg(ctx: Context, param: Union[Option, Parameter], value: str) -> str: + if not value: + return "" + + return sanitize_filepath(value) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/error.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/error.py new file mode 100644 index 0000000000000000000000000000000000000000..3331a72900c29d167a71c916c9269cb59affc15b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/error.py @@ -0,0 +1,259 @@ +""" +.. codeauthor:: Tsuyoshi Hombashi +""" + +import enum +from typing import Final, Optional + +from ._const import Platform + + +def _to_error_code(code: int) -> str: + return f"PV{code:04d}" + + +class ErrorAttrKey: + BYTE_COUNT: Final = "byte_count" + DESCRIPTION: Final = "description" + FS_ENCODING: Final = "fs_encoding" + PLATFORM: Final = "platform" + REASON: Final = "reason" + RESERVED_NAME: Final = "reserved_name" + REUSABLE_NAME: Final = "reusable_name" + VALUE: Final = "value" + + +@enum.unique +class ErrorReason(enum.Enum): + """ + Validation error reasons. 
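+
+    Example (illustrative doctest; the codes and descriptions are read from
+    the member definitions below):
+
+        >>> ErrorReason.RESERVED_NAME.code
+        'PV1002'
+        >>> ErrorReason.RESERVED_NAME.description
+        'found a reserved name by a platform'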
+ """ + + NULL_NAME = (_to_error_code(1001), "NULL_NAME", "the value must not be an empty string") + RESERVED_NAME = ( + _to_error_code(1002), + "RESERVED_NAME", + "found a reserved name by a platform", + ) + INVALID_CHARACTER = ( + _to_error_code(1100), + "INVALID_CHARACTER", + "invalid characters found", + ) + INVALID_LENGTH = ( + _to_error_code(1101), + "INVALID_LENGTH", + "found an invalid string length", + ) + FOUND_ABS_PATH = ( + _to_error_code(1200), + "FOUND_ABS_PATH", + "found an absolute path where must be a relative path", + ) + MALFORMED_ABS_PATH = ( + _to_error_code(1201), + "MALFORMED_ABS_PATH", + "found a malformed absolute path", + ) + INVALID_AFTER_SANITIZE = ( + _to_error_code(2000), + "INVALID_AFTER_SANITIZE", + "found invalid value after sanitizing", + ) + + @property + def code(self) -> str: + """str: Error code.""" + return self.__code + + @property + def name(self) -> str: + """str: Error reason name.""" + return self.__name + + @property + def description(self) -> str: + """str: Error reason description.""" + return self.__description + + def __init__(self, code: str, name: str, description: str) -> None: + self.__name = name + self.__code = code + self.__description = description + + def __str__(self) -> str: + return f"[{self.__code}] {self.__description}" + + +class ValidationError(ValueError): + """ + Exception class of validation errors. + """ + + @property + def platform(self) -> Optional[Platform]: + """ + :py:class:`~pathvalidate.Platform`: Platform information. + """ + return self.__platform + + @property + def reason(self) -> ErrorReason: + """ + :py:class:`~pathvalidate.error.ErrorReason`: The cause of the error. + """ + return self.__reason + + @property + def description(self) -> Optional[str]: + """Optional[str]: Error description.""" + return self.__description + + @property + def reserved_name(self) -> str: + """str: Reserved name.""" + return self.__reserved_name + + @property + def reusable_name(self) -> Optional[bool]: + """Optional[bool]: Whether the name is reusable or not.""" + return self.__reusable_name + + @property + def fs_encoding(self) -> Optional[str]: + """Optional[str]: File system encoding.""" + return self.__fs_encoding + + @property + def byte_count(self) -> Optional[int]: + """Optional[int]: Byte count of the path.""" + return self.__byte_count + + def __init__(self, *args, **kwargs) -> None: # type: ignore + if ErrorAttrKey.REASON not in kwargs: + raise ValueError(f"{ErrorAttrKey.REASON} must be specified") + + self.__reason: ErrorReason = kwargs.pop(ErrorAttrKey.REASON) + self.__byte_count: Optional[int] = kwargs.pop(ErrorAttrKey.BYTE_COUNT, None) + self.__platform: Optional[Platform] = kwargs.pop(ErrorAttrKey.PLATFORM, None) + self.__description: Optional[str] = kwargs.pop(ErrorAttrKey.DESCRIPTION, None) + self.__reserved_name: str = kwargs.pop(ErrorAttrKey.RESERVED_NAME, "") + self.__reusable_name: Optional[bool] = kwargs.pop(ErrorAttrKey.REUSABLE_NAME, None) + self.__fs_encoding: Optional[str] = kwargs.pop(ErrorAttrKey.FS_ENCODING, None) + self.__value: Optional[str] = kwargs.pop(ErrorAttrKey.VALUE, None) + + try: + super().__init__(*args[0], **kwargs) + except IndexError: + super().__init__(*args, **kwargs) + + def as_slog(self) -> dict[str, str]: + """Return a dictionary representation of the error. + + Returns: + Dict[str, str]: A dictionary representation of the error. 
+ """ + + slog: dict[str, str] = { + "code": self.reason.code, + ErrorAttrKey.DESCRIPTION: self.reason.description, + } + if self.platform: + slog[ErrorAttrKey.PLATFORM] = self.platform.value + if self.description: + slog[ErrorAttrKey.DESCRIPTION] = self.description + if self.__reusable_name is not None: + slog[ErrorAttrKey.REUSABLE_NAME] = str(self.__reusable_name) + if self.__fs_encoding: + slog[ErrorAttrKey.FS_ENCODING] = self.__fs_encoding + if self.__byte_count: + slog[ErrorAttrKey.BYTE_COUNT] = str(self.__byte_count) + if self.__value: + slog[ErrorAttrKey.VALUE] = self.__value + + return slog + + def __str__(self) -> str: + item_list = [] + header = str(self.reason) + + if Exception.__str__(self): + item_list.append(Exception.__str__(self)) + + if self.platform: + item_list.append(f"{ErrorAttrKey.PLATFORM}={self.platform.value}") + if self.description: + item_list.append(f"{ErrorAttrKey.DESCRIPTION}={self.description}") + if self.__reusable_name is not None: + item_list.append(f"{ErrorAttrKey.REUSABLE_NAME}={self.reusable_name}") + if self.__fs_encoding: + item_list.append(f"{ErrorAttrKey.FS_ENCODING}={self.__fs_encoding}") + if self.__byte_count is not None: + item_list.append(f"{ErrorAttrKey.BYTE_COUNT}={self.__byte_count:,d}") + if self.__value: + item_list.append(f"{ErrorAttrKey.VALUE}={self.__value!r}") + + if item_list: + header += ": " + + return header + ", ".join(item_list).strip() + + def __repr__(self) -> str: + return self.__str__() + + +class NullNameError(ValidationError): + """[Deprecated] + Exception raised when a name is empty. + """ + + def __init__(self, *args, **kwargs) -> None: # type: ignore + kwargs[ErrorAttrKey.REASON] = ErrorReason.NULL_NAME + + super().__init__(args, **kwargs) + + +class InvalidCharError(ValidationError): + """ + Exception raised when includes invalid character(s) within a string. + """ + + def __init__(self, *args, **kwargs) -> None: # type: ignore[no-untyped-def] + kwargs[ErrorAttrKey.REASON] = ErrorReason.INVALID_CHARACTER + + super().__init__(args, **kwargs) + + +class ReservedNameError(ValidationError): + """ + Exception raised when a string matched a reserved name. + """ + + def __init__(self, *args, **kwargs) -> None: # type: ignore[no-untyped-def] + kwargs[ErrorAttrKey.REASON] = ErrorReason.RESERVED_NAME + + super().__init__(args, **kwargs) + + +class ValidReservedNameError(ReservedNameError): + """[Deprecated] + Exception raised when a string matched a reserved name. + However, it can be used as a name. + """ + + def __init__(self, *args, **kwargs) -> None: # type: ignore[no-untyped-def] + kwargs[ErrorAttrKey.REUSABLE_NAME] = True + + super().__init__(args, **kwargs) + + +class InvalidReservedNameError(ReservedNameError): + """[Deprecated] + Exception raised when a string matched a reserved name. + Moreover, the reserved name is invalid as a name. + """ + + def __init__(self, *args, **kwargs) -> None: # type: ignore[no-untyped-def] + kwargs[ErrorAttrKey.REUSABLE_NAME] = False + + super().__init__(args, **kwargs) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/handler.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/handler.py new file mode 100644 index 0000000000000000000000000000000000000000..0e44bbfd2cea5f1424fe47f67f7f456b216ad4db --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/handler.py @@ -0,0 +1,138 @@ +""" +.. 
codeauthor:: Tsuyoshi Hombashi +""" + +import warnings +from datetime import datetime +from typing import Callable + +from .error import ValidationError + + +ValidationErrorHandler = Callable[[ValidationError], str] + + +def return_null_string(e: ValidationError) -> str: + """Null value handler that always returns an empty string. + + Args: + e (ValidationError): A validation error. + + Returns: + str: An empty string. + """ + + warnings.warn( + "'return_null_string' is deprecated. Use 'NullValueHandler.return_null_string' instead.", + DeprecationWarning, + ) + + return "" + + +def return_timestamp(e: ValidationError) -> str: + """Null value handler that returns a timestamp of when the function was called. + + Args: + e (ValidationError): A validation error. + + Returns: + str: A timestamp. + """ + + warnings.warn( + "'return_timestamp' is deprecated. Use 'NullValueHandler.return_timestamp' instead.", + DeprecationWarning, + ) + + return str(datetime.now().timestamp()) + + +def raise_error(e: ValidationError) -> str: + """Null value handler that always raises an exception. + + Args: + e (ValidationError): A validation error. + + Raises: + ValidationError: Always raised. + """ + + raise e + + +class NullValueHandler: + @classmethod + def return_null_string(cls, e: ValidationError) -> str: + """Null value handler that always returns an empty string. + + Args: + e (ValidationError): A validation error. + + Returns: + str: An empty string. + """ + + return "" + + @classmethod + def return_timestamp(cls, e: ValidationError) -> str: + """Null value handler that returns a timestamp of when the function was called. + + Args: + e (ValidationError): A validation error. + + Returns: + str: A timestamp. + """ + + return str(datetime.now().timestamp()) + + +class ReservedNameHandler: + @classmethod + def add_leading_underscore(cls, e: ValidationError) -> str: + """Reserved name handler that adds a leading underscore (``"_"``) to the name + except for ``"."`` and ``".."``. + + Args: + e (ValidationError): A reserved name error. + + Returns: + str: The converted name. + """ + + if e.reserved_name in (".", "..") or e.reusable_name: + return e.reserved_name + + return f"_{e.reserved_name}" + + @classmethod + def add_trailing_underscore(cls, e: ValidationError) -> str: + """Reserved name handler that adds a trailing underscore (``"_"``) to the name + except for ``"."`` and ``".."``. + + Args: + e (ValidationError): A reserved name error. + + Returns: + str: The converted name. + """ + + if e.reserved_name in (".", "..") or e.reusable_name: + return e.reserved_name + + return f"{e.reserved_name}_" + + @classmethod + def as_is(cls, e: ValidationError) -> str: + """Reserved name handler that returns the name as is. + + Args: + e (ValidationError): A reserved name error. + + Returns: + str: The name as is. 
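+
+        Example:
+            An illustrative sketch of keeping reserved names unchanged during
+            sanitization (the top-level import path is assumed here, not
+            defined in this module)::
+
+                from pathvalidate import sanitize_filepath
+                from pathvalidate.handler import ReservedNameHandler
+
+                sanitize_filepath("CON", reserved_name_handler=ReservedNameHandler.as_is)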
+ """ + + return e.reserved_name diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/py.typed b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pathvalidate/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow_hotfix-0.7.dist-info/INSTALLER b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow_hotfix-0.7.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow_hotfix-0.7.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow_hotfix-0.7.dist-info/METADATA b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow_hotfix-0.7.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..40e70323a6dfcf9fda7d6a31c645dc85118bf623 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow_hotfix-0.7.dist-info/METADATA @@ -0,0 +1,116 @@ +Metadata-Version: 2.4 +Name: pyarrow-hotfix +Version: 0.7 +Project-URL: Documentation, https://github.com/pitrou/pyarrow-hotfix#readme +Project-URL: Issues, https://github.com/pitrou/pyarrow-hotfix/issues +Project-URL: Source, https://github.com/pitrou/pyarrow-hotfix +Author-email: Antoine Pitrou +License: Apache License, Version 2.0 +License-File: LICENSE.txt +Classifier: Development Status :: 4 - Beta +Classifier: Operating System :: OS Independent +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.5 +Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3.13 +Requires-Python: >=3.5 +Description-Content-Type: text/x-rst + +PyArrow Hotfix +============== + +.. image:: https://img.shields.io/pypi/v/pyarrow-hotfix.svg + :alt: pyarrow_hotfix package on PyPI + :target: https://pypi.org/project/pyarrow-hotfix + +.. image:: https://img.shields.io/pypi/pyversions/pyarrow-hotfix.svg + :alt: pyarrow_hotfix supported Python versions + :target: https://pypi.org/project/pyarrow-hotfix + +.. image:: https://github.com/pitrou/pyarrow-hotfix/actions/workflows/tests.yml/badge.svg + :alt: latest unit test results + :target: https://github.com/pitrou/pyarrow-hotfix/actions/workflows/tests.yml + + +Description +----------- + +This is a hotfix for the PyArrow security vulnerability +`CVE-2023-47248 `__. + +We generally recommend upgrading to PyArrow 14.0.1 or later, but if you +cannot upgrade, this package disables the vulnerability on older versions. + +Installation +------------ + +Use ``pip`` to install: + +.. code-block:: console + + pip install pyarrow_hotfix + +.. note:: + Both ``pyarrow-hotfix`` and ``pyarrow_hotfix`` are accepted and point to + the same package. + +Usage +----- + +``pyarrow_hotfix`` must be imported in your application or library code for +it to take effect: + +.. 
code-block:: python + + import pyarrow_hotfix + +Supported versions +------------------ + +``pyarrow_hotfix`` supports all Python versions starting from Python 3.5, +and all PyArrow versions starting from 0.14.0. + +Dependencies +------------ + +``pyarrow_hotfix`` is a pure Python package that does not have any explicit +dependencies, and assumes you have installed ``pyarrow`` through other means +(such as ``pip`` or ``conda``). + +Example +------- + +.. code-block:: pycon + + >>> import pyarrow as pa + >>> import pyarrow_hotfix + >>> + >>> pa.ipc.open_file('data.arrow') + Traceback (most recent call last): + [ ... ] + RuntimeError: forbidden deserialization of 'arrow.py_extension_type': storage_type = null, serialized = b"\x80\x03cbuiltins\neval\nq\x00X\x15\x00\x00\x00print('hello world!')q\x01\x85q\x02Rq\x03.", pickle disassembly: + 0: \x80 PROTO 3 + 2: c GLOBAL 'builtins eval' + 17: q BINPUT 0 + 19: X BINUNICODE "print('hello world!')" + 45: q BINPUT 1 + 47: \x85 TUPLE1 + 48: q BINPUT 2 + 50: R REDUCE + 51: q BINPUT 3 + 53: . STOP + highest protocol among opcodes = 2 + + +License +------- + +Like ``pyarrow``, ``pyarrow_hotfix`` is distributed under the terms of the +`Apache License, version 2.0 `_. diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow_hotfix-0.7.dist-info/RECORD b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow_hotfix-0.7.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..63f2dedf75f9105fb8696ff30d48158395a3a193 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow_hotfix-0.7.dist-info/RECORD @@ -0,0 +1,9 @@ +pyarrow_hotfix-0.7.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +pyarrow_hotfix-0.7.dist-info/METADATA,sha256=fO7W82ZH6rgnlyyF7jo4yPdrDgGHNK-J4gHElCJNLpE,3604 +pyarrow_hotfix-0.7.dist-info/RECORD,, +pyarrow_hotfix-0.7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87 +pyarrow_hotfix-0.7.dist-info/licenses/LICENSE.txt,sha256=z8d0m5b2O9McPEK1xHG_dWgUBT6EfBDz6wA0F7xSPTA,11358 +pyarrow_hotfix/__about__.py,sha256=Agujaq3cBtkA51m3gWW1a4zVczgjpxfBNrHN1mRu6ms,136 +pyarrow_hotfix/__init__.py,sha256=uhtqRd_GRb3e63G6ExCMOcQutv7R6Jv4HxevnIy0e-E,3793 +pyarrow_hotfix/__pycache__/__about__.cpython-312.pyc,, +pyarrow_hotfix/__pycache__/__init__.cpython-312.pyc,, diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow_hotfix-0.7.dist-info/WHEEL b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow_hotfix-0.7.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..12228d414b6cfed7c39d3781c85c63256a1d7fb5 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/pyarrow_hotfix-0.7.dist-info/WHEEL @@ -0,0 +1,4 @@ +Wheel-Version: 1.0 +Generator: hatchling 1.27.0 +Root-Is-Purelib: true +Tag: py3-none-any diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/responses/__init__.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/responses/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..43d270e36cb14c59a34a3f73e11fa6ea62d83ad0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/responses/__init__.py @@ -0,0 +1,852 @@ +from __future__ import absolute_import, print_function, division, unicode_literals + +import _io +from http import client +from http import cookies +import json as json_module +import logging +import re +from itertools import groupby + + 
+from collections import namedtuple +from functools import wraps +from requests.adapters import HTTPAdapter +from requests.exceptions import ConnectionError +from requests.utils import cookiejar_from_dict +from responses.matchers import json_params_matcher as _json_params_matcher +from responses.matchers import urlencoded_params_matcher as _urlencoded_params_matcher +from responses.registries import FirstMatchRegistry +from responses.matchers import query_string_matcher as _query_string_matcher +from warnings import warn + +from collections.abc import Sequence, Sized + +try: + from requests.packages.urllib3.response import HTTPResponse +except ImportError: # pragma: no cover + from urllib3.response import HTTPResponse # pragma: no cover +try: + from requests.packages.urllib3.connection import HTTPHeaderDict +except ImportError: # pragma: no cover + from urllib3.response import HTTPHeaderDict # pragma: no cover +try: + from requests.packages.urllib3.util.url import parse_url +except ImportError: # pragma: no cover + from urllib3.util.url import parse_url # pragma: no cover + + +from urllib.parse import ( + urlparse, + urlunparse, + parse_qsl, + urlsplit, + urlunsplit, + quote, +) + +from io import BytesIO as BufferIO + +from unittest import mock as std_mock + + +Pattern = re.Pattern + +UNSET = object() + +Call = namedtuple("Call", ["request", "response"]) + +_real_send = HTTPAdapter.send + +logger = logging.getLogger("responses") + + +class FalseBool: + # used for backwards compatibility, see + # https://github.com/getsentry/responses/issues/464 + def __bool__(self): + return False + + __nonzero__ = __bool__ + + +def urlencoded_params_matcher(params): + warn( + "Function is deprecated. Use 'from responses.matchers import urlencoded_params_matcher'", + DeprecationWarning, + ) + return _urlencoded_params_matcher(params) + + +def json_params_matcher(params): + warn( + "Function is deprecated. 
Use 'from responses.matchers import json_params_matcher'", + DeprecationWarning, + ) + return _json_params_matcher(params) + + +def _has_unicode(s): + return any(ord(char) > 128 for char in s) + + +def _clean_unicode(url): + # Clean up domain names, which use punycode to handle unicode chars + urllist = list(urlsplit(url)) + netloc = urllist[1] + if _has_unicode(netloc): + domains = netloc.split(".") + for i, d in enumerate(domains): + if _has_unicode(d): + d = "xn--" + d.encode("punycode").decode("ascii") + domains[i] = d + urllist[1] = ".".join(domains) + url = urlunsplit(urllist) + + # Clean up path/query/params, which use url-encoding to handle unicode chars + chars = list(url) + for i, x in enumerate(chars): + if ord(x) > 128: + chars[i] = quote(x) + + return "".join(chars) + + +def _cookies_from_headers(headers): + resp_cookie = cookies.SimpleCookie() + resp_cookie.load(headers["set-cookie"]) + cookies_dict = {name: v.value for name, v in resp_cookie.items()} + + return cookiejar_from_dict(cookies_dict) + + +def get_wrapped(func, responses, registry=None): + if registry is not None: + responses._set_registry(registry) + + @wraps(func) + def wrapper(*args, **kwargs): + with responses: + return func(*args, **kwargs) + + return wrapper + + +class CallList(Sequence, Sized): + def __init__(self): + self._calls = [] + + def __iter__(self): + return iter(self._calls) + + def __len__(self): + return len(self._calls) + + def __getitem__(self, idx): + return self._calls[idx] + + def add(self, request, response): + self._calls.append(Call(request, response)) + + def reset(self): + self._calls = [] + + +def _ensure_url_default_path(url): + if isinstance(url, str): + url_parts = list(urlsplit(url)) + if url_parts[2] == "": + url_parts[2] = "/" + url = urlunsplit(url_parts) + return url + + +def _get_url_and_path(url): + url_parsed = urlparse(url) + url_and_path = urlunparse( + [url_parsed.scheme, url_parsed.netloc, url_parsed.path, None, None, None] + ) + return parse_url(url_and_path).url + + +def _handle_body(body): + if isinstance(body, str): + body = body.encode("utf-8") + if isinstance(body, _io.BufferedReader): + return body + + data = BufferIO(body) + + def is_closed(): + """ + Real Response uses HTTPResponse as body object. 
+ Thus, when method is_closed is called first to check if there is any more + content to consume and the file-like object is still opened + + This method ensures stability to work for both: + https://github.com/getsentry/responses/issues/438 + https://github.com/getsentry/responses/issues/394 + + where file should be intentionally be left opened to continue consumption + """ + if not data.closed and data.read(1): + # if there is more bytes to read then keep open, but return pointer + data.seek(-1, 1) + return False + else: + if not data.closed: + # close but return False to mock like is still opened + data.close() + return False + + # only if file really closed (by us) return True + return True + + data.isclosed = is_closed + return data + + +class BaseResponse(object): + passthrough = False + content_type = None + headers = None + + stream = False + + def __init__(self, method, url, match_querystring=None, match=()): + self.method = method + # ensure the url has a default path set if the url is a string + self.url = _ensure_url_default_path(url) + + if self._should_match_querystring(match_querystring): + match = tuple(match) + (_query_string_matcher(urlparse(self.url).query),) + + self.match = match + self.call_count = 0 + + def __eq__(self, other): + if not isinstance(other, BaseResponse): + return False + + if self.method != other.method: + return False + + # Can't simply do an equality check on the objects directly here since __eq__ isn't + # implemented for regex. It might seem to work as regex is using a cache to return + # the same regex instances, but it doesn't in all cases. + self_url = self.url.pattern if isinstance(self.url, Pattern) else self.url + other_url = other.url.pattern if isinstance(other.url, Pattern) else other.url + + return self_url == other_url + + def __ne__(self, other): + return not self.__eq__(other) + + def _should_match_querystring(self, match_querystring_argument): + if isinstance(self.url, Pattern): + # the old default from <= 0.9.0 + return False + + if match_querystring_argument is not None: + if not isinstance(match_querystring_argument, FalseBool): + warn( + ( + "Argument 'match_querystring' is deprecated. 
" + "Use 'responses.matchers.query_param_matcher' or " + "'responses.matchers.query_string_matcher'" + ), + DeprecationWarning, + ) + return match_querystring_argument + + return bool(urlparse(self.url).query) + + def _url_matches(self, url, other): + if isinstance(url, str): + if _has_unicode(url): + url = _clean_unicode(url) + + return _get_url_and_path(url) == _get_url_and_path(other) + + elif isinstance(url, Pattern) and url.match(other): + return True + + else: + return False + + @staticmethod + def _req_attr_matches(match, request): + for matcher in match: + valid, reason = matcher(request) + if not valid: + return False, reason + + return True, "" + + def get_headers(self): + headers = HTTPHeaderDict() # Duplicate headers are legal + if self.content_type is not None: + headers["Content-Type"] = self.content_type + if self.headers: + headers.extend(self.headers) + return headers + + def get_response(self, request): + raise NotImplementedError + + def matches(self, request): + if request.method != self.method: + return False, "Method does not match" + + if not self._url_matches(self.url, request.url): + return False, "URL does not match" + + valid, reason = self._req_attr_matches(self.match, request) + if not valid: + return False, reason + + return True, "" + + +class Response(BaseResponse): + def __init__( + self, + method, + url, + body="", + json=None, + status=200, + headers=None, + stream=None, + content_type=UNSET, + auto_calculate_content_length=False, + **kwargs + ): + # if we were passed a `json` argument, + # override the body and content_type + if json is not None: + assert not body + body = json_module.dumps(json) + if content_type is UNSET: + content_type = "application/json" + + if content_type is UNSET: + if isinstance(body, str) and _has_unicode(body): + content_type = "text/plain; charset=utf-8" + else: + content_type = "text/plain" + + self.body = body + self.status = status + self.headers = headers + + if stream is not None: + warn( + "stream argument is deprecated. Use stream parameter in request directly", + DeprecationWarning, + ) + + self.stream = stream + self.content_type = content_type + self.auto_calculate_content_length = auto_calculate_content_length + super(Response, self).__init__(method, url, **kwargs) + + def get_response(self, request): + if self.body and isinstance(self.body, Exception): + raise self.body + + headers = self.get_headers() + status = self.status + body = _handle_body(self.body) + + if ( + self.auto_calculate_content_length + and isinstance(body, BufferIO) + and "Content-Length" not in headers + ): + content_length = len(body.getvalue()) + headers["Content-Length"] = str(content_length) + + return HTTPResponse( + status=status, + reason=client.responses.get(status, None), + body=body, + headers=headers, + original_response=OriginalResponseShim(headers), + preload_content=False, + ) + + def __repr__(self): + return ( + "".format( + url=self.url, + status=self.status, + content_type=self.content_type, + headers=json_module.dumps(self.headers), + ) + ) + + +class CallbackResponse(BaseResponse): + def __init__( + self, method, url, callback, stream=None, content_type="text/plain", **kwargs + ): + self.callback = callback + + if stream is not None: + warn( + "stream argument is deprecated. 
Use stream parameter in request directly", + DeprecationWarning, + ) + self.stream = stream + self.content_type = content_type + super(CallbackResponse, self).__init__(method, url, **kwargs) + + def get_response(self, request): + headers = self.get_headers() + + result = self.callback(request) + if isinstance(result, Exception): + raise result + + status, r_headers, body = result + if isinstance(body, Exception): + raise body + + # If the callback set a content-type remove the one + # set in add_callback() so that we don't have multiple + # content type values. + has_content_type = False + if isinstance(r_headers, dict) and "Content-Type" in r_headers: + has_content_type = True + elif isinstance(r_headers, list): + has_content_type = any( + [h for h in r_headers if h and h[0].lower() == "content-type"] + ) + if has_content_type: + headers.pop("Content-Type", None) + + body = _handle_body(body) + headers.extend(r_headers) + + return HTTPResponse( + status=status, + reason=client.responses.get(status, None), + body=body, + headers=headers, + original_response=OriginalResponseShim(headers), + preload_content=False, + ) + + +class PassthroughResponse(BaseResponse): + passthrough = True + + +class OriginalResponseShim(object): + """ + Shim for compatibility with older versions of urllib3 + + requests cookie handling depends on responses having a property chain of + `response._original_response.msg` which contains the response headers [1] + + Using HTTPResponse() for this purpose causes compatibility errors with + urllib3<1.23.0. To avoid adding more dependencies we can use this shim. + + [1]: https://github.com/psf/requests/blob/75bdc998e2d/requests/cookies.py#L125 + """ + + def __init__(self, headers): + self.msg = headers + + def isclosed(self): + return True + + def close(self): + return + + +class RequestsMock(object): + DELETE = "DELETE" + GET = "GET" + HEAD = "HEAD" + OPTIONS = "OPTIONS" + PATCH = "PATCH" + POST = "POST" + PUT = "PUT" + response_callback = None + + def __init__( + self, + assert_all_requests_are_fired=True, + response_callback=None, + passthru_prefixes=(), + target="requests.adapters.HTTPAdapter.send", + registry=FirstMatchRegistry, + ): + self._calls = CallList() + self.reset() + self._registry = registry() # call only after reset + self.assert_all_requests_are_fired = assert_all_requests_are_fired + self.response_callback = response_callback + self.passthru_prefixes = tuple(passthru_prefixes) + self.target = target + self._patcher = None + + def _get_registry(self): + return self._registry + + def _set_registry(self, new_registry): + if self.registered(): + err_msg = ( + "Cannot replace Registry, current registry has responses.\n" + "Run 'responses.registry.reset()' first" + ) + raise AttributeError(err_msg) + + self._registry = new_registry() + + def reset(self): + self._registry = FirstMatchRegistry() + self._calls.reset() + self.passthru_prefixes = () + + def add( + self, + method=None, # method or ``Response`` + url=None, + body="", + adding_headers=None, + *args, + **kwargs + ): + """ + >>> import responses + + A basic request: + >>> responses.add(responses.GET, 'http://example.com') + + You can also directly pass an object which implements the + ``BaseResponse`` interface: + + >>> responses.add(Response(...)) + + A JSON payload: + + >>> responses.add( + >>> method='GET', + >>> url='http://example.com', + >>> json={'foo': 'bar'}, + >>> ) + + Custom headers: + + >>> responses.add( + >>> method='GET', + >>> url='http://example.com', + >>> headers={'X-Header': 'foo'}, 
+ >>> ) + + """ + if isinstance(method, BaseResponse): + self._registry.add(method) + return + + if adding_headers is not None: + kwargs.setdefault("headers", adding_headers) + + self._registry.add(Response(method=method, url=url, body=body, **kwargs)) + + def add_passthru(self, prefix): + """ + Register a URL prefix or regex to passthru any non-matching mock requests to. + + For example, to allow any request to 'https://example.com', but require + mocks for the remainder, you would add the prefix as so: + + >>> import responses + >>> responses.add_passthru('https://example.com') + + Regex can be used like: + + >>> responses.add_passthru(re.compile('https://example.com/\\w+')) + """ + if not isinstance(prefix, Pattern) and _has_unicode(prefix): + prefix = _clean_unicode(prefix) + self.passthru_prefixes += (prefix,) + + def remove(self, method_or_response=None, url=None): + """ + Removes a response previously added using ``add()``, identified + either by a response object inheriting ``BaseResponse`` or + ``method`` and ``url``. Removes all matching responses. + + >>> import responses + >>> responses.add(responses.GET, 'http://example.org') + >>> responses.remove(responses.GET, 'http://example.org') + """ + if isinstance(method_or_response, BaseResponse): + response = method_or_response + else: + response = BaseResponse(method=method_or_response, url=url) + + self._registry.remove(response) + + def replace(self, method_or_response=None, url=None, body="", *args, **kwargs): + """ + Replaces a response previously added using ``add()``. The signature + is identical to ``add()``. The response is identified using ``method`` + and ``url``, and the first matching response is replaced. + + >>> import responses + >>> responses.add(responses.GET, 'http://example.org', json={'data': 1}) + >>> responses.replace(responses.GET, 'http://example.org', json={'data': 2}) + """ + if isinstance(method_or_response, BaseResponse): + url = method_or_response.url + response = method_or_response + else: + response = Response(method=method_or_response, url=url, body=body, **kwargs) + + self._registry.replace(response) + + def upsert(self, method_or_response=None, url=None, body="", *args, **kwargs): + """ + Replaces a response previously added using ``add()``, or adds the response + if no response exists. Responses are matched using ``method``and ``url``. + The first matching response is replaced. 
+ + >>> import responses + >>> responses.add(responses.GET, 'http://example.org', json={'data': 1}) + >>> responses.upsert(responses.GET, 'http://example.org', json={'data': 2}) + """ + try: + self.replace(method_or_response, url, body, *args, **kwargs) + except ValueError: + self.add(method_or_response, url, body, *args, **kwargs) + + def add_callback( + self, + method, + url, + callback, + match_querystring=FalseBool(), + content_type="text/plain", + match=(), + ): + # ensure the url has a default path set if the url is a string + # url = _ensure_url_default_path(url, match_querystring) + + self._registry.add( + CallbackResponse( + url=url, + method=method, + callback=callback, + content_type=content_type, + match_querystring=match_querystring, + match=match, + ) + ) + + def registered(self): + return self._registry.registered + + @property + def calls(self): + return self._calls + + def __enter__(self): + self.start() + return self + + def __exit__(self, type, value, traceback): + success = type is None + self.stop(allow_assert=success) + self.reset() + return success + + def activate(self, func=None, registry=None): + if func is not None: + return get_wrapped(func, self) + + def deco_activate(func): + return get_wrapped(func, self, registry) + + return deco_activate + + def _find_match(self, request): + """ + Iterates through all available matches and validates if any of them matches the request + + :param request: (PreparedRequest), request object + :return: + (Response) found match. If multiple found, then remove & return the first match. + (list) list with reasons why other matches don't match + """ + return self._registry.find(request) + + def _parse_request_params(self, url): + params = {} + for key, val in groupby(parse_qsl(urlparse(url).query), lambda kv: kv[0]): + values = list(map(lambda x: x[1], val)) + if len(values) == 1: + values = values[0] + params[key] = values + return params + + def _on_request(self, adapter, request, **kwargs): + # add attributes params and req_kwargs to 'request' object for further match comparison + # original request object does not have these attributes + request.params = self._parse_request_params(request.path_url) + request.req_kwargs = kwargs + + match, match_failed_reasons = self._find_match(request) + resp_callback = self.response_callback + + if match is None: + if any( + [ + p.match(request.url) + if isinstance(p, Pattern) + else request.url.startswith(p) + for p in self.passthru_prefixes + ] + ): + logger.info("request.allowed-passthru", extra={"url": request.url}) + return _real_send(adapter, request, **kwargs) + + error_msg = ( + "Connection refused by Responses - the call doesn't " + "match any registered mock.\n\n" + "Request: \n" + "- %s %s\n\n" + "Available matches:\n" % (request.method, request.url) + ) + for i, m in enumerate(self.registered()): + error_msg += "- {} {} {}\n".format( + m.method, m.url, match_failed_reasons[i] + ) + + response = ConnectionError(error_msg) + response.request = request + + self._calls.add(request, response) + response = resp_callback(response) if resp_callback else response + raise response + + if match.passthrough: + logger.info("request.passthrough-response", extra={"url": request.url}) + response = _real_send(adapter, request, **kwargs) + else: + try: + response = adapter.build_response(request, match.get_response(request)) + except BaseException as response: + match.call_count += 1 + self._calls.add(request, response) + response = resp_callback(response) if resp_callback else response + raise + + 
response = resp_callback(response) if resp_callback else response + match.call_count += 1 + self._calls.add(request, response) + return response + + def start(self): + def unbound_on_send(adapter, request, *a, **kwargs): + return self._on_request(adapter, request, *a, **kwargs) + + self._patcher = std_mock.patch(target=self.target, new=unbound_on_send) + self._patcher.start() + + def stop(self, allow_assert=True): + self._patcher.stop() + if not self.assert_all_requests_are_fired: + return + + if not allow_assert: + return + + not_called = [m for m in self.registered() if m.call_count == 0] + if not_called: + raise AssertionError( + "Not all requests have been executed {0!r}".format( + [(match.method, match.url) for match in not_called] + ) + ) + + def assert_call_count(self, url, count): + call_count = len( + [ + 1 + for call in self.calls + if call.request.url == _ensure_url_default_path(url) + ] + ) + if call_count == count: + return True + else: + raise AssertionError( + "Expected URL '{0}' to be called {1} times. Called {2} times.".format( + url, count, call_count + ) + ) + + +# expose default mock namespace +mock = _default_mock = RequestsMock(assert_all_requests_are_fired=False) +__all__ = [ + "CallbackResponse", + "Response", + "RequestsMock", + # Exposed by the RequestsMock class: + "activate", + "add", + "add_callback", + "add_passthru", + "assert_all_requests_are_fired", + "assert_call_count", + "calls", + "DELETE", + "GET", + "HEAD", + "OPTIONS", + "passthru_prefixes", + "PATCH", + "POST", + "PUT", + "registered", + "remove", + "replace", + "reset", + "response_callback", + "start", + "stop", + "target", + "upsert", +] + +activate = _default_mock.activate +add = _default_mock.add +add_callback = _default_mock.add_callback +add_passthru = _default_mock.add_passthru +assert_all_requests_are_fired = _default_mock.assert_all_requests_are_fired +assert_call_count = _default_mock.assert_call_count +calls = _default_mock.calls +DELETE = _default_mock.DELETE +GET = _default_mock.GET +HEAD = _default_mock.HEAD +OPTIONS = _default_mock.OPTIONS +passthru_prefixes = _default_mock.passthru_prefixes +PATCH = _default_mock.PATCH +POST = _default_mock.POST +PUT = _default_mock.PUT +registered = _default_mock.registered +remove = _default_mock.remove +replace = _default_mock.replace +reset = _default_mock.reset +response_callback = _default_mock.response_callback +start = _default_mock.start +stop = _default_mock.stop +target = _default_mock.target +upsert = _default_mock.upsert diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/responses/__init__.pyi b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/responses/__init__.pyi new file mode 100644 index 0000000000000000000000000000000000000000..183f452b3ec66c89af9f97027e8ed1e7d1c08708 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/responses/__init__.pyi @@ -0,0 +1,352 @@ +from collections import Sequence, Sized +from typing import ( + Any, + Callable, + Iterator, + Mapping, + Optional, + NamedTuple, + Protocol, + TypeVar, + Dict, + List, + Tuple, + Union, + Iterable, + overload, + Type +) + +from io import BufferedReader, BytesIO +from re import Pattern +from requests.adapters import HTTPResponse, PreparedRequest +from requests.cookies import RequestsCookieJar +from typing_extensions import Literal +from unittest import mock as std_mock +from urllib.parse import quote as quote +from urllib3.response import HTTPHeaderDict # type: ignore # Not currently exposed in typestubs. 
+ +from .matchers import urlencoded_params_matcher, json_params_matcher + + +def _clean_unicode(url: str) -> str: ... +def _cookies_from_headers(headers: Dict[str, str]) -> RequestsCookieJar: ... +def _ensure_str(s: str) -> str: ... +def _ensure_url_default_path( + url: Union[Pattern[str], str] +) -> Union[Pattern[str], str]: ... +def _get_url_and_path(url: str) -> str: ... +def _handle_body( + body: Optional[Union[bytes, BufferedReader, str]] +) -> Union[BufferedReader, BytesIO]: ... +def _has_unicode(s: str) -> bool: ... +def _is_string(s: Union[Pattern[str], str]) -> bool: ... +def get_wrapped( + func: Callable[..., Any], responses: RequestsMock, registry: Optional[Any] +) -> Callable[..., Any]: ... + + +class Call(NamedTuple): + request: PreparedRequest + response: Any + +_Body = Union[str, BaseException, "Response", BufferedReader, bytes] + +MatcherIterable = Iterable[Callable[[Any], Callable[..., Any]]] + +class CallList(Sequence[Call], Sized): + def __init__(self) -> None: + self._calls = List[Call] + ... + def __iter__(self) -> Iterator[Call]: ... + def __len__(self) -> int: ... + def __getitem__(self, idx: int) -> Call: ... # type: ignore [override] + def add(self, request: PreparedRequest, response: _Body) -> None: ... + def reset(self) -> None: ... + +class FalseBool: + def __bool__(self) -> bool: ... + +class BaseResponse: + passthrough: bool = ... + content_type: Optional[str] = ... + headers: Optional[Mapping[str, str]] = ... + stream: bool = ... + method: Any = ... + url: Any = ... + match_querystring: Any = ... + match: MatcherIterable = ... + call_count: int = ... + def __init__( + self, + method: str, + url: Union[Pattern[str], str], + match_querystring: Union[bool, object] = ..., + match: MatcherIterable = ..., + ) -> None: ... + def __eq__(self, other: Any) -> bool: ... + def __ne__(self, other: Any) -> bool: ... + def _req_attr_matches( + self, match: MatcherIterable, request: PreparedRequest + ) -> Tuple[bool, str]: ... + def _should_match_querystring( + self, match_querystring_argument: Union[bool, object] + ) -> bool: ... + def _url_matches( + self, url: Union[Pattern[str], str], other: str, match_querystring: bool = ... + ) -> bool: ... + def _url_matches_strict(self, url: str, other: str) -> bool: ... + def get_headers(self) -> HTTPHeaderDict: ... # type: ignore + def get_response(self, request: PreparedRequest) -> None: ... + def matches(self, request: PreparedRequest) -> Tuple[bool, str]: ... + +class Response(BaseResponse): + body: _Body = ... + status: int = ... + headers: Optional[Mapping[str, str]] = ... + stream: bool = ... + content_type: Optional[str] = ... + auto_calculate_content_length: bool = ... + def __init__( + self, + method: str, + url: Union[Pattern[str], str], + body: _Body = ..., + json: Optional[Any] = ..., + status: int = ..., + headers: Optional[Mapping[str, str]] = ..., + stream: bool = ..., + content_type: Optional[str] = ..., + auto_calculate_content_length: bool = ..., + match_querystring: bool = ..., + match: MatcherIterable = ..., + ) -> None: ... + def get_response( # type: ignore [override] + self, request: PreparedRequest + ) -> HTTPResponse: ... + +class CallbackResponse(BaseResponse): + callback: Callable[[Any], Any] = ... + stream: bool = ... + content_type: Optional[str] = ... 
+ def __init__( + self, + method: str, + url: Union[Pattern[str], str], + callback: Callable[[Any], Any], + stream: bool = ..., + content_type: Optional[str] = ..., + match_querystring: Union[bool, FalseBool] = ..., + match: MatcherIterable = ..., + ) -> None: ... + def get_response( # type: ignore [override] + self, request: PreparedRequest + ) -> HTTPResponse: ... + +class PassthroughResponse(BaseResponse): + passthrough: bool = ... + +class OriginalResponseShim: + msg: Any = ... + def __init__( # type: ignore [no-any-unimported] + self, headers: HTTPHeaderDict + ) -> None: ... + def isclosed(self) -> bool: ... + +_F = TypeVar("_F", bound=Callable[..., Any]) + +class RequestsMock: + DELETE: Literal["DELETE"] + GET: Literal["GET"] + HEAD: Literal["HEAD"] + OPTIONS: Literal["OPTIONS"] + PATCH: Literal["PATCH"] + POST: Literal["POST"] + PUT: Literal["PUT"] + response_callback: Optional[Callable[[Any], Any]] = ... + assert_all_requests_are_fired: Any = ... + passthru_prefixes: Tuple[Union[str, Pattern[str]], ...] = ... + target: Any = ... + _matches: List[Any] + def __init__( + self, + assert_all_requests_are_fired: bool = ..., + response_callback: Optional[Callable[[Any], Any]] = ..., + passthru_prefixes: Tuple[str, ...] = ..., + target: str = ..., + registry: Any = ..., + ) -> None: + self._patcher = Callable[[Any], Any] + self._calls = CallList + ... + def reset(self) -> None: ... + add: _Add + add_passthru: _AddPassthru + def remove( + self, + method_or_response: Optional[Union[str, Response]] = ..., + url: Optional[Union[Pattern[str], str]] = ..., + ) -> None: ... + replace: _Replace + upsert: _Upsert + add_callback: _AddCallback + @property + def calls(self) -> CallList: ... + def __enter__(self) -> RequestsMock: ... + def __exit__(self, type: Any, value: Any, traceback: Any) -> bool: ... + def activate(self, func: Optional[_F], registry: Optional[Any]) -> _F: ... + def start(self) -> None: ... + def stop(self, allow_assert: bool = ...) -> None: ... + def assert_call_count(self, url: str, count: int) -> bool: ... + def registered(self) -> List[Any]: ... + def _set_registry(self, registry: Any) -> None: ... + def _get_registry(self) -> Any: ... + + +HeaderSet = Optional[Union[Mapping[str, str], List[Tuple[str, str]]]] + +class _Add(Protocol): + def __call__( + self, + method: Optional[Union[str, BaseResponse]] = ..., + url: Optional[Union[Pattern[str], str]] = ..., + body: _Body = ..., + json: Optional[Any] = ..., + status: int = ..., + headers: HeaderSet = ..., + stream: bool = ..., + content_type: Optional[str] = ..., + auto_calculate_content_length: bool = ..., + adding_headers: HeaderSet = ..., + match_querystring: bool = ..., + match: MatcherIterable = ..., + ) -> None: ... + +class _AddCallback(Protocol): + def __call__( + self, + method: str, + url: Union[Pattern[str], str], + callback: Callable[[PreparedRequest], Union[Exception, Tuple[int, Mapping[str, str], _Body]]], + match_querystring: bool = ..., + content_type: Optional[str] = ..., + match: MatcherIterable = ..., + ) -> None: ... + +class _AddPassthru(Protocol): + def __call__( + self, prefix: Union[Pattern[str], str] + ) -> None: ... + +class _Remove(Protocol): + def __call__( + self, + method_or_response: Optional[Union[str, BaseResponse]] = ..., + url: Optional[Union[Pattern[str], str]] = ..., + ) -> None: ... 
+ +class _Replace(Protocol): + def __call__( + self, + method_or_response: Optional[Union[str, BaseResponse]] = ..., + url: Optional[Union[Pattern[str], str]] = ..., + body: _Body = ..., + json: Optional[Any] = ..., + status: int = ..., + headers: HeaderSet = ..., + stream: bool = ..., + content_type: Optional[str] = ..., + adding_headers: HeaderSet = ..., + match_querystring: bool = ..., + match: MatcherIterable = ..., + ) -> None: ... + +class _Upsert(Protocol): + def __call__( + self, + method: Optional[Union[str, BaseResponse]] = ..., + url: Optional[Union[Pattern[str], str]] = ..., + body: _Body = ..., + json: Optional[Any] = ..., + status: int = ..., + headers: HeaderSet = ..., + stream: bool = ..., + content_type: Optional[str] = ..., + adding_headers: HeaderSet = ..., + match_querystring: bool = ..., + match: MatcherIterable = ..., + ) -> None: ... + +class _Registered(Protocol): + def __call__(self) -> List[Response]: ... + + +class _Activate(Protocol): + # see https://github.com/getsentry/responses/pull/469 for more details + + @overload + def __call__(self, func: _F = ...) -> _F: ... + # use this overload for scenario when 'responses.activate' is used + + @overload + def __call__(self, registry: Type[Any] = ...) -> Callable[['_F'], '_F']: ... + # use this overload for scenario when 'responses.activate(registry=)' is used + + +activate: _Activate +add: _Add +add_callback: _AddCallback +add_passthru: _AddPassthru +assert_all_requests_are_fired: bool +assert_call_count: Callable[[str, int], bool] +calls: CallList +DELETE: Literal["DELETE"] +GET: Literal["GET"] +HEAD: Literal["HEAD"] +mock: RequestsMock +_default_mock: RequestsMock +OPTIONS: Literal["OPTIONS"] +passthru_prefixes: Tuple[str, ...] +PATCH: Literal["PATCH"] +POST: Literal["POST"] +PUT: Literal["PUT"] +registered: _Registered +remove: _Remove +replace: _Replace +reset: Callable[[], None] +response_callback: Callable[[Any], Any] +start: Callable[[], None] +stop: Callable[..., None] +target: Any +upsert: _Upsert + +__all__ = [ + "CallbackResponse", + "Response", + "RequestsMock", + # Exposed by the RequestsMock class: + "activate", + "add", + "add_callback", + "add_passthru", + "assert_all_requests_are_fired", + "assert_call_count", + "calls", + "DELETE", + "GET", + "HEAD", + "OPTIONS", + "passthru_prefixes", + "PATCH", + "POST", + "PUT", + "registered", + "remove", + "replace", + "reset", + "response_callback", + "start", + "stop", + "target", + "upsert", +] diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/responses/matchers.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/responses/matchers.py new file mode 100644 index 0000000000000000000000000000000000000000..893edc19206e637689e5016a9900afef6b472ecc --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/responses/matchers.py @@ -0,0 +1,325 @@ +import json as json_module + +from requests import PreparedRequest +from urllib.parse import parse_qsl, urlparse +from requests.packages.urllib3.util.url import parse_url +from json.decoder import JSONDecodeError + + +def _create_key_val_str(input_dict): + """ + Returns string of format {'key': val, 'key2': val2} + Function is called recursively for nested dictionaries + + :param input_dict: dictionary to transform + :return: (str) reformatted string + """ + + def list_to_str(input_list): + """ + Convert all list items to string. 
+ Function is called recursively for nested lists + """ + converted_list = [] + for item in sorted(input_list, key=lambda x: str(x)): + if isinstance(item, dict): + item = _create_key_val_str(item) + elif isinstance(item, list): + item = list_to_str(item) + + converted_list.append(str(item)) + list_str = ", ".join(converted_list) + return "[" + list_str + "]" + + items_list = [] + for key in sorted(input_dict.keys(), key=lambda x: str(x)): + val = input_dict[key] + if isinstance(val, dict): + val = _create_key_val_str(val) + elif isinstance(val, list): + val = list_to_str(input_list=val) + + items_list.append("{}: {}".format(key, val)) + + key_val_str = "{{{}}}".format(", ".join(items_list)) + return key_val_str + + +def urlencoded_params_matcher(params): + """ + Matches URL encoded data + + :param params: (dict) data provided to 'data' arg of request + :return: (func) matcher + """ + + def match(request): + reason = "" + request_body = request.body + qsl_body = dict(parse_qsl(request_body)) if request_body else {} + params_dict = params or {} + valid = params is None if request_body is None else params_dict == qsl_body + if not valid: + reason = "request.body doesn't match: {} doesn't match {}".format( + _create_key_val_str(qsl_body), _create_key_val_str(params_dict) + ) + + return valid, reason + + return match + + +def json_params_matcher(params): + """ + Matches JSON encoded data + + :param params: (dict) JSON data provided to 'json' arg of request + :return: (func) matcher + """ + + def match(request): + reason = "" + request_body = request.body + params_dict = params or {} + try: + if isinstance(request_body, bytes): + request_body = request_body.decode("utf-8") + json_body = json_module.loads(request_body) if request_body else {} + + valid = params is None if request_body is None else params_dict == json_body + + if not valid: + reason = "request.body doesn't match: {} doesn't match {}".format( + _create_key_val_str(json_body), _create_key_val_str(params_dict) + ) + + except JSONDecodeError: + valid = False + reason = ( + "request.body doesn't match: JSONDecodeError: Cannot parse request.body" + ) + + return valid, reason + + return match + + +def fragment_identifier_matcher(identifier): + def match(request): + reason = "" + url_fragment = urlparse(request.url).fragment + if identifier: + url_fragment_qsl = sorted(parse_qsl(url_fragment)) + identifier_qsl = sorted(parse_qsl(identifier)) + valid = identifier_qsl == url_fragment_qsl + else: + valid = not url_fragment + + if not valid: + reason = "URL fragment identifier is different: {} doesn't match {}".format( + identifier, url_fragment + ) + return valid, reason + + return match + + +def query_param_matcher(params): + """ + Matcher to match 'params' argument in request + + :param params: (dict), same as provided to request + :return: (func) matcher + """ + + def match(request): + reason = "" + request_params = request.params + request_params_dict = request_params or {} + params_dict = params or {} + valid = ( + params is None + if request_params is None + else params_dict == request_params_dict + ) + + if not valid: + reason = "Parameters do not match. 
{} doesn't match {}".format( + _create_key_val_str(request_params_dict), + _create_key_val_str(params_dict), + ) + + return valid, reason + + return match + + +def query_string_matcher(query): + """ + Matcher to match query string part of request + + :param query: (str), same as constructed by request + :return: (func) matcher + """ + + def match(request): + reason = "" + data = parse_url(request.url) + request_query = data.query + + request_qsl = sorted(parse_qsl(request_query)) if request_query else {} + matcher_qsl = sorted(parse_qsl(query)) if query else {} + + valid = not query if request_query is None else request_qsl == matcher_qsl + + if not valid: + reason = "Query string doesn't match. {} doesn't match {}".format( + _create_key_val_str(dict(request_qsl)), + _create_key_val_str(dict(matcher_qsl)), + ) + + return valid, reason + + return match + + +def request_kwargs_matcher(kwargs): + """ + Matcher to match keyword arguments provided to request + + :param kwargs: (dict), keyword arguments, same as provided to request + :return: (func) matcher + """ + + def match(request): + reason = "" + kwargs_dict = kwargs or {} + # validate only kwargs that were requested for comparison, skip defaults + request_kwargs = { + k: v for k, v in request.req_kwargs.items() if k in kwargs_dict + } + + valid = ( + not kwargs_dict + if not request_kwargs + else sorted(kwargs.items()) == sorted(request_kwargs.items()) + ) + + if not valid: + reason = "Arguments don't match: {} doesn't match {}".format( + _create_key_val_str(request_kwargs), _create_key_val_str(kwargs_dict) + ) + + return valid, reason + + return match + + +def multipart_matcher(files, data=None): + """ + Matcher to match 'multipart/form-data' content-type. + This function constructs request body and headers from provided 'data' and 'files' + arguments and compares to actual request + + :param files: (dict), same as provided to request + :param data: (dict), same as provided to request + :return: (func) matcher + """ + if not files: + raise TypeError("files argument cannot be empty") + + prepared = PreparedRequest() + prepared.headers = {"Content-Type": ""} + prepared.prepare_body(data=data, files=files) + + def get_boundary(content_type): + """ + Parse 'boundary' value from header. + + :param content_type: (str) headers["Content-Type"] value + :return: (str) boundary value + """ + if "boundary=" not in content_type: + return "" + + return content_type.split("boundary=")[1] + + def match(request): + reason = "multipart/form-data doesn't match. 
" + if "Content-Type" not in request.headers: + return False, reason + "Request is missing the 'Content-Type' header" + + request_boundary = get_boundary(request.headers["Content-Type"]) + prepared_boundary = get_boundary(prepared.headers["Content-Type"]) + + # replace boundary value in header and in body, since by default + # urllib3.filepost.encode_multipart_formdata dynamically calculates + # random boundary alphanumeric value + request_content_type = request.headers["Content-Type"] + prepared_content_type = prepared.headers["Content-Type"].replace( + prepared_boundary, request_boundary + ) + + request_body = request.body + prepared_body = prepared.body + + if isinstance(prepared_body, bytes): + # since headers always come as str, need to convert to bytes + prepared_boundary = prepared_boundary.encode("utf-8") + request_boundary = request_boundary.encode("utf-8") + + prepared_body = prepared_body.replace(prepared_boundary, request_boundary) + + headers_valid = prepared_content_type == request_content_type + if not headers_valid: + return ( + False, + reason + + "Request headers['Content-Type'] is different. {} isn't equal to {}".format( + request_content_type, prepared_content_type + ), + ) + + body_valid = prepared_body == request_body + if not body_valid: + return False, reason + "Request body differs. {} aren't equal {}".format( + request_body, prepared_body + ) + + return True, "" + + return match + + +def header_matcher(headers, strict_match=False): + """ + Matcher to match 'headers' argument in request using the responses library. + + Because ``requests`` will send several standard headers in addition to what + was specified by your code, request headers that are additional to the ones + passed to the matcher are ignored by default. You can change this behaviour + by passing ``strict_match=True``. + + :param headers: (dict), same as provided to request + :param strict_match: (bool), whether headers in addition to those specified + in the matcher should cause the match to fail. + :return: (func) matcher + """ + + def match(request): + request_headers = request.headers or {} + + if not strict_match: + # filter down to just the headers specified in the matcher + request_headers = {k: v for k, v in request_headers.items() if k in headers} + + valid = sorted(headers.items()) == sorted(request_headers.items()) + + if not valid: + return False, "Headers do not match: {} doesn't match {}".format( + _create_key_val_str(request_headers), _create_key_val_str(headers) + ) + + return valid, "" + + return match diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/responses/matchers.pyi b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/responses/matchers.pyi new file mode 100644 index 0000000000000000000000000000000000000000..188de2e34896c79ba7249e97c158ef718a507717 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/responses/matchers.pyi @@ -0,0 +1,44 @@ +from typing import ( + Any, + Callable, + Optional, + Dict, +) + +JSONDecodeError = ValueError + + +def _create_key_val_str(input_dict: Dict[Any, Any]) -> str: ... + +def json_params_matcher( + params: Optional[Dict[str, Any]] +) -> Callable[..., Any]: ... + +def urlencoded_params_matcher( + params: Optional[Dict[str, str]] +) -> Callable[..., Any]: ... + +def query_param_matcher( + params: Optional[Dict[str, str]] +) -> Callable[..., Any]: ... + +def query_string_matcher( + query: Optional[str] +) -> Callable[..., Any]: ... 
+ +def request_kwargs_matcher( + kwargs: Optional[Dict[str, Any]] +) -> Callable[..., Any]: ... + +def multipart_matcher( + files: Dict[str, Any], data: Optional[Dict[str, str]] = ... +) -> Callable[..., Any]: ... + +def header_matcher( + headers: Dict[str, str], + strict_match: bool = ... +) -> Callable[..., Any]: ... + +def fragment_identifier_matcher( + identifier: Optional[str] +) -> Callable[..., Any]: ... diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/responses/py.typed b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/responses/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/responses/registries.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/responses/registries.py new file mode 100644 index 0000000000000000000000000000000000000000..22f79519a1100db4a163d1615c4c3300824972e7 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/responses/registries.py @@ -0,0 +1,63 @@ +from typing import ( + TYPE_CHECKING, + List, + Optional, + Tuple, +) + +if TYPE_CHECKING: # pragma: no cover + # import only for linter run + from requests import PreparedRequest + from responses import BaseResponse + + +class FirstMatchRegistry(object): + def __init__(self) -> None: + self._responses: List["BaseResponse"] = [] + + @property + def registered(self) -> List["BaseResponse"]: + return self._responses + + def reset(self) -> None: + self._responses = [] + + def find( + self, request: "PreparedRequest" + ) -> Tuple[Optional["BaseResponse"], List[str]]: + found = None + found_match = None + match_failed_reasons = [] + for i, response in enumerate(self.registered): + match_result, reason = response.matches(request) + if match_result: + if found is None: + found = i + found_match = response + else: + if self.registered[found].call_count > 0: + # that assumes that some responses were added between calls + self.registered.pop(found) + found_match = response + break + # Multiple matches found. Remove & return the first response. 
+ return self.registered.pop(found), match_failed_reasons + else: + match_failed_reasons.append(reason) + return found_match, match_failed_reasons + + def add(self, response: "BaseResponse") -> None: + self.registered.append(response) + + def remove(self, response: "BaseResponse") -> None: + while response in self.registered: + self.registered.remove(response) + + def replace(self, response: "BaseResponse") -> None: + try: + index = self.registered.index(response) + except ValueError: + raise ValueError( + "Response is not registered for URL {}".format(response.url) + ) + self.registered[index] = response diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/responses/test_matchers.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/responses/test_matchers.py new file mode 100644 index 0000000000000000000000000000000000000000..d061d97b98b2c09f65c4e7105b3d70a8e018cff4 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/responses/test_matchers.py @@ -0,0 +1,625 @@ +from __future__ import absolute_import, print_function, division, unicode_literals + +import pytest +import requests +import responses +from requests.exceptions import ConnectionError +from responses import matchers + + +def assert_response(resp, body=None, content_type="text/plain"): + assert resp.status_code == 200 + assert resp.reason == "OK" + assert resp.headers["Content-Type"] == content_type + assert resp.text == body + + +def assert_reset(): + assert len(responses._default_mock.registered()) == 0 + assert len(responses.calls) == 0 + + +def test_query_string_matcher(): + @responses.activate + def run(): + url = "http://example.com?test=1&foo=bar" + responses.add( + responses.GET, + url, + body=b"test", + match=[matchers.query_string_matcher("test=1&foo=bar")], + ) + resp = requests.get("http://example.com?test=1&foo=bar") + assert_response(resp, "test") + resp = requests.get("http://example.com?foo=bar&test=1") + assert_response(resp, "test") + resp = requests.get("http://example.com/?foo=bar&test=1") + assert_response(resp, "test") + + run() + assert_reset() + + +def test_request_matches_post_params(): + @responses.activate + def run(deprecated): + if deprecated: + json_params_matcher = getattr(responses, "json_params_matcher") + urlencoded_params_matcher = getattr(responses, "urlencoded_params_matcher") + else: + json_params_matcher = matchers.json_params_matcher + urlencoded_params_matcher = matchers.urlencoded_params_matcher + + responses.add( + method=responses.POST, + url="http://example.com/", + body="one", + match=[json_params_matcher({"page": {"name": "first", "type": "json"}})], + ) + responses.add( + method=responses.POST, + url="http://example.com/", + body="two", + match=[urlencoded_params_matcher({"page": "second", "type": "urlencoded"})], + ) + + resp = requests.request( + "POST", + "http://example.com/", + headers={"Content-Type": "x-www-form-urlencoded"}, + data={"page": "second", "type": "urlencoded"}, + ) + assert_response(resp, "two") + + resp = requests.request( + "POST", + "http://example.com/", + headers={"Content-Type": "application/json"}, + json={"page": {"name": "first", "type": "json"}}, + ) + assert_response(resp, "one") + + with pytest.deprecated_call(): + run(deprecated=True) + assert_reset() + + run(deprecated=False) + assert_reset() + + +def test_request_matches_empty_body(): + def run(): + with responses.RequestsMock(assert_all_requests_are_fired=True) as rsps: + # test that both json and urlencoded body are empty in matcher and in 
request + rsps.add( + method=responses.POST, + url="http://example.com/", + body="one", + match=[matchers.json_params_matcher(None)], + ) + + rsps.add( + method=responses.POST, + url="http://example.com/", + body="two", + match=[matchers.urlencoded_params_matcher(None)], + ) + + resp = requests.request("POST", "http://example.com/") + assert_response(resp, "one") + + resp = requests.request( + "POST", + "http://example.com/", + headers={"Content-Type": "x-www-form-urlencoded"}, + ) + assert_response(resp, "two") + + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + # test exception raise if matcher body is None but request data is not None + rsps.add( + method=responses.POST, + url="http://example.com/", + body="one", + match=[matchers.json_params_matcher(None)], + ) + + with pytest.raises(ConnectionError) as excinfo: + resp = requests.request( + "POST", + "http://example.com/", + json={"my": "data"}, + headers={"Content-Type": "application/json"}, + ) + + msg = str(excinfo.value) + assert "request.body doesn't match: {my: data} doesn't match {}" in msg + + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + rsps.add( + method=responses.POST, + url="http://example.com/", + body="two", + match=[matchers.urlencoded_params_matcher(None)], + ) + with pytest.raises(ConnectionError) as excinfo: + resp = requests.request( + "POST", + "http://example.com/", + headers={"Content-Type": "x-www-form-urlencoded"}, + data={"page": "second", "type": "urlencoded"}, + ) + msg = str(excinfo.value) + assert ( + "request.body doesn't match: {page: second, type: urlencoded} doesn't match {}" + in msg + ) + + run() + assert_reset() + + +def test_request_matches_params(): + @responses.activate + def run(): + url = "http://example.com/test" + params = {"hello": "world", "I am": "a big test"} + responses.add( + method=responses.GET, + url=url, + body="test", + match=[matchers.query_param_matcher(params)], + match_querystring=False, + ) + + # exchange parameter places for the test + params = { + "I am": "a big test", + "hello": "world", + } + resp = requests.get(url, params=params) + + constructed_url = r"http://example.com/test?I+am=a+big+test&hello=world" + assert resp.url == constructed_url + assert resp.request.url == constructed_url + + resp_params = getattr(resp.request, "params") + assert resp_params == params + + run() + assert_reset() + + +def test_fail_matchers_error(): + """ + Validate that Exception is raised if request does not match responses.matchers + validate matchers.urlencoded_params_matcher + validate matchers.json_params_matcher + validate matchers.query_param_matcher + validate matchers.request_kwargs_matcher + :return: None + """ + + def run(): + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + rsps.add( + "POST", + "http://example.com", + match=[matchers.urlencoded_params_matcher({"foo": "bar"})], + ) + rsps.add( + "POST", + "http://example.com", + match=[matchers.json_params_matcher({"fail": "json"})], + ) + + with pytest.raises(ConnectionError) as excinfo: + requests.post("http://example.com", data={"id": "bad"}) + + msg = str(excinfo.value) + assert ( + "request.body doesn't match: {id: bad} doesn't match {foo: bar}" in msg + ) + + assert ( + "request.body doesn't match: JSONDecodeError: Cannot parse request.body" + in msg + ) + + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + rsps.add( + "GET", + "http://111.com", + match=[matchers.query_param_matcher({"my": "params"})], + ) + + 
rsps.add( + method=responses.GET, + url="http://111.com/", + body="two", + match=[matchers.json_params_matcher({"page": "one"})], + ) + + with pytest.raises(ConnectionError) as excinfo: + requests.get( + "http://111.com", params={"id": "bad"}, json={"page": "two"} + ) + + msg = str(excinfo.value) + assert ( + "Parameters do not match. {id: bad} doesn't match {my: params}" in msg + ) + assert ( + "request.body doesn't match: {page: two} doesn't match {page: one}" + in msg + ) + + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + req_kwargs = { + "stream": True, + "verify": False, + } + rsps.add( + "GET", + "http://111.com", + match=[matchers.request_kwargs_matcher(req_kwargs)], + ) + + with pytest.raises(ConnectionError) as excinfo: + requests.get("http://111.com", stream=True) + + msg = str(excinfo.value) + assert ( + "Arguments don't match: " + "{stream: True, verify: True} doesn't match {stream: True, verify: False}" + ) in msg + + run() + assert_reset() + + +@pytest.mark.parametrize( + "req_file,match_file", + [ + (b"Old World!", "Old World!"), + ("Old World!", b"Old World!"), + (b"Old World!", b"Old World!"), + ("Old World!", "Old World!"), + (b"\xacHello World!", b"\xacHello World!"), + ], +) +def test_multipart_matcher(req_file, match_file): + @responses.activate + def run(): + req_data = {"some": "other", "data": "fields"} + responses.add( + responses.POST, + url="http://httpbin.org/post", + match=[ + matchers.multipart_matcher( + files={"file_name": match_file}, data=req_data + ) + ], + ) + resp = requests.post( + "http://httpbin.org/post", data=req_data, files={"file_name": req_file} + ) + assert resp.status_code == 200 + + with pytest.raises(TypeError): + responses.add( + responses.POST, + url="http://httpbin.org/post", + match=[matchers.multipart_matcher(files={})], + ) + + run() + assert_reset() + + +def test_multipart_matcher_fail(): + """ + Validate that Exception is raised if request does not match responses.matchers + validate matchers.multipart_matcher + :return: None + """ + + def run(): + # different file contents + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + req_data = {"some": "other", "data": "fields"} + req_files = {"file_name": b"Old World!"} + rsps.add( + responses.POST, + url="http://httpbin.org/post", + match=[matchers.multipart_matcher(req_files, data=req_data)], + ) + + with pytest.raises(ConnectionError) as excinfo: + requests.post( + "http://httpbin.org/post", + data=req_data, + files={"file_name": b"New World!"}, + ) + + msg = str(excinfo.value) + assert "multipart/form-data doesn't match. Request body differs." in msg + + assert ( + r'\r\nContent-Disposition: form-data; name="file_name"; ' + r'filename="file_name"\r\n\r\nOld World!\r\n' + ) in msg + assert ( + r'\r\nContent-Disposition: form-data; name="file_name"; ' + r'filename="file_name"\r\n\r\nNew World!\r\n' + ) in msg + + # x-www-form-urlencoded request + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + req_data = {"some": "other", "data": "fields"} + req_files = {"file_name": b"Old World!"} + rsps.add( + responses.POST, + url="http://httpbin.org/post", + match=[matchers.multipart_matcher(req_files, data=req_data)], + ) + + with pytest.raises(ConnectionError) as excinfo: + requests.post("http://httpbin.org/post", data=req_data) + + msg = str(excinfo.value) + assert ( + "multipart/form-data doesn't match. Request headers['Content-Type'] is different." 
+ in msg + ) + assert ( + "application/x-www-form-urlencoded isn't equal to multipart/form-data; boundary=" + in msg + ) + + # empty body request + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + req_files = {"file_name": b"Old World!"} + rsps.add( + responses.POST, + url="http://httpbin.org/post", + match=[matchers.multipart_matcher(req_files)], + ) + + with pytest.raises(ConnectionError) as excinfo: + requests.post("http://httpbin.org/post") + + msg = str(excinfo.value) + assert "Request is missing the 'Content-Type' header" in msg + + run() + assert_reset() + + +def test_query_string_matcher_raises(): + """ + Validate that Exception is raised if request does not match responses.matchers + validate matchers.query_string_matcher + :return: None + """ + + def run(): + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + rsps.add( + "GET", + "http://111.com", + match=[matchers.query_string_matcher("didi=pro")], + ) + + with pytest.raises(ConnectionError) as excinfo: + requests.get("http://111.com", params={"test": "1", "didi": "pro"}) + + msg = str(excinfo.value) + assert ( + "Query string doesn't match. {didi: pro, test: 1} doesn't match {didi: pro}" + in msg + ) + + run() + assert_reset() + + +def test_request_matches_headers(): + @responses.activate + def run(): + url = "http://example.com/" + responses.add( + method=responses.GET, + url=url, + json={"success": True}, + match=[matchers.header_matcher({"Accept": "application/json"})], + ) + + responses.add( + method=responses.GET, + url=url, + body="success", + match=[matchers.header_matcher({"Accept": "text/plain"})], + ) + + # the actual request can contain extra headers (requests always adds some itself anyway) + resp = requests.get( + url, headers={"Accept": "application/json", "Accept-Charset": "utf-8"} + ) + assert_response(resp, body='{"success": true}', content_type="application/json") + + resp = requests.get(url, headers={"Accept": "text/plain"}) + assert_response(resp, body="success", content_type="text/plain") + + run() + assert_reset() + + +def test_request_matches_headers_no_match(): + @responses.activate + def run(): + url = "http://example.com/" + responses.add( + method=responses.GET, + url=url, + json={"success": True}, + match=[matchers.header_matcher({"Accept": "application/json"})], + ) + + with pytest.raises(ConnectionError) as excinfo: + requests.get(url, headers={"Accept": "application/xml"}) + + msg = str(excinfo.value) + assert ( + "Headers do not match: {Accept: application/xml} doesn't match " + "{Accept: application/json}" + ) in msg + + run() + assert_reset() + + +def test_request_matches_headers_strict_match(): + @responses.activate + def run(): + url = "http://example.com/" + responses.add( + method=responses.GET, + url=url, + body="success", + match=[ + matchers.header_matcher({"Accept": "text/plain"}, strict_match=True) + ], + ) + + # requests will add some extra headers of its own, so we have to use prepared requests + session = requests.Session() + + # make sure we send *just* the header we're expectin + prepped = session.prepare_request( + requests.Request( + method="GET", + url=url, + ) + ) + prepped.headers.clear() + prepped.headers["Accept"] = "text/plain" + + resp = session.send(prepped) + assert_response(resp, body="success", content_type="text/plain") + + # include the "Accept-Charset" header, which will fail to match + prepped = session.prepare_request( + requests.Request( + method="GET", + url=url, + ) + ) + prepped.headers.clear() + 
prepped.headers["Accept"] = "text/plain" + prepped.headers["Accept-Charset"] = "utf-8" + + with pytest.raises(ConnectionError) as excinfo: + session.send(prepped) + + msg = str(excinfo.value) + assert ( + "Headers do not match: {Accept: text/plain, Accept-Charset: utf-8} " + "doesn't match {Accept: text/plain}" + ) in msg + + run() + assert_reset() + + +def test_fragment_identifier_matcher(): + @responses.activate + def run(): + responses.add( + responses.GET, + "http://example.com", + match=[matchers.fragment_identifier_matcher("test=1&foo=bar")], + body=b"test", + ) + + resp = requests.get("http://example.com#test=1&foo=bar") + assert_response(resp, "test") + + run() + assert_reset() + + +def test_fragment_identifier_matcher_error(): + @responses.activate + def run(): + responses.add( + responses.GET, + "http://example.com/", + match=[matchers.fragment_identifier_matcher("test=1")], + ) + responses.add( + responses.GET, + "http://example.com/", + match=[matchers.fragment_identifier_matcher(None)], + ) + + with pytest.raises(ConnectionError) as excinfo: + requests.get("http://example.com/#test=2") + + msg = str(excinfo.value) + assert ( + "URL fragment identifier is different: test=1 doesn't match test=2" + ) in msg + assert ( + "URL fragment identifier is different: None doesn't match test=2" + ) in msg + + run() + assert_reset() + + +def test_fragment_identifier_matcher_and_match_querystring(): + @responses.activate + def run(): + url = "http://example.com?ab=xy&zed=qwe#test=1&foo=bar" + responses.add( + responses.GET, + url, + match_querystring=True, + match=[matchers.fragment_identifier_matcher("test=1&foo=bar")], + body=b"test", + ) + + # two requests to check reversed order of fragment identifier + resp = requests.get("http://example.com?ab=xy&zed=qwe#test=1&foo=bar") + assert_response(resp, "test") + resp = requests.get("http://example.com?zed=qwe&ab=xy#foo=bar&test=1") + assert_response(resp, "test") + + run() + assert_reset() + + +def test_matchers_create_key_val_str(): + """ + Test that matchers._create_key_val_str does recursive conversion + """ + data = { + "my_list": [ + 1, + 2, + "a", + {"key1": "val1", "key2": 2, 3: "test"}, + "!", + [["list", "nested"], {"nested": "dict"}], + ], + 1: 4, + "test": "val", + "high": {"nested": "nested_dict"}, + } + conv_str = matchers._create_key_val_str(data) + reference = ( + "{1: 4, high: {nested: nested_dict}, my_list: [!, 1, 2, [[list, nested], {nested: dict}], " + "a, {3: test, key1: val1, key2: 2}], test: val}" + ) + assert conv_str == reference diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/responses/test_registries.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/responses/test_registries.py new file mode 100644 index 0000000000000000000000000000000000000000..b4dd8cc5600403da68c89378646dee33b7e6b7f8 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/responses/test_registries.py @@ -0,0 +1,70 @@ +import pytest + +import responses +from responses import registries +from responses.test_responses import assert_reset + + +def test_set_registry_not_empty(): + class CustomRegistry(registries.FirstMatchRegistry): + pass + + @responses.activate + def run(): + url = "http://fizzbuzz/foo" + responses.add(method=responses.GET, url=url) + with pytest.raises(AttributeError) as excinfo: + responses.mock._set_registry(CustomRegistry) + msg = str(excinfo.value) + assert "Cannot replace Registry, current registry has responses" in msg + + run() + assert_reset() + + +def 
test_set_registry(): + class CustomRegistry(registries.FirstMatchRegistry): + pass + + @responses.activate(registry=CustomRegistry) + def run_with_registry(): + assert type(responses.mock._get_registry()) == CustomRegistry + + @responses.activate + def run(): + # test that registry does not leak to another test + assert type(responses.mock._get_registry()) == registries.FirstMatchRegistry + + run_with_registry() + run() + assert_reset() + + +def test_set_registry_context_manager(): + def run(): + class CustomRegistry(registries.FirstMatchRegistry): + pass + + with responses.RequestsMock( + assert_all_requests_are_fired=False, registry=CustomRegistry + ) as rsps: + assert type(rsps._get_registry()) == CustomRegistry + assert type(responses.mock._get_registry()) == registries.FirstMatchRegistry + + run() + assert_reset() + + +def test_registry_reset(): + def run(): + class CustomRegistry(registries.FirstMatchRegistry): + pass + + with responses.RequestsMock( + assert_all_requests_are_fired=False, registry=CustomRegistry + ) as rsps: + rsps._get_registry().reset() + assert not rsps.registered() + + run() + assert_reset() diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/responses/test_responses.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/responses/test_responses.py new file mode 100644 index 0000000000000000000000000000000000000000..dd6f62249382a39c06a2cbc086a94a5481d60e4d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/responses/test_responses.py @@ -0,0 +1,1927 @@ +# coding: utf-8 + +from __future__ import absolute_import, print_function, division, unicode_literals + +import inspect +import os +import re +from io import BufferedReader, BytesIO + +import pytest +import requests +import responses +from requests.exceptions import ConnectionError, HTTPError, ChunkedEncodingError +from responses import ( + BaseResponse, + Response, + PassthroughResponse, + matchers, + CallbackResponse, +) + + +try: + from mock import patch, Mock +except ImportError: + from unittest.mock import patch, Mock # type: ignore + + +def assert_reset(): + assert len(responses._default_mock.registered()) == 0 + assert len(responses.calls) == 0 + + +def assert_response(resp, body=None, content_type="text/plain"): + assert resp.status_code == 200 + assert resp.reason == "OK" + if content_type is not None: + assert resp.headers["Content-Type"] == content_type + else: + assert "Content-Type" not in resp.headers + assert resp.text == body + + +def assert_params(resp, expected): + assert hasattr(resp, "request"), "Missing request" + assert hasattr( + resp.request, "params" + ), "Missing params on request that responses should add" + assert getattr(resp.request, "params") == expected, "Incorrect parameters" + + +def test_response(): + @responses.activate + def run(): + responses.add(responses.GET, "http://example.com", body=b"test") + resp = requests.get("http://example.com") + assert_response(resp, "test") + assert len(responses.calls) == 1 + assert responses.calls[0].request.url == "http://example.com/" + assert responses.calls[0].response.content == b"test" + + resp = requests.get("http://example.com?foo=bar") + assert_response(resp, "test") + assert len(responses.calls) == 2 + assert responses.calls[1].request.url == "http://example.com/?foo=bar" + assert responses.calls[1].response.content == b"test" + + run() + assert_reset() + + +def test_response_encoded(): + @responses.activate + def run(): + # Path contains urlencoded =/()[] + url = 
"http://example.org/foo.bar%3D%2F%28%29%5B%5D" + responses.add(responses.GET, url, body="it works", status=200) + resp = requests.get(url) + assert_response(resp, "it works") + + run() + assert_reset() + + +def test_response_with_instance(): + @responses.activate + def run(): + responses.add( + responses.Response(method=responses.GET, url="http://example.com") + ) + resp = requests.get("http://example.com") + assert_response(resp, "") + assert len(responses.calls) == 1 + assert responses.calls[0].request.url == "http://example.com/" + + resp = requests.get("http://example.com?foo=bar") + assert_response(resp, "") + assert len(responses.calls) == 2 + assert responses.calls[1].request.url == "http://example.com/?foo=bar" + + run() + assert_reset() + + +@pytest.mark.parametrize( + "original,replacement", + [ + ("http://example.com/two", "http://example.com/two"), + ( + Response(method=responses.GET, url="http://example.com/two"), + Response( + method=responses.GET, url="http://example.com/two", body="testtwo" + ), + ), + ( + re.compile(r"http://example\.com/two"), + re.compile(r"http://example\.com/two"), + ), + ], +) +def test_replace(original, replacement): + @responses.activate + def run(): + responses.add(responses.GET, "http://example.com/one", body="test1") + + if isinstance(original, BaseResponse): + responses.add(original) + else: + responses.add(responses.GET, original, body="test2") + + responses.add(responses.GET, "http://example.com/three", body="test3") + responses.add( + responses.GET, re.compile(r"http://example\.com/four"), body="test3" + ) + + if isinstance(replacement, BaseResponse): + responses.replace(replacement) + else: + responses.replace(responses.GET, replacement, body="testtwo") + + resp = requests.get("http://example.com/two") + assert_response(resp, "testtwo") + + run() + assert_reset() + + +@pytest.mark.parametrize( + "original,replacement", + [ + ("http://example.com/one", re.compile(r"http://example\.com/one")), + (re.compile(r"http://example\.com/one"), "http://example.com/one"), + ], +) +def test_replace_error(original, replacement): + @responses.activate + def run(): + responses.add(responses.GET, original) + with pytest.raises(ValueError) as excinfo: + responses.replace(responses.GET, replacement) + assert "Response is not registered for URL %s" % replacement in str( + excinfo.value + ) + + run() + assert_reset() + + +def test_replace_response_object_error(): + @responses.activate + def run(): + responses.add(Response(method=responses.GET, url="http://example.com/one")) + with pytest.raises(ValueError) as excinfo: + responses.replace( + Response(method=responses.GET, url="http://example.com/two") + ) + assert "Response is not registered for URL http://example.com/two" in str( + excinfo.value + ) + + run() + assert_reset() + + +@pytest.mark.parametrize( + "original,replacement", + [ + ("http://example.com/two", "http://example.com/two"), + ( + Response(method=responses.GET, url="http://example.com/two"), + Response( + method=responses.GET, url="http://example.com/two", body="testtwo" + ), + ), + ( + re.compile(r"http://example\.com/two"), + re.compile(r"http://example\.com/two"), + ), + ], +) +def test_upsert_replace(original, replacement): + @responses.activate + def run(): + responses.add(responses.GET, "http://example.com/one", body="test1") + + if isinstance(original, BaseResponse): + responses.add(original) + else: + responses.add(responses.GET, original, body="test2") + + if isinstance(replacement, BaseResponse): + responses.upsert(replacement) + else: 
+ responses.upsert(responses.GET, replacement, body="testtwo") + + resp = requests.get("http://example.com/two") + assert_response(resp, "testtwo") + + run() + assert_reset() + + +@pytest.mark.parametrize( + "original,replacement", + [ + ("http://example.com/two", "http://example.com/two"), + ( + Response(method=responses.GET, url="http://example.com/two"), + Response( + method=responses.GET, url="http://example.com/two", body="testtwo" + ), + ), + ( + re.compile(r"http://example\.com/two"), + re.compile(r"http://example\.com/two"), + ), + ], +) +def test_upsert_add(original, replacement): + @responses.activate + def run(): + responses.add(responses.GET, "http://example.com/one", body="test1") + + if isinstance(replacement, BaseResponse): + responses.upsert(replacement) + else: + responses.upsert(responses.GET, replacement, body="testtwo") + + resp = requests.get("http://example.com/two") + assert_response(resp, "testtwo") + + run() + assert_reset() + + +def test_remove(): + @responses.activate + def run(): + responses.add(responses.GET, "http://example.com/zero") + responses.add(responses.GET, "http://example.com/one") + responses.add(responses.GET, "http://example.com/two") + responses.add(responses.GET, re.compile(r"http://example\.com/three")) + responses.add(responses.GET, re.compile(r"http://example\.com/four")) + re.purge() + responses.remove(responses.GET, "http://example.com/two") + responses.remove(Response(method=responses.GET, url="http://example.com/zero")) + responses.remove(responses.GET, re.compile(r"http://example\.com/four")) + + with pytest.raises(ConnectionError): + requests.get("http://example.com/zero") + requests.get("http://example.com/one") + with pytest.raises(ConnectionError): + requests.get("http://example.com/two") + requests.get("http://example.com/three") + with pytest.raises(ConnectionError): + requests.get("http://example.com/four") + + run() + assert_reset() + + +@pytest.mark.parametrize( + "args1,kwargs1,args2,kwargs2,expected", + [ + ((responses.GET, "a"), {}, (responses.GET, "a"), {}, True), + ((responses.GET, "a"), {}, (responses.GET, "b"), {}, False), + ((responses.GET, "a"), {}, (responses.POST, "a"), {}, False), + ( + (responses.GET, "a"), + {"match_querystring": True}, + (responses.GET, "a"), + {}, + True, + ), + ], +) +def test_response_equality(args1, kwargs1, args2, kwargs2, expected): + o1 = BaseResponse(*args1, **kwargs1) + o2 = BaseResponse(*args2, **kwargs2) + assert (o1 == o2) is expected + assert (o1 != o2) is not expected + + +def test_response_equality_different_objects(): + o1 = BaseResponse(method=responses.GET, url="a") + o2 = "str" + assert (o1 == o2) is False + assert (o1 != o2) is True + + +def test_connection_error(): + @responses.activate + def run(): + responses.add(responses.GET, "http://example.com") + + with pytest.raises(ConnectionError): + requests.get("http://example.com/foo") + + assert len(responses.calls) == 1 + assert responses.calls[0].request.url == "http://example.com/foo" + assert type(responses.calls[0].response) is ConnectionError + assert responses.calls[0].response.request + + run() + assert_reset() + + +def test_match_querystring(): + @responses.activate + def run(): + url = "http://example.com?test=1&foo=bar" + responses.add(responses.GET, url, match_querystring=True, body=b"test") + resp = requests.get("http://example.com?test=1&foo=bar") + assert_response(resp, "test") + resp = requests.get("http://example.com?foo=bar&test=1") + assert_response(resp, "test") + resp = 
requests.get("http://example.com/?foo=bar&test=1") + assert_response(resp, "test") + + run() + assert_reset() + + +def test_match_querystring_empty(): + @responses.activate + def run(): + responses.add( + responses.GET, "http://example.com", body=b"test", match_querystring=True + ) + resp = requests.get("http://example.com") + assert_response(resp, "test") + resp = requests.get("http://example.com/") + assert_response(resp, "test") + with pytest.raises(ConnectionError): + requests.get("http://example.com?query=foo") + + run() + assert_reset() + + +def test_match_querystring_error(): + @responses.activate + def run(): + responses.add( + responses.GET, "http://example.com/?test=1", match_querystring=True + ) + + with pytest.raises(ConnectionError): + requests.get("http://example.com/foo/?test=2") + + run() + assert_reset() + + +def test_match_querystring_regex(): + @responses.activate + def run(): + """Note that `match_querystring` value shouldn't matter when passing a + regular expression""" + + responses.add( + responses.GET, + re.compile(r"http://example\.com/foo/\?test=1"), + body="test1", + match_querystring=True, + ) + + resp = requests.get("http://example.com/foo/?test=1") + assert_response(resp, "test1") + + responses.add( + responses.GET, + re.compile(r"http://example\.com/foo/\?test=2"), + body="test2", + match_querystring=False, + ) + + resp = requests.get("http://example.com/foo/?test=2") + assert_response(resp, "test2") + + run() + assert_reset() + + +def test_match_querystring_error_regex(): + @responses.activate + def run(): + """Note that `match_querystring` value shouldn't matter when passing a + regular expression""" + + responses.add( + responses.GET, + re.compile(r"http://example\.com/foo/\?test=1"), + match_querystring=True, + ) + + with pytest.raises(ConnectionError): + requests.get("http://example.com/foo/?test=3") + + responses.add( + responses.GET, + re.compile(r"http://example\.com/foo/\?test=2"), + match_querystring=False, + ) + + with pytest.raises(ConnectionError): + requests.get("http://example.com/foo/?test=4") + + run() + assert_reset() + + +def test_match_querystring_auto_activates(): + @responses.activate + def run(): + responses.add(responses.GET, "http://example.com?test=1", body=b"test") + resp = requests.get("http://example.com?test=1") + assert_response(resp, "test") + with pytest.raises(ConnectionError): + requests.get("http://example.com/?test=2") + + run() + assert_reset() + + +def test_match_querystring_missing_key(): + @responses.activate + def run(): + responses.add(responses.GET, "http://example.com?foo=1&bar=2", body=b"test") + with pytest.raises(ConnectionError): + requests.get("http://example.com/?foo=1&baz=2") + + with pytest.raises(ConnectionError): + requests.get("http://example.com/?bar=2&fez=1") + + run() + assert_reset() + + +def test_accept_string_body(): + @responses.activate + def run(): + url = "http://example.com/" + responses.add(responses.GET, url, body="test") + resp = requests.get(url) + assert_response(resp, "test") + + run() + assert_reset() + + +def test_accept_json_body(): + @responses.activate + def run(): + content_type = "application/json" + + url = "http://example.com/" + responses.add(responses.GET, url, json={"message": "success"}) + resp = requests.get(url) + assert_response(resp, '{"message": "success"}', content_type) + + url = "http://example.com/1/" + responses.add(responses.GET, url, json=[]) + resp = requests.get(url) + assert_response(resp, "[]", content_type) + + run() + assert_reset() + + +def 
test_no_content_type(): + @responses.activate + def run(): + url = "http://example.com/" + responses.add(responses.GET, url, body="test", content_type=None) + resp = requests.get(url) + assert_response(resp, "test", content_type=None) + + run() + assert_reset() + + +def test_arbitrary_status_code(): + @responses.activate + def run(): + url = "http://example.com/" + responses.add(responses.GET, url, body="test", status=419) + resp = requests.get(url) + assert resp.status_code == 419 + assert resp.reason is None + + run() + assert_reset() + + +def test_throw_connection_error_explicit(): + @responses.activate + def run(): + url = "http://example.com" + exception = HTTPError("HTTP Error") + responses.add(responses.GET, url, exception) + + with pytest.raises(HTTPError) as HE: + requests.get(url) + + assert str(HE.value) == "HTTP Error" + + run() + assert_reset() + + +def test_callback(): + body = b"test callback" + status = 400 + reason = "Bad Request" + headers = { + "foo": "bar", + "Content-Type": "application/json", + "Content-Length": "13", + } + url = "http://example.com/" + + def request_callback(_request): + return status, headers, body + + @responses.activate + def run(): + responses.add_callback(responses.GET, url, request_callback) + resp = requests.get(url) + assert resp.text == "test callback" + assert resp.status_code == status + assert resp.reason == reason + assert "bar" == resp.headers.get("foo") + assert "application/json" == resp.headers.get("Content-Type") + assert "13" == resp.headers.get("Content-Length") + + run() + assert_reset() + + +def test_callback_deprecated_stream_argument(): + with pytest.deprecated_call(): + CallbackResponse(responses.GET, "url", lambda x: x, stream=False) + + +def test_callback_deprecated_match_querystring_argument(): + with pytest.deprecated_call(): + CallbackResponse(responses.GET, "url", lambda x: x, match_querystring=False) + + +def test_callback_match_querystring_default_false(): + """ + Test to ensure that by default 'match_querystring' in 'add_callback' is set to False + and does not raise deprecation + see: https://github.com/getsentry/responses/issues/464 and related PR + """ + body = b"test callback" + status = 200 + params = {"hello": "world", "I am": "a big test"} + headers = {"foo": "bar"} + url = "http://example.com/" + + def request_callback(_request): + return status, headers, body + + @responses.activate + def run(): + responses.add_callback(responses.GET, url, request_callback, content_type=None) + resp = requests.get(url, params=params) + assert resp.text == "test callback" + assert resp.status_code == status + assert "foo" in resp.headers + + with pytest.warns(None) as record: + run() + + # check that no deprecation warning was raised + assert not record + + assert_reset() + + +def test_callback_exception_result(): + result = Exception() + url = "http://example.com/" + + def request_callback(request): + return result + + @responses.activate + def run(): + responses.add_callback(responses.GET, url, request_callback) + + with pytest.raises(Exception) as e: + requests.get(url) + + assert e.value is result + + run() + assert_reset() + + +def test_callback_exception_body(): + body = Exception() + url = "http://example.com/" + + def request_callback(request): + return 200, {}, body + + @responses.activate + def run(): + responses.add_callback(responses.GET, url, request_callback) + + with pytest.raises(Exception) as e: + requests.get(url) + + assert e.value is body + + run() + assert_reset() + + +def test_callback_no_content_type(): 
+ body = b"test callback" + status = 400 + reason = "Bad Request" + headers = {"foo": "bar"} + url = "http://example.com/" + + def request_callback(_request): + return status, headers, body + + @responses.activate + def run(): + responses.add_callback(responses.GET, url, request_callback, content_type=None) + resp = requests.get(url) + assert resp.text == "test callback" + assert resp.status_code == status + assert resp.reason == reason + assert "foo" in resp.headers + assert "Content-Type" not in resp.headers + + run() + assert_reset() + + +def test_callback_content_type_dict(): + def request_callback(request): + return ( + 200, + {"Content-Type": "application/json"}, + b"foo", + ) + + @responses.activate + def run(): + responses.add_callback("GET", "http://mockhost/.foo", callback=request_callback) + resp = requests.get("http://mockhost/.foo") + assert resp.text == "foo" + assert resp.headers["content-type"] == "application/json" + + run() + assert_reset() + + +def test_callback_matchers(): + def request_callback(request): + return ( + 200, + {"Content-Type": "application/json"}, + b"foo", + ) + + @responses.activate + def run(): + req_data = {"some": "other", "data": "fields"} + req_files = {"file_name": b"Old World!"} + + responses.add_callback( + responses.POST, + url="http://httpbin.org/post", + match=[matchers.multipart_matcher(req_files, data=req_data)], + callback=request_callback, + ) + resp = requests.post("http://httpbin.org/post", data=req_data, files=req_files) + assert resp.text == "foo" + assert resp.headers["content-type"] == "application/json" + + run() + assert_reset() + + +def test_callback_matchers_fail(): + @responses.activate + def run(): + req_data = {"some": "other", "data": "fields"} + req_files = {"file_name": b"Old World!"} + + responses.add_callback( + responses.POST, + url="http://httpbin.org/post", + match=[matchers.multipart_matcher(req_files, data=req_data)], + callback=lambda x: ( + 0, + {"a": ""}, + "", + ), + ) + with pytest.raises(ConnectionError) as exc: + requests.post( + "http://httpbin.org/post", + data={"some": "other", "data": "wrong"}, + files=req_files, + ) + + assert "multipart/form-data doesn't match." 
in str(exc.value) + + run() + assert_reset() + + +def test_callback_content_type_tuple(): + def request_callback(request): + return ( + 200, + [("Content-Type", "application/json")], + b"foo", + ) + + @responses.activate + def run(): + responses.add_callback("GET", "http://mockhost/.foo", callback=request_callback) + resp = requests.get("http://mockhost/.foo") + assert resp.text == "foo" + assert resp.headers["content-type"] == "application/json" + + run() + assert_reset() + + +def test_regular_expression_url(): + @responses.activate + def run(): + url = re.compile(r"https?://(.*\.)?example.com") + responses.add(responses.GET, url, body=b"test") + + resp = requests.get("http://example.com") + assert_response(resp, "test") + + resp = requests.get("https://example.com") + assert_response(resp, "test") + + resp = requests.get("https://uk.example.com") + assert_response(resp, "test") + + with pytest.raises(ConnectionError): + requests.get("https://uk.exaaample.com") + + run() + assert_reset() + + +def test_base_response_get_response(): + resp = BaseResponse("GET", ".com") + with pytest.raises(NotImplementedError): + resp.get_response(requests.PreparedRequest()) + + +def test_custom_adapter(): + @responses.activate + def run(): + url = "http://example.com" + responses.add(responses.GET, url, body=b"test") + + calls = [0] + + class DummyAdapter(requests.adapters.HTTPAdapter): + def send(self, *a, **k): + calls[0] += 1 + return super(DummyAdapter, self).send(*a, **k) + + # Test that the adapter is actually used + session = requests.Session() + session.mount("http://", DummyAdapter()) + + resp = session.get(url, allow_redirects=False) + assert calls[0] == 1 + + # Test that the response is still correctly emulated + session = requests.Session() + session.mount("http://", DummyAdapter()) + + resp = session.get(url) + assert_response(resp, "test") + + run() + + +def test_responses_as_context_manager(): + def run(): + with responses.mock: + responses.add(responses.GET, "http://example.com", body=b"test") + resp = requests.get("http://example.com") + assert_response(resp, "test") + assert len(responses.calls) == 1 + assert responses.calls[0].request.url == "http://example.com/" + assert responses.calls[0].response.content == b"test" + + resp = requests.get("http://example.com?foo=bar") + assert_response(resp, "test") + assert len(responses.calls) == 2 + assert responses.calls[1].request.url == "http://example.com/?foo=bar" + assert responses.calls[1].response.content == b"test" + + run() + assert_reset() + + +def test_activate_doesnt_change_signature(): + def test_function(a, b=None): + return (a, b) + + decorated_test_function = responses.activate(test_function) + assert inspect.signature(test_function) == inspect.signature( + decorated_test_function + ) + + assert decorated_test_function(1, 2) == test_function(1, 2) + assert decorated_test_function(3) == test_function(3) + + +@pytest.fixture +def my_fruit(): + return "apple" + + +@pytest.fixture +def fruit_basket(my_fruit): + return ["banana", my_fruit] + + +@pytest.mark.usefixtures("my_fruit", "fruit_basket") +class TestFixtures(object): + """ + Test that pytest fixtures work well with 'activate' decorator + """ + + def test_function(self, my_fruit, fruit_basket): + assert my_fruit in fruit_basket + assert my_fruit == "apple" + + test_function_decorated = responses.activate(test_function) + + +def test_activate_mock_interaction(): + @patch("sys.stdout") + def test_function(mock_stdout): + return mock_stdout + + decorated_test_function = 
responses.activate(test_function) + assert inspect.signature(test_function) == inspect.signature( + decorated_test_function + ) + + value = test_function() + assert isinstance(value, Mock) + + value = decorated_test_function() + assert isinstance(value, Mock) + + +def test_activate_doesnt_change_signature_with_return_type(): + def test_function(a, b=None): + return a, b + + # Add type annotations as they are syntax errors in py2. + # Use a class to test for import errors in evaled code. + test_function.__annotations__["return"] = Mock + test_function.__annotations__["a"] = Mock + + decorated_test_function = responses.activate(test_function) + assert inspect.signature(test_function) == inspect.signature( + decorated_test_function + ) + + assert decorated_test_function(1, 2) == test_function(1, 2) + assert decorated_test_function(3) == test_function(3) + + +def test_activate_doesnt_change_signature_for_method(): + class TestCase(object): + def test_function(self, a, b=None): + return (self, a, b) + + decorated_test_function = responses.activate(test_function) + + test_case = TestCase() + assert test_case.decorated_test_function(1, 2) == test_case.test_function(1, 2) + assert test_case.decorated_test_function(3) == test_case.test_function(3) + + +def test_response_cookies(): + body = b"test callback" + status = 200 + headers = {"set-cookie": "session_id=12345; a=b; c=d"} + url = "http://example.com/" + + def request_callback(request): + return (status, headers, body) + + @responses.activate + def run(): + responses.add_callback(responses.GET, url, request_callback) + resp = requests.get(url) + assert resp.text == "test callback" + assert resp.status_code == status + assert "session_id" in resp.cookies + assert resp.cookies["session_id"] == "12345" + assert set(resp.cookies.keys()) == set(["session_id"]) + + run() + assert_reset() + + +def test_response_cookies_secure(): + body = b"test callback" + status = 200 + headers = {"set-cookie": "session_id=12345; a=b; c=d; secure"} + url = "http://example.com/" + + def request_callback(request): + return (status, headers, body) + + @responses.activate + def run(): + responses.add_callback(responses.GET, url, request_callback) + resp = requests.get(url) + assert resp.text == "test callback" + assert resp.status_code == status + assert "session_id" in resp.cookies + assert resp.cookies["session_id"] == "12345" + assert set(resp.cookies.keys()) == set(["session_id"]) + + run() + assert_reset() + + +def test_response_cookies_multiple(): + body = b"test callback" + status = 200 + headers = [ + ("set-cookie", "1P_JAR=2019-12-31-23; path=/; domain=.example.com; HttpOnly"), + ("set-cookie", "NID=some=value; path=/; domain=.example.com; secure"), + ] + url = "http://example.com/" + + def request_callback(request): + return (status, headers, body) + + @responses.activate + def run(): + responses.add_callback(responses.GET, url, request_callback) + resp = requests.get(url) + assert resp.text == "test callback" + assert resp.status_code == status + assert set(resp.cookies.keys()) == set(["1P_JAR", "NID"]) + assert resp.cookies["1P_JAR"] == "2019-12-31-23" + assert resp.cookies["NID"] == "some=value" + + run() + assert_reset() + + +@pytest.mark.parametrize("request_stream", (True, False, None)) +@pytest.mark.parametrize("responses_stream", (True, False, None)) +def test_response_cookies_session(request_stream, responses_stream): + @responses.activate + def run(): + url = "https://example.com/path" + responses.add( + responses.GET, + url, + headers=[ + 
("Set-cookie", "mycookie=cookieval; path=/; secure"), + ], + body="ok", + stream=responses_stream, + ) + session = requests.session() + resp = session.get(url, stream=request_stream) + assert resp.text == "ok" + assert resp.status_code == 200 + + assert "mycookie" in resp.cookies + assert resp.cookies["mycookie"] == "cookieval" + assert set(resp.cookies.keys()) == set(["mycookie"]) + + assert "mycookie" in session.cookies + assert session.cookies["mycookie"] == "cookieval" + assert set(session.cookies.keys()) == set(["mycookie"]) + + run() + assert_reset() + + +def test_response_callback(): + """adds a callback to decorate the response, then checks it""" + + def run(): + def response_callback(resp): + resp._is_mocked = True + return resp + + with responses.RequestsMock(response_callback=response_callback) as m: + m.add(responses.GET, "http://example.com", body=b"test") + resp = requests.get("http://example.com") + assert resp.text == "test" + assert hasattr(resp, "_is_mocked") + assert getattr(resp, "_is_mocked") is True + + run() + assert_reset() + + +def test_response_filebody(): + """ Adds the possibility to use actual (binary) files as responses """ + + def run(): + current_file = os.path.abspath(__file__) + with responses.RequestsMock() as m: + with open(current_file, "r") as out: + m.add(responses.GET, "http://example.com", body=out.read(), stream=True) + resp = requests.get("http://example.com", stream=True) + with open(current_file, "r") as out: + assert resp.text == out.read() + + run() + assert_reset() + + +def test_use_stream_twice_to_double_raw_io(): + @responses.activate + def run(): + url = "http://example.com" + responses.add(responses.GET, url, body=b"42", stream=True) + resp = requests.get(url, stream=True) + assert resp.raw.read() == b"42" + + run() + assert_reset() + + +def test_assert_all_requests_are_fired(): + def request_callback(request): + raise BaseException() + + def run(): + with pytest.raises(AssertionError) as excinfo: + with responses.RequestsMock(assert_all_requests_are_fired=True) as m: + m.add(responses.GET, "http://example.com", body=b"test") + assert "http://example.com" in str(excinfo.value) + assert responses.GET in str(excinfo.value) + + # check that assert_all_requests_are_fired default to True + with pytest.raises(AssertionError): + with responses.RequestsMock() as m: + m.add(responses.GET, "http://example.com", body=b"test") + + # check that assert_all_requests_are_fired doesn't swallow exceptions + with pytest.raises(ValueError): + with responses.RequestsMock() as m: + m.add(responses.GET, "http://example.com", body=b"test") + raise ValueError() + + # check that assert_all_requests_are_fired=True doesn't remove urls + with responses.RequestsMock(assert_all_requests_are_fired=True) as m: + m.add(responses.GET, "http://example.com", body=b"test") + assert len(m.registered()) == 1 + requests.get("http://example.com") + assert len(m.registered()) == 1 + + # check that assert_all_requests_are_fired=True counts mocked errors + with responses.RequestsMock(assert_all_requests_are_fired=True) as m: + m.add(responses.GET, "http://example.com", body=Exception()) + assert len(m.registered()) == 1 + with pytest.raises(Exception): + requests.get("http://example.com") + assert len(m.registered()) == 1 + + with responses.RequestsMock(assert_all_requests_are_fired=True) as m: + m.add_callback(responses.GET, "http://example.com", request_callback) + assert len(m.registered()) == 1 + with pytest.raises(BaseException): + requests.get("http://example.com") + assert 
len(m.registered()) == 1 + + run() + assert_reset() + + +def test_allow_redirects_samehost(): + redirecting_url = "http://example.com" + final_url_path = "/1" + final_url = "{0}{1}".format(redirecting_url, final_url_path) + url_re = re.compile(r"^http://example.com(/)?(\d+)?$") + + def request_callback(request): + # endpoint of chained redirect + if request.url.endswith(final_url_path): + return 200, (), b"test" + + # otherwise redirect to an integer path + else: + if request.url.endswith("/0"): + n = 1 + else: + n = 0 + redirect_headers = {"location": "/{0!s}".format(n)} + return 301, redirect_headers, None + + def run(): + # setup redirect + with responses.mock: + responses.add_callback(responses.GET, url_re, request_callback) + resp_no_redirects = requests.get(redirecting_url, allow_redirects=False) + assert resp_no_redirects.status_code == 301 + assert len(responses.calls) == 1 # 1x300 + assert responses.calls[0][1].status_code == 301 + assert_reset() + + with responses.mock: + responses.add_callback(responses.GET, url_re, request_callback) + resp_yes_redirects = requests.get(redirecting_url, allow_redirects=True) + assert len(responses.calls) == 3 # 2x300 + 1x200 + assert len(resp_yes_redirects.history) == 2 + assert resp_yes_redirects.status_code == 200 + assert final_url == resp_yes_redirects.url + status_codes = [call[1].status_code for call in responses.calls] + assert status_codes == [301, 301, 200] + assert_reset() + + run() + assert_reset() + + +def test_handles_unicode_querystring(): + url = "http://example.com/test?type=2&ie=utf8&query=汉字" + + @responses.activate + def run(): + responses.add(responses.GET, url, body="test", match_querystring=True) + + resp = requests.get(url) + + assert_response(resp, "test") + + run() + assert_reset() + + +def test_handles_unicode_url(): + url = "http://www.संजाल.भारत/hi/वेबसाइट-डिजाइन" + + @responses.activate + def run(): + responses.add(responses.GET, url, body="test") + + resp = requests.get(url) + + assert_response(resp, "test") + + run() + assert_reset() + + +def test_handles_unicode_body(): + url = "http://example.com/test" + + @responses.activate + def run(): + responses.add(responses.GET, url, body="михољско лето") + + resp = requests.get(url) + + assert_response(resp, "михољско лето", content_type="text/plain; charset=utf-8") + + run() + assert_reset() + + +def test_handles_buffered_reader_body(): + url = "http://example.com/test" + + @responses.activate + def run(): + responses.add(responses.GET, url, body=BufferedReader(BytesIO(b"test"))) # type: ignore + + resp = requests.get(url) + + assert_response(resp, "test") + + run() + assert_reset() + + +def test_headers(): + @responses.activate + def run(): + responses.add( + responses.GET, "http://example.com", body="", headers={"X-Test": "foo"} + ) + resp = requests.get("http://example.com") + assert resp.headers["X-Test"] == "foo" + + run() + assert_reset() + + +def test_content_length_error(monkeypatch): + """ + Currently 'requests' does not enforce content length validation, + (validation that body length matches header). 
However, this could + be expected in next major version, see + https://github.com/psf/requests/pull/3563 + + Now user can manually patch URL3 lib to achieve the same + + See discussion in + https://github.com/getsentry/responses/issues/394 + """ + + @responses.activate + def run(): + responses.add( + responses.GET, + "http://example.com/api/123", + json={"message": "this body is too large"}, + adding_headers={"content-length": "2"}, + ) + with pytest.raises(ChunkedEncodingError) as exc: + requests.get("http://example.com/api/123") + + assert "IncompleteRead" in str(exc.value) + + # Type errors here and on 1250 are ignored because the stubs for requests + # are off https://github.com/python/typeshed/blob/f8501d33c737482a829c6db557a0be26895c5941 + # /stubs/requests/requests/packages/__init__.pyi#L1 + original_init = getattr(requests.packages.urllib3.HTTPResponse, "__init__") # type: ignore + + def patched_init(self, *args, **kwargs): + kwargs["enforce_content_length"] = True + original_init(self, *args, **kwargs) + + monkeypatch.setattr( + requests.packages.urllib3.HTTPResponse, "__init__", patched_init # type: ignore + ) + + run() + assert_reset() + + +def test_stream_with_none_chunk_size(): + """ + See discussion in + https://github.com/getsentry/responses/issues/438 + """ + + @responses.activate + def run(): + responses.add( + responses.GET, + "https://example.com", + status=200, + content_type="application/octet-stream", + body=b"This is test", + auto_calculate_content_length=True, + ) + res = requests.get("https://example.com", stream=True) + for chunk in res.iter_content(chunk_size=None): + assert chunk == b"This is test" + + run() + assert_reset() + + +def test_legacy_adding_headers(): + @responses.activate + def run(): + responses.add( + responses.GET, + "http://example.com", + body="", + adding_headers={"X-Test": "foo"}, + ) + resp = requests.get("http://example.com") + assert resp.headers["X-Test"] == "foo" + + run() + assert_reset() + + +def test_auto_calculate_content_length_string_body(): + @responses.activate + def run(): + url = "http://example.com/" + responses.add( + responses.GET, url, body="test", auto_calculate_content_length=True + ) + resp = requests.get(url) + assert_response(resp, "test") + assert resp.headers["Content-Length"] == "4" + + run() + assert_reset() + + +def test_auto_calculate_content_length_bytes_body(): + @responses.activate + def run(): + url = "http://example.com/" + responses.add( + responses.GET, url, body=b"test bytes", auto_calculate_content_length=True + ) + resp = requests.get(url) + assert_response(resp, "test bytes") + assert resp.headers["Content-Length"] == "10" + + run() + assert_reset() + + +def test_auto_calculate_content_length_json_body(): + @responses.activate + def run(): + content_type = "application/json" + + url = "http://example.com/" + responses.add( + responses.GET, + url, + json={"message": "success"}, + auto_calculate_content_length=True, + ) + resp = requests.get(url) + assert_response(resp, '{"message": "success"}', content_type) + assert resp.headers["Content-Length"] == "22" + + url = "http://example.com/1/" + responses.add(responses.GET, url, json=[], auto_calculate_content_length=True) + resp = requests.get(url) + assert_response(resp, "[]", content_type) + assert resp.headers["Content-Length"] == "2" + + run() + assert_reset() + + +def test_auto_calculate_content_length_unicode_body(): + @responses.activate + def run(): + url = "http://example.com/test" + responses.add( + responses.GET, url, body="михољско лето", 
auto_calculate_content_length=True + ) + resp = requests.get(url) + assert_response(resp, "михољско лето", content_type="text/plain; charset=utf-8") + assert resp.headers["Content-Length"] == "25" + + run() + assert_reset() + + +def test_auto_calculate_content_length_doesnt_work_for_buffered_reader_body(): + @responses.activate + def run(): + url = "http://example.com/test" + responses.add( + responses.GET, + url, + body=BufferedReader(BytesIO(b"testing")), # type: ignore + auto_calculate_content_length=True, + ) + resp = requests.get(url) + assert_response(resp, "testing") + assert "Content-Length" not in resp.headers + + run() + assert_reset() + + +def test_auto_calculate_content_length_doesnt_override_existing_value(): + @responses.activate + def run(): + url = "http://example.com/" + responses.add( + responses.GET, + url, + body="test", + headers={"Content-Length": "2"}, + auto_calculate_content_length=True, + ) + resp = requests.get(url) + assert_response(resp, "test") + assert resp.headers["Content-Length"] == "2" + + run() + assert_reset() + + +def test_multiple_responses(): + @responses.activate + def run(): + responses.add(responses.GET, "http://example.com", body="test") + responses.add(responses.GET, "http://example.com", body="rest") + responses.add(responses.GET, "http://example.com", body="fest") + responses.add(responses.GET, "http://example.com", body="best") + + resp = requests.get("http://example.com") + assert_response(resp, "test") + + resp = requests.get("http://example.com") + assert_response(resp, "rest") + + resp = requests.get("http://example.com") + assert_response(resp, "fest") + + resp = requests.get("http://example.com") + assert_response(resp, "best") + + # After all responses are used, last response should be repeated + resp = requests.get("http://example.com") + assert_response(resp, "best") + + run() + assert_reset() + + +def test_multiple_responses_intermixed(): + @responses.activate + def run(): + responses.add(responses.GET, "http://example.com", body="test") + resp = requests.get("http://example.com") + assert_response(resp, "test") + + responses.add(responses.GET, "http://example.com", body="rest") + resp = requests.get("http://example.com") + assert_response(resp, "rest") + + responses.add(responses.GET, "http://example.com", body="best") + resp = requests.get("http://example.com") + assert_response(resp, "best") + + # After all responses are used, last response should be repeated + resp = requests.get("http://example.com") + assert_response(resp, "best") + + run() + assert_reset() + + +def test_multiple_urls(): + @responses.activate + def run(): + responses.add(responses.GET, "http://example.com/one", body="one") + responses.add(responses.GET, "http://example.com/two", body="two") + + resp = requests.get("http://example.com/two") + assert_response(resp, "two") + resp = requests.get("http://example.com/one") + assert_response(resp, "one") + + run() + assert_reset() + + +def test_multiple_methods(): + @responses.activate + def run(): + responses.add(responses.GET, "http://example.com/one", body="gotcha") + responses.add(responses.POST, "http://example.com/one", body="posted") + + resp = requests.get("http://example.com/one") + assert_response(resp, "gotcha") + resp = requests.post("http://example.com/one") + assert_response(resp, "posted") + + run() + assert_reset() + + +def test_passthrough_flag(httpserver): + httpserver.serve_content("OK", headers={"Content-Type": "text/plain"}) + response = Response(responses.GET, httpserver.url, body="MOCK") + + 
@responses.activate + def run_passthrough(): + responses.add(response) + resp = requests.get(httpserver.url) + assert_response(resp, "OK") + + @responses.activate + def run_mocked(): + responses.add(response) + resp = requests.get(httpserver.url) + assert_response(resp, "MOCK") + + run_mocked() + assert_reset() + + response.passthrough = True + run_passthrough() + assert_reset() + + +def test_passthrough_response(httpserver): + httpserver.serve_content("OK", headers={"Content-Type": "text/plain"}) + + @responses.activate + def run(): + responses.add(PassthroughResponse(responses.GET, httpserver.url)) + responses.add(responses.GET, "{}/one".format(httpserver.url), body="one") + responses.add(responses.GET, "http://example.com/two", body="two") + + resp = requests.get("http://example.com/two") + assert_response(resp, "two") + resp = requests.get("{}/one".format(httpserver.url)) + assert_response(resp, "one") + resp = requests.get(httpserver.url) + assert_response(resp, "OK") + + assert len(responses.calls) == 3 + responses.assert_call_count(httpserver.url, 1) + + run() + assert_reset() + + +def test_passthrough_response_stream(httpserver): + httpserver.serve_content("OK", headers={"Content-Type": "text/plain"}) + + @responses.activate + def run(): + responses.add(PassthroughResponse(responses.GET, httpserver.url)) + content_1 = requests.get(httpserver.url).content + with requests.get(httpserver.url, stream=True) as resp: + content_2 = resp.raw.read() + assert content_1 == content_2 + + run() + assert_reset() + + +def test_passthru_prefixes(httpserver): + httpserver.serve_content("OK", headers={"Content-Type": "text/plain"}) + + @responses.activate + def run_constructor_argument(): + with responses.RequestsMock(passthru_prefixes=(httpserver.url,)): + resp = requests.get(httpserver.url) + assert_response(resp, "OK") + + @responses.activate + def run_property_setter(): + with responses.RequestsMock() as m: + m.passthru_prefixes = tuple([httpserver.url]) + resp = requests.get(httpserver.url) + assert_response(resp, "OK") + + run_constructor_argument() + assert_reset() + run_property_setter() + assert_reset() + + +def test_passthru(httpserver): + httpserver.serve_content("OK", headers={"Content-Type": "text/plain"}) + + @responses.activate + def run(): + responses.add_passthru(httpserver.url) + responses.add(responses.GET, "{}/one".format(httpserver.url), body="one") + responses.add(responses.GET, "http://example.com/two", body="two") + + resp = requests.get("http://example.com/two") + assert_response(resp, "two") + resp = requests.get("{}/one".format(httpserver.url)) + assert_response(resp, "one") + resp = requests.get(httpserver.url) + assert_response(resp, "OK") + + run() + assert_reset() + + +def test_passthru_regex(httpserver): + httpserver.serve_content("OK", headers={"Content-Type": "text/plain"}) + + @responses.activate + def run(): + responses.add_passthru(re.compile("{}/\\w+".format(httpserver.url))) + responses.add(responses.GET, "{}/one".format(httpserver.url), body="one") + responses.add(responses.GET, "http://example.com/two", body="two") + + resp = requests.get("http://example.com/two") + assert_response(resp, "two") + resp = requests.get("{}/one".format(httpserver.url)) + assert_response(resp, "one") + resp = requests.get("{}/two".format(httpserver.url)) + assert_response(resp, "OK") + resp = requests.get("{}/three".format(httpserver.url)) + assert_response(resp, "OK") + + run() + assert_reset() + + +def test_passthru_does_not_persist_across_tests(httpserver): + """ + passthru 
should be erased on exit from context manager + see: + https://github.com/getsentry/responses/issues/322 + """ + httpserver.serve_content("OK", headers={"Content-Type": "text/plain"}) + + @responses.activate + def with_a_passthru(): + assert not responses._default_mock.passthru_prefixes + responses.add_passthru(re.compile(".*")) + try: + response = requests.get("https://example.com") + except ConnectionError as err: # pragma: no cover + if "Failed to establish" in str(err): # pragma: no cover + pytest.skip("Cannot resolve DNS for example.com") # pragma: no cover + raise err # pragma: no cover + + assert response.status_code == 200 + + @responses.activate + def without_a_passthru(): + assert not responses._default_mock.passthru_prefixes + with pytest.raises(requests.exceptions.ConnectionError): + requests.get("https://example.com") + + with_a_passthru() + without_a_passthru() + + +def test_method_named_param(): + @responses.activate + def run(): + responses.add(method=responses.GET, url="http://example.com", body="OK") + resp = requests.get("http://example.com") + assert_response(resp, "OK") + + run() + assert_reset() + + +def test_passthru_unicode(): + @responses.activate + def run(): + with responses.RequestsMock() as m: + url = "http://موقع.وزارة-الاتصالات.مصر/" + clean_url = "http://xn--4gbrim.xn----ymcbaaajlc6dj7bxne2c.xn--wgbh1c/" + m.add_passthru(url) + assert m.passthru_prefixes[0] == clean_url + + run() + assert_reset() + + +def test_custom_target(monkeypatch): + requests_mock = responses.RequestsMock(target="something.else") + std_mock_mock = responses.std_mock.MagicMock() + patch_mock = std_mock_mock.patch + monkeypatch.setattr(responses, "std_mock", std_mock_mock) + requests_mock.start() + assert len(patch_mock.call_args_list) == 1 + assert patch_mock.call_args[1]["target"] == "something.else" + + +def test_cookies_from_headers(): + text = "こんにちは/世界" + quoted_text = responses.quote(text) + expected = {"x": "a", "y": quoted_text} + headers = {"set-cookie": "; ".join(k + "=" + v for k, v in expected.items())} + cookiejar = responses._cookies_from_headers(headers) + for k, v in cookiejar.items(): + assert isinstance(v, str) + assert v == expected[k] + + +@pytest.mark.parametrize( + "url", + ( + "http://example.com", + "http://example.com/some/path", + "http://example.com/other/path/", + ), +) +def test_request_param(url): + @responses.activate + def run(): + params = {"hello": "world", "example": "params"} + responses.add( + method=responses.GET, + url="{0}?hello=world".format(url), + body="test", + match_querystring=False, + ) + resp = requests.get(url, params=params) + assert_response(resp, "test") + assert_params(resp, params) + + resp = requests.get(url) + assert_response(resp, "test") + assert_params(resp, {}) + + run() + assert_reset() + + +def test_request_param_with_multiple_values_for_the_same_key(): + @responses.activate + def run(): + url = "http://example.com" + params = {"key1": ["one", "two"], "key2": "three"} + responses.add( + method=responses.GET, + url=url, + body="test", + ) + resp = requests.get(url, params=params) + assert_response(resp, "test") + assert_params(resp, params) + + run() + assert_reset() + + +@pytest.mark.parametrize( + "url", ("http://example.com", "http://example.com?hello=world") +) +def test_assert_call_count(url): + @responses.activate + def run(): + responses.add(responses.GET, url) + responses.add(responses.GET, "http://example1.com") + + assert responses.assert_call_count(url, 0) is True + + with pytest.raises(AssertionError) as excinfo: + 
responses.assert_call_count(url, 2) + assert "Expected URL '{0}' to be called 2 times. Called 0 times.".format( + url + ) in str(excinfo.value) + + requests.get(url) + assert responses.assert_call_count(url, 1) is True + + requests.get("http://example1.com") + assert responses.assert_call_count(url, 1) is True + + requests.get(url) + with pytest.raises(AssertionError) as excinfo: + responses.assert_call_count(url, 3) + assert "Expected URL '{0}' to be called 3 times. Called 2 times.".format( + url + ) in str(excinfo.value) + + run() + assert_reset() + + +def test_fail_request_error(): + """ + Validate that exception is raised if request URL/Method/kwargs don't match + :return: + """ + + def run(): + with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps: + rsps.add("POST", "http://example1.com") + rsps.add("GET", "http://example.com") + + with pytest.raises(ConnectionError) as excinfo: + requests.post("http://example.com", data={"id": "bad"}) + + msg = str(excinfo.value) + assert "- POST http://example1.com/ URL does not match" in msg + assert "- GET http://example.com/ Method does not match" in msg + + run() + assert_reset() + + +@pytest.mark.parametrize( + "response_params, expected_representation", + [ + ( + {"method": responses.GET, "url": "http://example.com/"}, + ( + "" + ), + ), + ( + { + "method": responses.POST, + "url": "http://another-domain.com/", + "content_type": "application/json", + "status": 404, + }, + ( + "" + ), + ), + ( + { + "method": responses.PUT, + "url": "http://abcd.com/", + "content_type": "text/html", + "status": 500, + "headers": {"X-Test": "foo"}, + "body": {"it_wont_be": "considered"}, + }, + ( + "" + ), + ), + ], +) +def test_response_representations(response_params, expected_representation): + response = Response(**response_params) + + assert str(response) == expected_representation + assert repr(response) == expected_representation + + +def test_mocked_responses_list_registered(): + @responses.activate + def run(): + first_response = Response( + responses.GET, + "http://example.com/", + body="", + headers={"X-Test": "foo"}, + status=404, + ) + second_response = Response( + responses.GET, "http://example.com/", body="", headers={"X-Test": "foo"} + ) + third_response = Response( + responses.POST, + "http://anotherdomain.com/", + ) + responses.add(first_response) + responses.add(second_response) + responses.add(third_response) + + mocks_list = responses.registered() + + assert mocks_list == responses.mock.registered() + assert mocks_list == [first_response, second_response, third_response] + + run() + assert_reset() + + +@pytest.mark.parametrize( + "url,other_url", + [ + ("http://service-A/foo?q=fizz", "http://service-a/foo?q=fizz"), + ("http://service-a/foo", "http://service-A/foo"), + ("http://someHost-AwAy/", "http://somehost-away/"), + ("http://fizzbuzz/foo", "http://fizzbuzz/foo"), + ], +) +def test_rfc_compliance(url, other_url): + @responses.activate + def run(): + responses.add(method=responses.GET, url=url) + resp = requests.request("GET", other_url) + assert_response(resp, "") + + run() + assert_reset() + + +def test_requests_between_add(): + @responses.activate + def run(): + responses.add(responses.GET, "https://example.com/", json={"response": "old"}) + assert requests.get("https://example.com/").content == b'{"response": "old"}' + assert requests.get("https://example.com/").content == b'{"response": "old"}' + assert requests.get("https://example.com/").content == b'{"response": "old"}' + + responses.add(responses.GET, 
"https://example.com/", json={"response": "new"}) + + assert requests.get("https://example.com/").content == b'{"response": "new"}' + assert requests.get("https://example.com/").content == b'{"response": "new"}' + assert requests.get("https://example.com/").content == b'{"response": "new"}' + + run() + assert_reset() diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/__init__.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..64464dfaa38a82e9647a095263d70f12961a1f92 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/__init__.py @@ -0,0 +1,286 @@ +"""Extensions to the 'distutils' for large or complex distributions""" +# mypy: disable_error_code=override +# Command.reinitialize_command has an extra **kw param that distutils doesn't have +# Can't disable on the exact line because distutils doesn't exists on Python 3.12 +# and mypy isn't aware of distutils_hack, causing distutils.core.Command to be Any, +# and a [unused-ignore] to be raised on 3.12+ + +from __future__ import annotations + +import functools +import os +import re +import sys +from abc import abstractmethod +from collections.abc import Mapping +from typing import TYPE_CHECKING, TypeVar, overload + +sys.path.extend(((vendor_path := os.path.join(os.path.dirname(os.path.dirname(__file__)), 'setuptools', '_vendor')) not in sys.path) * [vendor_path]) # fmt: skip +# workaround for #4476 +sys.modules.pop('backports', None) + +import _distutils_hack.override # noqa: F401 + +from . import logging, monkey +from .depends import Require +from .discovery import PackageFinder, PEP420PackageFinder +from .dist import Distribution +from .extension import Extension +from .version import __version__ as __version__ +from .warnings import SetuptoolsDeprecationWarning + +import distutils.core +from distutils.errors import DistutilsOptionError + +__all__ = [ + 'setup', + 'Distribution', + 'Command', + 'Extension', + 'Require', + 'SetuptoolsDeprecationWarning', + 'find_packages', + 'find_namespace_packages', +] + +_CommandT = TypeVar("_CommandT", bound="_Command") + +bootstrap_install_from = None + +find_packages = PackageFinder.find +find_namespace_packages = PEP420PackageFinder.find + + +def _install_setup_requires(attrs): + # Note: do not use `setuptools.Distribution` directly, as + # our PEP 517 backend patch `distutils.core.Distribution`. + class MinimalDistribution(distutils.core.Distribution): + """ + A minimal version of a distribution for supporting the + fetch_build_eggs interface. + """ + + def __init__(self, attrs: Mapping[str, object]) -> None: + _incl = 'dependency_links', 'setup_requires' + filtered = {k: attrs[k] for k in set(_incl) & set(attrs)} + super().__init__(filtered) + # Prevent accidentally triggering discovery with incomplete set of attrs + self.set_defaults._disable() + + def _get_project_config_files(self, filenames=None): + """Ignore ``pyproject.toml``, they are not related to setup_requires""" + try: + cfg, _toml = super()._split_standard_project_metadata(filenames) + except Exception: + return filenames, () + return cfg, () + + def finalize_options(self): + """ + Disable finalize_options to avoid building the working set. + Ref #2158. + """ + + dist = MinimalDistribution(attrs) + + # Honor setup.cfg's options. 
+ dist.parse_config_files(ignore_option_errors=True) + if dist.setup_requires: + _fetch_build_eggs(dist) + + +def _fetch_build_eggs(dist: Distribution): + try: + dist.fetch_build_eggs(dist.setup_requires) + except Exception as ex: + msg = """ + It is possible a package already installed in your system + contains an version that is invalid according to PEP 440. + You can try `pip install --use-pep517` as a workaround for this problem, + or rely on a new virtual environment. + + If the problem refers to a package that is not installed yet, + please contact that package's maintainers or distributors. + """ + if "InvalidVersion" in ex.__class__.__name__: + if hasattr(ex, "add_note"): + ex.add_note(msg) # PEP 678 + else: + dist.announce(f"\n{msg}\n") + raise + + +def setup(**attrs): + logging.configure() + # Make sure we have any requirements needed to interpret 'attrs'. + _install_setup_requires(attrs) + return distutils.core.setup(**attrs) + + +setup.__doc__ = distutils.core.setup.__doc__ + +if TYPE_CHECKING: + # Work around a mypy issue where type[T] can't be used as a base: https://github.com/python/mypy/issues/10962 + from distutils.core import Command as _Command +else: + _Command = monkey.get_unpatched(distutils.core.Command) + + +class Command(_Command): + """ + Setuptools internal actions are organized using a *command design pattern*. + This means that each action (or group of closely related actions) executed during + the build should be implemented as a ``Command`` subclass. + + These commands are abstractions and do not necessarily correspond to a command that + can (or should) be executed via a terminal, in a CLI fashion (although historically + they would). + + When creating a new command from scratch, custom defined classes **SHOULD** inherit + from ``setuptools.Command`` and implement a few mandatory methods. + Between these mandatory methods, are listed: + :meth:`initialize_options`, :meth:`finalize_options` and :meth:`run`. + + A useful analogy for command classes is to think of them as subroutines with local + variables called "options". The options are "declared" in :meth:`initialize_options` + and "defined" (given their final values, aka "finalized") in :meth:`finalize_options`, + both of which must be defined by every command class. The "body" of the subroutine, + (where it does all the work) is the :meth:`run` method. + Between :meth:`initialize_options` and :meth:`finalize_options`, ``setuptools`` may set + the values for options/attributes based on user's input (or circumstance), + which means that the implementation should be careful to not overwrite values in + :meth:`finalize_options` unless necessary. + + Please note that other commands (or other parts of setuptools) may also overwrite + the values of the command's options/attributes multiple times during the build + process. + Therefore it is important to consistently implement :meth:`initialize_options` and + :meth:`finalize_options`. For example, all derived attributes (or attributes that + depend on the value of other attributes) **SHOULD** be recomputed in + :meth:`finalize_options`. + + When overwriting existing commands, custom defined classes **MUST** abide by the + same APIs implemented by the original class. They also **SHOULD** inherit from the + original class. 
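+
+    (Editor's illustrative sketch, not part of the vendored setuptools sources.)
+    A hypothetical minimal command following the pattern described above: the
+    ``target`` option is declared in ``initialize_options``, completed in
+    ``finalize_options``, and the actual work happens in ``run``::
+
+        from setuptools import Command
+
+        class clean_artifacts(Command):  # hypothetical command name
+            user_options = [("target=", None, "directory to remove")]
+
+            def initialize_options(self):
+                self.target = None  # declare the option with a default
+
+            def finalize_options(self):
+                if self.target is None:  # derive a value only if none was given
+                    self.target = "build"
+
+            def run(self):
+                import shutil
+                shutil.rmtree(self.target, ignore_errors=True)  # side effects belong in run()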
+ """ + + command_consumes_arguments = False + distribution: Distribution # override distutils.dist.Distribution with setuptools.dist.Distribution + + def __init__(self, dist: Distribution, **kw) -> None: + """ + Construct the command for dist, updating + vars(self) with any keyword parameters. + """ + super().__init__(dist) + vars(self).update(kw) + + def _ensure_stringlike(self, option, what, default=None): + val = getattr(self, option) + if val is None: + setattr(self, option, default) + return default + elif not isinstance(val, str): + raise DistutilsOptionError(f"'{option}' must be a {what} (got `{val}`)") + return val + + def ensure_string_list(self, option: str) -> None: + r"""Ensure that 'option' is a list of strings. If 'option' is + currently a string, we split it either on /,\s*/ or /\s+/, so + "foo bar baz", "foo,bar,baz", and "foo, bar baz" all become + ["foo", "bar", "baz"]. + + .. + TODO: This method seems to be similar to the one in ``distutils.cmd`` + Probably it is just here for backward compatibility with old Python versions? + + :meta private: + """ + val = getattr(self, option) + if val is None: + return + elif isinstance(val, str): + setattr(self, option, re.split(r',\s*|\s+', val)) + else: + if isinstance(val, list): + ok = all(isinstance(v, str) for v in val) + else: + ok = False + if not ok: + raise DistutilsOptionError( + f"'{option}' must be a list of strings (got {val!r})" + ) + + @overload + def reinitialize_command( + self, command: str, reinit_subcommands: bool = False, **kw + ) -> _Command: ... + @overload + def reinitialize_command( + self, command: _CommandT, reinit_subcommands: bool = False, **kw + ) -> _CommandT: ... + def reinitialize_command( + self, command: str | _Command, reinit_subcommands: bool = False, **kw + ) -> _Command: + cmd = _Command.reinitialize_command(self, command, reinit_subcommands) + vars(cmd).update(kw) + return cmd # pyright: ignore[reportReturnType] # pypa/distutils#307 + + @abstractmethod + def initialize_options(self) -> None: + """ + Set or (reset) all options/attributes/caches used by the command + to their default values. Note that these values may be overwritten during + the build. + """ + raise NotImplementedError + + @abstractmethod + def finalize_options(self) -> None: + """ + Set final values for all options/attributes used by the command. + Most of the time, each option/attribute/cache should only be set if it does not + have any value yet (e.g. ``if self.attr is None: self.attr = val``). + """ + raise NotImplementedError + + @abstractmethod + def run(self) -> None: + """ + Execute the actions intended by the command. + (Side effects **SHOULD** only take place when :meth:`run` is executed, + for example, creating new files or writing to the terminal output). + """ + raise NotImplementedError + + +def _find_all_simple(path): + """ + Find all files under 'path' + """ + results = ( + os.path.join(base, file) + for base, dirs, files in os.walk(path, followlinks=True) + for file in files + ) + return filter(os.path.isfile, results) + + +def findall(dir=os.curdir): + """ + Find all files under 'dir' and return the list of full filenames. + Unless dir is '.', return full filenames with dir prepended. 
+ """ + files = _find_all_simple(dir) + if dir == os.curdir: + make_rel = functools.partial(os.path.relpath, start=dir) + files = map(make_rel, files) + return list(files) + + +class sic(str): + """Treat this string as-is (https://en.wikipedia.org/wiki/Sic)""" + + +# Apply monkey patches +monkey.patch_all() diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/_core_metadata.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/_core_metadata.py new file mode 100644 index 0000000000000000000000000000000000000000..850cc409f71315ef8d13b0ee28acd5a7862a5415 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/_core_metadata.py @@ -0,0 +1,321 @@ +""" +Handling of Core Metadata for Python packages (including reading and writing). + +See: https://packaging.python.org/en/latest/specifications/core-metadata/ +""" + +from __future__ import annotations + +import os +import stat +import textwrap +from email import message_from_file +from email.message import Message +from tempfile import NamedTemporaryFile + +from packaging.markers import Marker +from packaging.requirements import Requirement +from packaging.utils import canonicalize_name, canonicalize_version +from packaging.version import Version + +from . import _normalization, _reqs +from ._static import is_static +from .warnings import SetuptoolsDeprecationWarning + +from distutils.util import rfc822_escape + + +def get_metadata_version(self): + mv = getattr(self, 'metadata_version', None) + if mv is None: + mv = Version('2.2') + self.metadata_version = mv + return mv + + +def rfc822_unescape(content: str) -> str: + """Reverse RFC-822 escaping by removing leading whitespaces from content.""" + lines = content.splitlines() + if len(lines) == 1: + return lines[0].lstrip() + return '\n'.join((lines[0].lstrip(), textwrap.dedent('\n'.join(lines[1:])))) + + +def _read_field_from_msg(msg: Message, field: str) -> str | None: + """Read Message header field.""" + value = msg[field] + if value == 'UNKNOWN': + return None + return value + + +def _read_field_unescaped_from_msg(msg: Message, field: str) -> str | None: + """Read Message header field and apply rfc822_unescape.""" + value = _read_field_from_msg(msg, field) + if value is None: + return value + return rfc822_unescape(value) + + +def _read_list_from_msg(msg: Message, field: str) -> list[str] | None: + """Read Message header field and return all results as list.""" + values = msg.get_all(field, None) + if values == []: + return None + return values + + +def _read_payload_from_msg(msg: Message) -> str | None: + value = str(msg.get_payload()).strip() + if value == 'UNKNOWN' or not value: + return None + return value + + +def read_pkg_file(self, file): + """Reads the metadata values from a file object.""" + msg = message_from_file(file) + + self.metadata_version = Version(msg['metadata-version']) + self.name = _read_field_from_msg(msg, 'name') + self.version = _read_field_from_msg(msg, 'version') + self.description = _read_field_from_msg(msg, 'summary') + # we are filling author only. 
+ self.author = _read_field_from_msg(msg, 'author') + self.maintainer = None + self.author_email = _read_field_from_msg(msg, 'author-email') + self.maintainer_email = None + self.url = _read_field_from_msg(msg, 'home-page') + self.download_url = _read_field_from_msg(msg, 'download-url') + self.license = _read_field_unescaped_from_msg(msg, 'license') + + self.long_description = _read_field_unescaped_from_msg(msg, 'description') + if self.long_description is None and self.metadata_version >= Version('2.1'): + self.long_description = _read_payload_from_msg(msg) + self.description = _read_field_from_msg(msg, 'summary') + + if 'keywords' in msg: + self.keywords = _read_field_from_msg(msg, 'keywords').split(',') + + self.platforms = _read_list_from_msg(msg, 'platform') + self.classifiers = _read_list_from_msg(msg, 'classifier') + + # PEP 314 - these fields only exist in 1.1 + if self.metadata_version == Version('1.1'): + self.requires = _read_list_from_msg(msg, 'requires') + self.provides = _read_list_from_msg(msg, 'provides') + self.obsoletes = _read_list_from_msg(msg, 'obsoletes') + else: + self.requires = None + self.provides = None + self.obsoletes = None + + self.license_files = _read_list_from_msg(msg, 'license-file') + + +def single_line(val): + """ + Quick and dirty validation for Summary pypa/setuptools#1390. + """ + if '\n' in val: + # TODO: Replace with `raise ValueError("newlines not allowed")` + # after reviewing #2893. + msg = "newlines are not allowed in `summary` and will break in the future" + SetuptoolsDeprecationWarning.emit("Invalid config.", msg) + # due_date is undefined. Controversial change, there was a lot of push back. + val = val.strip().split('\n')[0] + return val + + +def write_pkg_info(self, base_dir): + """Write the PKG-INFO file into the release tree.""" + temp = "" + final = os.path.join(base_dir, 'PKG-INFO') + try: + # Use a temporary file while writing to avoid race conditions + # (e.g. `importlib.metadata` reading `.egg-info/PKG-INFO`): + with NamedTemporaryFile("w", encoding="utf-8", dir=base_dir, delete=False) as f: + temp = f.name + self.write_pkg_file(f) + permissions = stat.S_IMODE(os.lstat(temp).st_mode) + os.chmod(temp, permissions | stat.S_IRGRP | stat.S_IROTH) + os.replace(temp, final) # atomic operation. 
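+        # (Editor's note, not upstream code.) This is the usual atomic write-then-rename
+        # idiom: the metadata is first written to a NamedTemporaryFile inside base_dir,
+        # the file is made group/other readable, and os.replace() then swaps it over
+        # PKG-INFO in one step, so readers such as importlib.metadata never observe a
+        # partially written file.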
+ finally: + if temp and os.path.exists(temp): + os.remove(temp) + + +# Based on Python 3.5 version +def write_pkg_file(self, file): # noqa: C901 # is too complex (14) # FIXME + """Write the PKG-INFO format data to a file object.""" + version = self.get_metadata_version() + + def write_field(key, value): + file.write(f"{key}: {value}\n") + + write_field('Metadata-Version', str(version)) + write_field('Name', self.get_name()) + write_field('Version', self.get_version()) + + summary = self.get_description() + if summary: + write_field('Summary', single_line(summary)) + + optional_fields = ( + ('Home-page', 'url'), + ('Download-URL', 'download_url'), + ('Author', 'author'), + ('Author-email', 'author_email'), + ('Maintainer', 'maintainer'), + ('Maintainer-email', 'maintainer_email'), + ) + + for field, attr in optional_fields: + attr_val = getattr(self, attr, None) + if attr_val is not None: + write_field(field, attr_val) + + license = self.get_license() + if license: + write_field('License', rfc822_escape(license)) + + for label, url in self.project_urls.items(): + write_field('Project-URL', f'{label}, {url}') + + keywords = ','.join(self.get_keywords()) + if keywords: + write_field('Keywords', keywords) + + platforms = self.get_platforms() or [] + for platform in platforms: + write_field('Platform', platform) + + self._write_list(file, 'Classifier', self.get_classifiers()) + + # PEP 314 + self._write_list(file, 'Requires', self.get_requires()) + self._write_list(file, 'Provides', self.get_provides()) + self._write_list(file, 'Obsoletes', self.get_obsoletes()) + + # Setuptools specific for PEP 345 + if hasattr(self, 'python_requires'): + write_field('Requires-Python', self.python_requires) + + # PEP 566 + if self.long_description_content_type: + write_field('Description-Content-Type', self.long_description_content_type) + + self._write_list(file, 'License-File', self.license_files or []) + _write_requirements(self, file) + + for field, attr in _POSSIBLE_DYNAMIC_FIELDS.items(): + if (val := getattr(self, attr, None)) and not is_static(val): + write_field('Dynamic', field) + + long_description = self.get_long_description() + if long_description: + file.write(f"\n{long_description}") + if not long_description.endswith("\n"): + file.write("\n") + + +def _write_requirements(self, file): + for req in _reqs.parse(self.install_requires): + file.write(f"Requires-Dist: {req}\n") + + processed_extras = {} + for augmented_extra, reqs in self.extras_require.items(): + # Historically, setuptools allows "augmented extras": `:` + unsafe_extra, _, condition = augmented_extra.partition(":") + unsafe_extra = unsafe_extra.strip() + extra = _normalization.safe_extra(unsafe_extra) + + if extra: + _write_provides_extra(file, processed_extras, extra, unsafe_extra) + for req in _reqs.parse_strings(reqs): + r = _include_extra(req, extra, condition.strip()) + file.write(f"Requires-Dist: {r}\n") + + return processed_extras + + +def _include_extra(req: str, extra: str, condition: str) -> Requirement: + r = Requirement(req) # create a fresh object that can be modified + parts = ( + f"({r.marker})" if r.marker else None, + f"({condition})" if condition else None, + f"extra == {extra!r}" if extra else None, + ) + r.marker = Marker(" and ".join(x for x in parts if x)) + return r + + +def _write_provides_extra(file, processed_extras, safe, unsafe): + previous = processed_extras.get(safe) + if previous == unsafe: + SetuptoolsDeprecationWarning.emit( + 'Ambiguity during "extra" normalization for dependencies.', + f""" + 
{previous!r} and {unsafe!r} normalize to the same value:\n + {safe!r}\n + In future versions, setuptools might halt the build process. + """, + see_url="https://peps.python.org/pep-0685/", + ) + else: + processed_extras[safe] = unsafe + file.write(f"Provides-Extra: {safe}\n") + + +# from pypa/distutils#244; needed only until that logic is always available +def get_fullname(self): + return _distribution_fullname(self.get_name(), self.get_version()) + + +def _distribution_fullname(name: str, version: str) -> str: + """ + >>> _distribution_fullname('setup.tools', '1.0-2') + 'setup_tools-1.0.post2' + >>> _distribution_fullname('setup-tools', '1.2post2') + 'setup_tools-1.2.post2' + >>> _distribution_fullname('setup-tools', '1.0-r2') + 'setup_tools-1.0.post2' + >>> _distribution_fullname('setup.tools', '1.0.post') + 'setup_tools-1.0.post0' + >>> _distribution_fullname('setup.tools', '1.0+ubuntu-1') + 'setup_tools-1.0+ubuntu.1' + """ + return "{}-{}".format( + canonicalize_name(name).replace('-', '_'), + canonicalize_version(version, strip_trailing_zero=False), + ) + + +_POSSIBLE_DYNAMIC_FIELDS = { + # Core Metadata Field x related Distribution attribute + "author": "author", + "author-email": "author_email", + "classifier": "classifiers", + "description": "long_description", + "description-content-type": "long_description_content_type", + "download-url": "download_url", + "home-page": "url", + "keywords": "keywords", + "license": "license", + # "license-file": "license_files", # XXX: does PEP 639 exempt Dynamic ?? + "maintainer": "maintainer", + "maintainer-email": "maintainer_email", + "obsoletes": "obsoletes", + # "obsoletes-dist": "obsoletes_dist", # NOT USED + "platform": "platforms", + "project-url": "project_urls", + "provides": "provides", + # "provides-dist": "provides_dist", # NOT USED + "provides-extra": "extras_require", + "requires": "requires", + "requires-dist": "install_requires", + # "requires-external": "requires_external", # NOT USED + "requires-python": "python_requires", + "summary": "description", + # "supported-platform": "supported_platforms", # NOT USED +} diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/_entry_points.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/_entry_points.py new file mode 100644 index 0000000000000000000000000000000000000000..e785fc7df8d51241a38675902fe5e5b78a0cc29c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/_entry_points.py @@ -0,0 +1,90 @@ +import functools +import itertools +import operator + +from jaraco.functools import pass_none +from jaraco.text import yield_lines +from more_itertools import consume + +from ._importlib import metadata +from ._itertools import ensure_unique +from .errors import OptionError + + +def ensure_valid(ep): + """ + Exercise one of the dynamic properties to trigger + the pattern match. + """ + try: + ep.extras + except (AttributeError, AssertionError) as ex: + # Why both? See https://github.com/python/importlib_metadata/issues/488 + msg = ( + f"Problems to parse {ep}.\nPlease ensure entry-point follows the spec: " + "https://packaging.python.org/en/latest/specifications/entry-points/" + ) + raise OptionError(msg) from ex + + +def load_group(value, group): + """ + Given a value of an entry point or series of entry points, + return each as an EntryPoint. 
+ """ + # normalize to a single sequence of lines + lines = yield_lines(value) + text = f'[{group}]\n' + '\n'.join(lines) + return metadata.EntryPoints._from_text(text) + + +def by_group_and_name(ep): + return ep.group, ep.name + + +def validate(eps: metadata.EntryPoints): + """ + Ensure entry points are unique by group and name and validate each. + """ + consume(map(ensure_valid, ensure_unique(eps, key=by_group_and_name))) + return eps + + +@functools.singledispatch +def load(eps): + """ + Given a Distribution.entry_points, produce EntryPoints. + """ + groups = itertools.chain.from_iterable( + load_group(value, group) for group, value in eps.items() + ) + return validate(metadata.EntryPoints(groups)) + + +@load.register(str) +def _(eps): + r""" + >>> ep, = load('[console_scripts]\nfoo=bar') + >>> ep.group + 'console_scripts' + >>> ep.name + 'foo' + >>> ep.value + 'bar' + """ + return validate(metadata.EntryPoints(metadata.EntryPoints._from_text(eps))) + + +load.register(type(None), lambda x: x) + + +@pass_none +def render(eps: metadata.EntryPoints): + by_group = operator.attrgetter('group') + groups = itertools.groupby(sorted(eps, key=by_group), by_group) + + return '\n'.join(f'[{group}]\n{render_items(items)}\n' for group, items in groups) + + +def render_items(eps): + return '\n'.join(f'{ep.name} = {ep.value}' for ep in sorted(eps)) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/_normalization.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/_normalization.py new file mode 100644 index 0000000000000000000000000000000000000000..9541a55d6c966b4600de590455946a97e1da39d7 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/_normalization.py @@ -0,0 +1,150 @@ +""" +Helpers for normalization as expected in wheel/sdist/module file names +and core metadata +""" + +import re + +import packaging + +# https://packaging.python.org/en/latest/specifications/core-metadata/#name +_VALID_NAME = re.compile(r"^([A-Z0-9]|[A-Z0-9][A-Z0-9._-]*[A-Z0-9])$", re.I) +_UNSAFE_NAME_CHARS = re.compile(r"[^A-Z0-9._-]+", re.I) +_NON_ALPHANUMERIC = re.compile(r"[^A-Z0-9]+", re.I) +_PEP440_FALLBACK = re.compile(r"^v?(?P(?:[0-9]+!)?[0-9]+(?:\.[0-9]+)*)", re.I) + + +def safe_identifier(name: str) -> str: + """Make a string safe to be used as Python identifier. + >>> safe_identifier("12abc") + '_12abc' + >>> safe_identifier("__editable__.myns.pkg-78.9.3_local") + '__editable___myns_pkg_78_9_3_local' + """ + safe = re.sub(r'\W|^(?=\d)', '_', name) + assert safe.isidentifier() + return safe + + +def safe_name(component: str) -> str: + """Escape a component used as a project name according to Core Metadata. + >>> safe_name("hello world") + 'hello-world' + >>> safe_name("hello?world") + 'hello-world' + >>> safe_name("hello_world") + 'hello_world' + """ + # See pkg_resources.safe_name + return _UNSAFE_NAME_CHARS.sub("-", component) + + +def safe_version(version: str) -> str: + """Convert an arbitrary string into a valid version string. + Can still raise an ``InvalidVersion`` exception. + To avoid exceptions use ``best_effort_version``. + >>> safe_version("1988 12 25") + '1988.12.25' + >>> safe_version("v0.2.1") + '0.2.1' + >>> safe_version("v0.2?beta") + '0.2b0' + >>> safe_version("v0.2 beta") + '0.2b0' + >>> safe_version("ubuntu lts") + Traceback (most recent call last): + ... 
+ packaging.version.InvalidVersion: Invalid version: 'ubuntu.lts' + """ + v = version.replace(' ', '.') + try: + return str(packaging.version.Version(v)) + except packaging.version.InvalidVersion: + attempt = _UNSAFE_NAME_CHARS.sub("-", v) + return str(packaging.version.Version(attempt)) + + +def best_effort_version(version: str) -> str: + """Convert an arbitrary string into a version-like string. + Fallback when ``safe_version`` is not safe enough. + >>> best_effort_version("v0.2 beta") + '0.2b0' + >>> best_effort_version("ubuntu lts") + '0.dev0+sanitized.ubuntu.lts' + >>> best_effort_version("0.23ubuntu1") + '0.23.dev0+sanitized.ubuntu1' + >>> best_effort_version("0.23-") + '0.23.dev0+sanitized' + >>> best_effort_version("0.-_") + '0.dev0+sanitized' + >>> best_effort_version("42.+?1") + '42.dev0+sanitized.1' + """ + # See pkg_resources._forgiving_version + try: + return safe_version(version) + except packaging.version.InvalidVersion: + v = version.replace(' ', '.') + match = _PEP440_FALLBACK.search(v) + if match: + safe = match["safe"] + rest = v[len(safe) :] + else: + safe = "0" + rest = version + safe_rest = _NON_ALPHANUMERIC.sub(".", rest).strip(".") + local = f"sanitized.{safe_rest}".strip(".") + return safe_version(f"{safe}.dev0+{local}") + + +def safe_extra(extra: str) -> str: + """Normalize extra name according to PEP 685 + >>> safe_extra("_FrIeNdLy-._.-bArD") + 'friendly-bard' + >>> safe_extra("FrIeNdLy-._.-bArD__._-") + 'friendly-bard' + """ + return _NON_ALPHANUMERIC.sub("-", extra).strip("-").lower() + + +def filename_component(value: str) -> str: + """Normalize each component of a filename (e.g. distribution/version part of wheel) + Note: ``value`` needs to be already normalized. + >>> filename_component("my-pkg") + 'my_pkg' + """ + return value.replace("-", "_").strip("_") + + +def filename_component_broken(value: str) -> str: + """ + Produce the incorrect filename component for compatibility. + + See pypa/setuptools#4167 for detailed analysis. + + TODO: replace this with filename_component after pip 24 is + nearly-ubiquitous. 
+ + >>> filename_component_broken('foo_bar-baz') + 'foo-bar-baz' + """ + return value.replace('_', '-') + + +def safer_name(value: str) -> str: + """Like ``safe_name`` but can be used as filename component for wheel""" + # See bdist_wheel.safer_name + return ( + # Per https://packaging.python.org/en/latest/specifications/name-normalization/#name-normalization + re.sub(r"[-_.]+", "-", safe_name(value)) + .lower() + # Per https://packaging.python.org/en/latest/specifications/binary-distribution-format/#escaping-and-unicode + .replace("-", "_") + ) + + +def safer_best_effort_version(value: str) -> str: + """Like ``best_effort_version`` but can be used as filename component for wheel""" + # See bdist_wheel.safer_verion + # TODO: Replace with only safe_version in the future (no need for best effort) + return filename_component(best_effort_version(value)) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/_reqs.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/_reqs.py new file mode 100644 index 0000000000000000000000000000000000000000..c793be4d6eb3991d7b4ada615201a01cbfbbefd5 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/_reqs.py @@ -0,0 +1,42 @@ +from __future__ import annotations + +from collections.abc import Iterable, Iterator +from functools import lru_cache +from typing import TYPE_CHECKING, Callable, TypeVar, Union, overload + +import jaraco.text as text +from packaging.requirements import Requirement + +if TYPE_CHECKING: + from typing_extensions import TypeAlias + +_T = TypeVar("_T") +_StrOrIter: TypeAlias = Union[str, Iterable[str]] + + +parse_req: Callable[[str], Requirement] = lru_cache()(Requirement) +# Setuptools parses the same requirement many times +# (e.g. first for validation than for normalisation), +# so it might be worth to cache. + + +def parse_strings(strs: _StrOrIter) -> Iterator[str]: + """ + Yield requirement strings for each specification in `strs`. + + `strs` must be a string, or a (possibly-nested) iterable thereof. + """ + return text.join_continuation(map(text.drop_comment, text.yield_lines(strs))) + + +# These overloads are only needed because of a mypy false-positive, pyright gets it right +# https://github.com/python/mypy/issues/3737 +@overload +def parse(strs: _StrOrIter) -> Iterator[Requirement]: ... +@overload +def parse(strs: _StrOrIter, parser: Callable[[str], _T]) -> Iterator[_T]: ... +def parse(strs: _StrOrIter, parser: Callable[[str], _T] = parse_req) -> Iterator[_T]: # type: ignore[assignment] + """ + Replacement for ``pkg_resources.parse_requirements`` that uses ``packaging``. 
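+
+    (Editor's illustrative example, not part of the vendored file.)
+
+    >>> [str(r) for r in parse(["requests>=2.0 # comment", "idna"])]
+    ['requests>=2.0', 'idna']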
+ """ + return map(parser, parse_strings(strs)) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/_shutil.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/_shutil.py new file mode 100644 index 0000000000000000000000000000000000000000..6acbb4281fc986587f52a83395dc63912a863caf --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/_shutil.py @@ -0,0 +1,53 @@ +"""Convenience layer on top of stdlib's shutil and os""" + +import os +import stat +from typing import Callable, TypeVar + +from .compat import py311 + +from distutils import log + +try: + from os import chmod # pyright: ignore[reportAssignmentType] + # Losing type-safety w/ pyright, but that's ok +except ImportError: # pragma: no cover + # Jython compatibility + def chmod(*args: object, **kwargs: object) -> None: # type: ignore[misc] # Mypy reuses the imported definition anyway + pass + + +_T = TypeVar("_T") + + +def attempt_chmod_verbose(path, mode): + log.debug("changing mode of %s to %o", path, mode) + try: + chmod(path, mode) + except OSError as e: # pragma: no cover + log.debug("chmod failed: %s", e) + + +# Must match shutil._OnExcCallback +def _auto_chmod( + func: Callable[..., _T], arg: str, exc: BaseException +) -> _T: # pragma: no cover + """shutils onexc callback to automatically call chmod for certain functions.""" + # Only retry for scenarios known to have an issue + if func in [os.unlink, os.remove] and os.name == 'nt': + attempt_chmod_verbose(arg, stat.S_IWRITE) + return func(arg) + raise exc + + +def rmtree(path, ignore_errors=False, onexc=_auto_chmod): + """ + Similar to ``shutil.rmtree`` but automatically executes ``chmod`` + for well know Windows failure scenarios. + """ + return py311.shutil_rmtree(path, ignore_errors, onexc) + + +def rmdir(path, **opts): + if os.path.isdir(path): + rmtree(path, **opts) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/_static.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/_static.py new file mode 100644 index 0000000000000000000000000000000000000000..075a0bcddf3d438f42eb1641ce67488ff0320b7b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/_static.py @@ -0,0 +1,188 @@ +from functools import wraps +from typing import TypeVar + +import packaging.specifiers + +from .warnings import SetuptoolsDeprecationWarning + + +class Static: + """ + Wrapper for built-in object types that are allow setuptools to identify + static core metadata (in opposition to ``Dynamic``, as defined :pep:`643`). + + The trick is to mark values with :class:`Static` when they come from + ``pyproject.toml`` or ``setup.cfg``, so if any plugin overwrite the value + with a built-in, setuptools will be able to recognise the change. + + We inherit from built-in classes, so that we don't need to change the existing + code base to deal with the new types. + We also should strive for immutability objects to avoid changes after the + initial parsing. + """ + + _mutated_: bool = False # TODO: Remove after deprecation warning is solved + + +def _prevent_modification(target: type, method: str, copying: str) -> None: + """ + Because setuptools is very flexible we cannot fully prevent + plugins and user customisations from modifying static values that were + parsed from config files. + But we can attempt to block "in-place" mutations and identify when they + were done. 
+ """ + fn = getattr(target, method, None) + if fn is None: + return + + @wraps(fn) + def _replacement(self: Static, *args, **kwargs): + # TODO: After deprecation period raise NotImplementedError instead of warning + # which obviated the existence and checks of the `_mutated_` attribute. + self._mutated_ = True + SetuptoolsDeprecationWarning.emit( + "Direct modification of value will be disallowed", + f""" + In an effort to implement PEP 643, direct/in-place changes of static values + that come from configuration files are deprecated. + If you need to modify this value, please first create a copy with {copying} + and make sure conform to all relevant standards when overriding setuptools + functionality (https://packaging.python.org/en/latest/specifications/). + """, + due_date=(2025, 10, 10), # Initially introduced in 2024-09-06 + ) + return fn(self, *args, **kwargs) + + _replacement.__doc__ = "" # otherwise doctest may fail. + setattr(target, method, _replacement) + + +class Str(str, Static): + pass + + +class Tuple(tuple, Static): + pass + + +class List(list, Static): + """ + :meta private: + >>> x = List([1, 2, 3]) + >>> is_static(x) + True + >>> x += [0] # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + SetuptoolsDeprecationWarning: Direct modification ... + >>> is_static(x) # no longer static after modification + False + >>> y = list(x) + >>> y.clear() + >>> y + [] + >>> y == x + False + >>> is_static(List(y)) + True + """ + + +# Make `List` immutable-ish +# (certain places of setuptools/distutils issue a warn if we use tuple instead of list) +for _method in ( + '__delitem__', + '__iadd__', + '__setitem__', + 'append', + 'clear', + 'extend', + 'insert', + 'remove', + 'reverse', + 'pop', +): + _prevent_modification(List, _method, "`list(value)`") + + +class Dict(dict, Static): + """ + :meta private: + >>> x = Dict({'a': 1, 'b': 2}) + >>> is_static(x) + True + >>> x['c'] = 0 # doctest: +IGNORE_EXCEPTION_DETAIL + Traceback (most recent call last): + SetuptoolsDeprecationWarning: Direct modification ... 
+ >>> x._mutated_ + True + >>> is_static(x) # no longer static after modification + False + >>> y = dict(x) + >>> y.popitem() + ('b', 2) + >>> y == x + False + >>> is_static(Dict(y)) + True + """ + + +# Make `Dict` immutable-ish (we cannot inherit from types.MappingProxyType): +for _method in ( + '__delitem__', + '__ior__', + '__setitem__', + 'clear', + 'pop', + 'popitem', + 'setdefault', + 'update', +): + _prevent_modification(Dict, _method, "`dict(value)`") + + +class SpecifierSet(packaging.specifiers.SpecifierSet, Static): + """Not exactly a built-in type but useful for ``requires-python``""" + + +T = TypeVar("T") + + +def noop(value: T) -> T: + """ + >>> noop(42) + 42 + """ + return value + + +_CONVERSIONS = {str: Str, tuple: Tuple, list: List, dict: Dict} + + +def attempt_conversion(value: T) -> T: + """ + >>> is_static(attempt_conversion("hello")) + True + >>> is_static(object()) + False + """ + return _CONVERSIONS.get(type(value), noop)(value) # type: ignore[call-overload] + + +def is_static(value: object) -> bool: + """ + >>> is_static(a := Dict({'a': 1})) + True + >>> is_static(dict(a)) + False + >>> is_static(b := List([1, 2, 3])) + True + >>> is_static(list(b)) + False + """ + return isinstance(value, Static) and not value._mutated_ + + +EMPTY_LIST = List() +EMPTY_DICT = Dict() diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/archive_util.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/archive_util.py new file mode 100644 index 0000000000000000000000000000000000000000..1a02010bb2af2be0487730d6a32080877b9ac220 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/archive_util.py @@ -0,0 +1,219 @@ +"""Utilities for extracting common archive formats""" + +import contextlib +import os +import posixpath +import shutil +import tarfile +import zipfile + +from ._path import ensure_directory + +from distutils.errors import DistutilsError + +__all__ = [ + "unpack_archive", + "unpack_zipfile", + "unpack_tarfile", + "default_filter", + "UnrecognizedFormat", + "extraction_drivers", + "unpack_directory", +] + + +class UnrecognizedFormat(DistutilsError): + """Couldn't recognize the archive type""" + + +def default_filter(src, dst): + """The default progress/filter callback; returns True for all files""" + return dst + + +def unpack_archive( + filename, extract_dir, progress_filter=default_filter, drivers=None +) -> None: + """Unpack `filename` to `extract_dir`, or raise ``UnrecognizedFormat`` + + `progress_filter` is a function taking two arguments: a source path + internal to the archive ('/'-separated), and a filesystem path where it + will be extracted. The callback must return the desired extract path + (which may be the same as the one passed in), or else ``None`` to skip + that file or directory. The callback can thus be used to report on the + progress of the extraction, as well as to filter the items extracted or + alter their extraction paths. + + `drivers`, if supplied, must be a non-empty sequence of functions with the + same signature as this function (minus the `drivers` argument), that raise + ``UnrecognizedFormat`` if they do not support extracting the designated + archive type. The `drivers` are tried in sequence until one is found that + does not raise an error, or until all are exhausted (in which case + ``UnrecognizedFormat`` is raised). 
If you do not supply a sequence of + drivers, the module's ``extraction_drivers`` constant will be used, which + means that ``unpack_zipfile`` and ``unpack_tarfile`` will be tried, in that + order. + """ + for driver in drivers or extraction_drivers: + try: + driver(filename, extract_dir, progress_filter) + except UnrecognizedFormat: + continue + else: + return + else: + raise UnrecognizedFormat(f"Not a recognized archive type: {filename}") + + +def unpack_directory(filename, extract_dir, progress_filter=default_filter) -> None: + """ "Unpack" a directory, using the same interface as for archives + + Raises ``UnrecognizedFormat`` if `filename` is not a directory + """ + if not os.path.isdir(filename): + raise UnrecognizedFormat(f"{filename} is not a directory") + + paths = { + filename: ('', extract_dir), + } + for base, dirs, files in os.walk(filename): + src, dst = paths[base] + for d in dirs: + paths[os.path.join(base, d)] = src + d + '/', os.path.join(dst, d) + for f in files: + target = os.path.join(dst, f) + target = progress_filter(src + f, target) + if not target: + # skip non-files + continue + ensure_directory(target) + f = os.path.join(base, f) + shutil.copyfile(f, target) + shutil.copystat(f, target) + + +def unpack_zipfile(filename, extract_dir, progress_filter=default_filter) -> None: + """Unpack zip `filename` to `extract_dir` + + Raises ``UnrecognizedFormat`` if `filename` is not a zipfile (as determined + by ``zipfile.is_zipfile()``). See ``unpack_archive()`` for an explanation + of the `progress_filter` argument. + """ + + if not zipfile.is_zipfile(filename): + raise UnrecognizedFormat(f"{filename} is not a zip file") + + with zipfile.ZipFile(filename) as z: + _unpack_zipfile_obj(z, extract_dir, progress_filter) + + +def _unpack_zipfile_obj(zipfile_obj, extract_dir, progress_filter=default_filter): + """Internal/private API used by other parts of setuptools. + Similar to ``unpack_zipfile``, but receives an already opened :obj:`zipfile.ZipFile` + object instead of a filename. + """ + for info in zipfile_obj.infolist(): + name = info.filename + + # don't extract absolute paths or ones with .. in them + if name.startswith('/') or '..' in name.split('/'): + continue + + target = os.path.join(extract_dir, *name.split('/')) + target = progress_filter(name, target) + if not target: + continue + if name.endswith('/'): + # directory + ensure_directory(target) + else: + # file + ensure_directory(target) + data = zipfile_obj.read(info.filename) + with open(target, 'wb') as f: + f.write(data) + unix_attributes = info.external_attr >> 16 + if unix_attributes: + os.chmod(target, unix_attributes) + + +def _resolve_tar_file_or_dir(tar_obj, tar_member_obj): + """Resolve any links and extract link targets as normal files.""" + while tar_member_obj is not None and ( + tar_member_obj.islnk() or tar_member_obj.issym() + ): + linkpath = tar_member_obj.linkname + if tar_member_obj.issym(): + base = posixpath.dirname(tar_member_obj.name) + linkpath = posixpath.join(base, linkpath) + linkpath = posixpath.normpath(linkpath) + tar_member_obj = tar_obj._getmember(linkpath) + + is_file_or_dir = tar_member_obj is not None and ( + tar_member_obj.isfile() or tar_member_obj.isdir() + ) + if is_file_or_dir: + return tar_member_obj + + raise LookupError('Got unknown file type') + + +def _iter_open_tar(tar_obj, extract_dir, progress_filter): + """Emit member-destination pairs from a tar archive.""" + # don't do any chowning! 
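# Illustrative usage sketch of the ``unpack_archive``/``progress_filter`` contract
# described above; assumes setuptools is importable, and the archive path is
# hypothetical. The callback receives the archive-internal '/'-separated path and
# the planned destination, and returns the destination (possibly rewritten) or
# ``None`` to skip that entry.
from setuptools.archive_util import unpack_archive

def skip_tests(src, dst):
    # Skip anything under a top-level "tests/" directory inside the archive.
    if src.startswith('tests/'):
        return None
    return dst

# unpack_archive('example-1.0.tar.gz', 'build/unpacked', progress_filter=skip_tests)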
+ tar_obj.chown = lambda *args: None + + with contextlib.closing(tar_obj): + for member in tar_obj: + name = member.name + # don't extract absolute paths or ones with .. in them + if name.startswith('/') or '..' in name.split('/'): + continue + + prelim_dst = os.path.join(extract_dir, *name.split('/')) + + try: + member = _resolve_tar_file_or_dir(tar_obj, member) + except LookupError: + continue + + final_dst = progress_filter(name, prelim_dst) + if not final_dst: + continue + + if final_dst.endswith(os.sep): + final_dst = final_dst[:-1] + + yield member, final_dst + + +def unpack_tarfile(filename, extract_dir, progress_filter=default_filter) -> bool: + """Unpack tar/tar.gz/tar.bz2 `filename` to `extract_dir` + + Raises ``UnrecognizedFormat`` if `filename` is not a tarfile (as determined + by ``tarfile.open()``). See ``unpack_archive()`` for an explanation + of the `progress_filter` argument. + """ + try: + tarobj = tarfile.open(filename) + except tarfile.TarError as e: + raise UnrecognizedFormat( + f"{filename} is not a compressed or uncompressed tar file" + ) from e + + for member, final_dst in _iter_open_tar( + tarobj, + extract_dir, + progress_filter, + ): + try: + # XXX Ugh + tarobj._extract_member(member, final_dst) + except tarfile.ExtractError: + # chown/chmod/mkfifo/mknode/makedev failed + pass + + return True + + +extraction_drivers = unpack_directory, unpack_zipfile, unpack_tarfile diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/cli-64.exe b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/cli-64.exe new file mode 100644 index 0000000000000000000000000000000000000000..3ea50eebfe3f0113b231a318cc1ad6e238afd60d Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/cli-64.exe differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/discovery.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/discovery.py new file mode 100644 index 0000000000000000000000000000000000000000..c88839918562bad12f1a2e72309f1eacfe23349c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/discovery.py @@ -0,0 +1,614 @@ +"""Automatic discovery of Python modules and packages (for inclusion in the +distribution) and other config values. + +For the purposes of this module, the following nomenclature is used: + +- "src-layout": a directory representing a Python project that contains a "src" + folder. Everything under the "src" folder is meant to be included in the + distribution when packaging the project. Example:: + + . + ├── tox.ini + ├── pyproject.toml + └── src/ + └── mypkg/ + ├── __init__.py + ├── mymodule.py + └── my_data_file.txt + +- "flat-layout": a Python project that does not use "src-layout" but instead + have a directory under the project root for each package:: + + . + ├── tox.ini + ├── pyproject.toml + └── mypkg/ + ├── __init__.py + ├── mymodule.py + └── my_data_file.txt + +- "single-module": a project that contains a single Python script direct under + the project root (no directory used):: + + . 
+ ├── tox.ini + ├── pyproject.toml + └── mymodule.py + +""" + +from __future__ import annotations + +import itertools +import os +from collections.abc import Iterable, Iterator, Mapping +from fnmatch import fnmatchcase +from glob import glob +from pathlib import Path +from typing import TYPE_CHECKING, ClassVar + +import _distutils_hack.override # noqa: F401 + +from ._path import StrPath + +from distutils import log +from distutils.util import convert_path + +if TYPE_CHECKING: + from setuptools import Distribution + +chain_iter = itertools.chain.from_iterable + + +def _valid_name(path: StrPath) -> bool: + # Ignore invalid names that cannot be imported directly + return os.path.basename(path).isidentifier() + + +class _Filter: + """ + Given a list of patterns, create a callable that will be true only if + the input matches at least one of the patterns. + """ + + def __init__(self, *patterns: str) -> None: + self._patterns = dict.fromkeys(patterns) + + def __call__(self, item: str) -> bool: + return any(fnmatchcase(item, pat) for pat in self._patterns) + + def __contains__(self, item: str) -> bool: + return item in self._patterns + + +class _Finder: + """Base class that exposes functionality for module/package finders""" + + ALWAYS_EXCLUDE: ClassVar[tuple[str, ...]] = () + DEFAULT_EXCLUDE: ClassVar[tuple[str, ...]] = () + + @classmethod + def find( + cls, + where: StrPath = '.', + exclude: Iterable[str] = (), + include: Iterable[str] = ('*',), + ) -> list[str]: + """Return a list of all Python items (packages or modules, depending on + the finder implementation) found within directory 'where'. + + 'where' is the root directory which will be searched. + It should be supplied as a "cross-platform" (i.e. URL-style) path; + it will be converted to the appropriate local path syntax. + + 'exclude' is a sequence of names to exclude; '*' can be used + as a wildcard in the names. + When finding packages, 'foo.*' will exclude all subpackages of 'foo' + (but not 'foo' itself). + + 'include' is a sequence of names to include. + If it's specified, only the named items will be included. + If it's not specified, all found items will be included. + 'include' can contain shell style wildcard patterns just like + 'exclude'. + """ + + exclude = exclude or cls.DEFAULT_EXCLUDE + return list( + cls._find_iter( + convert_path(str(where)), + _Filter(*cls.ALWAYS_EXCLUDE, *exclude), + _Filter(*include), + ) + ) + + @classmethod + def _find_iter( + cls, where: StrPath, exclude: _Filter, include: _Filter + ) -> Iterator[str]: + raise NotImplementedError + + +class PackageFinder(_Finder): + """ + Generate a list of all Python packages found within a directory + """ + + ALWAYS_EXCLUDE = ("ez_setup", "*__pycache__") + + @classmethod + def _find_iter( + cls, where: StrPath, exclude: _Filter, include: _Filter + ) -> Iterator[str]: + """ + All the packages found in 'where' that pass the 'include' filter, but + not the 'exclude' filter. + """ + for root, dirs, files in os.walk(str(where), followlinks=True): + # Copy dirs to iterate over it, then empty dirs. + all_dirs = dirs[:] + dirs[:] = [] + + for dir in all_dirs: + full_path = os.path.join(root, dir) + rel_path = os.path.relpath(full_path, where) + package = rel_path.replace(os.path.sep, '.') + + # Skip directory trees that are not valid packages + if '.' in dir or not cls._looks_like_package(full_path, package): + continue + + # Should this package be included? 
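# A small usage sketch for the finder classes defined above; the 'src' directory is
# an assumption about the project being scanned, and a missing directory simply
# yields an empty result. Both ``include`` and ``exclude`` accept fnmatch-style
# wildcard patterns.
from setuptools.discovery import ModuleFinder, PackageFinder

packages = PackageFinder.find(where='src', exclude=('*.tests', '*.tests.*'))
modules = ModuleFinder.find(where='.')  # only top-level *.py files, no recursion
print(packages, modules)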
+ if include(package) and not exclude(package): + yield package + + # Early pruning if there is nothing else to be scanned + if f"{package}*" in exclude or f"{package}.*" in exclude: + continue + + # Keep searching subdirectories, as there may be more packages + # down there, even if the parent was excluded. + dirs.append(dir) + + @staticmethod + def _looks_like_package(path: StrPath, _package_name: str) -> bool: + """Does a directory look like a package?""" + return os.path.isfile(os.path.join(path, '__init__.py')) + + +class PEP420PackageFinder(PackageFinder): + @staticmethod + def _looks_like_package(_path: StrPath, _package_name: str) -> bool: + return True + + +class ModuleFinder(_Finder): + """Find isolated Python modules. + This function will **not** recurse subdirectories. + """ + + @classmethod + def _find_iter( + cls, where: StrPath, exclude: _Filter, include: _Filter + ) -> Iterator[str]: + for file in glob(os.path.join(where, "*.py")): + module, _ext = os.path.splitext(os.path.basename(file)) + + if not cls._looks_like_module(module): + continue + + if include(module) and not exclude(module): + yield module + + _looks_like_module = staticmethod(_valid_name) + + +# We have to be extra careful in the case of flat layout to not include files +# and directories not meant for distribution (e.g. tool-related) + + +class FlatLayoutPackageFinder(PEP420PackageFinder): + _EXCLUDE = ( + "ci", + "bin", + "debian", + "doc", + "docs", + "documentation", + "manpages", + "news", + "newsfragments", + "changelog", + "test", + "tests", + "unit_test", + "unit_tests", + "example", + "examples", + "scripts", + "tools", + "util", + "utils", + "python", + "build", + "dist", + "venv", + "env", + "requirements", + # ---- Task runners / Build tools ---- + "tasks", # invoke + "fabfile", # fabric + "site_scons", # SCons + # ---- Other tools ---- + "benchmark", + "benchmarks", + "exercise", + "exercises", + "htmlcov", # Coverage.py + # ---- Hidden directories/Private packages ---- + "[._]*", + ) + + DEFAULT_EXCLUDE = tuple(chain_iter((p, f"{p}.*") for p in _EXCLUDE)) + """Reserved package names""" + + @staticmethod + def _looks_like_package(_path: StrPath, package_name: str) -> bool: + names = package_name.split('.') + # Consider PEP 561 + root_pkg_is_valid = names[0].isidentifier() or names[0].endswith("-stubs") + return root_pkg_is_valid and all(name.isidentifier() for name in names[1:]) + + +class FlatLayoutModuleFinder(ModuleFinder): + DEFAULT_EXCLUDE = ( + "setup", + "conftest", + "test", + "tests", + "example", + "examples", + "build", + # ---- Task runners ---- + "toxfile", + "noxfile", + "pavement", + "dodo", + "tasks", + "fabfile", + # ---- Other tools ---- + "[Ss][Cc]onstruct", # SCons + "conanfile", # Connan: C/C++ build tool + "manage", # Django + "benchmark", + "benchmarks", + "exercise", + "exercises", + # ---- Hidden files/Private modules ---- + "[._]*", + ) + """Reserved top-level module names""" + + +def _find_packages_within(root_pkg: str, pkg_dir: StrPath) -> list[str]: + nested = PEP420PackageFinder.find(pkg_dir) + return [root_pkg] + [".".join((root_pkg, n)) for n in nested] + + +class ConfigDiscovery: + """Fill-in metadata and options that can be automatically derived + (from other metadata/options, the file system or conventions) + """ + + def __init__(self, distribution: Distribution) -> None: + self.dist = distribution + self._called = False + self._disabled = False + self._skip_ext_modules = False + + def _disable(self): + """Internal API to disable automatic discovery""" + 
self._disabled = True + + def _ignore_ext_modules(self): + """Internal API to disregard ext_modules. + + Normally auto-discovery would not be triggered if ``ext_modules`` are set + (this is done for backward compatibility with existing packages relying on + ``setup.py`` or ``setup.cfg``). However, ``setuptools`` can call this function + to ignore given ``ext_modules`` and proceed with the auto-discovery if + ``packages`` and ``py_modules`` are not given (e.g. when using pyproject.toml + metadata). + """ + self._skip_ext_modules = True + + @property + def _root_dir(self) -> StrPath: + # The best is to wait until `src_root` is set in dist, before using _root_dir. + return self.dist.src_root or os.curdir + + @property + def _package_dir(self) -> dict[str, str]: + if self.dist.package_dir is None: + return {} + return self.dist.package_dir + + def __call__( + self, force: bool = False, name: bool = True, ignore_ext_modules: bool = False + ): + """Automatically discover missing configuration fields + and modifies the given ``distribution`` object in-place. + + Note that by default this will only have an effect the first time the + ``ConfigDiscovery`` object is called. + + To repeatedly invoke automatic discovery (e.g. when the project + directory changes), please use ``force=True`` (or create a new + ``ConfigDiscovery`` instance). + """ + if force is False and (self._called or self._disabled): + # Avoid overhead of multiple calls + return + + self._analyse_package_layout(ignore_ext_modules) + if name: + self.analyse_name() # depends on ``packages`` and ``py_modules`` + + self._called = True + + def _explicitly_specified(self, ignore_ext_modules: bool) -> bool: + """``True`` if the user has specified some form of package/module listing""" + ignore_ext_modules = ignore_ext_modules or self._skip_ext_modules + ext_modules = not (self.dist.ext_modules is None or ignore_ext_modules) + return ( + self.dist.packages is not None + or self.dist.py_modules is not None + or ext_modules + or hasattr(self.dist, "configuration") + and self.dist.configuration + # ^ Some projects use numpy.distutils.misc_util.Configuration + ) + + def _analyse_package_layout(self, ignore_ext_modules: bool) -> bool: + if self._explicitly_specified(ignore_ext_modules): + # For backward compatibility, just try to find modules/packages + # when nothing is given + return True + + log.debug( + "No `packages` or `py_modules` configuration, performing " + "automatic discovery." + ) + + return ( + self._analyse_explicit_layout() + or self._analyse_src_layout() + # flat-layout is the trickiest for discovery so it should be last + or self._analyse_flat_layout() + ) + + def _analyse_explicit_layout(self) -> bool: + """The user can explicitly give a package layout via ``package_dir``""" + package_dir = self._package_dir.copy() # don't modify directly + package_dir.pop("", None) # This falls under the "src-layout" umbrella + root_dir = self._root_dir + + if not package_dir: + return False + + log.debug(f"`explicit-layout` detected -- analysing {package_dir}") + pkgs = chain_iter( + _find_packages_within(pkg, os.path.join(root_dir, parent_dir)) + for pkg, parent_dir in package_dir.items() + ) + self.dist.packages = list(pkgs) + log.debug(f"discovered packages -- {self.dist.packages}") + return True + + def _analyse_src_layout(self) -> bool: + """Try to find all packages or modules under the ``src`` directory + (or anything pointed by ``package_dir[""]``). + + The "src-layout" is relatively safe for automatic discovery. 
+ We assume that everything within is meant to be included in the + distribution. + + If ``package_dir[""]`` is not given, but the ``src`` directory exists, + this function will set ``package_dir[""] = "src"``. + """ + package_dir = self._package_dir + src_dir = os.path.join(self._root_dir, package_dir.get("", "src")) + if not os.path.isdir(src_dir): + return False + + log.debug(f"`src-layout` detected -- analysing {src_dir}") + package_dir.setdefault("", os.path.basename(src_dir)) + self.dist.package_dir = package_dir # persist eventual modifications + self.dist.packages = PEP420PackageFinder.find(src_dir) + self.dist.py_modules = ModuleFinder.find(src_dir) + log.debug(f"discovered packages -- {self.dist.packages}") + log.debug(f"discovered py_modules -- {self.dist.py_modules}") + return True + + def _analyse_flat_layout(self) -> bool: + """Try to find all packages and modules under the project root. + + Since the ``flat-layout`` is more dangerous in terms of accidentally including + extra files/directories, this function is more conservative and will raise an + error if multiple packages or modules are found. + + This assumes that multi-package dists are uncommon and refuse to support that + use case in order to be able to prevent unintended errors. + """ + log.debug(f"`flat-layout` detected -- analysing {self._root_dir}") + return self._analyse_flat_packages() or self._analyse_flat_modules() + + def _analyse_flat_packages(self) -> bool: + self.dist.packages = FlatLayoutPackageFinder.find(self._root_dir) + top_level = remove_nested_packages(remove_stubs(self.dist.packages)) + log.debug(f"discovered packages -- {self.dist.packages}") + self._ensure_no_accidental_inclusion(top_level, "packages") + return bool(top_level) + + def _analyse_flat_modules(self) -> bool: + self.dist.py_modules = FlatLayoutModuleFinder.find(self._root_dir) + log.debug(f"discovered py_modules -- {self.dist.py_modules}") + self._ensure_no_accidental_inclusion(self.dist.py_modules, "modules") + return bool(self.dist.py_modules) + + def _ensure_no_accidental_inclusion(self, detected: list[str], kind: str): + if len(detected) > 1: + from inspect import cleandoc + + from setuptools.errors import PackageDiscoveryError + + msg = f"""Multiple top-level {kind} discovered in a flat-layout: {detected}. + + To avoid accidental inclusion of unwanted files or directories, + setuptools will not proceed with this build. + + If you are trying to create a single distribution with multiple {kind} + on purpose, you should not rely on automatic discovery. + Instead, consider the following options: + + 1. set up custom discovery (`find` directive with `include` or `exclude`) + 2. use a `src-layout` + 3. explicitly set `py_modules` or `packages` with a list of names + + To find more information, look for "package discovery" on setuptools docs. + """ + raise PackageDiscoveryError(cleandoc(msg)) + + def analyse_name(self) -> None: + """The packages/modules are the essential contribution of the author. + Therefore the name of the distribution can be derived from them. 
+ """ + if self.dist.metadata.name or self.dist.name: + # get_name() is not reliable (can return "UNKNOWN") + return + + log.debug("No `name` configuration, performing automatic discovery") + + name = ( + self._find_name_single_package_or_module() + or self._find_name_from_packages() + ) + if name: + self.dist.metadata.name = name + + def _find_name_single_package_or_module(self) -> str | None: + """Exactly one module or package""" + for field in ('packages', 'py_modules'): + items = getattr(self.dist, field, None) or [] + if items and len(items) == 1: + log.debug(f"Single module/package detected, name: {items[0]}") + return items[0] + + return None + + def _find_name_from_packages(self) -> str | None: + """Try to find the root package that is not a PEP 420 namespace""" + if not self.dist.packages: + return None + + packages = remove_stubs(sorted(self.dist.packages, key=len)) + package_dir = self.dist.package_dir or {} + + parent_pkg = find_parent_package(packages, package_dir, self._root_dir) + if parent_pkg: + log.debug(f"Common parent package detected, name: {parent_pkg}") + return parent_pkg + + log.warn("No parent package detected, impossible to derive `name`") + return None + + +def remove_nested_packages(packages: list[str]) -> list[str]: + """Remove nested packages from a list of packages. + + >>> remove_nested_packages(["a", "a.b1", "a.b2", "a.b1.c1"]) + ['a'] + >>> remove_nested_packages(["a", "b", "c.d", "c.d.e.f", "g.h", "a.a1"]) + ['a', 'b', 'c.d', 'g.h'] + """ + pkgs = sorted(packages, key=len) + top_level = pkgs[:] + size = len(pkgs) + for i, name in enumerate(reversed(pkgs)): + if any(name.startswith(f"{other}.") for other in top_level): + top_level.pop(size - i - 1) + + return top_level + + +def remove_stubs(packages: list[str]) -> list[str]: + """Remove type stubs (:pep:`561`) from a list of packages. + + >>> remove_stubs(["a", "a.b", "a-stubs", "a-stubs.b.c", "b", "c-stubs"]) + ['a', 'a.b', 'b'] + """ + return [pkg for pkg in packages if not pkg.split(".")[0].endswith("-stubs")] + + +def find_parent_package( + packages: list[str], package_dir: Mapping[str, str], root_dir: StrPath +) -> str | None: + """Find the parent package that is not a namespace.""" + packages = sorted(packages, key=len) + common_ancestors = [] + for i, name in enumerate(packages): + if not all(n.startswith(f"{name}.") for n in packages[i + 1 :]): + # Since packages are sorted by length, this condition is able + # to find a list of all common ancestors. + # When there is divergence (e.g. multiple root packages) + # the list will be empty + break + common_ancestors.append(name) + + for name in common_ancestors: + pkg_path = find_package_path(name, package_dir, root_dir) + init = os.path.join(pkg_path, "__init__.py") + if os.path.isfile(init): + return name + + return None + + +def find_package_path( + name: str, package_dir: Mapping[str, str], root_dir: StrPath +) -> str: + """Given a package name, return the path where it should be found on + disk, considering the ``package_dir`` option. 
+ + >>> path = find_package_path("my.pkg", {"": "root/is/nested"}, ".") + >>> path.replace(os.sep, "/") + './root/is/nested/my/pkg' + + >>> path = find_package_path("my.pkg", {"my": "root/is/nested"}, ".") + >>> path.replace(os.sep, "/") + './root/is/nested/pkg' + + >>> path = find_package_path("my.pkg", {"my.pkg": "root/is/nested"}, ".") + >>> path.replace(os.sep, "/") + './root/is/nested' + + >>> path = find_package_path("other.pkg", {"my.pkg": "root/is/nested"}, ".") + >>> path.replace(os.sep, "/") + './other/pkg' + """ + parts = name.split(".") + for i in range(len(parts), 0, -1): + # Look backwards, the most specific package_dir first + partial_name = ".".join(parts[:i]) + if partial_name in package_dir: + parent = package_dir[partial_name] + return os.path.join(root_dir, parent, *parts[i:]) + + parent = package_dir.get("") or "" + return os.path.join(root_dir, *parent.split("/"), *parts) + + +def construct_package_dir(packages: list[str], package_path: StrPath) -> dict[str, str]: + parent_pkgs = remove_nested_packages(packages) + prefix = Path(package_path).parts + return {pkg: "/".join([*prefix, *pkg.split(".")]) for pkg in parent_pkgs} diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/dist.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/dist.py new file mode 100644 index 0000000000000000000000000000000000000000..02496512672dde2d55bf3bf55deab7e4729d7787 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/dist.py @@ -0,0 +1,1004 @@ +from __future__ import annotations + +import io +import itertools +import numbers +import os +import re +import sys +from collections.abc import Iterable, MutableMapping, Sequence +from glob import iglob +from pathlib import Path +from typing import TYPE_CHECKING, Any, Union + +from more_itertools import partition, unique_everseen +from packaging.markers import InvalidMarker, Marker +from packaging.specifiers import InvalidSpecifier, SpecifierSet +from packaging.version import Version + +from . import ( + _entry_points, + _reqs, + _static, + command as _, # noqa: F401 # imported for side-effects +) +from ._importlib import metadata +from ._path import StrPath +from ._reqs import _StrOrIter +from .config import pyprojecttoml, setupcfg +from .discovery import ConfigDiscovery +from .monkey import get_unpatched +from .warnings import InformationOnly, SetuptoolsDeprecationWarning + +import distutils.cmd +import distutils.command +import distutils.core +import distutils.dist +import distutils.log +from distutils.debug import DEBUG +from distutils.errors import DistutilsOptionError, DistutilsSetupError +from distutils.fancy_getopt import translate_longopt +from distutils.util import strtobool + +if TYPE_CHECKING: + from typing_extensions import TypeAlias + + from pkg_resources import Distribution as _pkg_resources_Distribution + + +__all__ = ['Distribution'] + +_sequence = tuple, list +""" +:meta private: + +Supported iterable types that are known to be: +- ordered (which `set` isn't) +- not match a str (which `Sequence[str]` does) +- not imply a nested type (like `dict`) +for use with `isinstance`. +""" +_Sequence: TypeAlias = Union[tuple[str, ...], list[str]] +# This is how stringifying _Sequence would look in Python 3.10 +_sequence_type_repr = "tuple[str, ...] | list[str]" +_OrderedStrSequence: TypeAlias = Union[str, dict[str, Any], Sequence[str]] +""" +:meta private: +Avoid single-use iterable. Disallow sets. 
+A poor approximation of an OrderedSequence (dict doesn't match a Sequence). +""" + + +def __getattr__(name: str) -> Any: # pragma: no cover + if name == "sequence": + SetuptoolsDeprecationWarning.emit( + "`setuptools.dist.sequence` is an internal implementation detail.", + "Please define your own `sequence = tuple, list` instead.", + due_date=(2025, 8, 28), # Originally added on 2024-08-27 + ) + return _sequence + raise AttributeError(f"module {__name__!r} has no attribute {name!r}") + + +def check_importable(dist, attr, value): + try: + ep = metadata.EntryPoint(value=value, name=None, group=None) + assert not ep.extras + except (TypeError, ValueError, AttributeError, AssertionError) as e: + raise DistutilsSetupError( + f"{attr!r} must be importable 'module:attrs' string (got {value!r})" + ) from e + + +def assert_string_list(dist, attr: str, value: _Sequence) -> None: + """Verify that value is a string list""" + try: + # verify that value is a list or tuple to exclude unordered + # or single-use iterables + assert isinstance(value, _sequence) + # verify that elements of value are strings + assert ''.join(value) != value + except (TypeError, ValueError, AttributeError, AssertionError) as e: + raise DistutilsSetupError( + f"{attr!r} must be of type <{_sequence_type_repr}> (got {value!r})" + ) from e + + +def check_nsp(dist, attr, value): + """Verify that namespace packages are valid""" + ns_packages = value + assert_string_list(dist, attr, ns_packages) + for nsp in ns_packages: + if not dist.has_contents_for(nsp): + raise DistutilsSetupError( + f"Distribution contains no modules or packages for namespace package {nsp!r}" + ) + parent, _sep, _child = nsp.rpartition('.') + if parent and parent not in ns_packages: + distutils.log.warn( + "WARNING: %r is declared as a package namespace, but %r" + " is not: please correct this in setup.py", + nsp, + parent, + ) + SetuptoolsDeprecationWarning.emit( + "The namespace_packages parameter is deprecated.", + "Please replace its usage with implicit namespaces (PEP 420).", + see_docs="references/keywords.html#keyword-namespace-packages", + # TODO: define due_date, it may break old packages that are no longer + # maintained (e.g. sphinxcontrib extensions) when installed from source. + # Warning officially introduced in May 2022, however the deprecation + # was mentioned much earlier in the docs (May 2020, see #2149). + ) + + +def check_extras(dist, attr, value): + """Verify that extras_require mapping is valid""" + try: + list(itertools.starmap(_check_extra, value.items())) + except (TypeError, ValueError, AttributeError) as e: + raise DistutilsSetupError( + "'extras_require' must be a dictionary whose values are " + "strings or lists of strings containing valid project/version " + "requirement specifiers." + ) from e + + +def _check_extra(extra, reqs): + _name, _sep, marker = extra.partition(':') + try: + _check_marker(marker) + except InvalidMarker: + msg = f"Invalid environment marker: {marker} ({extra!r})" + raise DistutilsSetupError(msg) from None + list(_reqs.parse(reqs)) + + +def _check_marker(marker): + if not marker: + return + m = Marker(marker) + m.evaluate() + + +def assert_bool(dist, attr, value): + """Verify that value is True, False, 0, or 1""" + if bool(value) != value: + raise DistutilsSetupError(f"{attr!r} must be a boolean value (got {value!r})") + + +def invalid_unless_false(dist, attr, value): + if not value: + DistDeprecationWarning.emit(f"{attr} is ignored.") + # TODO: should there be a `due_date` here? 
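# A brief sketch of how the validation helpers above behave; these particular
# helpers ignore their ``dist`` argument, so ``None`` is passed here, and the
# attribute names and values are made up. Malformed input raises
# ``DistutilsSetupError``.
from distutils.errors import DistutilsSetupError
from setuptools.dist import assert_string_list, check_extras

assert_string_list(None, 'py_modules', ['mod_a', 'mod_b'])     # OK: list of strings
check_extras(None, 'extras_require', {'docs': ['sphinx>=7']})  # OK: extra -> requirements

try:
    assert_string_list(None, 'py_modules', 'mod_a mod_b')  # a bare string is rejected
except DistutilsSetupError as exc:
    print(exc)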
+ return + raise DistutilsSetupError(f"{attr} is invalid.") + + +def check_requirements(dist, attr: str, value: _OrderedStrSequence) -> None: + """Verify that install_requires is a valid requirements list""" + try: + list(_reqs.parse(value)) + if isinstance(value, set): + raise TypeError("Unordered types are not allowed") + except (TypeError, ValueError) as error: + msg = ( + f"{attr!r} must be a string or iterable of strings " + f"containing valid project/version requirement specifiers; {error}" + ) + raise DistutilsSetupError(msg) from error + + +def check_specifier(dist, attr, value): + """Verify that value is a valid version specifier""" + try: + SpecifierSet(value) + except (InvalidSpecifier, AttributeError) as error: + msg = f"{attr!r} must be a string containing valid version specifiers; {error}" + raise DistutilsSetupError(msg) from error + + +def check_entry_points(dist, attr, value): + """Verify that entry_points map is parseable""" + try: + _entry_points.load(value) + except Exception as e: + raise DistutilsSetupError(e) from e + + +def check_package_data(dist, attr, value): + """Verify that value is a dictionary of package names to glob lists""" + if not isinstance(value, dict): + raise DistutilsSetupError( + f"{attr!r} must be a dictionary mapping package names to lists of " + "string wildcard patterns" + ) + for k, v in value.items(): + if not isinstance(k, str): + raise DistutilsSetupError( + f"keys of {attr!r} dict must be strings (got {k!r})" + ) + assert_string_list(dist, f'values of {attr!r} dict', v) + + +def check_packages(dist, attr, value): + for pkgname in value: + if not re.match(r'\w+(\.\w+)*', pkgname): + distutils.log.warn( + "WARNING: %r not a valid package name; please use only " + ".-separated package names in setup.py", + pkgname, + ) + + +if TYPE_CHECKING: + # Work around a mypy issue where type[T] can't be used as a base: https://github.com/python/mypy/issues/10962 + from distutils.core import Distribution as _Distribution +else: + _Distribution = get_unpatched(distutils.core.Distribution) + + +class Distribution(_Distribution): + """Distribution with support for tests and package data + + This is an enhanced version of 'distutils.dist.Distribution' that + effectively adds the following new optional keyword arguments to 'setup()': + + 'install_requires' -- a string or sequence of strings specifying project + versions that the distribution requires when installed, in the format + used by 'pkg_resources.require()'. They will be installed + automatically when the package is installed. If you wish to use + packages that are not available in PyPI, or want to give your users an + alternate download location, you can add a 'find_links' option to the + '[easy_install]' section of your project's 'setup.cfg' file, and then + setuptools will scan the listed web pages for links that satisfy the + requirements. + + 'extras_require' -- a dictionary mapping names of optional "extras" to the + additional requirement(s) that using those extras incurs. For example, + this:: + + extras_require = dict(reST = ["docutils>=0.3", "reSTedit"]) + + indicates that the distribution can optionally provide an extra + capability called "reST", but it can only be used if docutils and + reSTedit are installed. If the user installs your package using + EasyInstall and requests one of your extras, the corresponding + additional requirements will be installed if needed. 
+ + 'package_data' -- a dictionary mapping package names to lists of filenames + or globs to use to find data files contained in the named packages. + If the dictionary has filenames or globs listed under '""' (the empty + string), those names will be searched for in every package, in addition + to any names for the specific package. Data files found using these + names/globs will be installed along with the package, in the same + location as the package. Note that globs are allowed to reference + the contents of non-package subdirectories, as long as you use '/' as + a path separator. (Globs are automatically converted to + platform-specific paths at runtime.) + + In addition to these new keywords, this class also has several new methods + for manipulating the distribution's contents. For example, the 'include()' + and 'exclude()' methods can be thought of as in-place add and subtract + commands that add or remove packages, modules, extensions, and so on from + the distribution. + """ + + _DISTUTILS_UNSUPPORTED_METADATA = { + 'long_description_content_type': lambda: None, + 'project_urls': dict, + 'provides_extras': dict, # behaves like an ordered set + 'license_file': lambda: None, + 'license_files': lambda: None, + 'install_requires': list, + 'extras_require': dict, + } + + # Used by build_py, editable_wheel and install_lib commands for legacy namespaces + namespace_packages: list[str] #: :meta private: DEPRECATED + + # Any: Dynamic assignment results in Incompatible types in assignment + def __init__(self, attrs: MutableMapping[str, Any] | None = None) -> None: + have_package_data = hasattr(self, "package_data") + if not have_package_data: + self.package_data: dict[str, list[str]] = {} + attrs = attrs or {} + self.dist_files: list[tuple[str, str, str]] = [] + self.include_package_data: bool | None = None + self.exclude_package_data: dict[str, list[str]] | None = None + # Filter-out setuptools' specific options. + self.src_root: str | None = attrs.pop("src_root", None) + self.dependency_links: list[str] = attrs.pop('dependency_links', []) + self.setup_requires: list[str] = attrs.pop('setup_requires', []) + for ep in metadata.entry_points(group='distutils.setup_keywords'): + vars(self).setdefault(ep.name, None) + + metadata_only = set(self._DISTUTILS_UNSUPPORTED_METADATA) + metadata_only -= {"install_requires", "extras_require"} + dist_attrs = {k: v for k, v in attrs.items() if k not in metadata_only} + _Distribution.__init__(self, dist_attrs) + + # Private API (setuptools-use only, not restricted to Distribution) + # Stores files that are referenced by the configuration and need to be in the + # sdist (e.g. `version = file: VERSION.txt`) + self._referenced_files = set[str]() + + self.set_defaults = ConfigDiscovery(self) + + self._set_metadata_defaults(attrs) + + self.metadata.version = self._normalize_version(self.metadata.version) + self._finalize_requires() + + def _validate_metadata(self): + required = {"name"} + provided = { + key + for key in vars(self.metadata) + if getattr(self.metadata, key, None) is not None + } + missing = required - provided + + if missing: + msg = f"Required package metadata is missing: {missing}" + raise DistutilsSetupError(msg) + + def _set_metadata_defaults(self, attrs): + """ + Fill-in missing metadata fields not supported by distutils. + Some fields may have been set by other tools (e.g. pbr). + Those fields (vars(self.metadata)) take precedence to + supplied attrs. 
+ """ + for option, default in self._DISTUTILS_UNSUPPORTED_METADATA.items(): + vars(self.metadata).setdefault(option, attrs.get(option, default())) + + @staticmethod + def _normalize_version(version): + from . import sic + + if isinstance(version, numbers.Number): + # Some people apparently take "version number" too literally :) + version = str(version) + elif isinstance(version, sic) or version is None: + return version + + normalized = str(Version(version)) + if version != normalized: + InformationOnly.emit(f"Normalizing '{version}' to '{normalized}'") + return normalized + return version + + def _finalize_requires(self): + """ + Set `metadata.python_requires` and fix environment markers + in `install_requires` and `extras_require`. + """ + if getattr(self, 'python_requires', None): + self.metadata.python_requires = self.python_requires + + self._normalize_requires() + self.metadata.install_requires = self.install_requires + self.metadata.extras_require = self.extras_require + + if self.extras_require: + for extra in self.extras_require.keys(): + # Setuptools allows a weird ": syntax for extras + extra = extra.split(':')[0] + if extra: + self.metadata.provides_extras.setdefault(extra) + + def _normalize_requires(self): + """Make sure requirement-related attributes exist and are normalized""" + install_requires = getattr(self, "install_requires", None) or [] + extras_require = getattr(self, "extras_require", None) or {} + + # Preserve the "static"-ness of values parsed from config files + list_ = _static.List if _static.is_static(install_requires) else list + self.install_requires = list_(map(str, _reqs.parse(install_requires))) + + dict_ = _static.Dict if _static.is_static(extras_require) else dict + self.extras_require = dict_( + (k, list(map(str, _reqs.parse(v or [])))) for k, v in extras_require.items() + ) + + def _finalize_license_files(self) -> None: + """Compute names of all license files which should be included.""" + license_files: list[str] | None = self.metadata.license_files + patterns = license_files or [] + + license_file: str | None = self.metadata.license_file + if license_file and license_file not in patterns: + patterns.append(license_file) + + if license_files is None and license_file is None: + # Default patterns match the ones wheel uses + # See https://wheel.readthedocs.io/en/stable/user_guide.html + # -> 'Including license files in the generated wheel file' + patterns = ['LICEN[CS]E*', 'COPYING*', 'NOTICE*', 'AUTHORS*'] + + self.metadata.license_files = list( + unique_everseen(self._expand_patterns(patterns)) + ) + + @staticmethod + def _expand_patterns(patterns): + """ + >>> list(Distribution._expand_patterns(['LICENSE'])) + ['LICENSE'] + >>> list(Distribution._expand_patterns(['pyproject.toml', 'LIC*'])) + ['pyproject.toml', 'LICENSE'] + """ + return ( + path + for pattern in patterns + for path in sorted(iglob(pattern)) + if not path.endswith('~') and os.path.isfile(path) + ) + + # FIXME: 'Distribution._parse_config_files' is too complex (14) + def _parse_config_files(self, filenames=None): # noqa: C901 + """ + Adapted from distutils.dist.Distribution.parse_config_files, + this method provides the same functionality in subtly-improved + ways. 
+ """ + from configparser import ConfigParser + + # Ignore install directory options if we have a venv + ignore_options = ( + [] + if sys.prefix == sys.base_prefix + else [ + 'install-base', + 'install-platbase', + 'install-lib', + 'install-platlib', + 'install-purelib', + 'install-headers', + 'install-scripts', + 'install-data', + 'prefix', + 'exec-prefix', + 'home', + 'user', + 'root', + ] + ) + + ignore_options = frozenset(ignore_options) + + if filenames is None: + filenames = self.find_config_files() + + if DEBUG: + self.announce("Distribution.parse_config_files():") + + parser = ConfigParser() + parser.optionxform = str + for filename in filenames: + with open(filename, encoding='utf-8') as reader: + if DEBUG: + self.announce(" reading {filename}".format(**locals())) + parser.read_file(reader) + for section in parser.sections(): + options = parser.options(section) + opt_dict = self.get_option_dict(section) + + for opt in options: + if opt == '__name__' or opt in ignore_options: + continue + + val = parser.get(section, opt) + opt = self.warn_dash_deprecation(opt, section) + opt = self.make_option_lowercase(opt, section) + opt_dict[opt] = (filename, val) + + # Make the ConfigParser forget everything (so we retain + # the original filenames that options come from) + parser.__init__() + + if 'global' not in self.command_options: + return + + # If there was a "global" section in the config file, use it + # to set Distribution options. + + for opt, (src, val) in self.command_options['global'].items(): + alias = self.negative_opt.get(opt) + if alias: + val = not strtobool(val) + elif opt in ('verbose', 'dry_run'): # ugh! + val = strtobool(val) + + try: + setattr(self, alias or opt, val) + except ValueError as e: + raise DistutilsOptionError(e) from e + + def warn_dash_deprecation(self, opt: str, section: str) -> str: + if section in ( + 'options.extras_require', + 'options.data_files', + ): + return opt + + underscore_opt = opt.replace('-', '_') + commands = list( + itertools.chain( + distutils.command.__all__, + self._setuptools_commands(), + ) + ) + if ( + not section.startswith('options') + and section != 'metadata' + and section not in commands + ): + return underscore_opt + + if '-' in opt: + SetuptoolsDeprecationWarning.emit( + "Invalid dash-separated options", + f""" + Usage of dash-separated {opt!r} will not be supported in future + versions. Please use the underscore name {underscore_opt!r} instead. + """, + see_docs="userguide/declarative_config.html", + due_date=(2025, 3, 3), + # Warning initially introduced in 3 Mar 2021 + ) + return underscore_opt + + def _setuptools_commands(self): + try: + entry_points = metadata.distribution('setuptools').entry_points + return {ep.name for ep in entry_points} # Avoid newer API for compatibility + except metadata.PackageNotFoundError: + # during bootstrapping, distribution doesn't exist + return [] + + def make_option_lowercase(self, opt: str, section: str) -> str: + if section != 'metadata' or opt.islower(): + return opt + + lowercase_opt = opt.lower() + SetuptoolsDeprecationWarning.emit( + "Invalid uppercase configuration", + f""" + Usage of uppercase key {opt!r} in {section!r} will not be supported in + future versions. Please use lowercase {lowercase_opt!r} instead. 
+ """, + see_docs="userguide/declarative_config.html", + due_date=(2025, 3, 3), + # Warning initially introduced in 6 Mar 2021 + ) + return lowercase_opt + + # FIXME: 'Distribution._set_command_options' is too complex (14) + def _set_command_options(self, command_obj, option_dict=None): # noqa: C901 + """ + Set the options for 'command_obj' from 'option_dict'. Basically + this means copying elements of a dictionary ('option_dict') to + attributes of an instance ('command'). + + 'command_obj' must be a Command instance. If 'option_dict' is not + supplied, uses the standard option dictionary for this command + (from 'self.command_options'). + + (Adopted from distutils.dist.Distribution._set_command_options) + """ + command_name = command_obj.get_command_name() + if option_dict is None: + option_dict = self.get_option_dict(command_name) + + if DEBUG: + self.announce(f" setting options for '{command_name}' command:") + for option, (source, value) in option_dict.items(): + if DEBUG: + self.announce(f" {option} = {value} (from {source})") + try: + bool_opts = [translate_longopt(o) for o in command_obj.boolean_options] + except AttributeError: + bool_opts = [] + try: + neg_opt = command_obj.negative_opt + except AttributeError: + neg_opt = {} + + try: + is_string = isinstance(value, str) + if option in neg_opt and is_string: + setattr(command_obj, neg_opt[option], not strtobool(value)) + elif option in bool_opts and is_string: + setattr(command_obj, option, strtobool(value)) + elif hasattr(command_obj, option): + setattr(command_obj, option, value) + else: + raise DistutilsOptionError( + f"error in {source}: command '{command_name}' has no such option '{option}'" + ) + except ValueError as e: + raise DistutilsOptionError(e) from e + + def _get_project_config_files(self, filenames: Iterable[StrPath] | None): + """Add default file and split between INI and TOML""" + tomlfiles = [] + standard_project_metadata = Path(self.src_root or os.curdir, "pyproject.toml") + if filenames is not None: + parts = partition(lambda f: Path(f).suffix == ".toml", filenames) + filenames = list(parts[0]) # 1st element => predicate is False + tomlfiles = list(parts[1]) # 2nd element => predicate is True + elif standard_project_metadata.exists(): + tomlfiles = [standard_project_metadata] + return filenames, tomlfiles + + def parse_config_files( + self, + filenames: Iterable[StrPath] | None = None, + ignore_option_errors: bool = False, + ) -> None: + """Parses configuration files from various levels + and loads configuration. + """ + inifiles, tomlfiles = self._get_project_config_files(filenames) + + self._parse_config_files(filenames=inifiles) + + setupcfg.parse_configuration( + self, self.command_options, ignore_option_errors=ignore_option_errors + ) + for filename in tomlfiles: + pyprojecttoml.apply_configuration(self, filename, ignore_option_errors) + + self._finalize_requires() + self._finalize_license_files() + + def fetch_build_eggs( + self, requires: _StrOrIter + ) -> list[_pkg_resources_Distribution]: + """Resolve pre-setup requirements""" + from .installer import _fetch_build_eggs + + return _fetch_build_eggs(self, requires) + + def finalize_options(self) -> None: + """ + Allow plugins to apply arbitrary operations to the + distribution. Each hook may optionally define a 'order' + to influence the order of execution. Smaller numbers + go first and the default is 0. 
+ """ + group = 'setuptools.finalize_distribution_options' + + def by_order(hook): + return getattr(hook, 'order', 0) + + defined = metadata.entry_points(group=group) + filtered = itertools.filterfalse(self._removed, defined) + loaded = map(lambda e: e.load(), filtered) + for ep in sorted(loaded, key=by_order): + ep(self) + + @staticmethod + def _removed(ep): + """ + When removing an entry point, if metadata is loaded + from an older version of Setuptools, that removed + entry point will attempt to be loaded and will fail. + See #2765 for more details. + """ + removed = { + # removed 2021-09-05 + '2to3_doctests', + } + return ep.name in removed + + def _finalize_setup_keywords(self): + for ep in metadata.entry_points(group='distutils.setup_keywords'): + value = getattr(self, ep.name, None) + if value is not None: + ep.load()(self, ep.name, value) + + def get_egg_cache_dir(self): + from . import windows_support + + egg_cache_dir = os.path.join(os.curdir, '.eggs') + if not os.path.exists(egg_cache_dir): + os.mkdir(egg_cache_dir) + windows_support.hide_file(egg_cache_dir) + readme_txt_filename = os.path.join(egg_cache_dir, 'README.txt') + with open(readme_txt_filename, 'w', encoding="utf-8") as f: + f.write( + 'This directory contains eggs that were downloaded ' + 'by setuptools to build, test, and run plug-ins.\n\n' + ) + f.write( + 'This directory caches those eggs to prevent ' + 'repeated downloads.\n\n' + ) + f.write('However, it is safe to delete this directory.\n\n') + + return egg_cache_dir + + def fetch_build_egg(self, req): + """Fetch an egg needed for building""" + from .installer import fetch_build_egg + + return fetch_build_egg(self, req) + + def get_command_class(self, command: str) -> type[distutils.cmd.Command]: # type: ignore[override] # Not doing complex overrides yet + """Pluggable version of get_command_class()""" + if command in self.cmdclass: + return self.cmdclass[command] + + # Special case bdist_wheel so it's never loaded from "wheel" + if command == 'bdist_wheel': + from .command.bdist_wheel import bdist_wheel + + return bdist_wheel + + eps = metadata.entry_points(group='distutils.commands', name=command) + for ep in eps: + self.cmdclass[command] = cmdclass = ep.load() + return cmdclass + else: + return _Distribution.get_command_class(self, command) + + def print_commands(self): + for ep in metadata.entry_points(group='distutils.commands'): + if ep.name not in self.cmdclass: + cmdclass = ep.load() + self.cmdclass[ep.name] = cmdclass + return _Distribution.print_commands(self) + + def get_command_list(self): + for ep in metadata.entry_points(group='distutils.commands'): + if ep.name not in self.cmdclass: + cmdclass = ep.load() + self.cmdclass[ep.name] = cmdclass + return _Distribution.get_command_list(self) + + def include(self, **attrs) -> None: + """Add items to distribution that are named in keyword arguments + + For example, 'dist.include(py_modules=["x"])' would add 'x' to + the distribution's 'py_modules' attribute, if it was not already + there. + + Currently, this method only supports inclusion for attributes that are + lists or tuples. If you need to add support for adding to other + attributes in this or a subclass, you can add an '_include_X' method, + where 'X' is the name of the attribute. The method will be called with + the value passed to 'include()'. So, 'dist.include(foo={"bar":"baz"})' + will try to call 'dist._include_foo({"bar":"baz"})', which can then + handle whatever special inclusion logic is needed. 
+ """ + for k, v in attrs.items(): + include = getattr(self, '_include_' + k, None) + if include: + include(v) + else: + self._include_misc(k, v) + + def exclude_package(self, package: str) -> None: + """Remove packages, modules, and extensions in named package""" + + pfx = package + '.' + if self.packages: + self.packages = [ + p for p in self.packages if p != package and not p.startswith(pfx) + ] + + if self.py_modules: + self.py_modules = [ + p for p in self.py_modules if p != package and not p.startswith(pfx) + ] + + if self.ext_modules: + self.ext_modules = [ + p + for p in self.ext_modules + if p.name != package and not p.name.startswith(pfx) + ] + + def has_contents_for(self, package: str) -> bool: + """Return true if 'exclude_package(package)' would do something""" + + pfx = package + '.' + + for p in self.iter_distribution_names(): + if p == package or p.startswith(pfx): + return True + + return False + + def _exclude_misc(self, name: str, value: _Sequence) -> None: + """Handle 'exclude()' for list/tuple attrs without a special handler""" + if not isinstance(value, _sequence): + raise DistutilsSetupError( + f"{name}: setting must be of type <{_sequence_type_repr}> (got {value!r})" + ) + try: + old = getattr(self, name) + except AttributeError as e: + raise DistutilsSetupError(f"{name}: No such distribution setting") from e + if old is not None and not isinstance(old, _sequence): + raise DistutilsSetupError( + name + ": this setting cannot be changed via include/exclude" + ) + elif old: + setattr(self, name, [item for item in old if item not in value]) + + def _include_misc(self, name: str, value: _Sequence) -> None: + """Handle 'include()' for list/tuple attrs without a special handler""" + + if not isinstance(value, _sequence): + raise DistutilsSetupError( + f"{name}: setting must be of type <{_sequence_type_repr}> (got {value!r})" + ) + try: + old = getattr(self, name) + except AttributeError as e: + raise DistutilsSetupError(f"{name}: No such distribution setting") from e + if old is None: + setattr(self, name, value) + elif not isinstance(old, _sequence): + raise DistutilsSetupError( + name + ": this setting cannot be changed via include/exclude" + ) + else: + new = [item for item in value if item not in old] + setattr(self, name, list(old) + new) + + def exclude(self, **attrs) -> None: + """Remove items from distribution that are named in keyword arguments + + For example, 'dist.exclude(py_modules=["x"])' would remove 'x' from + the distribution's 'py_modules' attribute. Excluding packages uses + the 'exclude_package()' method, so all of the package's contained + packages, modules, and extensions are also excluded. + + Currently, this method only supports exclusion from attributes that are + lists or tuples. If you need to add support for excluding from other + attributes in this or a subclass, you can add an '_exclude_X' method, + where 'X' is the name of the attribute. The method will be called with + the value passed to 'exclude()'. So, 'dist.exclude(foo={"bar":"baz"})' + will try to call 'dist._exclude_foo({"bar":"baz"})', which can then + handle whatever special exclusion logic is needed. 
+ """ + for k, v in attrs.items(): + exclude = getattr(self, '_exclude_' + k, None) + if exclude: + exclude(v) + else: + self._exclude_misc(k, v) + + def _exclude_packages(self, packages: _Sequence) -> None: + if not isinstance(packages, _sequence): + raise DistutilsSetupError( + f"packages: setting must be of type <{_sequence_type_repr}> (got {packages!r})" + ) + list(map(self.exclude_package, packages)) + + def _parse_command_opts(self, parser, args): + # Remove --with-X/--without-X options when processing command args + self.global_options = self.__class__.global_options + self.negative_opt = self.__class__.negative_opt + + # First, expand any aliases + command = args[0] + aliases = self.get_option_dict('aliases') + while command in aliases: + _src, alias = aliases[command] + del aliases[command] # ensure each alias can expand only once! + import shlex + + args[:1] = shlex.split(alias, True) + command = args[0] + + nargs = _Distribution._parse_command_opts(self, parser, args) + + # Handle commands that want to consume all remaining arguments + cmd_class = self.get_command_class(command) + if getattr(cmd_class, 'command_consumes_arguments', None): + self.get_option_dict(command)['args'] = ("command line", nargs) + if nargs is not None: + return [] + + return nargs + + def get_cmdline_options(self) -> dict[str, dict[str, str | None]]: + """Return a '{cmd: {opt:val}}' map of all command-line options + + Option names are all long, but do not include the leading '--', and + contain dashes rather than underscores. If the option doesn't take + an argument (e.g. '--quiet'), the 'val' is 'None'. + + Note that options provided by config files are intentionally excluded. + """ + + d: dict[str, dict[str, str | None]] = {} + + for cmd, opts in self.command_options.items(): + val: str | None + for opt, (src, val) in opts.items(): + if src != "command line": + continue + + opt = opt.replace('_', '-') + + if val == 0: + cmdobj = self.get_command_obj(cmd) + neg_opt = self.negative_opt.copy() + neg_opt.update(getattr(cmdobj, 'negative_opt', {})) + for neg, pos in neg_opt.items(): + if pos == opt: + opt = neg + val = None + break + else: + raise AssertionError("Shouldn't be able to get here") + + elif val == 1: + val = None + + d.setdefault(cmd, {})[opt] = val + + return d + + def iter_distribution_names(self): + """Yield all packages, modules, and extension names in distribution""" + + yield from self.packages or () + + yield from self.py_modules or () + + for ext in self.ext_modules or (): + if isinstance(ext, tuple): + name, _buildinfo = ext + else: + name = ext.name + if name.endswith('module'): + name = name[:-6] + yield name + + def handle_display_options(self, option_order): + """If there were any non-global "display-only" options + (--help-commands or the metadata display options) on the command + line, display the requested info and return true; else return + false. + """ + import sys + + if self.help_commands: + return _Distribution.handle_display_options(self, option_order) + + # Stdout may be StringIO (e.g. in tests) + if not isinstance(sys.stdout, io.TextIOWrapper): + return _Distribution.handle_display_options(self, option_order) + + # Don't wrap stdout if utf-8 is already the encoding. Provides + # workaround for #334. 
+ if sys.stdout.encoding.lower() in ('utf-8', 'utf8'): + return _Distribution.handle_display_options(self, option_order) + + # Print metadata in UTF-8 no matter the platform + encoding = sys.stdout.encoding + sys.stdout.reconfigure(encoding='utf-8') + try: + return _Distribution.handle_display_options(self, option_order) + finally: + sys.stdout.reconfigure(encoding=encoding) + + def run_command(self, command) -> None: + self.set_defaults() + # Postpone defaults until all explicit configuration is considered + # (setup() args, config files, command line and plugins) + + super().run_command(command) + + +class DistDeprecationWarning(SetuptoolsDeprecationWarning): + """Class for warning about deprecations in dist in + setuptools. Not ignored by default, unlike DeprecationWarning.""" diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/errors.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/errors.py new file mode 100644 index 0000000000000000000000000000000000000000..990ecbf4e2f18eb188addc9e0466152a20193a90 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/errors.py @@ -0,0 +1,67 @@ +"""setuptools.errors + +Provides exceptions used by setuptools modules. +""" + +from __future__ import annotations + +from distutils import errors as _distutils_errors + +# Re-export errors from distutils to facilitate the migration to PEP632 + +ByteCompileError = _distutils_errors.DistutilsByteCompileError +CCompilerError = _distutils_errors.CCompilerError +ClassError = _distutils_errors.DistutilsClassError +CompileError = _distutils_errors.CompileError +ExecError = _distutils_errors.DistutilsExecError +FileError = _distutils_errors.DistutilsFileError +InternalError = _distutils_errors.DistutilsInternalError +LibError = _distutils_errors.LibError +LinkError = _distutils_errors.LinkError +ModuleError = _distutils_errors.DistutilsModuleError +OptionError = _distutils_errors.DistutilsOptionError +PlatformError = _distutils_errors.DistutilsPlatformError +PreprocessError = _distutils_errors.PreprocessError +SetupError = _distutils_errors.DistutilsSetupError +TemplateError = _distutils_errors.DistutilsTemplateError +UnknownFileError = _distutils_errors.UnknownFileError + +# The root error class in the hierarchy +BaseError = _distutils_errors.DistutilsError + + +class InvalidConfigError(OptionError): # type: ignore[valid-type, misc] # distutils imports are `Any` on python 3.12+ + """Error used for invalid configurations.""" + + +class RemovedConfigError(OptionError): # type: ignore[valid-type, misc] # distutils imports are `Any` on python 3.12+ + """Error used for configurations that were deprecated and removed.""" + + +class RemovedCommandError(BaseError, RuntimeError): # type: ignore[valid-type, misc] # distutils imports are `Any` on python 3.12+ + """Error used for commands that have been removed in setuptools. + + Since ``setuptools`` is built on ``distutils``, simply removing a command + from ``setuptools`` will make the behavior fall back to ``distutils``; this + error is raised if a command exists in ``distutils`` but has been actively + removed in ``setuptools``. + """ + + +class PackageDiscoveryError(BaseError, RuntimeError): # type: ignore[valid-type, misc] # distutils imports are `Any` on python 3.12+ + """Impossible to perform automatic discovery of packages and/or modules. + + The current project layout or given discovery options can lead to problems when + scanning the project directory. 
+ + Setuptools might also refuse to complete auto-discovery if an error prone condition + is detected (e.g. when a project is organised as a flat-layout but contains + multiple directories that can be taken as top-level packages inside a single + distribution [*]_). In these situations the users are encouraged to be explicit + about which packages to include or to make the discovery parameters more specific. + + .. [*] Since multi-package distributions are uncommon it is very likely that the + developers did not intend for all the directories to be packaged, and are just + leaving auxiliary code in the repository top-level, such as maintenance-related + scripts. + """ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/extension.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/extension.py new file mode 100644 index 0000000000000000000000000000000000000000..76e03d9d6bdcdbd72e443e90c85d34429c22c261 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/extension.py @@ -0,0 +1,177 @@ +from __future__ import annotations + +import functools +import re +from typing import TYPE_CHECKING + +from setuptools._path import StrPath + +from .monkey import get_unpatched + +import distutils.core +import distutils.errors +import distutils.extension + + +def _have_cython(): + """ + Return True if Cython can be imported. + """ + cython_impl = 'Cython.Distutils.build_ext' + try: + # from (cython_impl) import build_ext + __import__(cython_impl, fromlist=['build_ext']).build_ext + except Exception: + return False + return True + + +# for compatibility +have_pyrex = _have_cython +if TYPE_CHECKING: + # Work around a mypy issue where type[T] can't be used as a base: https://github.com/python/mypy/issues/10962 + from distutils.core import Extension as _Extension +else: + _Extension = get_unpatched(distutils.core.Extension) + + +class Extension(_Extension): + """ + Describes a single extension module. + + This means that all source files will be compiled into a single binary file + ``.`` (with ```` derived from ``name`` and + ```` defined by one of the values in + ``importlib.machinery.EXTENSION_SUFFIXES``). + + In the case ``.pyx`` files are passed as ``sources and`` ``Cython`` is **not** + installed in the build environment, ``setuptools`` may also try to look for the + equivalent ``.cpp`` or ``.c`` files. + + :arg str name: + the full name of the extension, including any packages -- ie. + *not* a filename or pathname, but Python dotted name + + :arg list[str|os.PathLike[str]] sources: + list of source filenames, relative to the distribution root + (where the setup script lives), in Unix form (slash-separated) + for portability. Source files may be C, C++, SWIG (.i), + platform-specific resource files, or whatever else is recognized + by the "build_ext" command as source for a Python extension. 
+ + :keyword list[str] include_dirs: + list of directories to search for C/C++ header files (in Unix + form for portability) + + :keyword list[tuple[str, str|None]] define_macros: + list of macros to define; each macro is defined using a 2-tuple: + the first item corresponding to the name of the macro and the second + item either a string with its value or None to + define it without a particular value (equivalent of "#define + FOO" in source or -DFOO on Unix C compiler command line) + + :keyword list[str] undef_macros: + list of macros to undefine explicitly + + :keyword list[str] library_dirs: + list of directories to search for C/C++ libraries at link time + + :keyword list[str] libraries: + list of library names (not filenames or paths) to link against + + :keyword list[str] runtime_library_dirs: + list of directories to search for C/C++ libraries at run time + (for shared extensions, this is when the extension is loaded). + Setting this will cause an exception during build on Windows + platforms. + + :keyword list[str] extra_objects: + list of extra files to link with (eg. object files not implied + by 'sources', static library that must be explicitly specified, + binary resource files, etc.) + + :keyword list[str] extra_compile_args: + any extra platform- and compiler-specific information to use + when compiling the source files in 'sources'. For platforms and + compilers where "command line" makes sense, this is typically a + list of command-line arguments, but for other platforms it could + be anything. + + :keyword list[str] extra_link_args: + any extra platform- and compiler-specific information to use + when linking object files together to create the extension (or + to create a new static Python interpreter). Similar + interpretation as for 'extra_compile_args'. + + :keyword list[str] export_symbols: + list of symbols to be exported from a shared extension. Not + used on all platforms, and not generally necessary for Python + extensions, which typically export exactly one symbol: "init" + + extension_name. + + :keyword list[str] swig_opts: + any extra options to pass to SWIG if a source file has the .i + extension. + + :keyword list[str] depends: + list of files that the extension depends on + + :keyword str language: + extension language (i.e. "c", "c++", "objc"). Will be detected + from the source extensions if not provided. + + :keyword bool optional: + specifies that a build failure in the extension should not abort the + build process, but simply not install the failing extension. + + :keyword bool py_limited_api: + opt-in flag for the usage of :doc:`Python's limited API `. + + :raises setuptools.errors.PlatformError: if ``runtime_library_dirs`` is + specified on Windows. (since v63) + """ + + # These 4 are set and used in setuptools/command/build_ext.py + # The lack of a default value and risk of `AttributeError` is purposeful + # to avoid people forgetting to call finalize_options if they modify the extension list. + # See example/rationale in https://github.com/pypa/setuptools/issues/4529. + _full_name: str #: Private API, internal use only. + _links_to_dynamic: bool #: Private API, internal use only. + _needs_stub: bool #: Private API, internal use only. + _file_name: str #: Private API, internal use only. + + def __init__( + self, + name: str, + sources: list[StrPath], + *args, + py_limited_api: bool = False, + **kw, + ) -> None: + # The *args is needed for compatibility as calls may use positional + # arguments. py_limited_api may be set only via keyword. 
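+ # For example (illustrative call, not taken from the upstream sources):
+ #   Extension("pkg._ext", ["pkg/_ext.pyx"], py_limited_api=True)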
+ self.py_limited_api = py_limited_api + super().__init__( + name, + sources, # type: ignore[arg-type] # Vendored version of setuptools supports PathLike + *args, + **kw, + ) + + def _convert_pyx_sources_to_lang(self): + """ + Replace sources with .pyx extensions to sources with the target + language extension. This mechanism allows language authors to supply + pre-converted sources but to prefer the .pyx sources. + """ + if _have_cython(): + # the build has Cython, so allow it to compile the .pyx files + return + lang = self.language or '' + target_ext = '.cpp' if lang.lower() == 'c++' else '.c' + sub = functools.partial(re.sub, '.pyx$', target_ext) + self.sources = list(map(sub, self.sources)) + + +class Library(Extension): + """Just like a regular Extension, but built as a library instead""" diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/glob.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/glob.py new file mode 100644 index 0000000000000000000000000000000000000000..1dfff2cd50ff87b8cef9d936f1fc9d4a2478b136 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/glob.py @@ -0,0 +1,185 @@ +""" +Filename globbing utility. Mostly a copy of `glob` from Python 3.5. + +Changes include: + * `yield from` and PEP3102 `*` removed. + * Hidden files are not ignored. +""" + +from __future__ import annotations + +import fnmatch +import os +import re +from collections.abc import Iterable, Iterator +from typing import TYPE_CHECKING, AnyStr, overload + +if TYPE_CHECKING: + from _typeshed import BytesPath, StrOrBytesPath, StrPath + +__all__ = ["glob", "iglob", "escape"] + + +def glob(pathname: AnyStr, recursive: bool = False) -> list[AnyStr]: + """Return a list of paths matching a pathname pattern. + + The pattern may contain simple shell-style wildcards a la + fnmatch. However, unlike fnmatch, filenames starting with a + dot are special cases that are not matched by '*' and '?' + patterns. + + If recursive is true, the pattern '**' will match any files and + zero or more directories and subdirectories. + """ + return list(iglob(pathname, recursive=recursive)) + + +def iglob(pathname: AnyStr, recursive: bool = False) -> Iterator[AnyStr]: + """Return an iterator which yields the paths matching a pathname pattern. + + The pattern may contain simple shell-style wildcards a la + fnmatch. However, unlike fnmatch, filenames starting with a + dot are special cases that are not matched by '*' and '?' + patterns. + + If recursive is true, the pattern '**' will match any files and + zero or more directories and subdirectories. + """ + it = _iglob(pathname, recursive) + if recursive and _isrecursive(pathname): + s = next(it) # skip empty string + assert not s + return it + + +def _iglob(pathname: AnyStr, recursive: bool) -> Iterator[AnyStr]: + dirname, basename = os.path.split(pathname) + glob_in_dir = glob2 if recursive and _isrecursive(basename) else glob1 + + if not has_magic(pathname): + if basename: + if os.path.lexists(pathname): + yield pathname + else: + # Patterns ending with a slash should match only directories + if os.path.isdir(dirname): + yield pathname + return + + if not dirname: + yield from glob_in_dir(dirname, basename) + return + # `os.path.split()` returns the argument itself as a dirname if it is a + # drive or UNC path. Prevent an infinite recursion if a drive or UNC path + # contains magic characters (i.e. r'\\?\C:'). 
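+ # In that case dirname == pathname, so the guard below stops the recursion and
+ # treats the drive/UNC prefix as a literal directory instead.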
+ if dirname != pathname and has_magic(dirname): + dirs: Iterable[AnyStr] = _iglob(dirname, recursive) + else: + dirs = [dirname] + if not has_magic(basename): + glob_in_dir = glob0 + for dirname in dirs: + for name in glob_in_dir(dirname, basename): + yield os.path.join(dirname, name) + + +# These 2 helper functions non-recursively glob inside a literal directory. +# They return a list of basenames. `glob1` accepts a pattern while `glob0` +# takes a literal basename (so it only has to check for its existence). + + +@overload +def glob1(dirname: StrPath, pattern: str) -> list[str]: ... +@overload +def glob1(dirname: BytesPath, pattern: bytes) -> list[bytes]: ... +def glob1(dirname: StrOrBytesPath, pattern: str | bytes) -> list[str] | list[bytes]: + if not dirname: + if isinstance(pattern, bytes): + dirname = os.curdir.encode('ASCII') + else: + dirname = os.curdir + try: + names = os.listdir(dirname) + except OSError: + return [] + # mypy false-positives: str or bytes type possibility is always kept in sync + return fnmatch.filter(names, pattern) # type: ignore[type-var, return-value] + + +def glob0(dirname, basename): + if not basename: + # `os.path.split()` returns an empty basename for paths ending with a + # directory separator. 'q*x/' should match only directories. + if os.path.isdir(dirname): + return [basename] + else: + if os.path.lexists(os.path.join(dirname, basename)): + return [basename] + return [] + + +# This helper function recursively yields relative pathnames inside a literal +# directory. + + +@overload +def glob2(dirname: StrPath, pattern: str) -> Iterator[str]: ... +@overload +def glob2(dirname: BytesPath, pattern: bytes) -> Iterator[bytes]: ... +def glob2(dirname: StrOrBytesPath, pattern: str | bytes) -> Iterator[str | bytes]: + assert _isrecursive(pattern) + yield pattern[:0] + yield from _rlistdir(dirname) + + +# Recursively yields relative pathnames inside a literal directory. +@overload +def _rlistdir(dirname: StrPath) -> Iterator[str]: ... +@overload +def _rlistdir(dirname: BytesPath) -> Iterator[bytes]: ... +def _rlistdir(dirname: StrOrBytesPath) -> Iterator[str | bytes]: + if not dirname: + if isinstance(dirname, bytes): + dirname = os.curdir.encode('ASCII') + else: + dirname = os.curdir + try: + names = os.listdir(dirname) + except OSError: + return + for x in names: + yield x + # mypy false-positives: str or bytes type possibility is always kept in sync + path = os.path.join(dirname, x) if dirname else x # type: ignore[arg-type] + for y in _rlistdir(path): + yield os.path.join(x, y) # type: ignore[arg-type] + + +magic_check = re.compile('([*?[])') +magic_check_bytes = re.compile(b'([*?[])') + + +def has_magic(s: str | bytes) -> bool: + if isinstance(s, bytes): + return magic_check_bytes.search(s) is not None + else: + return magic_check.search(s) is not None + + +def _isrecursive(pattern: str | bytes) -> bool: + if isinstance(pattern, bytes): + return pattern == b'**' + else: + return pattern == '**' + + +def escape(pathname): + """Escape all special characters.""" + # Escaping is done by wrapping any of "*?[" between square brackets. + # Metacharacters do not work in the drive part and shouldn't be escaped. 
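+ # e.g. escape(r'C:\data[1]\*.txt') -> r'C:\data[[]1]\[*].txt'; the drive part
+ # 'C:' is split off first and re-attached untouched.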
+ drive, pathname = os.path.splitdrive(pathname) + if isinstance(pathname, bytes): + pathname = magic_check_bytes.sub(rb'[\1]', pathname) + else: + pathname = magic_check.sub(r'[\1]', pathname) + return drive + pathname diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/gui-32.exe b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/gui-32.exe new file mode 100644 index 0000000000000000000000000000000000000000..1eb430c6d614a5daea4139badc09c222a4b0e72a Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/gui-32.exe differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/gui-arm64.exe b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/gui-arm64.exe new file mode 100644 index 0000000000000000000000000000000000000000..1e00ffacb182c2af206e5dd9d9fbc41d236da0d1 Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/gui-arm64.exe differ diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/installer.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/installer.py new file mode 100644 index 0000000000000000000000000000000000000000..64bc2def078bab5e4fb6aba26981e7b73bfc37a5 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/installer.py @@ -0,0 +1,150 @@ +from __future__ import annotations + +import glob +import os +import subprocess +import sys +import tempfile +from functools import partial + +from pkg_resources import Distribution + +from . import _reqs +from ._reqs import _StrOrIter +from .warnings import SetuptoolsDeprecationWarning +from .wheel import Wheel + +from distutils import log +from distutils.errors import DistutilsError + + +def _fixup_find_links(find_links): + """Ensure find-links option end-up being a list of strings.""" + if isinstance(find_links, str): + return find_links.split() + assert isinstance(find_links, (tuple, list)) + return find_links + + +def fetch_build_egg(dist, req): + """Fetch an egg needed for building. + + Use pip/wheel to fetch/build a wheel.""" + _DeprecatedInstaller.emit() + _warn_wheel_not_available(dist) + return _fetch_build_egg_no_warn(dist, req) + + +def _fetch_build_eggs(dist, requires: _StrOrIter) -> list[Distribution]: + import pkg_resources # Delay import to avoid unnecessary side-effects + + _DeprecatedInstaller.emit(stacklevel=3) + _warn_wheel_not_available(dist) + + resolved_dists = pkg_resources.working_set.resolve( + _reqs.parse(requires, pkg_resources.Requirement), # required for compatibility + installer=partial(_fetch_build_egg_no_warn, dist), # avoid warning twice + replace_conflicting=True, + ) + for dist in resolved_dists: + pkg_resources.working_set.add(dist, replace=True) + return resolved_dists + + +def _fetch_build_egg_no_warn(dist, req): # noqa: C901 # is too complex (16) # FIXME + import pkg_resources # Delay import to avoid unnecessary side-effects + + # Ignore environment markers; if supplied, it is required. + req = strip_marker(req) + # Take easy_install options into account, but do not override relevant + # pip environment variables (like PIP_INDEX_URL or PIP_QUIET); they'll + # take precedence. + opts = dist.get_option_dict('easy_install') + if 'allow_hosts' in opts: + raise DistutilsError( + 'the `allow-hosts` option is not supported ' + 'when using pip to install requirements.' 
+ ) + quiet = 'PIP_QUIET' not in os.environ and 'PIP_VERBOSE' not in os.environ + if 'PIP_INDEX_URL' in os.environ: + index_url = None + elif 'index_url' in opts: + index_url = opts['index_url'][1] + else: + index_url = None + find_links = ( + _fixup_find_links(opts['find_links'][1])[:] if 'find_links' in opts else [] + ) + if dist.dependency_links: + find_links.extend(dist.dependency_links) + eggs_dir = os.path.realpath(dist.get_egg_cache_dir()) + environment = pkg_resources.Environment() + for egg_dist in pkg_resources.find_distributions(eggs_dir): + if egg_dist in req and environment.can_add(egg_dist): + return egg_dist + with tempfile.TemporaryDirectory() as tmpdir: + cmd = [ + sys.executable, + '-m', + 'pip', + '--disable-pip-version-check', + 'wheel', + '--no-deps', + '-w', + tmpdir, + ] + if quiet: + cmd.append('--quiet') + if index_url is not None: + cmd.extend(('--index-url', index_url)) + for link in find_links or []: + cmd.extend(('--find-links', link)) + # If requirement is a PEP 508 direct URL, directly pass + # the URL to pip, as `req @ url` does not work on the + # command line. + cmd.append(req.url or str(req)) + try: + subprocess.check_call(cmd) + except subprocess.CalledProcessError as e: + raise DistutilsError(str(e)) from e + wheel = Wheel(glob.glob(os.path.join(tmpdir, '*.whl'))[0]) + dist_location = os.path.join(eggs_dir, wheel.egg_name()) + wheel.install_as_egg(dist_location) + dist_metadata = pkg_resources.PathMetadata( + dist_location, os.path.join(dist_location, 'EGG-INFO') + ) + return pkg_resources.Distribution.from_filename( + dist_location, metadata=dist_metadata + ) + + +def strip_marker(req): + """ + Return a new requirement without the environment marker to avoid + calling pip with something like `babel; extra == "i18n"`, which + would always be ignored. + """ + import pkg_resources # Delay import to avoid unnecessary side-effects + + # create a copy to avoid mutating the input + req = pkg_resources.Requirement.parse(str(req)) + req.marker = None + return req + + +def _warn_wheel_not_available(dist): + import pkg_resources # Delay import to avoid unnecessary side-effects + + try: + pkg_resources.get_distribution('wheel') + except pkg_resources.DistributionNotFound: + dist.announce('WARNING: The wheel package is not available.', log.WARN) + + +class _DeprecatedInstaller(SetuptoolsDeprecationWarning): + _SUMMARY = "setuptools.installer and fetch_build_eggs are deprecated." + _DETAILS = """ + Requirements should be satisfied by a PEP 517 installer. + If you are using pip, you can try `pip install --use-pep517`. + """ + # _DUE_DATE not decided yet diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/launch.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/launch.py new file mode 100644 index 0000000000000000000000000000000000000000..0d162647d55777d7afa1bf1e44a6c200a3f82419 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/launch.py @@ -0,0 +1,36 @@ +""" +Launch the Python script on the command line after +setuptools is bootstrapped via import. +""" + +# Note that setuptools gets imported implicitly by the +# invocation of this script using python -m setuptools.launch + +import sys +import tokenize + + +def run() -> None: + """ + Run the script in sys.argv[1] as if it had + been invoked naturally. 
+ """ + __builtins__ + script_name = sys.argv[1] + namespace = dict( + __file__=script_name, + __name__='__main__', + __doc__=None, + ) + sys.argv[:] = sys.argv[1:] + + open_ = getattr(tokenize, 'open', open) + with open_(script_name) as fid: + script = fid.read() + norm_script = script.replace('\\r\\n', '\\n') + code = compile(norm_script, script_name, 'exec') + exec(code, namespace) + + +if __name__ == '__main__': + run() diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/logging.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/logging.py new file mode 100644 index 0000000000000000000000000000000000000000..532da899f7dc02f9fea9a44c429086b98fe043d8 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/logging.py @@ -0,0 +1,40 @@ +import inspect +import logging +import sys + +from . import monkey + +import distutils.log + + +def _not_warning(record): + return record.levelno < logging.WARNING + + +def configure() -> None: + """ + Configure logging to emit warning and above to stderr + and everything else to stdout. This behavior is provided + for compatibility with distutils.log but may change in + the future. + """ + err_handler = logging.StreamHandler() + err_handler.setLevel(logging.WARNING) + out_handler = logging.StreamHandler(sys.stdout) + out_handler.addFilter(_not_warning) + handlers = err_handler, out_handler + logging.basicConfig( + format="{message}", style='{', handlers=handlers, level=logging.DEBUG + ) + if inspect.ismodule(distutils.dist.log): + monkey.patch_func(set_threshold, distutils.log, 'set_threshold') + # For some reason `distutils.log` module is getting cached in `distutils.dist` + # and then loaded again when patched, + # implying: id(distutils.log) != id(distutils.dist.log). + # Make sure the same module object is used everywhere: + distutils.dist.log = distutils.log + + +def set_threshold(level: int) -> int: + logging.root.setLevel(level * 10) + return set_threshold.unpatched(level) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/modified.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/modified.py new file mode 100644 index 0000000000000000000000000000000000000000..6ba02fab68734e1e96fd50d7c4b6ffb1442717fb --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/modified.py @@ -0,0 +1,18 @@ +try: + # Ensure a DistutilsError raised by these methods is the same as distutils.errors.DistutilsError + from distutils._modified import ( + newer, + newer_group, + newer_pairwise, + newer_pairwise_group, + ) +except ImportError: + # fallback for SETUPTOOLS_USE_DISTUTILS=stdlib, because _modified never existed in stdlib + from ._distutils._modified import ( + newer, + newer_group, + newer_pairwise, + newer_pairwise_group, + ) + +__all__ = ['newer', 'newer_pairwise', 'newer_group', 'newer_pairwise_group'] diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/monkey.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/monkey.py new file mode 100644 index 0000000000000000000000000000000000000000..6ad1abac295c1df613942b8896edf089a103ae1f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/monkey.py @@ -0,0 +1,126 @@ +""" +Monkey patching of distutils. 
+""" + +from __future__ import annotations + +import inspect +import platform +import sys +import types +from typing import TypeVar, cast, overload + +import distutils.filelist + +_T = TypeVar("_T") +_UnpatchT = TypeVar("_UnpatchT", type, types.FunctionType) + + +__all__: list[str] = [] +""" +Everything is private. Contact the project team +if you think you need this functionality. +""" + + +def _get_mro(cls): + """ + Returns the bases classes for cls sorted by the MRO. + + Works around an issue on Jython where inspect.getmro will not return all + base classes if multiple classes share the same name. Instead, this + function will return a tuple containing the class itself, and the contents + of cls.__bases__. See https://github.com/pypa/setuptools/issues/1024. + """ + if platform.python_implementation() == "Jython": + return (cls,) + cls.__bases__ + return inspect.getmro(cls) + + +@overload +def get_unpatched(item: _UnpatchT) -> _UnpatchT: ... +@overload +def get_unpatched(item: object) -> None: ... +def get_unpatched( + item: type | types.FunctionType | object, +) -> type | types.FunctionType | None: + if isinstance(item, type): + return get_unpatched_class(item) + if isinstance(item, types.FunctionType): + return get_unpatched_function(item) + return None + + +def get_unpatched_class(cls: type[_T]) -> type[_T]: + """Protect against re-patching the distutils if reloaded + + Also ensures that no other distutils extension monkeypatched the distutils + first. + """ + external_bases = ( + cast(type[_T], cls) + for cls in _get_mro(cls) + if not cls.__module__.startswith('setuptools') + ) + base = next(external_bases) + if not base.__module__.startswith('distutils'): + msg = f"distutils has already been patched by {cls!r}" + raise AssertionError(msg) + return base + + +def patch_all(): + import setuptools + + # we can't patch distutils.cmd, alas + distutils.core.Command = setuptools.Command # type: ignore[misc,assignment] # monkeypatching + + _patch_distribution_metadata() + + # Install Distribution throughout the distutils + for module in distutils.dist, distutils.core, distutils.cmd: + module.Distribution = setuptools.dist.Distribution + + # Install the patched Extension + distutils.core.Extension = setuptools.extension.Extension # type: ignore[misc,assignment] # monkeypatching + distutils.extension.Extension = setuptools.extension.Extension # type: ignore[misc,assignment] # monkeypatching + if 'distutils.command.build_ext' in sys.modules: + sys.modules[ + 'distutils.command.build_ext' + ].Extension = setuptools.extension.Extension + + +def _patch_distribution_metadata(): + from . import _core_metadata + + """Patch write_pkg_file and read_pkg_file for higher metadata standards""" + for attr in ( + 'write_pkg_info', + 'write_pkg_file', + 'read_pkg_file', + 'get_metadata_version', + 'get_fullname', + ): + new_val = getattr(_core_metadata, attr) + setattr(distutils.dist.DistributionMetadata, attr, new_val) + + +def patch_func(replacement, target_mod, func_name): + """ + Patch func_name in target_mod with replacement + + Important - original must be resolved by name to avoid + patching an already patched function. + """ + original = getattr(target_mod, func_name) + + # set the 'unpatched' attribute on the replacement to + # point to the original. 
+ vars(replacement).setdefault('unpatched', original) + + # replace the function in the original module + setattr(target_mod, func_name, replacement) + + +def get_unpatched_function(candidate): + return candidate.unpatched diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/msvc.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/msvc.py new file mode 100644 index 0000000000000000000000000000000000000000..9c9a63568ef7a3fcc838bbb2088622d88b323d6b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/msvc.py @@ -0,0 +1,1527 @@ +""" +Environment info about Microsoft Compilers. + +>>> getfixture('windows_only') +>>> ei = EnvironmentInfo('amd64') +""" + +from __future__ import annotations + +import contextlib +import itertools +import json +import os +import os.path +import platform +from typing import TYPE_CHECKING, TypedDict + +from more_itertools import unique_everseen + +import distutils.errors + +if TYPE_CHECKING: + from typing_extensions import LiteralString, NotRequired + +# https://github.com/python/mypy/issues/8166 +if not TYPE_CHECKING and platform.system() == 'Windows': + import winreg + from os import environ +else: + # Mock winreg and environ so the module can be imported on this platform. + + class winreg: + HKEY_USERS = None + HKEY_CURRENT_USER = None + HKEY_LOCAL_MACHINE = None + HKEY_CLASSES_ROOT = None + + environ: dict[str, str] = dict() + + +class PlatformInfo: + """ + Current and Target Architectures information. + + Parameters + ---------- + arch: str + Target architecture. + """ + + current_cpu = environ.get('processor_architecture', '').lower() + + def __init__(self, arch) -> None: + self.arch = arch.lower().replace('x64', 'amd64') + + @property + def target_cpu(self): + """ + Return Target CPU architecture. + + Return + ------ + str + Target CPU + """ + return self.arch[self.arch.find('_') + 1 :] + + def target_is_x86(self): + """ + Return True if target CPU is x86 32 bits.. + + Return + ------ + bool + CPU is x86 32 bits + """ + return self.target_cpu == 'x86' + + def current_is_x86(self): + """ + Return True if current CPU is x86 32 bits.. + + Return + ------ + bool + CPU is x86 32 bits + """ + return self.current_cpu == 'x86' + + def current_dir(self, hidex86=False, x64=False) -> str: + """ + Current platform specific subfolder. + + Parameters + ---------- + hidex86: bool + return '' and not '\x86' if architecture is x86. + x64: bool + return '\x64' and not '\amd64' if architecture is amd64. + + Return + ------ + str + subfolder: '\target', or '' (see hidex86 parameter) + """ + return ( + '' + if (self.current_cpu == 'x86' and hidex86) + else r'\x64' + if (self.current_cpu == 'amd64' and x64) + else rf'\{self.current_cpu}' + ) + + def target_dir(self, hidex86=False, x64=False) -> str: + r""" + Target platform specific subfolder. + + Parameters + ---------- + hidex86: bool + return '' and not '\x86' if architecture is x86. + x64: bool + return '\x64' and not '\amd64' if architecture is amd64. + + Return + ------ + str + subfolder: '\current', or '' (see hidex86 parameter) + """ + return ( + '' + if (self.target_cpu == 'x86' and hidex86) + else r'\x64' + if (self.target_cpu == 'amd64' and x64) + else rf'\{self.target_cpu}' + ) + + def cross_dir(self, forcex86=False): + r""" + Cross platform specific subfolder. + + Parameters + ---------- + forcex86: bool + Use 'x86' as current architecture even if current architecture is + not x86. 
+ + Return + ------ + str + subfolder: '' if target architecture is current architecture, + '\current_target' if not. + """ + current = 'x86' if forcex86 else self.current_cpu + return ( + '' + if self.target_cpu == current + else self.target_dir().replace('\\', f'\\{current}_') + ) + + +class RegistryInfo: + """ + Microsoft Visual Studio related registry information. + + Parameters + ---------- + platform_info: PlatformInfo + "PlatformInfo" instance. + """ + + HKEYS = ( + winreg.HKEY_USERS, + winreg.HKEY_CURRENT_USER, + winreg.HKEY_LOCAL_MACHINE, + winreg.HKEY_CLASSES_ROOT, + ) + + def __init__(self, platform_info) -> None: + self.pi = platform_info + + @property + def visualstudio(self) -> str: + """ + Microsoft Visual Studio root registry key. + + Return + ------ + str + Registry key + """ + return 'VisualStudio' + + @property + def sxs(self): + """ + Microsoft Visual Studio SxS registry key. + + Return + ------ + str + Registry key + """ + return os.path.join(self.visualstudio, 'SxS') + + @property + def vc(self): + """ + Microsoft Visual C++ VC7 registry key. + + Return + ------ + str + Registry key + """ + return os.path.join(self.sxs, 'VC7') + + @property + def vs(self): + """ + Microsoft Visual Studio VS7 registry key. + + Return + ------ + str + Registry key + """ + return os.path.join(self.sxs, 'VS7') + + @property + def vc_for_python(self) -> str: + """ + Microsoft Visual C++ for Python registry key. + + Return + ------ + str + Registry key + """ + return r'DevDiv\VCForPython' + + @property + def microsoft_sdk(self) -> str: + """ + Microsoft SDK registry key. + + Return + ------ + str + Registry key + """ + return 'Microsoft SDKs' + + @property + def windows_sdk(self): + """ + Microsoft Windows/Platform SDK registry key. + + Return + ------ + str + Registry key + """ + return os.path.join(self.microsoft_sdk, 'Windows') + + @property + def netfx_sdk(self): + """ + Microsoft .NET Framework SDK registry key. + + Return + ------ + str + Registry key + """ + return os.path.join(self.microsoft_sdk, 'NETFXSDK') + + @property + def windows_kits_roots(self) -> str: + """ + Microsoft Windows Kits Roots registry key. + + Return + ------ + str + Registry key + """ + return r'Windows Kits\Installed Roots' + + def microsoft(self, key, x86=False): + """ + Return key in Microsoft software registry. + + Parameters + ---------- + key: str + Registry key path where look. + x86: str + Force x86 software registry. + + Return + ------ + str + Registry key + """ + node64 = '' if self.pi.current_is_x86() or x86 else 'Wow6432Node' + return os.path.join('Software', node64, 'Microsoft', key) + + def lookup(self, key, name): + """ + Look for values in registry in Microsoft software registry. + + Parameters + ---------- + key: str + Registry key path where look. + name: str + Value name to find. + + Return + ------ + str + value + """ + key_read = winreg.KEY_READ + openkey = winreg.OpenKey + closekey = winreg.CloseKey + ms = self.microsoft + for hkey in self.HKEYS: + bkey = None + try: + bkey = openkey(hkey, ms(key), 0, key_read) + except OSError: + if not self.pi.current_is_x86(): + try: + bkey = openkey(hkey, ms(key, True), 0, key_read) + except OSError: + continue + else: + continue + try: + return winreg.QueryValueEx(bkey, name)[0] + except OSError: + pass + finally: + if bkey: + closekey(bkey) + return None + + +class SystemInfo: + """ + Microsoft Windows and Visual Studio related system information. + + Parameters + ---------- + registry_info: RegistryInfo + "RegistryInfo" instance. 
+ vc_ver: float + Required Microsoft Visual C++ version. + """ + + # Variables and properties in this class use originals CamelCase variables + # names from Microsoft source files for more easy comparison. + WinDir = environ.get('WinDir', '') + ProgramFiles = environ.get('ProgramFiles', '') + ProgramFilesx86 = environ.get('ProgramFiles(x86)', ProgramFiles) + + def __init__(self, registry_info, vc_ver=None) -> None: + self.ri = registry_info + self.pi = self.ri.pi + + self.known_vs_paths = self.find_programdata_vs_vers() + + # Except for VS15+, VC version is aligned with VS version + self.vs_ver = self.vc_ver = vc_ver or self._find_latest_available_vs_ver() + + def _find_latest_available_vs_ver(self): + """ + Find the latest VC version + + Return + ------ + float + version + """ + reg_vc_vers = self.find_reg_vs_vers() + + if not (reg_vc_vers or self.known_vs_paths): + raise distutils.errors.DistutilsPlatformError( + 'No Microsoft Visual C++ version found' + ) + + vc_vers = set(reg_vc_vers) + vc_vers.update(self.known_vs_paths) + return sorted(vc_vers)[-1] + + def find_reg_vs_vers(self): + """ + Find Microsoft Visual Studio versions available in registry. + + Return + ------ + list of float + Versions + """ + ms = self.ri.microsoft + vckeys = (self.ri.vc, self.ri.vc_for_python, self.ri.vs) + vs_vers = [] + for hkey, key in itertools.product(self.ri.HKEYS, vckeys): + try: + bkey = winreg.OpenKey(hkey, ms(key), 0, winreg.KEY_READ) + except OSError: + continue + with bkey: + subkeys, values, _ = winreg.QueryInfoKey(bkey) + for i in range(values): + with contextlib.suppress(ValueError): + ver = float(winreg.EnumValue(bkey, i)[0]) + if ver not in vs_vers: + vs_vers.append(ver) + for i in range(subkeys): + with contextlib.suppress(ValueError): + ver = float(winreg.EnumKey(bkey, i)) + if ver not in vs_vers: + vs_vers.append(ver) + return sorted(vs_vers) + + def find_programdata_vs_vers(self) -> dict[float, str]: + r""" + Find Visual studio 2017+ versions from information in + "C:\ProgramData\Microsoft\VisualStudio\Packages\_Instances". + + Return + ------ + dict + float version as key, path as value. + """ + vs_versions: dict[float, str] = {} + instances_dir = r'C:\ProgramData\Microsoft\VisualStudio\Packages\_Instances' + + try: + hashed_names = os.listdir(instances_dir) + + except OSError: + # Directory not exists with all Visual Studio versions + return vs_versions + + for name in hashed_names: + try: + # Get VS installation path from "state.json" file + state_path = os.path.join(instances_dir, name, 'state.json') + with open(state_path, 'rt', encoding='utf-8') as state_file: + state = json.load(state_file) + vs_path = state['installationPath'] + + # Raises OSError if this VS installation does not contain VC + os.listdir(os.path.join(vs_path, r'VC\Tools\MSVC')) + + # Store version and path + vs_versions[self._as_float_version(state['installationVersion'])] = ( + vs_path + ) + + except (OSError, KeyError): + # Skip if "state.json" file is missing or bad format + continue + + return vs_versions + + @staticmethod + def _as_float_version(version): + """ + Return a string version as a simplified float version (major.minor) + + Parameters + ---------- + version: str + Version. + + Return + ------ + float + version + """ + return float('.'.join(version.split('.')[:2])) + + @property + def VSInstallDir(self): + """ + Microsoft Visual Studio directory. 
+ + Return + ------ + str + path + """ + # Default path + default = os.path.join( + self.ProgramFilesx86, f'Microsoft Visual Studio {self.vs_ver:0.1f}' + ) + + # Try to get path from registry, if fail use default path + return self.ri.lookup(self.ri.vs, f'{self.vs_ver:0.1f}') or default + + @property + def VCInstallDir(self): + """ + Microsoft Visual C++ directory. + + Return + ------ + str + path + """ + path = self._guess_vc() or self._guess_vc_legacy() + + if not os.path.isdir(path): + msg = 'Microsoft Visual C++ directory not found' + raise distutils.errors.DistutilsPlatformError(msg) + + return path + + def _guess_vc(self): + """ + Locate Visual C++ for VS2017+. + + Return + ------ + str + path + """ + if self.vs_ver <= 14.0: + return '' + + try: + # First search in known VS paths + vs_dir = self.known_vs_paths[self.vs_ver] + except KeyError: + # Else, search with path from registry + vs_dir = self.VSInstallDir + + guess_vc = os.path.join(vs_dir, r'VC\Tools\MSVC') + + # Subdir with VC exact version as name + try: + # Update the VC version with real one instead of VS version + vc_ver = os.listdir(guess_vc)[-1] + self.vc_ver = self._as_float_version(vc_ver) + return os.path.join(guess_vc, vc_ver) + except (OSError, IndexError): + return '' + + def _guess_vc_legacy(self): + """ + Locate Visual C++ for versions prior to 2017. + + Return + ------ + str + path + """ + default = os.path.join( + self.ProgramFilesx86, + rf'Microsoft Visual Studio {self.vs_ver:0.1f}\VC', + ) + + # Try to get "VC++ for Python" path from registry as default path + reg_path = os.path.join(self.ri.vc_for_python, f'{self.vs_ver:0.1f}') + python_vc = self.ri.lookup(reg_path, 'installdir') + default_vc = os.path.join(python_vc, 'VC') if python_vc else default + + # Try to get path from registry, if fail use default path + return self.ri.lookup(self.ri.vc, f'{self.vs_ver:0.1f}') or default_vc + + @property + def WindowsSdkVersion(self) -> tuple[LiteralString, ...]: + """ + Microsoft Windows SDK versions for specified MSVC++ version. + + Return + ------ + tuple of str + versions + """ + if self.vs_ver <= 9.0: + return '7.0', '6.1', '6.0a' + elif self.vs_ver == 10.0: + return '7.1', '7.0a' + elif self.vs_ver == 11.0: + return '8.0', '8.0a' + elif self.vs_ver == 12.0: + return '8.1', '8.1a' + elif self.vs_ver >= 14.0: + return '10.0', '8.1' + return () + + @property + def WindowsSdkLastVersion(self): + """ + Microsoft Windows SDK last version. + + Return + ------ + str + version + """ + return self._use_last_dir_name(os.path.join(self.WindowsSdkDir, 'lib')) + + @property + def WindowsSdkDir(self) -> str | None: # noqa: C901 # is too complex (12) # FIXME + """ + Microsoft Windows SDK directory. 
+ + Return + ------ + str + path + """ + sdkdir: str | None = '' + for ver in self.WindowsSdkVersion: + # Try to get it from registry + loc = os.path.join(self.ri.windows_sdk, f'v{ver}') + sdkdir = self.ri.lookup(loc, 'installationfolder') + if sdkdir: + break + if not sdkdir or not os.path.isdir(sdkdir): + # Try to get "VC++ for Python" version from registry + path = os.path.join(self.ri.vc_for_python, f'{self.vc_ver:0.1f}') + install_base = self.ri.lookup(path, 'installdir') + if install_base: + sdkdir = os.path.join(install_base, 'WinSDK') + if not sdkdir or not os.path.isdir(sdkdir): + # If fail, use default new path + for ver in self.WindowsSdkVersion: + intver = ver[: ver.rfind('.')] + path = rf'Microsoft SDKs\Windows Kits\{intver}' + d = os.path.join(self.ProgramFiles, path) + if os.path.isdir(d): + sdkdir = d + if not sdkdir or not os.path.isdir(sdkdir): + # If fail, use default old path + for ver in self.WindowsSdkVersion: + path = rf'Microsoft SDKs\Windows\v{ver}' + d = os.path.join(self.ProgramFiles, path) + if os.path.isdir(d): + sdkdir = d + if not sdkdir: + # If fail, use Platform SDK + sdkdir = os.path.join(self.VCInstallDir, 'PlatformSDK') + return sdkdir + + @property + def WindowsSDKExecutablePath(self): + """ + Microsoft Windows SDK executable directory. + + Return + ------ + str + path + """ + # Find WinSDK NetFx Tools registry dir name + if self.vs_ver <= 11.0: + netfxver = 35 + arch = '' + else: + netfxver = 40 + hidex86 = True if self.vs_ver <= 12.0 else False + arch = self.pi.current_dir(x64=True, hidex86=hidex86).replace('\\', '-') + fx = f'WinSDK-NetFx{netfxver}Tools{arch}' + + # list all possibles registry paths + regpaths = [] + if self.vs_ver >= 14.0: + for ver in self.NetFxSdkVersion: + regpaths += [os.path.join(self.ri.netfx_sdk, ver, fx)] + + for ver in self.WindowsSdkVersion: + regpaths += [os.path.join(self.ri.windows_sdk, f'v{ver}A', fx)] + + # Return installation folder from the more recent path + for path in regpaths: + execpath = self.ri.lookup(path, 'installationfolder') + if execpath: + return execpath + + return None + + @property + def FSharpInstallDir(self): + """ + Microsoft Visual F# directory. + + Return + ------ + str + path + """ + path = os.path.join(self.ri.visualstudio, rf'{self.vs_ver:0.1f}\Setup\F#') + return self.ri.lookup(path, 'productdir') or '' + + @property + def UniversalCRTSdkDir(self): + """ + Microsoft Universal CRT SDK directory. + + Return + ------ + str + path + """ + # Set Kit Roots versions for specified MSVC++ version + vers = ('10', '81') if self.vs_ver >= 14.0 else () + + # Find path of the more recent Kit + for ver in vers: + sdkdir = self.ri.lookup(self.ri.windows_kits_roots, f'kitsroot{ver}') + if sdkdir: + return sdkdir or '' + + return None + + @property + def UniversalCRTSdkLastVersion(self): + """ + Microsoft Universal C Runtime SDK last version. + + Return + ------ + str + version + """ + return self._use_last_dir_name(os.path.join(self.UniversalCRTSdkDir, 'lib')) + + @property + def NetFxSdkVersion(self): + """ + Microsoft .NET Framework SDK versions. + + Return + ------ + tuple of str + versions + """ + # Set FxSdk versions for specified VS version + return ( + ('4.7.2', '4.7.1', '4.7', '4.6.2', '4.6.1', '4.6', '4.5.2', '4.5.1', '4.5') + if self.vs_ver >= 14.0 + else () + ) + + @property + def NetFxSdkDir(self): + """ + Microsoft .NET Framework SDK directory. 
+ + Return + ------ + str + path + """ + sdkdir = '' + for ver in self.NetFxSdkVersion: + loc = os.path.join(self.ri.netfx_sdk, ver) + sdkdir = self.ri.lookup(loc, 'kitsinstallationfolder') + if sdkdir: + break + return sdkdir + + @property + def FrameworkDir32(self): + """ + Microsoft .NET Framework 32bit directory. + + Return + ------ + str + path + """ + # Default path + guess_fw = os.path.join(self.WinDir, r'Microsoft.NET\Framework') + + # Try to get path from registry, if fail use default path + return self.ri.lookup(self.ri.vc, 'frameworkdir32') or guess_fw + + @property + def FrameworkDir64(self): + """ + Microsoft .NET Framework 64bit directory. + + Return + ------ + str + path + """ + # Default path + guess_fw = os.path.join(self.WinDir, r'Microsoft.NET\Framework64') + + # Try to get path from registry, if fail use default path + return self.ri.lookup(self.ri.vc, 'frameworkdir64') or guess_fw + + @property + def FrameworkVersion32(self) -> tuple[str, ...]: + """ + Microsoft .NET Framework 32bit versions. + + Return + ------ + tuple of str + versions + """ + return self._find_dot_net_versions(32) + + @property + def FrameworkVersion64(self) -> tuple[str, ...]: + """ + Microsoft .NET Framework 64bit versions. + + Return + ------ + tuple of str + versions + """ + return self._find_dot_net_versions(64) + + def _find_dot_net_versions(self, bits) -> tuple[str, ...]: + """ + Find Microsoft .NET Framework versions. + + Parameters + ---------- + bits: int + Platform number of bits: 32 or 64. + + Return + ------ + tuple of str + versions + """ + # Find actual .NET version in registry + reg_ver = self.ri.lookup(self.ri.vc, f'frameworkver{bits}') + dot_net_dir = getattr(self, f'FrameworkDir{bits}') + ver = reg_ver or self._use_last_dir_name(dot_net_dir, 'v') or '' + + # Set .NET versions for specified MSVC++ version + if self.vs_ver >= 12.0: + return ver, 'v4.0' + elif self.vs_ver >= 10.0: + return 'v4.0.30319' if ver.lower()[:2] != 'v4' else ver, 'v3.5' + elif self.vs_ver == 9.0: + return 'v3.5', 'v2.0.50727' + elif self.vs_ver == 8.0: + return 'v3.0', 'v2.0.50727' + return () + + @staticmethod + def _use_last_dir_name(path, prefix=''): + """ + Return name of the last dir in path or '' if no dir found. + + Parameters + ---------- + path: str + Use dirs in this path + prefix: str + Use only dirs starting by this prefix + + Return + ------ + str + name + """ + matching_dirs = ( + dir_name + for dir_name in reversed(os.listdir(path)) + if os.path.isdir(os.path.join(path, dir_name)) + and dir_name.startswith(prefix) + ) + return next(matching_dirs, None) or '' + + +class _EnvironmentDict(TypedDict): + include: str + lib: str + libpath: str + path: str + py_vcruntime_redist: NotRequired[str | None] + + +class EnvironmentInfo: + """ + Return environment variables for specified Microsoft Visual C++ version + and platform : Lib, Include, Path and libpath. + + This function is compatible with Microsoft Visual C++ 9.0 to 14.X. + + Script created by analysing Microsoft environment configuration files like + "vcvars[...].bat", "SetEnv.Cmd", "vcbuildtools.bat", ... + + Parameters + ---------- + arch: str + Target architecture. + vc_ver: float + Required Microsoft Visual C++ version. If not set, autodetect the last + version. + vc_min_ver: float + Minimum Microsoft Visual C++ version. + """ + + # Variables and properties in this class use originals CamelCase variables + # names from Microsoft source files for more easy comparison. 
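+ # Illustrative usage sketch (Windows only; not part of the upstream file):
+ #   env = EnvironmentInfo('amd64', vc_min_ver=14.0).return_env()
+ #   env['include']  # pathsep-joined include directories, see return_env() below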
+ + def __init__(self, arch, vc_ver=None, vc_min_ver=0) -> None: + self.pi = PlatformInfo(arch) + self.ri = RegistryInfo(self.pi) + self.si = SystemInfo(self.ri, vc_ver) + + if self.vc_ver < vc_min_ver: + err = 'No suitable Microsoft Visual C++ version found' + raise distutils.errors.DistutilsPlatformError(err) + + @property + def vs_ver(self): + """ + Microsoft Visual Studio. + + Return + ------ + float + version + """ + return self.si.vs_ver + + @property + def vc_ver(self): + """ + Microsoft Visual C++ version. + + Return + ------ + float + version + """ + return self.si.vc_ver + + @property + def VSTools(self): + """ + Microsoft Visual Studio Tools. + + Return + ------ + list of str + paths + """ + paths = [r'Common7\IDE', r'Common7\Tools'] + + if self.vs_ver >= 14.0: + arch_subdir = self.pi.current_dir(hidex86=True, x64=True) + paths += [r'Common7\IDE\CommonExtensions\Microsoft\TestWindow'] + paths += [r'Team Tools\Performance Tools'] + paths += [rf'Team Tools\Performance Tools{arch_subdir}'] + + return [os.path.join(self.si.VSInstallDir, path) for path in paths] + + @property + def VCIncludes(self): + """ + Microsoft Visual C++ & Microsoft Foundation Class Includes. + + Return + ------ + list of str + paths + """ + return [ + os.path.join(self.si.VCInstallDir, 'Include'), + os.path.join(self.si.VCInstallDir, r'ATLMFC\Include'), + ] + + @property + def VCLibraries(self): + """ + Microsoft Visual C++ & Microsoft Foundation Class Libraries. + + Return + ------ + list of str + paths + """ + if self.vs_ver >= 15.0: + arch_subdir = self.pi.target_dir(x64=True) + else: + arch_subdir = self.pi.target_dir(hidex86=True) + paths = [f'Lib{arch_subdir}', rf'ATLMFC\Lib{arch_subdir}'] + + if self.vs_ver >= 14.0: + paths += [rf'Lib\store{arch_subdir}'] + + return [os.path.join(self.si.VCInstallDir, path) for path in paths] + + @property + def VCStoreRefs(self): + """ + Microsoft Visual C++ store references Libraries. + + Return + ------ + list of str + paths + """ + if self.vs_ver < 14.0: + return [] + return [os.path.join(self.si.VCInstallDir, r'Lib\store\references')] + + @property + def VCTools(self): + """ + Microsoft Visual C++ Tools. + + Return + ------ + list of str + paths + """ + si = self.si + tools = [os.path.join(si.VCInstallDir, 'VCPackages')] + + forcex86 = True if self.vs_ver <= 10.0 else False + arch_subdir = self.pi.cross_dir(forcex86) + if arch_subdir: + tools += [os.path.join(si.VCInstallDir, f'Bin{arch_subdir}')] + + if self.vs_ver == 14.0: + path = f'Bin{self.pi.current_dir(hidex86=True)}' + tools += [os.path.join(si.VCInstallDir, path)] + + elif self.vs_ver >= 15.0: + host_dir = ( + r'bin\HostX86%s' if self.pi.current_is_x86() else r'bin\HostX64%s' + ) + tools += [ + os.path.join(si.VCInstallDir, host_dir % self.pi.target_dir(x64=True)) + ] + + if self.pi.current_cpu != self.pi.target_cpu: + tools += [ + os.path.join( + si.VCInstallDir, host_dir % self.pi.current_dir(x64=True) + ) + ] + + else: + tools += [os.path.join(si.VCInstallDir, 'Bin')] + + return tools + + @property + def OSLibraries(self): + """ + Microsoft Windows SDK Libraries. 
+ + Return + ------ + list of str + paths + """ + if self.vs_ver <= 10.0: + arch_subdir = self.pi.target_dir(hidex86=True, x64=True) + return [os.path.join(self.si.WindowsSdkDir, f'Lib{arch_subdir}')] + + else: + arch_subdir = self.pi.target_dir(x64=True) + lib = os.path.join(self.si.WindowsSdkDir, 'lib') + libver = self._sdk_subdir + return [os.path.join(lib, f'{libver}um{arch_subdir}')] + + @property + def OSIncludes(self): + """ + Microsoft Windows SDK Include. + + Return + ------ + list of str + paths + """ + include = os.path.join(self.si.WindowsSdkDir, 'include') + + if self.vs_ver <= 10.0: + return [include, os.path.join(include, 'gl')] + + else: + if self.vs_ver >= 14.0: + sdkver = self._sdk_subdir + else: + sdkver = '' + return [ + os.path.join(include, f'{sdkver}shared'), + os.path.join(include, f'{sdkver}um'), + os.path.join(include, f'{sdkver}winrt'), + ] + + @property + def OSLibpath(self): + """ + Microsoft Windows SDK Libraries Paths. + + Return + ------ + list of str + paths + """ + ref = os.path.join(self.si.WindowsSdkDir, 'References') + libpath = [] + + if self.vs_ver <= 9.0: + libpath += self.OSLibraries + + if self.vs_ver >= 11.0: + libpath += [os.path.join(ref, r'CommonConfiguration\Neutral')] + + if self.vs_ver >= 14.0: + libpath += [ + ref, + os.path.join(self.si.WindowsSdkDir, 'UnionMetadata'), + os.path.join(ref, 'Windows.Foundation.UniversalApiContract', '1.0.0.0'), + os.path.join(ref, 'Windows.Foundation.FoundationContract', '1.0.0.0'), + os.path.join( + ref, 'Windows.Networking.Connectivity.WwanContract', '1.0.0.0' + ), + os.path.join( + self.si.WindowsSdkDir, + 'ExtensionSDKs', + 'Microsoft.VCLibs', + f'{self.vs_ver:0.1f}', + 'References', + 'CommonConfiguration', + 'neutral', + ), + ] + return libpath + + @property + def SdkTools(self): + """ + Microsoft Windows SDK Tools. + + Return + ------ + list of str + paths + """ + return list(self._sdk_tools()) + + def _sdk_tools(self): + """ + Microsoft Windows SDK Tools paths generator. + + Return + ------ + generator of str + paths + """ + if self.vs_ver < 15.0: + bin_dir = 'Bin' if self.vs_ver <= 11.0 else r'Bin\x86' + yield os.path.join(self.si.WindowsSdkDir, bin_dir) + + if not self.pi.current_is_x86(): + arch_subdir = self.pi.current_dir(x64=True) + path = f'Bin{arch_subdir}' + yield os.path.join(self.si.WindowsSdkDir, path) + + if self.vs_ver in (10.0, 11.0): + if self.pi.target_is_x86(): + arch_subdir = '' + else: + arch_subdir = self.pi.current_dir(hidex86=True, x64=True) + path = rf'Bin\NETFX 4.0 Tools{arch_subdir}' + yield os.path.join(self.si.WindowsSdkDir, path) + + elif self.vs_ver >= 15.0: + path = os.path.join(self.si.WindowsSdkDir, 'Bin') + arch_subdir = self.pi.current_dir(x64=True) + sdkver = self.si.WindowsSdkLastVersion + yield os.path.join(path, f'{sdkver}{arch_subdir}') + + if self.si.WindowsSDKExecutablePath: + yield self.si.WindowsSDKExecutablePath + + @property + def _sdk_subdir(self): + """ + Microsoft Windows SDK version subdir. + + Return + ------ + str + subdir + """ + ucrtver = self.si.WindowsSdkLastVersion + return (f'{ucrtver}\\') if ucrtver else '' + + @property + def SdkSetup(self): + """ + Microsoft Windows SDK Setup. + + Return + ------ + list of str + paths + """ + if self.vs_ver > 9.0: + return [] + + return [os.path.join(self.si.WindowsSdkDir, 'Setup')] + + @property + def FxTools(self): + """ + Microsoft .NET Framework Tools. 
+ + Return + ------ + list of str + paths + """ + pi = self.pi + si = self.si + + if self.vs_ver <= 10.0: + include32 = True + include64 = not pi.target_is_x86() and not pi.current_is_x86() + else: + include32 = pi.target_is_x86() or pi.current_is_x86() + include64 = pi.current_cpu == 'amd64' or pi.target_cpu == 'amd64' + + tools = [] + if include32: + tools += [ + os.path.join(si.FrameworkDir32, ver) for ver in si.FrameworkVersion32 + ] + if include64: + tools += [ + os.path.join(si.FrameworkDir64, ver) for ver in si.FrameworkVersion64 + ] + return tools + + @property + def NetFxSDKLibraries(self): + """ + Microsoft .Net Framework SDK Libraries. + + Return + ------ + list of str + paths + """ + if self.vs_ver < 14.0 or not self.si.NetFxSdkDir: + return [] + + arch_subdir = self.pi.target_dir(x64=True) + return [os.path.join(self.si.NetFxSdkDir, rf'lib\um{arch_subdir}')] + + @property + def NetFxSDKIncludes(self): + """ + Microsoft .Net Framework SDK Includes. + + Return + ------ + list of str + paths + """ + if self.vs_ver < 14.0 or not self.si.NetFxSdkDir: + return [] + + return [os.path.join(self.si.NetFxSdkDir, r'include\um')] + + @property + def VsTDb(self): + """ + Microsoft Visual Studio Team System Database. + + Return + ------ + list of str + paths + """ + return [os.path.join(self.si.VSInstallDir, r'VSTSDB\Deploy')] + + @property + def MSBuild(self): + """ + Microsoft Build Engine. + + Return + ------ + list of str + paths + """ + if self.vs_ver < 12.0: + return [] + elif self.vs_ver < 15.0: + base_path = self.si.ProgramFilesx86 + arch_subdir = self.pi.current_dir(hidex86=True) + else: + base_path = self.si.VSInstallDir + arch_subdir = '' + + path = rf'MSBuild\{self.vs_ver:0.1f}\bin{arch_subdir}' + build = [os.path.join(base_path, path)] + + if self.vs_ver >= 15.0: + # Add Roslyn C# & Visual Basic Compiler + build += [os.path.join(base_path, path, 'Roslyn')] + + return build + + @property + def HTMLHelpWorkshop(self): + """ + Microsoft HTML Help Workshop. + + Return + ------ + list of str + paths + """ + if self.vs_ver < 11.0: + return [] + + return [os.path.join(self.si.ProgramFilesx86, 'HTML Help Workshop')] + + @property + def UCRTLibraries(self): + """ + Microsoft Universal C Runtime SDK Libraries. + + Return + ------ + list of str + paths + """ + if self.vs_ver < 14.0: + return [] + + arch_subdir = self.pi.target_dir(x64=True) + lib = os.path.join(self.si.UniversalCRTSdkDir, 'lib') + ucrtver = self._ucrt_subdir + return [os.path.join(lib, f'{ucrtver}ucrt{arch_subdir}')] + + @property + def UCRTIncludes(self): + """ + Microsoft Universal C Runtime SDK Include. + + Return + ------ + list of str + paths + """ + if self.vs_ver < 14.0: + return [] + + include = os.path.join(self.si.UniversalCRTSdkDir, 'include') + return [os.path.join(include, f'{self._ucrt_subdir}ucrt')] + + @property + def _ucrt_subdir(self): + """ + Microsoft Universal C Runtime SDK version subdir. + + Return + ------ + str + subdir + """ + ucrtver = self.si.UniversalCRTSdkLastVersion + return (f'{ucrtver}\\') if ucrtver else '' + + @property + def FSharp(self): + """ + Microsoft Visual F#. + + Return + ------ + list of str + paths + """ + if 11.0 > self.vs_ver > 12.0: + return [] + + return [self.si.FSharpInstallDir] + + @property + def VCRuntimeRedist(self) -> str | None: + """ + Microsoft Visual C++ runtime redistributable dll. + + Returns the first suitable path found or None. 
+ """ + vcruntime = f'vcruntime{self.vc_ver}0.dll' + arch_subdir = self.pi.target_dir(x64=True).strip('\\') + + # Installation prefixes candidates + prefixes = [] + tools_path = self.si.VCInstallDir + redist_path = os.path.dirname(tools_path.replace(r'\Tools', r'\Redist')) + if os.path.isdir(redist_path): + # Redist version may not be exactly the same as tools + redist_path = os.path.join(redist_path, os.listdir(redist_path)[-1]) + prefixes += [redist_path, os.path.join(redist_path, 'onecore')] + + prefixes += [os.path.join(tools_path, 'redist')] # VS14 legacy path + + # CRT directory + crt_dirs = ( + f'Microsoft.VC{self.vc_ver * 10}.CRT', + # Sometime store in directory with VS version instead of VC + f'Microsoft.VC{int(self.vs_ver) * 10}.CRT', + ) + + # vcruntime path + candidate_paths = ( + os.path.join(prefix, arch_subdir, crt_dir, vcruntime) + for (prefix, crt_dir) in itertools.product(prefixes, crt_dirs) + ) + return next(filter(os.path.isfile, candidate_paths), None) # type: ignore[arg-type] #python/mypy#12682 + + def return_env(self, exists: bool = True) -> _EnvironmentDict: + """ + Return environment dict. + + Parameters + ---------- + exists: bool + It True, only return existing paths. + + Return + ------ + dict + environment + """ + env = _EnvironmentDict( + include=self._build_paths( + 'include', + [ + self.VCIncludes, + self.OSIncludes, + self.UCRTIncludes, + self.NetFxSDKIncludes, + ], + exists, + ), + lib=self._build_paths( + 'lib', + [ + self.VCLibraries, + self.OSLibraries, + self.FxTools, + self.UCRTLibraries, + self.NetFxSDKLibraries, + ], + exists, + ), + libpath=self._build_paths( + 'libpath', + [self.VCLibraries, self.FxTools, self.VCStoreRefs, self.OSLibpath], + exists, + ), + path=self._build_paths( + 'path', + [ + self.VCTools, + self.VSTools, + self.VsTDb, + self.SdkTools, + self.SdkSetup, + self.FxTools, + self.MSBuild, + self.HTMLHelpWorkshop, + self.FSharp, + ], + exists, + ), + ) + if self.vs_ver >= 14 and self.VCRuntimeRedist: + env['py_vcruntime_redist'] = self.VCRuntimeRedist + return env + + def _build_paths(self, name, spec_path_lists, exists): + """ + Given an environment variable name and specified paths, + return a pathsep-separated string of paths containing + unique, extant, directories from those paths and from + the environment variable. Raise an error if no paths + are resolved. + + Parameters + ---------- + name: str + Environment variable name + spec_path_lists: list of str + Paths + exists: bool + It True, only return existing paths. 
+ + Return + ------ + str + Pathsep-separated paths + """ + # flatten spec_path_lists + spec_paths = itertools.chain.from_iterable(spec_path_lists) + env_paths = environ.get(name, '').split(os.pathsep) + paths = itertools.chain(spec_paths, env_paths) + extant_paths = list(filter(os.path.isdir, paths)) if exists else paths + if not extant_paths: + msg = f"{name.upper()} environment variable is empty" + raise distutils.errors.DistutilsPlatformError(msg) + unique_paths = unique_everseen(extant_paths) + return os.pathsep.join(unique_paths) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/namespaces.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/namespaces.py new file mode 100644 index 0000000000000000000000000000000000000000..85ea2ebd654c480b8c19d1715b3772c4bcfd812e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/namespaces.py @@ -0,0 +1,106 @@ +import itertools +import os + +from .compat import py312 + +from distutils import log + +flatten = itertools.chain.from_iterable + + +class Installer: + nspkg_ext = '-nspkg.pth' + + def install_namespaces(self) -> None: + nsp = self._get_all_ns_packages() + if not nsp: + return + filename = self._get_nspkg_file() + self.outputs.append(filename) + log.info("Installing %s", filename) + lines = map(self._gen_nspkg_line, nsp) + + if self.dry_run: + # always generate the lines, even in dry run + list(lines) + return + + with open(filename, 'wt', encoding=py312.PTH_ENCODING) as f: + # Python<3.13 requires encoding="locale" instead of "utf-8" + # See: python/cpython#77102 + f.writelines(lines) + + def uninstall_namespaces(self) -> None: + filename = self._get_nspkg_file() + if not os.path.exists(filename): + return + log.info("Removing %s", filename) + os.remove(filename) + + def _get_nspkg_file(self): + filename, _ = os.path.splitext(self._get_target()) + return filename + self.nspkg_ext + + def _get_target(self): + return self.target + + _nspkg_tmpl = ( + "import sys, types, os", + "p = os.path.join(%(root)s, *%(pth)r)", + "importlib = __import__('importlib.util')", + "__import__('importlib.machinery')", + ( + "m = " + "sys.modules.setdefault(%(pkg)r, " + "importlib.util.module_from_spec(" + "importlib.machinery.PathFinder.find_spec(%(pkg)r, " + "[os.path.dirname(p)])))" + ), + ("m = m or sys.modules.setdefault(%(pkg)r, types.ModuleType(%(pkg)r))"), + "mp = (m or []) and m.__dict__.setdefault('__path__',[])", + "(p not in mp) and mp.append(p)", + ) + "lines for the namespace installer" + + _nspkg_tmpl_multi = ('m and setattr(sys.modules[%(parent)r], %(child)r, m)',) + "additional line(s) when a parent package is indicated" + + def _get_root(self): + return "sys._getframe(1).f_locals['sitedir']" + + def _gen_nspkg_line(self, pkg): + pth = tuple(pkg.split('.')) + root = self._get_root() + tmpl_lines = self._nspkg_tmpl + parent, sep, child = pkg.rpartition('.') + if parent: + tmpl_lines += self._nspkg_tmpl_multi + return ';'.join(tmpl_lines) % locals() + '\n' + + def _get_all_ns_packages(self): + """Return sorted list of all package namespaces""" + pkgs = self.distribution.namespace_packages or [] + return sorted(set(flatten(map(self._pkg_names, pkgs)))) + + @staticmethod + def _pkg_names(pkg): + """ + Given a namespace package, yield the components of that + package. 
+ + >>> names = Installer._pkg_names('a.b.c') + >>> set(names) == set(['a', 'a.b', 'a.b.c']) + True + """ + parts = pkg.split('.') + while parts: + yield '.'.join(parts) + parts.pop() + + +class DevelopInstaller(Installer): + def _get_root(self): + return repr(str(self.egg_path)) + + def _get_target(self): + return self.egg_link diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/package_index.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/package_index.py new file mode 100644 index 0000000000000000000000000000000000000000..1a6abebcda4f62c91e0843a17ed4716f8fd3b1d9 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/package_index.py @@ -0,0 +1,1137 @@ +"""PyPI and direct package downloading.""" + +from __future__ import annotations + +import base64 +import configparser +import hashlib +import html +import http.client +import io +import itertools +import os +import re +import shutil +import socket +import subprocess +import sys +import urllib.error +import urllib.parse +import urllib.request +from fnmatch import translate +from functools import wraps +from typing import NamedTuple + +from more_itertools import unique_everseen + +import setuptools +from pkg_resources import ( + BINARY_DIST, + CHECKOUT_DIST, + DEVELOP_DIST, + EGG_DIST, + SOURCE_DIST, + Distribution, + Environment, + Requirement, + find_distributions, + normalize_path, + parse_version, + safe_name, + safe_version, + to_filename, +) +from setuptools.wheel import Wheel + +from .unicode_utils import _cfg_read_utf8_with_fallback, _read_utf8_with_fallback + +from distutils import log +from distutils.errors import DistutilsError + +EGG_FRAGMENT = re.compile(r'^egg=([-A-Za-z0-9_.+!]+)$') +HREF = re.compile(r"""href\s*=\s*['"]?([^'"> ]+)""", re.I) +PYPI_MD5 = re.compile( + r'([^<]+)\n\s+\(md5\)' +) +URL_SCHEME = re.compile('([-+.a-z0-9]{2,}):', re.I).match +EXTENSIONS = ".tar.gz .tar.bz2 .tar .zip .tgz".split() + +__all__ = [ + 'PackageIndex', + 'distros_for_url', + 'parse_bdist_wininst', + 'interpret_distro_name', +] + +_SOCKET_TIMEOUT = 15 + +user_agent = f"setuptools/{setuptools.__version__} Python-urllib/{sys.version_info.major}.{sys.version_info.minor}" + + +def parse_requirement_arg(spec): + try: + return Requirement.parse(spec) + except ValueError as e: + raise DistutilsError( + f"Not a URL, existing file, or requirement spec: {spec!r}" + ) from e + + +def parse_bdist_wininst(name): + """Return (base,pyversion) or (None,None) for possible .exe name""" + + lower = name.lower() + base, py_ver, plat = None, None, None + + if lower.endswith('.exe'): + if lower.endswith('.win32.exe'): + base = name[:-10] + plat = 'win32' + elif lower.startswith('.win32-py', -16): + py_ver = name[-7:-4] + base = name[:-16] + plat = 'win32' + elif lower.endswith('.win-amd64.exe'): + base = name[:-14] + plat = 'win-amd64' + elif lower.startswith('.win-amd64-py', -20): + py_ver = name[-7:-4] + base = name[:-20] + plat = 'win-amd64' + return base, py_ver, plat + + +def egg_info_for_url(url): + parts = urllib.parse.urlparse(url) + _scheme, server, path, _parameters, _query, fragment = parts + base = urllib.parse.unquote(path.split('/')[-1]) + if server == 'sourceforge.net' and base == 'download': # XXX Yuck + base = urllib.parse.unquote(path.split('/')[-2]) + if '#' in base: + base, fragment = base.split('#', 1) + return base, fragment + + +def distros_for_url(url, metadata=None): + """Yield egg or source distribution objects that might be found at a URL""" + 
base, fragment = egg_info_for_url(url) + yield from distros_for_location(url, base, metadata) + if fragment: + match = EGG_FRAGMENT.match(fragment) + if match: + yield from interpret_distro_name( + url, match.group(1), metadata, precedence=CHECKOUT_DIST + ) + + +def distros_for_location(location, basename, metadata=None): + """Yield egg or source distribution objects based on basename""" + if basename.endswith('.egg.zip'): + basename = basename[:-4] # strip the .zip + if basename.endswith('.egg') and '-' in basename: + # only one, unambiguous interpretation + return [Distribution.from_location(location, basename, metadata)] + if basename.endswith('.whl') and '-' in basename: + wheel = Wheel(basename) + if not wheel.is_compatible(): + return [] + return [ + Distribution( + location=location, + project_name=wheel.project_name, + version=wheel.version, + # Increase priority over eggs. + precedence=EGG_DIST + 1, + ) + ] + if basename.endswith('.exe'): + win_base, py_ver, platform = parse_bdist_wininst(basename) + if win_base is not None: + return interpret_distro_name( + location, win_base, metadata, py_ver, BINARY_DIST, platform + ) + # Try source distro extensions (.zip, .tgz, etc.) + # + for ext in EXTENSIONS: + if basename.endswith(ext): + basename = basename[: -len(ext)] + return interpret_distro_name(location, basename, metadata) + return [] # no extension matched + + +def distros_for_filename(filename, metadata=None): + """Yield possible egg or source distribution objects based on a filename""" + return distros_for_location( + normalize_path(filename), os.path.basename(filename), metadata + ) + + +def interpret_distro_name( + location, basename, metadata, py_version=None, precedence=SOURCE_DIST, platform=None +): + """Generate the interpretation of a source distro name + + Note: if `location` is a filesystem filename, you should call + ``pkg_resources.normalize_path()`` on it before passing it to this + routine! + """ + + parts = basename.split('-') + if not py_version and any(re.match(r'py\d\.\d$', p) for p in parts[2:]): + # it is a bdist_dumb, not an sdist -- bail out + return + + # find the pivot (p) that splits the name from the version. + # infer the version as the first item that has a digit. + for p in range(len(parts)): + if parts[p][:1].isdigit(): + break + else: + p = len(parts) + + yield Distribution( + location, + metadata, + '-'.join(parts[:p]), + '-'.join(parts[p:]), + py_version=py_version, + precedence=precedence, + platform=platform, + ) + + +def unique_values(func): + """ + Wrap a function returning an iterable such that the resulting iterable + only ever yields unique items. + """ + + @wraps(func) + def wrapper(*args, **kwargs): + return unique_everseen(func(*args, **kwargs)) + + return wrapper + + +REL = re.compile(r"""<([^>]*\srel\s{0,10}=\s{0,10}['"]?([^'" >]+)[^>]*)>""", re.I) +""" +Regex for an HTML tag with 'rel="val"' attributes. 
+""" + + +@unique_values +def find_external_links(url, page): + """Find rel="homepage" and rel="download" links in `page`, yielding URLs""" + + for match in REL.finditer(page): + tag, rel = match.groups() + rels = set(map(str.strip, rel.lower().split(','))) + if 'homepage' in rels or 'download' in rels: + for match in HREF.finditer(tag): + yield urllib.parse.urljoin(url, htmldecode(match.group(1))) + + for tag in ("Home Page", "Download URL"): + pos = page.find(tag) + if pos != -1: + match = HREF.search(page, pos) + if match: + yield urllib.parse.urljoin(url, htmldecode(match.group(1))) + + +class ContentChecker: + """ + A null content checker that defines the interface for checking content + """ + + def feed(self, block): + """ + Feed a block of data to the hash. + """ + return + + def is_valid(self): + """ + Check the hash. Return False if validation fails. + """ + return True + + def report(self, reporter, template): + """ + Call reporter with information about the checker (hash name) + substituted into the template. + """ + return + + +class HashChecker(ContentChecker): + pattern = re.compile( + r'(?Psha1|sha224|sha384|sha256|sha512|md5)=' + r'(?P[a-f0-9]+)' + ) + + def __init__(self, hash_name, expected) -> None: + self.hash_name = hash_name + self.hash = hashlib.new(hash_name) + self.expected = expected + + @classmethod + def from_url(cls, url): + "Construct a (possibly null) ContentChecker from a URL" + fragment = urllib.parse.urlparse(url)[-1] + if not fragment: + return ContentChecker() + match = cls.pattern.search(fragment) + if not match: + return ContentChecker() + return cls(**match.groupdict()) + + def feed(self, block): + self.hash.update(block) + + def is_valid(self): + return self.hash.hexdigest() == self.expected + + def report(self, reporter, template): + msg = template % self.hash_name + return reporter(msg) + + +class PackageIndex(Environment): + """A distribution index that scans web pages for download URLs""" + + def __init__( + self, + index_url: str = "https://pypi.org/simple/", + hosts=('*',), + ca_bundle=None, + verify_ssl: bool = True, + *args, + **kw, + ) -> None: + super().__init__(*args, **kw) + self.index_url = index_url + "/"[: not index_url.endswith('/')] + self.scanned_urls: dict = {} + self.fetched_urls: dict = {} + self.package_pages: dict = {} + self.allows = re.compile('|'.join(map(translate, hosts))).match + self.to_scan: list = [] + self.opener = urllib.request.urlopen + + def add(self, dist): + # ignore invalid versions + try: + parse_version(dist.version) + except Exception: + return None + return super().add(dist) + + # FIXME: 'PackageIndex.process_url' is too complex (14) + def process_url(self, url, retrieve: bool = False) -> None: # noqa: C901 + """Evaluate a URL as a possible download, and maybe retrieve it""" + if url in self.scanned_urls and not retrieve: + return + self.scanned_urls[url] = True + if not URL_SCHEME(url): + self.process_filename(url) + return + else: + dists = list(distros_for_url(url)) + if dists: + if not self.url_ok(url): + return + self.debug("Found link: %s", url) + + if dists or not retrieve or url in self.fetched_urls: + list(map(self.add, dists)) + return # don't need the actual page + + if not self.url_ok(url): + self.fetched_urls[url] = True + return + + self.info("Reading %s", url) + self.fetched_urls[url] = True # prevent multiple fetch attempts + tmpl = "Download error on %s: %%s -- Some packages may not be found!" 
+ f = self.open_url(url, tmpl % url) + if f is None: + return + if isinstance(f, urllib.error.HTTPError) and f.code == 401: + self.info(f"Authentication error: {f.msg}") + self.fetched_urls[f.url] = True + if 'html' not in f.headers.get('content-type', '').lower(): + f.close() # not html, we can't process it + return + + base = f.url # handle redirects + page = f.read() + if not isinstance(page, str): + # In Python 3 and got bytes but want str. + if isinstance(f, urllib.error.HTTPError): + # Errors have no charset, assume latin1: + charset = 'latin-1' + else: + charset = f.headers.get_param('charset') or 'latin-1' + page = page.decode(charset, "ignore") + f.close() + for match in HREF.finditer(page): + link = urllib.parse.urljoin(base, htmldecode(match.group(1))) + self.process_url(link) + if url.startswith(self.index_url) and getattr(f, 'code', None) != 404: + page = self.process_index(url, page) + + def process_filename(self, fn, nested: bool = False) -> None: + # process filenames or directories + if not os.path.exists(fn): + self.warn("Not found: %s", fn) + return + + if os.path.isdir(fn) and not nested: + path = os.path.realpath(fn) + for item in os.listdir(path): + self.process_filename(os.path.join(path, item), True) + + dists = distros_for_filename(fn) + if dists: + self.debug("Found: %s", fn) + list(map(self.add, dists)) + + def url_ok(self, url, fatal: bool = False) -> bool: + s = URL_SCHEME(url) + is_file = s and s.group(1).lower() == 'file' + if is_file or self.allows(urllib.parse.urlparse(url)[1]): + return True + msg = ( + "\nNote: Bypassing %s (disallowed host; see " + "https://setuptools.pypa.io/en/latest/deprecated/" + "easy_install.html#restricting-downloads-with-allow-hosts for details).\n" + ) + if fatal: + raise DistutilsError(msg % url) + else: + self.warn(msg, url) + return False + + def scan_egg_links(self, search_path) -> None: + dirs = filter(os.path.isdir, search_path) + egg_links = ( + (path, entry) + for path in dirs + for entry in os.listdir(path) + if entry.endswith('.egg-link') + ) + list(itertools.starmap(self.scan_egg_link, egg_links)) + + def scan_egg_link(self, path, entry) -> None: + content = _read_utf8_with_fallback(os.path.join(path, entry)) + # filter non-empty lines + lines = list(filter(None, map(str.strip, content.splitlines()))) + + if len(lines) != 2: + # format is not recognized; punt + return + + egg_path, _setup_path = lines + + for dist in find_distributions(os.path.join(path, egg_path)): + dist.location = os.path.join(path, *lines) + dist.precedence = SOURCE_DIST + self.add(dist) + + def _scan(self, link): + # Process a URL to see if it's for a package page + NO_MATCH_SENTINEL = None, None + if not link.startswith(self.index_url): + return NO_MATCH_SENTINEL + + parts = list(map(urllib.parse.unquote, link[len(self.index_url) :].split('/'))) + if len(parts) != 2 or '#' in parts[1]: + return NO_MATCH_SENTINEL + + # it's a package page, sanitize and index it + pkg = safe_name(parts[0]) + ver = safe_version(parts[1]) + self.package_pages.setdefault(pkg.lower(), {})[link] = True + return to_filename(pkg), to_filename(ver) + + def process_index(self, url, page): + """Process the contents of a PyPI page""" + + # process an index page into the package-page index + for match in HREF.finditer(page): + try: + self._scan(urllib.parse.urljoin(url, htmldecode(match.group(1)))) + except ValueError: + pass + + pkg, ver = self._scan(url) # ensure this page is in the page index + if not pkg: + return "" # no sense double-scanning non-package pages + + # 
process individual package page + for new_url in find_external_links(url, page): + # Process the found URL + base, frag = egg_info_for_url(new_url) + if base.endswith('.py') and not frag: + if ver: + new_url += f'#egg={pkg}-{ver}' + else: + self.need_version_info(url) + self.scan_url(new_url) + + return PYPI_MD5.sub( + lambda m: '{}'.format(*m.group(1, 3, 2)), page + ) + + def need_version_info(self, url) -> None: + self.scan_all( + "Page at %s links to .py file(s) without version info; an index " + "scan is required.", + url, + ) + + def scan_all(self, msg=None, *args) -> None: + if self.index_url not in self.fetched_urls: + if msg: + self.warn(msg, *args) + self.info("Scanning index of all packages (this may take a while)") + self.scan_url(self.index_url) + + def find_packages(self, requirement) -> None: + self.scan_url(self.index_url + requirement.unsafe_name + '/') + + if not self.package_pages.get(requirement.key): + # Fall back to safe version of the name + self.scan_url(self.index_url + requirement.project_name + '/') + + if not self.package_pages.get(requirement.key): + # We couldn't find the target package, so search the index page too + self.not_found_in_index(requirement) + + for url in list(self.package_pages.get(requirement.key, ())): + # scan each page that might be related to the desired package + self.scan_url(url) + + def obtain(self, requirement, installer=None): + self.prescan() + self.find_packages(requirement) + for dist in self[requirement.key]: + if dist in requirement: + return dist + self.debug("%s does not match %s", requirement, dist) + return super().obtain(requirement, installer) + + def check_hash(self, checker, filename, tfp) -> None: + """ + checker is a ContentChecker + """ + checker.report(self.debug, f"Validating %s checksum for {filename}") + if not checker.is_valid(): + tfp.close() + os.unlink(filename) + raise DistutilsError( + f"{checker.hash.name} validation failed for {os.path.basename(filename)}; " + "possible download problem?" + ) + + def add_find_links(self, urls) -> None: + """Add `urls` to the list that will be prescanned for searches""" + for url in urls: + if ( + self.to_scan is None # if we have already "gone online" + or not URL_SCHEME(url) # or it's a local file/directory + or url.startswith('file:') + or list(distros_for_url(url)) # or a direct package link + ): + # then go ahead and process it now + self.scan_url(url) + else: + # otherwise, defer retrieval till later + self.to_scan.append(url) + + def prescan(self): + """Scan urls scheduled for prescanning (e.g. --find-links)""" + if self.to_scan: + list(map(self.scan_url, self.to_scan)) + self.to_scan = None # from now on, go ahead and process immediately + + def not_found_in_index(self, requirement) -> None: + if self[requirement.key]: # we've seen at least one distro + meth, msg = self.info, "Couldn't retrieve index page for %r" + else: # no distros seen for this name, might be misspelled + meth, msg = self.warn, "Couldn't find index page for %r (maybe misspelled?)" + meth(msg, requirement.unsafe_name) + self.scan_all() + + def download(self, spec, tmpdir): + """Locate and/or download `spec` to `tmpdir`, returning a local path + + `spec` may be a ``Requirement`` object, or a string containing a URL, + an existing local filename, or a project/version requirement spec + (i.e. the string form of a ``Requirement`` object). 
If it is the URL + of a .py file with an unambiguous ``#egg=name-version`` tag (i.e., one + that escapes ``-`` as ``_`` throughout), a trivial ``setup.py`` is + automatically created alongside the downloaded file. + + If `spec` is a ``Requirement`` object or a string containing a + project/version requirement spec, this method returns the location of + a matching distribution (possibly after downloading it to `tmpdir`). + If `spec` is a locally existing file or directory name, it is simply + returned unchanged. If `spec` is a URL, it is downloaded to a subpath + of `tmpdir`, and the local filename is returned. Various errors may be + raised if a problem occurs during downloading. + """ + if not isinstance(spec, Requirement): + scheme = URL_SCHEME(spec) + if scheme: + # It's a url, download it to tmpdir + found = self._download_url(spec, tmpdir) + base, fragment = egg_info_for_url(spec) + if base.endswith('.py'): + found = self.gen_setup(found, fragment, tmpdir) + return found + elif os.path.exists(spec): + # Existing file or directory, just return it + return spec + else: + spec = parse_requirement_arg(spec) + return getattr(self.fetch_distribution(spec, tmpdir), 'location', None) + + def fetch_distribution( # noqa: C901 # is too complex (14) # FIXME + self, + requirement, + tmpdir, + force_scan: bool = False, + source: bool = False, + develop_ok: bool = False, + local_index=None, + ) -> Distribution | None: + """Obtain a distribution suitable for fulfilling `requirement` + + `requirement` must be a ``pkg_resources.Requirement`` instance. + If necessary, or if the `force_scan` flag is set, the requirement is + searched for in the (online) package index as well as the locally + installed packages. If a distribution matching `requirement` is found, + the returned distribution's ``location`` is the value you would have + gotten from calling the ``download()`` method with the matching + distribution's URL or filename. If no matching distribution is found, + ``None`` is returned. + + If the `source` flag is set, only source distributions and source + checkout links will be considered. Unless the `develop_ok` flag is + set, development and system eggs (i.e., those using the ``.egg-info`` + format) will be ignored. 
+ """ + # process a Requirement + self.info("Searching for %s", requirement) + skipped = set() + dist = None + + def find(req, env: Environment | None = None): + if env is None: + env = self + # Find a matching distribution; may be called more than once + + for dist in env[req.key]: + if dist.precedence == DEVELOP_DIST and not develop_ok: + if dist not in skipped: + self.warn( + "Skipping development or system egg: %s", + dist, + ) + skipped.add(dist) + continue + + test = dist in req and (dist.precedence <= SOURCE_DIST or not source) + if test: + loc = self.download(dist.location, tmpdir) + dist.download_location = loc + if os.path.exists(dist.download_location): + return dist + + return None + + if force_scan: + self.prescan() + self.find_packages(requirement) + dist = find(requirement) + + if not dist and local_index is not None: + dist = find(requirement, local_index) + + if dist is None: + if self.to_scan is not None: + self.prescan() + dist = find(requirement) + + if dist is None and not force_scan: + self.find_packages(requirement) + dist = find(requirement) + + if dist is None: + self.warn( + "No local packages or working download links found for %s%s", + (source and "a source distribution of " or ""), + requirement, + ) + return None + else: + self.info("Best match: %s", dist) + return dist.clone(location=dist.download_location) + + def fetch( + self, requirement, tmpdir, force_scan: bool = False, source: bool = False + ) -> str | None: + """Obtain a file suitable for fulfilling `requirement` + + DEPRECATED; use the ``fetch_distribution()`` method now instead. For + backward compatibility, this routine is identical but returns the + ``location`` of the downloaded distribution instead of a distribution + object. + """ + dist = self.fetch_distribution(requirement, tmpdir, force_scan, source) + if dist is not None: + return dist.location + return None + + def gen_setup(self, filename, fragment, tmpdir): + match = EGG_FRAGMENT.match(fragment) + dists = ( + match + and [ + d + for d in interpret_distro_name(filename, match.group(1), None) + if d.version + ] + or [] + ) + + if len(dists) == 1: # unambiguous ``#egg`` fragment + basename = os.path.basename(filename) + + # Make sure the file has been downloaded to the temp dir. + if os.path.dirname(filename) != tmpdir: + dst = os.path.join(tmpdir, basename) + if not (os.path.exists(dst) and os.path.samefile(filename, dst)): + shutil.copy2(filename, dst) + filename = dst + + with open(os.path.join(tmpdir, 'setup.py'), 'w', encoding="utf-8") as file: + file.write( + "from setuptools import setup\n" + f"setup(name={dists[0].project_name!r}, version={dists[0].version!r}, py_modules=[{os.path.splitext(basename)[0]!r}])\n" + ) + return filename + + elif match: + raise DistutilsError( + f"Can't unambiguously interpret project/version identifier {fragment!r}; " + "any dashes in the name or version should be escaped using " + f"underscores. {dists!r}" + ) + else: + raise DistutilsError( + "Can't process plain .py files without an '#egg=name-version'" + " suffix to enable automatic setup script generation." 
+ ) + + dl_blocksize = 8192 + + def _download_to(self, url, filename): + self.info("Downloading %s", url) + # Download the file + fp = None + try: + checker = HashChecker.from_url(url) + fp = self.open_url(url) + if isinstance(fp, urllib.error.HTTPError): + raise DistutilsError(f"Can't download {url}: {fp.code} {fp.msg}") + headers = fp.info() + blocknum = 0 + bs = self.dl_blocksize + size = -1 + if "content-length" in headers: + # Some servers return multiple Content-Length headers :( + sizes = headers.get_all('Content-Length') + size = max(map(int, sizes)) + self.reporthook(url, filename, blocknum, bs, size) + with open(filename, 'wb') as tfp: + while True: + block = fp.read(bs) + if block: + checker.feed(block) + tfp.write(block) + blocknum += 1 + self.reporthook(url, filename, blocknum, bs, size) + else: + break + self.check_hash(checker, filename, tfp) + return headers + finally: + if fp: + fp.close() + + def reporthook(self, url, filename, blocknum, blksize, size) -> None: + pass # no-op + + # FIXME: + def open_url(self, url, warning=None): # noqa: C901 # is too complex (12) + if url.startswith('file:'): + return local_open(url) + try: + return open_with_auth(url, self.opener) + except (ValueError, http.client.InvalidURL) as v: + msg = ' '.join([str(arg) for arg in v.args]) + if warning: + self.warn(warning, msg) + else: + raise DistutilsError(f'{url} {msg}') from v + except urllib.error.HTTPError as v: + return v + except urllib.error.URLError as v: + if warning: + self.warn(warning, v.reason) + else: + raise DistutilsError(f"Download error for {url}: {v.reason}") from v + except http.client.BadStatusLine as v: + if warning: + self.warn(warning, v.line) + else: + raise DistutilsError( + f'{url} returned a bad status line. The server might be ' + f'down, {v.line}' + ) from v + except (http.client.HTTPException, OSError) as v: + if warning: + self.warn(warning, v) + else: + raise DistutilsError(f"Download error for {url}: {v}") from v + + def _download_url(self, url, tmpdir): + # Determine download filename + # + name, _fragment = egg_info_for_url(url) + if name: + while '..' 
in name: + name = name.replace('..', '.').replace('\\', '_') + else: + name = "__downloaded__" # default if URL has no path contents + + if name.endswith('.egg.zip'): + name = name[:-4] # strip the extra .zip before download + + filename = os.path.join(tmpdir, name) + + return self._download_vcs(url, filename) or self._download_other(url, filename) + + @staticmethod + def _resolve_vcs(url): + """ + >>> rvcs = PackageIndex._resolve_vcs + >>> rvcs('git+http://foo/bar') + 'git' + >>> rvcs('hg+https://foo/bar') + 'hg' + >>> rvcs('git:myhost') + 'git' + >>> rvcs('hg:myhost') + >>> rvcs('http://foo/bar') + """ + scheme = urllib.parse.urlsplit(url).scheme + pre, sep, _post = scheme.partition('+') + # svn and git have their own protocol; hg does not + allowed = set(['svn', 'git'] + ['hg'] * bool(sep)) + return next(iter({pre} & allowed), None) + + def _download_vcs(self, url, spec_filename): + vcs = self._resolve_vcs(url) + if not vcs: + return None + if vcs == 'svn': + raise DistutilsError( + f"Invalid config, SVN download is not supported: {url}" + ) + + filename, _, _ = spec_filename.partition('#') + url, rev = self._vcs_split_rev_from_url(url) + + self.info(f"Doing {vcs} clone from {url} to {filename}") + subprocess.check_call([vcs, 'clone', '--quiet', url, filename]) + + co_commands = dict( + git=[vcs, '-C', filename, 'checkout', '--quiet', rev], + hg=[vcs, '--cwd', filename, 'up', '-C', '-r', rev, '-q'], + ) + if rev is not None: + self.info(f"Checking out {rev}") + subprocess.check_call(co_commands[vcs]) + + return filename + + def _download_other(self, url, filename): + scheme = urllib.parse.urlsplit(url).scheme + if scheme == 'file': # pragma: no cover + return urllib.request.url2pathname(urllib.parse.urlparse(url).path) + # raise error if not allowed + self.url_ok(url, True) + return self._attempt_download(url, filename) + + def scan_url(self, url) -> None: + self.process_url(url, True) + + def _attempt_download(self, url, filename): + headers = self._download_to(url, filename) + if 'html' in headers.get('content-type', '').lower(): + return self._invalid_download_html(url, headers, filename) + else: + return filename + + def _invalid_download_html(self, url, headers, filename): + os.unlink(filename) + raise DistutilsError(f"Unexpected HTML page found at {url}") + + @staticmethod + def _vcs_split_rev_from_url(url): + """ + Given a possible VCS URL, return a clean URL and resolved revision if any. 
+ + >>> vsrfu = PackageIndex._vcs_split_rev_from_url + >>> vsrfu('git+https://github.com/pypa/setuptools@v69.0.0#egg-info=setuptools') + ('https://github.com/pypa/setuptools', 'v69.0.0') + >>> vsrfu('git+https://github.com/pypa/setuptools#egg-info=setuptools') + ('https://github.com/pypa/setuptools', None) + >>> vsrfu('http://foo/bar') + ('http://foo/bar', None) + """ + parts = urllib.parse.urlsplit(url) + + clean_scheme = parts.scheme.split('+', 1)[-1] + + # Some fragment identification fails + no_fragment_path, _, _ = parts.path.partition('#') + + pre, sep, post = no_fragment_path.rpartition('@') + clean_path, rev = (pre, post) if sep else (post, None) + + resolved = parts._replace( + scheme=clean_scheme, + path=clean_path, + # discard the fragment + fragment='', + ).geturl() + + return resolved, rev + + def debug(self, msg, *args) -> None: + log.debug(msg, *args) + + def info(self, msg, *args) -> None: + log.info(msg, *args) + + def warn(self, msg, *args) -> None: + log.warn(msg, *args) + + +# This pattern matches a character entity reference (a decimal numeric +# references, a hexadecimal numeric reference, or a named reference). +entity_sub = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?').sub + + +def decode_entity(match): + what = match.group(0) + return html.unescape(what) + + +def htmldecode(text): + """ + Decode HTML entities in the given text. + + >>> htmldecode( + ... 'https://../package_name-0.1.2.tar.gz' + ... '?tokena=A&tokenb=B">package_name-0.1.2.tar.gz') + 'https://../package_name-0.1.2.tar.gz?tokena=A&tokenb=B">package_name-0.1.2.tar.gz' + """ + return entity_sub(decode_entity, text) + + +def socket_timeout(timeout=15): + def _socket_timeout(func): + def _socket_timeout(*args, **kwargs): + old_timeout = socket.getdefaulttimeout() + socket.setdefaulttimeout(timeout) + try: + return func(*args, **kwargs) + finally: + socket.setdefaulttimeout(old_timeout) + + return _socket_timeout + + return _socket_timeout + + +def _encode_auth(auth): + """ + Encode auth from a URL suitable for an HTTP header. + >>> str(_encode_auth('username%3Apassword')) + 'dXNlcm5hbWU6cGFzc3dvcmQ=' + + Long auth strings should not cause a newline to be inserted. + >>> long_auth = 'username:' + 'password'*10 + >>> chr(10) in str(_encode_auth(long_auth)) + False + """ + auth_s = urllib.parse.unquote(auth) + # convert to bytes + auth_bytes = auth_s.encode() + encoded_bytes = base64.b64encode(auth_bytes) + # convert back to a string + encoded = encoded_bytes.decode() + # strip the trailing carriage return + return encoded.replace('\n', '') + + +class Credential(NamedTuple): + """ + A username/password pair. + + Displayed separated by `:`. 
+ >>> str(Credential('username', 'password')) + 'username:password' + """ + + username: str + password: str + + def __str__(self) -> str: + return f'{self.username}:{self.password}' + + +class PyPIConfig(configparser.RawConfigParser): + def __init__(self): + """ + Load from ~/.pypirc + """ + defaults = dict.fromkeys(['username', 'password', 'repository'], '') + super().__init__(defaults) + + rc = os.path.join(os.path.expanduser('~'), '.pypirc') + if os.path.exists(rc): + _cfg_read_utf8_with_fallback(self, rc) + + @property + def creds_by_repository(self): + sections_with_repositories = [ + section + for section in self.sections() + if self.get(section, 'repository').strip() + ] + + return dict(map(self._get_repo_cred, sections_with_repositories)) + + def _get_repo_cred(self, section): + repo = self.get(section, 'repository').strip() + return repo, Credential( + self.get(section, 'username').strip(), + self.get(section, 'password').strip(), + ) + + def find_credential(self, url): + """ + If the URL indicated appears to be a repository defined in this + config, return the credential for that repository. + """ + for repository, cred in self.creds_by_repository.items(): + if url.startswith(repository): + return cred + return None + + +def open_with_auth(url, opener=urllib.request.urlopen): + """Open a urllib2 request, handling HTTP authentication""" + + parsed = urllib.parse.urlparse(url) + scheme, netloc, path, params, query, frag = parsed + + # Double scheme does not raise on macOS as revealed by a + # failing test. We would expect "nonnumeric port". Refs #20. + if netloc.endswith(':'): + raise http.client.InvalidURL("nonnumeric port: ''") + + if scheme in ('http', 'https'): + auth, address = _splituser(netloc) + else: + auth, address = (None, None) + + if not auth: + cred = PyPIConfig().find_credential(url) + if cred: + auth = str(cred) + info = cred.username, url + log.info('Authenticating as %s for %s (from .pypirc)', *info) + + if auth: + auth = "Basic " + _encode_auth(auth) + parts = scheme, address, path, params, query, frag + new_url = urllib.parse.urlunparse(parts) + request = urllib.request.Request(new_url) + request.add_header("Authorization", auth) + else: + request = urllib.request.Request(url) + + request.add_header('User-Agent', user_agent) + fp = opener(request) + + if auth: + # Put authentication info back into request URL if same host, + # so that links found on the page will work + s2, h2, path2, param2, query2, frag2 = urllib.parse.urlparse(fp.url) + if s2 == scheme and h2 == address: + parts = s2, netloc, path2, param2, query2, frag2 + fp.url = urllib.parse.urlunparse(parts) + + return fp + + +# copy of urllib.parse._splituser from Python 3.8 +# See https://github.com/python/cpython/issues/80072. 
+def _splituser(host): + """splituser('user[:passwd]@host[:port]') + --> 'user[:passwd]', 'host[:port]'.""" + user, delim, host = host.rpartition('@') + return (user if delim else None), host + + +# adding a timeout to avoid freezing package_index +open_with_auth = socket_timeout(_SOCKET_TIMEOUT)(open_with_auth) + + +def fix_sf_url(url): + return url # backward compatibility + + +def local_open(url): + """Read a local path, with special support for directories""" + _scheme, _server, path, _param, _query, _frag = urllib.parse.urlparse(url) + filename = urllib.request.url2pathname(path) + if os.path.isfile(filename): + return urllib.request.urlopen(url) + elif path.endswith('/') and os.path.isdir(filename): + files = [] + for f in os.listdir(filename): + filepath = os.path.join(filename, f) + if f == 'index.html': + body = _read_utf8_with_fallback(filepath) + break + elif os.path.isdir(filepath): + f += '/' + files.append(f'{f}') + else: + tmpl = "{url}{files}" + body = tmpl.format(url=url, files='\n'.join(files)) + status, message = 200, "OK" + else: + status, message, body = 404, "Path not found", "Not found" + + headers = {'content-type': 'text/html'} + body_stream = io.StringIO(body) + return urllib.error.HTTPError(url, status, message, headers, body_stream) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/script.tmpl b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/script.tmpl new file mode 100644 index 0000000000000000000000000000000000000000..ff5efbcab3b58063dd84787181c26a95fb663d94 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/script.tmpl @@ -0,0 +1,3 @@ +# EASY-INSTALL-SCRIPT: %(spec)r,%(script_name)r +__requires__ = %(spec)r +__import__('pkg_resources').run_script(%(spec)r, %(script_name)r) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/unicode_utils.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/unicode_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..a6e33f2e0d21a269102c3c13f0a27adf2854a03e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/unicode_utils.py @@ -0,0 +1,102 @@ +import sys +import unicodedata +from configparser import RawConfigParser + +from .compat import py39 +from .warnings import SetuptoolsDeprecationWarning + + +# HFS Plus uses decomposed UTF-8 +def decompose(path): + if isinstance(path, str): + return unicodedata.normalize('NFD', path) + try: + path = path.decode('utf-8') + path = unicodedata.normalize('NFD', path) + path = path.encode('utf-8') + except UnicodeError: + pass # Not UTF-8 + return path + + +def filesys_decode(path): + """ + Ensure that the given path is decoded, + ``None`` when no expected encoding works + """ + + if isinstance(path, str): + return path + + fs_enc = sys.getfilesystemencoding() or 'utf-8' + candidates = fs_enc, 'utf-8' + + for enc in candidates: + try: + return path.decode(enc) + except UnicodeDecodeError: + continue + + return None + + +def try_encode(string, enc): + "turn unicode encoding into a functional routine" + try: + return string.encode(enc) + except UnicodeEncodeError: + return None + + +def _read_utf8_with_fallback(file: str, fallback_encoding=py39.LOCALE_ENCODING) -> str: + """ + First try to read the file with UTF-8, if there is an error fallback to a + different encoding ("locale" by default). Returns the content of the file. 
+ Also useful when reading files that might have been produced by an older version of + setuptools. + """ + try: + with open(file, "r", encoding="utf-8") as f: + return f.read() + except UnicodeDecodeError: # pragma: no cover + _Utf8EncodingNeeded.emit(file=file, fallback_encoding=fallback_encoding) + with open(file, "r", encoding=fallback_encoding) as f: + return f.read() + + +def _cfg_read_utf8_with_fallback( + cfg: RawConfigParser, file: str, fallback_encoding=py39.LOCALE_ENCODING +) -> None: + """Same idea as :func:`_read_utf8_with_fallback`, but for the + :meth:`RawConfigParser.read` method. + + This method may call ``cfg.clear()``. + """ + try: + cfg.read(file, encoding="utf-8") + except UnicodeDecodeError: # pragma: no cover + _Utf8EncodingNeeded.emit(file=file, fallback_encoding=fallback_encoding) + cfg.clear() + cfg.read(file, encoding=fallback_encoding) + + +class _Utf8EncodingNeeded(SetuptoolsDeprecationWarning): + _SUMMARY = """ + `encoding="utf-8"` fails with {file!r}, trying `encoding={fallback_encoding!r}`. + """ + + _DETAILS = """ + Fallback behaviour for UTF-8 is considered **deprecated** and future versions of + `setuptools` may not implement it. + + Please encode {file!r} with "utf-8" to ensure future builds will succeed. + + If this file was produced by `setuptools` itself, cleaning up the cached files + and re-building/re-installing the package with a newer version of `setuptools` + (e.g. by updating `build-system.requires` in its `pyproject.toml`) + might solve the problem. + """ + # TODO: Add a deadline? + # Will we be able to remove this? + # The question comes to mind mainly because of sdists that have been produced + # by old versions of setuptools and published to PyPI... diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/wheel.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/wheel.py new file mode 100644 index 0000000000000000000000000000000000000000..c7ca43b5cfb2aff8d6983bbba4b7e6fdc9d01f83 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/setuptools/wheel.py @@ -0,0 +1,236 @@ +"""Wheels support.""" + +import contextlib +import email +import functools +import itertools +import os +import posixpath +import re +import zipfile + +from packaging.tags import sys_tags +from packaging.utils import canonicalize_name +from packaging.version import Version as parse_version + +import setuptools +from setuptools.archive_util import _unpack_zipfile_obj +from setuptools.command.egg_info import _egg_basename, write_requirements + +from .unicode_utils import _read_utf8_with_fallback + +from distutils.util import get_platform + +WHEEL_NAME = re.compile( + r"""^(?P.+?)-(?P\d.*?) + ((-(?P\d.*?))?-(?P.+?)-(?P.+?)-(?P.+?) + )\.whl$""", + re.VERBOSE, +).match + +NAMESPACE_PACKAGE_INIT = "__import__('pkg_resources').declare_namespace(__name__)\n" + + +@functools.cache +def _get_supported_tags(): + # We calculate the supported tags only once, otherwise calling + # this method on thousands of wheels takes seconds instead of + # milliseconds. 
+ return {(t.interpreter, t.abi, t.platform) for t in sys_tags()} + + +def unpack(src_dir, dst_dir) -> None: + """Move everything under `src_dir` to `dst_dir`, and delete the former.""" + for dirpath, dirnames, filenames in os.walk(src_dir): + subdir = os.path.relpath(dirpath, src_dir) + for f in filenames: + src = os.path.join(dirpath, f) + dst = os.path.join(dst_dir, subdir, f) + os.renames(src, dst) + for n, d in reversed(list(enumerate(dirnames))): + src = os.path.join(dirpath, d) + dst = os.path.join(dst_dir, subdir, d) + if not os.path.exists(dst): + # Directory does not exist in destination, + # rename it and prune it from os.walk list. + os.renames(src, dst) + del dirnames[n] + # Cleanup. + for dirpath, dirnames, filenames in os.walk(src_dir, topdown=True): + assert not filenames + os.rmdir(dirpath) + + +@contextlib.contextmanager +def disable_info_traces(): + """ + Temporarily disable info traces. + """ + from distutils import log + + saved = log.set_threshold(log.WARN) + try: + yield + finally: + log.set_threshold(saved) + + +class Wheel: + def __init__(self, filename) -> None: + match = WHEEL_NAME(os.path.basename(filename)) + if match is None: + raise ValueError(f'invalid wheel name: {filename!r}') + self.filename = filename + for k, v in match.groupdict().items(): + setattr(self, k, v) + + def tags(self): + """List tags (py_version, abi, platform) supported by this wheel.""" + return itertools.product( + self.py_version.split('.'), + self.abi.split('.'), + self.platform.split('.'), + ) + + def is_compatible(self): + """Is the wheel compatible with the current platform?""" + return next((True for t in self.tags() if t in _get_supported_tags()), False) + + def egg_name(self): + return ( + _egg_basename( + self.project_name, + self.version, + platform=(None if self.platform == 'any' else get_platform()), + ) + + ".egg" + ) + + def get_dist_info(self, zf): + # find the correct name of the .dist-info dir in the wheel file + for member in zf.namelist(): + dirname = posixpath.dirname(member) + if dirname.endswith('.dist-info') and canonicalize_name(dirname).startswith( + canonicalize_name(self.project_name) + ): + return dirname + raise ValueError("unsupported wheel format. .dist-info not found") + + def install_as_egg(self, destination_eggdir) -> None: + """Install wheel as an egg directory.""" + with zipfile.ZipFile(self.filename) as zf: + self._install_as_egg(destination_eggdir, zf) + + def _install_as_egg(self, destination_eggdir, zf): + dist_basename = f'{self.project_name}-{self.version}' + dist_info = self.get_dist_info(zf) + dist_data = f'{dist_basename}.data' + egg_info = os.path.join(destination_eggdir, 'EGG-INFO') + + self._convert_metadata(zf, destination_eggdir, dist_info, egg_info) + self._move_data_entries(destination_eggdir, dist_data) + self._fix_namespace_packages(egg_info, destination_eggdir) + + @staticmethod + def _convert_metadata(zf, destination_eggdir, dist_info, egg_info): + import pkg_resources + + def get_metadata(name): + with zf.open(posixpath.join(dist_info, name)) as fp: + value = fp.read().decode('utf-8') + return email.parser.Parser().parsestr(value) + + wheel_metadata = get_metadata('WHEEL') + # Check wheel format version is supported. + wheel_version = parse_version(wheel_metadata.get('Wheel-Version')) + wheel_v1 = parse_version('1.0') <= wheel_version < parse_version('2.0dev0') + if not wheel_v1: + raise ValueError(f'unsupported wheel format version: {wheel_version}') + # Extract to target directory. 
+ _unpack_zipfile_obj(zf, destination_eggdir) + # Convert metadata. + dist_info = os.path.join(destination_eggdir, dist_info) + dist = pkg_resources.Distribution.from_location( + destination_eggdir, + dist_info, + metadata=pkg_resources.PathMetadata(destination_eggdir, dist_info), + ) + + # Note: Evaluate and strip markers now, + # as it's difficult to convert back from the syntax: + # foobar; "linux" in sys_platform and extra == 'test' + def raw_req(req): + req.marker = None + return str(req) + + install_requires = list(map(raw_req, dist.requires())) + extras_require = { + extra: [ + req + for req in map(raw_req, dist.requires((extra,))) + if req not in install_requires + ] + for extra in dist.extras + } + os.rename(dist_info, egg_info) + os.rename( + os.path.join(egg_info, 'METADATA'), + os.path.join(egg_info, 'PKG-INFO'), + ) + setup_dist = setuptools.Distribution( + attrs=dict( + install_requires=install_requires, + extras_require=extras_require, + ), + ) + with disable_info_traces(): + write_requirements( + setup_dist.get_command_obj('egg_info'), + None, + os.path.join(egg_info, 'requires.txt'), + ) + + @staticmethod + def _move_data_entries(destination_eggdir, dist_data): + """Move data entries to their correct location.""" + dist_data = os.path.join(destination_eggdir, dist_data) + dist_data_scripts = os.path.join(dist_data, 'scripts') + if os.path.exists(dist_data_scripts): + egg_info_scripts = os.path.join(destination_eggdir, 'EGG-INFO', 'scripts') + os.mkdir(egg_info_scripts) + for entry in os.listdir(dist_data_scripts): + # Remove bytecode, as it's not properly handled + # during easy_install scripts install phase. + if entry.endswith('.pyc'): + os.unlink(os.path.join(dist_data_scripts, entry)) + else: + os.rename( + os.path.join(dist_data_scripts, entry), + os.path.join(egg_info_scripts, entry), + ) + os.rmdir(dist_data_scripts) + for subdir in filter( + os.path.exists, + ( + os.path.join(dist_data, d) + for d in ('data', 'headers', 'purelib', 'platlib') + ), + ): + unpack(subdir, destination_eggdir) + if os.path.exists(dist_data): + os.rmdir(dist_data) + + @staticmethod + def _fix_namespace_packages(egg_info, destination_eggdir): + namespace_packages = os.path.join(egg_info, 'namespace_packages.txt') + if os.path.exists(namespace_packages): + namespace_packages = _read_utf8_with_fallback(namespace_packages).split() + + for mod in namespace_packages: + mod_dir = os.path.join(destination_eggdir, *mod.split('.')) + mod_init = os.path.join(mod_dir, '__init__.py') + if not os.path.exists(mod_dir): + os.mkdir(mod_dir) + if not os.path.exists(mod_init): + with open(mod_init, 'w', encoding="utf-8") as fp: + fp.write(NAMESPACE_PACKAGE_INIT) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/shellingham/__init__.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/shellingham/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..15f7a90cbd02e5c2cc933cf6aa0374cca68035f1 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/shellingham/__init__.py @@ -0,0 +1,23 @@ +import importlib +import os + +from ._core import ShellDetectionFailure + +__version__ = "1.5.4" + + +def detect_shell(pid=None, max_depth=10): + name = os.name + try: + impl = importlib.import_module(".{}".format(name), __name__) + except ImportError: + message = "Shell detection not implemented for {0!r}".format(name) + raise RuntimeError(message) + try: + get_shell = impl.get_shell + except AttributeError: + raise RuntimeError("get_shell not 
implemented for {0!r}".format(name)) + shell = get_shell(pid, max_depth=max_depth) + if shell: + return shell + raise ShellDetectionFailure() diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/shellingham/_core.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/shellingham/_core.py new file mode 100644 index 0000000000000000000000000000000000000000..13b65417c733b54e48b120e37f573c2baa6ef72b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/shellingham/_core.py @@ -0,0 +1,11 @@ +SHELL_NAMES = ( + {"sh", "bash", "dash", "ash"} # Bourne. + | {"csh", "tcsh"} # C. + | {"ksh", "zsh", "fish"} # Common alternatives. + | {"cmd", "powershell", "pwsh"} # Microsoft. + | {"elvish", "xonsh", "nu"} # More exotic. +) + + +class ShellDetectionFailure(EnvironmentError): + pass diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/shellingham/nt.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/shellingham/nt.py new file mode 100644 index 0000000000000000000000000000000000000000..389551b223a761fa2f97e929b60bf3ca5baed94c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/shellingham/nt.py @@ -0,0 +1,163 @@ +import contextlib +import ctypes +import os + +from ctypes.wintypes import ( + BOOL, + CHAR, + DWORD, + HANDLE, + LONG, + LPWSTR, + MAX_PATH, + PDWORD, + ULONG, +) + +from shellingham._core import SHELL_NAMES + + +INVALID_HANDLE_VALUE = HANDLE(-1).value +ERROR_NO_MORE_FILES = 18 +ERROR_INSUFFICIENT_BUFFER = 122 +TH32CS_SNAPPROCESS = 2 +PROCESS_QUERY_LIMITED_INFORMATION = 0x1000 + + +kernel32 = ctypes.windll.kernel32 + + +def _check_handle(error_val=0): + def check(ret, func, args): + if ret == error_val: + raise ctypes.WinError() + return ret + + return check + + +def _check_expected(expected): + def check(ret, func, args): + if ret: + return True + code = ctypes.GetLastError() + if code == expected: + return False + raise ctypes.WinError(code) + + return check + + +class ProcessEntry32(ctypes.Structure): + _fields_ = ( + ("dwSize", DWORD), + ("cntUsage", DWORD), + ("th32ProcessID", DWORD), + ("th32DefaultHeapID", ctypes.POINTER(ULONG)), + ("th32ModuleID", DWORD), + ("cntThreads", DWORD), + ("th32ParentProcessID", DWORD), + ("pcPriClassBase", LONG), + ("dwFlags", DWORD), + ("szExeFile", CHAR * MAX_PATH), + ) + + +kernel32.CloseHandle.argtypes = [HANDLE] +kernel32.CloseHandle.restype = BOOL + +kernel32.CreateToolhelp32Snapshot.argtypes = [DWORD, DWORD] +kernel32.CreateToolhelp32Snapshot.restype = HANDLE +kernel32.CreateToolhelp32Snapshot.errcheck = _check_handle( # type: ignore + INVALID_HANDLE_VALUE, +) + +kernel32.Process32First.argtypes = [HANDLE, ctypes.POINTER(ProcessEntry32)] +kernel32.Process32First.restype = BOOL +kernel32.Process32First.errcheck = _check_expected( # type: ignore + ERROR_NO_MORE_FILES, +) + +kernel32.Process32Next.argtypes = [HANDLE, ctypes.POINTER(ProcessEntry32)] +kernel32.Process32Next.restype = BOOL +kernel32.Process32Next.errcheck = _check_expected( # type: ignore + ERROR_NO_MORE_FILES, +) + +kernel32.GetCurrentProcessId.argtypes = [] +kernel32.GetCurrentProcessId.restype = DWORD + +kernel32.OpenProcess.argtypes = [DWORD, BOOL, DWORD] +kernel32.OpenProcess.restype = HANDLE +kernel32.OpenProcess.errcheck = _check_handle( # type: ignore + INVALID_HANDLE_VALUE, +) + +kernel32.QueryFullProcessImageNameW.argtypes = [HANDLE, DWORD, LPWSTR, PDWORD] +kernel32.QueryFullProcessImageNameW.restype = BOOL +kernel32.QueryFullProcessImageNameW.errcheck = _check_expected( # 
type: ignore + ERROR_INSUFFICIENT_BUFFER, +) + + +@contextlib.contextmanager +def _handle(f, *args, **kwargs): + handle = f(*args, **kwargs) + try: + yield handle + finally: + kernel32.CloseHandle(handle) + + +def _iter_processes(): + f = kernel32.CreateToolhelp32Snapshot + with _handle(f, TH32CS_SNAPPROCESS, 0) as snap: + entry = ProcessEntry32() + entry.dwSize = ctypes.sizeof(entry) + ret = kernel32.Process32First(snap, entry) + while ret: + yield entry + ret = kernel32.Process32Next(snap, entry) + + +def _get_full_path(proch): + size = DWORD(MAX_PATH) + while True: + path_buff = ctypes.create_unicode_buffer("", size.value) + if kernel32.QueryFullProcessImageNameW(proch, 0, path_buff, size): + return path_buff.value + size.value *= 2 + + +def get_shell(pid=None, max_depth=10): + proc_map = { + proc.th32ProcessID: (proc.th32ParentProcessID, proc.szExeFile) + for proc in _iter_processes() + } + pid = pid or os.getpid() + + for _ in range(0, max_depth + 1): + try: + ppid, executable = proc_map[pid] + except KeyError: # No such process? Give up. + break + + # The executable name would be encoded with the current code page if + # we're in ANSI mode (usually). Try to decode it into str/unicode, + # replacing invalid characters to be safe (not theoretically necessary, + # I think). Note that we need to use 'mbcs' instead of encoding + # settings from sys because this is from the Windows API, not Python + # internals (which those settings reflect). (pypa/pipenv#3382) + if isinstance(executable, bytes): + executable = executable.decode("mbcs", "replace") + + name = executable.rpartition(".")[0].lower() + if name not in SHELL_NAMES: + pid = ppid + continue + + key = PROCESS_QUERY_LIMITED_INFORMATION + with _handle(kernel32.OpenProcess, key, 0, pid) as proch: + return (name, _get_full_path(proch)) + + return None diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/sniffio/__init__.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/sniffio/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..63f2f19e409c7f4b3c6c064022e8f104227873aa --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/sniffio/__init__.py @@ -0,0 +1,17 @@ +"""Top-level package for sniffio.""" + +__all__ = [ + "current_async_library", + "AsyncLibraryNotFoundError", + "current_async_library_cvar", + "thread_local", +] + +from ._version import __version__ + +from ._impl import ( + current_async_library, + AsyncLibraryNotFoundError, + current_async_library_cvar, + thread_local, +) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/sniffio/_impl.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/sniffio/_impl.py new file mode 100644 index 0000000000000000000000000000000000000000..c1a7bbf218ba985b87cd1d9b23da69222894c1dd --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/sniffio/_impl.py @@ -0,0 +1,95 @@ +from contextvars import ContextVar +from typing import Optional +import sys +import threading + +current_async_library_cvar = ContextVar( + "current_async_library_cvar", default=None +) # type: ContextVar[Optional[str]] + + +class _ThreadLocal(threading.local): + # Since threading.local provides no explicit mechanism for setting + # a default for a value, a custom class with a class attribute is used + # instead.
+ name = None # type: Optional[str] + + +thread_local = _ThreadLocal() + + +class AsyncLibraryNotFoundError(RuntimeError): + pass + + +def current_async_library() -> str: + """Detect which async library is currently running. + + The following libraries are currently supported: + + ================ =========== ============================ + Library Requires Magic string + ================ =========== ============================ + **Trio** Trio v0.6+ ``"trio"`` + **Curio** - ``"curio"`` + **asyncio** ``"asyncio"`` + **Trio-asyncio** v0.8.2+ ``"trio"`` or ``"asyncio"``, + depending on current mode + ================ =========== ============================ + + Returns: + A string like ``"trio"``. + + Raises: + AsyncLibraryNotFoundError: if called from synchronous context, + or if the current async library was not recognized. + + Examples: + + .. code-block:: python3 + + from sniffio import current_async_library + + async def generic_sleep(seconds): + library = current_async_library() + if library == "trio": + import trio + await trio.sleep(seconds) + elif library == "asyncio": + import asyncio + await asyncio.sleep(seconds) + # ... and so on ... + else: + raise RuntimeError(f"Unsupported library {library!r}") + + """ + value = thread_local.name + if value is not None: + return value + + value = current_async_library_cvar.get() + if value is not None: + return value + + # Need to sniff for asyncio + if "asyncio" in sys.modules: + import asyncio + try: + current_task = asyncio.current_task # type: ignore[attr-defined] + except AttributeError: + current_task = asyncio.Task.current_task # type: ignore[attr-defined] + try: + if current_task() is not None: + return "asyncio" + except RuntimeError: + pass + + # Sniff for curio (for now) + if 'curio' in sys.modules: + from curio.meta import curio_running + if curio_running(): + return 'curio' + + raise AsyncLibraryNotFoundError( + "unknown async library, or not in async context" + ) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/sniffio/_version.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/sniffio/_version.py new file mode 100644 index 0000000000000000000000000000000000000000..0495d10545c9fd515ed51e890309d2b66e2c30bb --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/sniffio/_version.py @@ -0,0 +1,3 @@ +# This file is imported from __init__.py and exec'd from setup.py + +__version__ = "1.3.1" diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/sniffio/py.typed b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/sniffio/py.typed new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/tqdm-4.67.1.dist-info/INSTALLER b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/tqdm-4.67.1.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/tqdm-4.67.1.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/tqdm-4.67.1.dist-info/LICENCE b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/tqdm-4.67.1.dist-info/LICENCE new file mode 100644 index 0000000000000000000000000000000000000000..a8922b182e80d9bcb955e8b8ae2bd9a017d72977 --- /dev/null +++ 
b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/tqdm-4.67.1.dist-info/LICENCE @@ -0,0 +1,49 @@ +`tqdm` is a product of collaborative work. +Unless otherwise stated, all authors (see commit logs) retain copyright +for their respective work, and release the work under the MIT licence +(text below). + +Exceptions or notable authors are listed below +in reverse chronological order: + +* files: * + MPL-2.0 2015-2024 (c) Casper da Costa-Luis + [casperdcl](https://github.com/casperdcl). +* files: tqdm/_tqdm.py + MIT 2016 (c) [PR #96] on behalf of Google Inc. +* files: tqdm/_tqdm.py README.rst .gitignore + MIT 2013 (c) Noam Yorav-Raphael, original author. + +[PR #96]: https://github.com/tqdm/tqdm/pull/96 + + +Mozilla Public Licence (MPL) v. 2.0 - Exhibit A +----------------------------------------------- + +This Source Code Form is subject to the terms of the +Mozilla Public License, v. 2.0. +If a copy of the MPL was not distributed with this project, +You can obtain one at https://mozilla.org/MPL/2.0/. + + +MIT License (MIT) +----------------- + +Copyright (c) 2013 noamraph + +Permission is hereby granted, free of charge, to any person obtaining a copy of +this software and associated documentation files (the "Software"), to deal in +the Software without restriction, including without limitation the rights to +use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR +COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/tqdm-4.67.1.dist-info/METADATA b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/tqdm-4.67.1.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..181b4dc8b2f8697d1c0374a612ffd8b2f2db346a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/tqdm-4.67.1.dist-info/METADATA @@ -0,0 +1,1594 @@ +Metadata-Version: 2.1 +Name: tqdm +Version: 4.67.1 +Summary: Fast, Extensible Progress Meter +Maintainer-email: tqdm developers +License: MPL-2.0 AND MIT +Project-URL: homepage, https://tqdm.github.io +Project-URL: repository, https://github.com/tqdm/tqdm +Project-URL: changelog, https://tqdm.github.io/releases +Project-URL: wiki, https://github.com/tqdm/tqdm/wiki +Keywords: progressbar,progressmeter,progress,bar,meter,rate,eta,console,terminal,time +Classifier: Development Status :: 5 - Production/Stable +Classifier: Environment :: Console +Classifier: Environment :: MacOS X +Classifier: Environment :: Other Environment +Classifier: Environment :: Win32 (MS Windows) +Classifier: Environment :: X11 Applications +Classifier: Framework :: IPython +Classifier: Framework :: Jupyter +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: Education +Classifier: Intended Audience :: End Users/Desktop +Classifier: Intended Audience :: Other Audience +Classifier: Intended Audience :: System Administrators +Classifier: License :: OSI Approved :: MIT License +Classifier: License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0) +Classifier: Operating System :: MacOS +Classifier: Operating System :: MacOS :: MacOS X +Classifier: Operating System :: Microsoft +Classifier: Operating System :: Microsoft :: MS-DOS +Classifier: Operating System :: Microsoft :: Windows +Classifier: Operating System :: POSIX +Classifier: Operating System :: POSIX :: BSD +Classifier: Operating System :: POSIX :: BSD :: FreeBSD +Classifier: Operating System :: POSIX :: Linux +Classifier: Operating System :: POSIX :: SunOS/Solaris +Classifier: Operating System :: Unix +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Programming Language :: Python :: 3.7 +Classifier: Programming Language :: Python :: 3.8 +Classifier: Programming Language :: Python :: 3.9 +Classifier: Programming Language :: Python :: 3.10 +Classifier: Programming Language :: Python :: 3.11 +Classifier: Programming Language :: Python :: 3.12 +Classifier: Programming Language :: Python :: 3 :: Only +Classifier: Programming Language :: Python :: Implementation +Classifier: Programming Language :: Python :: Implementation :: IronPython +Classifier: Programming Language :: Python :: Implementation :: PyPy +Classifier: Programming Language :: Unix Shell +Classifier: Topic :: Desktop Environment +Classifier: Topic :: Education :: Computer Aided Instruction (CAI) +Classifier: Topic :: Education :: Testing +Classifier: Topic :: Office/Business +Classifier: Topic :: Other/Nonlisted Topic +Classifier: Topic :: Software Development :: Build Tools +Classifier: Topic :: Software Development :: Libraries +Classifier: Topic :: Software Development :: Libraries :: Python Modules +Classifier: Topic :: Software Development :: Pre-processors +Classifier: Topic :: Software Development :: User Interfaces +Classifier: Topic :: System :: Installation/Setup +Classifier: Topic :: System :: Logging +Classifier: Topic :: System :: Monitoring +Classifier: Topic :: System :: 
Shells +Classifier: Topic :: Terminals +Classifier: Topic :: Utilities +Requires-Python: >=3.7 +Description-Content-Type: text/x-rst +License-File: LICENCE +Requires-Dist: colorama; platform_system == "Windows" +Provides-Extra: dev +Requires-Dist: pytest>=6; extra == "dev" +Requires-Dist: pytest-cov; extra == "dev" +Requires-Dist: pytest-timeout; extra == "dev" +Requires-Dist: pytest-asyncio>=0.24; extra == "dev" +Requires-Dist: nbval; extra == "dev" +Provides-Extra: discord +Requires-Dist: requests; extra == "discord" +Provides-Extra: slack +Requires-Dist: slack-sdk; extra == "slack" +Provides-Extra: telegram +Requires-Dist: requests; extra == "telegram" +Provides-Extra: notebook +Requires-Dist: ipywidgets>=6; extra == "notebook" + +|Logo| + +tqdm +==== + +|Py-Versions| |Versions| |Conda-Forge-Status| |Docker| |Snapcraft| + +|Build-Status| |Coverage-Status| |Branch-Coverage-Status| |Codacy-Grade| |Libraries-Rank| |PyPI-Downloads| + +|LICENCE| |OpenHub-Status| |binder-demo| |awesome-python| + +``tqdm`` derives from the Arabic word *taqaddum* (تقدّم) which can mean "progress," +and is an abbreviation for "I love you so much" in Spanish (*te quiero demasiado*). + +Instantly make your loops show a smart progress meter - just wrap any +iterable with ``tqdm(iterable)``, and you're done! + +.. code:: python + + from tqdm import tqdm + for i in tqdm(range(10000)): + ... + +``76%|████████████████████████        | 7568/10000 [00:33<00:10, 229.00it/s]`` + +``trange(N)`` can be also used as a convenient shortcut for +``tqdm(range(N))``. + +|Screenshot| + |Video| |Slides| |Merch| + +It can also be executed as a module with pipes: + +.. code:: sh + + $ seq 9999999 | tqdm --bytes | wc -l + 75.2MB [00:00, 217MB/s] + 9999999 + + $ tar -zcf - docs/ | tqdm --bytes --total `du -sb docs/ | cut -f1` \ + > backup.tgz + 32%|██████████▍ | 8.89G/27.9G [00:42<01:31, 223MB/s] + +Overhead is low -- about 60ns per iteration (80ns with ``tqdm.gui``), and is +unit tested against performance regression. +By comparison, the well-established +`ProgressBar `__ has +an 800ns/iter overhead. + +In addition to its low overhead, ``tqdm`` uses smart algorithms to predict +the remaining time and to skip unnecessary iteration displays, which allows +for a negligible overhead in most cases. + +``tqdm`` works on any platform +(Linux, Windows, Mac, FreeBSD, NetBSD, Solaris/SunOS), +in any console or in a GUI, and is also friendly with IPython/Jupyter notebooks. + +``tqdm`` does not require any dependencies (not even ``curses``!), just +Python and an environment supporting ``carriage return \r`` and +``line feed \n`` control characters. + +------------------------------------------ + +.. contents:: Table of contents + :backlinks: top + :local: + + +Installation +------------ + +Latest PyPI stable release +~~~~~~~~~~~~~~~~~~~~~~~~~~ + +|Versions| |PyPI-Downloads| |Libraries-Dependents| + +.. code:: sh + + pip install tqdm + +Latest development release on GitHub +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +|GitHub-Status| |GitHub-Stars| |GitHub-Commits| |GitHub-Forks| |GitHub-Updated| + +Pull and install pre-release ``devel`` branch: + +.. code:: sh + + pip install "git+https://github.com/tqdm/tqdm.git@devel#egg=tqdm" + +Latest Conda release +~~~~~~~~~~~~~~~~~~~~ + +|Conda-Forge-Status| + +.. code:: sh + + conda install -c conda-forge tqdm + +Latest Snapcraft release +~~~~~~~~~~~~~~~~~~~~~~~~ + +|Snapcraft| + +There are 3 channels to choose from: + +.. code:: sh + + snap install tqdm # implies --stable, i.e. 
latest tagged release + snap install tqdm --candidate # master branch + snap install tqdm --edge # devel branch + +Note that ``snap`` binaries are purely for CLI use (not ``import``-able), and +automatically set up ``bash`` tab-completion. + +Latest Docker release +~~~~~~~~~~~~~~~~~~~~~ + +|Docker| + +.. code:: sh + + docker pull tqdm/tqdm + docker run -i --rm tqdm/tqdm --help + +Other +~~~~~ + +There are other (unofficial) places where ``tqdm`` may be downloaded, particularly for CLI use: + +|Repology| + +.. |Repology| image:: https://repology.org/badge/tiny-repos/python:tqdm.svg + :target: https://repology.org/project/python:tqdm/versions + +Changelog +--------- + +The list of all changes is available either on GitHub's Releases: +|GitHub-Status|, on the +`wiki `__, or on the +`website `__. + + +Usage +----- + +``tqdm`` is very versatile and can be used in a number of ways. +The three main ones are given below. + +Iterable-based +~~~~~~~~~~~~~~ + +Wrap ``tqdm()`` around any iterable: + +.. code:: python + + from tqdm import tqdm + from time import sleep + + text = "" + for char in tqdm(["a", "b", "c", "d"]): + sleep(0.25) + text = text + char + +``trange(i)`` is a special optimised instance of ``tqdm(range(i))``: + +.. code:: python + + from tqdm import trange + + for i in trange(100): + sleep(0.01) + +Instantiation outside of the loop allows for manual control over ``tqdm()``: + +.. code:: python + + pbar = tqdm(["a", "b", "c", "d"]) + for char in pbar: + sleep(0.25) + pbar.set_description("Processing %s" % char) + +Manual +~~~~~~ + +Manual control of ``tqdm()`` updates using a ``with`` statement: + +.. code:: python + + with tqdm(total=100) as pbar: + for i in range(10): + sleep(0.1) + pbar.update(10) + +If the optional variable ``total`` (or an iterable with ``len()``) is +provided, predictive stats are displayed. + +``with`` is also optional (you can just assign ``tqdm()`` to a variable, +but in this case don't forget to ``del`` or ``close()`` at the end: + +.. code:: python + + pbar = tqdm(total=100) + for i in range(10): + sleep(0.1) + pbar.update(10) + pbar.close() + +Module +~~~~~~ + +Perhaps the most wonderful use of ``tqdm`` is in a script or on the command +line. Simply inserting ``tqdm`` (or ``python -m tqdm``) between pipes will pass +through all ``stdin`` to ``stdout`` while printing progress to ``stderr``. + +The example below demonstrate counting the number of lines in all Python files +in the current directory, with timing information included. + +.. code:: sh + + $ time find . -name '*.py' -type f -exec cat \{} \; | wc -l + 857365 + + real 0m3.458s + user 0m0.274s + sys 0m3.325s + + $ time find . -name '*.py' -type f -exec cat \{} \; | tqdm | wc -l + 857366it [00:03, 246471.31it/s] + 857365 + + real 0m3.585s + user 0m0.862s + sys 0m3.358s + +Note that the usual arguments for ``tqdm`` can also be specified. + +.. code:: sh + + $ find . -name '*.py' -type f -exec cat \{} \; | + tqdm --unit loc --unit_scale --total 857366 >> /dev/null + 100%|█████████████████████████████████| 857K/857K [00:04<00:00, 246Kloc/s] + +Backing up a large directory? + +.. code:: sh + + $ tar -zcf - docs/ | tqdm --bytes --total `du -sb docs/ | cut -f1` \ + > backup.tgz + 44%|██████████████▊ | 153M/352M [00:14<00:18, 11.0MB/s] + +This can be beautified further: + +.. 
code:: sh + + $ BYTES=$(du -sb docs/ | cut -f1) + $ tar -cf - docs/ \ + | tqdm --bytes --total "$BYTES" --desc Processing | gzip \ + | tqdm --bytes --total "$BYTES" --desc Compressed --position 1 \ + > ~/backup.tgz + Processing: 100%|██████████████████████| 352M/352M [00:14<00:00, 30.2MB/s] + Compressed: 42%|█████████▎ | 148M/352M [00:14<00:19, 10.9MB/s] + +Or done on a file level using 7-zip: + +.. code:: sh + + $ 7z a -bd -r backup.7z docs/ | grep Compressing \ + | tqdm --total $(find docs/ -type f | wc -l) --unit files \ + | grep -v Compressing + 100%|██████████████████████████▉| 15327/15327 [01:00<00:00, 712.96files/s] + +Pre-existing CLI programs already outputting basic progress information will +benefit from ``tqdm``'s ``--update`` and ``--update_to`` flags: + +.. code:: sh + + $ seq 3 0.1 5 | tqdm --total 5 --update_to --null + 100%|████████████████████████████████████| 5.0/5 [00:00<00:00, 9673.21it/s] + $ seq 10 | tqdm --update --null # 1 + 2 + ... + 10 = 55 iterations + 55it [00:00, 90006.52it/s] + +FAQ and Known Issues +-------------------- + +|GitHub-Issues| + +The most common issues relate to excessive output on multiple lines, instead +of a neat one-line progress bar. + +- Consoles in general: require support for carriage return (``CR``, ``\r``). + + * Some cloud logging consoles which don't support ``\r`` properly + (`cloudwatch `__, + `K8s `__) may benefit from + ``export TQDM_POSITION=-1``. + +- Nested progress bars: + + * Consoles in general: require support for moving cursors up to the + previous line. For example, + `IDLE `__, + `ConEmu `__ and + `PyCharm `__ (also + `here `__, + `here `__, and + `here `__) + lack full support. + * Windows: additionally may require the Python module ``colorama`` + to ensure nested bars stay within their respective lines. + +- Unicode: + + * Environments which report that they support unicode will have solid smooth + progressbars. The fallback is an ``ascii``-only bar. + * Windows consoles often only partially support unicode and thus + `often require explicit ascii=True `__ + (also `here `__). This is due to + either normal-width unicode characters being incorrectly displayed as + "wide", or some unicode characters not rendering. + +- Wrapping generators: + + * Generator wrapper functions tend to hide the length of iterables. + ``tqdm`` does not. + * Replace ``tqdm(enumerate(...))`` with ``enumerate(tqdm(...))`` or + ``tqdm(enumerate(x), total=len(x), ...)``. + The same applies to ``numpy.ndenumerate``. + * Replace ``tqdm(zip(a, b))`` with ``zip(tqdm(a), b)`` or even + ``zip(tqdm(a), tqdm(b))``. + * The same applies to ``itertools``. + * Some useful convenience functions can be found under ``tqdm.contrib``. + +- `No intermediate output in docker-compose `__: + use ``docker-compose run`` instead of ``docker-compose up`` and ``tty: true``. + +- Overriding defaults via environment variables: + e.g. in CI/cloud jobs, ``export TQDM_MININTERVAL=5`` to avoid log spam. + This override logic is handled by the ``tqdm.utils.envwrap`` decorator + (useful independent of ``tqdm``). + +If you come across any other difficulties, browse and file |GitHub-Issues|. + +Documentation +------------- + +|Py-Versions| |README-Hits| (Since 19 May 2016) + +.. code:: python + + class tqdm(): + """ + Decorate an iterable object, returning an iterator which acts exactly + like the original iterable, but prints a dynamically updating + progressbar every time a value is requested. 
+ """ + + @envwrap("TQDM_") # override defaults via env vars + def __init__(self, iterable=None, desc=None, total=None, leave=True, + file=None, ncols=None, mininterval=0.1, + maxinterval=10.0, miniters=None, ascii=None, disable=False, + unit='it', unit_scale=False, dynamic_ncols=False, + smoothing=0.3, bar_format=None, initial=0, position=None, + postfix=None, unit_divisor=1000, write_bytes=False, + lock_args=None, nrows=None, colour=None, delay=0): + +Parameters +~~~~~~~~~~ + +* iterable : iterable, optional + Iterable to decorate with a progressbar. + Leave blank to manually manage the updates. +* desc : str, optional + Prefix for the progressbar. +* total : int or float, optional + The number of expected iterations. If unspecified, + len(iterable) is used if possible. If float("inf") or as a last + resort, only basic progress statistics are displayed + (no ETA, no progressbar). + If ``gui`` is True and this parameter needs subsequent updating, + specify an initial arbitrary large positive number, + e.g. 9e9. +* leave : bool, optional + If [default: True], keeps all traces of the progressbar + upon termination of iteration. + If ``None``, will leave only if ``position`` is ``0``. +* file : ``io.TextIOWrapper`` or ``io.StringIO``, optional + Specifies where to output the progress messages + (default: sys.stderr). Uses ``file.write(str)`` and ``file.flush()`` + methods. For encoding, see ``write_bytes``. +* ncols : int, optional + The width of the entire output message. If specified, + dynamically resizes the progressbar to stay within this bound. + If unspecified, attempts to use environment width. The + fallback is a meter width of 10 and no limit for the counter and + statistics. If 0, will not print any meter (only stats). +* mininterval : float, optional + Minimum progress display update interval [default: 0.1] seconds. +* maxinterval : float, optional + Maximum progress display update interval [default: 10] seconds. + Automatically adjusts ``miniters`` to correspond to ``mininterval`` + after long display update lag. Only works if ``dynamic_miniters`` + or monitor thread is enabled. +* miniters : int or float, optional + Minimum progress display update interval, in iterations. + If 0 and ``dynamic_miniters``, will automatically adjust to equal + ``mininterval`` (more CPU efficient, good for tight loops). + If > 0, will skip display of specified number of iterations. + Tweak this and ``mininterval`` to get very efficient loops. + If your progress is erratic with both fast and slow iterations + (network, skipping items, etc) you should set miniters=1. +* ascii : bool or str, optional + If unspecified or False, use unicode (smooth blocks) to fill + the meter. The fallback is to use ASCII characters " 123456789#". +* disable : bool, optional + Whether to disable the entire progressbar wrapper + [default: False]. If set to None, disable on non-TTY. +* unit : str, optional + String that will be used to define the unit of each iteration + [default: it]. +* unit_scale : bool or int or float, optional + If 1 or True, the number of iterations will be reduced/scaled + automatically and a metric prefix following the + International System of Units standard will be added + (kilo, mega, etc.) [default: False]. If any other non-zero + number, will scale ``total`` and ``n``. +* dynamic_ncols : bool, optional + If set, constantly alters ``ncols`` and ``nrows`` to the + environment (allowing for window resizes) [default: False]. 
+* smoothing : float, optional + Exponential moving average smoothing factor for speed estimates + (ignored in GUI mode). Ranges from 0 (average speed) to 1 + (current/instantaneous speed) [default: 0.3]. +* bar_format : str, optional + Specify a custom bar string formatting. May impact performance. + [default: '{l_bar}{bar}{r_bar}'], where + l_bar='{desc}: {percentage:3.0f}%|' and + r_bar='| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, ' + '{rate_fmt}{postfix}]' + Possible vars: l_bar, bar, r_bar, n, n_fmt, total, total_fmt, + percentage, elapsed, elapsed_s, ncols, nrows, desc, unit, + rate, rate_fmt, rate_noinv, rate_noinv_fmt, + rate_inv, rate_inv_fmt, postfix, unit_divisor, + remaining, remaining_s, eta. + Note that a trailing ": " is automatically removed after {desc} + if the latter is empty. +* initial : int or float, optional + The initial counter value. Useful when restarting a progress + bar [default: 0]. If using float, consider specifying ``{n:.3f}`` + or similar in ``bar_format``, or specifying ``unit_scale``. +* position : int, optional + Specify the line offset to print this bar (starting from 0) + Automatic if unspecified. + Useful to manage multiple bars at once (eg, from threads). +* postfix : dict or ``*``, optional + Specify additional stats to display at the end of the bar. + Calls ``set_postfix(**postfix)`` if possible (dict). +* unit_divisor : float, optional + [default: 1000], ignored unless ``unit_scale`` is True. +* write_bytes : bool, optional + Whether to write bytes. If (default: False) will write unicode. +* lock_args : tuple, optional + Passed to ``refresh`` for intermediate output + (initialisation, iterating, and updating). +* nrows : int, optional + The screen height. If specified, hides nested bars outside this + bound. If unspecified, attempts to use environment height. + The fallback is 20. +* colour : str, optional + Bar colour (e.g. 'green', '#00ff00'). +* delay : float, optional + Don't display until [default: 0] seconds have elapsed. + +Extra CLI Options +~~~~~~~~~~~~~~~~~ + +* delim : chr, optional + Delimiting character [default: '\n']. Use '\0' for null. + N.B.: on Windows systems, Python converts '\n' to '\r\n'. +* buf_size : int, optional + String buffer size in bytes [default: 256] + used when ``delim`` is specified. +* bytes : bool, optional + If true, will count bytes, ignore ``delim``, and default + ``unit_scale`` to True, ``unit_divisor`` to 1024, and ``unit`` to 'B'. +* tee : bool, optional + If true, passes ``stdin`` to both ``stderr`` and ``stdout``. +* update : bool, optional + If true, will treat input as newly elapsed iterations, + i.e. numbers to pass to ``update()``. Note that this is slow + (~2e5 it/s) since every input must be decoded as a number. +* update_to : bool, optional + If true, will treat input as total elapsed iterations, + i.e. numbers to assign to ``self.n``. Note that this is slow + (~2e5 it/s) since every input must be decoded as a number. +* null : bool, optional + If true, will discard input (no stdout). +* manpath : str, optional + Directory in which to install tqdm man pages. +* comppath : str, optional + Directory in which to place tqdm completion. +* log : str, optional + CRITICAL|FATAL|ERROR|WARN(ING)|[default: 'INFO']|DEBUG|NOTSET. + +Returns +~~~~~~~ + +* out : decorated iterator. + +.. code:: python + + class tqdm(): + def update(self, n=1): + """ + Manually update the progress bar, useful for streams + such as reading files. 
+ E.g.: + >>> t = tqdm(total=filesize) # Initialise + >>> for current_buffer in stream: + ... ... + ... t.update(len(current_buffer)) + >>> t.close() + The last line is highly recommended, but possibly not necessary if + ``t.update()`` will be called in such a way that ``filesize`` will be + exactly reached and printed. + + Parameters + ---------- + n : int or float, optional + Increment to add to the internal counter of iterations + [default: 1]. If using float, consider specifying ``{n:.3f}`` + or similar in ``bar_format``, or specifying ``unit_scale``. + + Returns + ------- + out : bool or None + True if a ``display()`` was triggered. + """ + + def close(self): + """Cleanup and (if leave=False) close the progressbar.""" + + def clear(self, nomove=False): + """Clear current bar display.""" + + def refresh(self): + """ + Force refresh the display of this bar. + + Parameters + ---------- + nolock : bool, optional + If ``True``, does not lock. + If [default: ``False``]: calls ``acquire()`` on internal lock. + lock_args : tuple, optional + Passed to internal lock's ``acquire()``. + If specified, will only ``display()`` if ``acquire()`` returns ``True``. + """ + + def unpause(self): + """Restart tqdm timer from last print time.""" + + def reset(self, total=None): + """ + Resets to 0 iterations for repeated use. + + Consider combining with ``leave=True``. + + Parameters + ---------- + total : int or float, optional. Total to use for the new bar. + """ + + def set_description(self, desc=None, refresh=True): + """ + Set/modify description of the progress bar. + + Parameters + ---------- + desc : str, optional + refresh : bool, optional + Forces refresh [default: True]. + """ + + def set_postfix(self, ordered_dict=None, refresh=True, **tqdm_kwargs): + """ + Set/modify postfix (additional stats) + with automatic formatting based on datatype. + + Parameters + ---------- + ordered_dict : dict or OrderedDict, optional + refresh : bool, optional + Forces refresh [default: True]. + kwargs : dict, optional + """ + + @classmethod + def write(cls, s, file=sys.stdout, end="\n"): + """Print a message via tqdm (without overlap with bars).""" + + @property + def format_dict(self): + """Public API for read-only member access.""" + + def display(self, msg=None, pos=None): + """ + Use ``self.sp`` to display ``msg`` in the specified ``pos``. + + Consider overloading this function when inheriting to use e.g.: + ``self.some_frontend(**self.format_dict)`` instead of ``self.sp``. + + Parameters + ---------- + msg : str, optional. What to display (default: ``repr(self)``). + pos : int, optional. Position to ``moveto`` + (default: ``abs(self.pos)``). + """ + + @classmethod + @contextmanager + def wrapattr(cls, stream, method, total=None, bytes=True, **tqdm_kwargs): + """ + stream : file-like object. + method : str, "read" or "write". The result of ``read()`` and + the first argument of ``write()`` should have a ``len()``. + + >>> with tqdm.wrapattr(file_obj, "read", total=file_obj.size) as fobj: + ... while True: + ... chunk = fobj.read(chunk_size) + ... if not chunk: + ... break + """ + + @classmethod + def pandas(cls, *targs, **tqdm_kwargs): + """Registers the current `tqdm` class with `pandas`.""" + + def trange(*args, **tqdm_kwargs): + """Shortcut for `tqdm(range(*args), **tqdm_kwargs)`.""" + +Convenience Functions +~~~~~~~~~~~~~~~~~~~~~ + +.. 
code:: python + + def tqdm.contrib.tenumerate(iterable, start=0, total=None, + tqdm_class=tqdm.auto.tqdm, **tqdm_kwargs): + """Equivalent of `numpy.ndenumerate` or builtin `enumerate`.""" + + def tqdm.contrib.tzip(iter1, *iter2plus, **tqdm_kwargs): + """Equivalent of builtin `zip`.""" + + def tqdm.contrib.tmap(function, *sequences, **tqdm_kwargs): + """Equivalent of builtin `map`.""" + +Submodules +~~~~~~~~~~ + +.. code:: python + + class tqdm.notebook.tqdm(tqdm.tqdm): + """IPython/Jupyter Notebook widget.""" + + class tqdm.auto.tqdm(tqdm.tqdm): + """Automatically chooses beween `tqdm.notebook` and `tqdm.tqdm`.""" + + class tqdm.asyncio.tqdm(tqdm.tqdm): + """Asynchronous version.""" + @classmethod + def as_completed(cls, fs, *, loop=None, timeout=None, total=None, + **tqdm_kwargs): + """Wrapper for `asyncio.as_completed`.""" + + class tqdm.gui.tqdm(tqdm.tqdm): + """Matplotlib GUI version.""" + + class tqdm.tk.tqdm(tqdm.tqdm): + """Tkinter GUI version.""" + + class tqdm.rich.tqdm(tqdm.tqdm): + """`rich.progress` version.""" + + class tqdm.keras.TqdmCallback(keras.callbacks.Callback): + """Keras callback for epoch and batch progress.""" + + class tqdm.dask.TqdmCallback(dask.callbacks.Callback): + """Dask callback for task progress.""" + + +``contrib`` ++++++++++++ + +The ``tqdm.contrib`` package also contains experimental modules: + +- ``tqdm.contrib.itertools``: Thin wrappers around ``itertools`` +- ``tqdm.contrib.concurrent``: Thin wrappers around ``concurrent.futures`` +- ``tqdm.contrib.slack``: Posts to `Slack `__ bots +- ``tqdm.contrib.discord``: Posts to `Discord `__ bots +- ``tqdm.contrib.telegram``: Posts to `Telegram `__ bots +- ``tqdm.contrib.bells``: Automagically enables all optional features + + * ``auto``, ``pandas``, ``slack``, ``discord``, ``telegram`` + +Examples and Advanced Usage +--------------------------- + +- See the `examples `__ + folder; +- import the module and run ``help()``; +- consult the `wiki `__; + + * this has an + `excellent article `__ + on how to make a **great** progressbar; + +- check out the `slides from PyData London `__, or +- run the |binder-demo|. + +Description and additional stats +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Custom information can be displayed and updated dynamically on ``tqdm`` bars +with the ``desc`` and ``postfix`` arguments: + +.. code:: python + + from tqdm import tqdm, trange + from random import random, randint + from time import sleep + + with trange(10) as t: + for i in t: + # Description will be displayed on the left + t.set_description('GEN %i' % i) + # Postfix will be displayed on the right, + # formatted automatically based on argument's datatype + t.set_postfix(loss=random(), gen=randint(1,999), str='h', + lst=[1, 2]) + sleep(0.1) + + with tqdm(total=10, bar_format="{postfix[0]} {postfix[1][value]:>8.2g}", + postfix=["Batch", {"value": 0}]) as t: + for i in range(10): + sleep(0.1) + t.postfix[1]["value"] = i / 2 + t.update() + +Points to remember when using ``{postfix[...]}`` in the ``bar_format`` string: + +- ``postfix`` also needs to be passed as an initial argument in a compatible + format, and +- ``postfix`` will be auto-converted to a string if it is a ``dict``-like + object. To prevent this behaviour, insert an extra item into the dictionary + where the key is not a string. + +Additional ``bar_format`` parameters may also be defined by overriding +``format_dict``, and the bar itself may be modified using ``ascii``: + +.. 
code:: python + + from tqdm import tqdm + class TqdmExtraFormat(tqdm): + """Provides a `total_time` format parameter""" + @property + def format_dict(self): + d = super().format_dict + total_time = d["elapsed"] * (d["total"] or 0) / max(d["n"], 1) + d.update(total_time=self.format_interval(total_time) + " in total") + return d + + for i in TqdmExtraFormat( + range(9), ascii=" .oO0", + bar_format="{total_time}: {percentage:.0f}%|{bar}{r_bar}"): + if i == 4: + break + +.. code:: + + 00:00 in total: 44%|0000. | 4/9 [00:00<00:00, 962.93it/s] + +Note that ``{bar}`` also supports a format specifier ``[width][type]``. + +- ``width`` + + * unspecified (default): automatic to fill ``ncols`` + * ``int >= 0``: fixed width overriding ``ncols`` logic + * ``int < 0``: subtract from the automatic default + +- ``type`` + + * ``a``: ascii (``ascii=True`` override) + * ``u``: unicode (``ascii=False`` override) + * ``b``: blank (``ascii=" "`` override) + +This means a fixed bar with right-justified text may be created by using: +``bar_format="{l_bar}{bar:10}|{bar:-10b}right-justified"`` + +Nested progress bars +~~~~~~~~~~~~~~~~~~~~ + +``tqdm`` supports nested progress bars. Here's an example: + +.. code:: python + + from tqdm.auto import trange + from time import sleep + + for i in trange(4, desc='1st loop'): + for j in trange(5, desc='2nd loop'): + for k in trange(50, desc='3rd loop', leave=False): + sleep(0.01) + +For manual control over positioning (e.g. for multi-processing use), +you may specify ``position=n`` where ``n=0`` for the outermost bar, +``n=1`` for the next, and so on. +However, it's best to check if ``tqdm`` can work without manual ``position`` +first. + +.. code:: python + + from time import sleep + from tqdm import trange, tqdm + from multiprocessing import Pool, RLock, freeze_support + + L = list(range(9)) + + def progresser(n): + interval = 0.001 / (n + 2) + total = 5000 + text = f"#{n}, est. {interval * total:<04.2}s" + for _ in trange(total, desc=text, position=n): + sleep(interval) + + if __name__ == '__main__': + freeze_support() # for Windows support + tqdm.set_lock(RLock()) # for managing output contention + p = Pool(initializer=tqdm.set_lock, initargs=(tqdm.get_lock(),)) + p.map(progresser, L) + +Note that in Python 3, ``tqdm.write`` is thread-safe: + +.. code:: python + + from time import sleep + from tqdm import tqdm, trange + from concurrent.futures import ThreadPoolExecutor + + L = list(range(9)) + + def progresser(n): + interval = 0.001 / (n + 2) + total = 5000 + text = f"#{n}, est. {interval * total:<04.2}s" + for _ in trange(total, desc=text): + sleep(interval) + if n == 6: + tqdm.write("n == 6 completed.") + tqdm.write("`tqdm.write()` is thread-safe in py3!") + + if __name__ == '__main__': + with ThreadPoolExecutor() as p: + p.map(progresser, L) + +Hooks and callbacks +~~~~~~~~~~~~~~~~~~~ + +``tqdm`` can easily support callbacks/hooks and manual updates. +Here's an example with ``urllib``: + +**``urllib.urlretrieve`` documentation** + + | [...] + | If present, the hook function will be called once + | on establishment of the network connection and once after each block read + | thereafter. The hook will be passed three arguments; a count of blocks + | transferred so far, a block size in bytes, and the total size of the file. + | [...] + +.. 
code:: python + + import urllib, os + from tqdm import tqdm + urllib = getattr(urllib, 'request', urllib) + + class TqdmUpTo(tqdm): + """Provides `update_to(n)` which uses `tqdm.update(delta_n)`.""" + def update_to(self, b=1, bsize=1, tsize=None): + """ + b : int, optional + Number of blocks transferred so far [default: 1]. + bsize : int, optional + Size of each block (in tqdm units) [default: 1]. + tsize : int, optional + Total size (in tqdm units). If [default: None] remains unchanged. + """ + if tsize is not None: + self.total = tsize + return self.update(b * bsize - self.n) # also sets self.n = b * bsize + + eg_link = "https://caspersci.uk.to/matryoshka.zip" + with TqdmUpTo(unit='B', unit_scale=True, unit_divisor=1024, miniters=1, + desc=eg_link.split('/')[-1]) as t: # all optional kwargs + urllib.urlretrieve(eg_link, filename=os.devnull, + reporthook=t.update_to, data=None) + t.total = t.n + +Inspired by `twine#242 `__. +Functional alternative in +`examples/tqdm_wget.py `__. + +It is recommend to use ``miniters=1`` whenever there is potentially +large differences in iteration speed (e.g. downloading a file over +a patchy connection). + +**Wrapping read/write methods** + +To measure throughput through a file-like object's ``read`` or ``write`` +methods, use ``CallbackIOWrapper``: + +.. code:: python + + from tqdm.auto import tqdm + from tqdm.utils import CallbackIOWrapper + + with tqdm(total=file_obj.size, + unit='B', unit_scale=True, unit_divisor=1024) as t: + fobj = CallbackIOWrapper(t.update, file_obj, "read") + while True: + chunk = fobj.read(chunk_size) + if not chunk: + break + t.reset() + # ... continue to use `t` for something else + +Alternatively, use the even simpler ``wrapattr`` convenience function, +which would condense both the ``urllib`` and ``CallbackIOWrapper`` examples +down to: + +.. code:: python + + import urllib, os + from tqdm import tqdm + + eg_link = "https://caspersci.uk.to/matryoshka.zip" + response = getattr(urllib, 'request', urllib).urlopen(eg_link) + with tqdm.wrapattr(open(os.devnull, "wb"), "write", + miniters=1, desc=eg_link.split('/')[-1], + total=getattr(response, 'length', None)) as fout: + for chunk in response: + fout.write(chunk) + +The ``requests`` equivalent is nearly identical: + +.. code:: python + + import requests, os + from tqdm import tqdm + + eg_link = "https://caspersci.uk.to/matryoshka.zip" + response = requests.get(eg_link, stream=True) + with tqdm.wrapattr(open(os.devnull, "wb"), "write", + miniters=1, desc=eg_link.split('/')[-1], + total=int(response.headers.get('content-length', 0))) as fout: + for chunk in response.iter_content(chunk_size=4096): + fout.write(chunk) + +**Custom callback** + +``tqdm`` is known for intelligently skipping unnecessary displays. To make a +custom callback take advantage of this, simply use the return value of +``update()``. This is set to ``True`` if a ``display()`` was triggered. + +.. code:: python + + from tqdm.auto import tqdm as std_tqdm + + def external_callback(*args, **kwargs): + ... + + class TqdmExt(std_tqdm): + def update(self, n=1): + displayed = super().update(n) + if displayed: + external_callback(**self.format_dict) + return displayed + +``asyncio`` +~~~~~~~~~~~ + +Note that ``break`` isn't currently caught by asynchronous iterators. +This means that ``tqdm`` cannot clean up after itself in this case: + +.. 
code:: python + + from tqdm.asyncio import tqdm + + async for i in tqdm(range(9)): + if i == 2: + break + +Instead, either call ``pbar.close()`` manually or use the context manager syntax: + +.. code:: python + + from tqdm.asyncio import tqdm + + with tqdm(range(9)) as pbar: + async for i in pbar: + if i == 2: + break + +Pandas Integration +~~~~~~~~~~~~~~~~~~ + +Due to popular demand we've added support for ``pandas`` -- here's an example +for ``DataFrame.progress_apply`` and ``DataFrameGroupBy.progress_apply``: + +.. code:: python + + import pandas as pd + import numpy as np + from tqdm import tqdm + + df = pd.DataFrame(np.random.randint(0, 100, (100000, 6))) + + # Register `pandas.progress_apply` and `pandas.Series.map_apply` with `tqdm` + # (can use `tqdm.gui.tqdm`, `tqdm.notebook.tqdm`, optional kwargs, etc.) + tqdm.pandas(desc="my bar!") + + # Now you can use `progress_apply` instead of `apply` + # and `progress_map` instead of `map` + df.progress_apply(lambda x: x**2) + # can also groupby: + # df.groupby(0).progress_apply(lambda x: x**2) + +In case you're interested in how this works (and how to modify it for your +own callbacks), see the +`examples `__ +folder or import the module and run ``help()``. + +Keras Integration +~~~~~~~~~~~~~~~~~ + +A ``keras`` callback is also available: + +.. code:: python + + from tqdm.keras import TqdmCallback + + ... + + model.fit(..., verbose=0, callbacks=[TqdmCallback()]) + +Dask Integration +~~~~~~~~~~~~~~~~ + +A ``dask`` callback is also available: + +.. code:: python + + from tqdm.dask import TqdmCallback + + with TqdmCallback(desc="compute"): + ... + arr.compute() + + # or use callback globally + cb = TqdmCallback(desc="global") + cb.register() + arr.compute() + +IPython/Jupyter Integration +~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +IPython/Jupyter is supported via the ``tqdm.notebook`` submodule: + +.. code:: python + + from tqdm.notebook import trange, tqdm + from time import sleep + + for i in trange(3, desc='1st loop'): + for j in tqdm(range(100), desc='2nd loop'): + sleep(0.01) + +In addition to ``tqdm`` features, the submodule provides a native Jupyter +widget (compatible with IPython v1-v4 and Jupyter), fully working nested bars +and colour hints (blue: normal, green: completed, red: error/interrupt, +light blue: no ETA); as demonstrated below. + +|Screenshot-Jupyter1| +|Screenshot-Jupyter2| +|Screenshot-Jupyter3| + +The ``notebook`` version supports percentage or pixels for overall width +(e.g.: ``ncols='100%'`` or ``ncols='480px'``). + +It is also possible to let ``tqdm`` automatically choose between +console or notebook versions by using the ``autonotebook`` submodule: + +.. code:: python + + from tqdm.autonotebook import tqdm + tqdm.pandas() + +Note that this will issue a ``TqdmExperimentalWarning`` if run in a notebook +since it is not meant to be possible to distinguish between ``jupyter notebook`` +and ``jupyter console``. Use ``auto`` instead of ``autonotebook`` to suppress +this warning. + +Note that notebooks will display the bar in the cell where it was created. +This may be a different cell from the one where it is used. +If this is not desired, either + +- delay the creation of the bar to the cell where it must be displayed, or +- create the bar with ``display=False``, and in a later cell call + ``display(bar.container)``: + +.. code:: python + + from tqdm.notebook import tqdm + pbar = tqdm(..., display=False) + +.. 
code:: python + + # different cell + display(pbar.container) + +The ``keras`` callback has a ``display()`` method which can be used likewise: + +.. code:: python + + from tqdm.keras import TqdmCallback + cbk = TqdmCallback(display=False) + +.. code:: python + + # different cell + cbk.display() + model.fit(..., verbose=0, callbacks=[cbk]) + +Another possibility is to have a single bar (near the top of the notebook) +which is constantly re-used (using ``reset()`` rather than ``close()``). +For this reason, the notebook version (unlike the CLI version) does not +automatically call ``close()`` upon ``Exception``. + +.. code:: python + + from tqdm.notebook import tqdm + pbar = tqdm() + +.. code:: python + + # different cell + iterable = range(100) + pbar.reset(total=len(iterable)) # initialise with new `total` + for i in iterable: + pbar.update() + pbar.refresh() # force print final status but don't `close()` + +Custom Integration +~~~~~~~~~~~~~~~~~~ + +To change the default arguments (such as making ``dynamic_ncols=True``), +simply use built-in Python magic: + +.. code:: python + + from functools import partial + from tqdm import tqdm as std_tqdm + tqdm = partial(std_tqdm, dynamic_ncols=True) + +For further customisation, +``tqdm`` may be inherited from to create custom callbacks (as with the +``TqdmUpTo`` example `above <#hooks-and-callbacks>`__) or for custom frontends +(e.g. GUIs such as notebook or plotting packages). In the latter case: + +1. ``def __init__()`` to call ``super().__init__(..., gui=True)`` to disable + terminal ``status_printer`` creation. +2. Redefine: ``close()``, ``clear()``, ``display()``. + +Consider overloading ``display()`` to use e.g. +``self.frontend(**self.format_dict)`` instead of ``self.sp(repr(self))``. + +Some submodule examples of inheritance: + +- `tqdm/notebook.py `__ +- `tqdm/gui.py `__ +- `tqdm/tk.py `__ +- `tqdm/contrib/slack.py `__ +- `tqdm/contrib/discord.py `__ +- `tqdm/contrib/telegram.py `__ + +Dynamic Monitor/Meter +~~~~~~~~~~~~~~~~~~~~~ + +You can use a ``tqdm`` as a meter which is not monotonically increasing. +This could be because ``n`` decreases (e.g. a CPU usage monitor) or ``total`` +changes. + +One example would be recursively searching for files. The ``total`` is the +number of objects found so far, while ``n`` is the number of those objects which +are files (rather than folders): + +.. code:: python + + from tqdm import tqdm + import os.path + + def find_files_recursively(path, show_progress=True): + files = [] + # total=1 assumes `path` is a file + t = tqdm(total=1, unit="file", disable=not show_progress) + if not os.path.exists(path): + raise IOError("Cannot find:" + path) + + def append_found_file(f): + files.append(f) + t.update() + + def list_found_dir(path): + """returns os.listdir(path) assuming os.path.isdir(path)""" + listing = os.listdir(path) + # subtract 1 since a "file" we found was actually this directory + t.total += len(listing) - 1 + # fancy way to give info without forcing a refresh + t.set_postfix(dir=path[-10:], refresh=False) + t.update(0) # may trigger a refresh + return listing + + def recursively_search(path): + if os.path.isdir(path): + for f in list_found_dir(path): + recursively_search(os.path.join(path, f)) + else: + append_found_file(path) + + recursively_search(path) + t.set_postfix(dir=path) + t.close() + return files + +Using ``update(0)`` is a handy way to let ``tqdm`` decide when to trigger a +display refresh to avoid console spamming. 
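+
+As noted in the section above, ``n`` itself may also decrease (e.g. a CPU
+usage monitor). A minimal sketch of such a non-monotonic meter, assigning
+``t.n`` an absolute reading and forcing a redraw with ``refresh()`` (the
+``read_cpu_percent()`` helper below is a hypothetical stand-in for a real
+probe):
+
+.. code:: python
+
+    from random import uniform
+    from time import sleep
+    from tqdm import tqdm
+
+    def read_cpu_percent():
+        """Hypothetical stand-in for a real CPU usage probe."""
+        return uniform(0, 100)
+
+    with tqdm(total=100, desc="CPU", unit="%") as t:
+        for _ in range(20):
+            t.n = read_cpu_percent()  # n may go down as well as up
+            t.refresh()               # redraw with the new value
+            sleep(0.25)
+
+Assigning ``t.n`` directly (rather than accumulating deltas via ``update()``)
+keeps the bar in sync even when the measured value falls.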
+ +Writing messages +~~~~~~~~~~~~~~~~ + +This is a work in progress (see +`#737 `__). + +Since ``tqdm`` uses a simple printing mechanism to display progress bars, +you should not write any message in the terminal using ``print()`` while +a progressbar is open. + +To write messages in the terminal without any collision with ``tqdm`` bar +display, a ``.write()`` method is provided: + +.. code:: python + + from tqdm.auto import tqdm, trange + from time import sleep + + bar = trange(10) + for i in bar: + # Print using tqdm class method .write() + sleep(0.1) + if not (i % 3): + tqdm.write("Done task %i" % i) + # Can also use bar.write() + +By default, this will print to standard output ``sys.stdout``, but you can +specify any file-like object using the ``file`` argument. For example, this +can be used to redirect the messages to a log file or class. + +Redirecting writing +~~~~~~~~~~~~~~~~~~~ + +If using a library that can print messages to the console, editing the library +by replacing ``print()`` with ``tqdm.write()`` may not be desirable. +In that case, redirecting ``sys.stdout`` to ``tqdm.write()`` is an option. + +To redirect ``sys.stdout``, create a file-like class that will write +any input string to ``tqdm.write()``, and supply the arguments +``file=sys.stdout, dynamic_ncols=True``. + +A reusable canonical example is given below: + +.. code:: python + + from time import sleep + import contextlib + import sys + from tqdm import tqdm + from tqdm.contrib import DummyTqdmFile + + + @contextlib.contextmanager + def std_out_err_redirect_tqdm(): + orig_out_err = sys.stdout, sys.stderr + try: + sys.stdout, sys.stderr = map(DummyTqdmFile, orig_out_err) + yield orig_out_err[0] + # Relay exceptions + except Exception as exc: + raise exc + # Always restore sys.stdout/err if necessary + finally: + sys.stdout, sys.stderr = orig_out_err + + def some_fun(i): + print("Fee, fi, fo,".split()[i]) + + # Redirect stdout to tqdm.write() (don't forget the `as save_stdout`) + with std_out_err_redirect_tqdm() as orig_stdout: + # tqdm needs the original stdout + # and dynamic_ncols=True to autodetect console width + for i in tqdm(range(3), file=orig_stdout, dynamic_ncols=True): + sleep(.5) + some_fun(i) + + # After the `with`, printing is restored + print("Done!") + +Redirecting ``logging`` +~~~~~~~~~~~~~~~~~~~~~~~ + +Similar to ``sys.stdout``/``sys.stderr`` as detailed above, console ``logging`` +may also be redirected to ``tqdm.write()``. + +Warning: if also redirecting ``sys.stdout``/``sys.stderr``, make sure to +redirect ``logging`` first if needed. + +Helper methods are available in ``tqdm.contrib.logging``. For example: + +.. code:: python + + import logging + from tqdm import trange + from tqdm.contrib.logging import logging_redirect_tqdm + + LOG = logging.getLogger(__name__) + + if __name__ == '__main__': + logging.basicConfig(level=logging.INFO) + with logging_redirect_tqdm(): + for i in trange(9): + if i == 4: + LOG.info("console logging redirected to `tqdm.write()`") + # logging restored + +Monitoring thread, intervals and miniters +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +``tqdm`` implements a few tricks to increase efficiency and reduce overhead. + +- Avoid unnecessary frequent bar refreshing: ``mininterval`` defines how long + to wait between each refresh. ``tqdm`` always gets updated in the background, + but it will display only every ``mininterval``. +- Reduce number of calls to check system clock/time. +- ``mininterval`` is more intuitive to configure than ``miniters``.
+ A clever adjustment system ``dynamic_miniters`` will automatically adjust + ``miniters`` to the amount of iterations that fit into time ``mininterval``. + Essentially, ``tqdm`` will check if it's time to print without actually + checking time. This behaviour can be still be bypassed by manually setting + ``miniters``. + +However, consider a case with a combination of fast and slow iterations. +After a few fast iterations, ``dynamic_miniters`` will set ``miniters`` to a +large number. When iteration rate subsequently slows, ``miniters`` will +remain large and thus reduce display update frequency. To address this: + +- ``maxinterval`` defines the maximum time between display refreshes. + A concurrent monitoring thread checks for overdue updates and forces one + where necessary. + +The monitoring thread should not have a noticeable overhead, and guarantees +updates at least every 10 seconds by default. +This value can be directly changed by setting the ``monitor_interval`` of +any ``tqdm`` instance (i.e. ``t = tqdm.tqdm(...); t.monitor_interval = 2``). +The monitor thread may be disabled application-wide by setting +``tqdm.tqdm.monitor_interval = 0`` before instantiation of any ``tqdm`` bar. + + +Merch +----- + +You can buy `tqdm branded merch `__ now! + +Contributions +------------- + +|GitHub-Commits| |GitHub-Issues| |GitHub-PRs| |OpenHub-Status| |GitHub-Contributions| |CII Best Practices| + +All source code is hosted on `GitHub `__. +Contributions are welcome. + +See the +`CONTRIBUTING `__ +file for more information. + +Developers who have made significant contributions, ranked by *SLoC* +(surviving lines of code, +`git fame `__ ``-wMC --excl '\.(png|gif|jpg)$'``), +are: + +==================== ======================================================== ==== ================================ +Name ID SLoC Notes +==================== ======================================================== ==== ================================ +Casper da Costa-Luis `casperdcl `__ ~80% primary maintainer |Gift-Casper| +Stephen Larroque `lrq3000 `__ ~9% team member +Martin Zugnoni `martinzugnoni `__ ~3% +Daniel Ecer `de-code `__ ~2% +Richard Sheridan `richardsheridan `__ ~1% +Guangshuo Chen `chengs `__ ~1% +Helio Machado `0x2b3bfa0 `__ ~1% +Kyle Altendorf `altendky `__ <1% +Noam Yorav-Raphael `noamraph `__ <1% original author +Matthew Stevens `mjstevens777 `__ <1% +Hadrien Mary `hadim `__ <1% team member +Mikhail Korobov `kmike `__ <1% team member +==================== ======================================================== ==== ================================ + +Ports to Other Languages +~~~~~~~~~~~~~~~~~~~~~~~~ + +A list is available on +`this wiki page `__. + + +LICENCE +------- + +Open Source (OSI approved): |LICENCE| + +Citation information: |DOI| + +|README-Hits| (Since 19 May 2016) + +.. |Logo| image:: https://tqdm.github.io/img/logo.gif +.. |Screenshot| image:: https://tqdm.github.io/img/tqdm.gif +.. |Video| image:: https://tqdm.github.io/img/video.jpg + :target: https://tqdm.github.io/video +.. |Slides| image:: https://tqdm.github.io/img/slides.jpg + :target: https://tqdm.github.io/PyData2019/slides.html +.. |Merch| image:: https://tqdm.github.io/img/merch.jpg + :target: https://tqdm.github.io/merch +.. |Build-Status| image:: https://img.shields.io/github/actions/workflow/status/tqdm/tqdm/test.yml?branch=master&label=tqdm&logo=GitHub + :target: https://github.com/tqdm/tqdm/actions/workflows/test.yml +.. 
|Coverage-Status| image:: https://img.shields.io/coveralls/github/tqdm/tqdm/master?logo=coveralls + :target: https://coveralls.io/github/tqdm/tqdm +.. |Branch-Coverage-Status| image:: https://codecov.io/gh/tqdm/tqdm/branch/master/graph/badge.svg + :target: https://codecov.io/gh/tqdm/tqdm +.. |Codacy-Grade| image:: https://app.codacy.com/project/badge/Grade/3f965571598f44549c7818f29cdcf177 + :target: https://www.codacy.com/gh/tqdm/tqdm/dashboard +.. |CII Best Practices| image:: https://bestpractices.coreinfrastructure.org/projects/3264/badge + :target: https://bestpractices.coreinfrastructure.org/projects/3264 +.. |GitHub-Status| image:: https://img.shields.io/github/tag/tqdm/tqdm.svg?maxAge=86400&logo=github&logoColor=white + :target: https://github.com/tqdm/tqdm/releases +.. |GitHub-Forks| image:: https://img.shields.io/github/forks/tqdm/tqdm.svg?logo=github&logoColor=white + :target: https://github.com/tqdm/tqdm/network +.. |GitHub-Stars| image:: https://img.shields.io/github/stars/tqdm/tqdm.svg?logo=github&logoColor=white + :target: https://github.com/tqdm/tqdm/stargazers +.. |GitHub-Commits| image:: https://img.shields.io/github/commit-activity/y/tqdm/tqdm.svg?logo=git&logoColor=white + :target: https://github.com/tqdm/tqdm/graphs/commit-activity +.. |GitHub-Issues| image:: https://img.shields.io/github/issues-closed/tqdm/tqdm.svg?logo=github&logoColor=white + :target: https://github.com/tqdm/tqdm/issues?q= +.. |GitHub-PRs| image:: https://img.shields.io/github/issues-pr-closed/tqdm/tqdm.svg?logo=github&logoColor=white + :target: https://github.com/tqdm/tqdm/pulls +.. |GitHub-Contributions| image:: https://img.shields.io/github/contributors/tqdm/tqdm.svg?logo=github&logoColor=white + :target: https://github.com/tqdm/tqdm/graphs/contributors +.. |GitHub-Updated| image:: https://img.shields.io/github/last-commit/tqdm/tqdm/master.svg?logo=github&logoColor=white&label=pushed + :target: https://github.com/tqdm/tqdm/pulse +.. |Gift-Casper| image:: https://img.shields.io/badge/dynamic/json.svg?color=ff69b4&label=gifts%20received&prefix=%C2%A3&query=%24..sum&url=https%3A%2F%2Fcaspersci.uk.to%2Fgifts.json + :target: https://cdcl.ml/sponsor +.. |Versions| image:: https://img.shields.io/pypi/v/tqdm.svg + :target: https://tqdm.github.io/releases +.. |PyPI-Downloads| image:: https://img.shields.io/pypi/dm/tqdm.svg?label=pypi%20downloads&logo=PyPI&logoColor=white + :target: https://pepy.tech/project/tqdm +.. |Py-Versions| image:: https://img.shields.io/pypi/pyversions/tqdm.svg?logo=python&logoColor=white + :target: https://pypi.org/project/tqdm +.. |Conda-Forge-Status| image:: https://img.shields.io/conda/v/conda-forge/tqdm.svg?label=conda-forge&logo=conda-forge + :target: https://anaconda.org/conda-forge/tqdm +.. |Snapcraft| image:: https://img.shields.io/badge/snap-install-82BEA0.svg?logo=snapcraft + :target: https://snapcraft.io/tqdm +.. |Docker| image:: https://img.shields.io/badge/docker-pull-blue.svg?logo=docker&logoColor=white + :target: https://hub.docker.com/r/tqdm/tqdm +.. |Libraries-Rank| image:: https://img.shields.io/librariesio/sourcerank/pypi/tqdm.svg?logo=koding&logoColor=white + :target: https://libraries.io/pypi/tqdm +.. |Libraries-Dependents| image:: https://img.shields.io/librariesio/dependent-repos/pypi/tqdm.svg?logo=koding&logoColor=white + :target: https://github.com/tqdm/tqdm/network/dependents +.. |OpenHub-Status| image:: https://www.openhub.net/p/tqdm/widgets/project_thin_badge?format=gif + :target: https://www.openhub.net/p/tqdm?ref=Thin+badge +.. 
|awesome-python| image:: https://awesome.re/mentioned-badge.svg + :target: https://github.com/vinta/awesome-python +.. |LICENCE| image:: https://img.shields.io/pypi/l/tqdm.svg + :target: https://raw.githubusercontent.com/tqdm/tqdm/master/LICENCE +.. |DOI| image:: https://img.shields.io/badge/DOI-10.5281/zenodo.595120-blue.svg + :target: https://doi.org/10.5281/zenodo.595120 +.. |binder-demo| image:: https://mybinder.org/badge_logo.svg + :target: https://mybinder.org/v2/gh/tqdm/tqdm/master?filepath=DEMO.ipynb +.. |Screenshot-Jupyter1| image:: https://tqdm.github.io/img/jupyter-1.gif +.. |Screenshot-Jupyter2| image:: https://tqdm.github.io/img/jupyter-2.gif +.. |Screenshot-Jupyter3| image:: https://tqdm.github.io/img/jupyter-3.gif +.. |README-Hits| image:: https://cgi.cdcl.ml/hits?q=tqdm&style=social&r=https://github.com/tqdm/tqdm&l=https://tqdm.github.io/img/favicon.png&f=https://tqdm.github.io/img/logo.gif + :target: https://cgi.cdcl.ml/hits?q=tqdm&a=plot&r=https://github.com/tqdm/tqdm&l=https://tqdm.github.io/img/favicon.png&f=https://tqdm.github.io/img/logo.gif&style=social diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/tqdm-4.67.1.dist-info/RECORD b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/tqdm-4.67.1.dist-info/RECORD new file mode 100644 index 0000000000000000000000000000000000000000..74e784dd1d287a072a4631113be4c72461cbea74 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/tqdm-4.67.1.dist-info/RECORD @@ -0,0 +1,74 @@ +../../../bin/tqdm,sha256=7Orfhg-RUY2zVs0iiqlaP1W3qlQu2Btx1ThreG0SPus,269 +tqdm-4.67.1.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4 +tqdm-4.67.1.dist-info/LICENCE,sha256=3DMlLoKQFeOxUAhvubOkD2rW-zLC9GEM6BL6Z301mGo,1985 +tqdm-4.67.1.dist-info/METADATA,sha256=aIoWMt9SWhmP7FLc_vsSRtMerO6cA1qsrC1-r42P9mk,57675 +tqdm-4.67.1.dist-info/RECORD,, +tqdm-4.67.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91 +tqdm-4.67.1.dist-info/entry_points.txt,sha256=ReJCH7Ui3Zyh6M16E4OhsZ1oU7WtMXCfbtoyBhGO29Y,39 +tqdm-4.67.1.dist-info/top_level.txt,sha256=NLiUJNfmc9At15s7JURiwvqMEjUi9G5PMGRrmMYzNSM,5 +tqdm/__init__.py,sha256=9mQNYSSqP99JasubEC1POJLMmhkkBH6cJZxPIR5G2pQ,1572 +tqdm/__main__.py,sha256=bYt9eEaoRQWdejEHFD8REx9jxVEdZptECFsV7F49Ink,30 +tqdm/__pycache__/__init__.cpython-312.pyc,, +tqdm/__pycache__/__main__.cpython-312.pyc,, +tqdm/__pycache__/_dist_ver.cpython-312.pyc,, +tqdm/__pycache__/_main.cpython-312.pyc,, +tqdm/__pycache__/_monitor.cpython-312.pyc,, +tqdm/__pycache__/_tqdm.cpython-312.pyc,, +tqdm/__pycache__/_tqdm_gui.cpython-312.pyc,, +tqdm/__pycache__/_tqdm_notebook.cpython-312.pyc,, +tqdm/__pycache__/_tqdm_pandas.cpython-312.pyc,, +tqdm/__pycache__/_utils.cpython-312.pyc,, +tqdm/__pycache__/asyncio.cpython-312.pyc,, +tqdm/__pycache__/auto.cpython-312.pyc,, +tqdm/__pycache__/autonotebook.cpython-312.pyc,, +tqdm/__pycache__/cli.cpython-312.pyc,, +tqdm/__pycache__/dask.cpython-312.pyc,, +tqdm/__pycache__/gui.cpython-312.pyc,, +tqdm/__pycache__/keras.cpython-312.pyc,, +tqdm/__pycache__/notebook.cpython-312.pyc,, +tqdm/__pycache__/rich.cpython-312.pyc,, +tqdm/__pycache__/std.cpython-312.pyc,, +tqdm/__pycache__/tk.cpython-312.pyc,, +tqdm/__pycache__/utils.cpython-312.pyc,, +tqdm/__pycache__/version.cpython-312.pyc,, +tqdm/_dist_ver.py,sha256=m5AdYI-jB-v6P0VJ_70isH_p24EzSOGSwVvuAZmkmKY,23 +tqdm/_main.py,sha256=9ySvgmi_2Sw4CAo5UDW0Q2dxfTryboEWGHohfCJz0sA,283 +tqdm/_monitor.py,sha256=Uku-DPWgzJ7dO5CK08xKJK-E_F6qQ-JB3ksuXczSYR0,3699 
+tqdm/_tqdm.py,sha256=LfLCuJ6bpsVo9xilmtBXyEm1vGnUCFrliW85j3J-nD4,283 +tqdm/_tqdm_gui.py,sha256=03Hc8KayxJveieI5-0-2NGiDpLvw9jZekofJUV7CCwk,287 +tqdm/_tqdm_notebook.py,sha256=BuHiLuxu6uEfZFaPJW3RPpPaxaVctEQA3kdSJSDL1hw,307 +tqdm/_tqdm_pandas.py,sha256=c9jptUgigN6axRDhRd4Rif98Tmxeopc1nFNFhIpbFUE,888 +tqdm/_utils.py,sha256=_4E73bfDj4f1s3sM42NLHNrZDOkijZoWq-n6xWLkdZ8,553 +tqdm/asyncio.py,sha256=Kp2rSkNRf9KRqa3d9YpgeZQ7L7EZf2Ki4bSc7UPIyoo,2757 +tqdm/auto.py,sha256=nDZflj6p2zKkjBCNBourrhS81zYfZy1_dQvbckrdW8o,871 +tqdm/autonotebook.py,sha256=Yb9F5uaiBPhfbDDFpbtoG8I2YUw3uQJ89rUDLbfR6ws,956 +tqdm/cli.py,sha256=SbKlN8QyZ2ogenqt-wT_p6_sx2OOdCjCyhoZBFnlmyI,11010 +tqdm/completion.sh,sha256=j79KbSmpIj_E11jfTfBXrGnUTzKXVpQ1vGVQvsyDRl4,946 +tqdm/contrib/__init__.py,sha256=OgSwVXm-vlDJ-2imtoQ9z8qdom4snMSRztH72KMA82A,2494 +tqdm/contrib/__pycache__/__init__.cpython-312.pyc,, +tqdm/contrib/__pycache__/bells.cpython-312.pyc,, +tqdm/contrib/__pycache__/concurrent.cpython-312.pyc,, +tqdm/contrib/__pycache__/discord.cpython-312.pyc,, +tqdm/contrib/__pycache__/itertools.cpython-312.pyc,, +tqdm/contrib/__pycache__/logging.cpython-312.pyc,, +tqdm/contrib/__pycache__/slack.cpython-312.pyc,, +tqdm/contrib/__pycache__/telegram.cpython-312.pyc,, +tqdm/contrib/__pycache__/utils_worker.cpython-312.pyc,, +tqdm/contrib/bells.py,sha256=Yx1HqGCmHrESCAO700j5wE__JCleNODJxedh1ijPLD0,837 +tqdm/contrib/concurrent.py,sha256=K1yjloKS5WRNFyjLRth0DmU5PAnDbF0A-GD27N-J4a8,3986 +tqdm/contrib/discord.py,sha256=MtVIL1s_dxH21G4sL8FBgQ4Wei23ho9Ek5T-AommvNc,5243 +tqdm/contrib/itertools.py,sha256=WdKKQU5eSzsqHu29SN_oH12huYZo0Jihqoi9-nVhwz4,774 +tqdm/contrib/logging.py,sha256=NsYtnKttj2mMrGm58mEdo5a9DP_2vv8pZyrimSuWulA,3760 +tqdm/contrib/slack.py,sha256=eP_Mr5sQonYniHxxQNGue3jk2JkIPmPWFZqIYxnOui0,4007 +tqdm/contrib/telegram.py,sha256=vn_9SATMbbwn2PAbzSDyOX6av3eBB01QBug11P4H-Og,5008 +tqdm/contrib/utils_worker.py,sha256=HJP5Mz1S1xyzEke2JaqJ2sYLHXADYoo2epT5AzQ38eA,1207 +tqdm/dask.py,sha256=9Ei58eVqTossRLhAfWyUFCduXYKjmLmwkaXIy-CHYfs,1319 +tqdm/gui.py,sha256=STIB3K8iDzDgkNUqWIpvcI_u0OGtbGNy5NwpALXhfWs,5479 +tqdm/keras.py,sha256=op9sBkb6q6c6dw2wJ0SD2ZwpPK7yM1Vbg4l1Qiy3MIo,4373 +tqdm/notebook.py,sha256=GtZ3IapLL1v8WNDaTSvPw0bJGTyfp71Vfz5HDnAzx1M,10895 +tqdm/rich.py,sha256=YyMPkEHVyYUVUR3adJKbVX26iTmNKpNMf3DEqmm-m60,5021 +tqdm/std.py,sha256=tWjz6-QCa92aqYjz7PIdkLUCAfiy-lJZheBtZyIIyO0,57461 +tqdm/tk.py,sha256=Gu0uwXwLCGPRGHORdi3WvBLGiseUp_xxX_h_gp9VpK0,6701 +tqdm/tqdm.1,sha256=aILyUPk2S4OPe_uWy2P4AMjUf0oQ6PUW0nLYXB-BWwI,7889 +tqdm/utils.py,sha256=6E0BQw3Sg7uGWKBM_cDn3P42tXswRhzkggbhBgLDjl8,11821 +tqdm/version.py,sha256=-1yWjfu3P0eghVsysHH07fbzdiADNRdzRtYPqOaqR2A,333 diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/tqdm-4.67.1.dist-info/WHEEL b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/tqdm-4.67.1.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..ae527e7d64811439e61b93aa375defb30e06edfe --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/tqdm-4.67.1.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: setuptools (75.6.0) +Root-Is-Purelib: true +Tag: py3-none-any + diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/tqdm-4.67.1.dist-info/entry_points.txt b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/tqdm-4.67.1.dist-info/entry_points.txt new file mode 100644 index 0000000000000000000000000000000000000000..540e60f4e073bc53a5f0a521a3639e0d80780af4 --- /dev/null +++ 
b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/tqdm-4.67.1.dist-info/entry_points.txt @@ -0,0 +1,2 @@ +[console_scripts] +tqdm = tqdm.cli:main diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/tqdm-4.67.1.dist-info/top_level.txt b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/tqdm-4.67.1.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..78620c472c9d799a14ccb02a0233f4669b3bcdcb --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/tqdm-4.67.1.dist-info/top_level.txt @@ -0,0 +1 @@ +tqdm diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/typepy-1.3.4.dist-info/INSTALLER b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/typepy-1.3.4.dist-info/INSTALLER new file mode 100644 index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/typepy-1.3.4.dist-info/INSTALLER @@ -0,0 +1 @@ +pip diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/typepy-1.3.4.dist-info/LICENSE b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/typepy-1.3.4.dist-info/LICENSE new file mode 100644 index 0000000000000000000000000000000000000000..c77c54e2000bce9d89c581402ef4ec0074aabd6a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/typepy-1.3.4.dist-info/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2017-2024 Tsuyoshi Hombashi + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/typepy-1.3.4.dist-info/METADATA b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/typepy-1.3.4.dist-info/METADATA new file mode 100644 index 0000000000000000000000000000000000000000..62d9c58a4a320f70c609947f35badabd81e7d664 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/typepy-1.3.4.dist-info/METADATA @@ -0,0 +1,239 @@ +Metadata-Version: 2.1 +Name: typepy +Version: 1.3.4 +Summary: typepy is a Python library for variable type checker/validator/converter at a run time. 
+Home-page: https://github.com/thombashi/typepy
+Author: Tsuyoshi Hombashi
+Author-email: tsuyoshi.hombashi@gmail.com
+License: MIT License
+Project-URL: Changelog, https://github.com/thombashi/typepy/releases
+Project-URL: Documentation, https://typepy.rtfd.io/
+Project-URL: Source, https://github.com/thombashi/typepy
+Project-URL: Tracker, https://github.com/thombashi/typepy/issues
+Keywords: library,type-checking,type-conversion,validator
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Information Technology
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Programming Language :: Python :: Implementation :: CPython
+Classifier: Programming Language :: Python :: Implementation :: PyPy
+Classifier: Topic :: Software Development :: Libraries
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Requires-Python: >=3.9
+Description-Content-Type: text/x-rst
+License-File: LICENSE
+Requires-Dist: mbstrdecoder<2,>=1.0.0
+Provides-Extra: datetime
+Requires-Dist: python-dateutil<3.0.0,>=2.8.0; extra == "datetime"
+Requires-Dist: pytz>=2018.9; extra == "datetime"
+Requires-Dist: packaging; extra == "datetime"
+Provides-Extra: test
+Requires-Dist: pytest>=6.0.1; extra == "test"
+Requires-Dist: tcolorpy; extra == "test"
+Requires-Dist: python-dateutil<3.0.0,>=2.8.0; extra == "test"
+Requires-Dist: pytz>=2018.9; extra == "test"
+Requires-Dist: packaging; extra == "test"
+
+.. contents:: **typepy**
+   :backlinks: top
+   :depth: 2
+
+Summary
+=========
+`typepy `__ is a Python library for variable type checking/validation/conversion at run time.
+
+.. image:: https://badge.fury.io/py/typepy.svg
+    :target: https://badge.fury.io/py/typepy
+    :alt: PyPI package version
+
+.. image:: https://anaconda.org/conda-forge/typepy/badges/version.svg
+    :target: https://anaconda.org/conda-forge/typepy
+    :alt: conda-forge package version
+
+.. image:: https://img.shields.io/pypi/pyversions/typepy.svg
+    :target: https://pypi.org/project/typepy
+    :alt: Supported Python versions
+
+.. image:: https://img.shields.io/pypi/implementation/typepy.svg
+    :target: https://pypi.org/project/typepy
+    :alt: Supported Python implementations
+
+.. image:: https://github.com/thombashi/typepy/workflows/Tests/badge.svg
+    :target: https://github.com/thombashi/typepy/actions?query=workflow%3ATests
+    :alt: Linux/macOS/Windows CI status
+
+.. image:: https://coveralls.io/repos/github/thombashi/typepy/badge.svg?branch=master
+    :target: https://coveralls.io/github/thombashi/typepy?branch=master
+    :alt: Test coverage
+
+.. image:: https://github.com/thombashi/typepy/actions/workflows/github-code-scanning/codeql/badge.svg
+    :target: https://github.com/thombashi/typepy/actions/workflows/github-code-scanning/codeql
+    :alt: CodeQL
+
+Features
+==========
+- check a value's type
+- validate a value against a type
+- convert a value from one type to another
+
+The correspondence between Python types and ``typepy`` classes is as follows:
+
+.. table:: Supported Types
+
+    ================================================ ==================
+    Python Type                                      typepy: Type Class
+    ================================================ ==================
+    ``bool``                                         `Bool `__
+    ``datetime``                                     `DateTime `__
+    ``dict``                                         `Dictionary `__
+    ``float``/``decimal.Decimal`` (not infinity/NaN) `RealNumber `__
+    ``float``/``decimal.Decimal`` (infinity)         `Infinity `__
+    ``float``/``decimal.Decimal`` (NaN)              `Nan `__
+    ``int``                                          `Integer `__
+    ``list``                                         `List `__
+    ``None``                                         `None `__
+    ``str`` (not null)                               `String `__
+    ``str`` (null)                                   `NullString `__
+    ``str`` (IP address)                             `IpAddress `__
+    ================================================ ==================
+
+Installation
+============
+
+Installation: pip
+------------------------------
+::
+
+    pip install typepy
+
+Install additional dependency packages with the following command if using the ``typepy.DateTime`` class:
+
+::
+
+    pip install typepy[datetime]
+
+Installation: conda
+------------------------------
+::
+
+    conda install -c conda-forge typepy
+
+Installation: apt
+------------------------------
+::
+
+    sudo add-apt-repository ppa:thombashi/ppa
+    sudo apt update
+    sudo apt install python3-typepy
+
+
+Dependencies
+============
+- Python 3.9+
+- `Python package dependencies (automatically installed) `__
+
+Optional dependencies
+----------------------------------
+These packages can be installed via ``pip install typepy[datetime]``:
+
+- `python-dateutil `__
+- `pytz `__
+
+Usage
+=======
+Type Check Method
+----------------------
+:Examples:
+    .. code-block:: pycon
+
+        >>> from typepy import Integer
+        >>> Integer(1).is_type()
+        True
+        >>> Integer(1.1).is_type()
+        False
+
+
+Type Validation Method
+--------------------------------------------
+:Examples:
+    .. code-block:: pycon
+
+        >>> from typepy import Integer
+        >>> Integer(1).validate()
+        >>> try:
+        ...     Integer(1.1).validate()
+        ... except TypeError as e:
+        ...     # validate() raises TypeError when the value does not match the type class
+        ...     print(e)
+        ...
+        invalid value type: expected=INTEGER, actual=
+
+
+Type Conversion Methods
+--------------------------------------------
+
+convert method
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+:Examples:
+    .. code-block:: pycon
+
+        >>> from typepy import Integer, TypeConversionError
+        >>> Integer("1").convert()
+        1
+        >>> try:
+        ...     Integer(1.1).convert()
+        ... except TypeConversionError as e:
+        ...     # convert() raises TypeConversionError when conversion fails
+        ...     print(e)
+        ...
+        failed to convert from float to INTEGER
+
+try_convert method
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+:Examples:
+    .. code-block:: pycon
+
+        >>> from typepy import Integer
+        >>> Integer("1").try_convert()
+        1
+        >>> print(Integer(1.1).try_convert())  # try_convert() returns None when conversion fails
+        None
+
+force_convert
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+:Examples:
+    .. code-block:: pycon
+
+        >>> from typepy import Integer, TypeConversionError
+        >>> Integer("1").force_convert()  # force_convert() forcibly converts the value
+        1
+        >>> Integer(1.1).force_convert()
+        1
+        >>> try:
+        ...     Integer("abc").force_convert()
+        ... except TypeConversionError as e:
+        ...     # force_convert() raises TypeConversionError when the value is not convertible
+        ...     print(e)
+        ...
+        failed to force_convert to int: type=
+
+
+For more information
+--------------------------------------------
+Type check/validation/conversion results differ according to the
+``strict_level`` value, which can be passed to typepy class constructors as an argument.
+More information can be found in the
+`API reference `__.
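+
+A minimal sketch of passing ``strict_level`` (an editor's illustration, not
+taken from the upstream documentation; the ``StrictLevel`` constants and the
+exact results shown in the comments are assumptions to be checked against the
+API reference):
+
+.. code-block:: python
+
+    from typepy import Integer, StrictLevel  # StrictLevel assumed to be exported by typepy
+
+    # A lenient level may accept the string "1" as an integer,
+    # while the strictest level typically accepts only real int instances.
+    print(Integer("1", strict_level=StrictLevel.MIN).is_type())  # expected: True
+    print(Integer("1", strict_level=StrictLevel.MAX).is_type())  # expected: False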
+
+Documentation
+===============
+https://typepy.rtfd.io/
+
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/typepy-1.3.4.dist-info/RECORD b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/typepy-1.3.4.dist-info/RECORD
new file mode 100644
index 0000000000000000000000000000000000000000..266bb8deb40277f6b0902ab9c313f405038f99a2
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/typepy-1.3.4.dist-info/RECORD
@@ -0,0 +1,107 @@
+typepy-1.3.4.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+typepy-1.3.4.dist-info/LICENSE,sha256=RDTzU6Et0CNqmeXT_Qf-OzpFP5Fk_hDFehHWyoa1DAg,1079
+typepy-1.3.4.dist-info/METADATA,sha256=ApIlMmvXHhWBaKZNu8X8rYZR1r16cqw534mdX1CAQIE,9247
+typepy-1.3.4.dist-info/RECORD,,
+typepy-1.3.4.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
+typepy-1.3.4.dist-info/top_level.txt,sha256=JS3pVzz8HrmCDbSyNrOs7vCirWUXl5es6HfIxEtbP2M,7
+typepy/__init__.py,sha256=5zLVjvpplr5TiN4lfvTsIQ96AgqqhFuYq7lvIXtHpYs,1223
+typepy/__pycache__/__init__.cpython-312.pyc,,
+typepy/__pycache__/__version__.cpython-312.pyc,,
+typepy/__pycache__/_common.cpython-312.pyc,,
+typepy/__pycache__/_const.cpython-312.pyc,,
+typepy/__pycache__/_function.cpython-312.pyc,,
+typepy/__pycache__/_typecode.cpython-312.pyc,,
+typepy/__pycache__/error.cpython-312.pyc,,
+typepy/__version__.py,sha256=B9mmM6D8T-2-FpZ28w-0ka1e3vx1vV0GEF9GHZ4iMfI,206
+typepy/_common.py,sha256=NV8Cr2hVr4zs7kkU2pZjPOXON8YQgn8JJptYh9j-JR0,401
+typepy/_const.py,sha256=CPuhx_t7xV5QdCJ6UKGvglDmZBoJIEIt5kSFV7pxGUo,312
+typepy/_function.py,sha256=tqaCHUHBpeOUMfOADekhUCvPwP8NUT9ZAL-9wh9Hw58,1090
+typepy/_typecode.py,sha256=Oi_zWK5ULiyCjmjb1Urtb3bm-J2LYqyX2TqiCmizuPs,397
+typepy/checker/__init__.py,sha256=Aj1kUaY7OQZd61SW_sktpim10dFkkPD5v8CkQ4mqEe4,1000
+typepy/checker/__pycache__/__init__.cpython-312.pyc,,
+typepy/checker/__pycache__/_bool.cpython-312.pyc,,
+typepy/checker/__pycache__/_bytes.cpython-312.pyc,,
+typepy/checker/__pycache__/_checker.cpython-312.pyc,,
+typepy/checker/__pycache__/_common.cpython-312.pyc,,
+typepy/checker/__pycache__/_datetime.cpython-312.pyc,,
+typepy/checker/__pycache__/_dictionary.cpython-312.pyc,,
+typepy/checker/__pycache__/_infinity.cpython-312.pyc,,
+typepy/checker/__pycache__/_integer.cpython-312.pyc,,
+typepy/checker/__pycache__/_interface.cpython-312.pyc,,
+typepy/checker/__pycache__/_ipaddress.cpython-312.pyc,,
+typepy/checker/__pycache__/_list.cpython-312.pyc,,
+typepy/checker/__pycache__/_nan.cpython-312.pyc,,
+typepy/checker/__pycache__/_none.cpython-312.pyc,,
+typepy/checker/__pycache__/_realnumber.cpython-312.pyc,,
+typepy/checker/__pycache__/_string.cpython-312.pyc,,
+typepy/checker/_bool.py,sha256=O6EITPb7OsdZOApFcdRMJADFV0N59D3ulsJkAPFzW9o,1198
+typepy/checker/_bytes.py,sha256=mYmu2ksd0h9qoBsO3SaVm9ple8edQOpV6s3RErjHscA,646
+typepy/checker/_checker.py,sha256=TxeIAXVIBXukmd1881PeXGNW7ZNapsPA9vS53wwVKck,2485
+typepy/checker/_common.py,sha256=PpinqXPbViDkTZ-HSXqemU01EeeSvaoUDuCwndPGbRc,539
+typepy/checker/_datetime.py,sha256=TffEQDoClczSF6P4o29yMbzLl-yzIAIccWXplBybFtY,1159
+typepy/checker/_dictionary.py,sha256=qXhH9plLosj967PEGCTgZvRbHZoreomnPQV6SCu1GC8,885
+typepy/checker/_infinity.py,sha256=CRgTptMZLRHzPjp93ZtgHpYM3WpV-D4p7aLt1ZyttNM,916 +typepy/checker/_integer.py,sha256=eBzHsd4xWprtzaHJglRvET_Dhi0KqrstlWyT5eCKJ88,2002 +typepy/checker/_interface.py,sha256=IiMShH1pAWhl_6JlUxtb10ofao2sUspUlGxDC1nnZps,315 +typepy/checker/_ipaddress.py,sha256=hqPXXD50x-ndLKj-DPjfLCZscN1CXawMTvOodSc1bsc,1070 +typepy/checker/_list.py,sha256=iGkycr08dVONSkerYl51WfYyidlzI3JfN5LJ_zwKU5U,985 +typepy/checker/_nan.py,sha256=SBTyHoTKtQO-wIMZ5lW-y88CGk5MLIoteig29lIQQtc,826 +typepy/checker/_none.py,sha256=xM_PEJQx1WpPUYAN6Bwgl-_IqAmhrV1oie8XIbm_C5Y,617 +typepy/checker/_realnumber.py,sha256=jtA-rv19NUBRjwAS7owNbXehJLB10dS2RCt8sLbIv5Y,1905 +typepy/checker/_string.py,sha256=Wyte6y2c2RYDCvi9BF5exzFXGscgtBrDPY3K5rhrzYs,2055 +typepy/converter/__init__.py,sha256=aN7I5tHOx93voqOeakOoad6JF07mdAKnEk0kknduo3Q,824 +typepy/converter/__pycache__/__init__.cpython-312.pyc,, +typepy/converter/__pycache__/_bool.cpython-312.pyc,, +typepy/converter/__pycache__/_bytes.cpython-312.pyc,, +typepy/converter/__pycache__/_datetime.cpython-312.pyc,, +typepy/converter/__pycache__/_dictionary.cpython-312.pyc,, +typepy/converter/__pycache__/_integer.cpython-312.pyc,, +typepy/converter/__pycache__/_interface.cpython-312.pyc,, +typepy/converter/__pycache__/_ipaddress.cpython-312.pyc,, +typepy/converter/__pycache__/_list.cpython-312.pyc,, +typepy/converter/__pycache__/_nop.cpython-312.pyc,, +typepy/converter/__pycache__/_realnumber.cpython-312.pyc,, +typepy/converter/__pycache__/_string.cpython-312.pyc,, +typepy/converter/_bool.py,sha256=ROvCowqO6nBk_Ywxcc6SUIvDVcO9acWUfsZ1fCo4Dig,1306 +typepy/converter/_bytes.py,sha256=L8e4DJ3qVqLkc2g9zlD2EMyJHYwBL4aB2EyH9w50-B4,291 +typepy/converter/_datetime.py,sha256=AaSsLhJtaNXB6TzaL98m2dpu61ZQBhbJ5MnF_sTiw5U,5382 +typepy/converter/_dictionary.py,sha256=v2ZCaSNq2U-QOgF86BTGfr9fcD0DM8UD2Oe0yFRGVAA,655 +typepy/converter/_integer.py,sha256=9ivAktrr4UQ2Yj4GVZ8Bij_Q11lqGpCxSiTU0VWtGQs,1015 +typepy/converter/_interface.py,sha256=TcqsYIsnbM3LX20k0vx7eCZnxk_Wyo6fnbBvlw4C5RY,661 +typepy/converter/_ipaddress.py,sha256=KrGcw-kn8oD0fnc3R6yaqrbgkXJWdr8XDAqFt6HJoog,843 +typepy/converter/_list.py,sha256=35ERzQ7mQXO0g5ax2Rvk93nC5FYVuAzNEQtAcZKp92E,426 +typepy/converter/_nop.py,sha256=DOkVEKioITGa_pPpgj14VClVj0ELLOjX0sgLN4nl-WI,222 +typepy/converter/_realnumber.py,sha256=7oPwNB8zWtDR6rB1JsRE4JD0CfXFGGGp-LCOmKlDPiw,1265 +typepy/converter/_string.py,sha256=Hj1G3tq0n6Jrbt3RCFAihbMlASm-QmQKgaWcsMxIbYw,498 +typepy/error.py,sha256=9tKHKExk8rOLQGtLfQQewBBaqnS39egkJYpiC7G1pWo,178 +typepy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0 +typepy/type/__init__.py,sha256=BVyVbzsx2snaim_h-ibWpXELDkRxTqjLCE-xK5FliAg,743 +typepy/type/__pycache__/__init__.cpython-312.pyc,, +typepy/type/__pycache__/_base.cpython-312.pyc,, +typepy/type/__pycache__/_binary.cpython-312.pyc,, +typepy/type/__pycache__/_bool.cpython-312.pyc,, +typepy/type/__pycache__/_bytes.cpython-312.pyc,, +typepy/type/__pycache__/_datetime.cpython-312.pyc,, +typepy/type/__pycache__/_dictionary.cpython-312.pyc,, +typepy/type/__pycache__/_infinity.cpython-312.pyc,, +typepy/type/__pycache__/_integer.cpython-312.pyc,, +typepy/type/__pycache__/_ipaddress.cpython-312.pyc,, +typepy/type/__pycache__/_list.cpython-312.pyc,, +typepy/type/__pycache__/_nan.cpython-312.pyc,, +typepy/type/__pycache__/_none.cpython-312.pyc,, +typepy/type/__pycache__/_realnumber.cpython-312.pyc,, +typepy/type/__pycache__/_string.cpython-312.pyc,, +typepy/type/_base.py,sha256=lhZlxkgDqMYlpoM3VyYhxe3isaA46Jn5O5_YblDE8xg,3564 
+typepy/type/_binary.py,sha256=1LV28p-B7q7KTqIWPd2ONPrQH8AmkGOV8imH_a95V_M,794 +typepy/type/_bool.py,sha256=QHYMUKTDTKEeXFilPqoVGFi9cz69qfqrSSPJhRQMGbM,844 +typepy/type/_bytes.py,sha256=i58k-iFWQUMTbp6fJ998KWKoEazvSWwly6LKmB2lD08,792 +typepy/type/_datetime.py,sha256=0hiq7E2DkfOzgQZY7TQ6y8B7edFCqQ7CUMZAOTkmXSI,835 +typepy/type/_dictionary.py,sha256=0UnNrzvsisNQxD6eCcCpNtwRLdXfxSDUgcA9M9geeJ0,849 +typepy/type/_infinity.py,sha256=8pnVfBwlKIEo35tbvSqiCOYsnDpZ1TA_F0tn3oDFAhQ,829 +typepy/type/_integer.py,sha256=HBgXgCfT7TtbHNigVK__-Lbaocb7CY29n6WzkWLbYjY,864 +typepy/type/_ipaddress.py,sha256=MroUImyV-POnQTvmpMpOGH7-PAwAFQz-HU5rjTV9A0c,843 +typepy/type/_list.py,sha256=_KBDpeKDdIEb3dLMJv8vkHjAY9awiKz7RoMQdnCMF6o,807 +typepy/type/_nan.py,sha256=9jbhUqOLXstqr8-FuBKikImEOk5C5RO0WtqpRbj27zU,804 +typepy/type/_none.py,sha256=VGIXkKgdONkcu2CXGPfgusoo-VL-xkEV3rKldPDAbac,859 +typepy/type/_realnumber.py,sha256=_dNKpzDr8FaImBbx84Yr2fsJNBicRE_1QdRu8NrzDq4,913 +typepy/type/_string.py,sha256=Dd0mOTQ_uxXQND-l-AFkzwNFK8EwGg_C-EyQw4TxOXg,1505 diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/typepy-1.3.4.dist-info/WHEEL b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/typepy-1.3.4.dist-info/WHEEL new file mode 100644 index 0000000000000000000000000000000000000000..ae527e7d64811439e61b93aa375defb30e06edfe --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/typepy-1.3.4.dist-info/WHEEL @@ -0,0 +1,5 @@ +Wheel-Version: 1.0 +Generator: setuptools (75.6.0) +Root-Is-Purelib: true +Tag: py3-none-any + diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/typepy-1.3.4.dist-info/top_level.txt b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/typepy-1.3.4.dist-info/top_level.txt new file mode 100644 index 0000000000000000000000000000000000000000..8245b7ba766af5b5d3536966c639c373c6fc5f57 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/typepy-1.3.4.dist-info/top_level.txt @@ -0,0 +1 @@ +typepy