Add files using upload-large-folder tool
This view is limited to 50 files because it contains too many changes. See raw diff.
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/INSTALLER +1 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/LICENSE +201 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/METADATA +123 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/RECORD +10 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/WHEEL +6 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/top_level.txt +1 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/__init__.py +69 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/__init__.pyi +263 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/converters.py +3 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/exceptions.py +3 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/filters.py +3 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/py.typed +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/setters.py +3 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/validators.py +3 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/__init__.py +79 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/arrow_writer.py +746 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/combine.py +215 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/data_files.py +825 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/fingerprint.py +494 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/hub.py +230 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/info.py +593 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/metric.py +652 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/search.py +785 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/splits.py +635 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/table.py +2422 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/frozenlist/__init__.py +98 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/frozenlist/__init__.pyi +47 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/frozenlist/_frozenlist.pyx +123 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/frozenlist/py.typed +1 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/httpcore/_models.py +516 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/httpcore/_ssl.py +9 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/httpcore/_utils.py +37 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/httpcore/py.typed +0 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/jinja2-3.1.6.dist-info/INSTALLER +1 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/jinja2-3.1.6.dist-info/METADATA +84 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/jinja2-3.1.6.dist-info/RECORD +57 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/jinja2-3.1.6.dist-info/WHEEL +4 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/jinja2-3.1.6.dist-info/entry_points.txt +3 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml-6.0.2.dist-info/INSTALLER +1 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml-6.0.2.dist-info/METADATA +103 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml-6.0.2.dist-info/RECORD +204 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml-6.0.2.dist-info/WHEEL +6 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml-6.0.2.dist-info/top_level.txt +1 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/INSTALLER +1 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/License.txt +1568 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/METADATA +44 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/RECORD +32 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/WHEEL +5 -0
- Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/top_level.txt +1 -0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/INSTALLER
ADDED
@@ -0,0 +1 @@
+pip
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/LICENSE
ADDED
@@ -0,0 +1,201 @@
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "{}"
+      replaced with your own identifying information. (Don't include
+      the brackets!) The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright 2013-2019 Nikolay Kim and Andrew Svetlov
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/METADATA
ADDED
@@ -0,0 +1,123 @@
+Metadata-Version: 2.1
+Name: aiosignal
+Version: 1.3.2
+Summary: aiosignal: a list of registered asynchronous callbacks
+Home-page: https://github.com/aio-libs/aiosignal
+Maintainer: aiohttp team <team@aiohttp.org>
+Maintainer-email: team@aiohttp.org
+License: Apache 2.0
+Project-URL: Chat: Gitter, https://gitter.im/aio-libs/Lobby
+Project-URL: CI: GitHub Actions, https://github.com/aio-libs/aiosignal/actions
+Project-URL: Coverage: codecov, https://codecov.io/github/aio-libs/aiosignal
+Project-URL: Docs: RTD, https://docs.aiosignal.org
+Project-URL: GitHub: issues, https://github.com/aio-libs/aiosignal/issues
+Project-URL: GitHub: repo, https://github.com/aio-libs/aiosignal
+Classifier: License :: OSI Approved :: Apache Software License
+Classifier: Intended Audience :: Developers
+Classifier: Programming Language :: Python
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3 :: Only
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Operating System :: POSIX
+Classifier: Operating System :: MacOS :: MacOS X
+Classifier: Operating System :: Microsoft :: Windows
+Classifier: Framework :: AsyncIO
+Requires-Python: >=3.9
+Description-Content-Type: text/x-rst
+License-File: LICENSE
+Requires-Dist: frozenlist>=1.1.0
+
+=========
+aiosignal
+=========
+
+.. image:: https://github.com/aio-libs/aiosignal/workflows/CI/badge.svg
+   :target: https://github.com/aio-libs/aiosignal/actions?query=workflow%3ACI
+   :alt: GitHub status for master branch
+
+.. image:: https://codecov.io/gh/aio-libs/aiosignal/branch/master/graph/badge.svg
+   :target: https://codecov.io/gh/aio-libs/aiosignal
+   :alt: codecov.io status for master branch
+
+.. image:: https://badge.fury.io/py/aiosignal.svg
+   :target: https://pypi.org/project/aiosignal
+   :alt: Latest PyPI package version
+
+.. image:: https://readthedocs.org/projects/aiosignal/badge/?version=latest
+   :target: https://aiosignal.readthedocs.io/
+   :alt: Latest Read The Docs
+
+.. image:: https://img.shields.io/discourse/topics?server=https%3A%2F%2Faio-libs.discourse.group%2F
+   :target: https://aio-libs.discourse.group/
+   :alt: Discourse group for io-libs
+
+.. image:: https://badges.gitter.im/Join%20Chat.svg
+   :target: https://gitter.im/aio-libs/Lobby
+   :alt: Chat on Gitter
+
+Introduction
+============
+
+A project to manage callbacks in `asyncio` projects.
+
+``Signal`` is a list of registered asynchronous callbacks.
+
+The signal's life-cycle has two stages: after creation its content
+could be filled by using standard list operations: ``sig.append()``
+etc.
+
+After you call ``sig.freeze()`` the signal is *frozen*: adding, removing
+and dropping callbacks is forbidden.
+
+The only available operation is calling the previously registered
+callbacks by using ``await sig.send(data)``.
+
+For concrete usage examples see the `Signals
+<https://docs.aiohttp.org/en/stable/web_advanced.html#aiohttp-web-signals>
+section of the `Web Server Advanced
+<https://docs.aiohttp.org/en/stable/web_advanced.html>` chapter of the `aiohttp
+documentation`_.
+
+
+Installation
+------------
+
+::
+
+   $ pip install aiosignal
+
+The library requires Python 3.8 or newer.
+
+
+Documentation
+=============
+
+https://aiosignal.readthedocs.io/
+
+Communication channels
+======================
+
+*gitter chat* https://gitter.im/aio-libs/Lobby
+
+Requirements
+============
+
+- Python >= 3.8
+- frozenlist >= 1.0.0
+
+License
+=======
+
+``aiosignal`` is offered under the Apache 2 license.
+
+Source code
+===========
+
+The project is hosted on GitHub_
+
+Please file an issue in the `bug tracker
+<https://github.com/aio-libs/aiosignal/issues>`_ if you have found a bug
+or have some suggestions to improve the library.
+
+.. _GitHub: https://github.com/aio-libs/aiosignal
+.. _aiohttp documentation: https://docs.aiohttp.org/
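Note: the two-stage life-cycle described in the METADATA above (list stage, then freeze, then send) takes very little code to exercise. A minimal runnable sketch using only the public aiosignal API; the owner string and callback name are illustrative, not from the package:

    import asyncio
    from aiosignal import Signal

    async def on_event(data):
        print("received:", data)

    async def main():
        sig = Signal(owner="demo")   # owner is only used in the Signal's repr
        sig.append(on_event)         # first stage: plain list operations
        sig.freeze()                 # second stage: registration is locked
        await sig.send("hello")      # awaits each registered callback in order

    asyncio.run(main())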
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/RECORD
ADDED
@@ -0,0 +1,10 @@
+aiosignal-1.3.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+aiosignal-1.3.2.dist-info/LICENSE,sha256=b9UkPpLdf5jsacesN3co50kFcJ_1J6W_mNbQJjwE9bY,11332
+aiosignal-1.3.2.dist-info/METADATA,sha256=TeI_xgZ191qgx37rviEnpMWC0QnYsg_j9EGVivNqqjc,3753
+aiosignal-1.3.2.dist-info/RECORD,,
+aiosignal-1.3.2.dist-info/WHEEL,sha256=pxeNX5JdtCe58PUSYP9upmc7jdRPgvT0Gm9kb1SHlVw,109
+aiosignal-1.3.2.dist-info/top_level.txt,sha256=z45aNOKGDdrI1roqZY3BGXQ22kJFPHBmVdwtLYLtXC0,10
+aiosignal/__init__.py,sha256=1oIrRl6kNpqFh32e7HfMFbMV_35v8sqJJFfnuKgmtEU,867
+aiosignal/__init__.pyi,sha256=xeCddYSS8fZAkz8S4HuKSR2IDe3N7RW_LKcXDPPA1Xk,311
+aiosignal/__pycache__/__init__.cpython-312.pyc,,
+aiosignal/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/WHEEL
ADDED
@@ -0,0 +1,6 @@
+Wheel-Version: 1.0
+Generator: setuptools (75.6.0)
+Root-Is-Purelib: true
+Tag: py2-none-any
+Tag: py3-none-any
+
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/aiosignal-1.3.2.dist-info/top_level.txt
ADDED
@@ -0,0 +1 @@
+aiosignal
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/__init__.py
ADDED
@@ -0,0 +1,69 @@
+# SPDX-License-Identifier: MIT
+
+from attr import (
+    NOTHING,
+    Attribute,
+    AttrsInstance,
+    Converter,
+    Factory,
+    NothingType,
+    _make_getattr,
+    assoc,
+    cmp_using,
+    define,
+    evolve,
+    field,
+    fields,
+    fields_dict,
+    frozen,
+    has,
+    make_class,
+    mutable,
+    resolve_types,
+    validate,
+)
+from attr._next_gen import asdict, astuple
+
+from . import converters, exceptions, filters, setters, validators
+
+
+__all__ = [
+    "NOTHING",
+    "Attribute",
+    "AttrsInstance",
+    "Converter",
+    "Factory",
+    "NothingType",
+    "__author__",
+    "__copyright__",
+    "__description__",
+    "__doc__",
+    "__email__",
+    "__license__",
+    "__title__",
+    "__url__",
+    "__version__",
+    "__version_info__",
+    "asdict",
+    "assoc",
+    "astuple",
+    "cmp_using",
+    "converters",
+    "define",
+    "evolve",
+    "exceptions",
+    "field",
+    "fields",
+    "fields_dict",
+    "filters",
+    "frozen",
+    "has",
+    "make_class",
+    "mutable",
+    "resolve_types",
+    "setters",
+    "validate",
+    "validators",
+]
+
+__getattr__ = _make_getattr(__name__)
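Note: this attrs/__init__.py simply re-exports the modern API (define, field, and the converters/validators submodules) from the underlying attr package. A minimal sketch of that API in use, relying only on the names exported above; the Point class is illustrative:

    from attrs import define, field, validators

    @define  # generates __init__, __repr__, __eq__, and slots
    class Point:
        x: int
        y: int = field(default=0, validator=validators.instance_of(int))

    assert Point(1) == Point(1, 0)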
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/__init__.pyi
ADDED
@@ -0,0 +1,263 @@
+import sys
+
+from typing import (
+    Any,
+    Callable,
+    Mapping,
+    Sequence,
+    overload,
+    TypeVar,
+)
+
+# Because we need to type our own stuff, we have to make everything from
+# attr explicitly public too.
+from attr import __author__ as __author__
+from attr import __copyright__ as __copyright__
+from attr import __description__ as __description__
+from attr import __email__ as __email__
+from attr import __license__ as __license__
+from attr import __title__ as __title__
+from attr import __url__ as __url__
+from attr import __version__ as __version__
+from attr import __version_info__ as __version_info__
+from attr import assoc as assoc
+from attr import Attribute as Attribute
+from attr import AttrsInstance as AttrsInstance
+from attr import cmp_using as cmp_using
+from attr import converters as converters
+from attr import Converter as Converter
+from attr import evolve as evolve
+from attr import exceptions as exceptions
+from attr import Factory as Factory
+from attr import fields as fields
+from attr import fields_dict as fields_dict
+from attr import filters as filters
+from attr import has as has
+from attr import make_class as make_class
+from attr import NOTHING as NOTHING
+from attr import resolve_types as resolve_types
+from attr import setters as setters
+from attr import validate as validate
+from attr import validators as validators
+from attr import attrib, asdict as asdict, astuple as astuple
+from attr import NothingType as NothingType
+
+if sys.version_info >= (3, 11):
+    from typing import dataclass_transform
+else:
+    from typing_extensions import dataclass_transform
+
+_T = TypeVar("_T")
+_C = TypeVar("_C", bound=type)
+
+_EqOrderType = bool | Callable[[Any], Any]
+_ValidatorType = Callable[[Any, "Attribute[_T]", _T], Any]
+_CallableConverterType = Callable[[Any], Any]
+_ConverterType = _CallableConverterType | Converter[Any, Any]
+_ReprType = Callable[[Any], str]
+_ReprArgType = bool | _ReprType
+_OnSetAttrType = Callable[[Any, "Attribute[Any]", Any], Any]
+_OnSetAttrArgType = _OnSetAttrType | list[_OnSetAttrType] | setters._NoOpType
+_FieldTransformer = Callable[
+    [type, list["Attribute[Any]"]], list["Attribute[Any]"]
+]
+# FIXME: in reality, if multiple validators are passed they must be in a list
+# or tuple, but those are invariant and so would prevent subtypes of
+# _ValidatorType from working when passed in a list or tuple.
+_ValidatorArgType = _ValidatorType[_T] | Sequence[_ValidatorType[_T]]
+
+@overload
+def field(
+    *,
+    default: None = ...,
+    validator: None = ...,
+    repr: _ReprArgType = ...,
+    hash: bool | None = ...,
+    init: bool = ...,
+    metadata: Mapping[Any, Any] | None = ...,
+    converter: None = ...,
+    factory: None = ...,
+    kw_only: bool = ...,
+    eq: bool | None = ...,
+    order: bool | None = ...,
+    on_setattr: _OnSetAttrArgType | None = ...,
+    alias: str | None = ...,
+    type: type | None = ...,
+) -> Any: ...
+
+# This form catches an explicit None or no default and infers the type from the
+# other arguments.
+@overload
+def field(
+    *,
+    default: None = ...,
+    validator: _ValidatorArgType[_T] | None = ...,
+    repr: _ReprArgType = ...,
+    hash: bool | None = ...,
+    init: bool = ...,
+    metadata: Mapping[Any, Any] | None = ...,
+    converter: _ConverterType
+    | list[_ConverterType]
+    | tuple[_ConverterType]
+    | None = ...,
+    factory: Callable[[], _T] | None = ...,
+    kw_only: bool = ...,
+    eq: _EqOrderType | None = ...,
+    order: _EqOrderType | None = ...,
+    on_setattr: _OnSetAttrArgType | None = ...,
+    alias: str | None = ...,
+    type: type | None = ...,
+) -> _T: ...
+
+# This form catches an explicit default argument.
+@overload
+def field(
+    *,
+    default: _T,
+    validator: _ValidatorArgType[_T] | None = ...,
+    repr: _ReprArgType = ...,
+    hash: bool | None = ...,
+    init: bool = ...,
+    metadata: Mapping[Any, Any] | None = ...,
+    converter: _ConverterType
+    | list[_ConverterType]
+    | tuple[_ConverterType]
+    | None = ...,
+    factory: Callable[[], _T] | None = ...,
+    kw_only: bool = ...,
+    eq: _EqOrderType | None = ...,
+    order: _EqOrderType | None = ...,
+    on_setattr: _OnSetAttrArgType | None = ...,
+    alias: str | None = ...,
+    type: type | None = ...,
+) -> _T: ...
+
+# This form covers type=non-Type: e.g. forward references (str), Any
+@overload
+def field(
+    *,
+    default: _T | None = ...,
+    validator: _ValidatorArgType[_T] | None = ...,
+    repr: _ReprArgType = ...,
+    hash: bool | None = ...,
+    init: bool = ...,
+    metadata: Mapping[Any, Any] | None = ...,
+    converter: _ConverterType
+    | list[_ConverterType]
+    | tuple[_ConverterType]
+    | None = ...,
+    factory: Callable[[], _T] | None = ...,
+    kw_only: bool = ...,
+    eq: _EqOrderType | None = ...,
+    order: _EqOrderType | None = ...,
+    on_setattr: _OnSetAttrArgType | None = ...,
+    alias: str | None = ...,
+    type: type | None = ...,
+) -> Any: ...
+@overload
+@dataclass_transform(field_specifiers=(attrib, field))
+def define(
+    maybe_cls: _C,
+    *,
+    these: dict[str, Any] | None = ...,
+    repr: bool = ...,
+    unsafe_hash: bool | None = ...,
+    hash: bool | None = ...,
+    init: bool = ...,
+    slots: bool = ...,
+    frozen: bool = ...,
+    weakref_slot: bool = ...,
+    str: bool = ...,
+    auto_attribs: bool = ...,
+    kw_only: bool = ...,
+    cache_hash: bool = ...,
+    auto_exc: bool = ...,
+    eq: bool | None = ...,
+    order: bool | None = ...,
+    auto_detect: bool = ...,
+    getstate_setstate: bool | None = ...,
+    on_setattr: _OnSetAttrArgType | None = ...,
+    field_transformer: _FieldTransformer | None = ...,
+    match_args: bool = ...,
+) -> _C: ...
+@overload
+@dataclass_transform(field_specifiers=(attrib, field))
+def define(
+    maybe_cls: None = ...,
+    *,
+    these: dict[str, Any] | None = ...,
+    repr: bool = ...,
+    unsafe_hash: bool | None = ...,
+    hash: bool | None = ...,
+    init: bool = ...,
+    slots: bool = ...,
+    frozen: bool = ...,
+    weakref_slot: bool = ...,
+    str: bool = ...,
+    auto_attribs: bool = ...,
+    kw_only: bool = ...,
+    cache_hash: bool = ...,
+    auto_exc: bool = ...,
+    eq: bool | None = ...,
+    order: bool | None = ...,
+    auto_detect: bool = ...,
+    getstate_setstate: bool | None = ...,
+    on_setattr: _OnSetAttrArgType | None = ...,
+    field_transformer: _FieldTransformer | None = ...,
+    match_args: bool = ...,
+) -> Callable[[_C], _C]: ...
+
+mutable = define
+
+@overload
+@dataclass_transform(frozen_default=True, field_specifiers=(attrib, field))
+def frozen(
+    maybe_cls: _C,
+    *,
+    these: dict[str, Any] | None = ...,
+    repr: bool = ...,
+    unsafe_hash: bool | None = ...,
+    hash: bool | None = ...,
+    init: bool = ...,
+    slots: bool = ...,
+    frozen: bool = ...,
+    weakref_slot: bool = ...,
+    str: bool = ...,
+    auto_attribs: bool = ...,
+    kw_only: bool = ...,
+    cache_hash: bool = ...,
+    auto_exc: bool = ...,
+    eq: bool | None = ...,
+    order: bool | None = ...,
+    auto_detect: bool = ...,
+    getstate_setstate: bool | None = ...,
+    on_setattr: _OnSetAttrArgType | None = ...,
+    field_transformer: _FieldTransformer | None = ...,
+    match_args: bool = ...,
+) -> _C: ...
+@overload
+@dataclass_transform(frozen_default=True, field_specifiers=(attrib, field))
+def frozen(
+    maybe_cls: None = ...,
+    *,
+    these: dict[str, Any] | None = ...,
+    repr: bool = ...,
+    unsafe_hash: bool | None = ...,
+    hash: bool | None = ...,
+    init: bool = ...,
+    slots: bool = ...,
+    frozen: bool = ...,
+    weakref_slot: bool = ...,
+    str: bool = ...,
+    auto_attribs: bool = ...,
+    kw_only: bool = ...,
+    cache_hash: bool = ...,
+    auto_exc: bool = ...,
+    eq: bool | None = ...,
+    order: bool | None = ...,
+    auto_detect: bool = ...,
+    getstate_setstate: bool | None = ...,
+    on_setattr: _OnSetAttrArgType | None = ...,
+    field_transformer: _FieldTransformer | None = ...,
+    match_args: bool = ...,
+) -> Callable[[_C], _C]: ...
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/converters.py
ADDED
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: MIT
+
+from attr.converters import *  # noqa: F403
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/exceptions.py
ADDED
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: MIT
+
+from attr.exceptions import *  # noqa: F403
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/filters.py
ADDED
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: MIT
+
+from attr.filters import *  # noqa: F403
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/py.typed
ADDED
File without changes
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/setters.py
ADDED
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: MIT
+
+from attr.setters import *  # noqa: F403
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/attrs/validators.py
ADDED
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: MIT
+
+from attr.validators import *  # noqa: F403
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/__init__.py
ADDED
@@ -0,0 +1,79 @@
+# Copyright 2020 The HuggingFace Datasets Authors and the TensorFlow Datasets Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+__version__ = "2.20.0"
+
+from .arrow_dataset import Dataset
+from .arrow_reader import ReadInstruction
+from .builder import ArrowBasedBuilder, BeamBasedBuilder, BuilderConfig, DatasetBuilder, GeneratorBasedBuilder
+from .combine import concatenate_datasets, interleave_datasets
+from .dataset_dict import DatasetDict, IterableDatasetDict
+from .download import *
+from .features import *
+from .fingerprint import disable_caching, enable_caching, is_caching_enabled, set_caching_enabled
+from .info import DatasetInfo, MetricInfo
+from .inspect import (
+    get_dataset_config_info,
+    get_dataset_config_names,
+    get_dataset_default_config_name,
+    get_dataset_infos,
+    get_dataset_split_names,
+    inspect_dataset,
+    inspect_metric,
+    list_datasets,
+    list_metrics,
+)
+from .iterable_dataset import IterableDataset
+from .load import load_dataset, load_dataset_builder, load_from_disk, load_metric
+from .metric import Metric
+from .splits import (
+    NamedSplit,
+    NamedSplitAll,
+    Split,
+    SplitBase,
+    SplitDict,
+    SplitGenerator,
+    SplitInfo,
+    SubSplitInfo,
+    percent,
+)
+from .tasks import *
+from .utils import *
+from .utils import logging
+
+
+# isort: split
+
+# Deprecated modules
+from . import arrow_dataset as _arrow_dataset
+from . import utils as _utils
+from .exceptions import ExpectedMoreDownloadedFiles, ExpectedMoreSplits, UnexpectedDownloadedFile, UnexpectedSplits
+from .utils import download_manager as _deprecated_download_manager
+from .utils import info_utils as _deprecated_info_utils
+
+
+_arrow_dataset.concatenate_datasets = concatenate_datasets
+_utils.DownloadConfig = DownloadConfig
+_utils.DownloadManager = DownloadManager
+_utils.DownloadMode = DownloadMode
+_deprecated_download_manager.DownloadConfig = DownloadConfig
+_deprecated_download_manager.DownloadMode = DownloadMode
+_deprecated_download_manager.DownloadManager = DownloadManager
+_deprecated_info_utils.ExpectedMoreDownloadedFiles = ExpectedMoreDownloadedFiles
+_deprecated_info_utils.ExpectedMoreSplits = ExpectedMoreSplits
+_deprecated_info_utils.UnexpectedDownloadedFile = UnexpectedDownloadedFile
+_deprecated_info_utils.UnexpectedSplits = UnexpectedSplits
+
+del _arrow_dataset, _utils, _deprecated_download_manager
+del _deprecated_info_utils, ExpectedMoreDownloadedFiles, ExpectedMoreSplits, UnexpectedDownloadedFile, UnexpectedSplits
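Note: the names re-exported above (Dataset, concatenate_datasets, load_dataset, ...) are the library's main entry points. A minimal in-memory sketch using only names exported by this module; the column name and values are illustrative:

    from datasets import Dataset, concatenate_datasets

    a = Dataset.from_dict({"text": ["foo", "bar"]})  # Arrow-backed, in memory
    b = Dataset.from_dict({"text": ["baz"]})
    combined = concatenate_datasets([a, b])
    assert combined.num_rows == 3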
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/arrow_writer.py
ADDED
@@ -0,0 +1,746 @@
+# Copyright 2020 The HuggingFace Datasets Authors and the TensorFlow Datasets Authors.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Lint as: python3
+"""To write records into Parquet files."""
+
+import errno
+import json
+import os
+import sys
+from pathlib import Path
+from typing import Any, Dict, Iterable, List, Optional, Tuple, Union
+
+import fsspec
+import numpy as np
+import pyarrow as pa
+import pyarrow.parquet as pq
+from fsspec.core import url_to_fs
+
+from . import config
+from .features import Features, Image, Value
+from .features.features import (
+    FeatureType,
+    _ArrayXDExtensionType,
+    cast_to_python_objects,
+    generate_from_arrow_type,
+    get_nested_type,
+    list_of_np_array_to_pyarrow_listarray,
+    numpy_to_pyarrow_listarray,
+    to_pyarrow_listarray,
+)
+from .filesystems import is_remote_filesystem
+from .info import DatasetInfo
+from .keyhash import DuplicatedKeysError, KeyHasher
+from .table import array_cast, cast_array_to_feature, embed_table_storage, table_cast
+from .utils import logging
+from .utils import tqdm as hf_tqdm
+from .utils.file_utils import hash_url_to_filename
+from .utils.py_utils import asdict, first_non_null_value
+
+
+logger = logging.get_logger(__name__)
+
+type_ = type  # keep python's type function
+
+
+class SchemaInferenceError(ValueError):
+    pass
+
+
+class TypedSequence:
+    """
+    This data container generalizes the typing when instantiating pyarrow arrays, tables or batches.
+
+    More specifically it adds several features:
+    - Support extension types like ``datasets.features.Array2DExtensionType``:
+      By default pyarrow arrays don't return extension arrays. One has to call
+      ``pa.ExtensionArray.from_storage(type, pa.array(data, type.storage_type))``
+      in order to get an extension array.
+    - Support for ``try_type`` parameter that can be used instead of ``type``:
+      When an array is transformed, we like to keep the same type as before if possible.
+      For example when calling :func:`datasets.Dataset.map`, we don't want to change the type
+      of each column by default.
+    - Better error message when a pyarrow array overflows.
+
+    Example::
+
+        from datasets.features import Array2D, Array2DExtensionType, Value
+        from datasets.arrow_writer import TypedSequence
+        import pyarrow as pa
+
+        arr = pa.array(TypedSequence([1, 2, 3], type=Value("int32")))
+        assert arr.type == pa.int32()
+
+        arr = pa.array(TypedSequence([1, 2, 3], try_type=Value("int32")))
+        assert arr.type == pa.int32()
+
+        arr = pa.array(TypedSequence(["foo", "bar"], try_type=Value("int32")))
+        assert arr.type == pa.string()
+
+        arr = pa.array(TypedSequence([[[1, 2, 3]]], type=Array2D((1, 3), "int64")))
+        assert arr.type == Array2DExtensionType((1, 3), "int64")
+
+        table = pa.Table.from_pydict({
+            "image": TypedSequence([[[1, 2, 3]]], type=Array2D((1, 3), "int64"))
+        })
+        assert table["image"].type == Array2DExtensionType((1, 3), "int64")
+
+    """
+
+    def __init__(
+        self,
+        data: Iterable,
+        type: Optional[FeatureType] = None,
+        try_type: Optional[FeatureType] = None,
+        optimized_int_type: Optional[FeatureType] = None,
+    ):
+        # assert type is None or try_type is None,
+        if type is not None and try_type is not None:
+            raise ValueError("You cannot specify both type and try_type")
+        # set attributes
+        self.data = data
+        self.type = type
+        self.try_type = try_type  # is ignored if it doesn't match the data
+        self.optimized_int_type = optimized_int_type
+        # when trying a type (is ignored if data is not compatible)
+        self.trying_type = self.try_type is not None
+        self.trying_int_optimization = optimized_int_type is not None and type is None and try_type is None
+        # used to get back the inferred type after __arrow_array__() is called once
+        self._inferred_type = None
+
+    def get_inferred_type(self) -> FeatureType:
+        """Return the inferred feature type.
+        This is done by converting the sequence to an Arrow array, and getting the corresponding
+        feature type.
+
+        Since building the Arrow array can be expensive, the value of the inferred type is cached
+        as soon as pa.array is called on the typed sequence.
+
+        Returns:
+            FeatureType: inferred feature type of the sequence.
+        """
+        if self._inferred_type is None:
+            self._inferred_type = generate_from_arrow_type(pa.array(self).type)
+        return self._inferred_type
+
+    @staticmethod
+    def _infer_custom_type_and_encode(data: Iterable) -> Tuple[Iterable, Optional[FeatureType]]:
+        """Implement type inference for custom objects like PIL.Image.Image -> Image type.
+
+        This function is only used for custom python objects that can't be directly passed to build
+        an Arrow array. In such cases it infers the feature type to use, and it encodes the data so
+        that they can be passed to an Arrow array.
+
+        Args:
+            data (Iterable): array of data to infer the type, e.g. a list of PIL images.
+
+        Returns:
+            Tuple[Iterable, Optional[FeatureType]]: a tuple with:
+                - the (possibly encoded) array, if the inferred feature type requires encoding
+                - the inferred feature type if the array is made of supported custom objects like
+                  PIL images, else None.
+        """
+        if config.PIL_AVAILABLE and "PIL" in sys.modules:
+            import PIL.Image
+
+            non_null_idx, non_null_value = first_non_null_value(data)
+            if isinstance(non_null_value, PIL.Image.Image):
+                return [Image().encode_example(value) if value is not None else None for value in data], Image()
+        return data, None
+
+    def __arrow_array__(self, type: Optional[pa.DataType] = None):
+        """This function is called when calling pa.array(typed_sequence)"""
+
+        if type is not None:
+            raise ValueError("TypedSequence is supposed to be used with pa.array(typed_sequence, type=None)")
+        del type  # make sure we don't use it
+        data = self.data
+        # automatic type inference for custom objects
+        if self.type is None and self.try_type is None:
+            data, self._inferred_type = self._infer_custom_type_and_encode(data)
+        if self._inferred_type is None:
+            type = self.try_type if self.trying_type else self.type
+        else:
+            type = self._inferred_type
+        pa_type = get_nested_type(type) if type is not None else None
+        optimized_int_pa_type = (
+            get_nested_type(self.optimized_int_type) if self.optimized_int_type is not None else None
+        )
+        trying_cast_to_python_objects = False
+        try:
+            # custom pyarrow types
+            if isinstance(pa_type, _ArrayXDExtensionType):
+                storage = to_pyarrow_listarray(data, pa_type)
+                return pa.ExtensionArray.from_storage(pa_type, storage)
+
+            # efficient np array to pyarrow array
+            if isinstance(data, np.ndarray):
+                out = numpy_to_pyarrow_listarray(data)
+            elif isinstance(data, list) and data and isinstance(first_non_null_value(data)[1], np.ndarray):
+                out = list_of_np_array_to_pyarrow_listarray(data)
+            else:
+                trying_cast_to_python_objects = True
+                out = pa.array(cast_to_python_objects(data, only_1d_for_numpy=True))
+            # use smaller integer precisions if possible
+            if self.trying_int_optimization:
+                if pa.types.is_int64(out.type):
+                    out = out.cast(optimized_int_pa_type)
+                elif pa.types.is_list(out.type):
+                    if pa.types.is_int64(out.type.value_type):
+                        out = array_cast(out, pa.list_(optimized_int_pa_type))
+                    elif pa.types.is_list(out.type.value_type) and pa.types.is_int64(out.type.value_type.value_type):
+                        out = array_cast(out, pa.list_(pa.list_(optimized_int_pa_type)))
+            # otherwise we can finally use the user's type
+            elif type is not None:
+                # We use cast_array_to_feature to support casting to custom types like Audio and Image
|
| 206 |
+
# Also, when trying type "string", we don't want to convert integers or floats to "string".
|
| 207 |
+
# We only do it if trying_type is False - since this is what the user asks for.
|
| 208 |
+
out = cast_array_to_feature(
|
| 209 |
+
out, type, allow_primitive_to_str=not self.trying_type, allow_decimal_to_str=not self.trying_type
|
| 210 |
+
)
|
| 211 |
+
return out
|
| 212 |
+
except (
|
| 213 |
+
TypeError,
|
| 214 |
+
pa.lib.ArrowInvalid,
|
| 215 |
+
pa.lib.ArrowNotImplementedError,
|
| 216 |
+
) as e: # handle type errors and overflows
|
| 217 |
+
# Ignore ArrowNotImplementedError caused by trying type, otherwise re-raise
|
| 218 |
+
if not self.trying_type and isinstance(e, pa.lib.ArrowNotImplementedError):
|
| 219 |
+
raise
|
| 220 |
+
|
| 221 |
+
if self.trying_type:
|
| 222 |
+
try: # second chance
|
| 223 |
+
if isinstance(data, np.ndarray):
|
| 224 |
+
return numpy_to_pyarrow_listarray(data)
|
| 225 |
+
elif isinstance(data, list) and data and any(isinstance(value, np.ndarray) for value in data):
|
| 226 |
+
return list_of_np_array_to_pyarrow_listarray(data)
|
| 227 |
+
else:
|
| 228 |
+
trying_cast_to_python_objects = True
|
| 229 |
+
return pa.array(cast_to_python_objects(data, only_1d_for_numpy=True))
|
| 230 |
+
except pa.lib.ArrowInvalid as e:
|
| 231 |
+
if "overflow" in str(e):
|
| 232 |
+
raise OverflowError(
|
| 233 |
+
f"There was an overflow with type {type_(data)}. Try to reduce writer_batch_size to have batches smaller than 2GB.\n({e})"
|
| 234 |
+
) from None
|
| 235 |
+
elif self.trying_int_optimization and "not in range" in str(e):
|
| 236 |
+
optimized_int_pa_type_str = np.dtype(optimized_int_pa_type.to_pandas_dtype()).name
|
| 237 |
+
logger.info(
|
| 238 |
+
f"Failed to cast a sequence to {optimized_int_pa_type_str}. Falling back to int64."
|
| 239 |
+
)
|
| 240 |
+
return out
|
| 241 |
+
elif trying_cast_to_python_objects and "Could not convert" in str(e):
|
| 242 |
+
out = pa.array(
|
| 243 |
+
cast_to_python_objects(data, only_1d_for_numpy=True, optimize_list_casting=False)
|
| 244 |
+
)
|
| 245 |
+
if type is not None:
|
| 246 |
+
out = cast_array_to_feature(
|
| 247 |
+
out, type, allow_primitive_to_str=True, allow_decimal_to_str=True
|
| 248 |
+
)
|
| 249 |
+
return out
|
| 250 |
+
else:
|
| 251 |
+
raise
|
| 252 |
+
elif "overflow" in str(e):
|
| 253 |
+
raise OverflowError(
|
| 254 |
+
f"There was an overflow with type {type_(data)}. Try to reduce writer_batch_size to have batches smaller than 2GB.\n({e})"
|
| 255 |
+
) from None
|
| 256 |
+
elif self.trying_int_optimization and "not in range" in str(e):
|
| 257 |
+
optimized_int_pa_type_str = np.dtype(optimized_int_pa_type.to_pandas_dtype()).name
|
| 258 |
+
logger.info(f"Failed to cast a sequence to {optimized_int_pa_type_str}. Falling back to int64.")
|
| 259 |
+
return out
|
| 260 |
+
elif trying_cast_to_python_objects and "Could not convert" in str(e):
|
| 261 |
+
out = pa.array(cast_to_python_objects(data, only_1d_for_numpy=True, optimize_list_casting=False))
|
| 262 |
+
if type is not None:
|
| 263 |
+
out = cast_array_to_feature(out, type, allow_primitive_to_str=True, allow_decimal_to_str=True)
|
| 264 |
+
return out
|
| 265 |
+
else:
|
| 266 |
+
raise
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
class OptimizedTypedSequence(TypedSequence):
|
| 270 |
+
def __init__(
|
| 271 |
+
self,
|
| 272 |
+
data,
|
| 273 |
+
type: Optional[FeatureType] = None,
|
| 274 |
+
try_type: Optional[FeatureType] = None,
|
| 275 |
+
col: Optional[str] = None,
|
| 276 |
+
optimized_int_type: Optional[FeatureType] = None,
|
| 277 |
+
):
|
| 278 |
+
optimized_int_type_by_col = {
|
| 279 |
+
"attention_mask": Value("int8"), # binary tensor
|
| 280 |
+
"special_tokens_mask": Value("int8"),
|
| 281 |
+
"input_ids": Value("int32"), # typical vocab size: 0-50k (max ~500k, never > 1M)
|
| 282 |
+
"token_type_ids": Value(
|
| 283 |
+
"int8"
|
| 284 |
+
), # binary mask; some (XLNetModel) use an additional token represented by a 2
|
| 285 |
+
}
|
| 286 |
+
if type is None and try_type is None:
|
| 287 |
+
optimized_int_type = optimized_int_type_by_col.get(col, None)
|
| 288 |
+
super().__init__(data, type=type, try_type=try_type, optimized_int_type=optimized_int_type)
|
| 289 |
+
|
| 290 |
+
|
| 291 |
+
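

# A minimal, hypothetical sketch (not part of the original module) showing the
# column-based int optimization above: with no explicit `type`/`try_type`, known
# tokenizer columns are downcast via `try_type` semantics, falling back to int64
# when the values don't fit.
def _example_optimized_typed_sequence():
    seq = OptimizedTypedSequence([1, 0, 1, 1], col="attention_mask")
    assert pa.array(seq).type == pa.int8()  # downcast from int64 to int8
    seq = OptimizedTypedSequence([[0, 50256]], col="input_ids")
    assert pa.array(seq).type == pa.list_(pa.int32())  # nested ints are cast too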


class ArrowWriter:
    """Shuffles and writes Examples to Arrow files."""

    _WRITER_CLASS = pa.RecordBatchStreamWriter

    def __init__(
        self,
        schema: Optional[pa.Schema] = None,
        features: Optional[Features] = None,
        path: Optional[str] = None,
        stream: Optional[pa.NativeFile] = None,
        fingerprint: Optional[str] = None,
        writer_batch_size: Optional[int] = None,
        hash_salt: Optional[str] = None,
        check_duplicates: Optional[bool] = False,
        disable_nullable: bool = False,
        update_features: bool = False,
        with_metadata: bool = True,
        unit: str = "examples",
        embed_local_files: bool = False,
        storage_options: Optional[dict] = None,
    ):
        if path is None and stream is None:
            raise ValueError("At least one of path and stream must be provided.")
        if features is not None:
            self._features = features
            self._schema = None
        elif schema is not None:
            self._schema: pa.Schema = schema
            self._features = Features.from_arrow_schema(self._schema)
        else:
            self._features = None
            self._schema = None

        if hash_salt is not None:
            # Create KeyHasher instance using split name as hash salt
            self._hasher = KeyHasher(hash_salt)
        else:
            self._hasher = KeyHasher("")

        self._check_duplicates = check_duplicates
        self._disable_nullable = disable_nullable

        if stream is None:
            fs, path = url_to_fs(path, **(storage_options or {}))
            self._fs: fsspec.AbstractFileSystem = fs
            self._path = path if not is_remote_filesystem(self._fs) else self._fs.unstrip_protocol(path)
            self.stream = self._fs.open(path, "wb")
            self._closable_stream = True
        else:
            self._fs = None
            self._path = None
            self.stream = stream
            self._closable_stream = False

        self.fingerprint = fingerprint
        self.disable_nullable = disable_nullable
        self.writer_batch_size = writer_batch_size or config.DEFAULT_MAX_BATCH_SIZE
        self.update_features = update_features
        self.with_metadata = with_metadata
        self.unit = unit
        self.embed_local_files = embed_local_files

        self._num_examples = 0
        self._num_bytes = 0
        self.current_examples: List[Tuple[Dict[str, Any], str]] = []
        self.current_rows: List[pa.Table] = []
        self.pa_writer: Optional[pa.RecordBatchStreamWriter] = None
        self.hkey_record = []

    def __len__(self):
        """Return the number of written and staged examples"""
        return self._num_examples + len(self.current_examples) + len(self.current_rows)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.close()

    def close(self):
        # Try closing if opened; if closed: pyarrow.lib.ArrowInvalid: Invalid operation on closed file
        if self.pa_writer:  # it might be None
            try:
                self.pa_writer.close()
            except Exception:  # pyarrow.lib.ArrowInvalid, OSError
                pass
        if self._closable_stream and not self.stream.closed:
            self.stream.close()  # This also closes self.pa_writer if it is opened

    def _build_writer(self, inferred_schema: pa.Schema):
        schema = self.schema
        inferred_features = Features.from_arrow_schema(inferred_schema)
        if self._features is not None:
            if self.update_features:  # keep original features if they match, or update them
                fields = {field.name: field for field in self._features.type}
                for inferred_field in inferred_features.type:
                    name = inferred_field.name
                    if name in fields:
                        if inferred_field == fields[name]:
                            inferred_features[name] = self._features[name]
                self._features = inferred_features
                schema: pa.Schema = inferred_schema
        else:
            self._features = inferred_features
            schema: pa.Schema = inferred_features.arrow_schema
        if self.disable_nullable:
            schema = pa.schema(pa.field(field.name, field.type, nullable=False) for field in schema)
        if self.with_metadata:
            schema = schema.with_metadata(self._build_metadata(DatasetInfo(features=self._features), self.fingerprint))
        else:
            schema = schema.with_metadata({})
        self._schema = schema
        self.pa_writer = self._WRITER_CLASS(self.stream, schema)

    @property
    def schema(self):
        _schema = (
            self._schema
            if self._schema is not None
            else (pa.schema(self._features.type) if self._features is not None else None)
        )
        if self._disable_nullable and _schema is not None:
            _schema = pa.schema(pa.field(field.name, field.type, nullable=False) for field in _schema)
        return _schema if _schema is not None else []

    @staticmethod
    def _build_metadata(info: DatasetInfo, fingerprint: Optional[str] = None) -> Dict[str, str]:
        info_keys = ["features"]  # we can add support for more DatasetInfo keys in the future
        info_as_dict = asdict(info)
        metadata = {}
        metadata["info"] = {key: info_as_dict[key] for key in info_keys}
        if fingerprint is not None:
            metadata["fingerprint"] = fingerprint
        return {"huggingface": json.dumps(metadata)}

    def write_examples_on_file(self):
        """Write stored examples from the write-pool of examples. It makes a table out of the examples and writes it."""
        if not self.current_examples:
            return
        # preserve the order of the columns
        if self.schema:
            schema_cols = set(self.schema.names)
            examples_cols = self.current_examples[0][0].keys()  # .keys() preserves the order (unlike set)
            common_cols = [col for col in self.schema.names if col in examples_cols]
            extra_cols = [col for col in examples_cols if col not in schema_cols]
            cols = common_cols + extra_cols
        else:
            cols = list(self.current_examples[0][0])
        batch_examples = {}
        for col in cols:
            # We use row[0][col] since current_examples contains (example, key) tuples.
            # Moreover, examples could be Arrow arrays of 1 element.
            # This can happen in `.map()` when we want to re-write the same Arrow data
            if all(isinstance(row[0][col], (pa.Array, pa.ChunkedArray)) for row in self.current_examples):
                arrays = [row[0][col] for row in self.current_examples]
                arrays = [
                    chunk
                    for array in arrays
                    for chunk in (array.chunks if isinstance(array, pa.ChunkedArray) else [array])
                ]
                batch_examples[col] = pa.concat_arrays(arrays)
            else:
                batch_examples[col] = [
                    row[0][col].to_pylist()[0] if isinstance(row[0][col], (pa.Array, pa.ChunkedArray)) else row[0][col]
                    for row in self.current_examples
                ]
        self.write_batch(batch_examples=batch_examples)
        self.current_examples = []

    def write_rows_on_file(self):
        """Write stored rows from the write-pool of rows. It concatenates the single-row tables and writes the resulting table."""
        if not self.current_rows:
            return
        table = pa.concat_tables(self.current_rows)
        self.write_table(table)
        self.current_rows = []

    def write(
        self,
        example: Dict[str, Any],
        key: Optional[Union[str, int, bytes]] = None,
        writer_batch_size: Optional[int] = None,
    ):
        """Add a given (Example, Key) pair to the write-pool of examples which is written to file.

        Args:
            example: the Example to add.
            key: Optional, a unique identifier (str, int or bytes) associated with each example
        """
        # Utilize the keys and duplicate checking when `self._check_duplicates` is passed True
        if self._check_duplicates:
            # Create unique hash from key and store as (key, example) pairs
            hash = self._hasher.hash(key)
            self.current_examples.append((example, hash))
            # Maintain record of keys and their respective hashes for checking duplicates
            self.hkey_record.append((hash, key))
        else:
            # Store example as a tuple so as to keep the structure of `self.current_examples` uniform
            self.current_examples.append((example, ""))

        if writer_batch_size is None:
            writer_batch_size = self.writer_batch_size
        if writer_batch_size is not None and len(self.current_examples) >= writer_batch_size:
            if self._check_duplicates:
                self.check_duplicate_keys()
                # Re-initializing to empty list for next batch
                self.hkey_record = []

            self.write_examples_on_file()

    def check_duplicate_keys(self):
        """Raises error if duplicates found in a batch"""
        tmp_record = set()
        for hash, key in self.hkey_record:
            if hash in tmp_record:
                duplicate_key_indices = [
                    str(self._num_examples + index)
                    for index, (duplicate_hash, _) in enumerate(self.hkey_record)
                    if duplicate_hash == hash
                ]

                raise DuplicatedKeysError(key, duplicate_key_indices)
            else:
                tmp_record.add(hash)

    def write_row(self, row: pa.Table, writer_batch_size: Optional[int] = None):
        """Add a given single-row Table to the write-pool of rows which is written to file.

        Args:
            row: the row to add.
        """
        if len(row) != 1:
            raise ValueError(f"Only single-row pyarrow tables are allowed but got table with {len(row)} rows.")
        self.current_rows.append(row)
        if writer_batch_size is None:
            writer_batch_size = self.writer_batch_size
        if writer_batch_size is not None and len(self.current_rows) >= writer_batch_size:
            self.write_rows_on_file()

    def write_batch(
        self,
        batch_examples: Dict[str, List],
        writer_batch_size: Optional[int] = None,
    ):
        """Write a batch of Examples to file.
        Ignores the batch if it appears to be empty,
        preventing a potential schema update of unknown types.

        Args:
            batch_examples: the batch of examples to add.
        """
        if batch_examples and len(next(iter(batch_examples.values()))) == 0:
            return
        features = None if self.pa_writer is None and self.update_features else self._features
        try_features = self._features if self.pa_writer is None and self.update_features else None
        arrays = []
        inferred_features = Features()
        # preserve the order of the columns
        if self.schema:
            schema_cols = set(self.schema.names)
            batch_cols = batch_examples.keys()  # .keys() preserves the order (unlike set)
            common_cols = [col for col in self.schema.names if col in batch_cols]
            extra_cols = [col for col in batch_cols if col not in schema_cols]
            cols = common_cols + extra_cols
        else:
            cols = list(batch_examples)
        for col in cols:
            col_values = batch_examples[col]
            col_type = features[col] if features else None
            if isinstance(col_values, (pa.Array, pa.ChunkedArray)):
                array = cast_array_to_feature(col_values, col_type) if col_type is not None else col_values
                arrays.append(array)
                inferred_features[col] = generate_from_arrow_type(col_values.type)
            else:
                col_try_type = try_features[col] if try_features is not None and col in try_features else None
                typed_sequence = OptimizedTypedSequence(col_values, type=col_type, try_type=col_try_type, col=col)
                arrays.append(pa.array(typed_sequence))
                inferred_features[col] = typed_sequence.get_inferred_type()
        schema = inferred_features.arrow_schema if self.pa_writer is None else self.schema
        pa_table = pa.Table.from_arrays(arrays, schema=schema)
        self.write_table(pa_table, writer_batch_size)

    def write_table(self, pa_table: pa.Table, writer_batch_size: Optional[int] = None):
        """Write a Table to file.

        Args:
            pa_table: the Table to add.
        """
        if writer_batch_size is None:
            writer_batch_size = self.writer_batch_size
        if self.pa_writer is None:
            self._build_writer(inferred_schema=pa_table.schema)
        pa_table = pa_table.combine_chunks()
        pa_table = table_cast(pa_table, self._schema)
        if self.embed_local_files:
            pa_table = embed_table_storage(pa_table)
        self._num_bytes += pa_table.nbytes
        self._num_examples += pa_table.num_rows
        self.pa_writer.write_table(pa_table, writer_batch_size)

    def finalize(self, close_stream=True):
        self.write_rows_on_file()
        # In case current_examples < writer_batch_size, but user uses finalize()
        if self._check_duplicates:
            self.check_duplicate_keys()
            # Re-initializing to empty list for next batch
            self.hkey_record = []
        self.write_examples_on_file()
        # If schema is known, infer features even if no examples were written
        if self.pa_writer is None and self.schema:
            self._build_writer(self.schema)
        if self.pa_writer is not None:
            self.pa_writer.close()
            self.pa_writer = None
            if close_stream:
                self.stream.close()
        else:
            if close_stream:
                self.stream.close()
            raise SchemaInferenceError("Please pass `features` or at least one example when writing data")
        logger.debug(
            f"Done writing {self._num_examples} {self.unit} in {self._num_bytes} bytes {self._path if self._path else ''}."
        )
        return self._num_examples, self._num_bytes
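

# A hedged usage sketch (not part of the original module): writing a couple of
# examples to a local Arrow file; the path and function name are hypothetical.
def _example_arrow_writer(path="data.arrow"):
    with ArrowWriter(path=path, features=Features({"text": Value("string")})) as writer:
        writer.write({"text": "hello"})
        writer.write({"text": "world"})
        num_examples, num_bytes = writer.finalize()  # flushes staged examples first
    return num_examples, num_bytes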


class ParquetWriter(ArrowWriter):
    _WRITER_CLASS = pq.ParquetWriter


class BeamWriter:
    """
    Shuffles and writes Examples to Arrow files.
    The Arrow files are converted from Parquet files that are the output of Apache Beam pipelines.
    """

    def __init__(
        self,
        features: Optional[Features] = None,
        schema: Optional[pa.Schema] = None,
        path: Optional[str] = None,
        namespace: Optional[str] = None,
        cache_dir: Optional[str] = None,
    ):
        if features is None and schema is None:
            raise ValueError("At least one of features and schema must be provided.")
        if path is None:
            raise ValueError("Path must be provided.")

        if features is not None:
            self._features: Features = features
            self._schema: pa.Schema = features.arrow_schema
        else:
            self._schema: pa.Schema = schema
            self._features: Features = Features.from_arrow_schema(schema)

        self._path = path
        self._parquet_path = os.path.splitext(path)[0]  # remove extension
        self._namespace = namespace or "default"
        self._num_examples = None
        self._cache_dir = cache_dir or config.HF_DATASETS_CACHE

    def write_from_pcollection(self, pcoll_examples):
        """Add the final steps of the beam pipeline: write to parquet files."""
        import apache_beam as beam

        def inc_num_examples(example):
            beam.metrics.Metrics.counter(self._namespace, "num_examples").inc()

        # count examples
        _ = pcoll_examples | "Count N. Examples" >> beam.Map(inc_num_examples)

        # save dataset
        return (
            pcoll_examples
            | "Get values" >> beam.Values()
            | "Save to parquet"
            >> beam.io.parquetio.WriteToParquet(
                self._parquet_path, self._schema, shard_name_template="-SSSSS-of-NNNNN.parquet"
            )
        )

    def finalize(self, metrics_query_result: dict):
        """
        Run after the pipeline has finished.
        It converts the resulting parquet files to arrow and completes the info from the pipeline metrics.

        Args:
            metrics_query_result: `dict` obtained from pipeline_results.metrics().query(m_filter). Make sure
                that the filter keeps only the metrics for the considered split, under the namespace `split_name`.
        """

        # Beam FileSystems require the system's path separator in the older versions
        fs, parquet_path = url_to_fs(self._parquet_path)
        parquet_path = str(Path(parquet_path)) if not is_remote_filesystem(fs) else fs.unstrip_protocol(parquet_path)

        shards = fs.glob(parquet_path + "*.parquet")
        num_bytes = sum(fs.sizes(shards))
        shard_lengths = get_parquet_lengths(shards)

        # Convert to arrow
        if self._path.endswith(".arrow"):
            logger.info(f"Converting parquet files {self._parquet_path} to arrow {self._path}")
            try:  # stream conversion
                num_bytes = 0
                for shard in hf_tqdm(shards, unit="shards"):
                    with fs.open(shard, "rb") as source:
                        with fs.open(shard.replace(".parquet", ".arrow"), "wb") as destination:
                            shard_num_bytes, _ = parquet_to_arrow(source, destination)
                            num_bytes += shard_num_bytes
            except OSError as e:  # broken pipe can happen if the connection is unstable, do local conversion instead
                if e.errno != errno.EPIPE:  # not a broken pipe
                    raise
                logger.warning(
                    "Broken Pipe during stream conversion from parquet to arrow. Using local convert instead"
                )
                local_convert_dir = os.path.join(self._cache_dir, "beam_convert")
                os.makedirs(local_convert_dir, exist_ok=True)
                num_bytes = 0
                for shard in hf_tqdm(shards, unit="shards"):
                    local_parquet_path = os.path.join(local_convert_dir, hash_url_to_filename(shard) + ".parquet")
                    fs.download(shard, local_parquet_path)
                    local_arrow_path = local_parquet_path.replace(".parquet", ".arrow")
                    shard_num_bytes, _ = parquet_to_arrow(local_parquet_path, local_arrow_path)
                    num_bytes += shard_num_bytes
                    remote_arrow_path = shard.replace(".parquet", ".arrow")
                    fs.upload(local_arrow_path, remote_arrow_path)

        # Save metrics
        counters_dict = {metric.key.metric.name: metric.result for metric in metrics_query_result["counters"]}
        self._num_examples = counters_dict["num_examples"]
        self._num_bytes = num_bytes
        self._shard_lengths = shard_lengths
        return self._num_examples, self._num_bytes


def get_parquet_lengths(sources) -> List[int]:
    shard_lengths = []
    for source in hf_tqdm(sources, unit="parquet files"):
        parquet_file = pa.parquet.ParquetFile(source)
        shard_lengths.append(parquet_file.metadata.num_rows)
    return shard_lengths


def parquet_to_arrow(source, destination) -> Tuple[int, int]:
    """Convert parquet file to arrow file. Inputs can be str paths or file-like objects"""
    stream = None if isinstance(destination, str) else destination
    parquet_file = pa.parquet.ParquetFile(source)
    # Beam can create empty Parquet files, so we need to pass the source Parquet file's schema
    with ArrowWriter(schema=parquet_file.schema_arrow, path=destination, stream=stream) as writer:
        for record_batch in parquet_file.iter_batches():
            pa_table = pa.Table.from_batches([record_batch])
            writer.write_table(pa_table)
        num_bytes, num_examples = writer.finalize()
    return num_bytes, num_examples
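

# A hedged sketch (not part of the original module): converting a local Parquet
# shard to Arrow with the helper above; both file paths are hypothetical.
def _example_parquet_to_arrow():
    num_bytes, num_examples = parquet_to_arrow("shard.parquet", "shard.arrow")
    return num_bytes, num_examples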
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/combine.py
ADDED
@@ -0,0 +1,215 @@
from typing import List, Optional, TypeVar

from .arrow_dataset import Dataset, _concatenate_map_style_datasets, _interleave_map_style_datasets
from .dataset_dict import DatasetDict, IterableDatasetDict
from .info import DatasetInfo
from .iterable_dataset import IterableDataset, _concatenate_iterable_datasets, _interleave_iterable_datasets
from .splits import NamedSplit
from .utils import logging
from .utils.py_utils import Literal


logger = logging.get_logger(__name__)


DatasetType = TypeVar("DatasetType", Dataset, IterableDataset)


def interleave_datasets(
    datasets: List[DatasetType],
    probabilities: Optional[List[float]] = None,
    seed: Optional[int] = None,
    info: Optional[DatasetInfo] = None,
    split: Optional[NamedSplit] = None,
    stopping_strategy: Literal["first_exhausted", "all_exhausted"] = "first_exhausted",
) -> DatasetType:
    """
    Interleave several datasets (sources) into a single dataset.
    The new dataset is constructed by alternating between the sources to get the examples.

    You can use this function on a list of [`Dataset`] objects, or on a list of [`IterableDataset`] objects.

    - If `probabilities` is `None` (default) the new dataset is constructed by cycling between each source to get the examples.
    - If `probabilities` is not `None`, the new dataset is constructed by getting examples from a random source at a time according to the provided probabilities.

    The resulting dataset ends when one of the source datasets runs out of examples, except when `oversampling` is `True`,
    in which case the resulting dataset ends when all datasets have run out of examples at least once.

    Note for iterable datasets:

    In a distributed setup or in PyTorch DataLoader workers, the stopping strategy is applied per process.
    Therefore the "first_exhausted" strategy on a sharded iterable dataset can generate fewer samples in total (up to 1 missing sample per subdataset per worker).

    Args:
        datasets (`List[Dataset]` or `List[IterableDataset]`):
            List of datasets to interleave.
        probabilities (`List[float]`, *optional*, defaults to `None`):
            If specified, the new dataset is constructed by sampling
            examples from one source at a time according to these probabilities.
        seed (`int`, *optional*, defaults to `None`):
            The random seed used to choose a source for each example.
        info ([`DatasetInfo`], *optional*):
            Dataset information, like description, citation, etc.
            <Added version="2.4.0"/>
        split ([`NamedSplit`], *optional*):
            Name of the dataset split.
            <Added version="2.4.0"/>
        stopping_strategy (`str`, defaults to `first_exhausted`):
            Two strategies are proposed right now, `first_exhausted` and `all_exhausted`.
            By default, `first_exhausted` is an undersampling strategy, i.e. the dataset construction is stopped as soon as one dataset has run out of samples.
            If the strategy is `all_exhausted`, we use an oversampling strategy, i.e. the dataset construction is stopped as soon as every sample of every dataset has been added at least once.
            Note that if the strategy is `all_exhausted`, the interleaved dataset size can get enormous:
            - with no probabilities, the resulting dataset will have `max_length_datasets*nb_dataset` samples.
            - with given probabilities, the resulting dataset will have more samples if some datasets have a really low probability of being visited.
    Returns:
        [`Dataset`] or [`IterableDataset`]: Return type depends on the input `datasets`
        parameter. `Dataset` if the input is a list of `Dataset`, `IterableDataset` if the input is a list of
        `IterableDataset`.

    Example:

        For regular datasets (map-style):

        ```python
        >>> from datasets import Dataset, interleave_datasets
        >>> d1 = Dataset.from_dict({"a": [0, 1, 2]})
        >>> d2 = Dataset.from_dict({"a": [10, 11, 12]})
        >>> d3 = Dataset.from_dict({"a": [20, 21, 22]})
        >>> dataset = interleave_datasets([d1, d2, d3], probabilities=[0.7, 0.2, 0.1], seed=42, stopping_strategy="all_exhausted")
        >>> dataset["a"]
        [10, 0, 11, 1, 2, 20, 12, 10, 0, 1, 2, 21, 0, 11, 1, 2, 0, 1, 12, 2, 10, 0, 22]
        >>> dataset = interleave_datasets([d1, d2, d3], probabilities=[0.7, 0.2, 0.1], seed=42)
        >>> dataset["a"]
        [10, 0, 11, 1, 2]
        >>> dataset = interleave_datasets([d1, d2, d3])
        >>> dataset["a"]
        [0, 10, 20, 1, 11, 21, 2, 12, 22]
        >>> dataset = interleave_datasets([d1, d2, d3], stopping_strategy="all_exhausted")
        >>> dataset["a"]
        [0, 10, 20, 1, 11, 21, 2, 12, 22]
        >>> d1 = Dataset.from_dict({"a": [0, 1, 2]})
        >>> d2 = Dataset.from_dict({"a": [10, 11, 12, 13]})
        >>> d3 = Dataset.from_dict({"a": [20, 21, 22, 23, 24]})
        >>> dataset = interleave_datasets([d1, d2, d3])
        >>> dataset["a"]
        [0, 10, 20, 1, 11, 21, 2, 12, 22]
        >>> dataset = interleave_datasets([d1, d2, d3], stopping_strategy="all_exhausted")
        >>> dataset["a"]
        [0, 10, 20, 1, 11, 21, 2, 12, 22, 0, 13, 23, 1, 10, 24]
        >>> dataset = interleave_datasets([d1, d2, d3], probabilities=[0.7, 0.2, 0.1], seed=42)
        >>> dataset["a"]
        [10, 0, 11, 1, 2]
        >>> dataset = interleave_datasets([d1, d2, d3], probabilities=[0.7, 0.2, 0.1], seed=42, stopping_strategy="all_exhausted")
        >>> dataset["a"]
        [10, 0, 11, 1, 2, 20, 12, 13, ..., 0, 1, 2, 0, 24]
        ```

        For datasets in streaming mode (iterable):

        ```python
        >>> from datasets import load_dataset, interleave_datasets
        >>> d1 = load_dataset("oscar", "unshuffled_deduplicated_en", split="train", streaming=True)
        >>> d2 = load_dataset("oscar", "unshuffled_deduplicated_fr", split="train", streaming=True)
        >>> dataset = interleave_datasets([d1, d2])
        >>> iterator = iter(dataset)
        >>> next(iterator)
        {'text': 'Mtendere Village was inspired by the vision...}
        >>> next(iterator)
        {'text': "Média de débat d'idées, de culture...}
        ```
    """
    from .arrow_dataset import Dataset
    from .iterable_dataset import IterableDataset

    if not datasets:
        raise ValueError("Unable to interleave an empty list of datasets.")
    for i, dataset in enumerate(datasets):
        if not isinstance(dataset, (Dataset, IterableDataset)):
            if isinstance(dataset, (DatasetDict, IterableDatasetDict)):
                if not dataset:
                    raise ValueError(
                        f"Expected a list of Dataset objects or a list of IterableDataset objects, but element at position {i} "
                        "is an empty dataset dictionary."
                    )
                raise ValueError(
                    f"Dataset at position {i} has at least one split: {list(dataset)}\n"
                    f"Please pick one to interleave with the other datasets, for example: dataset['{next(iter(dataset))}']"
                )
            raise ValueError(
                f"Expected a list of Dataset objects or a list of IterableDataset objects, but element at position {i} is a {type(dataset).__name__}."
            )
        if i == 0:
            dataset_type, other_type = (
                (Dataset, IterableDataset) if isinstance(dataset, Dataset) else (IterableDataset, Dataset)
            )
        elif not isinstance(dataset, dataset_type):
            raise ValueError(
                f"Unable to interleave a {dataset_type.__name__} (at position 0) with a {other_type.__name__} (at position {i}). Expected a list of Dataset objects or a list of IterableDataset objects."
            )
    if stopping_strategy not in ["first_exhausted", "all_exhausted"]:
        raise ValueError(f"{stopping_strategy} is not supported. Please enter a valid stopping_strategy.")
    if dataset_type is Dataset:
        return _interleave_map_style_datasets(
            datasets, probabilities, seed, info=info, split=split, stopping_strategy=stopping_strategy
        )
    else:
        return _interleave_iterable_datasets(
            datasets, probabilities, seed, info=info, split=split, stopping_strategy=stopping_strategy
        )


def concatenate_datasets(
    dsets: List[DatasetType],
    info: Optional[DatasetInfo] = None,
    split: Optional[NamedSplit] = None,
    axis: int = 0,
) -> DatasetType:
    """
    Converts a list of [`Dataset`] objects with the same schema into a single [`Dataset`].

    Args:
        dsets (`List[datasets.Dataset]`):
            List of Datasets to concatenate.
        info (`DatasetInfo`, *optional*):
            Dataset information, like description, citation, etc.
        split (`NamedSplit`, *optional*):
            Name of the dataset split.
        axis (`{0, 1}`, defaults to `0`):
            Axis to concatenate over, where `0` means over rows (vertically) and `1` means over columns
            (horizontally).

            <Added version="1.6.0"/>

    Example:

    ```py
    >>> ds3 = concatenate_datasets([ds1, ds2])
    ```
    """

    if not dsets:
        raise ValueError("Unable to concatenate an empty list of datasets.")
    for i, dataset in enumerate(dsets):
        if not isinstance(dataset, (Dataset, IterableDataset)):
            if isinstance(dataset, (DatasetDict, IterableDatasetDict)):
                if not dataset:
                    raise ValueError(
                        f"Expected a list of Dataset objects or a list of IterableDataset objects, but element at position {i} "
                        "is an empty dataset dictionary."
                    )
                raise ValueError(
                    f"Dataset at position {i} has at least one split: {list(dataset)}\n"
                    f"Please pick one to concatenate with the other datasets, for example: dataset['{next(iter(dataset))}']"
                )
            raise ValueError(
                f"Expected a list of Dataset objects or a list of IterableDataset objects, but element at position {i} is a {type(dataset).__name__}."
            )
        if i == 0:
            dataset_type, other_type = (
                (Dataset, IterableDataset) if isinstance(dataset, Dataset) else (IterableDataset, Dataset)
            )
        elif not isinstance(dataset, dataset_type):
            raise ValueError(
                f"Unable to concatenate a {dataset_type.__name__} (at position 0) with a {other_type.__name__} (at position {i}). Expected a list of Dataset objects or a list of IterableDataset objects."
            )
    if dataset_type is Dataset:
        return _concatenate_map_style_datasets(dsets, info=info, split=split, axis=axis)
    else:
        return _concatenate_iterable_datasets(dsets, info=info, split=split, axis=axis)
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/data_files.py
ADDED
@@ -0,0 +1,825 @@
import os
import re
from functools import partial
from glob import has_magic
from pathlib import Path, PurePath
from typing import Callable, Dict, List, Optional, Set, Tuple, Union

import huggingface_hub
from fsspec.core import url_to_fs
from fsspec.implementations.http import HTTPFileSystem
from huggingface_hub import HfFileSystem
from packaging import version
from tqdm.contrib.concurrent import thread_map

from . import config
from .download import DownloadConfig
from .naming import _split_re
from .splits import Split
from .utils import logging
from .utils import tqdm as hf_tqdm
from .utils.file_utils import _prepare_path_and_storage_options, is_local_path, is_relative_path, xbasename, xjoin
from .utils.py_utils import glob_pattern_to_regex, string_to_dict


SingleOriginMetadata = Union[Tuple[str, str], Tuple[str], Tuple[()]]


SANITIZED_DEFAULT_SPLIT = str(Split.TRAIN)


logger = logging.get_logger(__name__)


class Url(str):
    pass


class EmptyDatasetError(FileNotFoundError):
    pass


SPLIT_PATTERN_SHARDED = "data/{split}-[0-9][0-9][0-9][0-9][0-9]-of-[0-9][0-9][0-9][0-9][0-9]*.*"

SPLIT_KEYWORDS = {
    Split.TRAIN: ["train", "training"],
    Split.VALIDATION: ["validation", "valid", "dev", "val"],
    Split.TEST: ["test", "testing", "eval", "evaluation"],
}
NON_WORDS_CHARS = "-._ 0-9"
if config.FSSPEC_VERSION < version.parse("2023.9.0"):
    KEYWORDS_IN_FILENAME_BASE_PATTERNS = ["**[{sep}/]{keyword}[{sep}]*", "{keyword}[{sep}]*"]
    KEYWORDS_IN_DIR_NAME_BASE_PATTERNS = [
        "{keyword}/**",
        "{keyword}[{sep}]*/**",
        "**[{sep}/]{keyword}/**",
        "**[{sep}/]{keyword}[{sep}]*/**",
    ]
elif config.FSSPEC_VERSION < version.parse("2023.12.0"):
    KEYWORDS_IN_FILENAME_BASE_PATTERNS = ["**/*[{sep}/]{keyword}[{sep}]*", "{keyword}[{sep}]*"]
    KEYWORDS_IN_DIR_NAME_BASE_PATTERNS = [
        "{keyword}/**/*",
        "{keyword}[{sep}]*/**/*",
        "**/*[{sep}/]{keyword}/**/*",
        "**/*[{sep}/]{keyword}[{sep}]*/**/*",
    ]
else:
    KEYWORDS_IN_FILENAME_BASE_PATTERNS = ["**/{keyword}[{sep}]*", "**/*[{sep}]{keyword}[{sep}]*"]
    KEYWORDS_IN_DIR_NAME_BASE_PATTERNS = [
        "**/{keyword}/**",
        "**/{keyword}[{sep}]*/**",
        "**/*[{sep}]{keyword}/**",
        "**/*[{sep}]{keyword}[{sep}]*/**",
    ]

DEFAULT_SPLITS = [Split.TRAIN, Split.VALIDATION, Split.TEST]
DEFAULT_PATTERNS_SPLIT_IN_FILENAME = {
    split: [
        pattern.format(keyword=keyword, sep=NON_WORDS_CHARS)
        for keyword in SPLIT_KEYWORDS[split]
        for pattern in KEYWORDS_IN_FILENAME_BASE_PATTERNS
    ]
    for split in DEFAULT_SPLITS
}
DEFAULT_PATTERNS_SPLIT_IN_DIR_NAME = {
    split: [
        pattern.format(keyword=keyword, sep=NON_WORDS_CHARS)
        for keyword in SPLIT_KEYWORDS[split]
        for pattern in KEYWORDS_IN_DIR_NAME_BASE_PATTERNS
    ]
    for split in DEFAULT_SPLITS
}


DEFAULT_PATTERNS_ALL = {
    Split.TRAIN: ["**"],
}

ALL_SPLIT_PATTERNS = [SPLIT_PATTERN_SHARDED]
ALL_DEFAULT_PATTERNS = [
    DEFAULT_PATTERNS_SPLIT_IN_DIR_NAME,
    DEFAULT_PATTERNS_SPLIT_IN_FILENAME,
    DEFAULT_PATTERNS_ALL,
]
if config.FSSPEC_VERSION < version.parse("2023.9.0"):
    METADATA_PATTERNS = [
        "metadata.csv",
        "**/metadata.csv",
        "metadata.jsonl",
        "**/metadata.jsonl",
    ]  # metadata file for ImageFolder and AudioFolder
else:
    METADATA_PATTERNS = [
        "**/metadata.csv",
        "**/metadata.jsonl",
    ]  # metadata file for ImageFolder and AudioFolder
WILDCARD_CHARACTERS = "*[]"
FILES_TO_IGNORE = [
    "README.md",
    "config.json",
    "dataset_info.json",
    "dataset_infos.json",
    "dummy_data.zip",
    "dataset_dict.json",
]


def contains_wildcards(pattern: str) -> bool:
    return any(wildcard_character in pattern for wildcard_character in WILDCARD_CHARACTERS)


def sanitize_patterns(patterns: Union[Dict, List, str]) -> Dict[str, Union[List[str], "DataFilesList"]]:
    """
    Take the data_files patterns from the user, and format them into a dictionary.
    Each key is the name of the split, and each value is a list of data files patterns (paths or urls).
    The default split is "train".

    Returns:
        patterns: dictionary of split_name -> list of patterns
    """
    if isinstance(patterns, dict):
        return {str(key): value if isinstance(value, list) else [value] for key, value in patterns.items()}
    elif isinstance(patterns, str):
        return {SANITIZED_DEFAULT_SPLIT: [patterns]}
    elif isinstance(patterns, list):
        if any(isinstance(pattern, dict) for pattern in patterns):
            for pattern in patterns:
                if not (
                    isinstance(pattern, dict)
                    and len(pattern) == 2
                    and "split" in pattern
                    and isinstance(pattern.get("path"), (str, list))
                ):
                    raise ValueError(
                        f"Expected each split to have a 'path' key which can be a string or a list of strings, but got {pattern}"
                    )
            splits = [pattern["split"] for pattern in patterns]
            if len(set(splits)) != len(splits):
                raise ValueError(f"Some splits are duplicated in data_files: {splits}")
            return {
                str(pattern["split"]): pattern["path"] if isinstance(pattern["path"], list) else [pattern["path"]]
                for pattern in patterns
            }
        else:
            return {SANITIZED_DEFAULT_SPLIT: patterns}
    else:
        return sanitize_patterns(list(patterns))
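

# A hedged sketch (not part of the original module) of the input shapes accepted
# by sanitize_patterns; the function name is hypothetical.
def _example_sanitize_patterns():
    assert sanitize_patterns("data/*.csv") == {"train": ["data/*.csv"]}
    assert sanitize_patterns(["a.csv", "b.csv"]) == {"train": ["a.csv", "b.csv"]}
    assert sanitize_patterns({"test": "test.csv"}) == {"test": ["test.csv"]}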
def _is_inside_unrequested_special_dir(matched_rel_path: str, pattern: str) -> bool:
|
| 170 |
+
"""
|
| 171 |
+
When a path matches a pattern, we additionnally check if it's inside a special directory
|
| 172 |
+
we ignore by default (if it starts with a double underscore).
|
| 173 |
+
|
| 174 |
+
Users can still explicitly request a filepath inside such a directory if "__pycache__" is
|
| 175 |
+
mentioned explicitly in the requested pattern.
|
| 176 |
+
|
| 177 |
+
Some examples:
|
| 178 |
+
|
| 179 |
+
base directory:
|
| 180 |
+
|
| 181 |
+
./
|
| 182 |
+
└── __pycache__
|
| 183 |
+
└── b.txt
|
| 184 |
+
|
| 185 |
+
>>> _is_inside_unrequested_special_dir("__pycache__/b.txt", "**")
|
| 186 |
+
True
|
| 187 |
+
>>> _is_inside_unrequested_special_dir("__pycache__/b.txt", "*/b.txt")
|
| 188 |
+
True
|
| 189 |
+
>>> _is_inside_unrequested_special_dir("__pycache__/b.txt", "__pycache__/*")
|
| 190 |
+
False
|
| 191 |
+
>>> _is_inside_unrequested_special_dir("__pycache__/b.txt", "__*/*")
|
| 192 |
+
False
|
| 193 |
+
"""
|
| 194 |
+
# We just need to check if every special directories from the path is present explicly in the pattern.
|
| 195 |
+
# Since we assume that the path matches the pattern, it's equivalent to counting that both
|
| 196 |
+
# the parent path and the parent pattern have the same number of special directories.
|
| 197 |
+
data_dirs_to_ignore_in_path = [part for part in PurePath(matched_rel_path).parent.parts if part.startswith("__")]
|
| 198 |
+
data_dirs_to_ignore_in_pattern = [part for part in PurePath(pattern).parent.parts if part.startswith("__")]
|
| 199 |
+
return len(data_dirs_to_ignore_in_path) != len(data_dirs_to_ignore_in_pattern)
|
| 200 |
+
|
| 201 |
+
|
| 202 |
+
def _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(matched_rel_path: str, pattern: str) -> bool:
    """
    When a path matches a pattern, we additionally check if it's a hidden file or if it's inside
    a hidden directory we ignore by default, i.e. if the file name or a parent directory name starts with a dot.

    Users can still explicitly request a filepath that is hidden or is inside a hidden directory
    if the hidden part is mentioned explicitly in the requested pattern.

    Some examples:

    base directory:

        ./
        └── .hidden_file.txt

    >>> _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(".hidden_file.txt", "**")
    True
    >>> _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(".hidden_file.txt", ".*")
    False

    base directory:

        ./
        └── .hidden_dir
            └── a.txt

    >>> _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(".hidden_dir/a.txt", "**")
    True
    >>> _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(".hidden_dir/a.txt", ".*/*")
    False
    >>> _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(".hidden_dir/a.txt", ".hidden_dir/*")
    False

    base directory:

        ./
        └── .hidden_dir
            └── .hidden_file.txt

    >>> _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(".hidden_dir/.hidden_file.txt", "**")
    True
    >>> _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(".hidden_dir/.hidden_file.txt", ".*/*")
    True
    >>> _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(".hidden_dir/.hidden_file.txt", ".*/.*")
    False
    >>> _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(".hidden_dir/.hidden_file.txt", ".hidden_dir/*")
    True
    >>> _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(".hidden_dir/.hidden_file.txt", ".hidden_dir/.*")
    False
    """
    # We just need to check if every hidden part from the path is present explicitly in the pattern.
    # Since we assume that the path matches the pattern, it's equivalent to checking that both
    # the path and the pattern have the same number of hidden parts.
    hidden_directories_in_path = [
        part for part in PurePath(matched_rel_path).parts if part.startswith(".") and not set(part) == {"."}
    ]
    hidden_directories_in_pattern = [
        part for part in PurePath(pattern).parts if part.startswith(".") and not set(part) == {"."}
    ]
    return len(hidden_directories_in_path) != len(hidden_directories_in_pattern)

def _get_data_files_patterns(pattern_resolver: Callable[[str], List[str]]) -> Dict[str, List[str]]:
    """
    Get the default pattern from a directory or repository by testing all the supported patterns.
    The first pattern that returns a non-empty list of data files is returned.

    In order, it first tests if SPLIT_PATTERN_SHARDED works, otherwise it tests the patterns in ALL_DEFAULT_PATTERNS.
    """
    # first check the split patterns like data/{split}-00000-of-00001.parquet
    for split_pattern in ALL_SPLIT_PATTERNS:
        pattern = split_pattern.replace("{split}", "*")
        try:
            data_files = pattern_resolver(pattern)
        except FileNotFoundError:
            continue
        if len(data_files) > 0:
            splits: Set[str] = {
                string_to_dict(xbasename(p), glob_pattern_to_regex(xbasename(split_pattern)))["split"]
                for p in data_files
            }
            if any(not re.match(_split_re, split) for split in splits):
                raise ValueError(f"Split name should match '{_split_re}' but got '{splits}'.")
            sorted_splits = [str(split) for split in DEFAULT_SPLITS if split in splits] + sorted(
                splits - set(DEFAULT_SPLITS)
            )
            return {split: [split_pattern.format(split=split)] for split in sorted_splits}
    # then check the default patterns based on train/valid/test splits
    for patterns_dict in ALL_DEFAULT_PATTERNS:
        non_empty_splits = []
        for split, patterns in patterns_dict.items():
            for pattern in patterns:
                try:
                    data_files = pattern_resolver(pattern)
                except FileNotFoundError:
                    continue
                if len(data_files) > 0:
                    non_empty_splits.append(split)
                    break
        if non_empty_splits:
            return {split: patterns_dict[split] for split in non_empty_splits}
    raise FileNotFoundError(f"Couldn't resolve pattern {pattern} with resolver {pattern_resolver}")

def _get_metadata_files_patterns(pattern_resolver: Callable[[str], List[str]]) -> List[str]:
    """
    Get the supported metadata patterns from a directory or repository.
    """
    non_empty_patterns = []
    for pattern in METADATA_PATTERNS:
        try:
            metadata_files = pattern_resolver(pattern)
            if len(metadata_files) > 0:
                non_empty_patterns.append(pattern)
        except FileNotFoundError:
            pass
    if non_empty_patterns:
        return non_empty_patterns
    raise FileNotFoundError(f"Couldn't resolve pattern {pattern} with resolver {pattern_resolver}")

def resolve_pattern(
    pattern: str,
    base_path: str,
    allowed_extensions: Optional[List[str]] = None,
    download_config: Optional[DownloadConfig] = None,
) -> List[str]:
    """
    Resolve the paths and URLs of the data files from the pattern passed by the user.

    You can use patterns to resolve multiple local files. Here are a few examples:
    - *.csv to match all the CSV files at the first level
    - **.csv to match all the CSV files at any level
    - data/* to match all the files inside "data"
    - data/** to match all the files inside "data" and its subdirectories

    The patterns are resolved using the fsspec glob. In fsspec>=2023.12.0 this is equivalent to
    Python's glob.glob, Path.glob, Path.match and fnmatch where ** is unsupported with a prefix/suffix
    other than a forward slash /.

    More generally:
    - '*' matches any character except a forward-slash (to match just the file or directory name)
    - '**' matches any character including a forward-slash /

    Hidden files and directories (i.e. whose names start with a dot) are ignored, unless they are explicitly requested.
    The same applies to special directories that start with a double underscore like "__pycache__".
    You can still include one if the pattern explicitly mentions it:
    - to include a hidden file: "*/.hidden.txt" or "*/.*"
    - to include a hidden directory: ".hidden/*" or ".*/*"
    - to include a special directory: "__special__/*" or "__*/*"

    Example::

        >>> from datasets.data_files import resolve_pattern
        >>> base_path = "."
        >>> resolve_pattern("docs/**/*.py", base_path)
        ['/Users/mariosasko/Desktop/projects/datasets/docs/source/_config.py']

    Args:
        pattern (str): Unix pattern or paths or URLs of the data files to resolve.
            The paths can be absolute or relative to base_path.
            Remote filesystems using fsspec are supported, e.g. with the hf:// protocol.
        base_path (str): Base path to use when resolving relative paths.
        allowed_extensions (Optional[list], optional): White-list of file extensions to use. Defaults to None (all extensions).
            For example: allowed_extensions=[".csv", ".json", ".txt", ".parquet"]
        download_config ([`DownloadConfig`], *optional*): Specific download configuration parameters.
    Returns:
        List[str]: List of paths or URLs to the local or remote files that match the patterns.
    """
    if is_relative_path(pattern):
        pattern = xjoin(base_path, pattern)
    elif is_local_path(pattern):
        base_path = os.path.splitdrive(pattern)[0] + os.sep
    else:
        base_path = ""
    pattern, storage_options = _prepare_path_and_storage_options(pattern, download_config=download_config)
    fs, fs_pattern = url_to_fs(pattern, **storage_options)
    files_to_ignore = set(FILES_TO_IGNORE) - {xbasename(pattern)}
    protocol = fs.protocol if isinstance(fs.protocol, str) else fs.protocol[0]
    protocol_prefix = protocol + "://" if protocol != "file" else ""
    glob_kwargs = {}
    if protocol == "hf" and config.HF_HUB_VERSION >= version.parse("0.20.0"):
        # 10 times faster glob with detail=True (ignores costly info like lastCommit)
        glob_kwargs["expand_info"] = False
    matched_paths = [
        filepath if filepath.startswith(protocol_prefix) else protocol_prefix + filepath
        for filepath, info in fs.glob(pattern, detail=True, **glob_kwargs).items()
        if info["type"] == "file"
        and (xbasename(filepath) not in files_to_ignore)
        and not _is_inside_unrequested_special_dir(filepath, fs_pattern)
        and not _is_unrequested_hidden_file_or_is_inside_unrequested_hidden_dir(filepath, fs_pattern)
    ]  # ignore .ipynb and __pycache__, but keep /../
    if allowed_extensions is not None:
        out = [
            filepath
            for filepath in matched_paths
            if any("." + suffix in allowed_extensions for suffix in xbasename(filepath).split(".")[1:])
        ]
        if len(out) < len(matched_paths):
            invalid_matched_files = list(set(matched_paths) - set(out))
            logger.info(
                f"Some files matched the pattern '{pattern}' but don't have valid data file extensions: {invalid_matched_files}"
            )
    else:
        out = matched_paths
    if not out:
        error_msg = f"Unable to find '{pattern}'"
        if allowed_extensions is not None:
            error_msg += f" with any supported extension {list(allowed_extensions)}"
        raise FileNotFoundError(error_msg)
    return out

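# Usage sketch (illustrative): resolving a glob against a local directory while
# keeping only CSV files. The directory layout and resulting paths are hypothetical.
#
# >>> resolve_pattern("data/**", base_path=".", allowed_extensions=[".csv"])
# ['/abs/path/data/train.csv', '/abs/path/data/test.csv']
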
def get_data_patterns(base_path: str, download_config: Optional[DownloadConfig] = None) -> Dict[str, List[str]]:
    """
    Get the default pattern from a directory by testing all the supported patterns.
    The first pattern that returns a non-empty list of data files is returned.

    Some examples of supported patterns:

    Input:

        my_dataset_repository/
        ├── README.md
        └── dataset.csv

    Output:

        {'train': ['**']}

    Input:

        my_dataset_repository/
        ├── README.md
        ├── train.csv
        └── test.csv

        my_dataset_repository/
        ├── README.md
        └── data/
            ├── train.csv
            └── test.csv

        my_dataset_repository/
        ├── README.md
        ├── train_0.csv
        ├── train_1.csv
        ├── train_2.csv
        ├── train_3.csv
        ├── test_0.csv
        └── test_1.csv

    Output:

        {'train': ['**/train[-._ 0-9]*', '**/*[-._ 0-9]train[-._ 0-9]*', '**/training[-._ 0-9]*', '**/*[-._ 0-9]training[-._ 0-9]*'],
         'test': ['**/test[-._ 0-9]*', '**/*[-._ 0-9]test[-._ 0-9]*', '**/testing[-._ 0-9]*', '**/*[-._ 0-9]testing[-._ 0-9]*', ...]}

    Input:

        my_dataset_repository/
        ├── README.md
        └── data/
            ├── train/
            │   ├── shard_0.csv
            │   ├── shard_1.csv
            │   ├── shard_2.csv
            │   └── shard_3.csv
            └── test/
                ├── shard_0.csv
                └── shard_1.csv

    Output:

        {'train': ['**/train/**', '**/train[-._ 0-9]*/**', '**/*[-._ 0-9]train/**', '**/*[-._ 0-9]train[-._ 0-9]*/**', ...],
         'test': ['**/test/**', '**/test[-._ 0-9]*/**', '**/*[-._ 0-9]test/**', '**/*[-._ 0-9]test[-._ 0-9]*/**', ...]}

    Input:

        my_dataset_repository/
        ├── README.md
        └── data/
            ├── train-00000-of-00003.csv
            ├── train-00001-of-00003.csv
            ├── train-00002-of-00003.csv
            ├── test-00000-of-00001.csv
            ├── random-00000-of-00003.csv
            ├── random-00001-of-00003.csv
            └── random-00002-of-00003.csv

    Output:

        {'train': ['data/train-[0-9][0-9][0-9][0-9][0-9]-of-[0-9][0-9][0-9][0-9][0-9]*.*'],
         'test': ['data/test-[0-9][0-9][0-9][0-9][0-9]-of-[0-9][0-9][0-9][0-9][0-9]*.*'],
         'random': ['data/random-[0-9][0-9][0-9][0-9][0-9]-of-[0-9][0-9][0-9][0-9][0-9]*.*']}

    In order, it first tests if SPLIT_PATTERN_SHARDED works, otherwise it tests the patterns in ALL_DEFAULT_PATTERNS.
    """
    resolver = partial(resolve_pattern, base_path=base_path, download_config=download_config)
    try:
        return _get_data_files_patterns(resolver)
    except FileNotFoundError:
        raise EmptyDatasetError(f"The directory at {base_path} doesn't contain any data files") from None

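# Usage sketch (illustrative, hypothetical local layout): given a directory with
# "train.csv" and "test.csv" at its root, the default patterns are inferred as:
#
# >>> get_data_patterns("path/to/my_dataset_repository")
# {'train': ['**/train[-._ 0-9]*', ...], 'test': ['**/test[-._ 0-9]*', ...]}
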
def get_metadata_patterns(
    base_path: str,
    download_config: Optional[DownloadConfig] = None,
) -> List[str]:
    """
    Get the supported metadata patterns from a local directory.
    """
    resolver = partial(resolve_pattern, base_path=base_path, download_config=download_config)
    try:
        return _get_metadata_files_patterns(resolver)
    except FileNotFoundError:
        raise FileNotFoundError(f"The directory at {base_path} doesn't contain any metadata file") from None

def _get_single_origin_metadata(
    data_file: str,
    download_config: Optional[DownloadConfig] = None,
) -> SingleOriginMetadata:
    data_file, storage_options = _prepare_path_and_storage_options(data_file, download_config=download_config)
    fs, *_ = url_to_fs(data_file, **storage_options)
    if isinstance(fs, HfFileSystem):
        resolved_path = fs.resolve_path(data_file)
        return resolved_path.repo_id, resolved_path.revision
    elif isinstance(fs, HTTPFileSystem) and data_file.startswith(config.HF_ENDPOINT):
        hffs = HfFileSystem(endpoint=config.HF_ENDPOINT, token=download_config.token)
        data_file = "hf://" + data_file[len(config.HF_ENDPOINT) + 1 :].replace("/resolve/", "@", 1)
        resolved_path = hffs.resolve_path(data_file)
        return resolved_path.repo_id, resolved_path.revision
    info = fs.info(data_file)
    # s3fs uses "ETag", gcsfs uses "etag", and for local we simply check mtime
    for key in ["ETag", "etag", "mtime"]:
        if key in info:
            return (str(info[key]),)
    return ()

def _get_origin_metadata(
    data_files: List[str],
    download_config: Optional[DownloadConfig] = None,
    max_workers: Optional[int] = None,
) -> List[SingleOriginMetadata]:
    max_workers = max_workers if max_workers is not None else config.HF_DATASETS_MULTITHREADING_MAX_WORKERS
    return thread_map(
        partial(_get_single_origin_metadata, download_config=download_config),
        data_files,
        max_workers=max_workers,
        tqdm_class=hf_tqdm,
        desc="Resolving data files",
        # set `disable=None` rather than `disable=False` by default to disable progress bar when no TTY attached
        disable=len(data_files) <= 16 or None,
    )

class DataFilesList(List[str]):
    """
    List of data files (absolute local paths or URLs).
    It has two construction methods given the user's data files patterns:
    - ``from_hf_repo``: resolve patterns inside a dataset repository
    - ``from_local_or_remote``: resolve patterns from a local path

    Moreover, DataFilesList has an additional attribute ``origin_metadata``.
    It can store:
    - the last modified time of local files
    - ETag of remote files
    - commit sha of a dataset repository

    Thanks to this additional attribute, it is possible to hash the list
    and get a different hash if and only if at least one file changed.
    This is useful for caching Dataset objects that are obtained from a list of data files.
    """

    def __init__(self, data_files: List[str], origin_metadata: List[SingleOriginMetadata]) -> None:
        super().__init__(data_files)
        self.origin_metadata = origin_metadata

    def __add__(self, other: "DataFilesList") -> "DataFilesList":
        return DataFilesList([*self, *other], self.origin_metadata + other.origin_metadata)

    @classmethod
    def from_hf_repo(
        cls,
        patterns: List[str],
        dataset_info: huggingface_hub.hf_api.DatasetInfo,
        base_path: Optional[str] = None,
        allowed_extensions: Optional[List[str]] = None,
        download_config: Optional[DownloadConfig] = None,
    ) -> "DataFilesList":
        base_path = f"hf://datasets/{dataset_info.id}@{dataset_info.sha}/{base_path or ''}".rstrip("/")
        return cls.from_patterns(
            patterns, base_path=base_path, allowed_extensions=allowed_extensions, download_config=download_config
        )

    @classmethod
    def from_local_or_remote(
        cls,
        patterns: List[str],
        base_path: Optional[str] = None,
        allowed_extensions: Optional[List[str]] = None,
        download_config: Optional[DownloadConfig] = None,
    ) -> "DataFilesList":
        base_path = base_path if base_path is not None else Path().resolve().as_posix()
        return cls.from_patterns(
            patterns, base_path=base_path, allowed_extensions=allowed_extensions, download_config=download_config
        )

    @classmethod
    def from_patterns(
        cls,
        patterns: List[str],
        base_path: Optional[str] = None,
        allowed_extensions: Optional[List[str]] = None,
        download_config: Optional[DownloadConfig] = None,
    ) -> "DataFilesList":
        base_path = base_path if base_path is not None else Path().resolve().as_posix()
        data_files = []
        for pattern in patterns:
            try:
                data_files.extend(
                    resolve_pattern(
                        pattern,
                        base_path=base_path,
                        allowed_extensions=allowed_extensions,
                        download_config=download_config,
                    )
                )
            except FileNotFoundError:
                if not has_magic(pattern):
                    raise
        origin_metadata = _get_origin_metadata(data_files, download_config=download_config)
        return cls(data_files, origin_metadata)

    def filter_extensions(self, extensions: List[str]) -> "DataFilesList":
        pattern = "|".join("\\" + ext for ext in extensions)
        pattern = re.compile(f".*({pattern})(\\..+)?$")
        return DataFilesList(
            [data_file for data_file in self if pattern.match(data_file)],
            origin_metadata=self.origin_metadata,
        )

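# Usage sketch (illustrative): building a DataFilesList from glob patterns.
# The pattern and directory below are hypothetical.
#
# >>> data_files = DataFilesList.from_patterns(["data/*.parquet"], base_path="/path/to/repo")
# >>> data_files[:2]
# ['/path/to/repo/data/train-00000.parquet', '/path/to/repo/data/train-00001.parquet']
# >>> data_files.origin_metadata  # e.g. mtimes locally, ETags or commit shas remotely
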
class DataFilesDict(Dict[str, DataFilesList]):
    """
    Dict of split_name -> list of data files (absolute local paths or URLs).
    It has two construction methods given the user's data files patterns:
    - ``from_hf_repo``: resolve patterns inside a dataset repository
    - ``from_local_or_remote``: resolve patterns from a local path

    Moreover, each list is a DataFilesList. It is possible to hash the dictionary
    and get a different hash if and only if at least one file changed.
    For more info, see [`DataFilesList`].

    This is useful for caching Dataset objects that are obtained from a list of data files.

    Changing the order of the keys of this dictionary also doesn't change its hash.
    """

    @classmethod
    def from_local_or_remote(
        cls,
        patterns: Dict[str, Union[List[str], DataFilesList]],
        base_path: Optional[str] = None,
        allowed_extensions: Optional[List[str]] = None,
        download_config: Optional[DownloadConfig] = None,
    ) -> "DataFilesDict":
        out = cls()
        for key, patterns_for_key in patterns.items():
            out[key] = (
                patterns_for_key
                if isinstance(patterns_for_key, DataFilesList)
                else DataFilesList.from_local_or_remote(
                    patterns_for_key,
                    base_path=base_path,
                    allowed_extensions=allowed_extensions,
                    download_config=download_config,
                )
            )
        return out

    @classmethod
    def from_hf_repo(
        cls,
        patterns: Dict[str, Union[List[str], DataFilesList]],
        dataset_info: huggingface_hub.hf_api.DatasetInfo,
        base_path: Optional[str] = None,
        allowed_extensions: Optional[List[str]] = None,
        download_config: Optional[DownloadConfig] = None,
    ) -> "DataFilesDict":
        out = cls()
        for key, patterns_for_key in patterns.items():
            out[key] = (
                patterns_for_key
                if isinstance(patterns_for_key, DataFilesList)
                else DataFilesList.from_hf_repo(
                    patterns_for_key,
                    dataset_info=dataset_info,
                    base_path=base_path,
                    allowed_extensions=allowed_extensions,
                    download_config=download_config,
                )
            )
        return out

    @classmethod
    def from_patterns(
        cls,
        patterns: Dict[str, Union[List[str], DataFilesList]],
        base_path: Optional[str] = None,
        allowed_extensions: Optional[List[str]] = None,
        download_config: Optional[DownloadConfig] = None,
    ) -> "DataFilesDict":
        out = cls()
        for key, patterns_for_key in patterns.items():
            out[key] = (
                patterns_for_key
                if isinstance(patterns_for_key, DataFilesList)
                else DataFilesList.from_patterns(
                    patterns_for_key,
                    base_path=base_path,
                    allowed_extensions=allowed_extensions,
                    download_config=download_config,
                )
            )
        return out

    def filter_extensions(self, extensions: List[str]) -> "DataFilesDict":
        out = type(self)()
        for key, data_files_list in self.items():
            out[key] = data_files_list.filter_extensions(extensions)
        return out

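# Usage sketch (illustrative): combining sanitize_patterns and DataFilesDict,
# with hypothetical file names.
#
# >>> patterns = sanitize_patterns({"train": "data/train-*.csv", "test": "data/test-*.csv"})
# >>> data_files = DataFilesDict.from_patterns(patterns, base_path="/path/to/repo")
# >>> sorted(data_files)
# ['test', 'train']
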
class DataFilesPatternsList(List[str]):
    """
    List of data files patterns (absolute local paths or URLs).
    For each pattern there should also be a list of allowed extensions
    to keep, or None to keep all the files matching the pattern.
    """

    def __init__(
        self,
        patterns: List[str],
        allowed_extensions: List[Optional[List[str]]],
    ):
        super().__init__(patterns)
        self.allowed_extensions = allowed_extensions

    def __add__(self, other):
        return DataFilesPatternsList([*self, *other], self.allowed_extensions + other.allowed_extensions)

    @classmethod
    def from_patterns(
        cls, patterns: List[str], allowed_extensions: Optional[List[str]] = None
    ) -> "DataFilesPatternsList":
        return cls(patterns, [allowed_extensions] * len(patterns))

    def resolve(
        self,
        base_path: str,
        download_config: Optional[DownloadConfig] = None,
    ) -> "DataFilesList":
        base_path = base_path if base_path is not None else Path().resolve().as_posix()
        data_files = []
        for pattern, allowed_extensions in zip(self, self.allowed_extensions):
            try:
                data_files.extend(
                    resolve_pattern(
                        pattern,
                        base_path=base_path,
                        allowed_extensions=allowed_extensions,
                        download_config=download_config,
                    )
                )
            except FileNotFoundError:
                if not has_magic(pattern):
                    raise
        origin_metadata = _get_origin_metadata(data_files, download_config=download_config)
        return DataFilesList(data_files, origin_metadata)

    def filter_extensions(self, extensions: List[str]) -> "DataFilesPatternsList":
        return DataFilesPatternsList(
            self, [allowed_extensions + extensions for allowed_extensions in self.allowed_extensions]
        )

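# Usage sketch (illustrative): patterns lists are lazy; resolution happens later.
#
# >>> patterns = DataFilesPatternsList.from_patterns(["data/*"], allowed_extensions=[".csv"])
# >>> data_files = patterns.resolve(base_path="/path/to/repo")  # -> DataFilesList
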
class DataFilesPatternsDict(Dict[str, DataFilesPatternsList]):
    """
    Dict of split_name -> list of data files patterns (absolute local paths or URLs).
    """

    @classmethod
    def from_patterns(
        cls, patterns: Dict[str, List[str]], allowed_extensions: Optional[List[str]] = None
    ) -> "DataFilesPatternsDict":
        out = cls()
        for key, patterns_for_key in patterns.items():
            out[key] = (
                patterns_for_key
                if isinstance(patterns_for_key, DataFilesPatternsList)
                else DataFilesPatternsList.from_patterns(
                    patterns_for_key,
                    allowed_extensions=allowed_extensions,
                )
            )
        return out

    def resolve(
        self,
        base_path: str,
        download_config: Optional[DownloadConfig] = None,
    ) -> "DataFilesDict":
        out = DataFilesDict()
        for key, data_files_patterns_list in self.items():
            out[key] = data_files_patterns_list.resolve(base_path, download_config)
        return out

    def filter_extensions(self, extensions: List[str]) -> "DataFilesPatternsDict":
        out = type(self)()
        for key, data_files_patterns_list in self.items():
            out[key] = data_files_patterns_list.filter_extensions(extensions)
        return out
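# Usage sketch (illustrative): the dict variant resolves every split at once.
#
# >>> patterns_dict = DataFilesPatternsDict.from_patterns({"train": ["data/train-*"]})
# >>> patterns_dict.resolve(base_path="/path/to/repo")  # -> DataFilesDict with a "train" key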
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/fingerprint.py
ADDED
@@ -0,0 +1,494 @@
import inspect
import os
import random
import shutil
import tempfile
import weakref
from functools import wraps
from pathlib import Path
from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union

import numpy as np
import xxhash

from . import config
from .naming import INVALID_WINDOWS_CHARACTERS_IN_PATH
from .utils._dill import dumps
from .utils.deprecation_utils import deprecated
from .utils.logging import get_logger


if TYPE_CHECKING:
    from .arrow_dataset import Dataset


logger = get_logger(__name__)


# Fingerprinting makes it possible to have one deterministic fingerprint per dataset state.
# A dataset fingerprint is updated after each transform.
# Re-running the same transforms on a dataset in a different session results in the same fingerprint.
# This is possible thanks to a custom hashing function that works with most python objects.

# Fingerprinting is the main mechanism that enables caching.
# The caching mechanism makes it possible to reload an existing cache file if it has already been computed.


#################
# Caching
#################

_CACHING_ENABLED = True
_TEMP_DIR_FOR_TEMP_CACHE_FILES: Optional["_TempCacheDir"] = None
_DATASETS_WITH_TABLE_IN_TEMP_DIR: Optional[weakref.WeakSet] = None

class _TempCacheDir:
    """
    A temporary directory for storing cached Arrow files with a cleanup that frees references to the Arrow files
    before deleting the directory itself, to avoid permission errors on Windows.
    """

    def __init__(self):
        self.name = tempfile.mkdtemp(prefix=config.TEMP_CACHE_DIR_PREFIX)
        self._finalizer = weakref.finalize(self, self._cleanup)

    def _cleanup(self):
        for dset in get_datasets_with_cache_file_in_temp_dir():
            dset.__del__()
        if os.path.exists(self.name):
            try:
                shutil.rmtree(self.name)
            except Exception as e:
                raise OSError(
                    f"An error occurred while trying to delete temporary cache directory {self.name}. Please delete it manually."
                ) from e

    def cleanup(self):
        if self._finalizer.detach():
            self._cleanup()

def maybe_register_dataset_for_temp_dir_deletion(dataset):
    """
    This function registers the datasets that have cache files in _TEMP_DIR_FOR_TEMP_CACHE_FILES in order
    to properly delete them before deleting the temporary directory.
    The temporary directory _TEMP_DIR_FOR_TEMP_CACHE_FILES is used when caching is disabled.
    """
    if _TEMP_DIR_FOR_TEMP_CACHE_FILES is None:
        return

    global _DATASETS_WITH_TABLE_IN_TEMP_DIR
    if _DATASETS_WITH_TABLE_IN_TEMP_DIR is None:
        _DATASETS_WITH_TABLE_IN_TEMP_DIR = weakref.WeakSet()
    if any(
        Path(_TEMP_DIR_FOR_TEMP_CACHE_FILES.name) in Path(cache_file["filename"]).parents
        for cache_file in dataset.cache_files
    ):
        _DATASETS_WITH_TABLE_IN_TEMP_DIR.add(dataset)


def get_datasets_with_cache_file_in_temp_dir():
    return list(_DATASETS_WITH_TABLE_IN_TEMP_DIR) if _DATASETS_WITH_TABLE_IN_TEMP_DIR is not None else []

def enable_caching():
    """
    When applying transforms on a dataset, the data are stored in cache files.
    The caching mechanism makes it possible to reload an existing cache file if it has already been computed.

    Reloading a dataset is possible since the cache files are named using the dataset fingerprint, which is updated
    after each transform.

    If disabled, the library will no longer reload cached datasets files when applying transforms to the datasets.
    More precisely, if the caching is disabled:
    - cache files are always recreated
    - cache files are written to a temporary directory that is deleted when the session closes
    - cache files are named using a random hash instead of the dataset fingerprint
    - use [`~datasets.Dataset.save_to_disk`] to save a transformed dataset, or it will be deleted when the session closes
    - caching doesn't affect [`~datasets.load_dataset`]. If you want to regenerate a dataset from scratch you should use
      the `download_mode` parameter in [`~datasets.load_dataset`].
    """
    global _CACHING_ENABLED
    _CACHING_ENABLED = True


def disable_caching():
    """
    When applying transforms on a dataset, the data are stored in cache files.
    The caching mechanism makes it possible to reload an existing cache file if it has already been computed.

    Reloading a dataset is possible since the cache files are named using the dataset fingerprint, which is updated
    after each transform.

    If disabled, the library will no longer reload cached datasets files when applying transforms to the datasets.
    More precisely, if the caching is disabled:
    - cache files are always recreated
    - cache files are written to a temporary directory that is deleted when the session closes
    - cache files are named using a random hash instead of the dataset fingerprint
    - use [`~datasets.Dataset.save_to_disk`] to save a transformed dataset, or it will be deleted when the session closes
    - caching doesn't affect [`~datasets.load_dataset`]. If you want to regenerate a dataset from scratch you should use
      the `download_mode` parameter in [`~datasets.load_dataset`].
    """
    global _CACHING_ENABLED
    _CACHING_ENABLED = False

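# Usage sketch (illustrative): toggling the cache globally for a session.
#
# >>> import datasets
# >>> datasets.disable_caching()
# >>> datasets.is_caching_enabled()
# False
# >>> datasets.enable_caching()
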
@deprecated(
    "Use datasets.enable_caching() or datasets.disable_caching() instead. This function will be removed in a future version of datasets."
)
def set_caching_enabled(boolean: bool):
    """
    When applying transforms on a dataset, the data are stored in cache files.
    The caching mechanism makes it possible to reload an existing cache file if it has already been computed.

    Reloading a dataset is possible since the cache files are named using the dataset fingerprint, which is updated
    after each transform.

    If disabled, the library will no longer reload cached datasets files when applying transforms to the datasets.
    More precisely, if the caching is disabled:
    - cache files are always recreated
    - cache files are written to a temporary directory that is deleted when the session closes
    - cache files are named using a random hash instead of the dataset fingerprint
    - use :func:`datasets.Dataset.save_to_disk` to save a transformed dataset, or it will be deleted when the session closes
    - caching doesn't affect :func:`datasets.load_dataset`. If you want to regenerate a dataset from scratch you should use
      the ``download_mode`` parameter in :func:`datasets.load_dataset`.
    """
    global _CACHING_ENABLED
    _CACHING_ENABLED = bool(boolean)

def is_caching_enabled() -> bool:
    """
    When applying transforms on a dataset, the data are stored in cache files.
    The caching mechanism makes it possible to reload an existing cache file if it has already been computed.

    Reloading a dataset is possible since the cache files are named using the dataset fingerprint, which is updated
    after each transform.

    If disabled, the library will no longer reload cached datasets files when applying transforms to the datasets.
    More precisely, if the caching is disabled:
    - cache files are always recreated
    - cache files are written to a temporary directory that is deleted when the session closes
    - cache files are named using a random hash instead of the dataset fingerprint
    - use [`~datasets.Dataset.save_to_disk`] to save a transformed dataset, or it will be deleted when the session closes
    - caching doesn't affect [`~datasets.load_dataset`]. If you want to regenerate a dataset from scratch you should use
      the `download_mode` parameter in [`~datasets.load_dataset`].
    """
    global _CACHING_ENABLED
    return bool(_CACHING_ENABLED)

def get_temporary_cache_files_directory() -> str:
    """Return a directory that is deleted when the session closes."""
    global _TEMP_DIR_FOR_TEMP_CACHE_FILES
    if _TEMP_DIR_FOR_TEMP_CACHE_FILES is None:
        _TEMP_DIR_FOR_TEMP_CACHE_FILES = _TempCacheDir()
    return _TEMP_DIR_FOR_TEMP_CACHE_FILES.name

#################
# Hashing
#################


@deprecated("Use `copyreg.pickle` to register a custom reducer.")
def hashregister(*types):
    def proxy(func):
        for t in types:
            Hasher.dispatch[t] = func
        return func

    return proxy

class Hasher:
    """Hasher that accepts python objects as inputs."""

    dispatch: Dict = {}

    def __init__(self):
        self.m = xxhash.xxh64()

    @classmethod
    def hash_bytes(cls, value: Union[bytes, List[bytes]]) -> str:
        value = [value] if isinstance(value, bytes) else value
        m = xxhash.xxh64()
        for x in value:
            m.update(x)
        return m.hexdigest()

    @classmethod
    @deprecated("Use `Hasher.hash` instead.")
    def hash_default(cls, value: Any) -> str:
        return cls.hash(value)

    @classmethod
    def hash(cls, value: Any) -> str:
        return cls.hash_bytes(dumps(value))

    def update(self, value: Any) -> None:
        header_for_update = f"=={type(value)}=="
        value_for_update = self.hash(value)
        self.m.update(header_for_update.encode("utf8"))
        self.m.update(value_for_update.encode("utf-8"))

    def hexdigest(self) -> str:
        return self.m.hexdigest()

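# Usage sketch (illustrative): Hasher produces a deterministic digest for most
# picklable Python objects, so equal inputs hash equally across sessions.
#
# >>> Hasher.hash({"split": "train", "shards": 4}) == Hasher.hash({"split": "train", "shards": 4})
# True
# >>> h = Hasher()
# >>> h.update("some_transform"); h.update((1, 2, 3))
# >>> digest = h.hexdigest()  # 16 hex chars from xxh64
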
#################
# Fingerprinting
#################

fingerprint_rng = random.Random()
# we show a warning only once when fingerprinting fails to avoid spam
fingerprint_warnings: Dict[str, bool] = {}


def generate_fingerprint(dataset: "Dataset") -> str:
    state = dataset.__dict__
    hasher = Hasher()
    for key in sorted(state):
        if key == "_fingerprint":
            continue
        hasher.update(key)
        hasher.update(state[key])
    # hash data files last modification timestamps as well
    for cache_file in dataset.cache_files:
        hasher.update(os.path.getmtime(cache_file["filename"]))
    return hasher.hexdigest()


def generate_random_fingerprint(nbits: int = 64) -> str:
    return f"{fingerprint_rng.getrandbits(nbits):0{nbits//4}x}"

def update_fingerprint(fingerprint, transform, transform_args):
    global fingerprint_warnings
    hasher = Hasher()
    hasher.update(fingerprint)
    try:
        hasher.update(transform)
    except:  # noqa various errors might raise here from pickle or dill
        if _CACHING_ENABLED:
            if not fingerprint_warnings.get("update_fingerprint_transform_hash_failed", False):
                logger.warning(
                    f"Transform {transform} couldn't be hashed properly, a random hash was used instead. "
                    "Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. "
                    "If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. "
                    "This warning is only shown once. Subsequent hashing failures won't be shown."
                )
                fingerprint_warnings["update_fingerprint_transform_hash_failed"] = True
            else:
                logger.info(f"Transform {transform} couldn't be hashed properly, a random hash was used instead.")
        else:
            logger.info(
                f"Transform {transform} couldn't be hashed properly, a random hash was used instead. This doesn't affect caching since it's disabled."
            )
        return generate_random_fingerprint()
    for key in sorted(transform_args):
        hasher.update(key)
        try:
            hasher.update(transform_args[key])
        except:  # noqa various errors might raise here from pickle or dill
            if _CACHING_ENABLED:
                if not fingerprint_warnings.get("update_fingerprint_transform_hash_failed", False):
                    logger.warning(
                        f"Parameter '{key}'={transform_args[key]} of the transform {transform} couldn't be hashed properly, a random hash was used instead. "
                        "Make sure your transforms and parameters are serializable with pickle or dill for the dataset fingerprinting and caching to work. "
                        "If you reuse this transform, the caching mechanism will consider it to be different from the previous calls and recompute everything. "
                        "This warning is only shown once. Subsequent hashing failures won't be shown."
                    )
                    fingerprint_warnings["update_fingerprint_transform_hash_failed"] = True
                else:
                    logger.info(
                        f"Parameter '{key}'={transform_args[key]} of the transform {transform} couldn't be hashed properly, a random hash was used instead."
                    )
            else:
                logger.info(
                    f"Parameter '{key}'={transform_args[key]} of the transform {transform} couldn't be hashed properly, a random hash was used instead. This doesn't affect caching since it's disabled."
                )
            return generate_random_fingerprint()
    return hasher.hexdigest()

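# Usage sketch (illustrative): each transform chains onto the previous fingerprint,
# so the same pipeline replayed in a new session yields the same final fingerprint.
# The initial fingerprint below is hypothetical.
#
# >>> fp0 = "a1b2c3d4e5f60718"
# >>> fp1 = update_fingerprint(fp0, "Dataset.map@1.0.0", {"batched": True, "batch_size": 1000})
# >>> update_fingerprint(fp0, "Dataset.map@1.0.0", {"batched": True, "batch_size": 1000}) == fp1
# True
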
def validate_fingerprint(fingerprint: str, max_length=64):
    """
    Make sure the fingerprint is a non-empty string that is not longer than max_length (64 by default),
    so that the fingerprint can be used to name cache files without issues.
    """
    if not isinstance(fingerprint, str) or not fingerprint:
        raise ValueError(f"Invalid fingerprint '{fingerprint}': it should be a non-empty string.")
    for invalid_char in INVALID_WINDOWS_CHARACTERS_IN_PATH:
        if invalid_char in fingerprint:
            raise ValueError(
                f"Invalid fingerprint. Bad characters from black list '{INVALID_WINDOWS_CHARACTERS_IN_PATH}' found in '{fingerprint}'. "
                f"They could create issues when creating cache files."
            )
    if len(fingerprint) > max_length:
        raise ValueError(
            f"Invalid fingerprint. Maximum length is {max_length} but '{fingerprint}' has length {len(fingerprint)}. "
            "It could create issues when creating cache files."
        )

def format_transform_for_fingerprint(func: Callable, version: Optional[str] = None) -> str:
    """
    Format a transform to the format that will be used to update the fingerprint.
    """
    transform = f"{func.__module__}.{func.__qualname__}"
    if version is not None:
        transform += f"@{version}"
    return transform

def format_kwargs_for_fingerprint(
    func: Callable,
    args: Tuple,
    kwargs: Dict[str, Any],
    use_kwargs: Optional[List[str]] = None,
    ignore_kwargs: Optional[List[str]] = None,
    randomized_function: bool = False,
) -> Dict[str, Any]:
    """
    Format the kwargs of a transform to the format that will be used to update the fingerprint.
    """
    kwargs_for_fingerprint = kwargs.copy()
    if args:
        params = [p.name for p in inspect.signature(func).parameters.values() if p != p.VAR_KEYWORD]
        args = args[1:]  # assume the first argument is the dataset
        params = params[1:]
        kwargs_for_fingerprint.update(zip(params, args))
    else:
        del kwargs_for_fingerprint[
            next(iter(inspect.signature(func).parameters))
        ]  # assume the first key is the dataset

    # keep the right kwargs to be hashed to generate the fingerprint

    if use_kwargs:
        kwargs_for_fingerprint = {k: v for k, v in kwargs_for_fingerprint.items() if k in use_kwargs}
    if ignore_kwargs:
        kwargs_for_fingerprint = {k: v for k, v in kwargs_for_fingerprint.items() if k not in ignore_kwargs}
    if randomized_function:  # randomized functions have `seed` and `generator` parameters
        if kwargs_for_fingerprint.get("seed") is None and kwargs_for_fingerprint.get("generator") is None:
            _, seed, pos, *_ = np.random.get_state()
            seed = seed[pos] if pos < 624 else seed[0]
            kwargs_for_fingerprint["generator"] = np.random.default_rng(seed)

    # remove kwargs that are the default values

    default_values = {
        p.name: p.default for p in inspect.signature(func).parameters.values() if p.default != inspect._empty
    }
    for default_varname, default_value in default_values.items():
        if default_varname in kwargs_for_fingerprint and kwargs_for_fingerprint[default_varname] == default_value:
            kwargs_for_fingerprint.pop(default_varname)
    return kwargs_for_fingerprint

def fingerprint_transform(
    inplace: bool,
    use_kwargs: Optional[List[str]] = None,
    ignore_kwargs: Optional[List[str]] = None,
    fingerprint_names: Optional[List[str]] = None,
    randomized_function: bool = False,
    version: Optional[str] = None,
):
    """
    Wrapper for dataset transforms to update the dataset fingerprint using ``update_fingerprint``.

    Args:
        inplace (:obj:`bool`): If inplace is True, the fingerprint of the dataset is updated inplace.
            Otherwise, a parameter "new_fingerprint" is passed to the wrapped method that should take care of
            setting the fingerprint of the returned Dataset.
        use_kwargs (:obj:`List[str]`, optional): optional white list of argument names to take into account
            when updating the fingerprint. By default all the arguments are used.
        ignore_kwargs (:obj:`List[str]`, optional): optional black list of argument names to ignore
            when updating the fingerprint. Note that ignore_kwargs takes precedence over use_kwargs.
        fingerprint_names (:obj:`List[str]`, optional, defaults to ["new_fingerprint"]):
            If the dataset transform is not inplace and returns a DatasetDict, then it can require
            several fingerprints (one per dataset in the DatasetDict). By specifying fingerprint_names,
            one fingerprint named after each element of fingerprint_names is going to be passed.
        randomized_function (:obj:`bool`, defaults to False): If the dataset transform is random and has
            optional parameters "seed" and "generator", then you can set randomized_function to True.
            This way, even if users set "seed" and "generator" to None, the fingerprint is
            going to be randomly generated depending on numpy's current state. In this case, the
            generator is set to np.random.default_rng(np.random.get_state()[1][0]).
        version (:obj:`str`, optional): version of the transform. The version is taken into account when
            computing the fingerprint. If a dataset transform changes (or at least if the output data
            that are cached change), then one should increase the version. If the version stays the
            same, then old cached data that are not compatible with the new transform could be reused.
            It should be in the format "MAJOR.MINOR.PATCH".
    """

    if use_kwargs is not None and not isinstance(use_kwargs, list):
        raise ValueError(f"use_kwargs is supposed to be a list, not {type(use_kwargs)}")

    if ignore_kwargs is not None and not isinstance(ignore_kwargs, list):
        raise ValueError(f"ignore_kwargs is supposed to be a list, not {type(ignore_kwargs)}")

    if inplace and fingerprint_names:
        raise ValueError("fingerprint_names are only used when inplace is False")

    fingerprint_names = fingerprint_names if fingerprint_names is not None else ["new_fingerprint"]

    def _fingerprint(func):
        if not inplace and not all(name in func.__code__.co_varnames for name in fingerprint_names):
            raise ValueError(f"function {func} is missing parameters {fingerprint_names} in signature")

        if randomized_function:  # randomized functions have seed and generator parameters
            if "seed" not in func.__code__.co_varnames:
                raise ValueError(f"'seed' must be in {func}'s signature")
            if "generator" not in func.__code__.co_varnames:
                raise ValueError(f"'generator' must be in {func}'s signature")
        # this call has to be outside the wrapper, since __qualname__ changes in multiprocessing
        transform = format_transform_for_fingerprint(func, version=version)

        @wraps(func)
        def wrapper(*args, **kwargs):
            kwargs_for_fingerprint = format_kwargs_for_fingerprint(
                func,
                args,
                kwargs,
                use_kwargs=use_kwargs,
                ignore_kwargs=ignore_kwargs,
                randomized_function=randomized_function,
            )

            if args:
                dataset: Dataset = args[0]
                args = args[1:]
            else:
                dataset: Dataset = kwargs.pop(next(iter(inspect.signature(func).parameters)))

            # compute new_fingerprint and add it to the args of not in-place transforms
            if inplace:
                new_fingerprint = update_fingerprint(dataset._fingerprint, transform, kwargs_for_fingerprint)
            else:
                for fingerprint_name in fingerprint_names:  # transforms like `train_test_split` have several hashes
                    if kwargs.get(fingerprint_name) is None:
                        kwargs_for_fingerprint["fingerprint_name"] = fingerprint_name
                        kwargs[fingerprint_name] = update_fingerprint(
                            dataset._fingerprint, transform, kwargs_for_fingerprint
                        )
                    else:
                        validate_fingerprint(kwargs[fingerprint_name])

            # Call actual function

            out = func(dataset, *args, **kwargs)

            # Update fingerprint of in-place transforms + update in-place history of transforms

            if inplace:  # update after calling func so that the fingerprint doesn't change if the function fails
                dataset._fingerprint = new_fingerprint

            return out

        wrapper._decorator_name_ = "fingerprint"
        return wrapper

    return _fingerprint
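# Usage sketch (illustrative): decorating a hypothetical transform so that the
# returned dataset gets a deterministic fingerprint derived from its arguments.
#
# @fingerprint_transform(inplace=False)
# def add_prefix(dataset, prefix: str, new_fingerprint: Optional[str] = None):
#     ...  # must set the returned dataset's _fingerprint to new_fingerprint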
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/hub.py
ADDED
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import time
from itertools import chain
from typing import Optional, Union

from huggingface_hub import (
    CommitInfo,
    CommitOperationAdd,
    CommitOperationDelete,
    DatasetCard,
    DatasetCardData,
    HfApi,
    HfFileSystem,
)
from huggingface_hub.utils import HfHubHTTPError

import datasets.config
from datasets.info import DatasetInfosDict
from datasets.inspect import get_dataset_config_names, get_dataset_default_config_name
from datasets.load import load_dataset, load_dataset_builder
from datasets.utils.metadata import MetadataConfigs


def convert_to_parquet(
    repo_id: str,
    revision: Optional[str] = None,
    token: Optional[Union[bool, str]] = None,
    trust_remote_code: Optional[bool] = None,
) -> CommitInfo:
    """Convert Hub [script-based dataset](dataset_script) to Parquet [data-only dataset](repository_structure), so that
    the dataset viewer will be supported.

    This function:
    - makes a copy of the script on the "main" branch into a dedicated branch called "script" (if it does not already exist)
    - creates a pull request to the Hub dataset to convert it to Parquet files (and deletes the script from the main branch)

    If in the future you need to recreate the Parquet files from the "script" branch, pass the `revision="script"` argument.

    Note that you should pass the `trust_remote_code=True` argument only if you trust the remote code to be executed locally on your machine.

    Args:
        repo_id (`str`): ID of the source Hub dataset repository, in the following format: `<user>/<dataset_name>` or
            `<org>/<dataset_name>`.
        revision (`str`, *optional*): Branch of the source Hub dataset repository. Defaults to the `"main"` branch.
        token (`bool` or `str`, *optional*): Authentication token for the Hugging Face Hub.
        trust_remote_code (`bool`, defaults to `True`): Whether you trust the remote code of the Hub script-based
            dataset to be executed locally on your machine. This option should only be set to `True` for repositories
            where you have read the code and which you trust.

            <Tip warning={true}>

            `trust_remote_code` will default to False in the next major release.

            </Tip>

    Returns:
        `huggingface_hub.CommitInfo`
    """
    print(f"{repo_id}")
    configs = get_dataset_config_names(repo_id, token=token, revision=revision, trust_remote_code=trust_remote_code)
    print(f"{configs = }")
    default_config = get_dataset_default_config_name(
        repo_id, token=token, revision=revision, trust_remote_code=trust_remote_code
    )
    print(f"{default_config = }")
    if default_config:
        config = default_config
        configs.remove(default_config)
    else:
        config = configs.pop(0)
    print(f"{config = }")
    dataset = load_dataset(repo_id, config, revision=revision, trust_remote_code=trust_remote_code)
    commit_info = dataset.push_to_hub(
        repo_id,
        config_name=config,
        commit_message="Convert dataset to Parquet",
        commit_description="Convert dataset to Parquet.",
        create_pr=True,
        token=token,
        set_default=default_config is not None,
    )
    time.sleep(5)
    pr_revision, pr_url = commit_info.pr_revision, commit_info.pr_url
    for config in configs:
        print(f"{config = }")
        dataset = load_dataset(repo_id, config, revision=revision, trust_remote_code=trust_remote_code)
        dataset.push_to_hub(
            repo_id,
            config_name=config,
            commit_message=f"Add '{config}' config data files",
            revision=pr_revision,
            token=token,
        )
        time.sleep(5)
    _delete_files(repo_id, revision=pr_revision, token=token)
    if not revision:
        api = HfApi(endpoint=datasets.config.HF_ENDPOINT, token=token)
        try:
            api.create_branch(repo_id, branch="script", repo_type="dataset", token=token, exist_ok=True)
        except HfHubHTTPError:
            pass
    print(f"You can find your PR to convert the dataset to Parquet at: {pr_url}")
    return commit_info
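A hedged usage sketch of `convert_to_parquet` (the repo id is a placeholder; running this requires a Hugging Face token with write access to the repository, and `trust_remote_code=True` only if you have read and trust its loading script):

# Usage sketch with placeholder names.
from datasets.hub import convert_to_parquet

commit_info = convert_to_parquet(
    "username/my_scripted_dataset",  # placeholder repo id
    token=True,  # True = use the token saved by `huggingface-cli login`
    trust_remote_code=True,
)
print(commit_info.pr_url)  # the "Convert dataset to Parquet" pull request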
def delete_from_hub(
    repo_id: str,
    config_name: str,
    revision: Optional[str] = None,
    token: Optional[Union[bool, str]] = None,
) -> CommitInfo:
    """Delete a dataset configuration from a [data-only dataset](repository_structure) on the Hub.

    Args:
        repo_id (`str`): ID of the Hub dataset repository, in the following format: `<user>/<dataset_name>` or
            `<org>/<dataset_name>`.
        config_name (`str`): Name of the dataset configuration.
        revision (`str`, *optional*): Branch to delete the configuration from. Defaults to the `"main"` branch.
        token (`bool` or `str`, *optional*): Authentication token for the Hugging Face Hub.

    Returns:
        `huggingface_hub.CommitInfo`
    """
    operations = []
    # data_files
    fs = HfFileSystem(endpoint=datasets.config.HF_ENDPOINT, token=token)
    builder = load_dataset_builder(repo_id, config_name, revision=revision, token=token, trust_remote_code=False)
    for data_file in chain(*builder.config.data_files.values()):
        data_file_resolved_path = fs.resolve_path(data_file)
        if data_file_resolved_path.repo_id == repo_id:
            operations.append(CommitOperationDelete(path_in_repo=data_file_resolved_path.path_in_repo))
    # README.md
    dataset_card = DatasetCard.load(repo_id)
    # config_names
    if dataset_card.data.get("config_names", None) and config_name in dataset_card.data["config_names"]:
        dataset_card.data["config_names"].remove(config_name)
    # metadata_configs
    metadata_configs = MetadataConfigs.from_dataset_card_data(dataset_card.data)
    if metadata_configs:
        _ = metadata_configs.pop(config_name, None)
        dataset_card_data = DatasetCardData()
        metadata_configs.to_dataset_card_data(dataset_card_data)
        if datasets.config.METADATA_CONFIGS_FIELD in dataset_card_data:
            dataset_card.data[datasets.config.METADATA_CONFIGS_FIELD] = dataset_card_data[
                datasets.config.METADATA_CONFIGS_FIELD
            ]
        else:
            _ = dataset_card.data.pop(datasets.config.METADATA_CONFIGS_FIELD, None)
    # dataset_info
    dataset_infos: DatasetInfosDict = DatasetInfosDict.from_dataset_card_data(dataset_card.data)
    if dataset_infos:
        _ = dataset_infos.pop(config_name, None)
        dataset_card_data = DatasetCardData()
        dataset_infos.to_dataset_card_data(dataset_card_data)
        if "dataset_info" in dataset_card_data:
            dataset_card.data["dataset_info"] = dataset_card_data["dataset_info"]
        else:
            _ = dataset_card.data.pop("dataset_info", None)
    # Commit
    operations.append(
        CommitOperationAdd(path_in_repo=datasets.config.REPOCARD_FILENAME, path_or_fileobj=str(dataset_card).encode())
    )
    api = HfApi(endpoint=datasets.config.HF_ENDPOINT, token=token)
    commit_info = api.create_commit(
        repo_id,
        operations=operations,
        commit_message=f"Delete '{config_name}' config",
        commit_description=f"Delete '{config_name}' config.",
        token=token,
        repo_type="dataset",
        revision=revision,
        create_pr=True,
    )
    print(f"You can find your PR to delete the dataset config at: {commit_info.pr_url}")
    return commit_info


def _delete_files(dataset_id, revision=None, token=None):
    dataset_name = dataset_id.split("/")[-1]
    hf_api = HfApi(endpoint=datasets.config.HF_ENDPOINT, token=token)
    repo_files = hf_api.list_repo_files(
        dataset_id,
        repo_type="dataset",
    )
    if repo_files:
        legacy_json_file = []
        python_files = []
        data_files = []
        for filename in repo_files:
            if filename in {".gitattributes", "README.md"}:
                continue
            elif filename == f"{dataset_name}.py":
                hf_api.delete_file(
                    filename,
                    dataset_id,
                    repo_type="dataset",
                    revision=revision,
                    commit_message="Delete loading script",
                )
            elif filename == "dataset_infos.json":
                legacy_json_file.append(filename)
            elif filename.endswith(".py"):
                python_files.append(filename)
            else:
                data_files.append(filename)
        if legacy_json_file:
            hf_api.delete_file(
                "dataset_infos.json",
                dataset_id,
                repo_type="dataset",
                revision=revision,
                commit_message="Delete legacy dataset_infos.json",
            )
        if python_files:
            for filename in python_files:
                hf_api.delete_file(
                    filename,
                    dataset_id,
                    repo_type="dataset",
                    revision=revision,
                    commit_message="Delete loading script auxiliary file",
                )
        if data_files:
            for filename in data_files:
                hf_api.delete_file(
                    filename,
                    dataset_id,
                    repo_type="dataset",
                    revision=revision,
                    commit_message="Delete data file",
                )
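And a matching sketch for `delete_from_hub` (placeholder names again; it opens a pull request rather than committing to `main`):

# Usage sketch with placeholder names.
from datasets.hub import delete_from_hub

commit_info = delete_from_hub(
    "username/my_parquet_dataset",
    config_name="obsolete_config",
    token=True,
)
print(commit_info.pr_url)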
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/info.py
ADDED
@@ -0,0 +1,593 @@
# Copyright 2020 The HuggingFace Datasets Authors and the TensorFlow Datasets Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Lint as: python3
"""DatasetInfo and MetricInfo record information we know about a dataset and a metric.

This includes things that we know about the dataset statically, i.e.:
 - description
 - canonical location
 - does it have validation and tests splits
 - size
 - etc.

This also includes the things that can and should be computed once we've
processed the dataset as well:
 - number of examples (in each split)
 - etc.
"""

import copy
import dataclasses
import json
import os
import posixpath
import warnings
from dataclasses import dataclass
from pathlib import Path
from typing import ClassVar, Dict, List, Optional, Union

import fsspec
from fsspec.core import url_to_fs
from huggingface_hub import DatasetCard, DatasetCardData

from . import config
from .features import Features, Value
from .splits import SplitDict
from .tasks import TaskTemplate, task_template_from_dict
from .utils import Version
from .utils.logging import get_logger
from .utils.py_utils import asdict, unique_values


logger = get_logger(__name__)


@dataclass
class SupervisedKeysData:
    input: str = ""
    output: str = ""


@dataclass
class DownloadChecksumsEntryData:
    key: str = ""
    value: str = ""


class MissingCachedSizesConfigError(Exception):
    """The expected cached sizes of the download file are missing."""


class NonMatchingCachedSizesError(Exception):
    """The prepared split doesn't have expected sizes."""


@dataclass
class PostProcessedInfo:
    features: Optional[Features] = None
    resources_checksums: Optional[dict] = None

    def __post_init__(self):
        # Convert back to the correct classes when we reload from dict
        if self.features is not None and not isinstance(self.features, Features):
            self.features = Features.from_dict(self.features)

    @classmethod
    def from_dict(cls, post_processed_info_dict: dict) -> "PostProcessedInfo":
        field_names = {f.name for f in dataclasses.fields(cls)}
        return cls(**{k: v for k, v in post_processed_info_dict.items() if k in field_names})


@dataclass
class DatasetInfo:
    """Information about a dataset.

    `DatasetInfo` documents a dataset, including its name, version, and features.
    See the constructor arguments and properties for a full list.

    Not all fields are known on construction and may be updated later.

    Attributes:
        description (`str`):
            A description of the dataset.
        citation (`str`):
            A BibTeX citation of the dataset.
        homepage (`str`):
            A URL to the official homepage for the dataset.
        license (`str`):
            The dataset's license. It can be the name of the license or a paragraph containing the terms of the license.
        features ([`Features`], *optional*):
            The features used to specify the dataset's column types.
        post_processed (`PostProcessedInfo`, *optional*):
            Information regarding the resources of a possible post-processing of a dataset. For example, it can contain the information of an index.
        supervised_keys (`SupervisedKeysData`, *optional*):
            Specifies the input feature and the label for supervised learning if applicable for the dataset (legacy from TFDS).
        builder_name (`str`, *optional*):
            The name of the `GeneratorBasedBuilder` subclass used to create the dataset. Usually matched to the corresponding script name. It is also the snake_case version of the dataset builder class name.
        config_name (`str`, *optional*):
            The name of the configuration derived from [`BuilderConfig`].
        version (`str` or [`Version`], *optional*):
            The version of the dataset.
        splits (`dict`, *optional*):
            The mapping between split name and metadata.
        download_checksums (`dict`, *optional*):
            The mapping between the URL to download the dataset's checksums and corresponding metadata.
        download_size (`int`, *optional*):
            The size of the files to download to generate the dataset, in bytes.
        post_processing_size (`int`, *optional*):
            Size of the dataset in bytes after post-processing, if any.
        dataset_size (`int`, *optional*):
            The combined size in bytes of the Arrow tables for all splits.
        size_in_bytes (`int`, *optional*):
            The combined size in bytes of all files associated with the dataset (downloaded files + Arrow files).
        task_templates (`List[TaskTemplate]`, *optional*):
            The task templates to prepare the dataset for during training and evaluation. Each template casts the dataset's [`Features`] to standardized column names and types as detailed in `datasets.tasks`.
        **config_kwargs (additional keyword arguments):
            Keyword arguments to be passed to the [`BuilderConfig`] and used in the [`DatasetBuilder`].
    """

    # Set in the dataset scripts
    description: str = dataclasses.field(default_factory=str)
    citation: str = dataclasses.field(default_factory=str)
    homepage: str = dataclasses.field(default_factory=str)
    license: str = dataclasses.field(default_factory=str)
    features: Optional[Features] = None
    post_processed: Optional[PostProcessedInfo] = None
    supervised_keys: Optional[SupervisedKeysData] = None
    task_templates: Optional[List[TaskTemplate]] = None

    # Set later by the builder
    builder_name: Optional[str] = None
    dataset_name: Optional[str] = None  # for packaged builders, to be different from builder_name
    config_name: Optional[str] = None
    version: Optional[Union[str, Version]] = None
    # Set later by `download_and_prepare`
    splits: Optional[dict] = None
    download_checksums: Optional[dict] = None
    download_size: Optional[int] = None
    post_processing_size: Optional[int] = None
    dataset_size: Optional[int] = None
    size_in_bytes: Optional[int] = None

    _INCLUDED_INFO_IN_YAML: ClassVar[List[str]] = [
        "config_name",
        "download_size",
        "dataset_size",
        "features",
        "splits",
    ]

    def __post_init__(self):
        # Convert back to the correct classes when we reload from dict
        if self.features is not None and not isinstance(self.features, Features):
            self.features = Features.from_dict(self.features)
        if self.post_processed is not None and not isinstance(self.post_processed, PostProcessedInfo):
            self.post_processed = PostProcessedInfo.from_dict(self.post_processed)
        if self.version is not None and not isinstance(self.version, Version):
            if isinstance(self.version, str):
                self.version = Version(self.version)
            else:
                self.version = Version.from_dict(self.version)
        if self.splits is not None and not isinstance(self.splits, SplitDict):
            self.splits = SplitDict.from_split_dict(self.splits)
        if self.supervised_keys is not None and not isinstance(self.supervised_keys, SupervisedKeysData):
            if isinstance(self.supervised_keys, (tuple, list)):
                self.supervised_keys = SupervisedKeysData(*self.supervised_keys)
            else:
                self.supervised_keys = SupervisedKeysData(**self.supervised_keys)

        # Parse and make a list of templates
        if self.task_templates is not None:
            if isinstance(self.task_templates, (list, tuple)):
                templates = [
                    template if isinstance(template, TaskTemplate) else task_template_from_dict(template)
                    for template in self.task_templates
                ]
                self.task_templates = [template for template in templates if template is not None]
            elif isinstance(self.task_templates, TaskTemplate):
                self.task_templates = [self.task_templates]
            else:
                template = task_template_from_dict(self.task_templates)
                self.task_templates = [template] if template is not None else []

        # Align task templates with features
        if self.task_templates is not None:
            self.task_templates = list(self.task_templates)
            if self.features is not None:
                self.task_templates = [
                    template.align_with_features(self.features) for template in (self.task_templates)
                ]

    def write_to_directory(
        self, dataset_info_dir, pretty_print=False, fs="deprecated", storage_options: Optional[dict] = None
    ):
        """Write `DatasetInfo` and license (if present) as JSON files to `dataset_info_dir`.

        Args:
            dataset_info_dir (`str`):
                Destination directory.
            pretty_print (`bool`, defaults to `False`):
                If `True`, the JSON will be pretty-printed with the indent level of 4.
            fs (`fsspec.spec.AbstractFileSystem`, *optional*):
                Instance of the remote filesystem used to download the files from.

                <Deprecated version="2.9.0">

                `fs` was deprecated in version 2.9.0 and will be removed in 3.0.0.
                Please use `storage_options` instead, e.g. `storage_options=fs.storage_options`.

                </Deprecated>

            storage_options (`dict`, *optional*):
                Key/value pairs to be passed on to the file-system backend, if any.

                <Added version="2.9.0"/>

        Example:

        ```py
        >>> from datasets import load_dataset
        >>> ds = load_dataset("rotten_tomatoes", split="validation")
        >>> ds.info.write_to_directory("/path/to/directory/")
        ```
        """
        if fs != "deprecated":
            warnings.warn(
                "'fs' was deprecated in favor of 'storage_options' in version 2.9.0 and will be removed in 3.0.0.\n"
                "You can remove this warning by passing 'storage_options=fs.storage_options' instead.",
                FutureWarning,
            )
            storage_options = fs.storage_options

        fs: fsspec.AbstractFileSystem
        fs, *_ = url_to_fs(dataset_info_dir, **(storage_options or {}))
        with fs.open(posixpath.join(dataset_info_dir, config.DATASET_INFO_FILENAME), "wb") as f:
            self._dump_info(f, pretty_print=pretty_print)
        if self.license:
            with fs.open(posixpath.join(dataset_info_dir, config.LICENSE_FILENAME), "wb") as f:
                self._dump_license(f)

    def _dump_info(self, file, pretty_print=False):
        """Dump info in `file` file-like object open in bytes mode (to support remote files)"""
        file.write(json.dumps(asdict(self), indent=4 if pretty_print else None).encode("utf-8"))

    def _dump_license(self, file):
        """Dump license in `file` file-like object open in bytes mode (to support remote files)"""
        file.write(self.license.encode("utf-8"))

    @classmethod
    def from_merge(cls, dataset_infos: List["DatasetInfo"]):
        dataset_infos = [dset_info.copy() for dset_info in dataset_infos if dset_info is not None]

        if len(dataset_infos) > 0 and all(dataset_infos[0] == dset_info for dset_info in dataset_infos):
            # if all dataset_infos are equal we don't need to merge. Just return the first.
            return dataset_infos[0]

        description = "\n\n".join(unique_values(info.description for info in dataset_infos)).strip()
        citation = "\n\n".join(unique_values(info.citation for info in dataset_infos)).strip()
        homepage = "\n\n".join(unique_values(info.homepage for info in dataset_infos)).strip()
        license = "\n\n".join(unique_values(info.license for info in dataset_infos)).strip()
        features = None
        supervised_keys = None
        task_templates = None

        # Find common task templates across all dataset infos
        all_task_templates = [info.task_templates for info in dataset_infos if info.task_templates is not None]
        if len(all_task_templates) > 1:
            task_templates = list(set(all_task_templates[0]).intersection(*all_task_templates[1:]))
        elif len(all_task_templates):
            task_templates = list(set(all_task_templates[0]))
        # If no common task templates found, replace empty list with None
        task_templates = task_templates if task_templates else None

        return cls(
            description=description,
            citation=citation,
            homepage=homepage,
            license=license,
            features=features,
            supervised_keys=supervised_keys,
            task_templates=task_templates,
        )

    @classmethod
    def from_directory(
        cls, dataset_info_dir: str, fs="deprecated", storage_options: Optional[dict] = None
    ) -> "DatasetInfo":
        """Create [`DatasetInfo`] from the JSON file in `dataset_info_dir`.

        This function updates all the dynamically generated fields (num_examples,
        hash, time of creation,...) of the [`DatasetInfo`].

        This will overwrite all previous metadata.

        Args:
            dataset_info_dir (`str`):
                The directory containing the metadata file. This
                should be the root directory of a specific dataset version.
            fs (`fsspec.spec.AbstractFileSystem`, *optional*):
                Instance of the remote filesystem used to download the files from.

                <Deprecated version="2.9.0">

                `fs` was deprecated in version 2.9.0 and will be removed in 3.0.0.
                Please use `storage_options` instead, e.g. `storage_options=fs.storage_options`.

                </Deprecated>

            storage_options (`dict`, *optional*):
                Key/value pairs to be passed on to the file-system backend, if any.

                <Added version="2.9.0"/>

        Example:

        ```py
        >>> from datasets import DatasetInfo
        >>> ds_info = DatasetInfo.from_directory("/path/to/directory/")
        ```
        """
        if fs != "deprecated":
            warnings.warn(
                "'fs' was deprecated in favor of 'storage_options' in version 2.9.0 and will be removed in 3.0.0.\n"
                "You can remove this warning by passing 'storage_options=fs.storage_options' instead.",
                FutureWarning,
            )
            storage_options = fs.storage_options

        fs: fsspec.AbstractFileSystem
        fs, *_ = url_to_fs(dataset_info_dir, **(storage_options or {}))
        logger.info(f"Loading Dataset info from {dataset_info_dir}")
        if not dataset_info_dir:
            raise ValueError("Calling DatasetInfo.from_directory() with undefined dataset_info_dir.")
        with fs.open(posixpath.join(dataset_info_dir, config.DATASET_INFO_FILENAME), "r", encoding="utf-8") as f:
            dataset_info_dict = json.load(f)
        return cls.from_dict(dataset_info_dict)

    @classmethod
    def from_dict(cls, dataset_info_dict: dict) -> "DatasetInfo":
        field_names = {f.name for f in dataclasses.fields(cls)}
        return cls(**{k: v for k, v in dataset_info_dict.items() if k in field_names})

    def update(self, other_dataset_info: "DatasetInfo", ignore_none=True):
        self_dict = self.__dict__
        self_dict.update(
            **{
                k: copy.deepcopy(v)
                for k, v in other_dataset_info.__dict__.items()
                if (v is not None or not ignore_none)
            }
        )

    def copy(self) -> "DatasetInfo":
        return self.__class__(**{k: copy.deepcopy(v) for k, v in self.__dict__.items()})

    def _to_yaml_dict(self) -> dict:
        yaml_dict = {}
        dataset_info_dict = asdict(self)
        for key in dataset_info_dict:
            if key in self._INCLUDED_INFO_IN_YAML:
                value = getattr(self, key)
                if hasattr(value, "_to_yaml_list"):  # Features, SplitDict
                    yaml_dict[key] = value._to_yaml_list()
                elif hasattr(value, "_to_yaml_string"):  # Version
                    yaml_dict[key] = value._to_yaml_string()
                else:
                    yaml_dict[key] = value
        return yaml_dict

    @classmethod
    def _from_yaml_dict(cls, yaml_data: dict) -> "DatasetInfo":
        yaml_data = copy.deepcopy(yaml_data)
        if yaml_data.get("features") is not None:
            yaml_data["features"] = Features._from_yaml_list(yaml_data["features"])
        if yaml_data.get("splits") is not None:
            yaml_data["splits"] = SplitDict._from_yaml_list(yaml_data["splits"])
        field_names = {f.name for f in dataclasses.fields(cls)}
        return cls(**{k: v for k, v in yaml_data.items() if k in field_names})
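A round-trip sketch combining `write_to_directory` and `from_directory` (the path is a placeholder; this mirrors the docstring examples above):

# Round-trip sketch: dump dataset_info.json, then read it back.
from datasets import DatasetInfo, load_dataset

ds = load_dataset("rotten_tomatoes", split="validation")
ds.info.write_to_directory("/tmp/rt_info", pretty_print=True)
info = DatasetInfo.from_directory("/tmp/rt_info")
assert info.features == ds.info.features  # features survive the JSON round-trip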
class DatasetInfosDict(Dict[str, DatasetInfo]):
    def write_to_directory(self, dataset_infos_dir, overwrite=False, pretty_print=False) -> None:
        total_dataset_infos = {}
        dataset_infos_path = os.path.join(dataset_infos_dir, config.DATASETDICT_INFOS_FILENAME)
        dataset_readme_path = os.path.join(dataset_infos_dir, config.REPOCARD_FILENAME)
        if not overwrite:
            total_dataset_infos = self.from_directory(dataset_infos_dir)
        total_dataset_infos.update(self)
        if os.path.exists(dataset_infos_path):
            # for backward compatibility, let's update the JSON file if it exists
            with open(dataset_infos_path, "w", encoding="utf-8") as f:
                dataset_infos_dict = {
                    config_name: asdict(dset_info) for config_name, dset_info in total_dataset_infos.items()
                }
                json.dump(dataset_infos_dict, f, indent=4 if pretty_print else None)
        # Dump the infos in the YAML part of the README.md file
        if os.path.exists(dataset_readme_path):
            dataset_card = DatasetCard.load(dataset_readme_path)
            dataset_card_data = dataset_card.data
        else:
            dataset_card = None
            dataset_card_data = DatasetCardData()
        if total_dataset_infos:
            total_dataset_infos.to_dataset_card_data(dataset_card_data)
            dataset_card = (
                DatasetCard("---\n" + str(dataset_card_data) + "\n---\n") if dataset_card is None else dataset_card
            )
            dataset_card.save(Path(dataset_readme_path))

    @classmethod
    def from_directory(cls, dataset_infos_dir) -> "DatasetInfosDict":
        logger.info(f"Loading Dataset Infos from {dataset_infos_dir}")
        # Load the info from the YAML part of README.md
        if os.path.exists(os.path.join(dataset_infos_dir, config.REPOCARD_FILENAME)):
            dataset_card_data = DatasetCard.load(Path(dataset_infos_dir) / config.REPOCARD_FILENAME).data
            if "dataset_info" in dataset_card_data:
                return cls.from_dataset_card_data(dataset_card_data)
        if os.path.exists(os.path.join(dataset_infos_dir, config.DATASETDICT_INFOS_FILENAME)):
            # this is just to have backward compatibility with dataset_infos.json files
            with open(os.path.join(dataset_infos_dir, config.DATASETDICT_INFOS_FILENAME), encoding="utf-8") as f:
                return cls(
                    {
                        config_name: DatasetInfo.from_dict(dataset_info_dict)
                        for config_name, dataset_info_dict in json.load(f).items()
                    }
                )
        else:
            return cls()

    @classmethod
    def from_dataset_card_data(cls, dataset_card_data: DatasetCardData) -> "DatasetInfosDict":
        if isinstance(dataset_card_data.get("dataset_info"), (list, dict)):
            if isinstance(dataset_card_data["dataset_info"], list):
                return cls(
                    {
                        dataset_info_yaml_dict.get("config_name", "default"): DatasetInfo._from_yaml_dict(
                            dataset_info_yaml_dict
                        )
                        for dataset_info_yaml_dict in dataset_card_data["dataset_info"]
                    }
                )
            else:
                dataset_info = DatasetInfo._from_yaml_dict(dataset_card_data["dataset_info"])
                dataset_info.config_name = dataset_card_data["dataset_info"].get("config_name", "default")
                return cls({dataset_info.config_name: dataset_info})
        else:
            return cls()

    def to_dataset_card_data(self, dataset_card_data: DatasetCardData) -> None:
        if self:
            # first get existing metadata info
            if "dataset_info" in dataset_card_data and isinstance(dataset_card_data["dataset_info"], dict):
                dataset_metadata_infos = {
                    dataset_card_data["dataset_info"].get("config_name", "default"): dataset_card_data["dataset_info"]
                }
            elif "dataset_info" in dataset_card_data and isinstance(dataset_card_data["dataset_info"], list):
                dataset_metadata_infos = {
                    config_metadata["config_name"]: config_metadata
                    for config_metadata in dataset_card_data["dataset_info"]
                }
            else:
                dataset_metadata_infos = {}
            # update/rewrite existing metadata info with the one to dump
            total_dataset_infos = {
                **dataset_metadata_infos,
                **{config_name: dset_info._to_yaml_dict() for config_name, dset_info in self.items()},
            }
            # the config_name from the dataset_infos_dict takes over the config_name of the DatasetInfo
            for config_name, dset_info_yaml_dict in total_dataset_infos.items():
                dset_info_yaml_dict["config_name"] = config_name
            if len(total_dataset_infos) == 1:
                # use a struct instead of a list of configurations, since there's only one
                dataset_card_data["dataset_info"] = next(iter(total_dataset_infos.values()))
                config_name = dataset_card_data["dataset_info"].pop("config_name", None)
                if config_name != "default":
                    # if config_name is not "default" preserve it and put at the first position
                    dataset_card_data["dataset_info"] = {
                        "config_name": config_name,
                        **dataset_card_data["dataset_info"],
                    }
            else:
                dataset_card_data["dataset_info"] = []
                for config_name, dataset_info_yaml_dict in sorted(total_dataset_infos.items()):
                    # add the config_name field in first position
                    dataset_info_yaml_dict.pop("config_name", None)
                    dataset_info_yaml_dict = {"config_name": config_name, **dataset_info_yaml_dict}
                    dataset_card_data["dataset_info"].append(dataset_info_yaml_dict)


@dataclass
class MetricInfo:
    """Information about a metric.

    `MetricInfo` documents a metric, including its name, version, and features.
    See the constructor arguments and properties for a full list.

    Note: Not all fields are known on construction and may be updated later.
    """

    # Set in the dataset scripts
    description: str
    citation: str
    features: Features
    inputs_description: str = dataclasses.field(default_factory=str)
    homepage: str = dataclasses.field(default_factory=str)
    license: str = dataclasses.field(default_factory=str)
    codebase_urls: List[str] = dataclasses.field(default_factory=list)
    reference_urls: List[str] = dataclasses.field(default_factory=list)
    streamable: bool = False
    format: Optional[str] = None

    # Set later by the builder
    metric_name: Optional[str] = None
    config_name: Optional[str] = None
    experiment_id: Optional[str] = None

    def __post_init__(self):
        if self.format is not None:
            for key, value in self.features.items():
                if not isinstance(value, Value):
                    raise ValueError(
                        f"When using 'numpy' format, all features should be a `datasets.Value` feature. "
                        f"Here {key} is an instance of {value.__class__.__name__}"
                    )

    def write_to_directory(self, metric_info_dir, pretty_print=False):
        """Write `MetricInfo` as JSON to `metric_info_dir`.
        Also save the license separately in LICENCE.
        If `pretty_print` is True, the JSON will be pretty-printed with the indent level of 4.

        Example:

        ```py
        >>> from datasets import load_metric
        >>> metric = load_metric("accuracy")
        >>> metric.info.write_to_directory("/path/to/directory/")
        ```
        """
        with open(os.path.join(metric_info_dir, config.METRIC_INFO_FILENAME), "w", encoding="utf-8") as f:
            json.dump(asdict(self), f, indent=4 if pretty_print else None)

        if self.license:
            with open(os.path.join(metric_info_dir, config.LICENSE_FILENAME), "w", encoding="utf-8") as f:
                f.write(self.license)

    @classmethod
    def from_directory(cls, metric_info_dir) -> "MetricInfo":
        """Create MetricInfo from the JSON file in `metric_info_dir`.

        Args:
            metric_info_dir: `str` The directory containing the metadata file. This
                should be the root directory of a specific dataset version.

        Example:

        ```py
        >>> from datasets import MetricInfo
        >>> metric_info = MetricInfo.from_directory("/path/to/directory/")
        ```
        """
        logger.info(f"Loading Metric info from {metric_info_dir}")
        if not metric_info_dir:
            raise ValueError("Calling MetricInfo.from_directory() with undefined metric_info_dir.")

        with open(os.path.join(metric_info_dir, config.METRIC_INFO_FILENAME), encoding="utf-8") as f:
            metric_info_dict = json.load(f)
        return cls.from_dict(metric_info_dict)

    @classmethod
    def from_dict(cls, metric_info_dict: dict) -> "MetricInfo":
        field_names = {f.name for f in dataclasses.fields(cls)}
        return cls(**{k: v for k, v in metric_info_dict.items() if k in field_names})
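A small sketch of the card-data round trip implemented by the two classmethods above (illustrative values; relies on `huggingface_hub`'s `DatasetCardData` supporting item access, which the code above already assumes):

# Round-trip sketch: DatasetInfosDict -> dataset card YAML data -> DatasetInfosDict.
from huggingface_hub import DatasetCardData
from datasets.info import DatasetInfo, DatasetInfosDict

infos = DatasetInfosDict({"default": DatasetInfo(description="toy example")})
card_data = DatasetCardData()
infos.to_dataset_card_data(card_data)  # fills the "dataset_info" field
restored = DatasetInfosDict.from_dataset_card_data(card_data)
assert restored["default"].config_name == "default"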
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/iterable_dataset.py
ADDED
The diff for this file is too large to render. See raw diff.
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/metric.py
ADDED
@@ -0,0 +1,652 @@
# Copyright 2020 The HuggingFace Datasets Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Lint as: python3
"""Metrics base class."""

import os
import types
import uuid
from typing import Any, Dict, List, Optional, Tuple, Union

import numpy as np
import pyarrow as pa
from filelock import BaseFileLock, Timeout

from . import config
from .arrow_dataset import Dataset
from .arrow_reader import ArrowReader
from .arrow_writer import ArrowWriter
from .download.download_config import DownloadConfig
from .download.download_manager import DownloadManager
from .features import Features
from .info import DatasetInfo, MetricInfo
from .naming import camelcase_to_snakecase
from .utils._filelock import FileLock
from .utils.deprecation_utils import deprecated
from .utils.logging import get_logger
from .utils.py_utils import copyfunc, temp_seed


logger = get_logger(__name__)


class FileFreeLock(BaseFileLock):
    """Thread lock until a file **cannot** be locked"""

    def __init__(self, lock_file, *args, **kwargs):
        self.filelock = FileLock(lock_file)
        super().__init__(self.filelock.lock_file, *args, **kwargs)

    def _acquire(self):
        try:
            self.filelock.acquire(timeout=0.01, poll_intervall=0.02)  # Try to lock once
        except Timeout:
            # We couldn't acquire the lock, the file is locked!
            self._context.lock_file_fd = self.filelock.lock_file
        else:
            # We were able to acquire the lock, the file is not yet locked!
            self.filelock.release()
            self._context.lock_file_fd = None

    def _release(self):
        self._context.lock_file_fd = None


# lists - summarize long lists similarly to NumPy
# arrays/tensors - let the frameworks control formatting
def summarize_if_long_list(obj):
    if not type(obj) == list or len(obj) <= 6:  # noqa: E721
        return f"{obj}"

    def format_chunk(chunk):
        return ", ".join(repr(x) for x in chunk)

    return f"[{format_chunk(obj[:3])}, ..., {format_chunk(obj[-3:])}]"

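A quick behavior check for the helper above (expected strings worked out by hand from the code):

# Long lists are elided NumPy-style; short lists are printed whole.
assert summarize_if_long_list(list(range(10))) == "[0, 1, 2, ..., 7, 8, 9]"
assert summarize_if_long_list([1, 2, 3]) == "[1, 2, 3]"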
class MetricInfoMixin:
|
| 80 |
+
"""This base class exposes some attributes of MetricInfo
|
| 81 |
+
at the base level of the Metric for easy access.
|
| 82 |
+
|
| 83 |
+
<Deprecated version="2.5.0">
|
| 84 |
+
|
| 85 |
+
Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate
|
| 86 |
+
|
| 87 |
+
</Deprecated>
|
| 88 |
+
|
| 89 |
+
"""
|
| 90 |
+
|
| 91 |
+
def __init__(self, info: MetricInfo):
|
| 92 |
+
self._metric_info = info
|
| 93 |
+
|
| 94 |
+
@property
|
| 95 |
+
def info(self):
|
| 96 |
+
""":class:`datasets.MetricInfo` object containing all the metadata in the metric."""
|
| 97 |
+
return self._metric_info
|
| 98 |
+
|
| 99 |
+
@property
|
| 100 |
+
def name(self) -> str:
|
| 101 |
+
return self._metric_info.metric_name
|
| 102 |
+
|
| 103 |
+
@property
|
| 104 |
+
def experiment_id(self) -> Optional[str]:
|
| 105 |
+
return self._metric_info.experiment_id
|
| 106 |
+
|
| 107 |
+
@property
|
| 108 |
+
def description(self) -> str:
|
| 109 |
+
return self._metric_info.description
|
| 110 |
+
|
| 111 |
+
@property
|
| 112 |
+
def citation(self) -> str:
|
| 113 |
+
return self._metric_info.citation
|
| 114 |
+
|
| 115 |
+
@property
|
| 116 |
+
def features(self) -> Features:
|
| 117 |
+
return self._metric_info.features
|
| 118 |
+
|
| 119 |
+
@property
|
| 120 |
+
def inputs_description(self) -> str:
|
| 121 |
+
return self._metric_info.inputs_description
|
| 122 |
+
|
| 123 |
+
@property
|
| 124 |
+
def homepage(self) -> Optional[str]:
|
| 125 |
+
return self._metric_info.homepage
|
| 126 |
+
|
| 127 |
+
@property
|
| 128 |
+
def license(self) -> str:
|
| 129 |
+
return self._metric_info.license
|
| 130 |
+
|
| 131 |
+
@property
|
| 132 |
+
def codebase_urls(self) -> Optional[List[str]]:
|
| 133 |
+
return self._metric_info.codebase_urls
|
| 134 |
+
|
| 135 |
+
@property
|
| 136 |
+
def reference_urls(self) -> Optional[List[str]]:
|
| 137 |
+
return self._metric_info.reference_urls
|
| 138 |
+
|
| 139 |
+
@property
|
| 140 |
+
def streamable(self) -> bool:
|
| 141 |
+
return self._metric_info.streamable
|
| 142 |
+
|
| 143 |
+
@property
|
| 144 |
+
def format(self) -> Optional[str]:
|
| 145 |
+
return self._metric_info.format
|
| 146 |
+
|
| 147 |
+
|
| 148 |
+
class Metric(MetricInfoMixin):
|
| 149 |
+
"""A Metric is the base class and common API for all metrics.
|
| 150 |
+
|
| 151 |
+
<Deprecated version="2.5.0">
|
| 152 |
+
|
| 153 |
+
Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate
|
| 154 |
+
|
| 155 |
+
</Deprecated>
|
| 156 |
+
|
| 157 |
+
Args:
|
| 158 |
+
config_name (``str``): This is used to define a hash specific to a metrics computation script and prevents the metric's data
|
| 159 |
+
to be overridden when the metric loading script is modified.
|
| 160 |
+
keep_in_memory (:obj:`bool`): keep all predictions and references in memory. Not possible in distributed settings.
|
| 161 |
+
cache_dir (``str``): Path to a directory in which temporary prediction/references data will be stored.
|
| 162 |
+
The data directory should be located on a shared file-system in distributed setups.
|
| 163 |
+
num_process (``int``): specify the total number of nodes in a distributed settings.
|
| 164 |
+
This is useful to compute metrics in distributed setups (in particular non-additive metrics like F1).
|
| 165 |
+
process_id (``int``): specify the id of the current process in a distributed setup (between 0 and num_process-1)
|
| 166 |
+
This is useful to compute metrics in distributed setups (in particular non-additive metrics like F1).
|
| 167 |
+
seed (:obj:`int`, optional): If specified, this will temporarily set numpy's random seed when :func:`datasets.Metric.compute` is run.
|
| 168 |
+
experiment_id (``str``): A specific experiment id. This is used if several distributed evaluations share the same file system.
|
| 169 |
+
This is useful to compute metrics in distributed setups (in particular non-additive metrics like F1).
|
| 170 |
+
max_concurrent_cache_files (``int``): Max number of concurrent metrics cache files (default 10000).
|
| 171 |
+
timeout (``Union[int, float]``): Timeout in second for distributed setting synchronization.
|
| 172 |
+
"""
|
| 173 |
+
|
| 174 |
+
    @deprecated("Use the new library 🤗 Evaluate instead: https://huggingface.co/docs/evaluate")
    def __init__(
        self,
        config_name: Optional[str] = None,
        keep_in_memory: bool = False,
        cache_dir: Optional[str] = None,
        num_process: int = 1,
        process_id: int = 0,
        seed: Optional[int] = None,
        experiment_id: Optional[str] = None,
        max_concurrent_cache_files: int = 10000,
        timeout: Union[int, float] = 100,
        **kwargs,
    ):
        # prepare info
        self.config_name = config_name or "default"
        info = self._info()
        info.metric_name = camelcase_to_snakecase(self.__class__.__name__)
        info.config_name = self.config_name
        info.experiment_id = experiment_id or "default_experiment"
        MetricInfoMixin.__init__(self, info)  # For easy access on low level

        # Safety checks on num_process and process_id
        if not isinstance(process_id, int) or process_id < 0:
            raise ValueError("'process_id' should be an integer greater than or equal to 0")
        if not isinstance(num_process, int) or num_process <= process_id:
            raise ValueError("'num_process' should be an integer greater than process_id")
        if keep_in_memory and num_process != 1:
            raise ValueError("Using 'keep_in_memory' is not possible in distributed setting (num_process > 1).")

        self.num_process = num_process
        self.process_id = process_id
        self.max_concurrent_cache_files = max_concurrent_cache_files

        self.keep_in_memory = keep_in_memory
        self._data_dir_root = os.path.expanduser(cache_dir or config.HF_METRICS_CACHE)
        self.data_dir = self._build_data_dir()
        if seed is None:
            _, seed, pos, *_ = np.random.get_state()
            self.seed: int = seed[pos] if pos < 624 else seed[0]
        else:
            self.seed: int = seed
        self.timeout: Union[int, float] = timeout

        # Update 'compute' and 'add' docstring
        # methods need to be copied otherwise it changes the docstrings of every instance
        self.compute = types.MethodType(copyfunc(self.compute), self)
        self.add_batch = types.MethodType(copyfunc(self.add_batch), self)
        self.add = types.MethodType(copyfunc(self.add), self)
        self.compute.__func__.__doc__ += self.info.inputs_description
        self.add_batch.__func__.__doc__ += self.info.inputs_description
        self.add.__func__.__doc__ += self.info.inputs_description

        # self.arrow_schema = pa.schema(field for field in self.info.features.type)
        self.buf_writer = None
        self.writer = None
        self.writer_batch_size = None
        self.data = None

        # This is the cache file we store our predictions/references in
        # Keep it None for now so we can (cloud)pickle the object
        self.cache_file_name = None
        self.filelock = None
        self.rendez_vous_lock = None

        # This is all the cache files on which we have a lock when we are in a distributed setting
        self.file_paths = None
        self.filelocks = None

    def __len__(self):
        """Return the number of examples (predictions or predictions/references pairs)
        currently stored in the metric's cache.
        """
        return 0 if self.writer is None else len(self.writer)

    def __repr__(self):
        return (
            f'Metric(name: "{self.name}", features: {self.features}, '
            f'usage: """{self.inputs_description}""", '
            f"stored examples: {len(self)})"
        )

    def _build_data_dir(self):
        """Path of this metric in cache_dir:
        Will be:
            self._data_dir_root/self.name/self.config_name/self.hash (if not none)/
        If any of these elements is missing or if ``with_version=False`` the corresponding subfolders are dropped.
        """
        builder_data_dir = self._data_dir_root
        builder_data_dir = os.path.join(builder_data_dir, self.name, self.config_name)
        os.makedirs(builder_data_dir, exist_ok=True)
        return builder_data_dir

    def _create_cache_file(self, timeout=1) -> Tuple[str, FileLock]:
        """Create a new cache file. If the default cache file is used, we generate a new hash."""
        file_path = os.path.join(self.data_dir, f"{self.experiment_id}-{self.num_process}-{self.process_id}.arrow")
        filelock = None
        for i in range(self.max_concurrent_cache_files):
            filelock = FileLock(file_path + ".lock")
            try:
                filelock.acquire(timeout=timeout)
            except Timeout:
                # If we have reached the max number of attempts or we are not allowed to find a free name (distributed setup)
                # We raise an error
                if self.num_process != 1:
                    raise ValueError(
                        f"Error in _create_cache_file: another metric instance is already using the local cache file at {file_path}. "
                        f"Please specify an experiment_id (currently: {self.experiment_id}) to avoid collision "
                        f"between distributed metric instances."
                    ) from None
                if i == self.max_concurrent_cache_files - 1:
                    raise ValueError(
                        f"Cannot acquire lock, too many metric instances are operating concurrently on this file system. "
                        f"You should set a larger value of max_concurrent_cache_files when creating the metric "
                        f"(current value is {self.max_concurrent_cache_files})."
                    ) from None
                # In other cases (allowed to find a new file name + not yet at max num of attempts) we can try to sample a new hashing name.
                file_uuid = str(uuid.uuid4())
                file_path = os.path.join(
                    self.data_dir, f"{self.experiment_id}-{file_uuid}-{self.num_process}-{self.process_id}.arrow"
                )
            else:
                break

        return file_path, filelock

    def _get_all_cache_files(self) -> Tuple[List[str], List[FileLock]]:
        """Get a lock on all the cache files in a distributed setup.
        We wait for `timeout` seconds to let all the distributed nodes finish their tasks (default is 100 seconds).
        """
        if self.num_process == 1:
            if self.cache_file_name is None:
                raise ValueError(
                    "Metric cache file doesn't exist. Please make sure that you call `add` or `add_batch` "
                    "at least once before calling `compute`."
                )
            file_paths = [self.cache_file_name]
        else:
            file_paths = [
                os.path.join(self.data_dir, f"{self.experiment_id}-{self.num_process}-{process_id}.arrow")
                for process_id in range(self.num_process)
            ]

        # Let's acquire a lock on each process files to be sure they are finished writing
        filelocks = []
        for process_id, file_path in enumerate(file_paths):
            if process_id == 0:  # process 0 already has its lock file
                filelocks.append(self.filelock)
            else:
                filelock = FileLock(file_path + ".lock")
                try:
                    filelock.acquire(timeout=self.timeout)
                except Timeout:
                    raise ValueError(
                        f"Cannot acquire lock on cached file {file_path} for process {process_id}."
                    ) from None
                else:
                    filelocks.append(filelock)

        return file_paths, filelocks

    def _check_all_processes_locks(self):
        expected_lock_file_names = [
            os.path.join(self.data_dir, f"{self.experiment_id}-{self.num_process}-{process_id}.arrow.lock")
            for process_id in range(self.num_process)
        ]
        for expected_lock_file_name in expected_lock_file_names:
            nofilelock = FileFreeLock(expected_lock_file_name)
            try:
                nofilelock.acquire(timeout=self.timeout)
            except Timeout:
                raise ValueError(
                    f"Expected to find locked file {expected_lock_file_name} from process {self.process_id} but it doesn't exist."
                ) from None
            else:
                nofilelock.release()

    def _check_rendez_vous(self):
        expected_lock_file_name = os.path.join(self.data_dir, f"{self.experiment_id}-{self.num_process}-0.arrow.lock")
        nofilelock = FileFreeLock(expected_lock_file_name)
        try:
            nofilelock.acquire(timeout=self.timeout)
        except Timeout:
            raise ValueError(
                f"Expected to find locked file {expected_lock_file_name} from process {self.process_id} but it doesn't exist."
            ) from None
        else:
            nofilelock.release()
        lock_file_name = os.path.join(self.data_dir, f"{self.experiment_id}-{self.num_process}-rdv.lock")
        rendez_vous_lock = FileLock(lock_file_name)
        try:
            rendez_vous_lock.acquire(timeout=self.timeout)
        except Timeout:
            raise ValueError(f"Couldn't acquire lock on {lock_file_name} from process {self.process_id}.") from None
        else:
            rendez_vous_lock.release()

    def _finalize(self):
        """Close all the writing processes and load/gather the data
        from all the nodes if main node or all_process is True.
        """
        if self.writer is not None:
            self.writer.finalize()
        self.writer = None
        # release the locks of the processes > 0 so that process 0 can lock them to read + delete the data
        if self.filelock is not None and self.process_id > 0:
            self.filelock.release()

        if self.keep_in_memory:
            # Read the predictions and references
            reader = ArrowReader(path=self.data_dir, info=DatasetInfo(features=self.features))
            self.data = Dataset.from_buffer(self.buf_writer.getvalue())

        elif self.process_id == 0:
            # Let's acquire a lock on each node files to be sure they are finished writing
            file_paths, filelocks = self._get_all_cache_files()

            # Read the predictions and references
            try:
                reader = ArrowReader(path="", info=DatasetInfo(features=self.features))
                self.data = Dataset(**reader.read_files([{"filename": f} for f in file_paths]))
            except FileNotFoundError:
                raise ValueError(
                    "Error in finalize: another metric instance is already using the local cache file. "
                    "Please specify an experiment_id to avoid collision between distributed metric instances."
                ) from None

            # Store file paths and locks and we will release/delete them after the computation.
            self.file_paths = file_paths
            self.filelocks = filelocks

    def compute(self, *, predictions=None, references=None, **kwargs) -> Optional[dict]:
        """Compute the metrics.

        Usage of positional arguments is not allowed to prevent mistakes.

        Args:
            predictions (list/array/tensor, optional): Predictions.
            references (list/array/tensor, optional): References.
            **kwargs (optional): Keyword arguments that will be forwarded to the metrics :meth:`_compute`
                method (see details in the docstring).

        Return:
            dict or None

            - Dictionary with the metrics if this metric is run on the main process (``process_id == 0``).
            - None if the metric is not run on the main process (``process_id != 0``).

        Example:

        ```py
        >>> from datasets import load_metric
        >>> metric = load_metric("accuracy")
        >>> accuracy = metric.compute(predictions=model_prediction, references=labels)
        ```
        """
        all_kwargs = {"predictions": predictions, "references": references, **kwargs}
        if predictions is None and references is None:
            missing_kwargs = {k: None for k in self.features if k not in all_kwargs}
            all_kwargs.update(missing_kwargs)
        else:
            missing_inputs = [k for k in self.features if k not in all_kwargs]
            if missing_inputs:
                raise ValueError(
                    f"Metric inputs are missing: {missing_inputs}. All required inputs are {list(self.features)}"
                )
        inputs = {input_name: all_kwargs[input_name] for input_name in self.features}
        compute_kwargs = {k: kwargs[k] for k in kwargs if k not in self.features}

        if any(v is not None for v in inputs.values()):
            self.add_batch(**inputs)
        self._finalize()

        self.cache_file_name = None
        self.filelock = None

        if self.process_id == 0:
            self.data.set_format(type=self.info.format)

            inputs = {input_name: self.data[input_name] for input_name in self.features}
            with temp_seed(self.seed):
                output = self._compute(**inputs, **compute_kwargs)

            if self.buf_writer is not None:
                self.buf_writer = None
                del self.data
                self.data = None
            else:
                # Release locks and delete all the cache files. Process 0 is released last.
                for filelock, file_path in reversed(list(zip(self.filelocks, self.file_paths))):
                    logger.info(f"Removing {file_path}")
                    del self.data
                    self.data = None
                    del self.writer
                    self.writer = None
                    os.remove(file_path)
                    filelock.release()

            return output
        else:
            return None

    def add_batch(self, *, predictions=None, references=None, **kwargs):
        """Add a batch of predictions and references for the metric's stack.

        Args:
            predictions (list/array/tensor, optional): Predictions.
            references (list/array/tensor, optional): References.

        Example:

        ```py
        >>> from datasets import load_metric
        >>> metric = load_metric("accuracy")
        >>> metric.add_batch(predictions=model_prediction, references=labels)
        ```
        """
        bad_inputs = [input_name for input_name in kwargs if input_name not in self.features]
        if bad_inputs:
            raise ValueError(f"Bad inputs for metric: {bad_inputs}. All required inputs are {list(self.features)}")
        batch = {"predictions": predictions, "references": references, **kwargs}
        batch = {input_name: batch[input_name] for input_name in self.features}
        batch = self.info.features.encode_batch(batch)
        if self.writer is None:
            self._init_writer()
        try:
            self.writer.write_batch(batch)
        except pa.ArrowInvalid:
            if any(len(batch[c]) != len(next(iter(batch.values()))) for c in batch):
                col0 = next(iter(batch))
                bad_col = [c for c in batch if len(batch[c]) != len(batch[col0])][0]
                error_msg = (
                    f"Mismatch in the number of {col0} ({len(batch[col0])}) and {bad_col} ({len(batch[bad_col])})"
                )
            elif sorted(self.features) != ["predictions", "references"]:
                error_msg = f"Metric inputs don't match the expected format.\n" f"Expected format: {self.features},\n"
                error_msg_inputs = ",\n".join(
                    f"Input {input_name}: {summarize_if_long_list(batch[input_name])}" for input_name in self.features
                )
                error_msg += error_msg_inputs
            else:
                error_msg = (
                    f"Predictions and/or references don't match the expected format.\n"
                    f"Expected format: {self.features},\n"
                    f"Input predictions: {summarize_if_long_list(predictions)},\n"
                    f"Input references: {summarize_if_long_list(references)}"
                )
            raise ValueError(error_msg) from None

    def add(self, *, prediction=None, reference=None, **kwargs):
        """Add one prediction and reference for the metric's stack.

        Args:
            prediction (list/array/tensor, optional): Prediction.
            reference (list/array/tensor, optional): Reference.

        Example:

        ```py
        >>> from datasets import load_metric
        >>> metric = load_metric("accuracy")
        >>> metric.add(prediction=model_prediction, reference=label)
        ```
        """
        bad_inputs = [input_name for input_name in kwargs if input_name not in self.features]
        if bad_inputs:
            raise ValueError(f"Bad inputs for metric: {bad_inputs}. All required inputs are {list(self.features)}")
        example = {"predictions": prediction, "references": reference, **kwargs}
        example = {input_name: example[input_name] for input_name in self.features}
        example = self.info.features.encode_example(example)
        if self.writer is None:
            self._init_writer()
        try:
            self.writer.write(example)
        except pa.ArrowInvalid:
            error_msg = f"Metric inputs don't match the expected format.\n" f"Expected format: {self.features},\n"
            error_msg_inputs = ",\n".join(
                f"Input {input_name}: {summarize_if_long_list(example[input_name])}" for input_name in self.features
            )
            error_msg += error_msg_inputs
            raise ValueError(error_msg) from None

    def _init_writer(self, timeout=1):
        if self.num_process > 1:
            if self.process_id == 0:
                file_path = os.path.join(self.data_dir, f"{self.experiment_id}-{self.num_process}-rdv.lock")
                self.rendez_vous_lock = FileLock(file_path)
                try:
                    self.rendez_vous_lock.acquire(timeout=timeout)
                except TimeoutError:
                    raise ValueError(
                        f"Error in _init_writer: another metric instance is already using the local cache file at {file_path}. "
                        f"Please specify an experiment_id (currently: {self.experiment_id}) to avoid collision "
                        f"between distributed metric instances."
                    ) from None

        if self.keep_in_memory:
            self.buf_writer = pa.BufferOutputStream()
            self.writer = ArrowWriter(
                features=self.info.features, stream=self.buf_writer, writer_batch_size=self.writer_batch_size
            )
        else:
            self.buf_writer = None

            # Get cache file name and lock it
            if self.cache_file_name is None or self.filelock is None:
                cache_file_name, filelock = self._create_cache_file()  # get ready
                self.cache_file_name = cache_file_name
                self.filelock = filelock

            self.writer = ArrowWriter(
                features=self.info.features, path=self.cache_file_name, writer_batch_size=self.writer_batch_size
            )
        # Setup the rendez-vous here if we are in a distributed setting
        if self.num_process > 1:
            if self.process_id == 0:
                self._check_all_processes_locks()  # wait for everyone to be ready
                self.rendez_vous_lock.release()  # let everyone go
            else:
                self._check_rendez_vous()  # wait for master to be ready and to let everyone go

    def _info(self) -> MetricInfo:
        """Construct the MetricInfo object. See `MetricInfo` for details.

        Warning: This function is only called once and the result is cached for all
        following .info() calls.

        Returns:
            info: (MetricInfo) The metrics information
        """
        raise NotImplementedError

    def download_and_prepare(
        self,
        download_config: Optional[DownloadConfig] = None,
        dl_manager: Optional[DownloadManager] = None,
    ):
        """Downloads and prepares dataset for reading.

        Args:
            download_config (:class:`DownloadConfig`, optional): Specific download configuration parameters.
            dl_manager (:class:`DownloadManager`, optional): Specific download manager to use.
        """
        if dl_manager is None:
            if download_config is None:
                download_config = DownloadConfig()
                download_config.cache_dir = os.path.join(self.data_dir, "downloads")
                download_config.force_download = False

            dl_manager = DownloadManager(
                dataset_name=self.name, download_config=download_config, data_dir=self.data_dir
            )

        self._download_and_prepare(dl_manager)

    def _download_and_prepare(self, dl_manager):
        """Downloads and prepares resources for the metric.

        This is the internal implementation to overwrite called when user calls
        `download_and_prepare`. It should download all required resources for the metric.

        Args:
            dl_manager (:class:`DownloadManager`): `DownloadManager` used to download and cache data.
        """
        return None

    def _compute(self, *, predictions=None, references=None, **kwargs) -> Dict[str, Any]:
        """This method defines the common API for all the metrics in the library"""
        raise NotImplementedError

    def __del__(self):
        if hasattr(self, "filelock") and self.filelock is not None:
            self.filelock.release()
        if hasattr(self, "rendez_vous_lock") and self.rendez_vous_lock is not None:
            self.rendez_vous_lock.release()
        if hasattr(self, "writer"):  # in case it was already deleted
            del self.writer
        if hasattr(self, "data"):  # in case it was already deleted
            del self.data
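

# A minimal usage sketch of the API above: a toy exact-match metric defined by
# overriding `_info` and `_compute` (the class name, function name, and return
# key below are illustrative, not part of the library).
def _example_custom_metric():
    import datasets

    class ExactMatch(datasets.Metric):
        def _info(self):
            return datasets.MetricInfo(
                description="Fraction of predictions that exactly match their reference.",
                citation="",
                features=datasets.Features(
                    {"predictions": datasets.Value("string"), "references": datasets.Value("string")}
                ),
            )

        def _compute(self, predictions=None, references=None):
            matches = sum(p == r for p, r in zip(predictions, references))
            return {"exact_match": matches / len(predictions)}

    metric = ExactMatch()
    metric.add_batch(predictions=["a", "b"], references=["a", "c"])
    return metric.compute()  # {"exact_match": 0.5}
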
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/search.py
ADDED
@@ -0,0 +1,785 @@
import importlib.util
import os
import tempfile
from pathlib import PurePath
from typing import TYPE_CHECKING, Dict, List, NamedTuple, Optional, Union

import fsspec
import numpy as np

from .features import Sequence
from .utils import logging
from .utils import tqdm as hf_tqdm


if TYPE_CHECKING:
    from .arrow_dataset import Dataset  # noqa: F401

    try:
        from elasticsearch import Elasticsearch  # noqa: F401

    except ImportError:
        pass
    try:
        import faiss  # noqa: F401

    except ImportError:
        pass

_has_elasticsearch = importlib.util.find_spec("elasticsearch") is not None
_has_faiss = importlib.util.find_spec("faiss") is not None


logger = logging.get_logger(__name__)


class MissingIndex(Exception):
    pass


class SearchResults(NamedTuple):
    scores: List[float]
    indices: List[int]


class BatchedSearchResults(NamedTuple):
    total_scores: List[List[float]]
    total_indices: List[List[int]]


class NearestExamplesResults(NamedTuple):
    scores: List[float]
    examples: dict


class BatchedNearestExamplesResults(NamedTuple):
    total_scores: List[List[float]]
    total_examples: List[dict]


class BaseIndex:
    """Base class for indexing"""

    def search(self, query, k: int = 10, **kwargs) -> SearchResults:
        """
        To implement.
        This method has to return the scores and the indices of the retrieved examples given a certain query.
        """
        raise NotImplementedError

    def search_batch(self, queries, k: int = 10, **kwargs) -> BatchedSearchResults:
        """Find the nearest examples indices to the queries.

        Args:
            queries (`Union[List[str], np.ndarray]`): The queries as a list of strings if `column` is a text index or as a numpy array if `column` is a vector index.
            k (`int`): The number of examples to retrieve per query.

        Output:
            total_scores (`List[List[float]]`): The retrieval scores of the retrieved examples per query.
            total_indices (`List[List[int]]`): The indices of the retrieved examples per query.
        """
        total_scores, total_indices = [], []
        for query in queries:
            scores, indices = self.search(query, k)
            total_scores.append(scores)
            total_indices.append(indices)
        return BatchedSearchResults(total_scores, total_indices)

    def save(self, file: Union[str, PurePath]):
        """Serialize the index on disk"""
        raise NotImplementedError

    @classmethod
    def load(cls, file: Union[str, PurePath]) -> "BaseIndex":
        """Deserialize the index from disk"""
        raise NotImplementedError
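

# A minimal sketch of a custom `BaseIndex` (illustrative only; this class is
# hypothetical, not library API): brute-force inner-product search over an
# in-memory numpy matrix. `search_batch`, inherited from `BaseIndex`, works
# for free on top of it.
class _ExampleNumpyIndex(BaseIndex):
    def __init__(self, vectors: np.ndarray):
        self.vectors = vectors  # shape: (num_examples, dim)

    def search(self, query, k: int = 10, **kwargs) -> SearchResults:
        # Score every stored vector against the query and keep the k best.
        scores = self.vectors @ np.asarray(query).reshape(-1)
        top_k = np.argsort(-scores)[:k]
        return SearchResults(scores[top_k].tolist(), top_k.tolist())
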
class ElasticSearchIndex(BaseIndex):
    """
    Sparse index using Elasticsearch. It is used to index text and run queries based on BM25 similarity.
    An Elasticsearch server needs to be accessible, and a python client is declared with
    ```
    es_client = Elasticsearch([{'host': 'localhost', 'port': '9200'}])
    ```
    for example.
    """

    def __init__(
        self,
        host: Optional[str] = None,
        port: Optional[int] = None,
        es_client: Optional["Elasticsearch"] = None,
        es_index_name: Optional[str] = None,
        es_index_config: Optional[dict] = None,
    ):
        if not _has_elasticsearch:
            raise ImportError(
                "You must install ElasticSearch to use ElasticSearchIndex. To do so you can run `pip install elasticsearch==7.7.1` for example"
            )
        if es_client is not None and (host is not None or port is not None):
            raise ValueError("Please specify either `es_client` or `(host, port)`, but not both.")
        host = host or "localhost"
        port = port or 9200

        import elasticsearch.helpers  # noqa: F401 - need this to properly load all the es features
        from elasticsearch import Elasticsearch  # noqa: F811

        self.es_client = es_client if es_client is not None else Elasticsearch([{"host": host, "port": str(port)}])
        self.es_index_name = (
            es_index_name
            if es_index_name is not None
            else "huggingface_datasets_" + os.path.basename(tempfile.NamedTemporaryFile().name)
        )
        self.es_index_config = (
            es_index_config
            if es_index_config is not None
            else {
                "settings": {
                    "number_of_shards": 1,
                    "analysis": {"analyzer": {"stop_standard": {"type": "standard", " stopwords": "_english_"}}},
                },
                "mappings": {"properties": {"text": {"type": "text", "analyzer": "standard", "similarity": "BM25"}}},
            }
        )

    def add_documents(self, documents: Union[List[str], "Dataset"], column: Optional[str] = None):
        """
        Add documents to the index.
        If the documents are inside a certain column, you can specify it using the `column` argument.
        """
        index_name = self.es_index_name
        index_config = self.es_index_config
        self.es_client.indices.create(index=index_name, body=index_config)
        number_of_docs = len(documents)
        progress = hf_tqdm(unit="docs", total=number_of_docs)
        successes = 0

        def passage_generator():
            if column is not None:
                for i, example in enumerate(documents):
                    yield {"text": example[column], "_id": i}
            else:
                for i, example in enumerate(documents):
                    yield {"text": example, "_id": i}

        # create the ES index
        import elasticsearch as es

        for ok, action in es.helpers.streaming_bulk(
            client=self.es_client,
            index=index_name,
            actions=passage_generator(),
        ):
            progress.update(1)
            successes += ok
        if successes != len(documents):
            logger.warning(
                f"Some documents failed to be added to ElasticSearch. Failures: {len(documents)-successes}/{len(documents)}"
            )
        logger.info(f"Indexed {successes:d} documents")

    def search(self, query: str, k=10, **kwargs) -> SearchResults:
        """Find the nearest examples indices to the query.

        Args:
            query (`str`): The query as a string.
            k (`int`): The number of examples to retrieve.

        Output:
            scores (`List[float]`): The retrieval scores of the retrieved examples.
            indices (`List[int]`): The indices of the retrieved examples.
        """
        response = self.es_client.search(
            index=self.es_index_name,
            body={"query": {"multi_match": {"query": query, "fields": ["text"], "type": "cross_fields"}}, "size": k},
            **kwargs,
        )
        hits = response["hits"]["hits"]
        return SearchResults([hit["_score"] for hit in hits], [int(hit["_id"]) for hit in hits])

    def search_batch(self, queries, k: int = 10, max_workers=10, **kwargs) -> BatchedSearchResults:
        import concurrent.futures

        total_scores, total_indices = [None] * len(queries), [None] * len(queries)
        with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
            future_to_index = {executor.submit(self.search, query, k, **kwargs): i for i, query in enumerate(queries)}
            for future in concurrent.futures.as_completed(future_to_index):
                index = future_to_index[future]
                results: SearchResults = future.result()
                total_scores[index] = results.scores
                total_indices[index] = results.indices
        return BatchedSearchResults(total_indices=total_indices, total_scores=total_scores)
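

# A minimal usage sketch for `ElasticSearchIndex` (illustrative; assumes an
# Elasticsearch server reachable at localhost:9200 and the `elasticsearch`
# client installed).
def _example_elasticsearch_usage():
    index = ElasticSearchIndex(host="localhost", port=9200)
    index.add_documents(["foo bar", "bar is great", "machine learning rocks"])
    scores, doc_ids = index.search("machine learning", k=2)  # BM25-ranked hits
    return scores, doc_ids
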
class FaissIndex(BaseIndex):
    """
    Dense index using Faiss. It is used to index vectors.
    Faiss is a library for efficient similarity search and clustering of dense vectors.
    It contains algorithms that search in sets of vectors of any size, up to ones that possibly do not fit in RAM.
    You can find more information about Faiss here:
    - For index types and the string factory: https://github.com/facebookresearch/faiss/wiki/The-index-factory
    - For GPU settings: https://github.com/facebookresearch/faiss/wiki/Faiss-on-the-GPU
    """

    def __init__(
        self,
        device: Optional[Union[int, List[int]]] = None,
        string_factory: Optional[str] = None,
        metric_type: Optional[int] = None,
        custom_index: Optional["faiss.Index"] = None,
    ):
        """
        Create a Dense index using Faiss. You can specify `device` if you want to run it on GPU (`device` must be the GPU index).
        You can find more information about Faiss here:
        - For `string factory`: https://github.com/facebookresearch/faiss/wiki/The-index-factory
        """
        if string_factory is not None and custom_index is not None:
            raise ValueError("Please specify either `string_factory` or `custom_index` but not both.")
        if device is not None and custom_index is not None:
            raise ValueError(
                "Cannot pass both 'custom_index' and 'device'. "
                "Pass 'custom_index' already transferred to the target device instead."
            )
        self.device = device
        self.string_factory = string_factory
        self.metric_type = metric_type
        self.faiss_index = custom_index
        if not _has_faiss:
            raise ImportError(
                "You must install Faiss to use FaissIndex. To do so you can run `conda install -c pytorch faiss-cpu` or `conda install -c pytorch faiss-gpu`. "
                "A community supported package is also available on pypi: `pip install faiss-cpu` or `pip install faiss-gpu`. "
                "Note that pip may not have the latest version of FAISS, and thus, some of the latest features and bug fixes may not be available."
            )

    def add_vectors(
        self,
        vectors: Union[np.array, "Dataset"],
        column: Optional[str] = None,
        batch_size: int = 1000,
        train_size: Optional[int] = None,
        faiss_verbose: Optional[bool] = None,
    ):
        """
        Add vectors to the index.
        If the arrays are inside a certain column, you can specify it using the `column` argument.
        """
        import faiss  # noqa: F811

        if column and not isinstance(vectors.features[column], Sequence):
            raise ValueError(
                f"Wrong feature type for column '{column}'. Expected 1d array, got {vectors.features[column]}"
            )

        # Create index
        if self.faiss_index is None:
            size = len(vectors[0]) if column is None else len(vectors[0][column])
            if self.string_factory is not None:
                if self.metric_type is None:
                    index = faiss.index_factory(size, self.string_factory)
                else:
                    index = faiss.index_factory(size, self.string_factory, self.metric_type)
            else:
                if self.metric_type is None:
                    index = faiss.IndexFlat(size)
                else:
                    index = faiss.IndexFlat(size, self.metric_type)

            self.faiss_index = self._faiss_index_to_device(index, self.device)
            logger.info(f"Created faiss index of type {type(self.faiss_index)}")

        # Set verbosity level
        if faiss_verbose is not None:
            self.faiss_index.verbose = faiss_verbose
            if hasattr(self.faiss_index, "index") and self.faiss_index.index is not None:
                self.faiss_index.index.verbose = faiss_verbose
            if hasattr(self.faiss_index, "quantizer") and self.faiss_index.quantizer is not None:
                self.faiss_index.quantizer.verbose = faiss_verbose
            if hasattr(self.faiss_index, "clustering_index") and self.faiss_index.clustering_index is not None:
                self.faiss_index.clustering_index.verbose = faiss_verbose

        # Train
        if train_size is not None:
            train_vecs = vectors[:train_size] if column is None else vectors[:train_size][column]
            logger.info(f"Training the index with the first {len(train_vecs)} vectors")
            self.faiss_index.train(train_vecs)
        else:
            logger.info("Ignored the training step of the faiss index as `train_size` is None.")

        # Add vectors
        logger.info(f"Adding {len(vectors)} vectors to the faiss index")
        for i in hf_tqdm(range(0, len(vectors), batch_size)):
            vecs = vectors[i : i + batch_size] if column is None else vectors[i : i + batch_size][column]
            self.faiss_index.add(vecs)

    @staticmethod
    def _faiss_index_to_device(index: "faiss.Index", device: Optional[Union[int, List[int]]] = None) -> "faiss.Index":
        """
        Sends a faiss index to a device.
        A device can either be a positive integer (GPU id), a negative integer (all GPUs),
        or a list of positive integers (select GPUs to use), or `None` for CPU.
        """

        # If device is not specified, then it runs on CPU.
        if device is None:
            return index

        import faiss  # noqa: F811

        # If the device id is given as an integer
        if isinstance(device, int):
            # Positive integers are directly mapped to GPU ids
            if device > -1:
                faiss_res = faiss.StandardGpuResources()
                index = faiss.index_cpu_to_gpu(faiss_res, device, index)
            # And negative integers mean using all GPUs
            else:
                index = faiss.index_cpu_to_all_gpus(index)
        # Device ids given as a list mean mapping to those devices specified.
        elif isinstance(device, (list, tuple)):
            index = faiss.index_cpu_to_gpus_list(index, gpus=list(device))
        else:
            raise TypeError(
                f"The argument type: {type(device)} is not expected. "
                + "Please pass in either nothing, a positive int, a negative int, or a list of positive ints."
            )

        return index

    def search(self, query: np.array, k=10, **kwargs) -> SearchResults:
        """Find the nearest examples indices to the query.

        Args:
            query (`np.array`): The query as a numpy array.
            k (`int`): The number of examples to retrieve.

        Output:
            scores (`List[float]`): The retrieval scores of the retrieved examples.
            indices (`List[int]`): The indices of the retrieved examples.
        """
        if len(query.shape) != 1 and (len(query.shape) != 2 or query.shape[0] != 1):
            raise ValueError("Shape of query is incorrect, it has to be either a 1D array or 2D (1, N)")

        queries = query.reshape(1, -1)
        if not queries.flags.c_contiguous:
            queries = np.asarray(queries, order="C")
        scores, indices = self.faiss_index.search(queries, k, **kwargs)
        return SearchResults(scores[0], indices[0].astype(int))

    def search_batch(self, queries: np.array, k=10, **kwargs) -> BatchedSearchResults:
        """Find the nearest examples indices to the queries.

        Args:
            queries (`np.array`): The queries as a numpy array.
            k (`int`): The number of examples to retrieve per query.

        Output:
            total_scores (`List[List[float]]`): The retrieval scores of the retrieved examples per query.
            total_indices (`List[List[int]]`): The indices of the retrieved examples per query.
        """
        if len(queries.shape) != 2:
            raise ValueError("Shape of query must be 2D")
        if not queries.flags.c_contiguous:
            queries = np.asarray(queries, order="C")
        scores, indices = self.faiss_index.search(queries, k, **kwargs)
        return BatchedSearchResults(scores, indices.astype(int))

    def save(self, file: Union[str, PurePath], storage_options: Optional[Dict] = None):
        """Serialize the FaissIndex on disk"""
        import faiss  # noqa: F811

        if self.device is not None and isinstance(self.device, (int, list, tuple)):
            index = faiss.index_gpu_to_cpu(self.faiss_index)
        else:
            index = self.faiss_index

        with fsspec.open(str(file), "wb", **(storage_options or {})) as f:
            faiss.write_index(index, faiss.BufferedIOWriter(faiss.PyCallbackIOWriter(f.write)))

    @classmethod
    def load(
        cls,
        file: Union[str, PurePath],
        device: Optional[Union[int, List[int]]] = None,
        storage_options: Optional[Dict] = None,
    ) -> "FaissIndex":
        """Deserialize the FaissIndex from disk"""
        import faiss  # noqa: F811

        # Instances of FaissIndex are essentially just wrappers around faiss indices.
        faiss_index = cls(device=device)
        with fsspec.open(str(file), "rb", **(storage_options or {})) as f:
            index = faiss.read_index(faiss.BufferedIOReader(faiss.PyCallbackIOReader(f.read)))
        faiss_index.faiss_index = faiss_index._faiss_index_to_device(index, faiss_index.device)
        return faiss_index
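

# A minimal usage sketch for `FaissIndex` (illustrative; requires faiss to be
# installed): build a flat (exact) index over random vectors, query it, and
# round-trip it through `save`/`load`. The path is a placeholder.
def _example_faiss_usage(path="/tmp/example.faiss"):
    vectors = np.random.rand(100, 32).astype(np.float32)  # faiss expects float32
    index = FaissIndex()  # no string_factory/custom_index -> flat CPU index
    index.add_vectors(vectors)
    scores, ids = index.search(vectors[0], k=5)  # nearest neighbours of row 0
    index.save(path)
    restored = FaissIndex.load(path)
    return restored.search_batch(vectors[:4], k=5)
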
class IndexableMixin:
    """Add indexing features to `datasets.Dataset`"""

    def __init__(self):
        self._indexes: Dict[str, BaseIndex] = {}

    def __len__(self):
        raise NotImplementedError

    def __getitem__(self, key):
        raise NotImplementedError

    def is_index_initialized(self, index_name: str) -> bool:
        return index_name in self._indexes

    def _check_index_is_initialized(self, index_name: str):
        if not self.is_index_initialized(index_name):
            raise MissingIndex(
                f"Index with index_name '{index_name}' not initialized yet. Please make sure that you call `add_faiss_index` or `add_elasticsearch_index` first."
            )

    def list_indexes(self) -> List[str]:
        """List the `index_name`/identifiers of all the attached indexes."""
        return list(self._indexes)

    def get_index(self, index_name: str) -> BaseIndex:
        """Return the index with the specified `index_name`.

        Args:
            index_name (`str`): Index name.

        Returns:
            [`BaseIndex`]
        """
        self._check_index_is_initialized(index_name)
        return self._indexes[index_name]

    def add_faiss_index(
        self,
        column: str,
        index_name: Optional[str] = None,
        device: Optional[Union[int, List[int]]] = None,
        string_factory: Optional[str] = None,
        metric_type: Optional[int] = None,
        custom_index: Optional["faiss.Index"] = None,
        batch_size: int = 1000,
        train_size: Optional[int] = None,
        faiss_verbose: bool = False,
    ):
        """Add a dense index using Faiss for fast retrieval.
        The index is created using the vectors of the specified column.
        You can specify `device` if you want to run it on GPU (`device` must be the GPU index, see more below).
        You can find more information about Faiss here:
        - For `string factory`: https://github.com/facebookresearch/faiss/wiki/The-index-factory

        Args:
            column (`str`): The column of the vectors to add to the index.
            index_name (Optional `str`): The index_name/identifier of the index. This is the index_name that is used to call `.get_nearest` or `.search`.
                By default it corresponds to `column`.
            device (Optional `Union[int, List[int]]`): If positive integer, this is the index of the GPU to use. If negative integer, use all GPUs.
                If a list of positive integers is passed in, run only on those GPUs. By default it uses the CPU.
            string_factory (Optional `str`): This is passed to the index factory of Faiss to create the index. Default index class is ``IndexFlat``.
            metric_type (Optional `int`): Type of metric. Ex: `faiss.METRIC_INNER_PRODUCT` or `faiss.METRIC_L2`.
            custom_index (Optional `faiss.Index`): Custom Faiss index that you already have instantiated and configured for your needs.
            batch_size (Optional `int`): Size of the batch to use while adding vectors to the FaissIndex. Default value is 1000.
                <Added version="2.4.0"/>
            train_size (Optional `int`): If the index needs a training step, specifies how many vectors will be used to train the index.
            faiss_verbose (`bool`, defaults to False): Enable the verbosity of the Faiss index.
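
        Example (a sketch; assumes the dataset has an "embeddings" column of 1d float arrays
        and `query_vector` is a numpy array of the same dimension):

        ```py
        >>> ds.add_faiss_index(column="embeddings")
        >>> scores, indices = ds.search("embeddings", query_vector, k=10)
        ```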
"""
|
| 486 |
+
index_name = index_name if index_name is not None else column
|
| 487 |
+
faiss_index = FaissIndex(
|
| 488 |
+
device=device, string_factory=string_factory, metric_type=metric_type, custom_index=custom_index
|
| 489 |
+
)
|
| 490 |
+
faiss_index.add_vectors(
|
| 491 |
+
self, column=column, batch_size=batch_size, train_size=train_size, faiss_verbose=faiss_verbose
|
| 492 |
+
)
|
| 493 |
+
self._indexes[index_name] = faiss_index
|
| 494 |
+
|
| 495 |
+
def add_faiss_index_from_external_arrays(
|
| 496 |
+
self,
|
| 497 |
+
external_arrays: np.array,
|
| 498 |
+
index_name: str,
|
| 499 |
+
device: Optional[Union[int, List[int]]] = None,
|
| 500 |
+
string_factory: Optional[str] = None,
|
| 501 |
+
metric_type: Optional[int] = None,
|
| 502 |
+
custom_index: Optional["faiss.Index"] = None,
|
| 503 |
+
batch_size: int = 1000,
|
| 504 |
+
train_size: Optional[int] = None,
|
| 505 |
+
faiss_verbose: bool = False,
|
| 506 |
+
):
|
| 507 |
+
"""Add a dense index using Faiss for fast retrieval.
|
| 508 |
+
The index is created using the vectors of `external_arrays`.
|
| 509 |
+
You can specify `device` if you want to run it on GPU (`device` must be the GPU index).
|
| 510 |
+
You can find more information about Faiss here:
|
| 511 |
+
- For `string factory`: https://github.com/facebookresearch/faiss/wiki/The-index-factory
|
| 512 |
+
|
| 513 |
+
Args:
|
| 514 |
+
external_arrays (`np.array`): If you want to use arrays from outside the lib for the index, you can set `external_arrays`.
|
| 515 |
+
It will use `external_arrays` to create the Faiss index instead of the arrays in the given `column`.
|
| 516 |
+
index_name (`str`): The index_name/identifier of the index. This is the index_name that is used to call `.get_nearest` or `.search`.
|
| 517 |
+
device (Optional `Union[int, List[int]]`): If positive integer, this is the index of the GPU to use. If negative integer, use all GPUs.
|
| 518 |
+
If a list of positive integers is passed in, run only on those GPUs. By default it uses the CPU.
|
| 519 |
+
string_factory (Optional `str`): This is passed to the index factory of Faiss to create the index. Default index class is IndexFlatIP.
|
| 520 |
+
metric_type (Optional `int`): Type of metric. Ex: `faiss.METRIC_INNER_PRODUCT` or `faiss.METRIC_L2`.
|
| 521 |
+
custom_index (Optional `faiss.Index`): Custom Faiss index that you already have instantiated and configured for your needs.
|
| 522 |
+
batch_size (Optional `int`): Size of the batch to use while adding vectors to the FaissIndex. Default value is 1000.
|
| 523 |
+
<Added version="2.4.0"/>
|
| 524 |
+
train_size (Optional `int`): If the index needs a training step, specifies how many vectors will be used to train the index.
|
| 525 |
+
faiss_verbose (`bool`, defaults to False): Enable the verbosity of the Faiss index.
|
| 526 |
+
"""
|
| 527 |
+
faiss_index = FaissIndex(
|
| 528 |
+
device=device, string_factory=string_factory, metric_type=metric_type, custom_index=custom_index
|
| 529 |
+
)
|
| 530 |
+
faiss_index.add_vectors(
|
| 531 |
+
external_arrays, column=None, batch_size=batch_size, train_size=train_size, faiss_verbose=faiss_verbose
|
| 532 |
+
)
|
| 533 |
+
self._indexes[index_name] = faiss_index
|
| 534 |
+
|
| 535 |
+
def save_faiss_index(self, index_name: str, file: Union[str, PurePath], storage_options: Optional[Dict] = None):
|
| 536 |
+
"""Save a FaissIndex on disk.
|
| 537 |
+
|
| 538 |
+
Args:
|
| 539 |
+
index_name (`str`): The index_name/identifier of the index. This is the index_name that is used to call `.get_nearest` or `.search`.
|
| 540 |
+
file (`str`): The path to the serialized faiss index on disk or remote URI (e.g. `"s3://my-bucket/index.faiss"`).
|
| 541 |
+
storage_options (`dict`, *optional*):
|
| 542 |
+
Key/value pairs to be passed on to the file-system backend, if any.
|
| 543 |
+
|
| 544 |
+
<Added version="2.11.0"/>
|
| 545 |
+
|
| 546 |
+
"""
|
| 547 |
+
index = self.get_index(index_name)
|
| 548 |
+
if not isinstance(index, FaissIndex):
|
| 549 |
+
raise ValueError(f"Index '{index_name}' is not a FaissIndex but a '{type(index)}'")
|
| 550 |
+
index.save(file, storage_options=storage_options)
|
| 551 |
+
logger.info(f"Saved FaissIndex {index_name} at {file}")
|
| 552 |
+
|
| 553 |
+
def load_faiss_index(
|
| 554 |
+
self,
|
| 555 |
+
index_name: str,
|
| 556 |
+
file: Union[str, PurePath],
|
| 557 |
+
device: Optional[Union[int, List[int]]] = None,
|
| 558 |
+
storage_options: Optional[Dict] = None,
|
| 559 |
+
):
|
| 560 |
+
"""Load a FaissIndex from disk.
|
| 561 |
+
|
| 562 |
+
If you want to do additional configurations, you can have access to the faiss index object by doing
|
| 563 |
+
`.get_index(index_name).faiss_index` to make it fit your needs.
|
| 564 |
+
|
| 565 |
+
Args:
|
| 566 |
+
index_name (`str`): The index_name/identifier of the index. This is the index_name that is used to
|
| 567 |
+
call `.get_nearest` or `.search`.
|
| 568 |
+
file (`str`): The path to the serialized faiss index on disk or remote URI (e.g. `"s3://my-bucket/index.faiss"`).
|
| 569 |
+
device (Optional `Union[int, List[int]]`): If positive integer, this is the index of the GPU to use. If negative integer, use all GPUs.
|
| 570 |
+
If a list of positive integers is passed in, run only on those GPUs. By default it uses the CPU.
|
| 571 |
+
storage_options (`dict`, *optional*):
|
| 572 |
+
Key/value pairs to be passed on to the file-system backend, if any.
|
| 573 |
+
|
| 574 |
+
<Added version="2.11.0"/>
|
| 575 |
+
|
| 576 |
+
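
        Example (a sketch; `file` may be a local path or a remote URI):

        ```py
        >>> ds.save_faiss_index("embeddings", "my_index.faiss")
        >>> ds.load_faiss_index("embeddings", "my_index.faiss")
        ```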
"""
|
| 577 |
+
index = FaissIndex.load(file, device=device, storage_options=storage_options)
|
| 578 |
+
if index.faiss_index.ntotal != len(self):
|
| 579 |
+
raise ValueError(
|
| 580 |
+
f"Index size should match Dataset size, but Index '{index_name}' at {file} has {index.faiss_index.ntotal} elements while the dataset has {len(self)} examples."
|
| 581 |
+
)
|
| 582 |
+
self._indexes[index_name] = index
|
| 583 |
+
logger.info(f"Loaded FaissIndex {index_name} from {file}")
|
| 584 |
+
|
| 585 |
+
def add_elasticsearch_index(
|
| 586 |
+
self,
|
| 587 |
+
column: str,
|
| 588 |
+
index_name: Optional[str] = None,
|
| 589 |
+
host: Optional[str] = None,
|
| 590 |
+
port: Optional[int] = None,
|
| 591 |
+
es_client: Optional["Elasticsearch"] = None,
|
| 592 |
+
es_index_name: Optional[str] = None,
|
| 593 |
+
es_index_config: Optional[dict] = None,
|
| 594 |
+
):
|
| 595 |
+
"""Add a text index using ElasticSearch for fast retrieval.
|
| 596 |
+
|
| 597 |
+
Args:
|
| 598 |
+
column (`str`): The column of the documents to add to the index.
|
| 599 |
+
index_name (Optional `str`): The index_name/identifier of the index. This is the index name that is used to call `.get_nearest` or `.search`.
|
| 600 |
+
By default it corresponds to `column`.
|
| 601 |
+
host (Optional `str`, defaults to localhost):
|
| 602 |
+
host of where ElasticSearch is running
|
| 603 |
+
port (Optional `str`, defaults to 9200):
|
| 604 |
+
port of where ElasticSearch is running
|
| 605 |
+
es_client (Optional `elasticsearch.Elasticsearch`):
|
| 606 |
+
The elasticsearch client used to create the index if host and port are None.
|
| 607 |
+
es_index_name (Optional `str`): The elasticsearch index name used to create the index.
|
| 608 |
+
es_index_config (Optional `dict`):
|
| 609 |
+
The configuration of the elasticsearch index.
|
| 610 |
+
Default config is:
|
| 611 |
+
|
| 612 |
+
Config::
|
| 613 |
+
|
| 614 |
+
{
|
| 615 |
+
"settings": {
|
| 616 |
+
"number_of_shards": 1,
|
| 617 |
+
"analysis": {"analyzer": {"stop_standard": {"type": "standard", " stopwords": "_english_"}}},
|
| 618 |
+
},
|
| 619 |
+
"mappings": {
|
| 620 |
+
"properties": {
|
| 621 |
+
"text": {
|
| 622 |
+
"type": "text",
|
| 623 |
+
"analyzer": "standard",
|
| 624 |
+
"similarity": "BM25"
|
| 625 |
+
},
|
| 626 |
+
}
|
| 627 |
+
},
|
| 628 |
+
}
|
| 629 |
+
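
        Example (a sketch; assumes an Elasticsearch server at localhost:9200):

        ```py
        >>> es_client = Elasticsearch([{"host": "localhost", "port": "9200"}])
        >>> ds.add_elasticsearch_index(column="context", es_client=es_client)
        >>> scores, indices = ds.search("context", "machine learning")
        ```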
"""
|
| 630 |
+
index_name = index_name if index_name is not None else column
|
| 631 |
+
es_index = ElasticSearchIndex(
|
| 632 |
+
host=host, port=port, es_client=es_client, es_index_name=es_index_name, es_index_config=es_index_config
|
| 633 |
+
)
|
| 634 |
+
es_index.add_documents(self, column=column)
|
| 635 |
+
self._indexes[index_name] = es_index
|
| 636 |
+
|
| 637 |
+
def load_elasticsearch_index(
|
| 638 |
+
self,
|
| 639 |
+
index_name: str,
|
| 640 |
+
es_index_name: str,
|
| 641 |
+
host: Optional[str] = None,
|
| 642 |
+
port: Optional[int] = None,
|
| 643 |
+
es_client: Optional["Elasticsearch"] = None,
|
| 644 |
+
es_index_config: Optional[dict] = None,
|
| 645 |
+
):
|
| 646 |
+
"""Load an existing text index using ElasticSearch for fast retrieval.
|
| 647 |
+
|
| 648 |
+
Args:
|
| 649 |
+
index_name (`str`):
|
| 650 |
+
The `index_name`/identifier of the index. This is the index name that is used to call `get_nearest` or `search`.
|
| 651 |
+
es_index_name (`str`):
|
| 652 |
+
The name of elasticsearch index to load.
|
| 653 |
+
host (`str`, *optional*, defaults to `localhost`):
|
| 654 |
+
Host of where ElasticSearch is running.
|
| 655 |
+
port (`str`, *optional*, defaults to `9200`):
|
| 656 |
+
Port of where ElasticSearch is running.
|
| 657 |
+
es_client (`elasticsearch.Elasticsearch`, *optional*):
|
| 658 |
+
The elasticsearch client used to create the index if host and port are `None`.
|
| 659 |
+
es_index_config (`dict`, *optional*):
|
| 660 |
+
The configuration of the elasticsearch index.
|
| 661 |
+
Default config is:
|
| 662 |
+
```
|
| 663 |
+
{
|
| 664 |
+
"settings": {
|
| 665 |
+
"number_of_shards": 1,
|
| 666 |
+
"analysis": {"analyzer": {"stop_standard": {"type": "standard", " stopwords": "_english_"}}},
|
| 667 |
+
},
|
| 668 |
+
"mappings": {
|
| 669 |
+
"properties": {
|
| 670 |
+
"text": {
|
| 671 |
+
"type": "text",
|
| 672 |
+
"analyzer": "standard",
|
| 673 |
+
"similarity": "BM25"
|
| 674 |
+
},
|
| 675 |
+
}
|
| 676 |
+
},
|
| 677 |
+
}
|
| 678 |
+
```
|
| 679 |
+
"""
|
| 680 |
+
self._indexes[index_name] = ElasticSearchIndex(
|
| 681 |
+
host=host, port=port, es_client=es_client, es_index_name=es_index_name, es_index_config=es_index_config
|
| 682 |
+
)
|
| 683 |
+
|
| 684 |
+
def drop_index(self, index_name: str):
|
| 685 |
+
"""Drop the index with the specified column.
|
| 686 |
+
|
| 687 |
+
Args:
|
| 688 |
+
index_name (`str`):
|
| 689 |
+
The `index_name`/identifier of the index.
|
| 690 |
+
"""
|
| 691 |
+
del self._indexes[index_name]
|
| 692 |
+
|
| 693 |
+
def search(self, index_name: str, query: Union[str, np.array], k: int = 10, **kwargs) -> SearchResults:
|
| 694 |
+
"""Find the nearest examples indices in the dataset to the query.
|
| 695 |
+
|
| 696 |
+
Args:
|
| 697 |
+
index_name (`str`):
|
| 698 |
+
The name/identifier of the index.
|
| 699 |
+
query (`Union[str, np.ndarray]`):
|
| 700 |
+
The query as a string if `index_name` is a text index or as a numpy array if `index_name` is a vector index.
|
| 701 |
+
k (`int`):
|
| 702 |
+
The number of examples to retrieve.
|
| 703 |
+
|
| 704 |
+
Returns:
|
| 705 |
+
`(scores, indices)`:
|
| 706 |
+
A tuple of `(scores, indices)` where:
|
| 707 |
+
- **scores** (`List[List[float]`): the retrieval scores from either FAISS (`IndexFlatL2` by default) or ElasticSearch of the retrieved examples
|
| 708 |
+
- **indices** (`List[List[int]]`): the indices of the retrieved examples
|
| 709 |
+
"""
|
| 710 |
+
self._check_index_is_initialized(index_name)
|
| 711 |
+
return self._indexes[index_name].search(query, k, **kwargs)
|
| 712 |
+
|
| 713 |
+
def search_batch(
|
| 714 |
+
self, index_name: str, queries: Union[List[str], np.array], k: int = 10, **kwargs
|
| 715 |
+
) -> BatchedSearchResults:
|
| 716 |
+
"""Find the nearest examples indices in the dataset to the query.
|
| 717 |
+
|
| 718 |
+
Args:
|
| 719 |
+
index_name (`str`):
|
| 720 |
+
The `index_name`/identifier of the index.
|
| 721 |
+
queries (`Union[List[str], np.ndarray]`):
|
| 722 |
+
The queries as a list of strings if `index_name` is a text index or as a numpy array if `index_name` is a vector index.
|
| 723 |
+
k (`int`):
|
| 724 |
+
The number of examples to retrieve per query.
|
| 725 |
+
|
| 726 |
+
Returns:
|
| 727 |
+
`(total_scores, total_indices)`:
|
| 728 |
+
A tuple of `(total_scores, total_indices)` where:
|
| 729 |
+
- **total_scores** (`List[List[float]`): the retrieval scores from either FAISS (`IndexFlatL2` by default) or ElasticSearch of the retrieved examples per query
|
| 730 |
+
- **total_indices** (`List[List[int]]`): the indices of the retrieved examples per query
|
| 731 |
+
"""
|
| 732 |
+
self._check_index_is_initialized(index_name)
|
| 733 |
+
return self._indexes[index_name].search_batch(queries, k, **kwargs)
|
| 734 |
+
|
| 735 |
+
def get_nearest_examples(
|
| 736 |
+
self, index_name: str, query: Union[str, np.array], k: int = 10, **kwargs
|
| 737 |
+
) -> NearestExamplesResults:
|
| 738 |
+
"""Find the nearest examples in the dataset to the query.
|
| 739 |
+
|
| 740 |
+
Args:
|
| 741 |
+
index_name (`str`):
|
| 742 |
+
The index_name/identifier of the index.
|
| 743 |
+
query (`Union[str, np.ndarray]`):
|
| 744 |
+
The query as a string if `index_name` is a text index or as a numpy array if `index_name` is a vector index.
|
| 745 |
+
k (`int`):
|
| 746 |
+
The number of examples to retrieve.
|
| 747 |
+
|
| 748 |
+
Returns:
|
| 749 |
+
`(scores, examples)`:
|
| 750 |
+
A tuple of `(scores, examples)` where:
|
| 751 |
+
- **scores** (`List[float]`): the retrieval scores from either FAISS (`IndexFlatL2` by default) or ElasticSearch of the retrieved examples
|
| 752 |
+
- **examples** (`dict`): the retrieved examples
|
| 753 |
+
"""
|
| 754 |
+
self._check_index_is_initialized(index_name)
|
| 755 |
+
scores, indices = self.search(index_name, query, k, **kwargs)
|
| 756 |
+
top_indices = [i for i in indices if i >= 0]
|
| 757 |
+
return NearestExamplesResults(scores[: len(top_indices)], self[top_indices])
|
| 758 |
+
|
| 759 |
+
def get_nearest_examples_batch(
|
| 760 |
+
self, index_name: str, queries: Union[List[str], np.array], k: int = 10, **kwargs
|
| 761 |
+
) -> BatchedNearestExamplesResults:
|
| 762 |
+
"""Find the nearest examples in the dataset to the query.
|
| 763 |
+
|
| 764 |
+
Args:
|
| 765 |
+
index_name (`str`):
|
| 766 |
+
The `index_name`/identifier of the index.
|
| 767 |
+
queries (`Union[List[str], np.ndarray]`):
|
| 768 |
+
The queries as a list of strings if `index_name` is a text index or as a numpy array if `index_name` is a vector index.
|
| 769 |
+
k (`int`):
|
| 770 |
+
The number of examples to retrieve per query.
|
| 771 |
+
|
| 772 |
+
Returns:
|
| 773 |
+
`(total_scores, total_examples)`:
|
| 774 |
+
A tuple of `(total_scores, total_examples)` where:
|
| 775 |
+
- **total_scores** (`List[List[float]`): the retrieval scores from either FAISS (`IndexFlatL2` by default) or ElasticSearch of the retrieved examples per query
|
| 776 |
+
- **total_examples** (`List[dict]`): the retrieved examples per query
|
| 777 |
+
"""
|
| 778 |
+
self._check_index_is_initialized(index_name)
|
| 779 |
+
total_scores, total_indices = self.search_batch(index_name, queries, k, **kwargs)
|
| 780 |
+
total_scores = [
|
| 781 |
+
scores_i[: len([i for i in indices_i if i >= 0])]
|
| 782 |
+
for scores_i, indices_i in zip(total_scores, total_indices)
|
| 783 |
+
]
|
| 784 |
+
total_samples = [self[[i for i in indices if i >= 0]] for indices in total_indices]
|
| 785 |
+
return BatchedNearestExamplesResults(total_scores, total_samples)
|
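
The index API above is easiest to see end-to-end with a tiny example. The sketch below is an editorial illustration, not part of `search.py`: it assumes `faiss` is installed and uses a hypothetical two-dimensional `"embeddings"` column, but any float32 vectors work the same way.

```python
import numpy as np
from datasets import Dataset

# Toy dataset with a precomputed "embeddings" column (hypothetical data).
ds = Dataset.from_dict(
    {"text": ["apple", "banana", "car"],
     "embeddings": [[0.0, 1.0], [0.1, 0.9], [1.0, 0.0]]}
)
ds.add_faiss_index(column="embeddings")  # builds an IndexFlatL2 by default

query = np.array([0.0, 0.95], dtype=np.float32)  # must match the vector dtype/dim
# get_nearest_examples wraps search(): it drops padded -1 indices and
# returns decoded rows instead of raw indices.
scores, examples = ds.get_nearest_examples("embeddings", query, k=2)
print(examples["text"])  # nearest rows first
```
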
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/splits.py
ADDED
@@ -0,0 +1,635 @@
# Copyright 2020 The HuggingFace Datasets Authors and the TensorFlow Datasets Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Lint as: python3
"""Splits related API."""

import abc
import collections
import copy
import dataclasses
import re
from dataclasses import dataclass
from typing import Dict, List, Optional, Union

from .arrow_reader import FileInstructions, make_file_instructions
from .naming import _split_re
from .utils.py_utils import NonMutableDict, asdict


@dataclass
class SplitInfo:
    name: str = dataclasses.field(default="", metadata={"include_in_asdict_even_if_is_default": True})
    num_bytes: int = dataclasses.field(default=0, metadata={"include_in_asdict_even_if_is_default": True})
    num_examples: int = dataclasses.field(default=0, metadata={"include_in_asdict_even_if_is_default": True})
    shard_lengths: Optional[List[int]] = None

    # Deprecated
    # For backward compatibility, this field needs to always be included in files like
    # dataset_infos.json and dataset_info.json files
    # To do so, we always include it in the output of datasets.utils.py_utils.asdict(split_info)
    dataset_name: Optional[str] = dataclasses.field(
        default=None, metadata={"include_in_asdict_even_if_is_default": True}
    )

    @property
    def file_instructions(self):
        """Returns the list of dict(filename, take, skip)."""
        # `self.dataset_name` is assigned in `SplitDict.add()`.
        instructions = make_file_instructions(
            name=self.dataset_name,
            split_infos=[self],
            instruction=str(self.name),
        )
        return instructions.file_instructions


@dataclass
class SubSplitInfo:
    """Wrapper around a sub split info.
    This class exposes info on the subsplit:
    ```
    ds, info = datasets.load_dataset(..., split='train[75%:]', with_info=True)
    info.splits['train[75%:]'].num_examples
    ```
    """

    instructions: FileInstructions

    @property
    def num_examples(self):
        """Returns the number of examples in the subsplit."""
        return self.instructions.num_examples

    @property
    def file_instructions(self):
        """Returns the list of dict(filename, take, skip)."""
        return self.instructions.file_instructions


class SplitBase(metaclass=abc.ABCMeta):
    # pylint: disable=line-too-long
    """Abstract base class for Split compositionality.

    See the
    [guide on splits](../loading#slice-splits)
    for more information.

    There are three parts to the composition:
    1) The splits are composed (defined, merged, split,...) together before
       calling the `.as_dataset()` function. This is done with the `__add__`,
       `__getitem__`, which return a tree of `SplitBase` (whose leaves
       are the `NamedSplit` objects)

        ```
        split = datasets.Split.TRAIN + datasets.Split.TEST.subsplit(datasets.percent[:50])
        ```

    2) The `SplitBase` is forwarded to the `.as_dataset()` function
       to be resolved into actual read instructions. This is done by the
       `.get_read_instruction()` method which takes the real dataset splits
       (name, number of shards,...) and parses the tree to return a
       `SplitReadInstruction()` object

        ```
        read_instruction = split.get_read_instruction(self.info.splits)
        ```

    3) The `SplitReadInstruction` is then used in the `tf.data.Dataset` pipeline
       to define which files to read and how to skip examples within a file.

    """

    # pylint: enable=line-too-long

    @abc.abstractmethod
    def get_read_instruction(self, split_dict):
        """Parse the descriptor tree and compile all read instructions together.

        Args:
            split_dict: `dict`, The `dict[split_name, SplitInfo]` of the dataset

        Returns:
            split_read_instruction: `SplitReadInstruction`
        """
        raise NotImplementedError("Abstract method")

    def __eq__(self, other):
        """Equality: datasets.Split.TRAIN == 'train'."""
        if isinstance(other, (NamedSplit, str)):
            return False
        raise NotImplementedError("Equality is not implemented between merged/sub splits.")

    def __ne__(self, other):
        """Inequality: datasets.Split.TRAIN != 'test'."""
        return not self.__eq__(other)

    def __add__(self, other):
        """Merging: datasets.Split.TRAIN + datasets.Split.TEST."""
        return _SplitMerged(self, other)

    def subsplit(self, arg=None, k=None, percent=None, weighted=None):  # pylint: disable=redefined-outer-name
        """Divides this split into subsplits.

        There are 3 ways to define subsplits, which correspond to the 3
        arguments `k` (get `k` even subsplits), `percent` (get a slice of the
        dataset with `datasets.percent`), and `weighted` (get subsplits with proportions
        specified by `weighted`).

        Example::

        ```
        # 50% train, 50% test
        train, test = split.subsplit(k=2)
        # 50% train, 25% test, 25% validation
        train, test, validation = split.subsplit(weighted=[2, 1, 1])
        # Extract last 20%
        subsplit = split.subsplit(datasets.percent[-20:])
        ```

        Warning: k and weighted will be converted into percent which means that
        values below the percent will be rounded up or down. The final split may be
        bigger to deal with remainders. For instance:

        ```
        train, test, valid = split.subsplit(k=3)  # 33%, 33%, 34%
        s1, s2, s3, s4 = split.subsplit(weighted=[2, 2, 1, 1])  # 33%, 33%, 16%, 18%
        ```

        Args:
            arg: If no kwargs are given, `arg` will be interpreted as one of
                `k`, `percent`, or `weighted` depending on the type.
                For example:
                ```
                split.subsplit(10)  # Equivalent to split.subsplit(k=10)
                split.subsplit(datasets.percent[:-20])  # percent=datasets.percent[:-20]
                split.subsplit([1, 1, 2])  # weighted=[1, 1, 2]
                ```
            k: `int` If set, subdivide the split into `k` equal parts.
            percent: `datasets.percent slice`, return a single subsplit corresponding to
                a slice of the original split. For example:
                `split.subsplit(datasets.percent[-20:])  # Last 20% of the dataset`.
            weighted: `list[int]`, return a list of subsplits whose proportions match
                the normalized sum of the list. For example:
                `split.subsplit(weighted=[1, 1, 2])  # 25%, 25%, 50%`.

        Returns:
            A subsplit or list of subsplits extracted from this split object.
        """
        # Note that the percent kwargs redefine the outer name datasets.percent. This
        # is done for consistency (.subsplit(percent=datasets.percent[:40]))
        if sum(bool(x) for x in (arg, k, percent, weighted)) != 1:
            raise ValueError("Only one argument of subsplit should be set.")

        # Auto deduce k
        if isinstance(arg, int):
            k = arg
        elif isinstance(arg, slice):
            percent = arg
        elif isinstance(arg, list):
            weighted = arg

        if not (k or percent or weighted):
            raise ValueError(
                f"Invalid split argument {arg}. Only list, slice and int supported. "
                "One of k, weighted or percent should be set to a non empty value."
            )

        def assert_slices_coverage(slices):
            # Ensure that the expanded slices cover all percents.
            assert sum((list(range(*s.indices(100))) for s in slices), []) == list(range(100))

        if k:
            if not 0 < k <= 100:
                raise ValueError(f"Subsplit k should be between 0 and 100, got {k}")
            shift = 100 // k
            slices = [slice(i * shift, (i + 1) * shift) for i in range(k)]
            # Round up last element to ensure all elements are taken
            slices[-1] = slice(slices[-1].start, 100)
            # Internal check to ensure full coverage
            assert_slices_coverage(slices)
            return tuple(_SubSplit(self, s) for s in slices)
        elif percent:
            return _SubSplit(self, percent)
        elif weighted:
            # Normalize the weighted sum
            total = sum(weighted)
            weighted = [100 * x // total for x in weighted]
            # Create the slice for each of the elements
            start = 0
            stop = 0
            slices = []
            for v in weighted:
                stop += v
                slices.append(slice(start, stop))
                start = stop
            # Round up last element to ensure all elements are taken
            slices[-1] = slice(slices[-1].start, 100)
            # Internal check to ensure full coverage
            assert_slices_coverage(slices)
            return tuple(_SubSplit(self, s) for s in slices)
        else:
            # Should not be possible
            raise ValueError("Could not determine the split")


# 2 requirements:
# 1. datasets.percent be sliceable
# 2. datasets.percent be documented
#
# Instances are not documented, so we want datasets.percent to be a class, but to
# have it be sliceable, we need this metaclass.
class PercentSliceMeta(type):
    def __getitem__(cls, slice_value):
        if not isinstance(slice_value, slice):
            raise ValueError(f"datasets.percent should only be called with slice, not {slice_value}")
        return slice_value


class PercentSlice(metaclass=PercentSliceMeta):
    # pylint: disable=line-too-long
    """Syntactic sugar for defining slice subsplits: `datasets.percent[75:-5]`.

    See the
    [guide on splits](../loading#slice-splits)
    for more information.
    """

    # pylint: enable=line-too-long
    pass


percent = PercentSlice  # pylint: disable=invalid-name


class _SplitMerged(SplitBase):
    """Represent two split descriptors merged together."""

    def __init__(self, split1, split2):
        self._split1 = split1
        self._split2 = split2

    def get_read_instruction(self, split_dict):
        read_instruction1 = self._split1.get_read_instruction(split_dict)
        read_instruction2 = self._split2.get_read_instruction(split_dict)
        return read_instruction1 + read_instruction2

    def __repr__(self):
        return f"({repr(self._split1)} + {repr(self._split2)})"


class _SubSplit(SplitBase):
    """Represent a sub split of a split descriptor."""

    def __init__(self, split, slice_value):
        self._split = split
        self._slice_value = slice_value

    def get_read_instruction(self, split_dict):
        return self._split.get_read_instruction(split_dict)[self._slice_value]

    def __repr__(self):
        slice_str = "{start}:{stop}"
        if self._slice_value.step is not None:
            slice_str += ":{step}"
        slice_str = slice_str.format(
            start="" if self._slice_value.start is None else self._slice_value.start,
            stop="" if self._slice_value.stop is None else self._slice_value.stop,
            step=self._slice_value.step,
        )
        return f"{repr(self._split)}(datasets.percent[{slice_str}])"


class NamedSplit(SplitBase):
    """Descriptor corresponding to a named split (train, test, ...).

    Example:
        Each descriptor can be composed with others using addition or slice:

        ```py
        split = datasets.Split.TRAIN.subsplit(datasets.percent[0:25]) + datasets.Split.TEST
        ```

        The resulting split will correspond to 25% of the train split merged with
        100% of the test split.

        A split cannot be added twice, so the following will fail:

        ```py
        split = (
            datasets.Split.TRAIN.subsplit(datasets.percent[:25]) +
            datasets.Split.TRAIN.subsplit(datasets.percent[75:])
        )  # Error
        split = datasets.Split.TEST + datasets.Split.ALL  # Error
        ```

        The slices can be applied only one time. So the following are valid:

        ```py
        split = (
            datasets.Split.TRAIN.subsplit(datasets.percent[:25]) +
            datasets.Split.TEST.subsplit(datasets.percent[:50])
        )
        split = (datasets.Split.TRAIN + datasets.Split.TEST).subsplit(datasets.percent[:50])
        ```

        But this is not valid:

        ```py
        train = datasets.Split.TRAIN
        test = datasets.Split.TEST
        split = train.subsplit(datasets.percent[:25]).subsplit(datasets.percent[:25])
        split = (train.subsplit(datasets.percent[:25]) + test).subsplit(datasets.percent[:50])
        ```
    """

    def __init__(self, name):
        self._name = name
        split_names_from_instruction = [split_instruction.split("[")[0] for split_instruction in name.split("+")]
        for split_name in split_names_from_instruction:
            if not re.match(_split_re, split_name):
                raise ValueError(f"Split name should match '{_split_re}' but got '{split_name}'.")

    def __str__(self):
        return self._name

    def __repr__(self):
        return f"NamedSplit({self._name!r})"

    def __eq__(self, other):
        """Equality: datasets.Split.TRAIN == 'train'."""
        if isinstance(other, NamedSplit):
            return self._name == other._name  # pylint: disable=protected-access
        elif isinstance(other, SplitBase):
            return False
        elif isinstance(other, str):  # Other should be string
            return self._name == other
        else:
            raise ValueError(f"Equality not supported between split {self} and {other}")

    def __lt__(self, other):
        return self._name < other._name  # pylint: disable=protected-access

    def __hash__(self):
        return hash(self._name)

    def get_read_instruction(self, split_dict):
        return SplitReadInstruction(split_dict[self._name])


class NamedSplitAll(NamedSplit):
    """Split corresponding to the union of all defined dataset splits."""

    def __init__(self):
        super().__init__("all")

    def __repr__(self):
        return "NamedSplitAll()"

    def get_read_instruction(self, split_dict):
        # Merge all dataset splits together
        read_instructions = [SplitReadInstruction(s) for s in split_dict.values()]
        return sum(read_instructions, SplitReadInstruction())


class Split:
    # pylint: disable=line-too-long
    """`Enum` for dataset splits.

    Datasets are typically split into different subsets to be used at various
    stages of training and evaluation.

    - `TRAIN`: the training data.
    - `VALIDATION`: the validation data. If present, this is typically used as
      evaluation data while iterating on a model (e.g. changing hyperparameters,
      model architecture, etc.).
    - `TEST`: the testing data. This is the data to report metrics on. Typically
      you do not want to use this during model iteration as you may overfit to it.
    - `ALL`: the union of all defined dataset splits.

    All splits, including compositions, inherit from `datasets.SplitBase`.

    See the [guide](../load_hub#splits) on splits for more information.

    Example:

    ```py
    >>> datasets.SplitGenerator(
    ...     name=datasets.Split.TRAIN,
    ...     gen_kwargs={"split_key": "train", "files": dl_manager.download_and_extract(url)},
    ... ),
    ... datasets.SplitGenerator(
    ...     name=datasets.Split.VALIDATION,
    ...     gen_kwargs={"split_key": "validation", "files": dl_manager.download_and_extract(url)},
    ... ),
    ... datasets.SplitGenerator(
    ...     name=datasets.Split.TEST,
    ...     gen_kwargs={"split_key": "test", "files": dl_manager.download_and_extract(url)},
    ... )
    ```
    """

    # pylint: enable=line-too-long
    TRAIN = NamedSplit("train")
    TEST = NamedSplit("test")
    VALIDATION = NamedSplit("validation")
    ALL = NamedSplitAll()

    def __new__(cls, name):
        """Create a custom split with datasets.Split('custom_name')."""
        return NamedSplitAll() if name == "all" else NamedSplit(name)


# Similar to SplitInfo, but contains an additional slice info
SlicedSplitInfo = collections.namedtuple(
    "SlicedSplitInfo",
    [
        "split_info",
        "slice_value",
    ],
)  # noqa: E231


class SplitReadInstruction:
    """Object containing the reading instruction for the dataset.

    Similarly to `SplitDescriptor` nodes, this object can be composed with itself,
    but the resolution happens instantaneously, instead of keeping track of the
    tree, such that all instructions are compiled and flattened in a single
    SplitReadInstruction object containing the list of files and slice to use.

    Once resolved, the instructions can be accessed with:

    ```
    read_instructions.get_list_sliced_split_info()  # List of splits to use
    ```

    """

    def __init__(self, split_info=None):
        self._splits = NonMutableDict(error_msg="Overlap between splits. Split {key} has been added with " "itself.")

        if split_info:
            self.add(SlicedSplitInfo(split_info=split_info, slice_value=None))

    def add(self, sliced_split):
        """Add a SlicedSplitInfo to the read instructions."""
        # TODO(epot): Check that the number of examples per shard % 100 == 0
        # Otherwise the slices value may be unbalanced and not exactly reflect the
        # requested slice.
        self._splits[sliced_split.split_info.name] = sliced_split

    def __add__(self, other):
        """Merging splits together."""
        # Will raise error if a split has already been added (NonMutableDict)
        # TODO(epot): If a split is already added but there is no overlap between
        # the slices, should merge the slices (ex: [:10] + [80:])
        split_instruction = SplitReadInstruction()
        split_instruction._splits.update(self._splits)  # pylint: disable=protected-access
        split_instruction._splits.update(other._splits)  # pylint: disable=protected-access
        return split_instruction

    def __getitem__(self, slice_value):
        """Sub-splits."""
        # Will raise an error if a split has already been sliced
        split_instruction = SplitReadInstruction()
        for v in self._splits.values():
            if v.slice_value is not None:
                raise ValueError(f"Trying to slice Split {v.split_info.name} which has already been sliced")
            v = v._asdict()
            v["slice_value"] = slice_value
            split_instruction.add(SlicedSplitInfo(**v))
        return split_instruction

    def get_list_sliced_split_info(self):
        return list(self._splits.values())


class SplitDict(dict):
    """Split info object."""

    def __init__(self, *args, dataset_name=None, **kwargs):
        super().__init__(*args, **kwargs)
        self.dataset_name = dataset_name

    def __getitem__(self, key: Union[SplitBase, str]):
        # 1st case: The key exists: `info.splits['train']`
        if str(key) in self:
            return super().__getitem__(str(key))
        # 2nd case: Uses instructions: `info.splits['train[50%]']`
        else:
            instructions = make_file_instructions(
                name=self.dataset_name,
                split_infos=self.values(),
                instruction=key,
            )
            return SubSplitInfo(instructions)

    def __setitem__(self, key: Union[SplitBase, str], value: SplitInfo):
        if key != value.name:
            raise ValueError(f"Cannot add elem. (key mismatch: '{key}' != '{value.name}')")
        super().__setitem__(key, value)

    def add(self, split_info: SplitInfo):
        """Add the split info."""
        if split_info.name in self:
            raise ValueError(f"Split {split_info.name} already present")
        split_info.dataset_name = self.dataset_name
        super().__setitem__(split_info.name, split_info)

    @property
    def total_num_examples(self):
        """Return the total number of examples."""
        return sum(s.num_examples for s in self.values())

    @classmethod
    def from_split_dict(cls, split_infos: Union[List, Dict], dataset_name: Optional[str] = None):
        """Returns a new SplitDict initialized from a Dict or List of `split_infos`."""
        if isinstance(split_infos, dict):
            split_infos = list(split_infos.values())

        if dataset_name is None:
            dataset_name = split_infos[0].get("dataset_name") if split_infos else None

        split_dict = cls(dataset_name=dataset_name)

        for split_info in split_infos:
            if isinstance(split_info, dict):
                split_info = SplitInfo(**split_info)
            split_dict.add(split_info)

        return split_dict

    def to_split_dict(self):
        """Returns a list of SplitInfo protos that we have."""
        out = []
        for split_name, split_info in self.items():
            split_info = copy.deepcopy(split_info)
            split_info.name = split_name
            out.append(split_info)
        return out

    def copy(self):
        return SplitDict.from_split_dict(self.to_split_dict(), self.dataset_name)

    def _to_yaml_list(self) -> list:
        out = [asdict(s) for s in self.to_split_dict()]
        # we don't need the shard lengths in YAML, since it depends on max_shard_size and num_proc
        for split_info_dict in out:
            split_info_dict.pop("shard_lengths", None)
        # we don't need the dataset_name attribute that is deprecated
        for split_info_dict in out:
            split_info_dict.pop("dataset_name", None)
        return out

    @classmethod
    def _from_yaml_list(cls, yaml_data: list) -> "SplitDict":
        return cls.from_split_dict(yaml_data)


@dataclass
class SplitGenerator:
    """Defines the split information for the generator.

    This should be used as the returned value of
    `GeneratorBasedBuilder._split_generators`.
    See `GeneratorBasedBuilder._split_generators` for more info and example
    of usage.

    Args:
        name (`str`):
            Name of the `Split` for which the generator will
            create the examples.
        **gen_kwargs (additional keyword arguments):
            Keyword arguments to forward to the `DatasetBuilder._generate_examples` method
            of the builder.

    Example:

    ```py
    >>> datasets.SplitGenerator(
    ...     name=datasets.Split.TRAIN,
    ...     gen_kwargs={"split_key": "train", "files": dl_manager.download_and_extract(url)},
    ... )
    ```
    """

    name: str
    gen_kwargs: Dict = dataclasses.field(default_factory=dict)
    split_info: SplitInfo = dataclasses.field(init=False)

    def __post_init__(self):
        self.name = str(self.name)  # Make sure we convert NamedSplits to strings
        NamedSplit(self.name)  # check that it's a valid split name
        self.split_info = SplitInfo(name=self.name)
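
As a quick illustration of the `SplitInfo`/`SplitDict` bookkeeping defined above (an editorial sketch, not part of `splits.py`): `SplitDict.add` stamps each `SplitInfo` with the owning dataset name, string keys and `NamedSplit` keys are interchangeable, and `to_split_dict`/`from_split_dict` round-trip the metadata.

```python
from datasets.splits import Split, SplitDict, SplitInfo

splits = SplitDict(dataset_name="demo")  # "demo" is a made-up dataset name
splits.add(SplitInfo(name="train", num_examples=800))
splits.add(SplitInfo(name="test", num_examples=200))

assert splits.total_num_examples == 1000
assert splits["train"].dataset_name == "demo"  # stamped by SplitDict.add
assert Split.TRAIN == "train"                  # NamedSplit compares equal to its string
assert str(Split.TRAIN) in splits              # so it can be used as a dict key

roundtrip = SplitDict.from_split_dict(splits.to_split_dict(), "demo")
assert roundtrip["test"].num_examples == 200
```
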
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/datasets/table.py
ADDED
@@ -0,0 +1,2422 @@
import copy
import os
from functools import partial
from itertools import groupby
from typing import TYPE_CHECKING, Any, Callable, Iterator, List, Optional, Tuple, TypeVar, Union

import numpy as np
import pyarrow as pa
import pyarrow.compute as pc
import pyarrow.types

from . import config
from .utils.logging import get_logger


if TYPE_CHECKING:
    from .features.features import Features, FeatureType


logger = get_logger(__name__)


def inject_arrow_table_documentation(arrow_table_method):
    def wrapper(fn):
        fn.__doc__ = arrow_table_method.__doc__ + (fn.__doc__ if fn.__doc__ is not None else "")
        fn.__doc__ = fn.__doc__.replace("pyarrow.Table", "Table")
        if hasattr(arrow_table_method, "__annotations__"):
            fn.__annotations__ = arrow_table_method.__annotations__
        return fn

    return wrapper


def _in_memory_arrow_table_from_file(filename: str) -> pa.Table:
    in_memory_stream = pa.input_stream(filename)
    opened_stream = pa.ipc.open_stream(in_memory_stream)
    pa_table = opened_stream.read_all()
    return pa_table


def _in_memory_arrow_table_from_buffer(buffer: pa.Buffer) -> pa.Table:
    stream = pa.BufferReader(buffer)
    opened_stream = pa.ipc.open_stream(stream)
    table = opened_stream.read_all()
    return table


def _memory_mapped_record_batch_reader_from_file(filename: str) -> pa.RecordBatchStreamReader:
    memory_mapped_stream = pa.memory_map(filename)
    return pa.ipc.open_stream(memory_mapped_stream)


def read_schema_from_file(filename: str) -> pa.Schema:
    """
    Infer arrow table schema from file without loading whole file into memory.
    Useful especially for very big files.
    """
    with pa.memory_map(filename) as memory_mapped_stream:
        schema = pa.ipc.open_stream(memory_mapped_stream).schema
    return schema


def _memory_mapped_arrow_table_from_file(filename: str) -> pa.Table:
    opened_stream = _memory_mapped_record_batch_reader_from_file(filename)
    pa_table = opened_stream.read_all()
    return pa_table


def _deepcopy(x, memo: dict):
    """deepcopy a regular class instance"""
    cls = x.__class__
    result = cls.__new__(cls)
    memo[id(x)] = result
    for k, v in x.__dict__.items():
        setattr(result, k, copy.deepcopy(v, memo))
    return result


def _interpolation_search(arr: List[int], x: int) -> int:
    """
    Return the position i of a sorted array so that arr[i] <= x < arr[i+1]

    Args:
        arr (`List[int]`): non-empty sorted list of integers
        x (`int`): query

    Returns:
        `int`: the position i so that arr[i] <= x < arr[i+1]

    Raises:
        `IndexError`: if the array is empty or if the query is outside the array values
    """
    i, j = 0, len(arr) - 1
    while i < j and arr[i] <= x < arr[j]:
        k = i + ((j - i) * (x - arr[i]) // (arr[j] - arr[i]))
        if arr[k] <= x < arr[k + 1]:
            return k
        elif arr[k] < x:
            i, j = k + 1, j
        else:
            i, j = i, k
    raise IndexError(f"Invalid query '{x}' for size {arr[-1] if len(arr) else 'none'}.")

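
# --- Editor's illustration (not part of the upstream file) -----------------
# With cumulative batch offsets like the ones IndexedTableMixin builds below,
# _interpolation_search(offsets, x) returns the batch that contains row x:
#
#     offsets = [0, 3, 7, 12]              # batches of 3, 4 and 5 rows
#     _interpolation_search(offsets, 0)    # -> 0 (rows 0-2 live in batch 0)
#     _interpolation_search(offsets, 5)    # -> 1 (offsets[1] <= 5 < offsets[2])
#     _interpolation_search(offsets, 11)   # -> 2 (last valid row index)
#     _interpolation_search(offsets, 12)   # raises IndexError: out of range
# ----------------------------------------------------------------------------
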
class IndexedTableMixin:
    def __init__(self, table: pa.Table):
        self._schema: pa.Schema = table.schema
        self._batches: List[pa.RecordBatch] = [
            recordbatch for recordbatch in table.to_batches() if len(recordbatch) > 0
        ]
        self._offsets: np.ndarray = np.cumsum([0] + [len(b) for b in self._batches], dtype=np.int64)

    def fast_gather(self, indices: Union[List[int], np.ndarray]) -> pa.Table:
        """
        Create a pa.Table by gathering the records at the specified indices. Should be faster
        than pa.concat_tables(table.fast_slice(int(i) % table.num_rows, 1) for i in indices) since NumPy can compute
        the binary searches in parallel, in highly optimized C code.
        """
        if not len(indices):
            raise ValueError("Indices must be non-empty")
        batch_indices = np.searchsorted(self._offsets, indices, side="right") - 1
        return pa.Table.from_batches(
            [
                self._batches[batch_idx].slice(i - self._offsets[batch_idx], 1)
                for batch_idx, i in zip(batch_indices, indices)
            ],
            schema=self._schema,
        )

    def fast_slice(self, offset=0, length=None) -> pa.Table:
        """
        Slice the Table using interpolation search.
        The behavior is the same as `pyarrow.Table.slice` but it's significantly faster.

        Interpolation search is used to find the start and end indexes of the batches we want to keep.
        The batches to keep are then concatenated to form the sliced Table.
        """
        if offset < 0:
            raise IndexError("Offset must be non-negative")
        elif offset >= self._offsets[-1] or (length is not None and length <= 0):
            return pa.Table.from_batches([], schema=self._schema)
        i = _interpolation_search(self._offsets, offset)
        if length is None or length + offset >= self._offsets[-1]:
            batches = self._batches[i:]
            batches[0] = batches[0].slice(offset - self._offsets[i])
        else:
            j = _interpolation_search(self._offsets, offset + length - 1)
            batches = self._batches[i : j + 1]
            batches[-1] = batches[-1].slice(0, offset + length - self._offsets[j])
            batches[0] = batches[0].slice(offset - self._offsets[i])
        return pa.Table.from_batches(batches, schema=self._schema)


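# Illustrative sketch, not part of the original module: the index built by
# `IndexedTableMixin` in action. The table below is split into record batches
# of 3 and 2 rows, so `_offsets` is [0, 3, 5]; `fast_slice` and `fast_gather`
# locate batches through those offsets instead of scanning them one by one.
if __name__ == "__main__":
    _batched = pa.Table.from_batches(pa.table({"n": [0, 1, 2, 3, 4]}).to_batches(max_chunksize=3))
    _indexed = IndexedTableMixin(_batched)
    print(_indexed.fast_slice(2, 2).to_pydict())  # {'n': [2, 3]}
    print(_indexed.fast_gather([0, 4]).to_pydict())  # {'n': [0, 4]}

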
class Table(IndexedTableMixin):
    """
    Wraps a pyarrow Table by using composition.
    This is the base class for `InMemoryTable`, `MemoryMappedTable` and `ConcatenationTable`.

    It implements all the basic attributes/methods of the pyarrow Table class except
    the Table transforms: `slice, filter, flatten, combine_chunks, cast, add_column,
    append_column, remove_column, set_column, rename_columns` and `drop`.

    The implementation of these methods differs for the subclasses.
    """

    def __init__(self, table: pa.Table):
        super().__init__(table)
        self.table = table

    def __deepcopy__(self, memo: dict):
        # arrow tables are immutable, so there's no need to copy self.table
        # moreover calling deepcopy on a pyarrow table seems to make pa.total_allocated_bytes() decrease for some reason
        # by adding it to the memo, self.table won't be copied
        memo[id(self.table)] = self.table
        # same for the recordbatches used by the index
        memo[id(self._batches)] = list(self._batches)
        return _deepcopy(self, memo)

    def validate(self, *args, **kwargs):
        """
        Perform validation checks. An exception is raised if validation fails.

        By default only cheap validation checks are run. Pass `full=True`
        for thorough validation checks (potentially `O(n)`).

        Args:
            full (`bool`, defaults to `False`):
                If `True`, run expensive checks, otherwise cheap checks only.

        Raises:
            `pa.lib.ArrowInvalid`: if validation fails
        """
        return self.table.validate(*args, **kwargs)

    def equals(self, *args, **kwargs):
        """
        Check if contents of two tables are equal.

        Args:
            other ([`~datasets.table.Table`]):
                Table to compare against.
            check_metadata (`bool`, defaults to `False`):
                Whether schema metadata equality should be checked as well.

        Returns:
            `bool`
        """
        args = tuple(arg.table if isinstance(arg, Table) else arg for arg in args)
        kwargs = {k: v.table if isinstance(v, Table) else v for k, v in kwargs.items()}
        return self.table.equals(*args, **kwargs)

    def to_batches(self, *args, **kwargs):
        """
        Convert Table to list of (contiguous) `RecordBatch` objects.

        Args:
            max_chunksize (`int`, defaults to `None`):
                Maximum size for `RecordBatch` chunks. Individual chunks may be
                smaller depending on the chunk layout of individual columns.

        Returns:
            `List[pyarrow.RecordBatch]`
        """
        return self.table.to_batches(*args, **kwargs)

    def to_pydict(self, *args, **kwargs):
        """
        Convert the Table to a `dict` or `OrderedDict`.

        Returns:
            `dict`
        """
        return self.table.to_pydict(*args, **kwargs)

    def to_pylist(self, *args, **kwargs):
        """
        Convert the Table to a list.

        Returns:
            `list`
        """
        return self.table.to_pylist(*args, **kwargs)

    def to_pandas(self, *args, **kwargs):
        """
        Convert to a pandas-compatible NumPy array or DataFrame, as appropriate.

        Args:
            memory_pool (`MemoryPool`, defaults to `None`):
                Arrow MemoryPool to use for allocations. Uses the default memory
                pool if not passed.
            strings_to_categorical (`bool`, defaults to `False`):
                Encode string (UTF8) and binary types to `pandas.Categorical`.
            categories (`list`, defaults to `empty`):
                List of fields that should be returned as `pandas.Categorical`. Only
                applies to table-like data structures.
            zero_copy_only (`bool`, defaults to `False`):
                Raise an `ArrowException` if this function call would require copying
                the underlying data.
            integer_object_nulls (`bool`, defaults to `False`):
                Cast integers with nulls to objects.
            date_as_object (`bool`, defaults to `True`):
                Cast dates to objects. If `False`, convert to `datetime64[ns]` dtype.
            timestamp_as_object (`bool`, defaults to `False`):
                Cast non-nanosecond timestamps (`np.datetime64`) to objects. This is
                useful if you have timestamps that don't fit in the normal date
                range of nanosecond timestamps (1678 CE-2262 CE).
                If `False`, all timestamps are converted to `datetime64[ns]` dtype.
            use_threads (`bool`, defaults to `True`):
                Whether to parallelize the conversion using multiple threads.
            deduplicate_objects (`bool`, defaults to `False`):
                Do not create multiple copies of Python objects when created, to save
                on memory use. Conversion will be slower.
            ignore_metadata (`bool`, defaults to `False`):
                If `True`, do not use the 'pandas' metadata to reconstruct the
                DataFrame index, if present.
            safe (`bool`, defaults to `True`):
                For certain data types, a cast is needed in order to store the
                data in a pandas DataFrame or Series (e.g. timestamps are always
                stored as nanoseconds in pandas). This option controls whether it
                is a safe cast or not.
            split_blocks (`bool`, defaults to `False`):
                If `True`, generate one internal "block" for each column when
                creating a pandas.DataFrame from a `RecordBatch` or `Table`. While this
                can temporarily reduce memory, note that various pandas operations
                can trigger "consolidation" which may balloon memory use.
            self_destruct (`bool`, defaults to `False`):
                EXPERIMENTAL: If `True`, attempt to deallocate the originating Arrow
                memory while converting the Arrow object to pandas. If you use the
                object after calling `to_pandas` with this option it will crash your
                program.
            types_mapper (`function`, defaults to `None`):
                A function mapping a pyarrow DataType to a pandas `ExtensionDtype`.
                This can be used to override the default pandas type for conversion
                of built-in pyarrow types or in absence of `pandas_metadata` in the
                Table schema. The function receives a pyarrow DataType and is
                expected to return a pandas `ExtensionDtype` or `None` if the
                default conversion should be used for that type. If you have
                a dictionary mapping, you can pass `dict.get` as function.

        Returns:
            `pandas.Series` or `pandas.DataFrame`: `pandas.Series` or `pandas.DataFrame` depending on type of object
        """
        return self.table.to_pandas(*args, **kwargs)

    def to_string(self, *args, **kwargs):
        return self.table.to_string(*args, **kwargs)

    def to_reader(self, max_chunksize: Optional[int] = None):
        """
        Convert the Table to a RecordBatchReader.

        Note that this method is zero-copy, it merely exposes the same data under a different API.

        Args:
            max_chunksize (`int`, defaults to `None`):
                Maximum size for RecordBatch chunks. Individual chunks may be smaller depending
                on the chunk layout of individual columns.

        Returns:
            `pyarrow.RecordBatchReader`
        """
        return self.table.to_reader(max_chunksize=max_chunksize)

    def field(self, *args, **kwargs):
        """
        Select a schema field by its column name or numeric index.

        Args:
            i (`Union[int, str]`):
                The index or name of the field to retrieve.

        Returns:
            `pyarrow.Field`
        """
        return self.table.field(*args, **kwargs)

    def column(self, *args, **kwargs):
        """
        Select a column by its column name, or numeric index.

        Args:
            i (`Union[int, str]`):
                The index or name of the column to retrieve.

        Returns:
            `pyarrow.ChunkedArray`
        """
        return self.table.column(*args, **kwargs)

    def itercolumns(self, *args, **kwargs):
        """
        Iterator over all columns in their numerical order.

        Yields:
            `pyarrow.ChunkedArray`
        """
        return self.table.itercolumns(*args, **kwargs)

    @property
    def schema(self):
        """
        Schema of the table and its columns.

        Returns:
            `pyarrow.Schema`
        """
        return self.table.schema

    @property
    def columns(self):
        """
        List of all columns in numerical order.

        Returns:
            `List[pa.ChunkedArray]`
        """
        return self.table.columns

    @property
    def num_columns(self):
        """
        Number of columns in this table.

        Returns:
            int
        """
        return self.table.num_columns

    @property
    def num_rows(self):
        """
        Number of rows in this table.

        Due to the definition of a table, all columns have the same number of
        rows.

        Returns:
            int
        """
        return self.table.num_rows

    @property
    def shape(self):
        """
        Dimensions of the table: (#rows, #columns).

        Returns:
            `(int, int)`: Number of rows and number of columns.
        """
        return self.table.shape

    @property
    def nbytes(self):
        """
        Total number of bytes consumed by the elements of the table.
        """
        return self.table.nbytes

    @property
    def column_names(self):
        """
        Names of the table's columns.
        """
        return self.table.column_names

    def __eq__(self, other):
        return self.equals(other)

    def __getitem__(self, i):
        return self.table[i]

    def __len__(self):
        return len(self.table)

    def __repr__(self):
        return self.table.__repr__().replace("pyarrow.Table", self.__class__.__name__)

    def __str__(self):
        return self.table.__str__().replace("pyarrow.Table", self.__class__.__name__)

    def slice(self, *args, **kwargs):
        """
        Compute zero-copy slice of this Table.

        Args:
            offset (`int`, defaults to `0`):
                Offset from start of table to slice.
            length (`int`, defaults to `None`):
                Length of slice (default is until end of table starting from
                offset).

        Returns:
            `datasets.table.Table`
        """
        raise NotImplementedError()

    def filter(self, *args, **kwargs):
        """
        Select records from a Table. See `pyarrow.compute.filter` for full usage.
        """
        raise NotImplementedError()

    def flatten(self, *args, **kwargs):
        """
        Flatten this Table. Each column with a struct type is flattened
        into one column per struct field. Other columns are left unchanged.

        Args:
            memory_pool (`MemoryPool`, defaults to `None`):
                For memory allocations, if required, otherwise use default pool.

        Returns:
            `datasets.table.Table`
        """
        raise NotImplementedError()

    def combine_chunks(self, *args, **kwargs):
        """
        Make a new table by combining the chunks this table has.

        All the underlying chunks in the `ChunkedArray` of each column are
        concatenated into zero or one chunk.

        Args:
            memory_pool (`MemoryPool`, defaults to `None`):
                For memory allocations, if required, otherwise use default pool.

        Returns:
            `datasets.table.Table`
        """
        raise NotImplementedError()

    def cast(self, *args, **kwargs):
        """
        Cast table values to another schema.

        Args:
            target_schema (`Schema`):
                Schema to cast to, the names and order of fields must match.
            safe (`bool`, defaults to `True`):
                Check for overflows or other unsafe conversions.

        Returns:
            `datasets.table.Table`
        """
        raise NotImplementedError()

    def replace_schema_metadata(self, *args, **kwargs):
        """
        EXPERIMENTAL: Create shallow copy of table by replacing schema
        key-value metadata with the indicated new metadata (which may be `None`,
        which deletes any existing metadata).

        Args:
            metadata (`dict`, defaults to `None`):

        Returns:
            `datasets.table.Table`: shallow_copy
        """
        raise NotImplementedError()

    def add_column(self, *args, **kwargs):
        """
        Add column to Table at position.

        A new table is returned with the column added, the original table
        object is left unchanged.

        Args:
            i (`int`):
                Index to place the column at.
            field_ (`Union[str, pyarrow.Field]`):
                If a string is passed then the type is deduced from the column
                data.
            column (`Union[pyarrow.Array, List[pyarrow.Array]]`):
                Column data.

        Returns:
            `datasets.table.Table`: New table with the passed column added.
        """
        raise NotImplementedError()

    def append_column(self, *args, **kwargs):
        """
        Append column at end of columns.

        Args:
            field_ (`Union[str, pyarrow.Field]`):
                If a string is passed then the type is deduced from the column
                data.
            column (`Union[pyarrow.Array, List[pyarrow.Array]]`):
                Column data.

        Returns:
            `datasets.table.Table`: New table with the passed column added.
        """
        raise NotImplementedError()

    def remove_column(self, *args, **kwargs):
        """
        Create new Table with the indicated column removed.

        Args:
            i (`int`):
                Index of column to remove.

        Returns:
            `datasets.table.Table`: New table without the column.
        """
        raise NotImplementedError()

    def set_column(self, *args, **kwargs):
        """
        Replace column in Table at position.

        Args:
            i (`int`):
                Index to place the column at.
            field_ (`Union[str, pyarrow.Field]`):
                If a string is passed then the type is deduced from the column
                data.
            column (`Union[pyarrow.Array, List[pyarrow.Array]]`):
                Column data.

        Returns:
            `datasets.table.Table`: New table with the passed column set.
        """
        raise NotImplementedError()

    def rename_columns(self, *args, **kwargs):
        """
        Create new table with columns renamed to provided names.
        """
        raise NotImplementedError()

    def drop(self, *args, **kwargs):
        """
        Drop one or more columns and return a new table.

        Args:
            columns (`List[str]`):
                List of field names referencing existing columns.

        Raises:
            `KeyError`: if any of the passed column names do not exist.

        Returns:
            `datasets.table.Table`: New table without the columns.
        """
        raise NotImplementedError()

    def select(self, *args, **kwargs):
        """
        Select columns of the table.

        Returns a new table with the specified columns, and metadata preserved.

        Args:
            columns (:obj:`Union[List[str], List[int]]`):
                The column names or integer indices to select.

        Returns:
            `datasets.table.Table`: table with only a subset of the columns
        """
        raise NotImplementedError()


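# Illustrative sketch, not part of the original module: the composition
# pattern above in action. A bare `Table` delegates read-only accessors to the
# wrapped pyarrow table, while the transforms are left to the subclasses below.
if __name__ == "__main__":
    _wrapped = Table(pa.table({"a": [1, 2]}))
    print(_wrapped.num_rows, _wrapped.column_names)  # 2 ['a']
    try:
        _wrapped.slice(0, 1)  # transforms raise on the base class
    except NotImplementedError:
        print("slice() is only implemented on the subclasses")

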
class TableBlock(Table):
    """
    `TableBlock` is the allowed class inside a `ConcatenationTable`.
    Only `MemoryMappedTable` and `InMemoryTable` are `TableBlock`.
    This is because we don't want a `ConcatenationTable` made out of other `ConcatenationTable`s.
    """

    pass


class InMemoryTable(TableBlock):
    """
    The table is said to be in-memory when it is loaded into the user's RAM.

    Pickling it copies all the data in memory.
    Its implementation is simple and uses the underlying pyarrow Table methods directly.

    This is different from the `MemoryMapped` table, for which pickling doesn't copy all the
    data in memory. For a `MemoryMapped`, unpickling instead reloads the table from the disk.

    `InMemoryTable` must be used when data fit in memory, while `MemoryMapped` are reserved for
    data bigger than memory or when you want the memory footprint of your application to
    stay low.
    """

    @classmethod
    def from_file(cls, filename: str):
        table = _in_memory_arrow_table_from_file(filename)
        return cls(table)

    @classmethod
    def from_buffer(cls, buffer: pa.Buffer):
        table = _in_memory_arrow_table_from_buffer(buffer)
        return cls(table)

    @classmethod
    def from_pandas(cls, *args, **kwargs):
        """
        Convert pandas.DataFrame to an Arrow Table.

        The column types in the resulting Arrow Table are inferred from the
        dtypes of the pandas.Series in the DataFrame. In the case of non-object
        Series, the NumPy dtype is translated to its Arrow equivalent. In the
        case of `object`, we need to guess the datatype by looking at the
        Python objects in this Series.

        Be aware that Series of the `object` dtype don't carry enough
        information to always lead to a meaningful Arrow type. In the case that
        we cannot infer a type, e.g. because the DataFrame is of length 0 or
        the Series only contains `None/nan` objects, the type is set to
        null. This behavior can be avoided by constructing an explicit schema
        and passing it to this function.

        Args:
            df (`pandas.DataFrame`):
            schema (`pyarrow.Schema`, *optional*):
                The expected schema of the Arrow Table. This can be used to
                indicate the type of columns if we cannot infer it automatically.
                If passed, the output will have exactly this schema. Columns
                specified in the schema that are not found in the DataFrame columns
                or its index will raise an error. Additional columns or index
                levels in the DataFrame which are not specified in the schema will
                be ignored.
            preserve_index (`bool`, *optional*):
                Whether to store the index as an additional column in the resulting
                `Table`. The default of None will store the index as a column,
                except for RangeIndex which is stored as metadata only. Use
                `preserve_index=True` to force it to be stored as a column.
            nthreads (`int`, defaults to `None` (may use up to system CPU count threads)):
                If greater than 1, convert columns to Arrow in parallel using
                indicated number of threads.
            columns (`List[str]`, *optional*):
                List of columns to be converted. If `None`, use all columns.
            safe (`bool`, defaults to `True`):
                Check for overflows or other unsafe conversions.

        Returns:
            `datasets.table.Table`:

        Examples:
        ```python
        >>> import pandas as pd
        >>> import pyarrow as pa
        >>> df = pd.DataFrame({
        ...     'int': [1, 2],
        ...     'str': ['a', 'b']
        ... })
        >>> pa.Table.from_pandas(df)
        <pyarrow.lib.Table object at 0x7f05d1fb1b40>
        ```
        """
        return cls(pa.Table.from_pandas(*args, **kwargs))

    @classmethod
    def from_arrays(cls, *args, **kwargs):
        """
        Construct a Table from Arrow arrays.

        Args:
            arrays (`List[Union[pyarrow.Array, pyarrow.ChunkedArray]]`):
                Equal-length arrays that should form the table.
            names (`List[str]`, *optional*):
                Names for the table columns. If not passed, schema must be passed.
            schema (`Schema`, defaults to `None`):
                Schema for the created table. If not passed, names must be passed.
            metadata (`Union[dict, Mapping]`, defaults to `None`):
                Optional metadata for the schema (if inferred).

        Returns:
            `datasets.table.Table`
        """
        return cls(pa.Table.from_arrays(*args, **kwargs))

    @classmethod
    def from_pydict(cls, *args, **kwargs):
        """
        Construct a Table from Arrow arrays or columns.

        Args:
            mapping (`Union[dict, Mapping]`):
                A mapping of strings to Arrays or Python lists.
            schema (`Schema`, defaults to `None`):
                If not passed, will be inferred from the Mapping values.
            metadata (`Union[dict, Mapping]`, defaults to `None`):
                Optional metadata for the schema (if inferred).

        Returns:
            `datasets.table.Table`
        """
        return cls(pa.Table.from_pydict(*args, **kwargs))

    @classmethod
    def from_pylist(cls, mapping, *args, **kwargs):
        """
        Construct a Table from list of rows / dictionaries.

        Args:
            mapping (`List[dict]`):
                A mapping of strings to row values.
            schema (`Schema`, defaults to `None`):
                If not passed, will be inferred from the Mapping values.
            metadata (`Union[dict, Mapping]`, defaults to `None`):
                Optional metadata for the schema (if inferred).

        Returns:
            `datasets.table.Table`
        """
        return cls(pa.Table.from_pylist(mapping, *args, **kwargs))

    @classmethod
    def from_batches(cls, *args, **kwargs):
        """
        Construct a Table from a sequence or iterator of Arrow `RecordBatches`.

        Args:
            batches (`Union[Sequence[pyarrow.RecordBatch], Iterator[pyarrow.RecordBatch]]`):
                Sequence of `RecordBatch` to be converted, all schemas must be equal.
            schema (`Schema`, defaults to `None`):
                If not passed, will be inferred from the first `RecordBatch`.

        Returns:
            `datasets.table.Table`:
        """
        return cls(pa.Table.from_batches(*args, **kwargs))

    def slice(self, offset=0, length=None):
        """
        Compute zero-copy slice of this Table.

        Args:
            offset (`int`, defaults to `0`):
                Offset from start of table to slice.
            length (`int`, defaults to `None`):
                Length of slice (default is until end of table starting from
                offset).

        Returns:
            `datasets.table.Table`
        """
        # Use fast slicing here
        return InMemoryTable(self.fast_slice(offset=offset, length=length))

    def filter(self, *args, **kwargs):
        """
        Select records from a Table. See `pyarrow.compute.filter` for full usage.
        """
        return InMemoryTable(self.table.filter(*args, **kwargs))

    def flatten(self, *args, **kwargs):
        """
        Flatten this Table. Each column with a struct type is flattened
        into one column per struct field. Other columns are left unchanged.

        Args:
            memory_pool (`MemoryPool`, defaults to `None`):
                For memory allocations, if required, otherwise use default pool.

        Returns:
            `datasets.table.Table`
        """
        return InMemoryTable(table_flatten(self.table, *args, **kwargs))

    def combine_chunks(self, *args, **kwargs):
        """
        Make a new table by combining the chunks this table has.

        All the underlying chunks in the `ChunkedArray` of each column are
        concatenated into zero or one chunk.

        Args:
            memory_pool (`MemoryPool`, defaults to `None`):
                For memory allocations, if required, otherwise use default pool.

        Returns:
            `datasets.table.Table`
        """
        return InMemoryTable(self.table.combine_chunks(*args, **kwargs))

    def cast(self, *args, **kwargs):
        """
        Cast table values to another schema.

        Args:
            target_schema (`Schema`):
                Schema to cast to, the names and order of fields must match.
            safe (`bool`, defaults to `True`):
                Check for overflows or other unsafe conversions.

        Returns:
            `datasets.table.Table`
        """
        return InMemoryTable(table_cast(self.table, *args, **kwargs))

    def replace_schema_metadata(self, *args, **kwargs):
        """
        EXPERIMENTAL: Create shallow copy of table by replacing schema
        key-value metadata with the indicated new metadata (which may be `None`,
        which deletes any existing metadata).

        Args:
            metadata (`dict`, defaults to `None`):

        Returns:
            `datasets.table.Table`: shallow_copy
        """
        return InMemoryTable(self.table.replace_schema_metadata(*args, **kwargs))

    def add_column(self, *args, **kwargs):
        """
        Add column to Table at position.

        A new table is returned with the column added, the original table
        object is left unchanged.

        Args:
            i (`int`):
                Index to place the column at.
            field_ (`Union[str, pyarrow.Field]`):
                If a string is passed then the type is deduced from the column
                data.
            column (`Union[pyarrow.Array, List[pyarrow.Array]]`):
                Column data.

        Returns:
            `datasets.table.Table`: New table with the passed column added.
        """
        return InMemoryTable(self.table.add_column(*args, **kwargs))

    def append_column(self, *args, **kwargs):
        """
        Append column at end of columns.

        Args:
            field_ (`Union[str, pyarrow.Field]`):
                If a string is passed then the type is deduced from the column
                data.
            column (`Union[pyarrow.Array, List[pyarrow.Array]]`):
                Column data.

        Returns:
            `datasets.table.Table`:
                New table with the passed column added.
        """
        return InMemoryTable(self.table.append_column(*args, **kwargs))

    def remove_column(self, *args, **kwargs):
        """
        Create new Table with the indicated column removed.

        Args:
            i (`int`):
                Index of column to remove.

        Returns:
            `datasets.table.Table`:
                New table without the column.
        """
        return InMemoryTable(self.table.remove_column(*args, **kwargs))

    def set_column(self, *args, **kwargs):
        """
        Replace column in Table at position.

        Args:
            i (`int`):
                Index to place the column at.
            field_ (`Union[str, pyarrow.Field]`):
                If a string is passed then the type is deduced from the column
                data.
            column (`Union[pyarrow.Array, List[pyarrow.Array]]`):
                Column data.

        Returns:
            `datasets.table.Table`:
                New table with the passed column set.
        """
        return InMemoryTable(self.table.set_column(*args, **kwargs))

    def rename_columns(self, *args, **kwargs):
        """
        Create new table with columns renamed to provided names.
        """
        return InMemoryTable(self.table.rename_columns(*args, **kwargs))

    def drop(self, *args, **kwargs):
        """
        Drop one or more columns and return a new table.

        Args:
            columns (`List[str]`):
                List of field names referencing existing columns.

        Raises:
            `KeyError`: if any of the passed column names do not exist.

        Returns:
            `datasets.table.Table`:
                New table without the columns.
        """
        return InMemoryTable(self.table.drop(*args, **kwargs))

    def select(self, *args, **kwargs):
        """
        Select columns of the table.

        Returns a new table with the specified columns, and metadata preserved.

        Args:
            columns (:obj:`Union[List[str], List[int]]`):
                The column names or integer indices to select.

        Returns:
            :class:`datasets.table.Table`: New table with the specified columns, and metadata preserved.
        """
        return InMemoryTable(self.table.select(*args, **kwargs))


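# Illustrative sketch, not part of the original module: `InMemoryTable`
# round-trips through pickle by value, i.e. the arrow data itself travels
# inside the pickle payload (the assumption here is plain default pickling).
if __name__ == "__main__":
    import pickle

    _in_mem = InMemoryTable.from_pydict({"a": [1, 2, 3]})
    _restored = pickle.loads(pickle.dumps(_in_mem))
    assert _restored.to_pydict() == {"a": [1, 2, 3]}
    print(type(_restored).__name__)  # InMemoryTable

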
# The MemoryMappedTable needs replays to properly reload tables from the disk
Replay = Tuple[str, tuple, dict]


class MemoryMappedTable(TableBlock):
    """
    The table is said to be memory-mapped when it doesn't use the user's RAM but loads the data
    from the disk instead.

    Pickling it doesn't copy the data into memory.
    Instead, only the path to the memory mapped arrow file is pickled, as well as the list
    of transforms to "replay" when reloading the table from the disk.

    Its implementation requires storing a history of all the transforms that were applied
    to the underlying pyarrow Table, so that they can be "replayed" when reloading the Table
    from the disk.

    This is different from the `InMemoryTable` table, for which pickling does copy all the
    data in memory.

    `InMemoryTable` must be used when data fit in memory, while `MemoryMapped` are reserved for
    data bigger than memory or when you want the memory footprint of your application to
    stay low.
    """

    def __init__(self, table: pa.Table, path: str, replays: Optional[List[Replay]] = None):
        super().__init__(table)
        self.path = os.path.abspath(path)
        self.replays: List[Replay] = replays if replays is not None else []

    @classmethod
    def from_file(cls, filename: str, replays=None):
        table = _memory_mapped_arrow_table_from_file(filename)
        table = cls._apply_replays(table, replays)
        return cls(table, filename, replays)

    def __getstate__(self):
        return {"path": self.path, "replays": self.replays}

    def __setstate__(self, state):
        path = state["path"]
        replays = state["replays"]
        table = _memory_mapped_arrow_table_from_file(path)
        table = self._apply_replays(table, replays)
        MemoryMappedTable.__init__(self, table, path=path, replays=replays)

    @staticmethod
    def _apply_replays(table: pa.Table, replays: Optional[List[Replay]] = None) -> pa.Table:
        if replays is not None:
            for name, args, kwargs in replays:
                if name == "cast":
                    table = table_cast(table, *args, **kwargs)
                elif name == "flatten":
                    table = table_flatten(table, *args, **kwargs)
                else:
                    table = getattr(table, name)(*args, **kwargs)
        return table

    def _append_replay(self, replay: Replay) -> List[Replay]:
        replays = copy.deepcopy(self.replays)
        replays.append(replay)
        return replays

    def slice(self, offset=0, length=None):
        """
        Compute zero-copy slice of this Table.

        Args:
            offset (`int`, defaults to `0`):
                Offset from start of table to slice.
            length (`int`, defaults to `None`):
                Length of slice (default is until end of table starting from
                offset).

        Returns:
            `datasets.table.Table`
        """
        replay = ("slice", (offset, length), {})
        replays = self._append_replay(replay)
        # Use fast slicing here
        return MemoryMappedTable(self.fast_slice(offset=offset, length=length), self.path, replays)

    def filter(self, *args, **kwargs):
        """
        Select records from a Table. See `pyarrow.compute.filter` for full usage.
        """
        replay = ("filter", copy.deepcopy(args), copy.deepcopy(kwargs))
        replays = self._append_replay(replay)
        return MemoryMappedTable(self.table.filter(*args, **kwargs), self.path, replays)

    def flatten(self, *args, **kwargs):
        """
        Flatten this Table. Each column with a struct type is flattened
        into one column per struct field. Other columns are left unchanged.

        Args:
            memory_pool (`MemoryPool`, defaults to `None`):
                For memory allocations, if required, otherwise use default pool.

        Returns:
            `datasets.table.Table`
        """
        replay = ("flatten", copy.deepcopy(args), copy.deepcopy(kwargs))
        replays = self._append_replay(replay)
        return MemoryMappedTable(table_flatten(self.table, *args, **kwargs), self.path, replays)

    def combine_chunks(self, *args, **kwargs):
        """
        Make a new table by combining the chunks this table has.

        All the underlying chunks in the ChunkedArray of each column are
        concatenated into zero or one chunk.

        Args:
            memory_pool (`MemoryPool`, defaults to `None`):
                For memory allocations, if required, otherwise use default pool.

        Returns:
            `datasets.table.Table`
        """
        replay = ("combine_chunks", copy.deepcopy(args), copy.deepcopy(kwargs))
        replays = self._append_replay(replay)
        return MemoryMappedTable(self.table.combine_chunks(*args, **kwargs), self.path, replays)

    def cast(self, *args, **kwargs):
        """
        Cast table values to another schema.

        Args:
            target_schema (`Schema`):
                Schema to cast to, the names and order of fields must match.
            safe (`bool`, defaults to `True`):
                Check for overflows or other unsafe conversions.

        Returns:
            `datasets.table.Table`
        """
        replay = ("cast", copy.deepcopy(args), copy.deepcopy(kwargs))
        replays = self._append_replay(replay)
        return MemoryMappedTable(table_cast(self.table, *args, **kwargs), self.path, replays)

    def replace_schema_metadata(self, *args, **kwargs):
        """
        EXPERIMENTAL: Create shallow copy of table by replacing schema
        key-value metadata with the indicated new metadata (which may be `None`,
        which deletes any existing metadata).

        Args:
            metadata (`dict`, defaults to `None`):

        Returns:
            `datasets.table.Table`: shallow_copy
        """
        replay = ("replace_schema_metadata", copy.deepcopy(args), copy.deepcopy(kwargs))
        replays = self._append_replay(replay)
        return MemoryMappedTable(self.table.replace_schema_metadata(*args, **kwargs), self.path, replays)

    def add_column(self, *args, **kwargs):
        """
        Add column to Table at position.

        A new table is returned with the column added, the original table
        object is left unchanged.

        Args:
            i (`int`):
                Index to place the column at.
            field_ (`Union[str, pyarrow.Field]`):
                If a string is passed then the type is deduced from the column
                data.
            column (`Union[pyarrow.Array, List[pyarrow.Array]]`):
                Column data.

        Returns:
            `datasets.table.Table`: New table with the passed column added.
        """
        replay = ("add_column", copy.deepcopy(args), copy.deepcopy(kwargs))
        replays = self._append_replay(replay)
        return MemoryMappedTable(self.table.add_column(*args, **kwargs), self.path, replays)

    def append_column(self, *args, **kwargs):
        """
        Append column at end of columns.

        Args:
            field_ (`Union[str, pyarrow.Field]`):
                If a string is passed then the type is deduced from the column
                data.
            column (`Union[pyarrow.Array, List[pyarrow.Array]]`):
                Column data.

        Returns:
            `datasets.table.Table`:
                New table with the passed column added.
        """
        replay = ("append_column", copy.deepcopy(args), copy.deepcopy(kwargs))
        replays = self._append_replay(replay)
        return MemoryMappedTable(self.table.append_column(*args, **kwargs), self.path, replays)

    def remove_column(self, *args, **kwargs):
        """
        Create new Table with the indicated column removed.

        Args:
            i (`int`):
                Index of column to remove.

        Returns:
            `datasets.table.Table`:
                New table without the column.
        """
        replay = ("remove_column", copy.deepcopy(args), copy.deepcopy(kwargs))
        replays = self._append_replay(replay)
        return MemoryMappedTable(self.table.remove_column(*args, **kwargs), self.path, replays)

    def set_column(self, *args, **kwargs):
        """
        Replace column in Table at position.

        Args:
            i (`int`):
                Index to place the column at.
            field_ (`Union[str, pyarrow.Field]`):
                If a string is passed then the type is deduced from the column
                data.
            column (`Union[pyarrow.Array, List[pyarrow.Array]]`):
                Column data.

        Returns:
            `datasets.table.Table`:
                New table with the passed column set.
        """
        replay = ("set_column", copy.deepcopy(args), copy.deepcopy(kwargs))
        replays = self._append_replay(replay)
        return MemoryMappedTable(self.table.set_column(*args, **kwargs), self.path, replays)

    def rename_columns(self, *args, **kwargs):
        """
        Create new table with columns renamed to provided names.
        """
        replay = ("rename_columns", copy.deepcopy(args), copy.deepcopy(kwargs))
        replays = self._append_replay(replay)
        return MemoryMappedTable(self.table.rename_columns(*args, **kwargs), self.path, replays)

    def drop(self, *args, **kwargs):
        """
        Drop one or more columns and return a new table.

        Args:
            columns (`List[str]`):
                List of field names referencing existing columns.

        Raises:
            `KeyError`: if any of the passed column names do not exist.

        Returns:
            `datasets.table.Table`:
                New table without the columns.
        """
        replay = ("drop", copy.deepcopy(args), copy.deepcopy(kwargs))
        replays = self._append_replay(replay)
        return MemoryMappedTable(self.table.drop(*args, **kwargs), self.path, replays)

    def select(self, *args, **kwargs):
        """
        Select columns of the table.

        Returns a new table with the specified columns, and metadata preserved.

        Args:
            columns (:obj:`Union[List[str], List[int]]`):
                The column names or integer indices to select.

        Returns:
            :class:`datasets.table.Table`: New table with the specified columns, and metadata preserved.
        """
        replay = ("select", copy.deepcopy(args), copy.deepcopy(kwargs))
        replays = self._append_replay(replay)
        return MemoryMappedTable(self.table.select(*args, **kwargs), self.path, replays)


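# Illustrative sketch, not part of the original module: a `MemoryMappedTable`
# pickles as (path, replays) only; unpickling re-maps the file and re-applies
# the recorded transforms. `demo_path` comes from the sketch near the top of
# this module and is an assumption made for the demo.
if __name__ == "__main__":
    import pickle

    _mmap_table = MemoryMappedTable.from_file(demo_path).rename_columns(["A", "B"])
    print(_mmap_table.replays)  # [('rename_columns', (['A', 'B'],), {})]
    _reloaded = pickle.loads(pickle.dumps(_mmap_table))
    assert _reloaded.column_names == ["A", "B"]

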
# A ConcatenationTable is the concatenation of several tables.
# The ``blocks`` attribute stores a list of lists of blocks.
# The first axis concatenates the tables along the axis 0 (it appends rows),
# while the second axis concatenates tables along the axis 1 (it appends columns).
TableBlockContainer = TypeVar("TableBlockContainer", TableBlock, List[TableBlock], List[List[TableBlock]])


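# Illustrative sketch, not part of the original module: the null-filling
# ("promote") behavior that `ConcatenationTable` relies on when concatenating
# on axis 0. Columns missing from one table become null-filled columns.
if __name__ == "__main__":
    _t1 = pa.table({"a": [1, 2]})
    _t2 = pa.table({"a": [3], "b": ["z"]})
    if config.PYARROW_VERSION.major < 14:
        _combined = pa.concat_tables([_t1, _t2], promote=True)
    else:
        _combined = pa.concat_tables([_t1, _t2], promote_options="default")
    print(_combined.to_pydict())  # {'a': [1, 2, 3], 'b': [None, None, 'z']}

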
class ConcatenationTable(Table):
|
| 1275 |
+
"""
|
| 1276 |
+
The table comes from the concatenation of several tables called blocks.
|
| 1277 |
+
It enables concatenation on both axis 0 (append rows) and axis 1 (append columns).
|
| 1278 |
+
|
| 1279 |
+
The underlying tables are called "blocks" and can be either `InMemoryTable`
|
| 1280 |
+
or `MemoryMappedTable` objects.
|
| 1281 |
+
This allows to combine tables that come from memory or that are memory mapped.
|
| 1282 |
+
When a `ConcatenationTable` is pickled, then each block is pickled:
|
| 1283 |
+
- the `InMemoryTable` objects are pickled by copying all the data in memory.
|
| 1284 |
+
- the MemoryMappedTable objects are pickled without copying the data into memory.
|
| 1285 |
+
Instead, only the path to the memory mapped arrow file is pickled, as well as the list
|
| 1286 |
+
of transforms to "replays" when reloading the table from the disk.
|
| 1287 |
+
|
| 1288 |
+
Its implementation requires to store each block separately.
|
| 1289 |
+
The `blocks` attributes stores a list of list of blocks.
|
| 1290 |
+
The first axis concatenates the tables along the axis 0 (it appends rows),
|
| 1291 |
+
while the second axis concatenates tables along the axis 1 (it appends columns).
|
| 1292 |
+
|
| 1293 |
+
If some columns are missing when concatenating on axis 0, they are filled with null values.
|
| 1294 |
+
This is done using `pyarrow.concat_tables(tables, promote=True)`.
|
| 1295 |
+
|
| 1296 |
+
You can access the fully combined table by accessing the `ConcatenationTable.table` attribute,
|
| 1297 |
+
and the blocks by accessing the `ConcatenationTable.blocks` attribute.
|
| 1298 |
+
"""
|
| 1299 |
+
|
| 1300 |
+
def __init__(self, table: pa.Table, blocks: List[List[TableBlock]]):
|
| 1301 |
+
super().__init__(table)
|
| 1302 |
+
self.blocks = blocks
|
| 1303 |
+
# Check that all the blocks have the right type.
|
| 1304 |
+
# Only InMemoryTable and MemoryMappedTable are allowed.
|
| 1305 |
+
for subtables in blocks:
|
| 1306 |
+
for subtable in subtables:
|
| 1307 |
+
if not isinstance(subtable, TableBlock):
|
| 1308 |
+
raise TypeError(
|
| 1309 |
+
"The blocks of a ConcatenationTable must be InMemoryTable or MemoryMappedTable objects"
|
| 1310 |
+
f", but got {_short_str(subtable)}."
|
| 1311 |
+
)
|
| 1312 |
+
|
| 1313 |
+
def __getstate__(self):
|
| 1314 |
+
return {"blocks": self.blocks, "schema": self.table.schema}
|
| 1315 |
+
|
| 1316 |
+
def __setstate__(self, state):
|
| 1317 |
+
blocks = state["blocks"]
|
| 1318 |
+
schema = state["schema"]
|
| 1319 |
+
table = self._concat_blocks_horizontally_and_vertically(blocks)
|
| 1320 |
+
if schema is not None and table.schema != schema:
|
| 1321 |
+
# We fix the columns by concatenating with an empty table with the right columns
|
| 1322 |
+
empty_table = pa.Table.from_batches([], schema=schema)
|
| 1323 |
+
# we set promote=True to fill missing columns with null values
|
| 1324 |
+
if config.PYARROW_VERSION.major < 14:
|
| 1325 |
+
table = pa.concat_tables([table, empty_table], promote=True)
|
| 1326 |
+
else:
|
| 1327 |
+
table = pa.concat_tables([table, empty_table], promote_options="default")
|
| 1328 |
+
ConcatenationTable.__init__(self, table, blocks=blocks)
|
| 1329 |
+
|
| 1330 |
+
@staticmethod
|
| 1331 |
+
def _concat_blocks(blocks: List[Union[TableBlock, pa.Table]], axis: int = 0) -> pa.Table:
|
| 1332 |
+
pa_tables = [table.table if hasattr(table, "table") else table for table in blocks]
|
| 1333 |
+
if axis == 0:
|
| 1334 |
+
# we set promote=True to fill missing columns with null values
|
| 1335 |
+
if config.PYARROW_VERSION.major < 14:
|
| 1336 |
+
return pa.concat_tables(pa_tables, promote=True)
|
| 1337 |
+
else:
|
| 1338 |
+
return pa.concat_tables(pa_tables, promote_options="default")
|
| 1339 |
+
elif axis == 1:
|
| 1340 |
+
for i, table in enumerate(pa_tables):
|
| 1341 |
+
if i == 0:
|
| 1342 |
+
pa_table = table
|
| 1343 |
+
else:
|
| 1344 |
+
for name, col in zip(table.column_names, table.columns):
|
| 1345 |
+
pa_table = pa_table.append_column(name, col)
|
| 1346 |
+
return pa_table
|
| 1347 |
+
else:
|
| 1348 |
+
raise ValueError("'axis' must be either 0 or 1")
|
| 1349 |
+
|
| 1350 |
+
@classmethod
|
| 1351 |
+
def _concat_blocks_horizontally_and_vertically(cls, blocks: List[List[TableBlock]]) -> pa.Table:
|
| 1352 |
+
pa_tables_to_concat_vertically = []
|
| 1353 |
+
for i, tables in enumerate(blocks):
|
| 1354 |
+
if not tables:
|
| 1355 |
+
continue
|
| 1356 |
+
pa_table_horizontally_concatenated = cls._concat_blocks(tables, axis=1)
|
| 1357 |
+
pa_tables_to_concat_vertically.append(pa_table_horizontally_concatenated)
|
| 1358 |
+
return cls._concat_blocks(pa_tables_to_concat_vertically, axis=0)
|
| 1359 |
+
|
| 1360 |
+
@classmethod
|
| 1361 |
+
def _merge_blocks(cls, blocks: TableBlockContainer, axis: Optional[int] = None) -> TableBlockContainer:
|
| 1362 |
+
if axis is not None:
|
| 1363 |
+
merged_blocks = []
|
| 1364 |
+
for is_in_memory, block_group in groupby(blocks, key=lambda x: isinstance(x, InMemoryTable)):
|
| 1365 |
+
if is_in_memory:
|
| 1366 |
+
block_group = [InMemoryTable(cls._concat_blocks(list(block_group), axis=axis))]
|
| 1367 |
+
merged_blocks += list(block_group)
|
| 1368 |
+
else: # both
|
| 1369 |
+
merged_blocks = [cls._merge_blocks(row_block, axis=1) for row_block in blocks]
|
| 1370 |
+
if all(len(row_block) == 1 for row_block in merged_blocks):
|
| 1371 |
+
merged_blocks = cls._merge_blocks(
|
| 1372 |
+
[block for row_block in merged_blocks for block in row_block], axis=0
|
| 1373 |
+
)
|
| 1374 |
+
return merged_blocks
|
| 1375 |
+
|
| 1376 |
+
@classmethod
|
| 1377 |
+
def _consolidate_blocks(cls, blocks: TableBlockContainer) -> TableBlockContainer:
|
| 1378 |
+
if isinstance(blocks, TableBlock):
|
| 1379 |
+
return blocks
|
| 1380 |
+
elif isinstance(blocks[0], TableBlock):
|
| 1381 |
+
return cls._merge_blocks(blocks, axis=0)
|
| 1382 |
+
else:
|
| 1383 |
+
return cls._merge_blocks(blocks)
|
| 1384 |
+
|
| 1385 |
+
@classmethod
|
| 1386 |
+
def from_blocks(cls, blocks: TableBlockContainer) -> "ConcatenationTable":
|
| 1387 |
+
blocks = cls._consolidate_blocks(blocks)
|
| 1388 |
+
if isinstance(blocks, TableBlock):
|
| 1389 |
+
table = blocks
|
| 1390 |
+
return cls(table.table, [[table]])
|
| 1391 |
+
elif isinstance(blocks[0], TableBlock):
|
| 1392 |
+
            table = cls._concat_blocks(blocks, axis=0)
            blocks = [[t] for t in blocks]
            return cls(table, blocks)
        else:
            table = cls._concat_blocks_horizontally_and_vertically(blocks)
            return cls(table, blocks)

    @classmethod
    def from_tables(cls, tables: List[Union[pa.Table, Table]], axis: int = 0) -> "ConcatenationTable":
        """Create `ConcatenationTable` from list of tables.

        Args:
            tables (list of `Table` or list of `pyarrow.Table`):
                List of tables.
            axis (`{0, 1}`, defaults to `0`, meaning over rows):
                Axis to concatenate over, where `0` means over rows (vertically) and `1` means over columns
                (horizontally).

                <Added version="1.6.0"/>
        """

        def to_blocks(table: Union[pa.Table, Table]) -> List[List[TableBlock]]:
            if isinstance(table, pa.Table):
                return [[InMemoryTable(table)]]
            elif isinstance(table, ConcatenationTable):
                return copy.deepcopy(table.blocks)
            else:
                return [[table]]

        def _slice_row_block(row_block: List[TableBlock], length: int) -> Tuple[List[TableBlock], List[TableBlock]]:
            sliced = [table.slice(0, length) for table in row_block]
            remainder = [table.slice(length, len(row_block[0]) - length) for table in row_block]
            return sliced, remainder

        def _split_both_like(
            result: List[List[TableBlock]], blocks: List[List[TableBlock]]
        ) -> Tuple[List[List[TableBlock]], List[List[TableBlock]]]:
            """
            Make sure each row_block contains the same num_rows to be able to concatenate them on axis=1.

            To do so, we modify both blocks sets to have the same row_blocks boundaries.
            For example, if `result` has 2 row_blocks of 3 rows and `blocks` has 3 row_blocks of 2 rows,
            we modify both to have 4 row_blocks of size 2, 1, 1 and 2:

                    [ x x x | x x x ]
                +   [ y y | y y | y y ]
                -----------------------------
                =   [ x x | x | x | x x ]
                    [ y y | y | y | y y ]

            """
            result, blocks = list(result), list(blocks)
            new_result, new_blocks = [], []
            while result and blocks:
                # we slice the longest row block to save two row blocks of same length
                # and we replace the long row block by its remainder if necessary
                if len(result[0][0]) > len(blocks[0][0]):
                    new_blocks.append(blocks[0])
                    sliced, result[0] = _slice_row_block(result[0], len(blocks.pop(0)[0]))
                    new_result.append(sliced)
                elif len(result[0][0]) < len(blocks[0][0]):
                    new_result.append(result[0])
                    sliced, blocks[0] = _slice_row_block(blocks[0], len(result.pop(0)[0]))
                    new_blocks.append(sliced)
                else:
                    new_result.append(result.pop(0))
                    new_blocks.append(blocks.pop(0))
            if result or blocks:
                raise ValueError("Failed to concatenate on axis=1 because tables don't have the same number of rows")
            return new_result, new_blocks

        def _extend_blocks(
            result: List[List[TableBlock]], blocks: List[List[TableBlock]], axis: int = 0
        ) -> List[List[TableBlock]]:
            if axis == 0:
                result.extend(blocks)
            elif axis == 1:
                # We make sure each row_block has the same num_rows
                result, blocks = _split_both_like(result, blocks)
                for i, row_block in enumerate(blocks):
                    result[i].extend(row_block)
            return result

        blocks = to_blocks(tables[0])
        for table in tables[1:]:
            table_blocks = to_blocks(table)
            blocks = _extend_blocks(blocks, table_blocks, axis=axis)
        return cls.from_blocks(blocks)
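
# --- usage sketch (not part of the vendored file) ---
# A minimal sketch, assuming the `datasets` package is installed, of how
# `ConcatenationTable.from_tables` handles both axes: axis=0 stacks row
# blocks vertically, axis=1 joins columns and requires matching row counts.
import pyarrow as pa
from datasets.table import ConcatenationTable

t1 = pa.table({"a": [1, 2, 3]})
t2 = pa.table({"a": [4, 5, 6]})

# axis=0: the two tables become two row blocks stacked vertically.
vertical = ConcatenationTable.from_tables([t1, t2], axis=0)
print(vertical.num_rows)  # 6

# axis=1: column-wise concatenation; both inputs must have the same number of rows.
t3 = pa.table({"b": ["x", "y", "z"]})
horizontal = ConcatenationTable.from_tables([t1, t3], axis=1)
print(horizontal.column_names)  # ['a', 'b']
# --- end of sketch ---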

    @property
    def _slices(self):
        offset = 0
        for tables in self.blocks:
            length = len(tables[0])
            yield (offset, length)
            offset += length

    def slice(self, offset=0, length=None):
        """
        Compute zero-copy slice of this Table.

        Args:
            offset (`int`, defaults to `0`):
                Offset from start of table to slice.
            length (`int`, defaults to `None`):
                Length of slice (default is until end of table starting from
                offset).

        Returns:
            `datasets.table.Table`
        """
        table = self.table.slice(offset, length=length)
        length = length if length is not None else self.num_rows - offset
        blocks = []
        for tables in self.blocks:
            n_rows = len(tables[0])
            if length == 0:
                break
            elif n_rows <= offset:
                offset = offset - n_rows
            elif n_rows <= offset + length:
                blocks.append([t.slice(offset) for t in tables])
                length, offset = length + offset - n_rows, 0
            else:
                blocks.append([t.slice(offset, length) for t in tables])
                length, offset = 0, 0
        return ConcatenationTable(table, blocks)
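
# --- usage sketch (not part of the vendored file) ---
# A short sketch, assuming `datasets` is installed, of zero-copy slicing
# across block boundaries: the slice below spans the end of the first block
# and the start of the second, and the result is still a ConcatenationTable.
import pyarrow as pa
from datasets.table import ConcatenationTable

t = ConcatenationTable.from_tables([pa.table({"a": [1, 2, 3]}), pa.table({"a": [4, 5, 6]})])
s = t.slice(2, 2)  # rows 2 and 3 overall: one row from each block
print(s.to_pydict())  # {'a': [3, 4]}
# --- end of sketch ---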

    def filter(self, mask, *args, **kwargs):
        """
        Select records from a Table. See `pyarrow.compute.filter` for full usage.
        """
        table = self.table.filter(mask, *args, **kwargs)
        blocks = []
        for (offset, length), tables in zip(self._slices, self.blocks):
            submask = mask.slice(offset, length)
            blocks.append([t.filter(submask, *args, **kwargs) for t in tables])
        return ConcatenationTable(table, blocks)

    def flatten(self, *args, **kwargs):
        """
        Flatten this Table. Each column with a struct type is flattened
        into one column per struct field. Other columns are left unchanged.

        Args:
            memory_pool (`MemoryPool`, defaults to `None`):
                For memory allocations, if required, otherwise use default pool.

        Returns:
            `datasets.table.Table`
        """
        table = table_flatten(self.table, *args, **kwargs)
        blocks = []
        for tables in self.blocks:
            blocks.append([t.flatten(*args, **kwargs) for t in tables])
        return ConcatenationTable(table, blocks)

    def combine_chunks(self, *args, **kwargs):
        """
        Make a new table by combining the chunks this table has.

        All the underlying chunks in the `ChunkedArray` of each column are
        concatenated into zero or one chunk.

        Args:
            memory_pool (`MemoryPool`, defaults to `None`):
                For memory allocations, if required, otherwise use default pool.

        Returns:
            `datasets.table.Table`
        """
        table = self.table.combine_chunks(*args, **kwargs)
        blocks = []
        for tables in self.blocks:
            blocks.append([t.combine_chunks(*args, **kwargs) for t in tables])
        return ConcatenationTable(table, blocks)

    def cast(self, target_schema, *args, **kwargs):
        """
        Cast table values to another schema.

        Args:
            target_schema (`Schema`):
                Schema to cast to, the names and order of fields must match.
            safe (`bool`, defaults to `True`):
                Check for overflows or other unsafe conversions.

        Returns:
            `datasets.table.Table`
        """
        from .features import Features

        table = table_cast(self.table, target_schema, *args, **kwargs)
        target_features = Features.from_arrow_schema(target_schema)
        blocks = []
        for subtables in self.blocks:
            new_tables = []
            fields = list(target_schema)
            for subtable in subtables:
                subfields = []
                for name in subtable.column_names:
                    subfields.append(fields.pop(next(i for i, field in enumerate(fields) if field.name == name)))
                subfeatures = Features({subfield.name: target_features[subfield.name] for subfield in subfields})
                subschema = subfeatures.arrow_schema
                new_tables.append(subtable.cast(subschema, *args, **kwargs))
            blocks.append(new_tables)
        return ConcatenationTable(table, blocks)

    def replace_schema_metadata(self, *args, **kwargs):
        """
        EXPERIMENTAL: Create shallow copy of table by replacing schema
        key-value metadata with the indicated new metadata (which may be `None`,
        which deletes any existing metadata).

        Args:
            metadata (`dict`, defaults to `None`):

        Returns:
            `datasets.table.Table`: shallow_copy
        """
        table = self.table.replace_schema_metadata(*args, **kwargs)
        blocks = []
        for tables in self.blocks:
            blocks.append([t.replace_schema_metadata(*args, **kwargs) for t in tables])
        return ConcatenationTable(table, self.blocks)

    def add_column(self, *args, **kwargs):
        """
        Add column to Table at position.

        A new table is returned with the column added, the original table
        object is left unchanged.

        Args:
            i (`int`):
                Index to place the column at.
            field_ (`Union[str, pyarrow.Field]`):
                If a string is passed then the type is deduced from the column
                data.
            column (`Union[pyarrow.Array, List[pyarrow.Array]]`):
                Column data.

        Returns:
            `datasets.table.Table`: New table with the passed column added.
        """
        raise NotImplementedError()

    def append_column(self, *args, **kwargs):
        """
        Append column at end of columns.

        Args:
            field_ (`Union[str, pyarrow.Field]`):
                If a string is passed then the type is deduced from the column
                data.
            column (`Union[pyarrow.Array, List[pyarrow.Array]]`):
                Column data.

        Returns:
            `datasets.table.Table`:
                New table with the passed column added.
        """
        raise NotImplementedError()

    def remove_column(self, i, *args, **kwargs):
        """
        Create new Table with the indicated column removed.

        Args:
            i (`int`):
                Index of column to remove.

        Returns:
            `datasets.table.Table`:
                New table without the column.
        """
        table = self.table.remove_column(i, *args, **kwargs)
        name = self.table.column_names[i]
        blocks = []
        for tables in self.blocks:
            blocks.append(
                [
                    t.remove_column(t.column_names.index(name), *args, **kwargs) if name in t.column_names else t
                    for t in tables
                ]
            )
        return ConcatenationTable(table, blocks)

    def set_column(self, *args, **kwargs):
        """
        Replace column in Table at position.

        Args:
            i (`int`):
                Index to place the column at.
            field_ (`Union[str, pyarrow.Field]`):
                If a string is passed then the type is deduced from the column
                data.
            column (`Union[pyarrow.Array, List[pyarrow.Array]]`):
                Column data.

        Returns:
            `datasets.table.Table`:
                New table with the passed column set.
        """
        raise NotImplementedError()

    def rename_columns(self, names, *args, **kwargs):
        """
        Create new table with columns renamed to provided names.
        """
        table = self.table.rename_columns(names, *args, **kwargs)
        names = dict(zip(self.table.column_names, names))
        blocks = []
        for tables in self.blocks:
            blocks.append(
                [t.rename_columns([names[name] for name in t.column_names], *args, **kwargs) for t in tables]
            )
        return ConcatenationTable(table, blocks)

    def drop(self, columns, *args, **kwargs):
        """
        Drop one or more columns and return a new table.

        Args:
            columns (`List[str]`):
                List of field names referencing existing columns.

        Raises:
            `KeyError`: if any of the passed column names do not exist.

        Returns:
            `datasets.table.Table`:
                New table without the columns.
        """
        table = self.table.drop(columns, *args, **kwargs)
        blocks = []
        for tables in self.blocks:
            blocks.append([t.drop([c for c in columns if c in t.column_names], *args, **kwargs) for t in tables])
        return ConcatenationTable(table, blocks)

    def select(self, columns, *args, **kwargs):
        """
        Select columns of the table.

        Returns a new table with the specified columns, and metadata preserved.

        Args:
            columns (:obj:`Union[List[str], List[int]]`):
                The column names or integer indices to select.

        Returns:
            :class:`datasets.table.Table`: New table with the specified columns, and metadata preserved.
        """
        table = self.table.select(columns, *args, **kwargs)
        blocks = []
        for tables in self.blocks:
            blocks.append([t.select([c for c in columns if c in t.column_names], *args, **kwargs) for t in tables])
        return ConcatenationTable(table, blocks)


def concat_tables(tables: List[Table], axis: int = 0) -> Table:
    """
    Concatenate tables.

    Args:
        tables (list of `Table`):
            List of tables to be concatenated.
        axis (`{0, 1}`, defaults to `0`, meaning over rows):
            Axis to concatenate over, where `0` means over rows (vertically) and `1` means over columns
            (horizontally).

            <Added version="1.6.0"/>
    Returns:
        `datasets.table.Table`:
            If the number of input tables is > 1, then the returned table is a `datasets.table.ConcatenationTable`.
            Otherwise if there's only one table, it is returned as is.
    """
    tables = list(tables)
    if len(tables) == 1:
        return tables[0]
    return ConcatenationTable.from_tables(tables, axis=axis)
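
# --- usage sketch (not part of the vendored file) ---
# Hedged usage sketch for `concat_tables`, assuming `datasets` is installed:
# a single input is returned as-is; several inputs produce a
# ConcatenationTable, which keeps the original blocks alive so that
# memory-mapped sub-tables stay memory-mapped.
import pyarrow as pa
from datasets.table import InMemoryTable, concat_tables

t1 = InMemoryTable(pa.table({"a": [1, 2]}))
t2 = InMemoryTable(pa.table({"a": [3, 4]}))

single = concat_tables([t1])          # returned unchanged
combined = concat_tables([t1, t2])    # datasets.table.ConcatenationTable
print(type(single).__name__, type(combined).__name__, combined.num_rows)
# --- end of sketch ---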


def list_table_cache_files(table: Table) -> List[str]:
    """
    Get the cache files that are loaded by the table.
    Cache files are used when parts of the table come from the disk via memory mapping.

    Returns:
        `List[str]`:
            A list of paths to the cache files loaded by the table.
    """
    if isinstance(table, ConcatenationTable):
        cache_files = []
        for subtables in table.blocks:
            for subtable in subtables:
                cache_files += list_table_cache_files(subtable)
        return cache_files
    elif isinstance(table, MemoryMappedTable):
        return [table.path]
    else:
        return []


def _wrap_for_chunked_arrays(func):
    """Apply the function on each chunk of a `pyarrow.ChunkedArray`, or on the array directly"""

    def wrapper(array, *args, **kwargs):
        if isinstance(array, pa.ChunkedArray):
            return pa.chunked_array([func(chunk, *args, **kwargs) for chunk in array.chunks])
        else:
            return func(array, *args, **kwargs)

    return wrapper
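
# --- usage sketch (not part of the vendored file) ---
# A self-contained sketch of what the decorator buys you: the wrapped
# function is written for a plain `pa.Array`, and the wrapper transparently
# maps it over every chunk of a `pa.ChunkedArray`. `double` is a hypothetical
# example function, not part of the library; the decorator body mirrors the
# one defined above.
import pyarrow as pa
import pyarrow.compute as pc

def _wrap_for_chunked_arrays(func):
    def wrapper(array, *args, **kwargs):
        if isinstance(array, pa.ChunkedArray):
            return pa.chunked_array([func(chunk, *args, **kwargs) for chunk in array.chunks])
        else:
            return func(array, *args, **kwargs)
    return wrapper

@_wrap_for_chunked_arrays
def double(array: pa.Array) -> pa.Array:
    return pc.multiply(array, 2)

chunked = pa.chunked_array([[1, 2], [3]])
print(double(chunked))          # ChunkedArray with chunks [2, 4] and [6]
print(double(pa.array([5])))    # plain Array [10]
# --- end of sketch ---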


def _are_list_values_of_length(array: pa.ListArray, length: int) -> bool:
    """Check if all the sub-lists of a `pa.ListArray` have the specified length."""
    return pc.all(pc.equal(array.value_lengths(), length)).as_py() or array.null_count == len(array)


def _combine_list_array_offsets_with_mask(array: pa.ListArray) -> pa.Array:
    """Add the null bitmap to the offsets of a `pa.ListArray`."""
    offsets = array.offsets
    if array.null_count > 0:
        offsets = pa.concat_arrays(
            [
                pc.replace_with_mask(offsets[:-1], array.is_null(), pa.nulls(len(array), pa.int32())),
                offsets[-1:],
            ]
        )
    return offsets


def _storage_type(type: pa.DataType) -> pa.DataType:
    """Convert a (possibly nested) `pa.ExtensionType` to its storage type."""
    if isinstance(type, pa.ExtensionType):
        return _storage_type(type.storage_type)
    elif isinstance(type, pa.StructType):
        return pa.struct([pa.field(field.name, _storage_type(field.type)) for field in type])
    elif isinstance(type, pa.ListType):
        return pa.list_(_storage_type(type.value_type))
    elif isinstance(type, pa.FixedSizeListType):
        return pa.list_(_storage_type(type.value_type), type.list_size)
    return type


def _short_str(value: Any) -> str:
    out = str(value)
    if len(out) > 3000:
        out = out[:1500] + "\n...\n" + out[-1500:]
    return out


@_wrap_for_chunked_arrays
def array_cast(
    array: pa.Array, pa_type: pa.DataType, allow_primitive_to_str: bool = True, allow_decimal_to_str: bool = True
) -> Union[pa.Array, pa.FixedSizeListArray, pa.ListArray, pa.StructArray, pa.ExtensionArray]:
    """Improved version of `pa.Array.cast`

    It supports casting `pa.StructArray` objects to re-order the fields.
    It also lets you control certain aspects of the casting, e.g. whether
    to disable casting primitives (`booleans`, `floats` or `ints`) or
    disable casting decimals to strings.

    Args:
        array (`pa.Array`):
            PyArrow array to cast
        pa_type (`pa.DataType`):
            Target PyArrow type
        allow_primitive_to_str (`bool`, defaults to `True`):
            Whether to allow casting primitives to strings.
            Defaults to `True`.
        allow_decimal_to_str (`bool`, defaults to `True`):
            Whether to allow casting decimals to strings.
            Defaults to `True`.

    Raises:
        `pa.ArrowInvalidError`: if the arrow data casting fails
        `TypeError`: if the target type is not supported, e.g.

            - if a field is missing
            - if casting from primitives to strings and `allow_primitive_to_str` is `False`
            - if casting from decimals to strings and `allow_decimal_to_str` is `False`

    Returns:
        `List[pyarrow.Array]`: the casted array
    """
    _c = partial(array_cast, allow_primitive_to_str=allow_primitive_to_str, allow_decimal_to_str=allow_decimal_to_str)
    if isinstance(array, pa.ExtensionArray):
        array = array.storage
    if isinstance(pa_type, pa.ExtensionType):
        return pa_type.wrap_array(_c(array, pa_type.storage_type))
    elif array.type == pa_type:
        return array
    elif pa.types.is_struct(array.type):
        if pa.types.is_struct(pa_type) and ({field.name for field in pa_type} == {field.name for field in array.type}):
            if array.type.num_fields == 0:
                return array
            arrays = [_c(array.field(field.name), field.type) for field in pa_type]
            return pa.StructArray.from_arrays(arrays, fields=list(pa_type), mask=array.is_null())
    elif pa.types.is_list(array.type):
        if pa.types.is_fixed_size_list(pa_type):
            if _are_list_values_of_length(array, pa_type.list_size):
                if array.null_count > 0:
                    # Ensure each null value in the array translates to [null] * pa_type.list_size in the array's values array
                    array_type = array.type
                    storage_type = _storage_type(array_type)
                    if array_type != storage_type:
                        # Temporarily convert to the storage type to support extension types in the slice operation
                        array = _c(array, storage_type)
                        array = pc.list_slice(array, 0, pa_type.list_size, return_fixed_size_list=True)
                        array = _c(array, array_type)
                    else:
                        array = pc.list_slice(array, 0, pa_type.list_size, return_fixed_size_list=True)
                    array_values = array.values
                    if config.PYARROW_VERSION.major < 15:
                        return pa.Array.from_buffers(
                            pa_type,
                            len(array),
                            [array.is_valid().buffers()[1]],
                            children=[_c(array_values, pa_type.value_type)],
                        )
                    else:
                        return pa.FixedSizeListArray.from_arrays(
                            _c(array_values, pa_type.value_type), pa_type.list_size, mask=array.is_null()
                        )
                else:
                    array_values = array.values[
                        array.offset * pa_type.length : (array.offset + len(array)) * pa_type.length
                    ]
                    return pa.FixedSizeListArray.from_arrays(_c(array_values, pa_type.value_type), pa_type.list_size)
        elif pa.types.is_list(pa_type):
            # Merge offsets with the null bitmap to avoid the "Null bitmap with offsets slice not supported" ArrowNotImplementedError
            array_offsets = _combine_list_array_offsets_with_mask(array)
            return pa.ListArray.from_arrays(array_offsets, _c(array.values, pa_type.value_type))
    elif pa.types.is_fixed_size_list(array.type):
        if pa.types.is_fixed_size_list(pa_type):
            if pa_type.list_size == array.type.list_size:
                array_values = array.values[
                    array.offset * array.type.list_size : (array.offset + len(array)) * array.type.list_size
                ]
                if config.PYARROW_VERSION.major < 15:
                    return pa.Array.from_buffers(
                        pa_type,
                        len(array),
                        [array.is_valid().buffers()[1]],
                        children=[_c(array_values, pa_type.value_type)],
                    )
                else:
                    return pa.FixedSizeListArray.from_arrays(
                        _c(array_values, pa_type.value_type), pa_type.list_size, mask=array.is_null()
                    )
        elif pa.types.is_list(pa_type):
            array_offsets = (np.arange(len(array) + 1) + array.offset) * array.type.list_size
            return pa.ListArray.from_arrays(array_offsets, _c(array.values, pa_type.value_type), mask=array.is_null())
    else:
        if pa.types.is_string(pa_type):
            if not allow_primitive_to_str and pa.types.is_primitive(array.type):
                raise TypeError(
                    f"Couldn't cast array of type {_short_str(array.type)} to {_short_str(pa_type)} "
                    f"since allow_primitive_to_str is set to {allow_primitive_to_str} "
                )
            if not allow_decimal_to_str and pa.types.is_decimal(array.type):
                raise TypeError(
                    f"Couldn't cast array of type {_short_str(array.type)} to {_short_str(pa_type)} "
                    f"and allow_decimal_to_str is set to {allow_decimal_to_str}"
                )
        if pa.types.is_null(pa_type) and not pa.types.is_null(array.type):
            raise TypeError(f"Couldn't cast array of type {_short_str(array.type)} to {_short_str(pa_type)}")
        return array.cast(pa_type)
    raise TypeError(f"Couldn't cast array of type {_short_str(array.type)} to {_short_str(pa_type)}")
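
# --- usage sketch (not part of the vendored file) ---
# A hedged sketch, assuming `datasets` is installed, of the main extra
# behaviour `array_cast` adds over `pa.Array.cast`: struct fields are matched
# by name, so the target type may list them in a different order.
import pyarrow as pa
from datasets.table import array_cast

structs = pa.array([{"x": 1, "y": "a"}, {"x": 2, "y": "b"}])
print(structs.type)  # struct<x: int64, y: string>

reordered = array_cast(structs, pa.struct([("y", pa.string()), ("x", pa.int64())]))
print(reordered.type)  # struct<y: string, x: int64>

# Primitive-to-string casting can be vetoed explicitly:
try:
    array_cast(pa.array([1, 2]), pa.string(), allow_primitive_to_str=False)
except TypeError as e:
    print(e)
# --- end of sketch ---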


@_wrap_for_chunked_arrays
def cast_array_to_feature(
    array: pa.Array, feature: "FeatureType", allow_primitive_to_str: bool = True, allow_decimal_to_str: bool = True
) -> pa.Array:
    """Cast an array to the arrow type that corresponds to the requested feature type.
    For custom features like [`Audio`] or [`Image`], it takes into account the "cast_storage" methods
    they defined to enable casting from other arrow types.

    Args:
        array (`pa.Array`):
            The PyArrow array to cast.
        feature (`datasets.features.FeatureType`):
            The target feature type.
        allow_primitive_to_str (`bool`, defaults to `True`):
            Whether to allow casting primitives to strings.
            Defaults to `True`.
        allow_decimal_to_str (`bool`, defaults to `True`):
            Whether to allow casting decimals to strings.
            Defaults to `True`.

    Raises:
        `pa.ArrowInvalidError`: if the arrow data casting fails
        `TypeError`: if the target type is not supported, e.g.

            - if a field is missing
            - if casting from primitives and `allow_primitive_to_str` is `False`
            - if casting from decimals and `allow_decimal_to_str` is `False`

    Returns:
        array (`pyarrow.Array`): the casted array
    """
    from .features.features import Sequence, get_nested_type

    _c = partial(
        cast_array_to_feature,
        allow_primitive_to_str=allow_primitive_to_str,
        allow_decimal_to_str=allow_decimal_to_str,
    )

    if isinstance(array, pa.ExtensionArray):
        array = array.storage
    if hasattr(feature, "cast_storage"):
        return feature.cast_storage(array)

    elif pa.types.is_struct(array.type):
        # feature must be a dict or Sequence(subfeatures_dict)
        if isinstance(feature, Sequence) and isinstance(feature.feature, dict):
            feature = {
                name: Sequence(subfeature, length=feature.length) for name, subfeature in feature.feature.items()
            }
        if isinstance(feature, dict) and {field.name for field in array.type} == set(feature):
            if array.type.num_fields == 0:
                return array
            arrays = [_c(array.field(name), subfeature) for name, subfeature in feature.items()]
            return pa.StructArray.from_arrays(arrays, names=list(feature), mask=array.is_null())
    elif pa.types.is_list(array.type):
        # feature must be either [subfeature] or Sequence(subfeature)
        if isinstance(feature, list):
            casted_array_values = _c(array.values, feature[0])
            if casted_array_values.type == array.values.type:
                return array
            else:
                # Merge offsets with the null bitmap to avoid the "Null bitmap with offsets slice not supported" ArrowNotImplementedError
                array_offsets = _combine_list_array_offsets_with_mask(array)
                return pa.ListArray.from_arrays(array_offsets, casted_array_values)
        elif isinstance(feature, Sequence):
            if feature.length > -1:
                if _are_list_values_of_length(array, feature.length):
                    if array.null_count > 0:
                        # Ensure each null value in the array translates to [null] * pa_type.list_size in the array's values array
                        array_type = array.type
                        storage_type = _storage_type(array_type)
                        if array_type != storage_type:
                            # Temporarily convert to the storage type to support extension types in the slice operation
                            array = array_cast(
                                array,
                                storage_type,
                                allow_primitive_to_str=allow_primitive_to_str,
                                allow_decimal_to_str=allow_decimal_to_str,
                            )
                            array = pc.list_slice(array, 0, feature.length, return_fixed_size_list=True)
                            array = array_cast(
                                array,
                                array_type,
                                allow_primitive_to_str=allow_primitive_to_str,
                                allow_decimal_to_str=allow_decimal_to_str,
                            )
                        else:
                            array = pc.list_slice(array, 0, feature.length, return_fixed_size_list=True)
                        array_values = array.values
                        casted_array_values = _c(array_values, feature.feature)
                        if config.PYARROW_VERSION.major < 15:
                            return pa.Array.from_buffers(
                                pa.list_(casted_array_values.type, feature.length),
                                len(array),
                                [array.is_valid().buffers()[1]],
                                children=[casted_array_values],
                            )
                        else:
                            return pa.FixedSizeListArray.from_arrays(
                                casted_array_values, feature.length, mask=array.is_null()
                            )
                    else:
                        array_values = array.values[
                            array.offset * feature.length : (array.offset + len(array)) * feature.length
                        ]
                        return pa.FixedSizeListArray.from_arrays(_c(array_values, feature.feature), feature.length)
            else:
                casted_array_values = _c(array.values, feature.feature)
                if casted_array_values.type == array.values.type:
                    return array
                else:
                    # Merge offsets with the null bitmap to avoid the "Null bitmap with offsets slice not supported" ArrowNotImplementedError
                    array_offsets = _combine_list_array_offsets_with_mask(array)
                    return pa.ListArray.from_arrays(array_offsets, casted_array_values)
    elif pa.types.is_fixed_size_list(array.type):
        # feature must be either [subfeature] or Sequence(subfeature)
        if isinstance(feature, list):
            array_offsets = (np.arange(len(array) + 1) + array.offset) * array.type.list_size
            return pa.ListArray.from_arrays(array_offsets, _c(array.values, feature[0]), mask=array.is_null())
        elif isinstance(feature, Sequence):
            if feature.length > -1:
                if feature.length == array.type.list_size:
                    array_values = array.values[
                        array.offset * array.type.list_size : (array.offset + len(array)) * array.type.list_size
                    ]
                    casted_array_values = _c(array_values, feature.feature)
                    if config.PYARROW_VERSION.major < 15:
                        return pa.Array.from_buffers(
                            pa.list_(casted_array_values.type, feature.length),
                            len(array),
                            [array.is_valid().buffers()[1]],
                            children=[casted_array_values],
                        )
                    else:
                        return pa.FixedSizeListArray.from_arrays(
                            casted_array_values, feature.length, mask=array.is_null()
                        )
            else:
                array_offsets = (np.arange(len(array) + 1) + array.offset) * array.type.list_size
                return pa.ListArray.from_arrays(array_offsets, _c(array.values, feature.feature), mask=array.is_null())
    if pa.types.is_null(array.type):
        return array_cast(
            array,
            get_nested_type(feature),
            allow_primitive_to_str=allow_primitive_to_str,
            allow_decimal_to_str=allow_decimal_to_str,
        )
    elif not isinstance(feature, (Sequence, dict, list, tuple)):
        return array_cast(
            array,
            feature(),
            allow_primitive_to_str=allow_primitive_to_str,
            allow_decimal_to_str=allow_decimal_to_str,
        )
    raise TypeError(f"Couldn't cast array of type\n{_short_str(array.type)}\nto\n{_short_str(feature)}")
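
# --- usage sketch (not part of the vendored file) ---
# A hedged usage sketch, assuming `datasets` is installed: the feature type
# drives the cast, and features with a `cast_storage` method (such as
# `ClassLabel` here) get to reinterpret the raw arrow data themselves.
import pyarrow as pa
from datasets import ClassLabel, Value
from datasets.table import cast_array_to_feature

# Plain value features delegate to array_cast:
floats = cast_array_to_feature(pa.array([1, 2, 3]), Value("float32"))
print(floats.type)  # float

# ClassLabel.cast_storage turns label strings into integer ids:
labels = cast_array_to_feature(pa.array(["neg", "pos"]), ClassLabel(names=["neg", "pos"]))
print(labels.to_pylist())  # [0, 1]
# --- end of sketch ---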


@_wrap_for_chunked_arrays
def embed_array_storage(array: pa.Array, feature: "FeatureType"):
    """Embed data into an array's storage.
    For custom features like Audio or Image, it takes into account the "embed_storage" methods
    they define to embed external data (e.g. an image file) into an array.

    <Added version="2.4.0"/>

    Args:
        array (`pa.Array`):
            The PyArrow array in which to embed data.
        feature (`datasets.features.FeatureType`):
            Array features.

    Raises:
        `TypeError`: if the target type is not supported, e.g.

            - if a field is missing

    Returns:
        array (`pyarrow.Array`): the casted array
    """
    from .features import Sequence

    _e = embed_array_storage

    if isinstance(array, pa.ExtensionArray):
        array = array.storage
    if hasattr(feature, "embed_storage"):
        return feature.embed_storage(array)
    elif pa.types.is_struct(array.type):
        # feature must be a dict or Sequence(subfeatures_dict)
        if isinstance(feature, Sequence) and isinstance(feature.feature, dict):
            feature = {
                name: Sequence(subfeature, length=feature.length) for name, subfeature in feature.feature.items()
            }
        if isinstance(feature, dict):
            arrays = [_e(array.field(name), subfeature) for name, subfeature in feature.items()]
            return pa.StructArray.from_arrays(arrays, names=list(feature), mask=array.is_null())
    elif pa.types.is_list(array.type):
        # feature must be either [subfeature] or Sequence(subfeature)
        # Merge offsets with the null bitmap to avoid the "Null bitmap with offsets slice not supported" ArrowNotImplementedError
        array_offsets = _combine_list_array_offsets_with_mask(array)
        if isinstance(feature, list):
            return pa.ListArray.from_arrays(array_offsets, _e(array.values, feature[0]))
        if isinstance(feature, Sequence) and feature.length == -1:
            return pa.ListArray.from_arrays(array_offsets, _e(array.values, feature.feature))
    elif pa.types.is_fixed_size_list(array.type):
        # feature must be Sequence(subfeature)
        if isinstance(feature, Sequence) and feature.length > -1:
            array_values = array.values[
                array.offset * array.type.list_size : (array.offset + len(array)) * array.type.list_size
            ]
            embedded_array_values = _e(array_values, feature.feature)
            if config.PYARROW_VERSION.major < 15:
                return pa.Array.from_buffers(
                    pa.list_(array_values.type, feature.length),
                    len(array),
                    [array.is_valid().buffers()[1]],
                    children=[embedded_array_values],
                )
            else:
                return pa.FixedSizeListArray.from_arrays(embedded_array_values, feature.length, mask=array.is_null())
    if not isinstance(feature, (Sequence, dict, list, tuple)):
        return array
    raise TypeError(f"Couldn't embed array of type\n{_short_str(array.type)}\nwith\n{_short_str(feature)}")


class CastError(ValueError):
    """When it's not possible to cast an Arrow table to a specific schema or set of features"""

    def __init__(self, *args, table_column_names: List[str], requested_column_names: List[str]) -> None:
        super().__init__(*args)
        self.table_column_names = table_column_names
        self.requested_column_names = requested_column_names

    def __reduce__(self):
        # Fix unpickling: TypeError: __init__() missing 2 required keyword-only arguments: 'table_column_names' and 'requested_column_names'
        return partial(
            CastError, table_column_names=self.table_column_names, requested_column_names=self.requested_column_names
        ), ()

    def details(self):
        new_columns = set(self.table_column_names) - set(self.requested_column_names)
        missing_columns = set(self.requested_column_names) - set(self.table_column_names)
        if new_columns and missing_columns:
            return f"there are {len(new_columns)} new columns ({_short_str(new_columns)}) and {len(missing_columns)} missing columns ({_short_str(missing_columns)})."
        elif new_columns:
            return f"there are {len(new_columns)} new columns ({_short_str(new_columns)})"
        else:
            return f"there are {len(missing_columns)} missing columns ({_short_str(missing_columns)})"


def cast_table_to_features(table: pa.Table, features: "Features"):
    """Cast a table to the arrow schema that corresponds to the requested features.

    Args:
        table (`pyarrow.Table`):
            PyArrow table to cast.
        features ([`Features`]):
            Target features.

    Returns:
        table (`pyarrow.Table`): the casted table
    """
    if sorted(table.column_names) != sorted(features):
        raise CastError(
            f"Couldn't cast\n{_short_str(table.schema)}\nto\n{_short_str(features)}\nbecause column names don't match",
            table_column_names=table.column_names,
            requested_column_names=list(features),
        )
    arrays = [cast_array_to_feature(table[name], feature) for name, feature in features.items()]
    return pa.Table.from_arrays(arrays, schema=features.arrow_schema)
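
# --- usage sketch (not part of the vendored file) ---
# A short sketch, assuming `datasets` is installed: column names must match
# exactly, otherwise a CastError carrying both name lists is raised.
import pyarrow as pa
from datasets import Features, Value
from datasets.table import CastError, cast_table_to_features

table = pa.table({"text": ["hi"], "label": [0]})
features = Features({"text": Value("string"), "label": Value("int32")})
print(cast_table_to_features(table, features).schema)

try:
    cast_table_to_features(table, Features({"text": Value("string")}))
except CastError as e:
    print(e.details())  # reports the extra "label" column
# --- end of sketch ---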


def cast_table_to_schema(table: pa.Table, schema: pa.Schema):
    """Cast a table to the arrow schema. Different from `cast_table_to_features`, this method can preserve nullability.

    Args:
        table (`pa.Table`):
            PyArrow table to cast.
        schema (`pa.Schema`):
            Target PyArrow schema.

    Returns:
        `pa.Table`: the casted table
    """
    from .features import Features

    features = Features.from_arrow_schema(schema)
    if sorted(table.column_names) != sorted(features):
        raise CastError(
            f"Couldn't cast\n{_short_str(table.schema)}\nto\n{_short_str(features)}\nbecause column names don't match",
            table_column_names=table.column_names,
            requested_column_names=list(features),
        )
    arrays = [cast_array_to_feature(table[name], feature) for name, feature in features.items()]
    return pa.Table.from_arrays(arrays, schema=schema)


def embed_table_storage(table: pa.Table):
    """Embed external data into a table's storage.

    <Added version="2.4.0"/>

    Args:
        table (`pyarrow.Table`):
            PyArrow table in which to embed data.

    Returns:
        table (`pyarrow.Table`): the table with embedded data
    """
    from .features.features import Features, require_storage_embed

    features = Features.from_arrow_schema(table.schema)
    arrays = [
        embed_array_storage(table[name], feature) if require_storage_embed(feature) else table[name]
        for name, feature in features.items()
    ]
    return pa.Table.from_arrays(arrays, schema=features.arrow_schema)


def table_cast(table: pa.Table, schema: pa.Schema):
    """Improved version of `pa.Table.cast`.

    It supports casting to feature types stored in the schema metadata.

    Args:
        table (`pyarrow.Table`):
            PyArrow table to cast.
        schema (`pyarrow.Schema`):
            Target PyArrow schema.

    Returns:
        table (`pyarrow.Table`): the casted table
    """
    if table.schema != schema:
        return cast_table_to_schema(table, schema)
    elif table.schema.metadata != schema.metadata:
        return table.replace_schema_metadata(schema.metadata)
    else:
        return table
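
# --- usage sketch (not part of the vendored file) ---
# A minimal sketch, assuming `datasets` is installed, of the fast paths in
# `table_cast`: an identical schema returns the table untouched, and a
# metadata-only difference is handled with `replace_schema_metadata` instead
# of a full cast (pa.Schema equality ignores metadata, hence the elif above).
import pyarrow as pa
from datasets.table import table_cast

table = pa.table({"a": [1, 2]})
same = table_cast(table, table.schema)
print(same is table)  # True

with_meta = table.schema.with_metadata({b"origin": b"example"})
print(table_cast(table, with_meta).schema.metadata)  # {b'origin': b'example'}
# --- end of sketch ---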


def table_flatten(table: pa.Table):
    """Improved version of `pa.Table.flatten`.

    It behaves as `pa.Table.flatten` in the sense that it does a 1-step flatten of the columns with a struct type into one column per struct field,
    but updates the metadata and skips decodable features unless the `decode` attribute of these features is set to False.

    Args:
        table (`pa.Table`):
            PyArrow table to flatten.

    Returns:
        `Table`: the flattened table
    """
    from .features import Features

    features = Features.from_arrow_schema(table.schema)
    if any(hasattr(subfeature, "flatten") and subfeature.flatten() == subfeature for subfeature in features.values()):
        flat_arrays = []
        flat_column_names = []
        for field in table.schema:
            array = table.column(field.name)
            subfeature = features[field.name]
            if pa.types.is_struct(field.type) and (
                not hasattr(subfeature, "flatten") or subfeature.flatten() != subfeature
            ):
                flat_arrays.extend(array.flatten())
                flat_column_names.extend([f"{field.name}.{subfield.name}" for subfield in field.type])
            else:
                flat_arrays.append(array)
                flat_column_names.append(field.name)
        flat_table = pa.Table.from_arrays(
            flat_arrays,
            names=flat_column_names,
        )
    else:
        flat_table = table.flatten()
    # Preserve complex types in the metadata
    flat_features = features.flatten(max_depth=2)
    flat_features = Features({column_name: flat_features[column_name] for column_name in flat_table.column_names})
    return flat_table.replace_schema_metadata(flat_features.arrow_schema.metadata)


def table_visitor(table: pa.Table, function: Callable[[pa.Array], None]):
    """Visit all arrays in a table and apply a function to them.

    Args:
        table (`pyarrow.Table`):
            PyArrow table to visit.
        function (`Callable[[pa.Array], None]`):
            Function to apply to each array.
    """
    from .features import Features, Sequence

    features = Features.from_arrow_schema(table.schema)

    def _visit(array, feature):
        if isinstance(array, pa.ChunkedArray):
            for chunk in array.chunks:
                _visit(chunk, feature)
        else:
            if isinstance(array, pa.ExtensionArray):
                array = array.storage
            function(array, feature)
            if pa.types.is_struct(array.type) and not hasattr(feature, "cast_storage"):
                if isinstance(feature, Sequence) and isinstance(feature.feature, dict):
                    feature = {
                        name: Sequence(subfeature, length=feature.length)
                        for name, subfeature in feature.feature.items()
                    }
                for name, subfeature in feature.items():
                    _visit(array.field(name), subfeature)
            elif pa.types.is_list(array.type):
                if isinstance(feature, list):
                    _visit(array.values, feature[0])
                elif isinstance(feature, Sequence):
                    _visit(array.values, feature.feature)

    for name, feature in features.items():
        _visit(table[name], feature)


def table_iter(table: Table, batch_size: int, drop_last_batch=False) -> Iterator[pa.Table]:
    """Iterate over sub-tables of size `batch_size`.

    Args:
        table (`pyarrow.Table`):
            PyArrow table to iterate over.
        batch_size (`int`):
            Size of each sub-table to yield.
        drop_last_batch (`bool`, defaults to `False`):
            Drop the last batch if it is smaller than `batch_size`.
    """
    chunks_buffer = []
    chunks_buffer_size = 0
    for chunk in table.to_reader(max_chunksize=batch_size):
        if len(chunk) == 0:
            continue
        elif chunks_buffer_size + len(chunk) < batch_size:
            chunks_buffer.append(chunk)
            chunks_buffer_size += len(chunk)
            continue
        elif chunks_buffer_size + len(chunk) == batch_size:
            chunks_buffer.append(chunk)
            yield pa.Table.from_batches(chunks_buffer)
            chunks_buffer = []
            chunks_buffer_size = 0
        else:
            cropped_chunk_length = batch_size - chunks_buffer_size
            chunks_buffer.append(chunk.slice(0, cropped_chunk_length))
            yield pa.Table.from_batches(chunks_buffer)
            chunks_buffer = [chunk.slice(cropped_chunk_length, len(chunk) - cropped_chunk_length)]
            chunks_buffer_size = len(chunk) - cropped_chunk_length
    if not drop_last_batch and chunks_buffer:
        yield pa.Table.from_batches(chunks_buffer)
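
# --- usage sketch (not part of the vendored file) ---
# Usage sketch, assuming `datasets` is installed: `table_iter` re-batches the
# table's record batches into sub-tables of exactly `batch_size` rows, with an
# optional short final batch.
import pyarrow as pa
from datasets.table import table_iter

table = pa.table({"a": list(range(10))})
for batch in table_iter(table, batch_size=4):
    print(batch.num_rows)  # 4, 4, 2

for batch in table_iter(table, batch_size=4, drop_last_batch=True):
    print(batch.num_rows)  # 4, 4
# --- end of sketch ---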

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/frozenlist/__init__.py
ADDED
@@ -0,0 +1,98 @@
import os
import sys
import types
from collections.abc import MutableSequence
from functools import total_ordering
from typing import Any, Type

__version__ = "1.5.0"

__all__ = ("FrozenList", "PyFrozenList")  # type: Tuple[str, ...]


NO_EXTENSIONS = bool(os.environ.get("FROZENLIST_NO_EXTENSIONS"))  # type: bool


@total_ordering
class FrozenList(MutableSequence):
    __slots__ = ("_frozen", "_items")

    if sys.version_info >= (3, 9):
        __class_getitem__ = classmethod(types.GenericAlias)
    else:

        @classmethod
        def __class_getitem__(
            cls: Type["FrozenList"],
            cls_item: Any,
        ) -> Type["FrozenList"]:
            return cls

    def __init__(self, items=None):
        self._frozen = False
        if items is not None:
            items = list(items)
        else:
            items = []
        self._items = items

    @property
    def frozen(self):
        return self._frozen

    def freeze(self):
        self._frozen = True

    def __getitem__(self, index):
        return self._items[index]

    def __setitem__(self, index, value):
        if self._frozen:
            raise RuntimeError("Cannot modify frozen list.")
        self._items[index] = value

    def __delitem__(self, index):
        if self._frozen:
            raise RuntimeError("Cannot modify frozen list.")
        del self._items[index]

    def __len__(self):
        return self._items.__len__()

    def __iter__(self):
        return self._items.__iter__()

    def __reversed__(self):
        return self._items.__reversed__()

    def __eq__(self, other):
        return list(self) == other

    def __le__(self, other):
        return list(self) <= other

    def insert(self, pos, item):
        if self._frozen:
            raise RuntimeError("Cannot modify frozen list.")
        self._items.insert(pos, item)

    def __repr__(self):
        return f"<FrozenList(frozen={self._frozen}, {self._items!r})>"

    def __hash__(self):
        if self._frozen:
            return hash(tuple(self))
        else:
            raise RuntimeError("Cannot hash unfrozen list.")


PyFrozenList = FrozenList


if not NO_EXTENSIONS:
    try:
        from ._frozenlist import FrozenList as CFrozenList  # type: ignore
    except ImportError:  # pragma: no cover
        pass
    else:
        FrozenList = CFrozenList  # type: ignore
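
# --- usage sketch (not part of the vendored file) ---
# A short usage sketch of the FrozenList contract shown above: it behaves as a
# regular mutable sequence until `freeze()` is called, after which mutation
# raises RuntimeError and the list becomes hashable.
from frozenlist import FrozenList

fl = FrozenList([1, 2])
fl.append(3)            # fine while unfrozen
fl.freeze()
print(fl.frozen)        # True
print(hash(fl) == hash((1, 2, 3)))  # True: hash of the frozen contents

try:
    fl.append(4)
except RuntimeError as e:
    print(e)            # Cannot modify frozen list.
# --- end of sketch ---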

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/frozenlist/__init__.pyi
ADDED
@@ -0,0 +1,47 @@
from typing import (
    Generic,
    Iterable,
    Iterator,
    List,
    MutableSequence,
    Optional,
    TypeVar,
    Union,
    overload,
)

_T = TypeVar("_T")
_Arg = Union[List[_T], Iterable[_T]]

class FrozenList(MutableSequence[_T], Generic[_T]):
    def __init__(self, items: Optional[_Arg[_T]] = None) -> None: ...
    @property
    def frozen(self) -> bool: ...
    def freeze(self) -> None: ...
    @overload
    def __getitem__(self, i: int) -> _T: ...
    @overload
    def __getitem__(self, s: slice) -> FrozenList[_T]: ...
    @overload
    def __setitem__(self, i: int, o: _T) -> None: ...
    @overload
    def __setitem__(self, s: slice, o: Iterable[_T]) -> None: ...
    @overload
    def __delitem__(self, i: int) -> None: ...
    @overload
    def __delitem__(self, i: slice) -> None: ...
    def __len__(self) -> int: ...
    def __iter__(self) -> Iterator[_T]: ...
    def __reversed__(self) -> Iterator[_T]: ...
    def __eq__(self, other: object) -> bool: ...
    def __le__(self, other: FrozenList[_T]) -> bool: ...
    def __ne__(self, other: object) -> bool: ...
    def __lt__(self, other: FrozenList[_T]) -> bool: ...
    def __ge__(self, other: FrozenList[_T]) -> bool: ...
    def __gt__(self, other: FrozenList[_T]) -> bool: ...
    def insert(self, pos: int, item: _T) -> None: ...
    def __repr__(self) -> str: ...
    def __hash__(self) -> int: ...

# types for C accelerators are the same
CFrozenList = PyFrozenList = FrozenList

Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/frozenlist/_frozenlist.pyx
ADDED
@@ -0,0 +1,123 @@
import sys
import types
from collections.abc import MutableSequence


cdef class FrozenList:

    if sys.version_info >= (3, 9):
        __class_getitem__ = classmethod(types.GenericAlias)
    else:
        @classmethod
        def __class_getitem__(cls, cls_item):
            return cls

    cdef readonly bint frozen
    cdef list _items

    def __init__(self, items=None):
        self.frozen = False
        if items is not None:
            items = list(items)
        else:
            items = []
        self._items = items

    cdef object _check_frozen(self):
        if self.frozen:
            raise RuntimeError("Cannot modify frozen list.")

    cdef inline object _fast_len(self):
        return len(self._items)

    def freeze(self):
        self.frozen = True

    def __getitem__(self, index):
        return self._items[index]

    def __setitem__(self, index, value):
        self._check_frozen()
        self._items[index] = value

    def __delitem__(self, index):
        self._check_frozen()
        del self._items[index]

    def __len__(self):
        return self._fast_len()

    def __iter__(self):
        return self._items.__iter__()

    def __reversed__(self):
        return self._items.__reversed__()

    def __richcmp__(self, other, op):
        if op == 0:  # <
            return list(self) < other
        if op == 1:  # <=
            return list(self) <= other
        if op == 2:  # ==
            return list(self) == other
        if op == 3:  # !=
            return list(self) != other
        if op == 4:  # >
            return list(self) > other
        if op == 5:  # >=
            return list(self) >= other

    def insert(self, pos, item):
        self._check_frozen()
        self._items.insert(pos, item)

    def __contains__(self, item):
        return item in self._items

    def __iadd__(self, items):
        self._check_frozen()
        self._items += list(items)
        return self

    def index(self, item):
        return self._items.index(item)

    def remove(self, item):
        self._check_frozen()
        self._items.remove(item)

    def clear(self):
        self._check_frozen()
        self._items.clear()

    def extend(self, items):
        self._check_frozen()
        self._items += list(items)

    def reverse(self):
        self._check_frozen()
        self._items.reverse()

    def pop(self, index=-1):
        self._check_frozen()
        return self._items.pop(index)

    def append(self, item):
        self._check_frozen()
        return self._items.append(item)

    def count(self, item):
        return self._items.count(item)

    def __repr__(self):
        return '<FrozenList(frozen={}, {!r})>'.format(self.frozen,
                                                      self._items)

    def __hash__(self):
        if self.frozen:
            return hash(tuple(self._items))
        else:
            raise RuntimeError("Cannot hash unfrozen list.")


MutableSequence.register(FrozenList)
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/frozenlist/py.typed
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
Marker
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/httpcore/_models.py
ADDED
@@ -0,0 +1,516 @@
+from __future__ import annotations
+
+import base64
+import ssl
+import typing
+import urllib.parse
+
+# Functions for typechecking...
+
+
+ByteOrStr = typing.Union[bytes, str]
+HeadersAsSequence = typing.Sequence[typing.Tuple[ByteOrStr, ByteOrStr]]
+HeadersAsMapping = typing.Mapping[ByteOrStr, ByteOrStr]
+HeaderTypes = typing.Union[HeadersAsSequence, HeadersAsMapping, None]
+
+Extensions = typing.MutableMapping[str, typing.Any]
+
+
+def enforce_bytes(value: bytes | str, *, name: str) -> bytes:
+    """
+    Any arguments that are ultimately represented as bytes can be specified
+    either as bytes or as strings.
+
+    However, we enforce that any string arguments must only contain characters in
+    the plain ASCII range: chr(0)...chr(127). If you need to use characters
+    outside that range then be precise, and use a byte-wise argument.
+    """
+    if isinstance(value, str):
+        try:
+            return value.encode("ascii")
+        except UnicodeEncodeError:
+            raise TypeError(f"{name} strings may not include unicode characters.")
+    elif isinstance(value, bytes):
+        return value
+
+    seen_type = type(value).__name__
+    raise TypeError(f"{name} must be bytes or str, but got {seen_type}.")
+
+
+def enforce_url(value: URL | bytes | str, *, name: str) -> URL:
+    """
+    Type check for URL parameters.
+    """
+    if isinstance(value, (bytes, str)):
+        return URL(value)
+    elif isinstance(value, URL):
+        return value
+
+    seen_type = type(value).__name__
+    raise TypeError(f"{name} must be a URL, bytes, or str, but got {seen_type}.")
+
+
+def enforce_headers(
+    value: HeadersAsMapping | HeadersAsSequence | None = None, *, name: str
+) -> list[tuple[bytes, bytes]]:
+    """
+    Convenience function that ensures all items in request or response headers
+    are either bytes or strings in the plain ASCII range.
+    """
+    if value is None:
+        return []
+    elif isinstance(value, typing.Mapping):
+        return [
+            (
+                enforce_bytes(k, name="header name"),
+                enforce_bytes(v, name="header value"),
+            )
+            for k, v in value.items()
+        ]
+    elif isinstance(value, typing.Sequence):
+        return [
+            (
+                enforce_bytes(k, name="header name"),
+                enforce_bytes(v, name="header value"),
+            )
+            for k, v in value
+        ]
+
+    seen_type = type(value).__name__
+    raise TypeError(
+        f"{name} must be a mapping or sequence of two-tuples, but got {seen_type}."
+    )
+
+
+def enforce_stream(
+    value: bytes | typing.Iterable[bytes] | typing.AsyncIterable[bytes] | None,
+    *,
+    name: str,
+) -> typing.Iterable[bytes] | typing.AsyncIterable[bytes]:
+    if value is None:
+        return ByteStream(b"")
+    elif isinstance(value, bytes):
+        return ByteStream(value)
+    return value
+
+
+# * https://tools.ietf.org/html/rfc3986#section-3.2.3
+# * https://url.spec.whatwg.org/#url-miscellaneous
+# * https://url.spec.whatwg.org/#scheme-state
+DEFAULT_PORTS = {
+    b"ftp": 21,
+    b"http": 80,
+    b"https": 443,
+    b"ws": 80,
+    b"wss": 443,
+}
+
+
+def include_request_headers(
+    headers: list[tuple[bytes, bytes]],
+    *,
+    url: "URL",
+    content: None | bytes | typing.Iterable[bytes] | typing.AsyncIterable[bytes],
+) -> list[tuple[bytes, bytes]]:
+    headers_set = set(k.lower() for k, v in headers)
+
+    if b"host" not in headers_set:
+        default_port = DEFAULT_PORTS.get(url.scheme)
+        if url.port is None or url.port == default_port:
+            header_value = url.host
+        else:
+            header_value = b"%b:%d" % (url.host, url.port)
+        headers = [(b"Host", header_value)] + headers
+
+    if (
+        content is not None
+        and b"content-length" not in headers_set
+        and b"transfer-encoding" not in headers_set
+    ):
+        if isinstance(content, bytes):
+            content_length = str(len(content)).encode("ascii")
+            headers += [(b"Content-Length", content_length)]
+        else:
+            headers += [(b"Transfer-Encoding", b"chunked")]  # pragma: nocover
+
+    return headers
+
+
+# Interfaces for byte streams...
+
+
+class ByteStream:
+    """
+    A container for non-streaming content, and that supports both sync and async
+    stream iteration.
+    """
+
+    def __init__(self, content: bytes) -> None:
+        self._content = content
+
+    def __iter__(self) -> typing.Iterator[bytes]:
+        yield self._content
+
+    async def __aiter__(self) -> typing.AsyncIterator[bytes]:
+        yield self._content
+
+    def __repr__(self) -> str:
+        return f"<{self.__class__.__name__} [{len(self._content)} bytes]>"
+
+
+class Origin:
+    def __init__(self, scheme: bytes, host: bytes, port: int) -> None:
+        self.scheme = scheme
+        self.host = host
+        self.port = port
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return (
+            isinstance(other, Origin)
+            and self.scheme == other.scheme
+            and self.host == other.host
+            and self.port == other.port
+        )
+
+    def __str__(self) -> str:
+        scheme = self.scheme.decode("ascii")
+        host = self.host.decode("ascii")
+        port = str(self.port)
+        return f"{scheme}://{host}:{port}"
+
+
+class URL:
+    """
+    Represents the URL against which an HTTP request may be made.
+
+    The URL may either be specified as a plain string, for convenience:
+
+    ```python
+    url = httpcore.URL("https://www.example.com/")
+    ```
+
+    Or be constructed with explicitly pre-parsed components:
+
+    ```python
+    url = httpcore.URL(scheme=b'https', host=b'www.example.com', port=None, target=b'/')
+    ```
+
+    Using this second more explicit style allows integrations that are using
+    `httpcore` to pass through URLs that have already been parsed in order to use
+    libraries such as `rfc-3986` rather than relying on the stdlib. It also ensures
+    that URL parsing is treated identically at both the networking level and at any
+    higher layers of abstraction.
+
+    The four components are important here, as they allow the URL to be precisely
+    specified in a pre-parsed format. They also allow certain types of request to
+    be created that could not otherwise be expressed.
+
+    For example, an HTTP request to `http://www.example.com/` forwarded via a proxy
+    at `http://localhost:8080`...
+
+    ```python
+    # Constructs an HTTP request with a complete URL as the target:
+    # GET https://www.example.com/ HTTP/1.1
+    url = httpcore.URL(
+        scheme=b'http',
+        host=b'localhost',
+        port=8080,
+        target=b'https://www.example.com/'
+    )
+    request = httpcore.Request(
+        method="GET",
+        url=url
+    )
+    ```
+
+    Another example is constructing an `OPTIONS *` request...
+
+    ```python
+    # Constructs an 'OPTIONS *' HTTP request:
+    # OPTIONS * HTTP/1.1
+    url = httpcore.URL(scheme=b'https', host=b'www.example.com', target=b'*')
+    request = httpcore.Request(method="OPTIONS", url=url)
+    ```
+
+    This kind of request is not possible to formulate with a URL string,
+    because the `/` delimiter is always used to demark the target from the
+    host/port portion of the URL.
+
+    For convenience, string-like arguments may be specified either as strings or
+    as bytes. However, once a request is being issued over-the-wire, the URL
+    components are always ultimately required to be a bytewise representation.
+
+    In order to avoid any ambiguity over character encodings, when strings are used
+    as arguments, they must be strictly limited to the ASCII range `chr(0)`-`chr(127)`.
+    If you require a bytewise representation that is outside this range you must
+    handle the character encoding directly, and pass a bytes instance.
+    """
+
+    def __init__(
+        self,
+        url: bytes | str = "",
+        *,
+        scheme: bytes | str = b"",
+        host: bytes | str = b"",
+        port: int | None = None,
+        target: bytes | str = b"",
+    ) -> None:
+        """
+        Parameters:
+            url: The complete URL as a string or bytes.
+            scheme: The URL scheme as a string or bytes.
+                Typically either `"http"` or `"https"`.
+            host: The URL host as a string or bytes. Such as `"www.example.com"`.
+            port: The port to connect to. Either an integer or `None`.
+            target: The target of the HTTP request. Such as `"/items?search=red"`.
+        """
+        if url:
+            parsed = urllib.parse.urlparse(enforce_bytes(url, name="url"))
+            self.scheme = parsed.scheme
+            self.host = parsed.hostname or b""
+            self.port = parsed.port
+            self.target = (parsed.path or b"/") + (
+                b"?" + parsed.query if parsed.query else b""
+            )
+        else:
+            self.scheme = enforce_bytes(scheme, name="scheme")
+            self.host = enforce_bytes(host, name="host")
+            self.port = port
+            self.target = enforce_bytes(target, name="target")
+
+    @property
+    def origin(self) -> Origin:
+        default_port = {
+            b"http": 80,
+            b"https": 443,
+            b"ws": 80,
+            b"wss": 443,
+            b"socks5": 1080,
+            b"socks5h": 1080,
+        }[self.scheme]
+        return Origin(
+            scheme=self.scheme, host=self.host, port=self.port or default_port
+        )
+
+    def __eq__(self, other: typing.Any) -> bool:
+        return (
+            isinstance(other, URL)
+            and other.scheme == self.scheme
+            and other.host == self.host
+            and other.port == self.port
+            and other.target == self.target
+        )
+
+    def __bytes__(self) -> bytes:
+        if self.port is None:
+            return b"%b://%b%b" % (self.scheme, self.host, self.target)
+        return b"%b://%b:%d%b" % (self.scheme, self.host, self.port, self.target)
+
+    def __repr__(self) -> str:
+        return (
+            f"{self.__class__.__name__}(scheme={self.scheme!r}, "
+            f"host={self.host!r}, port={self.port!r}, target={self.target!r})"
+        )
+
+
+class Request:
+    """
+    An HTTP request.
+    """
+
+    def __init__(
+        self,
+        method: bytes | str,
+        url: URL | bytes | str,
+        *,
+        headers: HeaderTypes = None,
+        content: bytes
+        | typing.Iterable[bytes]
+        | typing.AsyncIterable[bytes]
+        | None = None,
+        extensions: Extensions | None = None,
+    ) -> None:
+        """
+        Parameters:
+            method: The HTTP request method, either as a string or bytes.
+                For example: `GET`.
+            url: The request URL, either as a `URL` instance, or as a string or bytes.
+                For example: `"https://www.example.com"`.
+            headers: The HTTP request headers.
+            content: The content of the request body.
+            extensions: A dictionary of optional extra information included on
+                the request. Possible keys include `"timeout"`, and `"trace"`.
+        """
+        self.method: bytes = enforce_bytes(method, name="method")
+        self.url: URL = enforce_url(url, name="url")
+        self.headers: list[tuple[bytes, bytes]] = enforce_headers(
+            headers, name="headers"
+        )
+        self.stream: typing.Iterable[bytes] | typing.AsyncIterable[bytes] = (
+            enforce_stream(content, name="content")
+        )
+        self.extensions = {} if extensions is None else extensions
+
+        if "target" in self.extensions:
+            self.url = URL(
+                scheme=self.url.scheme,
+                host=self.url.host,
+                port=self.url.port,
+                target=self.extensions["target"],
+            )
+
+    def __repr__(self) -> str:
+        return f"<{self.__class__.__name__} [{self.method!r}]>"
+
+
+class Response:
+    """
+    An HTTP response.
+    """
+
+    def __init__(
+        self,
+        status: int,
+        *,
+        headers: HeaderTypes = None,
+        content: bytes
+        | typing.Iterable[bytes]
+        | typing.AsyncIterable[bytes]
+        | None = None,
+        extensions: Extensions | None = None,
+    ) -> None:
+        """
+        Parameters:
+            status: The HTTP status code of the response. For example `200`.
+            headers: The HTTP response headers.
+            content: The content of the response body.
+            extensions: A dictionary of optional extra information included on
+                the response. Possible keys include `"http_version"`,
+                `"reason_phrase"`, and `"network_stream"`.
+        """
+        self.status: int = status
+        self.headers: list[tuple[bytes, bytes]] = enforce_headers(
+            headers, name="headers"
+        )
+        self.stream: typing.Iterable[bytes] | typing.AsyncIterable[bytes] = (
+            enforce_stream(content, name="content")
+        )
+        self.extensions = {} if extensions is None else extensions
+
+        self._stream_consumed = False
+
+    @property
+    def content(self) -> bytes:
+        if not hasattr(self, "_content"):
+            if isinstance(self.stream, typing.Iterable):
+                raise RuntimeError(
+                    "Attempted to access 'response.content' on a streaming response. "
+                    "Call 'response.read()' first."
+                )
+            else:
+                raise RuntimeError(
+                    "Attempted to access 'response.content' on a streaming response. "
+                    "Call 'await response.aread()' first."
+                )
+        return self._content
+
+    def __repr__(self) -> str:
+        return f"<{self.__class__.__name__} [{self.status}]>"
+
+    # Sync interface...
+
+    def read(self) -> bytes:
+        if not isinstance(self.stream, typing.Iterable):  # pragma: nocover
+            raise RuntimeError(
+                "Attempted to read an asynchronous response using 'response.read()'. "
+                "You should use 'await response.aread()' instead."
+            )
+        if not hasattr(self, "_content"):
+            self._content = b"".join([part for part in self.iter_stream()])
+        return self._content
+
+    def iter_stream(self) -> typing.Iterator[bytes]:
+        if not isinstance(self.stream, typing.Iterable):  # pragma: nocover
+            raise RuntimeError(
+                "Attempted to stream an asynchronous response using 'for ... in "
+                "response.iter_stream()'. "
+                "You should use 'async for ... in response.aiter_stream()' instead."
+            )
+        if self._stream_consumed:
+            raise RuntimeError(
+                "Attempted to call 'for ... in response.iter_stream()' more than once."
+            )
+        self._stream_consumed = True
+        for chunk in self.stream:
+            yield chunk
+
+    def close(self) -> None:
+        if not isinstance(self.stream, typing.Iterable):  # pragma: nocover
+            raise RuntimeError(
+                "Attempted to close an asynchronous response using 'response.close()'. "
+                "You should use 'await response.aclose()' instead."
+            )
+        if hasattr(self.stream, "close"):
+            self.stream.close()
+
+    # Async interface...
+
+    async def aread(self) -> bytes:
+        if not isinstance(self.stream, typing.AsyncIterable):  # pragma: nocover
+            raise RuntimeError(
+                "Attempted to read a synchronous response using "
+                "'await response.aread()'. "
+                "You should use 'response.read()' instead."
+            )
+        if not hasattr(self, "_content"):
+            self._content = b"".join([part async for part in self.aiter_stream()])
+        return self._content
+
+    async def aiter_stream(self) -> typing.AsyncIterator[bytes]:
+        if not isinstance(self.stream, typing.AsyncIterable):  # pragma: nocover
+            raise RuntimeError(
+                "Attempted to stream a synchronous response using 'async for ... in "
+                "response.aiter_stream()'. "
+                "You should use 'for ... in response.iter_stream()' instead."
+            )
+        if self._stream_consumed:
+            raise RuntimeError(
+                "Attempted to call 'async for ... in response.aiter_stream()' "
+                "more than once."
+            )
+        self._stream_consumed = True
+        async for chunk in self.stream:
+            yield chunk
+
+    async def aclose(self) -> None:
+        if not isinstance(self.stream, typing.AsyncIterable):  # pragma: nocover
+            raise RuntimeError(
+                "Attempted to close a synchronous response using "
+                "'await response.aclose()'. "
+                "You should use 'response.close()' instead."
+            )
+        if hasattr(self.stream, "aclose"):
+            await self.stream.aclose()
+
+
+class Proxy:
+    def __init__(
+        self,
+        url: URL | bytes | str,
+        auth: tuple[bytes | str, bytes | str] | None = None,
+        headers: HeadersAsMapping | HeadersAsSequence | None = None,
+        ssl_context: ssl.SSLContext | None = None,
+    ):
+        self.url = enforce_url(url, name="url")
+        self.headers = enforce_headers(headers, name="headers")
+        self.ssl_context = ssl_context
+
+        if auth is not None:
+            username = enforce_bytes(auth[0], name="auth")
+            password = enforce_bytes(auth[1], name="auth")
+            userpass = username + b":" + password
+            authorization = b"Basic " + base64.b64encode(userpass)
+            self.auth: tuple[bytes, bytes] | None = (username, password)
+            self.headers = [(b"Proxy-Authorization", authorization)] + self.headers
+        else:
+            self.auth = None
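Note: the docstrings above describe the two ways of building a `URL` and how request/response bodies are normalized into byte streams. A short sketch tying them together (illustrative only, using the public `httpcore` names defined in this file):

```python
import httpcore

# A URL parsed from a plain string...
url = httpcore.URL("https://www.example.com/items?search=red")
assert url.scheme == b"https" and url.target == b"/items?search=red"

# ...compares equal to one built from pre-parsed byte components.
assert url == httpcore.URL(
    scheme=b"https", host=b"www.example.com", port=None, target=b"/items?search=red"
)

# Headers and content are normalized by the enforce_* helpers on construction.
request = httpcore.Request("GET", url, headers={"Accept": "text/html"})
assert request.headers == [(b"Accept", b"text/html")]

# Non-streaming content is wrapped in ByteStream, so read() works immediately.
response = httpcore.Response(200, content=b"Hello, world!")
assert response.read() == b"Hello, world!"
assert response.content == b"Hello, world!"
```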
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/httpcore/_ssl.py
ADDED
@@ -0,0 +1,9 @@
+import ssl
+
+import certifi
+
+
+def default_ssl_context() -> ssl.SSLContext:
+    context = ssl.create_default_context()
+    context.load_verify_locations(certifi.where())
+    return context
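Note: `default_ssl_context()` layers certifi's CA bundle onto `ssl.create_default_context()`, which keeps certificate verification and hostname checking enabled. A quick check (importing the private module shown in this diff, so treat the import path as an assumption about this vendored layout):

```python
import ssl
from httpcore._ssl import default_ssl_context  # private module from the diff above

ctx = default_ssl_context()
assert ctx.verify_mode == ssl.CERT_REQUIRED  # stdlib default, left enabled
assert ctx.check_hostname
```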
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/httpcore/_utils.py
ADDED
@@ -0,0 +1,37 @@
+from __future__ import annotations
+
+import select
+import socket
+import sys
+
+
+def is_socket_readable(sock: socket.socket | None) -> bool:
+    """
+    Return whether a socket, as identified by its file descriptor, is readable.
+    "A socket is readable" means that the read buffer isn't empty, i.e. that calling
+    .recv() on it would immediately return some data.
+    """
+    # NOTE: we want to check for readability without actually attempting to read,
+    # because we don't want to block forever if it's not readable.
+
+    # In the case that the socket no longer exists, or cannot return a file
+    # descriptor, we treat it as being readable, as if the next read operation
+    # on it is ready to return the terminating `b""`.
+    sock_fd = None if sock is None else sock.fileno()
+    if sock_fd is None or sock_fd < 0:  # pragma: nocover
+        return True
+
+    # The implementation below was stolen from:
+    # https://github.com/python-trio/trio/blob/20ee2b1b7376db637435d80e266212a35837ddcc/trio/_socket.py#L471-L478
+    # See also: https://github.com/encode/httpcore/pull/193#issuecomment-703129316
+
+    # Use select.select on Windows or when poll is unavailable, and select.poll
+    # everywhere else. (E.g. when eventlet is in use. See #327)
+    if (
+        sys.platform == "win32" or getattr(select, "poll", None) is None
+    ):  # pragma: nocover
+        rready, _, _ = select.select([sock_fd], [], [], 0)
+        return bool(rready)
+    p = select.poll()
+    p.register(sock_fd, select.POLLIN)
+    return bool(p.poll(0))
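Note: `is_socket_readable()` polls with a zero timeout, so it never blocks; it only reports whether `.recv()` would return immediately. A small sanity check with a local socket pair (a sketch, assuming a platform where `socket.socketpair()` is available):

```python
import socket
from httpcore._utils import is_socket_readable  # module shown in the diff above

a, b = socket.socketpair()
print(is_socket_readable(a))   # False: nothing buffered yet
b.sendall(b"ping")
print(is_socket_readable(a))   # True: recv() would return data immediately
a.close()
b.close()
```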
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/httpcore/py.typed
ADDED
File without changes
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/jinja2-3.1.6.dist-info/INSTALLER
ADDED
@@ -0,0 +1 @@
+pip
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/jinja2-3.1.6.dist-info/METADATA
ADDED
@@ -0,0 +1,84 @@
+Metadata-Version: 2.4
+Name: Jinja2
+Version: 3.1.6
+Summary: A very fast and expressive template engine.
+Maintainer-email: Pallets <contact@palletsprojects.com>
+Requires-Python: >=3.7
+Description-Content-Type: text/markdown
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Environment :: Web Environment
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python
+Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content
+Classifier: Topic :: Text Processing :: Markup :: HTML
+Classifier: Typing :: Typed
+License-File: LICENSE.txt
+Requires-Dist: MarkupSafe>=2.0
+Requires-Dist: Babel>=2.7 ; extra == "i18n"
+Project-URL: Changes, https://jinja.palletsprojects.com/changes/
+Project-URL: Chat, https://discord.gg/pallets
+Project-URL: Documentation, https://jinja.palletsprojects.com/
+Project-URL: Donate, https://palletsprojects.com/donate
+Project-URL: Source, https://github.com/pallets/jinja/
+Provides-Extra: i18n
+
+# Jinja
+
+Jinja is a fast, expressive, extensible templating engine. Special
+placeholders in the template allow writing code similar to Python
+syntax. Then the template is passed data to render the final document.
+
+It includes:
+
+- Template inheritance and inclusion.
+- Define and import macros within templates.
+- HTML templates can use autoescaping to prevent XSS from untrusted
+  user input.
+- A sandboxed environment can safely render untrusted templates.
+- AsyncIO support for generating templates and calling async
+  functions.
+- I18N support with Babel.
+- Templates are compiled to optimized Python code just-in-time and
+  cached, or can be compiled ahead-of-time.
+- Exceptions point to the correct line in templates to make debugging
+  easier.
+- Extensible filters, tests, functions, and even syntax.
+
+Jinja's philosophy is that while application logic belongs in Python if
+possible, it shouldn't make the template designer's job difficult by
+restricting functionality too much.
+
+
+## In A Nutshell
+
+```jinja
+{% extends "base.html" %}
+{% block title %}Members{% endblock %}
+{% block content %}
+  <ul>
+  {% for user in users %}
+    <li><a href="{{ user.url }}">{{ user.username }}</a></li>
+  {% endfor %}
+  </ul>
+{% endblock %}
+```
+
+## Donate
+
+The Pallets organization develops and supports Jinja and other popular
+packages. In order to grow the community of contributors and users, and
+allow the maintainers to devote more time to the projects, [please
+donate today][].
+
+[please donate today]: https://palletsprojects.com/donate
+
+## Contributing
+
+See our [detailed contributing documentation][contrib] for many ways to
+contribute, including reporting issues, requesting features, asking or answering
+questions, and making PRs.
+
+[contrib]: https://palletsprojects.com/contributing/
+
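Note: the "In A Nutshell" template above extends a `base.html` that the METADATA doesn't show. A self-contained rendering sketch, with an invented stand-in `base.html` (not shipped with Jinja):

```python
from jinja2 import DictLoader, Environment

env = Environment(loader=DictLoader({
    # Hypothetical parent template supplying the blocks the example overrides.
    "base.html": (
        "<title>{% block title %}{% endblock %}</title>\n"
        "{% block content %}{% endblock %}"
    ),
    "members.html": (
        '{% extends "base.html" %}'
        "{% block title %}Members{% endblock %}"
        "{% block content %}<ul>{% for user in users %}"
        '<li><a href="{{ user.url }}">{{ user.username }}</a></li>'
        "{% endfor %}</ul>{% endblock %}"
    ),
}))

html = env.get_template("members.html").render(
    users=[{"url": "/u/alice", "username": "alice"}]
)
print(html)
```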
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/jinja2-3.1.6.dist-info/RECORD
ADDED
@@ -0,0 +1,57 @@
+jinja2-3.1.6.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+jinja2-3.1.6.dist-info/METADATA,sha256=aMVUj7Z8QTKhOJjZsx7FDGvqKr3ZFdkh8hQ1XDpkmcg,2871
+jinja2-3.1.6.dist-info/RECORD,,
+jinja2-3.1.6.dist-info/WHEEL,sha256=_2ozNFCLWc93bK4WKHCO-eDUENDlo-dgc9cU3qokYO4,82
+jinja2-3.1.6.dist-info/entry_points.txt,sha256=OL85gYU1eD8cuPlikifFngXpeBjaxl6rIJ8KkC_3r-I,58
+jinja2-3.1.6.dist-info/licenses/LICENSE.txt,sha256=O0nc7kEF6ze6wQ-vG-JgQI_oXSUrjp3y4JefweCUQ3s,1475
+jinja2/__init__.py,sha256=xxepO9i7DHsqkQrgBEduLtfoz2QCuT6_gbL4XSN1hbU,1928
+jinja2/__pycache__/__init__.cpython-312.pyc,,
+jinja2/__pycache__/_identifier.cpython-312.pyc,,
+jinja2/__pycache__/async_utils.cpython-312.pyc,,
+jinja2/__pycache__/bccache.cpython-312.pyc,,
+jinja2/__pycache__/compiler.cpython-312.pyc,,
+jinja2/__pycache__/constants.cpython-312.pyc,,
+jinja2/__pycache__/debug.cpython-312.pyc,,
+jinja2/__pycache__/defaults.cpython-312.pyc,,
+jinja2/__pycache__/environment.cpython-312.pyc,,
+jinja2/__pycache__/exceptions.cpython-312.pyc,,
+jinja2/__pycache__/ext.cpython-312.pyc,,
+jinja2/__pycache__/filters.cpython-312.pyc,,
+jinja2/__pycache__/idtracking.cpython-312.pyc,,
+jinja2/__pycache__/lexer.cpython-312.pyc,,
+jinja2/__pycache__/loaders.cpython-312.pyc,,
+jinja2/__pycache__/meta.cpython-312.pyc,,
+jinja2/__pycache__/nativetypes.cpython-312.pyc,,
+jinja2/__pycache__/nodes.cpython-312.pyc,,
+jinja2/__pycache__/optimizer.cpython-312.pyc,,
+jinja2/__pycache__/parser.cpython-312.pyc,,
+jinja2/__pycache__/runtime.cpython-312.pyc,,
+jinja2/__pycache__/sandbox.cpython-312.pyc,,
+jinja2/__pycache__/tests.cpython-312.pyc,,
+jinja2/__pycache__/utils.cpython-312.pyc,,
+jinja2/__pycache__/visitor.cpython-312.pyc,,
+jinja2/_identifier.py,sha256=_zYctNKzRqlk_murTNlzrju1FFJL7Va_Ijqqd7ii2lU,1958
+jinja2/async_utils.py,sha256=vK-PdsuorOMnWSnEkT3iUJRIkTnYgO2T6MnGxDgHI5o,2834
+jinja2/bccache.py,sha256=gh0qs9rulnXo0PhX5jTJy2UHzI8wFnQ63o_vw7nhzRg,14061
+jinja2/compiler.py,sha256=9RpCQl5X88BHllJiPsHPh295Hh0uApvwFJNQuutULeM,74131
+jinja2/constants.py,sha256=GMoFydBF_kdpaRKPoM5cl5MviquVRLVyZtfp5-16jg0,1433
+jinja2/debug.py,sha256=CnHqCDHd-BVGvti_8ZsTolnXNhA3ECsY-6n_2pwU8Hw,6297
+jinja2/defaults.py,sha256=boBcSw78h-lp20YbaXSJsqkAI2uN_mD_TtCydpeq5wU,1267
+jinja2/environment.py,sha256=9nhrP7Ch-NbGX00wvyr4yy-uhNHq2OCc60ggGrni_fk,61513
+jinja2/exceptions.py,sha256=ioHeHrWwCWNaXX1inHmHVblvc4haO7AXsjCp3GfWvx0,5071
+jinja2/ext.py,sha256=5PF5eHfh8mXAIxXHHRB2xXbXohi8pE3nHSOxa66uS7E,31875
+jinja2/filters.py,sha256=PQ_Egd9n9jSgtnGQYyF4K5j2nYwhUIulhPnyimkdr-k,55212
+jinja2/idtracking.py,sha256=-ll5lIp73pML3ErUYiIJj7tdmWxcH_IlDv3yA_hiZYo,10555
+jinja2/lexer.py,sha256=LYiYio6br-Tep9nPcupWXsPEtjluw3p1mU-lNBVRUfk,29786
+jinja2/loaders.py,sha256=wIrnxjvcbqh5VwW28NSkfotiDq8qNCxIOSFbGUiSLB4,24055
+jinja2/meta.py,sha256=OTDPkaFvU2Hgvx-6akz7154F8BIWaRmvJcBFvwopHww,4397
+jinja2/nativetypes.py,sha256=7GIGALVJgdyL80oZJdQUaUfwSt5q2lSSZbXt0dNf_M4,4210
+jinja2/nodes.py,sha256=m1Duzcr6qhZI8JQ6VyJgUNinjAf5bQzijSmDnMsvUx8,34579
+jinja2/optimizer.py,sha256=rJnCRlQ7pZsEEmMhsQDgC_pKyDHxP5TPS6zVPGsgcu8,1651
+jinja2/parser.py,sha256=lLOFy3sEmHc5IaEHRiH1sQVnId2moUQzhyeJZTtdY30,40383
+jinja2/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+jinja2/runtime.py,sha256=gDk-GvdriJXqgsGbHgrcKTP0Yp6zPXzhzrIpCFH3jAU,34249
+jinja2/sandbox.py,sha256=Mw2aitlY2I8la7FYhcX2YG9BtUYcLnD0Gh3d29cDWrY,15009
+jinja2/tests.py,sha256=VLsBhVFnWg-PxSBz1MhRnNWgP1ovXk3neO1FLQMeC9Q,5926
+jinja2/utils.py,sha256=rRp3o9e7ZKS4fyrWRbELyLcpuGVTFcnooaOa1qx_FIk,24129
+jinja2/visitor.py,sha256=EcnL1PIwf_4RVCOMxsRNuR8AXHbS1qfAdMOE2ngKJz4,3557
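Note: each RECORD row is `path,sha256=<hash>,<size>`, where the hash is the file's SHA-256 digest encoded as unpadded URL-safe base64 (the wheel/PEP 376 convention). A sketch reproducing the INSTALLER entry above from its known 4-byte content `pip\n`:

```python
import base64
import hashlib

def record_hash(data: bytes) -> str:
    # SHA-256, URL-safe base64, trailing '=' padding stripped.
    digest = hashlib.sha256(data).digest()
    return base64.urlsafe_b64encode(digest).rstrip(b"=").decode("ascii")

# The INSTALLER file is recorded above as 4 bytes with exactly this hash:
assert record_hash(b"pip\n") == "zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg"
```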
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/jinja2-3.1.6.dist-info/WHEEL
ADDED
@@ -0,0 +1,4 @@
+Wheel-Version: 1.0
+Generator: flit 3.11.0
+Root-Is-Purelib: true
+Tag: py3-none-any
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/jinja2-3.1.6.dist-info/entry_points.txt
ADDED
@@ -0,0 +1,3 @@
+[babel.extractors]
+jinja2=jinja2.ext:babel_extract[i18n]
+
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml-6.0.2.dist-info/INSTALLER
ADDED
@@ -0,0 +1 @@
+pip
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml-6.0.2.dist-info/METADATA
ADDED
@@ -0,0 +1,103 @@
+Metadata-Version: 2.4
+Name: lxml
+Version: 6.0.2
+Summary: Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API.
+Home-page: https://lxml.de/
+Author: lxml dev team
+Author-email: lxml@lxml.de
+Maintainer: lxml dev team
+Maintainer-email: lxml@lxml.de
+License: BSD-3-Clause
+Project-URL: Source, https://github.com/lxml/lxml
+Project-URL: Bug Tracker, https://bugs.launchpad.net/lxml
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Intended Audience :: Developers
+Classifier: Intended Audience :: Information Technology
+Classifier: Programming Language :: Cython
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.8
+Classifier: Programming Language :: Python :: 3.9
+Classifier: Programming Language :: Python :: 3.10
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
+Classifier: Programming Language :: C
+Classifier: Operating System :: OS Independent
+Classifier: Topic :: Text Processing :: Markup :: HTML
+Classifier: Topic :: Text Processing :: Markup :: XML
+Classifier: Topic :: Software Development :: Libraries :: Python Modules
+Requires-Python: >=3.8
+License-File: LICENSE.txt
+License-File: LICENSES.txt
+Provides-Extra: source
+Provides-Extra: cssselect
+Requires-Dist: cssselect>=0.7; extra == "cssselect"
+Provides-Extra: html5
+Requires-Dist: html5lib; extra == "html5"
+Provides-Extra: htmlsoup
+Requires-Dist: BeautifulSoup4; extra == "htmlsoup"
+Provides-Extra: html-clean
+Requires-Dist: lxml_html_clean; extra == "html-clean"
+Dynamic: author
+Dynamic: author-email
+Dynamic: classifier
+Dynamic: description
+Dynamic: home-page
+Dynamic: license
+Dynamic: license-file
+Dynamic: maintainer
+Dynamic: maintainer-email
+Dynamic: project-url
+Dynamic: provides-extra
+Dynamic: requires-python
+Dynamic: summary
+
+lxml is a Pythonic, mature binding for the libxml2 and libxslt libraries.
+It provides safe and convenient access to these libraries using the
+ElementTree API.
+
+It extends the ElementTree API significantly to offer support for XPath,
+RelaxNG, XML Schema, XSLT, C14N and much more.
+
+To contact the project, go to the `project home page <https://lxml.de/>`_
+or see our bug tracker at https://launchpad.net/lxml
+
+In case you want to use the current in-development version of lxml,
+you can get it from the github repository at
+https://github.com/lxml/lxml . Note that this requires Cython to
+build the sources, see the build instructions on the project home page.
+
+
+After an official release of a new stable series, bug fixes may become available at
+https://github.com/lxml/lxml/tree/lxml-6.0 .
+Running ``pip install https://github.com/lxml/lxml/archive/refs/heads/lxml-6.0.tar.gz``
+will install the unreleased branch state as soon as a maintenance branch has been established.
+Note that this requires Cython to be installed at an appropriate version for the build.
+
+6.0.2 (2025-09-21)
+==================
+
+Bugs fixed
+----------
+
+* LP#2125278: Compilation with libxml2 2.15.0 failed.
+  Original patch by Xi Ruoyao.
+
+* Setting ``decompress=True`` in the parser had no effect in libxml2 2.15.
+
+* Binary wheels on Linux and macOS use the library version libxml2 2.14.6.
+  See https://gitlab.gnome.org/GNOME/libxml2/-/releases/v2.14.6
+
+* Test failures in libxml2 2.15.0 were fixed.
+
+Other changes
+-------------
+
+* Binary wheels for Py3.9-3.11 on the ``riscv64`` architecture were added.
+
+* Error constants were updated to match libxml2 2.15.0.
+
+* Built using Cython 3.1.4.
+
+
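Note: the description above highlights the ElementTree API plus extended XPath support. A minimal sketch of both on a parsed document:

```python
from lxml import etree

root = etree.fromstring(
    "<doc><item id='1'>red</item><item id='2'>blue</item></doc>"
)

# ElementTree-style access...
print(root[0].get("id"), root[0].text)        # -> 1 red

# ...and the XPath support the description mentions.
print(root.xpath("//item[@id='2']/text()"))   # -> ['blue']
```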
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml-6.0.2.dist-info/RECORD
ADDED
@@ -0,0 +1,204 @@
| 1 |
+
lxml-6.0.2.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
|
| 2 |
+
lxml-6.0.2.dist-info/METADATA,sha256=0qIHkwlNTTMz4-c5e8ZnbbGgt_vpYZHCEoqXyckR95Q,3622
|
| 3 |
+
lxml-6.0.2.dist-info/RECORD,,
|
| 4 |
+
lxml-6.0.2.dist-info/WHEEL,sha256=1rk9WkINO5IYd_dGyocTHV6htge3I27wu_Vax8WCadA,152
|
| 5 |
+
lxml-6.0.2.dist-info/licenses/LICENSE.txt,sha256=j8K1aBM1FuRoRdIUeRet7uFkjnCumrXtbFQXr-9M6FU,1507
|
| 6 |
+
lxml-6.0.2.dist-info/licenses/LICENSES.txt,sha256=QdSd1AaqDhVIptXyGjDWv2OLPNlutyid00jYPtLkA5I,1514
|
| 7 |
+
lxml-6.0.2.dist-info/top_level.txt,sha256=NjD988wqaKq512nshNdLt-uDxsjkp4Bh51m6N-dhUrk,5
|
| 8 |
+
lxml/ElementInclude.py,sha256=PSLeZFvCa76WHJulPLxcZXJtCI2-4dK2CtqPRiYOAQg,8560
|
| 9 |
+
lxml/__init__.py,sha256=rgOcPyZUNBFL30ylxIxd8fHHWi6TwyIUCi8Av84XWwo,574
|
| 10 |
+
lxml/__pycache__/ElementInclude.cpython-312.pyc,,
|
| 11 |
+
lxml/__pycache__/__init__.cpython-312.pyc,,
|
| 12 |
+
lxml/__pycache__/_elementpath.cpython-312.pyc,,
|
| 13 |
+
lxml/__pycache__/builder.cpython-312.pyc,,
|
| 14 |
+
lxml/__pycache__/cssselect.cpython-312.pyc,,
|
| 15 |
+
lxml/__pycache__/doctestcompare.cpython-312.pyc,,
|
| 16 |
+
lxml/__pycache__/pyclasslookup.cpython-312.pyc,,
|
| 17 |
+
lxml/__pycache__/sax.cpython-312.pyc,,
|
| 18 |
+
lxml/__pycache__/usedoctest.cpython-312.pyc,,
|
| 19 |
+
lxml/_elementpath.cpython-312-x86_64-linux-gnu.so,sha256=1mB7tnIOx_08TqlYHQQSYJX5SXE4lQZrrnexJZBuvi8,217352
|
| 20 |
+
lxml/_elementpath.py,sha256=b80hM3ndAkTtRX6v54za3LkkAqCcd0700BbMPZHnTBU,10959
|
| 21 |
+
lxml/apihelpers.pxi,sha256=9S6bzp-VKCUPZv0f6-el5PsbPFN4FJqSnMCIYilS0eU,63881
|
| 22 |
+
lxml/builder.cpython-312-x86_64-linux-gnu.so,sha256=iSov_1syOR8dCLyAPsAlfGOkc67Yl1GX7I93Af993ZI,129080
|
| 23 |
+
lxml/builder.py,sha256=KI1HxHTd4wJqqVfmTRtSbXBQdl2T-P36ih4hT-J3MNw,8485
|
| 24 |
+
lxml/classlookup.pxi,sha256=Tax8Vhbm5C6UCjgmRFsYjW0pFHxIuTthH1MOgASDLgc,22435
|
| 25 |
+
lxml/cleanup.pxi,sha256=ZNEpbv7qx_ICPzsxhCaMUHCOfiznOoZ_u3jlYXHAuh4,8454
|
| 26 |
+
lxml/cssselect.py,sha256=_wZdX-B9p5MeIYABmENIYRWEkwXwX-7jO8Dkf-1rUZU,3306
|
| 27 |
+
lxml/debug.pxi,sha256=KTcpR8-slUYvmIPbE35GoHDNTb-gjTEvD7bw6LltM4c,1125
|
| 28 |
+
lxml/docloader.pxi,sha256=bYSZAxxbBEfVzfLXTUWFRfOyUTfV23L7i9hR2dgtSNY,5772
|
| 29 |
+
lxml/doctestcompare.py,sha256=40EDnkwpcvW86qNa86990OXF42xdHaosSZoiBsEjkzU,17731
|
| 30 |
+
lxml/dtd.pxi,sha256=IAKkmA4ZoC68sqAWcTqoS8jEGYcPQrVMCZgn4iLBYko,15281
|
| 31 |
+
lxml/etree.cpython-312-x86_64-linux-gnu.so,sha256=4SybuGGBSJ2dF8AZo5PSuo8BaiLbT3eF8sofIH2RT_U,5395056
|
| 32 |
+
lxml/etree.h,sha256=_NkGkD3C_jpE4UegvQ6Y32_ycTbUCLyOBz9xfWRPkug,9792
|
| 33 |
+
lxml/etree.pyx,sha256=2qCb8ZNjsdoB0fUELYwAM4ldLQZWS5_gt-OxKEUM-vs,138014
|
| 34 |
+
lxml/etree_api.h,sha256=dNCm28ubaVS8SbhLuxs9JvYWg41NoR_yD3qTRr7hliA,17372
|
| 35 |
+
lxml/extensions.pxi,sha256=xKLad35EQgpsDhs07tw31aKJBBMWIK9rMc0JTXETAUA,32022
|
| 36 |
+
lxml/html/ElementSoup.py,sha256=s_dLobLMuKn2DhexR-iDXdZrMFg1RjLy1feHsIeZMpw,320
|
| 37 |
+
lxml/html/__init__.py,sha256=CC5WdsvSptZhr9MZya1qsL6JKVbviYdrHOhXrGhmORg,64425
|
| 38 |
+
lxml/html/__pycache__/ElementSoup.cpython-312.pyc,,
|
| 39 |
+
lxml/html/__pycache__/__init__.cpython-312.pyc,,
|
| 40 |
+
lxml/html/__pycache__/_diffcommand.cpython-312.pyc,,
|
| 41 |
+
lxml/html/__pycache__/_difflib.cpython-312.pyc,,
|
| 42 |
+
lxml/html/__pycache__/_html5builder.cpython-312.pyc,,
|
| 43 |
+
lxml/html/__pycache__/_setmixin.cpython-312.pyc,,
|
| 44 |
+
lxml/html/__pycache__/builder.cpython-312.pyc,,
|
| 45 |
+
lxml/html/__pycache__/clean.cpython-312.pyc,,
|
| 46 |
+
lxml/html/__pycache__/defs.cpython-312.pyc,,
|
| 47 |
+
lxml/html/__pycache__/diff.cpython-312.pyc,,
|
| 48 |
+
lxml/html/__pycache__/formfill.cpython-312.pyc,,
|
| 49 |
+
lxml/html/__pycache__/html5parser.cpython-312.pyc,,
|
| 50 |
+
lxml/html/__pycache__/soupparser.cpython-312.pyc,,
|
| 51 |
+
lxml/html/__pycache__/usedoctest.cpython-312.pyc,,
|
| 52 |
+
lxml/html/_diffcommand.py,sha256=kz_7EP9PmYWuczlZcGiw74_rG0eTKvQ2lrO0rkiwlYE,2081
|
| 53 |
+
lxml/html/_difflib.cpython-312-x86_64-linux-gnu.so,sha256=XuPeciCf-4e7FpclT9B1viDjUaTJVJg4zkeEW_zXauo,570296
|
| 54 |
+
lxml/html/_difflib.py,sha256=GgH_jVrZQC8tI8WV_lFZQsXFJ3mOTAPup1zjBJNvkPo,84954
|
| 55 |
+
lxml/html/_html5builder.py,sha256=NLaT-Ev-aBgJpeQl-6ZbJChLZK5GV-znDkHOJD5VQC4,3230
|
| 56 |
+
lxml/html/_setmixin.py,sha256=8IFIOLmVz0G-XzsD2tCEkSFWO-dgPBHgvHufC8ni67s,1188
|
| 57 |
+
lxml/html/builder.py,sha256=Uz3r5uiuCdoN0UPa7ngoLMwAadVIhslzGvlRPGigY_M,6187
|
| 58 |
+
lxml/html/clean.py,sha256=FghSJy4jt2RaBy6dgusowkU18hxpZ4XLE5ceCK9qxyA,503
|
| 59 |
+
lxml/html/defs.py,sha256=l_6nh4DHvrsVyWVqWCUUx14QiahRyZv4Melqy_thf6Q,4250
|
| 60 |
+
lxml/html/diff.cpython-312-x86_64-linux-gnu.so,sha256=iWcPoTRaf2StqEyPKB6xz1j15rvZDLvW_a-KwYLJLyY,377848
|
| 61 |
+
lxml/html/diff.py,sha256=Za0By-yeYlQEjUu7m7xKB288kKiy8VBS5gT0RPOaFY0,32989
|
| 62 |
+
lxml/html/formfill.py,sha256=umgk0BbkAI1W6q9musFbL-cDnI_aap2NsLBJqk0UmVI,9681
|
| 63 |
+
lxml/html/html5parser.py,sha256=dnyC4cqHxywjZSzk0mu2L7THTZjxhg4yF4pncjusa_w,8634
|
| 64 |
+
lxml/html/soupparser.py,sha256=xo8VvNeOEb-SChuXLKCRECh8J7HBiJLE9sAbEskoUUQ,10197
|
| 65 |
+
lxml/html/usedoctest.py,sha256=tPlmVz4KK1GRKV5DJLrdVECeqsT9PlDzSqqTodVi5s0,249
|
| 66 |
+
lxml/includes/__init__.pxd,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 67 |
+
lxml/includes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 68 |
+
lxml/includes/__pycache__/__init__.cpython-312.pyc,,
|
| 69 |
+
lxml/includes/c14n.pxd,sha256=DBQcOJ0c_YS245ohMb8fmuEC1kFyv1LrNY_8Mf-syZg,1110
|
| 70 |
+
lxml/includes/config.pxd,sha256=H6Mrl8It21hzRI2hzMId9W48QqkYYkoLT4dniLNmdTw,96
|
| 71 |
+
lxml/includes/dtdvalid.pxd,sha256=Nv0OykjYehv2lO-Zj--q6jS3TAC_dvQVPSgPMuse1NM,689
|
| 72 |
+
lxml/includes/etree_defs.h,sha256=h_UjJTmNUqPyKNNrWB9hxmt6v4CF7_83XVY8dOfxqW0,14524
|
| 73 |
+
lxml/includes/etreepublic.pxd,sha256=Bn4d3JkWPqXputXqI-eJ0xmPrwNFPTfDCa7axgjB7FM,10184
|
| 74 |
+
lxml/includes/extlibs/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 75 |
+
lxml/includes/extlibs/__pycache__/__init__.cpython-312.pyc,,
|
| 76 |
+
lxml/includes/extlibs/libcharset.h,sha256=GA0FumrbNI4VDGlzq3lf5CLaCwXgn4unw2l0btGQFwI,1510
|
| 77 |
+
lxml/includes/extlibs/localcharset.h,sha256=Z_AagaQeq0aDE7NPsVOqEf4nO4KcUp46ggo4d0ONIOQ,6338
|
| 78 |
+
lxml/includes/extlibs/zconf.h,sha256=ROVD_0UUx6mgHWSAGcLJqB0RBcv6PHfx-vbNhur6ir0,16464
|
| 79 |
+
lxml/includes/extlibs/zlib.h,sha256=ilV5r3LqT0J_8ApBUPDMs_xcHkN59ybhARM7Grn8YAw,96829
|
| 80 |
+
lxml/includes/htmlparser.pxd,sha256=9uASkP5dU7OE2lCOLT-z2e01qSbFlp4ehgwdostF_qk,2802
|
| 81 |
+
lxml/includes/libexslt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 82 |
+
lxml/includes/libexslt/__pycache__/__init__.cpython-312.pyc,,
|
| 83 |
+
lxml/includes/libexslt/exslt.h,sha256=eSW5tMJAewSUANLqk7AGEiU8b2BbCNRyauHnez7nKSU,3114
|
| 84 |
+
lxml/includes/libexslt/exsltconfig.h,sha256=QHxzEbRlv_h0USBvpr0Zrl0Muzlc71VCrvgR6lqnLEY,1172
|
| 85 |
+
lxml/includes/libexslt/exsltexports.h,sha256=1Jm9KTXm2FUUJIZ6V6-Uw55yG0BMULX3_goyxDd2LL8,1077
|
| 86 |
+
lxml/includes/libxml/HTMLparser.h,sha256=sU4xGqj-vBtEvzlxA3hBPWJboifvkc4F1hynKXmsl3k,9569
|
| 87 |
+
lxml/includes/libxml/HTMLtree.h,sha256=Q7UBKFbQ8fx4d_dMnmR6ay8JmfOhopFkDp2B63YkLDU,3517
|
| 88 |
+
lxml/includes/libxml/SAX.h,sha256=SFnG27EFrYGUB9HDL_xSIGBwEns5pl07rApXWThFZFM,386
|
| 89 |
+
lxml/includes/libxml/SAX2.h,sha256=RfFP5o3le-Rg8bnA2GW7L7L9_pfXCs3TieODcv1DTWY,4240
|
| 90 |
+
lxml/includes/libxml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 91 |
+
lxml/includes/libxml/__pycache__/__init__.cpython-312.pyc,,
|
| 92 |
+
lxml/includes/libxml/c14n.h,sha256=BSBXw6nIZutC8mWvbRrLLmoWjw3wRt-nM93vjXGMCm8,2742
|
| 93 |
+
lxml/includes/libxml/catalog.h,sha256=H9ssTCaBjtDqc-AZqCk1R7h8F2iD9szqLjJyHpaczXg,4633
|
| 94 |
+
lxml/includes/libxml/chvalid.h,sha256=TZcceNp6Cw0QlYwIqK9GxyYqL5UiAjpQyjt_yrZGTQE,5087
|
| 95 |
+
lxml/includes/libxml/debugXML.h,sha256=XXRNI39gJW7bGRC4SzE4ad-SJ906BsUGz3AwOtkKuS4,1667
|
| 96 |
+
lxml/includes/libxml/dict.h,sha256=SweaPGMtTTf4je6dNTIoEzcfEvpsAT9_PhR7FC0K-rQ,1770
|
| 97 |
+
lxml/includes/libxml/encoding.h,sha256=haL7ratww2wkIERGmtwUqU2BbTVe52FZFU7MmrOpsPk,9623
|
| 98 |
+
lxml/includes/libxml/entities.h,sha256=LEOCA826-0f8dhRJzC_2hvUVsSH7lKQjrea9hSTdBbo,4419
|
| 99 |
+
lxml/includes/libxml/globals.h,sha256=NH8zyRI5cXJJGp5k2aLxOm-reJEGOFX6LYP82GBXRlY,583
|
| 100 |
+
lxml/includes/libxml/hash.h,sha256=KIIpAYKBfGUU3ydWhGehUyfuauZz_Ps0gyambzQo_rc,7017
|
| 101 |
+
lxml/includes/libxml/list.h,sha256=oh7iJNQajRA_cHsNk9CcFPYkaW2smf4J_MpedPPjC4k,3128
|
| 102 |
+
lxml/includes/libxml/nanoftp.h,sha256=22PBtWhJueYLFvwukt4oFooRct_xJA83hbluHRBNXUM,302
|
| 103 |
+
lxml/includes/libxml/nanohttp.h,sha256=bLbzYjAyAKmP3ComMOPH6XaUImu6bNAESF1HrVtRve0,2124
|
| 104 |
+
lxml/includes/libxml/parser.h,sha256=Uq7-ce55UUAsvo4n6CiBlNQpmowewvWhOsQtgGM1UQ8,48498
|
| 105 |
+
lxml/includes/libxml/parserInternals.h,sha256=8_Wr6UgRzm8BRn1RPLxyBkw6BagAdDvVqMA_e181_EI,14539
|
| 106 |
+
lxml/includes/libxml/relaxng.h,sha256=VXZ74r5Yja06KqypdBHc8neDwPxQ2aMrsWHSdRt5oi4,5991
|
| 107 |
+
lxml/includes/libxml/schemasInternals.h,sha256=V8M4In3zf24EX55Yt4dcfxwp7NpHGYViKnLKwtyrPJ4,26233
|
| 108 |
+
lxml/includes/libxml/schematron.h,sha256=8EhPDhvtlMxl9e0C5rSbEruOvzJS5BC_OOFbq9RXZnY,4255
|
| 109 |
+
lxml/includes/libxml/threads.h,sha256=mT3CgK4lXK7-NDnUOFXqYuCK6fyY70S3BsHF-TnT45k,1619
|
| 110 |
+
lxml/includes/libxml/tree.h,sha256=zTRLt6h5x6ApyeXgs90CKQZSAl2hKm7b5NxtPKUQFAE,36106
|
| 111 |
+
lxml/includes/libxml/uri.h,sha256=J9teJHme5z883c4twF5oImEYY-E3xSvhdSGpyRVtvIg,2855
|
| 112 |
+
lxml/includes/libxml/valid.h,sha256=By61IbPvk_eLux7a8x0mOaly7oclFaSGaFE8b2xZcUE,13226
|
| 113 |
+
lxml/includes/libxml/xinclude.h,sha256=K3I5jhw2zAMj26LuRNZc15Bwv2JE2hWxwVn4TCqv2b4,3258
|
| 114 |
+
lxml/includes/libxml/xlink.h,sha256=TVLOkISrcKDelo9n_XIUyPiStDYa8NxuF2dz70aBFCI,5062
|
| 115 |
+
lxml/includes/libxml/xmlIO.h,sha256=FvbuMYTy1-S5PScabE03wz0oWKf626pmXvOPZNuLm-w,11948
|
| 116 |
+
lxml/includes/libxml/xmlautomata.h,sha256=7Sc3YgPz1ZIBKCHPSxs5oAwJEZWQ1RT2kyUw85pUtmU,4004
|
| 117 |
+
lxml/includes/libxml/xmlerror.h,sha256=mMfltMxUza6kiSBfP2QfnY3UlMP_rEXKfX0wruBLl4A,37561
|
| 118 |
+
lxml/includes/libxml/xmlexports.h,sha256=IyV3AMeQVbOl0wkjlnNX4B8WUZ-5GNKQmxZc6-maWUU,2025
|
| 119 |
+
lxml/includes/libxml/xmlmemory.h,sha256=m7wGvVMxNzZiuOAo3vkjxaVWstc8aQLzb6obbjPsebE,4658
|
| 120 |
+
lxml/includes/libxml/xmlmodule.h,sha256=ERUHUmDdZRmh6NjLYWUpse51rLWR8qNjPHOtdgmlLF0,1198
|
| 121 |
+
lxml/includes/libxml/xmlreader.h,sha256=BAHinlSOTXX3DEax9BniaIIPAXJyLGfzym9R-27LCcU,12387
|
| 122 |
+
lxml/includes/libxml/xmlregexp.h,sha256=_q6C1XRy8DS3kSmLbEKpvkKQciTgjTJgGc_zUQ6m22M,2632
|
| 123 |
+
lxml/includes/libxml/xmlsave.h,sha256=zcEQr9sO5CsFrnoOLshhdsqMEr8k4AeFhbkYyNfO9Fs,2934
|
| 124 |
+
lxml/includes/libxml/xmlschemas.h,sha256=5AfLnYUcfmxHRzg0dVpdHig--4ui1-XDwDgpKGDKCiU,7067
|
| 125 |
+
lxml/includes/libxml/xmlschemastypes.h,sha256=MYwlGmoKAo3lHRaaKgnCXiLmPT9KRjdxyCJ7TEyZ6jM,4583
|
| 126 |
+
lxml/includes/libxml/xmlstring.h,sha256=d5PpqxP1I1sfmCUHvVJtjoC9h7hLHcAAQ5ok_Rtf50I,5271
|
| 127 |
+
lxml/includes/libxml/xmlunicode.h,sha256=8sq3wEW2AiyTCuc3ZceOEkce7lfrI7VnkRfwEQgc6pU,278
|
| 128 |
+
lxml/includes/libxml/xmlversion.h,sha256=oVpaE_xbttaeZNFKSuSfcLOceWz7LQgKP71Z1msXZNo,5112
|
| 129 |
+
lxml/includes/libxml/xmlwriter.h,sha256=BEUwYNKx3xymDE9vepksEK7yVq9SXYm1d2pQnzlPy90,20688
|
| 130 |
+
lxml/includes/libxml/xpath.h,sha256=CQv6X_pRhuXoCVpqoDXYB7FfusLK7AuPxCNigwhNYAA,16156
|
| 131 |
+
lxml/includes/libxml/xpathInternals.h,sha256=mc9B5tdpfssyz_NPUzww6dKuWCtBybBiBRJkTe4AE4U,18504
|
| 132 |
+
lxml/includes/libxml/xpointer.h,sha256=DAxMsfPp2SSZgXFrPbxBA84RwTMRf35Qg_LBbUzPQhA,1026
|
| 133 |
+
lxml/includes/libxslt/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
| 134 |
+
lxml/includes/libxslt/__pycache__/__init__.cpython-312.pyc,,
|
| 135 |
+
lxml/includes/libxslt/attributes.h,sha256=qKwzfGf7r89esLC65s96iYJWRA-s-Ezss2_V6Mmo1hk,957
|
| 136 |
+
lxml/includes/libxslt/documents.h,sha256=kBihgH5pqRvFalhm_fOFHtJTFhTpBcm681yT5dxgwfw,2704
|
| 137 |
+
lxml/includes/libxslt/extensions.h,sha256=W5UMyJqUP_1zt6sXZ0mgc0gAIwDJrZ8gjByhyrWqvd8,6899
|
| 138 |
+
lxml/includes/libxslt/extra.h,sha256=6X3Wu3NdPtrlqz-Koo7dB-rccnnszi6j3zg599gTByg,1640
|
| 139 |
+
lxml/includes/libxslt/functions.h,sha256=fc4CZj-9KeBHzO9-WWU_bNqmaEZAz3n7NNwClIBXk14,1972
|
| 140 |
+
lxml/includes/libxslt/imports.h,sha256=18kIjoGqdFXR63Ce3ZtzxsTiYV3XGKpchYakMUPDuUI,1840
|
| 141 |
+
lxml/includes/libxslt/keys.h,sha256=16v25VEluS7jYhgg6gYFwVxgGMn-1ctnlhhWWT4RcBY,1155
|
| 142 |
+
lxml/includes/libxslt/namespaces.h,sha256=VofSn2Kkn-a5JyRKCmY3jPp7amQy3n09vzy0KUQt4q0,1666
|
| 143 |
+
lxml/includes/libxslt/numbersInternals.h,sha256=Eg5gYZ5p3h0_e5wyI61S-0E6_ArVJzv0yr63j6BU2fc,2019
|
| 144 |
+
lxml/includes/libxslt/pattern.h,sha256=tJ-BPfs9UYgiZMMoQZbhij3g7xVppYq7TrrOu25eR7Q,2110
|
| 145 |
+
lxml/includes/libxslt/preproc.h,sha256=D_LjEdHhsdyBnEAvflnwFgoR4hGUb72kgEhXkkmPRsw,896
|
| 146 |
+
+lxml/includes/libxslt/security.h,sha256=fUD1cy_WxFCTvTNAF0WOQIU4p5CNWn1LHFyZJd-Fx5U,2652
+lxml/includes/libxslt/templates.h,sha256=bnt6Jqui6KU5pNUdMNPbQZkZ5d-VTWqC0TMGkOlVoIo,2268
+lxml/includes/libxslt/transform.h,sha256=ICT7meUV0OTAx27WaKVrKj-aUmR9LSpTNaOAJd2UStg,6311
+lxml/includes/libxslt/variables.h,sha256=cQAgPe4QCcK2uKbWg7Iz-9peM9xWGm7m3M6jQm0sjIA,3143
+lxml/includes/libxslt/xslt.h,sha256=wmFx2Q31Pd8Iq2phAQpY9J3QQatb8lWg3gABtqKFgEw,1964
+lxml/includes/libxslt/xsltInternals.h,sha256=2EbEKYmnYZq0HjGnUMAlpqnqZJurRXzjlgk5Js1WYaY,57949
+lxml/includes/libxslt/xsltconfig.h,sha256=cV5scdRK6xmOHeOg3OCw6hBfcQ_nrtNs_tKefX67304,2910
+lxml/includes/libxslt/xsltexports.h,sha256=1-luH-0bCIgBAlKAXhV-dqHBfwOAQNDamiYbxIlTf0k,1124
+lxml/includes/libxslt/xsltlocale.h,sha256=ppxGEmJfZIJgwRQzCM0_77p9WNekEWq1NrdYZrQl4IE,942
+lxml/includes/libxslt/xsltutils.h,sha256=1eguYgR9-jeNOVlBUktHboaq-VLX6JXraO80TfbARKM,9085
+lxml/includes/lxml-version.h,sha256=KZfk_lJnXSnxkyRdUV5taHsWJe4xbC6UEYfYldlfouI,71
+lxml/includes/relaxng.pxd,sha256=HzHlQ6mCcf_tj_JZ9NAVJTVAv8ScCkE8Ifq15y3bS0c,2615
+lxml/includes/schematron.pxd,sha256=Hob7xh-K-MKqp7WiG8thMagf5EkQzmgfi4ds0EF91JA,1604
+lxml/includes/tree.pxd,sha256=XApzMRy_LSqCtQ-OTS-vNSW7CT_OWstybfIT2H84LsA,20179
+lxml/includes/uri.pxd,sha256=3vOXw6AbSPxAM9uo71T1qnfx-wd9ezXLDQtWsb2zX0I,145
+lxml/includes/xinclude.pxd,sha256=CuO_XZNB6E2JK1qXXWn11APrjFQV5kA6SMyb77WZn0A,804
+lxml/includes/xmlerror.pxd,sha256=OQqayytkV0NigAPbsQCCcvmy7luRe0XhVzpTdzJjP3g,58837
+lxml/includes/xmlparser.pxd,sha256=eDGyU5kZyNVksK0dUhMIi7rnE-LSevXsqyl72v99Ess,13730
+lxml/includes/xmlschema.pxd,sha256=OLZPd2WDJyopiXJJyo-dAyyYHaeSYFiMAI4tqIiv-Ik,1702
+lxml/includes/xpath.pxd,sha256=e8-ZYUbRG7N1mHETAlknJ_QqAteOosrYLRgpH-OsTkg,5603
+lxml/includes/xslt.pxd,sha256=4yl3pOu7pAvsx5Tc-W4IWCoB8wgtSSR62HI1jqu6jko,8241
+lxml/isoschematron/__init__.py,sha256=uauerYeKTlWFCJSqieIHhF5l6rYV2myeEJ0Imd1LzRc,13274
+lxml/isoschematron/__pycache__/__init__.cpython-312.pyc,,
+lxml/isoschematron/resources/rng/iso-schematron.rng,sha256=VsWxPyi3iViJDDbjJJw0wWkEHkLrz9zoCA8zJLor9N4,18337
+lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl,sha256=ObebsB8Wt-d3uIA_U5NU85TpnQ3PxPX38TdOAqosMac,3172
+lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl,sha256=QweRrIIM-zFcgg98GXA2CaWfIbgVE0XKEeYSfvv67A0,4563
+lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl,sha256=xSZ_Ekq_I-62ZpiE5AqYYHwFW_qh855zt9V4_s7rbkY,11703
+lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl,sha256=x42QJ-dxQ1waPzydsCoQnp2Xj15y53nW43O7BuoDRHk,39957
+lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl,sha256=Tr9BnO6pzjVWwhqJfm10UlvAy95EgfSCz2iMlrVGT6Q,2015
+lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl,sha256=ue8q_88X4e_jsJizo31GRNBxNhdxkEE9fY20oq0Iqwk,71764
+lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl,sha256=BBAdsVSi5zAzeGepuN6gS1saQINDqITXKplmmj4dTWg,20382
+lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt,sha256=OGLiFswuLJEW5EPYKOeoauuCJFEtVa6jyzBE1OcJI98,3310
+lxml/iterparse.pxi,sha256=JXvYhSOCaRjT_hYbRGMlJt2rlqx0TiRpN4FE1jQc63w,16521
+lxml/lxml.etree.h,sha256=_NkGkD3C_jpE4UegvQ6Y32_ycTbUCLyOBz9xfWRPkug,9792
+lxml/lxml.etree_api.h,sha256=dAbJPd53D_9CIGzePAUB3otgyhG4o2cSdA4-6apdzRA,17377
+lxml/nsclasses.pxi,sha256=5pzNBhBtlqObPdThL9QIGRs1Dxj1qnr0PyXuTCURqTg,9129
+lxml/objectify.cpython-312-x86_64-linux-gnu.so,sha256=TYF3CoGF-cenIwFh_1nY0sr2UI2wdsS8tZO2Wi0evyg,2933112
+lxml/objectify.pyx,sha256=I4bQQXmQssBtk5bTrid-eVURBLKRTM5iQZiviugIrts,75823
+lxml/objectpath.pxi,sha256=s5TNG2-EbaWWKLFAiX303B95zK_Ui8ausB__3QvFFGw,11450
+lxml/parser.pxi,sha256=VZfychEJ3-XPE3x6oGOEzn6HVAr74R7lXfDSVF-hq-U,85411
+lxml/parsertarget.pxi,sha256=v1PidxRaG5giwXcTDkpBI7PDFmsZuOcK0y9LdkQaY8M,6326
+lxml/proxy.pxi,sha256=8IVvYF2KTuzl7Hb3XGHEmcxfSLbUZkA2Q1Y50hLsyzE,23929
+lxml/public-api.pxi,sha256=XoP6_cJOEoQIItvE1RiYCKYD1ry4AobaOr4XLo0KSE4,6666
+lxml/pyclasslookup.py,sha256=gLD1HM2HtITYYiGzjEOewSwbB7XkVx_NZv_quCt79Oc,92
+lxml/readonlytree.pxi,sha256=ddRYczhHieJ4XUvWvTPW9N9oQ8vuKtv7lC1mtE1qvH8,18976
+lxml/relaxng.pxi,sha256=3OQ-fZMzP-KF5vM6HTozT_9ee3J0DJnpj9RcHC8LoMw,6339
+lxml/sax.cpython-312-x86_64-linux-gnu.so,sha256=UQn-l56AOOT5UUJ395Fil7It-Im_brnlsMYfmUpwQe0,190272
+lxml/sax.py,sha256=yrNvKD6rlon48jrR-1qpFXER8j4psYC2R5yt0u6TWLs,9706
+lxml/saxparser.pxi,sha256=TmkdM5h9xII9iKRaBk_1NGk2KTfeesl5Ha8bpFQGqLc,33529
+lxml/schematron.pxi,sha256=F2OHKZUl57-byBk_wWtPTnHZ1fwlj0FtwG3VuGtG-UY,6064
+lxml/serializer.pxi,sha256=iIXfechFHfvFs2sTk7wMIy3sDJxmaMPbNO33mkLLBUE,68063
+lxml/usedoctest.py,sha256=qRgZKQVcAZcl-zN0AIXVJnOsETUXz2nPXkxuzs1lGgk,230
+lxml/xinclude.pxi,sha256=7eBrI_OK47mmrHQ0ixbixRI8pKqQ1nwkMV-OmKUVlD4,2456
+lxml/xmlerror.pxi,sha256=i1kR42WB2BAxtrmh7m2ADlH-jffVQ-blW3pW0Ps4s-g,50061
+lxml/xmlid.pxi,sha256=5zf9oR6bsCtavGiOmilNyHqYwgG_bnrIabSd2SURtm0,6073
+lxml/xmlschema.pxi,sha256=mumNoHni5S3BQPtcmOHRd61KRaVWu4eOie2wQeB0e6E,8490
+lxml/xpath.pxi,sha256=aqW24V817dUxps4Gnc8h7Tm3QVlITKvxU5_9WgJUIFg,19132
+lxml/xslt.pxi,sha256=wxdbuvNFVA8eP57tHmBYWER__ceFhf6HGdsbBHbx_0A,36315
+lxml/xsltext.pxi,sha256=TImDiAPlAezC07P7RY1N9YChA7AuKFH-G53hXdel9yc,11088
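The RECORD entries above follow the wheel spec's `path,sha256=<digest>,<size>` format, where the digest is the urlsafe-base64 encoding of the file's SHA-256 with trailing `=` padding stripped; the `__pycache__` entry carries empty hash and size fields because `.pyc` files are generated at install time. A minimal sketch of recomputing one entry (the chosen path is illustrative):

```python
# Sketch: recompute one wheel RECORD entry ("path,sha256=<digest>,<size>").
# Per the wheel spec, the digest is the urlsafe-base64-encoded SHA-256 of
# the file contents with trailing "=" padding removed.
import base64
import hashlib
from pathlib import Path

def record_entry(path: str) -> str:
    data = Path(path).read_bytes()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest())
    return f"{path},sha256={digest.rstrip(b'=').decode()},{len(data)}"

# Illustrative: run from inside site-packages and compare the output
# against the corresponding RECORD line above.
print(record_entry("lxml/includes/uri.pxd"))
```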
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml-6.0.2.dist-info/WHEEL
ADDED
@@ -0,0 +1,6 @@
+Wheel-Version: 1.0
+Generator: setuptools (80.9.0)
+Root-Is-Purelib: false
+Tag: cp312-cp312-manylinux_2_26_x86_64
+Tag: cp312-cp312-manylinux_2_28_x86_64
+
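The two `Tag:` lines in this WHEEL file advertise the environments the wheel supports (CPython 3.12 interpreter and ABI on manylinux_2_26/2_28 x86-64); an installer accepts the wheel when any advertised tag is among the interpreter's supported tags. A minimal sketch of that check using the third-party `packaging` library, with the tag values copied from the WHEEL file above:

```python
# Sketch: decide whether this wheel is installable on the running
# interpreter, the way an installer matches WHEEL tags against the
# interpreter's supported tags. Requires the "packaging" library.
from packaging.tags import Tag, sys_tags

# The two tags declared in the WHEEL file above.
wheel_tags = {
    Tag("cp312", "cp312", "manylinux_2_26_x86_64"),
    Tag("cp312", "cp312", "manylinux_2_28_x86_64"),
}

# sys_tags() yields this interpreter's supported tags, most preferred first.
compatible = any(tag in wheel_tags for tag in sys_tags())
print("installable here:", compatible)
```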
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml-6.0.2.dist-info/top_level.txt
ADDED
@@ -0,0 +1 @@
+lxml
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/INSTALLER
ADDED
@@ -0,0 +1 @@
+pip
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/License.txt
ADDED
@@ -0,0 +1,1568 @@
+End User License Agreement
+--------------------------
+
+Preface
+-------
+
+The Software License Agreement in Chapter 1 and the Supplement
+in Chapter 2 contain license terms and conditions that govern
+the use of NVIDIA software. By accepting this agreement, you
+agree to comply with all the terms and conditions applicable
+to the product(s) included herein.
+
+NVIDIA Driver
+
+Description
+
+This package contains the operating system driver and
+fundamental system software components for NVIDIA GPUs.
+
+NVIDIA CUDA Toolkit
+
+Description
+
+The NVIDIA CUDA Toolkit provides command-line and graphical
+tools for building, debugging and optimizing the performance
+of applications accelerated by NVIDIA GPUs, runtime and math
+libraries, and documentation including programming guides,
+user manuals, and API references.
+
+Default Install Location of CUDA Toolkit
+
+Windows platform:
+
+%ProgramFiles%\NVIDIA GPU Computing Toolkit\CUDA\v#.#
+
+Linux platform:
+
+/usr/local/cuda-#.#
+
+Mac platform:
+
+/Developer/NVIDIA/CUDA-#.#
+
+NVIDIA CUDA Samples
+
+Description
+
+This package includes over 100+ CUDA examples that demonstrate
+various CUDA programming principles, and efficient CUDA
+implementation of algorithms in specific application domains.
+
+Default Install Location of CUDA Samples
+
+Windows platform:
+
+%ProgramData%\NVIDIA Corporation\CUDA Samples\v#.#
+
+Linux platform:
+
+/usr/local/cuda-#.#/samples
+
+and
+
+$HOME/NVIDIA_CUDA-#.#_Samples
+
+Mac platform:
+
+/Developer/NVIDIA/CUDA-#.#/samples
+
+NVIDIA Nsight Visual Studio Edition (Windows only)
+
+Description
+
+NVIDIA Nsight Development Platform, Visual Studio Edition is a
+development environment integrated into Microsoft Visual
+Studio that provides tools for debugging, profiling, analyzing
+and optimizing your GPU computing and graphics applications.
+
+Default Install Location of Nsight Visual Studio Edition
+
+Windows platform:
+
+%ProgramFiles(x86)%\NVIDIA Corporation\Nsight Visual Studio Edition #.#
+
+1. License Agreement for NVIDIA Software Development Kits
+---------------------------------------------------------
+
+Release Date: July 26, 2018
+---------------------------
+
+Important Notice - Read before downloading, installing,
+copying or using the licensed software:
+-------------------------------------------------------
+
+This license agreement, including exhibits attached
+(“Agreement”) is a legal agreement between you and NVIDIA
+Corporation (“NVIDIA”) and governs your use of a NVIDIA
+software development kit (“SDK”).
+
+Each SDK has its own set of software and materials, but here
+is a description of the types of items that may be included in
+a SDK: source code, header files, APIs, data sets and assets
+(examples include images, textures, models, scenes, videos,
+native API input/output files), binary software, sample code,
+libraries, utility programs, programming code and
+documentation.
+
+This Agreement can be accepted only by an adult of legal age
+of majority in the country in which the SDK is used.
+
+If you are entering into this Agreement on behalf of a company
+or other legal entity, you represent that you have the legal
+authority to bind the entity to this Agreement, in which case
+“you” will mean the entity you represent.
+
+If you don’t have the required age or authority to accept
+this Agreement, or if you don’t accept all the terms and
+conditions of this Agreement, do not download, install or use
+the SDK.
+
+You agree to use the SDK only for purposes that are permitted
+by (a) this Agreement, and (b) any applicable law, regulation
+or generally accepted practices or guidelines in the relevant
+jurisdictions.
+
+1.1. License
+
+1.1.1. License Grant
+
+Subject to the terms of this Agreement, NVIDIA hereby grants
+you a non-exclusive, non-transferable license, without the
+right to sublicense (except as expressly provided in this
+Agreement) to:
+
+1. Install and use the SDK,
+
+2. Modify and create derivative works of sample source code
+   delivered in the SDK, and
+
+3. Distribute those portions of the SDK that are identified
+   in this Agreement as distributable, as incorporated in
+   object code format into a software application that meets
+   the distribution requirements indicated in this Agreement.
+
+1.1.2. Distribution Requirements
+
+These are the distribution requirements for you to exercise
+the distribution grant:
+
+1. Your application must have material additional
+   functionality, beyond the included portions of the SDK.
+
+2. The distributable portions of the SDK shall only be
+   accessed by your application.
+
+3. The following notice shall be included in modifications
+   and derivative works of sample source code distributed:
+   “This software contains source code provided by NVIDIA
+   Corporation.”
+
+4. Unless a developer tool is identified in this Agreement
+   as distributable, it is delivered for your internal use
+   only.
+
+5. The terms under which you distribute your application
+   must be consistent with the terms of this Agreement,
+   including (without limitation) terms relating to the
+   license grant and license restrictions and protection of
+   NVIDIA’s intellectual property rights. Additionally, you
+   agree that you will protect the privacy, security and
+   legal rights of your application users.
+
+6. You agree to notify NVIDIA in writing of any known or
+   suspected distribution or use of the SDK not in compliance
+   with the requirements of this Agreement, and to enforce
+   the terms of your agreements with respect to distributed
+   SDK.
+
+1.1.3. Authorized Users
+
+You may allow employees and contractors of your entity or of
+your subsidiary(ies) to access and use the SDK from your
+secure network to perform work on your behalf.
+
+If you are an academic institution you may allow users
+enrolled or employed by the academic institution to access and
+use the SDK from your secure network.
+
+You are responsible for the compliance with the terms of this
+Agreement by your authorized users. If you become aware that
+your authorized users didn’t follow the terms of this
+Agreement, you agree to take reasonable steps to resolve the
+non-compliance and prevent new occurrences.
+
+1.1.4. Pre-Release SDK
+
+The SDK versions identified as alpha, beta, preview or
+otherwise as pre-release, may not be fully functional, may
+contain errors or design flaws, and may have reduced or
+different security, privacy, accessibility, availability, and
+reliability standards relative to commercial versions of
+NVIDIA software and materials. Use of a pre-release SDK may
+result in unexpected results, loss of data, project delays or
+other unpredictable damage or loss.
+
+You may use a pre-release SDK at your own risk, understanding
+that pre-release SDKs are not intended for use in production
+or business-critical systems.
+
+NVIDIA may choose not to make available a commercial version
+of any pre-release SDK. NVIDIA may also choose to abandon
+development and terminate the availability of a pre-release
+SDK at any time without liability.
+
+1.1.5. Updates
+
+NVIDIA may, at its option, make available patches, workarounds
+or other updates to this SDK. Unless the updates are provided
+with their separate governing terms, they are deemed part of
+the SDK licensed to you as provided in this Agreement. You
+agree that the form and content of the SDK that NVIDIA
+provides may change without prior notice to you. While NVIDIA
+generally maintains compatibility between versions, NVIDIA may
+in some cases make changes that introduce incompatibilities in
+future versions of the SDK.
+
+1.1.6. Third Party Licenses
+
+The SDK may come bundled with, or otherwise include or be
+distributed with, third party software licensed by a NVIDIA
+supplier and/or open source software provided under an open
+source license. Use of third party software is subject to the
+third-party license terms, or in the absence of third party
+terms, the terms of this Agreement. Copyright to third party
+software is held by the copyright holders indicated in the
+third-party software or license.
+
+1.1.7. Reservation of Rights
+
+NVIDIA reserves all rights, title, and interest in and to the
+SDK, not expressly granted to you under this Agreement.
+
+1.2. Limitations
+
+The following license limitations apply to your use of the
+SDK:
+
+1. You may not reverse engineer, decompile or disassemble,
+   or remove copyright or other proprietary notices from any
+   portion of the SDK or copies of the SDK.
+
+2. Except as expressly provided in this Agreement, you may
+   not copy, sell, rent, sublicense, transfer, distribute,
+   modify, or create derivative works of any portion of the
+   SDK. For clarity, you may not distribute or sublicense the
+   SDK as a stand-alone product.
+
+3. Unless you have an agreement with NVIDIA for this
+   purpose, you may not indicate that an application created
+   with the SDK is sponsored or endorsed by NVIDIA.
+
+4. You may not bypass, disable, or circumvent any
+   encryption, security, digital rights management or
+   authentication mechanism in the SDK.
+
+5. You may not use the SDK in any manner that would cause it
+   to become subject to an open source software license. As
+   examples, licenses that require as a condition of use,
+   modification, and/or distribution that the SDK be:
+
+   a. Disclosed or distributed in source code form;
+
+   b. Licensed for the purpose of making derivative works;
+      or
+
+   c. Redistributable at no charge.
+
+6. Unless you have an agreement with NVIDIA for this
+   purpose, you may not use the SDK with any system or
+   application where the use or failure of the system or
+   application can reasonably be expected to threaten or
+   result in personal injury, death, or catastrophic loss.
+   Examples include use in avionics, navigation, military,
+   medical, life support or other life critical applications.
+   NVIDIA does not design, test or manufacture the SDK for
+   these critical uses and NVIDIA shall not be liable to you
+   or any third party, in whole or in part, for any claims or
+   damages arising from such uses.
+
+7. You agree to defend, indemnify and hold harmless NVIDIA
+   and its affiliates, and their respective employees,
+   contractors, agents, officers and directors, from and
+   against any and all claims, damages, obligations, losses,
+   liabilities, costs or debt, fines, restitutions and
+   expenses (including but not limited to attorney’s fees
+   and costs incident to establishing the right of
+   indemnification) arising out of or related to your use of
+   the SDK outside of the scope of this Agreement, or not in
+   compliance with its terms.
+
+1.3. Ownership
+
+1. NVIDIA or its licensors hold all rights, title and
+   interest in and to the SDK and its modifications and
+   derivative works, including their respective intellectual
+   property rights, subject to your rights described in this
+   section. This SDK may include software and materials from
+   NVIDIA’s licensors, and these licensors are intended
+   third party beneficiaries that may enforce this Agreement
+   with respect to their intellectual property rights.
+
+2. You hold all rights, title and interest in and to your
+   applications and your derivative works of the sample
+   source code delivered in the SDK, including their
+   respective intellectual property rights, subject to
+   NVIDIA’s rights described in this section.
+
+3. You may, but don’t have to, provide to NVIDIA
+   suggestions, feature requests or other feedback regarding
+   the SDK, including possible enhancements or modifications
+   to the SDK. For any feedback that you voluntarily provide,
+   you hereby grant NVIDIA and its affiliates a perpetual,
+   non-exclusive, worldwide, irrevocable license to use,
+   reproduce, modify, license, sublicense (through multiple
+   tiers of sublicensees), and distribute (through multiple
+   tiers of distributors) it without the payment of any
+   royalties or fees to you. NVIDIA will use feedback at its
+   choice. NVIDIA is constantly looking for ways to improve
+   its products, so you may send feedback to NVIDIA through
+   the developer portal at https://developer.nvidia.com.
+
+1.4. No Warranties
+
+THE SDK IS PROVIDED BY NVIDIA “AS IS” AND “WITH ALL
+FAULTS.” TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND
+ITS AFFILIATES EXPRESSLY DISCLAIM ALL WARRANTIES OF ANY KIND
+OR NATURE, WHETHER EXPRESS, IMPLIED OR STATUTORY, INCLUDING,
+BUT NOT LIMITED TO, ANY WARRANTIES OF MERCHANTABILITY, FITNESS
+FOR A PARTICULAR PURPOSE, TITLE, NON-INFRINGEMENT, OR THE
+ABSENCE OF ANY DEFECTS THEREIN, WHETHER LATENT OR PATENT. NO
+WARRANTY IS MADE ON THE BASIS OF TRADE USAGE, COURSE OF
+DEALING OR COURSE OF TRADE.
+
+1.5. Limitation of Liability
+
+TO THE MAXIMUM EXTENT PERMITTED BY LAW, NVIDIA AND ITS
+AFFILIATES SHALL NOT BE LIABLE FOR ANY SPECIAL, INCIDENTAL,
+PUNITIVE OR CONSEQUENTIAL DAMAGES, OR ANY LOST PROFITS, LOSS
+OF USE, LOSS OF DATA OR LOSS OF GOODWILL, OR THE COSTS OF
+PROCURING SUBSTITUTE PRODUCTS, ARISING OUT OF OR IN CONNECTION
+WITH THIS AGREEMENT OR THE USE OR PERFORMANCE OF THE SDK,
+WHETHER SUCH LIABILITY ARISES FROM ANY CLAIM BASED UPON BREACH
+OF CONTRACT, BREACH OF WARRANTY, TORT (INCLUDING NEGLIGENCE),
+PRODUCT LIABILITY OR ANY OTHER CAUSE OF ACTION OR THEORY OF
+LIABILITY. IN NO EVENT WILL NVIDIA’S AND ITS AFFILIATES
+TOTAL CUMULATIVE LIABILITY UNDER OR ARISING OUT OF THIS
+AGREEMENT EXCEED US$10.00. THE NATURE OF THE LIABILITY OR THE
+NUMBER OF CLAIMS OR SUITS SHALL NOT ENLARGE OR EXTEND THIS
+LIMIT.
+
+These exclusions and limitations of liability shall apply
+regardless if NVIDIA or its affiliates have been advised of
+the possibility of such damages, and regardless of whether a
+remedy fails its essential purpose. These exclusions and
+limitations of liability form an essential basis of the
+bargain between the parties, and, absent any of these
+exclusions or limitations of liability, the provisions of this
+Agreement, including, without limitation, the economic terms,
+would be substantially different.
+
+1.6. Termination
+
+1. This Agreement will continue to apply until terminated by
+   either you or NVIDIA as described below.
+
+2. If you want to terminate this Agreement, you may do so by
+   stopping to use the SDK.
+
+3. NVIDIA may, at any time, terminate this Agreement if:
+
+   a. (i) you fail to comply with any term of this
+      Agreement and the non-compliance is not fixed within
+      thirty (30) days following notice from NVIDIA (or
+      immediately if you violate NVIDIA’s intellectual
+      property rights);
+
+   b. (ii) you commence or participate in any legal
+      proceeding against NVIDIA with respect to the SDK; or
+
+   c. (iii) NVIDIA decides to no longer provide the SDK in
+      a country or, in NVIDIA’s sole discretion, the
+      continued use of it is no longer commercially viable.
+
+4. Upon any termination of this Agreement, you agree to
+   promptly discontinue use of the SDK and destroy all copies
+   in your possession or control. Your prior distributions in
+   accordance with this Agreement are not affected by the
+   termination of this Agreement. Upon written request, you
+   will certify in writing that you have complied with your
+   commitments under this section. Upon any termination of
+   this Agreement all provisions survive except for the
+   license grant provisions.
+
+1.7. General
+
+If you wish to assign this Agreement or your rights and
+obligations, including by merger, consolidation, dissolution
+or operation of law, contact NVIDIA to ask for permission. Any
+attempted assignment not approved by NVIDIA in writing shall
+be void and of no effect. NVIDIA may assign, delegate or
+transfer this Agreement and its rights and obligations, and if
+to a non-affiliate you will be notified.
+
+You agree to cooperate with NVIDIA and provide reasonably
+requested information to verify your compliance with this
+Agreement.
+
+This Agreement will be governed in all respects by the laws of
+the United States and of the State of Delaware as those laws
+are applied to contracts entered into and performed entirely
+within Delaware by Delaware residents, without regard to the
+conflicts of laws principles. The United Nations Convention on
+Contracts for the International Sale of Goods is specifically
+disclaimed. You agree to all terms of this Agreement in the
+English language.
+
+The state or federal courts residing in Santa Clara County,
+California shall have exclusive jurisdiction over any dispute
+or claim arising out of this Agreement. Notwithstanding this,
+you agree that NVIDIA shall still be allowed to apply for
+injunctive remedies or an equivalent type of urgent legal
+relief in any jurisdiction.
+
+If any court of competent jurisdiction determines that any
+provision of this Agreement is illegal, invalid or
+unenforceable, such provision will be construed as limited to
+the extent necessary to be consistent with and fully
+enforceable under the law and the remaining provisions will
+remain in full force and effect. Unless otherwise specified,
+remedies are cumulative.
+
+Each party acknowledges and agrees that the other is an
+independent contractor in the performance of this Agreement.
+
+The SDK has been developed entirely at private expense and is
+“commercial items” consisting of “commercial computer
+software” and “commercial computer software
+documentation” provided with RESTRICTED RIGHTS. Use,
+duplication or disclosure by the U.S. Government or a U.S.
+Government subcontractor is subject to the restrictions in
+this Agreement pursuant to DFARS 227.7202-3(a) or as set forth
+in subparagraphs (c)(1) and (2) of the Commercial Computer
+Software - Restricted Rights clause at FAR 52.227-19, as
+applicable. Contractor/manufacturer is NVIDIA, 2788 San Tomas
+Expressway, Santa Clara, CA 95051.
+
+The SDK is subject to United States export laws and
+regulations. You agree that you will not ship, transfer or
+export the SDK into any country, or use the SDK in any manner,
+prohibited by the United States Bureau of Industry and
+Security or economic sanctions regulations administered by the
+U.S. Department of Treasury’s Office of Foreign Assets
+Control (OFAC), or any applicable export laws, restrictions or
+regulations. These laws include restrictions on destinations,
+end users and end use. By accepting this Agreement, you
+confirm that you are not a resident or citizen of any country
+currently embargoed by the U.S. and that you are not otherwise
+prohibited from receiving the SDK.
+
+Any notice delivered by NVIDIA to you under this Agreement
+will be delivered via mail, email or fax. You agree that any
+notices that NVIDIA sends you electronically will satisfy any
+legal communication requirements. Please direct your legal
+notices or other correspondence to NVIDIA Corporation, 2788
+San Tomas Expressway, Santa Clara, California 95051, United
+States of America, Attention: Legal Department.
+
+This Agreement and any exhibits incorporated into this
+Agreement constitute the entire agreement of the parties with
+respect to the subject matter of this Agreement and supersede
+all prior negotiations or documentation exchanged between the
+parties relating to this SDK license. Any additional and/or
+conflicting terms on documents issued by you are null, void,
+and invalid. Any amendment or waiver under this Agreement
+shall be in writing and signed by representatives of both
+parties.
+
+2. CUDA Toolkit Supplement to Software License Agreement for
+NVIDIA Software Development Kits
+------------------------------------------------------------
+
+Release date: August 16, 2018
+-----------------------------
+
+The terms in this supplement govern your use of the NVIDIA
+CUDA Toolkit SDK under the terms of your license agreement
+(“Agreement”) as modified by this supplement. Capitalized
+terms used but not defined below have the meaning assigned to
+them in the Agreement.
+
+This supplement is an exhibit to the Agreement and is
+incorporated as an integral part of the Agreement. In the
+event of conflict between the terms in this supplement and the
+terms in the Agreement, the terms in this supplement govern.
+
+2.1. License Scope
+
+The SDK is licensed for you to develop applications only for
+use in systems with NVIDIA GPUs.
+
+2.2. Distribution
+
+The portions of the SDK that are distributable under the
+Agreement are listed in Attachment A.
+
+2.3. Operating Systems
+
+Those portions of the SDK designed exclusively for use on the
+Linux or FreeBSD operating systems, or other operating systems
+derived from the source code to these operating systems, may
+be copied and redistributed for use in accordance with this
+Agreement, provided that the object code files are not
+modified in any way (except for unzipping of compressed
+files).
+
+2.4. Audio and Video Encoders and Decoders
+
+You acknowledge and agree that it is your sole responsibility
+to obtain any additional third-party licenses required to
+make, have made, use, have used, sell, import, and offer for
+sale your products or services that include or incorporate any
+third-party software and content relating to audio and/or
+video encoders and decoders from, including but not limited
+to, Microsoft, Thomson, Fraunhofer IIS, Sisvel S.p.A.,
+MPEG-LA, and Coding Technologies. NVIDIA does not grant to you
+under this Agreement any necessary patent or other rights with
+respect to any audio and/or video encoders and decoders.
+
+2.5. Licensing
+
+If the distribution terms in this Agreement are not suitable
+for your organization, or for any questions regarding this
+Agreement, please contact NVIDIA at
+nvidia-compute-license-questions@nvidia.com.
+
+2.6. Attachment A
+
+The following portions of the SDK are distributable under the
+Agreement:
+
+Component
+
+CUDA Runtime
+
+Windows
+
+cudart.dll, cudart_static.lib, cudadevrt.lib
+
+Mac OSX
+
+libcudart.dylib, libcudart_static.a, libcudadevrt.a
+
+Linux
+
+libcudart.so, libcudart_static.a, libcudadevrt.a
+
+Android
+
+libcudart.so, libcudart_static.a, libcudadevrt.a
+
+Component
+
+CUDA FFT Library
+
+Windows
+
+cufft.dll, cufftw.dll, cufft.lib, cufftw.lib
+
+Mac OSX
+
+libcufft.dylib, libcufft_static.a, libcufftw.dylib,
+libcufftw_static.a
+
+Linux
+
+libcufft.so, libcufft_static.a, libcufftw.so,
+libcufftw_static.a
+
+Android
+
+libcufft.so, libcufft_static.a, libcufftw.so,
+libcufftw_static.a
+
+Component
+
+CUDA BLAS Library
+
+Windows
+
+cublas.dll, cublasLt.dll
+
+Mac OSX
+
+libcublas.dylib, libcublasLt.dylib, libcublas_static.a,
+libcublasLt_static.a
+
+Linux
+
+libcublas.so, libcublasLt.so, libcublas_static.a,
+libcublasLt_static.a
+
+Android
+
+libcublas.so, libcublasLt.so, libcublas_static.a,
+libcublasLt_static.a
+
+Component
+
+NVIDIA "Drop-in" BLAS Library
+
+Windows
+
+nvblas.dll
+
+Mac OSX
+
+libnvblas.dylib
+
+Linux
+
+libnvblas.so
+
+Component
+
+CUDA Sparse Matrix Library
+
+Windows
+
+cusparse.dll, cusparse.lib
+
+Mac OSX
+
+libcusparse.dylib, libcusparse_static.a
+
+Linux
+
+libcusparse.so, libcusparse_static.a
+
+Android
+
+libcusparse.so, libcusparse_static.a
+
+Component
+
+CUDA Linear Solver Library
+
+Windows
+
+cusolver.dll, cusolver.lib
+
+Mac OSX
+
+libcusolver.dylib, libcusolver_static.a
+
+Linux
+
+libcusolver.so, libcusolver_static.a
+
+Android
+
+libcusolver.so, libcusolver_static.a
+
+Component
+
+CUDA Random Number Generation Library
+
+Windows
+
+curand.dll, curand.lib
+
+Mac OSX
+
+libcurand.dylib, libcurand_static.a
+
+Linux
+
+libcurand.so, libcurand_static.a
+
+Android
+
+libcurand.so, libcurand_static.a
+
+Component
+
+CUDA Accelerated Graph Library
+
+Component
+
+NVIDIA Performance Primitives Library
+
+Windows
+
+nppc.dll, nppc.lib, nppial.dll, nppial.lib, nppicc.dll,
+nppicc.lib, nppicom.dll, nppicom.lib, nppidei.dll,
+nppidei.lib, nppif.dll, nppif.lib, nppig.dll, nppig.lib,
+nppim.dll, nppim.lib, nppist.dll, nppist.lib, nppisu.dll,
+nppisu.lib, nppitc.dll, nppitc.lib, npps.dll, npps.lib
+
+Mac OSX
+
+libnppc.dylib, libnppc_static.a, libnppial.dylib,
+libnppial_static.a, libnppicc.dylib, libnppicc_static.a,
+libnppicom.dylib, libnppicom_static.a, libnppidei.dylib,
+libnppidei_static.a, libnppif.dylib, libnppif_static.a,
+libnppig.dylib, libnppig_static.a, libnppim.dylib,
+libnppisu_static.a, libnppitc.dylib, libnppitc_static.a,
+libnpps.dylib, libnpps_static.a
+
+Linux
+
+libnppc.so, libnppc_static.a, libnppial.so,
+libnppial_static.a, libnppicc.so, libnppicc_static.a,
+libnppicom.so, libnppicom_static.a, libnppidei.so,
+libnppidei_static.a, libnppif.so, libnppif_static.a
+libnppig.so, libnppig_static.a, libnppim.so,
+libnppim_static.a, libnppist.so, libnppist_static.a,
+libnppisu.so, libnppisu_static.a, libnppitc.so
+libnppitc_static.a, libnpps.so, libnpps_static.a
+
+Android
+
+libnppc.so, libnppc_static.a, libnppial.so,
+libnppial_static.a, libnppicc.so, libnppicc_static.a,
+libnppicom.so, libnppicom_static.a, libnppidei.so,
+libnppidei_static.a, libnppif.so, libnppif_static.a
+libnppig.so, libnppig_static.a, libnppim.so,
+libnppim_static.a, libnppist.so, libnppist_static.a,
+libnppisu.so, libnppisu_static.a, libnppitc.so
+libnppitc_static.a, libnpps.so, libnpps_static.a
+
+Component
+
+NVIDIA JPEG Library
+
+Linux
+
+libnvjpeg.so, libnvjpeg_static.a
+
+Component
+
+Internal common library required for statically linking to
+cuBLAS, cuSPARSE, cuFFT, cuRAND, nvJPEG and NPP
+
+Mac OSX
+
+libculibos.a
+
+Linux
+
+libculibos.a
+
+Component
+
+NVIDIA Runtime Compilation Library and Header
+
+All
+
+nvrtc.h
+
+Windows
+
+nvrtc.dll, nvrtc-builtins.dll
+
+Mac OSX
+
+libnvrtc.dylib, libnvrtc-builtins.dylib
+
+Linux
+
+libnvrtc.so, libnvrtc-builtins.so
+
+Component
+
+NVIDIA Optimizing Compiler Library
+
+Windows
+
+nvvm.dll
+
+Mac OSX
+
+libnvvm.dylib
+
+Linux
+
+libnvvm.so
+
+Component
+
+NVIDIA Common Device Math Functions Library
+
+Windows
+
+libdevice.10.bc
+
+Mac OSX
+
+libdevice.10.bc
+
+Linux
+
+libdevice.10.bc
+
+Component
+
+CUDA Occupancy Calculation Header Library
+
+All
+
+cuda_occupancy.h
+
+Component
+
+CUDA Half Precision Headers
+
+All
+
+cuda_fp16.h, cuda_fp16.hpp
+
+Component
+
+CUDA Profiling Tools Interface (CUPTI) Library
+
+Windows
+
+cupti.dll
+
+Mac OSX
+
+libcupti.dylib
+
+Linux
+
+libcupti.so
+
+Component
+
+NVIDIA Tools Extension Library
+
+Windows
+
+nvToolsExt.dll, nvToolsExt.lib
+
+Mac OSX
+
+libnvToolsExt.dylib
+
+Linux
+
+libnvToolsExt.so
+
+Component
+
+NVIDIA CUDA Driver Libraries
+
+Linux
+
+libcuda.so, libnvidia-fatbinaryloader.so,
+libnvidia-ptxjitcompiler.so
+
+The NVIDIA CUDA Driver Libraries are only distributable in
+applications that meet this criteria:
+
+1. The application was developed starting from a NVIDIA CUDA
+   container obtained from Docker Hub or the NVIDIA GPU
+   Cloud, and
+
+2. The resulting application is packaged as a Docker
+   container and distributed to users on Docker Hub or the
+   NVIDIA GPU Cloud only.
+
+2.7. Attachment B
+
+Additional Licensing Obligations
+
+The following third party components included in the SOFTWARE
+are licensed to Licensee pursuant to the following terms and
+conditions:
+
+1. Licensee's use of the GDB third party component is
+   subject to the terms and conditions of GNU GPL v3:
+
+   This product includes copyrighted third-party software licensed
+   under the terms of the GNU General Public License v3 ("GPL v3").
+   All third-party software packages are copyright by their respective
+   authors. GPL v3 terms and conditions are hereby incorporated into
+   the Agreement by this reference: http://www.gnu.org/licenses/gpl.txt
+
+   Consistent with these licensing requirements, the software
+   listed below is provided under the terms of the specified
+   open source software licenses. To obtain source code for
+   software provided under licenses that require
+   redistribution of source code, including the GNU General
+   Public License (GPL) and GNU Lesser General Public License
+   (LGPL), contact oss-requests@nvidia.com. This offer is
+   valid for a period of three (3) years from the date of the
+   distribution of this product by NVIDIA CORPORATION.
+
+   Component          License
+   CUDA-GDB           GPL v3
+
+2. Licensee represents and warrants that any and all third
+   party licensing and/or royalty payment obligations in
+   connection with Licensee's use of the H.264 video codecs
+   are solely the responsibility of Licensee.
+
+3. Licensee's use of the Thrust library is subject to the
+   terms and conditions of the Apache License Version 2.0.
+   All third-party software packages are copyright by their
+   respective authors. Apache License Version 2.0 terms and
+   conditions are hereby incorporated into the Agreement by
+   this reference.
+   http://www.apache.org/licenses/LICENSE-2.0.html
+
+   In addition, Licensee acknowledges the following notice:
+   Thrust includes source code from the Boost Iterator,
+   Tuple, System, and Random Number libraries.
+
+   Boost Software License - Version 1.0 - August 17th, 2003
+   . . . .
+
+   Permission is hereby granted, free of charge, to any person or
+   organization obtaining a copy of the software and accompanying
+   documentation covered by this license (the "Software") to use,
+   reproduce, display, distribute, execute, and transmit the Software,
+   and to prepare derivative works of the Software, and to permit
+   third-parties to whom the Software is furnished to do so, all
+   subject to the following:
+
+   The copyright notices in the Software and this entire statement,
+   including the above license grant, this restriction and the following
+   disclaimer, must be included in all copies of the Software, in whole
+   or in part, and all derivative works of the Software, unless such
+   copies or derivative works are solely in the form of machine-executable
+   object code generated by a source language processor.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND
+   NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
+   ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR
+   OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING
+   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+   OTHER DEALINGS IN THE SOFTWARE.
+
+4. Licensee's use of the LLVM third party component is
+   subject to the following terms and conditions:
+
+   ======================================================
+   LLVM Release License
+   ======================================================
+   University of Illinois/NCSA
+   Open Source License
+
+   Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign.
+   All rights reserved.
+
+   Developed by:
+
+   LLVM Team
+
+   University of Illinois at Urbana-Champaign
+
+   http://llvm.org
+
+   Permission is hereby granted, free of charge, to any person obtaining a copy
+   of this software and associated documentation files (the "Software"), to
+   deal with the Software without restriction, including without limitation the
+   rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+   sell copies of the Software, and to permit persons to whom the Software is
+   furnished to do so, subject to the following conditions:
+
+   * Redistributions of source code must retain the above copyright notice,
+     this list of conditions and the following disclaimers.
+
+   * Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimers in the
+     documentation and/or other materials provided with the distribution.
+
+   * Neither the names of the LLVM Team, University of Illinois at Urbana-
+     Champaign, nor the names of its contributors may be used to endorse or
+     promote products derived from this Software without specific prior
+     written permission.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+   THE CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+   OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+   ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+   DEALINGS WITH THE SOFTWARE.
+
+5. Licensee's use (e.g. nvprof) of the PCRE third party
+   component is subject to the following terms and
+   conditions:
+
+   ------------
+   PCRE LICENCE
+   ------------
+   PCRE is a library of functions to support regular expressions whose syntax
+   and semantics are as close as possible to those of the Perl 5 language.
+   Release 8 of PCRE is distributed under the terms of the "BSD" licence, as
+   specified below. The documentation for PCRE, supplied in the "doc"
+   directory, is distributed under the same terms as the software itself. The
+   basic library functions are written in C and are freestanding. Also
+   included in the distribution is a set of C++ wrapper functions, and a just-
+   in-time compiler that can be used to optimize pattern matching. These are
+   both optional features that can be omitted when the library is built.
+
+   THE BASIC LIBRARY FUNCTIONS
+   ---------------------------
+   Written by: Philip Hazel
+   Email local part: ph10
+   Email domain: cam.ac.uk
+   University of Cambridge Computing Service,
+   Cambridge, England.
+   Copyright (c) 1997-2012 University of Cambridge
+   All rights reserved.
+
+   PCRE JUST-IN-TIME COMPILATION SUPPORT
+   -------------------------------------
+   Written by: Zoltan Herczeg
+   Email local part: hzmester
+   Emain domain: freemail.hu
+   Copyright(c) 2010-2012 Zoltan Herczeg
+   All rights reserved.
+
+   STACK-LESS JUST-IN-TIME COMPILER
+   --------------------------------
+   Written by: Zoltan Herczeg
+   Email local part: hzmester
+   Emain domain: freemail.hu
+   Copyright(c) 2009-2012 Zoltan Herczeg
+   All rights reserved.
+
+   THE C++ WRAPPER FUNCTIONS
+   -------------------------
+   Contributed by: Google Inc.
+   Copyright (c) 2007-2012, Google Inc.
+   All rights reserved.
+
+   THE "BSD" LICENCE
+   -----------------
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are met:
+
+   * Redistributions of source code must retain the above copyright notice,
+     this list of conditions and the following disclaimer.
+
+   * Redistributions in binary form must reproduce the above copyright
+     notice, this list of conditions and the following disclaimer in the
+     documentation and/or other materials provided with the distribution.
+
+   * Neither the name of the University of Cambridge nor the name of Google
+     Inc. nor the names of their contributors may be used to endorse or
+     promote products derived from this software without specific prior
+     written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+   POSSIBILITY OF SUCH DAMAGE.
+
+6. Some of the cuBLAS library routines were written by or
+   derived from code written by Vasily Volkov and are subject
+   to the Modified Berkeley Software Distribution License as
+   follows:
+
+   Copyright (c) 2007-2009, Regents of the University of California
+
+   All rights reserved.
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+   * Redistributions of source code must retain the above copyright
+     notice, this list of conditions and the following disclaimer.
+
* Redistributions in binary form must reproduce the above
|
| 1135 |
+
copyright notice, this list of conditions and the following
|
| 1136 |
+
disclaimer in the documentation and/or other materials provided
|
| 1137 |
+
with the distribution.
|
| 1138 |
+
* Neither the name of the University of California, Berkeley nor
|
| 1139 |
+
the names of its contributors may be used to endorse or promote
|
| 1140 |
+
products derived from this software without specific prior
|
| 1141 |
+
written permission.
|
| 1142 |
+
|
| 1143 |
+
THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
|
| 1144 |
+
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
| 1145 |
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
| 1146 |
+
DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
|
| 1147 |
+
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
| 1148 |
+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
| 1149 |
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
| 1150 |
+
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
| 1151 |
+
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
| 1152 |
+
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
| 1153 |
+
POSSIBILITY OF SUCH DAMAGE.
|
| 1154 |
+
|
| 1155 |
+
7. Some of the cuBLAS library routines were written by or
|
| 1156 |
+
derived from code written by Davide Barbieri and are
|
| 1157 |
+
subject to the Modified Berkeley Software Distribution
|
| 1158 |
+
License as follows:
|
| 1159 |
+
|
| 1160 |
+
Copyright (c) 2008-2009 Davide Barbieri @ University of Rome Tor Vergata.
|
| 1161 |
+
|
| 1162 |
+
All rights reserved.
|
| 1163 |
+
|
| 1164 |
+
Redistribution and use in source and binary forms, with or without
|
| 1165 |
+
modification, are permitted provided that the following conditions are
|
| 1166 |
+
met:
|
| 1167 |
+
* Redistributions of source code must retain the above copyright
|
| 1168 |
+
notice, this list of conditions and the following disclaimer.
|
| 1169 |
+
* Redistributions in binary form must reproduce the above
|
| 1170 |
+
copyright notice, this list of conditions and the following
|
| 1171 |
+
disclaimer in the documentation and/or other materials provided
|
| 1172 |
+
with the distribution.
|
| 1173 |
+
* The name of the author may not be used to endorse or promote
|
| 1174 |
+
products derived from this software without specific prior
|
| 1175 |
+
written permission.
|
| 1176 |
+
|
| 1177 |
+
THIS SOFTWARE IS PROVIDED BY THE AUTHOR "AS IS" AND ANY EXPRESS OR
|
| 1178 |
+
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
| 1179 |
+
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
| 1180 |
+
DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
|
| 1181 |
+
INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
| 1182 |
+
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
| 1183 |
+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
| 1184 |
+
HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
| 1185 |
+
STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
|
| 1186 |
+
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
| 1187 |
+
POSSIBILITY OF SUCH DAMAGE.
|
| 1188 |
+
|
| 1189 |
+
8. Some of the cuBLAS library routines were derived from
|
| 1190 |
+
code developed by the University of Tennessee and are
|
| 1191 |
+
subject to the Modified Berkeley Software Distribution
|
| 1192 |
+
License as follows:
|
| 1193 |
+
|
| 1194 |
+
Copyright (c) 2010 The University of Tennessee.
|
| 1195 |
+
|
| 1196 |
+
All rights reserved.
|
| 1197 |
+
|
| 1198 |
+
Redistribution and use in source and binary forms, with or without
|
| 1199 |
+
modification, are permitted provided that the following conditions are
|
| 1200 |
+
met:
|
| 1201 |
+
* Redistributions of source code must retain the above copyright
|
| 1202 |
+
notice, this list of conditions and the following disclaimer.
|
| 1203 |
+
* Redistributions in binary form must reproduce the above
|
| 1204 |
+
copyright notice, this list of conditions and the following
|
| 1205 |
+
disclaimer listed in this license in the documentation and/or
|
| 1206 |
+
other materials provided with the distribution.
|
| 1207 |
+
* Neither the name of the copyright holders nor the names of its
|
| 1208 |
+
contributors may be used to endorse or promote products derived
|
| 1209 |
+
from this software without specific prior written permission.
|
| 1210 |
+
|
| 1211 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| 1212 |
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| 1213 |
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| 1214 |
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
| 1215 |
+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
| 1216 |
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
| 1217 |
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
| 1218 |
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
| 1219 |
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| 1220 |
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| 1221 |
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| 1222 |
+
|
| 1223 |
+
9. Some of the cuBLAS library routines were written by or
|
| 1224 |
+
derived from code written by Jonathan Hogg and are subject
|
| 1225 |
+
to the Modified Berkeley Software Distribution License as
|
| 1226 |
+
follows:
|
| 1227 |
+
|
| 1228 |
+
Copyright (c) 2012, The Science and Technology Facilities Council (STFC).
|
| 1229 |
+
|
| 1230 |
+
All rights reserved.
|
| 1231 |
+
|
| 1232 |
+
Redistribution and use in source and binary forms, with or without
|
| 1233 |
+
modification, are permitted provided that the following conditions are
|
| 1234 |
+
met:
|
| 1235 |
+
* Redistributions of source code must retain the above copyright
|
| 1236 |
+
notice, this list of conditions and the following disclaimer.
|
| 1237 |
+
* Redistributions in binary form must reproduce the above
|
| 1238 |
+
copyright notice, this list of conditions and the following
|
| 1239 |
+
disclaimer in the documentation and/or other materials provided
|
| 1240 |
+
with the distribution.
|
| 1241 |
+
* Neither the name of the STFC nor the names of its contributors
|
| 1242 |
+
may be used to endorse or promote products derived from this
|
| 1243 |
+
software without specific prior written permission.
|
| 1244 |
+
|
| 1245 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| 1246 |
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| 1247 |
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| 1248 |
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE STFC BE
|
| 1249 |
+
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
| 1250 |
+
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
| 1251 |
+
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
|
| 1252 |
+
BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
|
| 1253 |
+
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
|
| 1254 |
+
OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
|
| 1255 |
+
IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| 1256 |
+
|
| 1257 |
+
10. Some of the cuBLAS library routines were written by or
|
| 1258 |
+
derived from code written by Ahmad M. Abdelfattah, David
|
| 1259 |
+
Keyes, and Hatem Ltaief, and are subject to the Apache
|
| 1260 |
+
License, Version 2.0, as follows:
|
| 1261 |
+
|
| 1262 |
+
-- (C) Copyright 2013 King Abdullah University of Science and Technology
|
| 1263 |
+
Authors:
|
| 1264 |
+
Ahmad Abdelfattah (ahmad.ahmad@kaust.edu.sa)
|
| 1265 |
+
David Keyes (david.keyes@kaust.edu.sa)
|
| 1266 |
+
Hatem Ltaief (hatem.ltaief@kaust.edu.sa)
|
| 1267 |
+
|
| 1268 |
+
Redistribution and use in source and binary forms, with or without
|
| 1269 |
+
modification, are permitted provided that the following conditions
|
| 1270 |
+
are met:
|
| 1271 |
+
|
| 1272 |
+
* Redistributions of source code must retain the above copyright
|
| 1273 |
+
notice, this list of conditions and the following disclaimer.
|
| 1274 |
+
* Redistributions in binary form must reproduce the above copyright
|
| 1275 |
+
notice, this list of conditions and the following disclaimer in the
|
| 1276 |
+
documentation and/or other materials provided with the distribution.
|
| 1277 |
+
* Neither the name of the King Abdullah University of Science and
|
| 1278 |
+
Technology nor the names of its contributors may be used to endorse
|
| 1279 |
+
or promote products derived from this software without specific prior
|
| 1280 |
+
written permission.
|
| 1281 |
+
|
| 1282 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| 1283 |
+
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| 1284 |
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| 1285 |
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
| 1286 |
+
HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
| 1287 |
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
| 1288 |
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
| 1289 |
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
| 1290 |
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| 1291 |
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| 1292 |
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
|
| 1293 |
+
|
| 1294 |
+
11. Some of the cuSPARSE library routines were written by or
|
| 1295 |
+
derived from code written by Li-Wen Chang and are subject
|
| 1296 |
+
to the NCSA Open Source License as follows:
|
| 1297 |
+
|
| 1298 |
+
Copyright (c) 2012, University of Illinois.
|
| 1299 |
+
|
| 1300 |
+
All rights reserved.
|
| 1301 |
+
|
| 1302 |
+
Developed by: IMPACT Group, University of Illinois, http://impact.crhc.illinois.edu
|
| 1303 |
+
|
| 1304 |
+
Permission is hereby granted, free of charge, to any person obtaining
|
| 1305 |
+
a copy of this software and associated documentation files (the
|
| 1306 |
+
"Software"), to deal with the Software without restriction, including
|
| 1307 |
+
without limitation the rights to use, copy, modify, merge, publish,
|
| 1308 |
+
distribute, sublicense, and/or sell copies of the Software, and to
|
| 1309 |
+
permit persons to whom the Software is furnished to do so, subject to
|
| 1310 |
+
the following conditions:
|
| 1311 |
+
* Redistributions of source code must retain the above copyright
|
| 1312 |
+
notice, this list of conditions and the following disclaimer.
|
| 1313 |
+
* Redistributions in binary form must reproduce the above
|
| 1314 |
+
copyright notice, this list of conditions and the following
|
| 1315 |
+
disclaimers in the documentation and/or other materials provided
|
| 1316 |
+
with the distribution.
|
| 1317 |
+
* Neither the names of IMPACT Group, University of Illinois, nor
|
| 1318 |
+
the names of its contributors may be used to endorse or promote
|
| 1319 |
+
products derived from this Software without specific prior
|
| 1320 |
+
written permission.
|
| 1321 |
+
|
| 1322 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
| 1323 |
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
| 1324 |
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
| 1325 |
+
NONINFRINGEMENT. IN NO EVENT SHALL THE CONTRIBUTORS OR COPYRIGHT
|
| 1326 |
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
| 1327 |
+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR
|
| 1328 |
+
IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
|
| 1329 |
+
SOFTWARE.
|
| 1330 |
+
|
| 1331 |
+
12. Some of the cuRAND library routines were written by or
|
| 1332 |
+
derived from code written by Mutsuo Saito and Makoto
|
| 1333 |
+
Matsumoto and are subject to the following license:
|
| 1334 |
+
|
| 1335 |
+
Copyright (c) 2009, 2010 Mutsuo Saito, Makoto Matsumoto and Hiroshima
|
| 1336 |
+
University. All rights reserved.
|
| 1337 |
+
|
| 1338 |
+
Copyright (c) 2011 Mutsuo Saito, Makoto Matsumoto, Hiroshima
|
| 1339 |
+
University and University of Tokyo. All rights reserved.
|
| 1340 |
+
|
| 1341 |
+
Redistribution and use in source and binary forms, with or without
|
| 1342 |
+
modification, are permitted provided that the following conditions are
|
| 1343 |
+
met:
|
| 1344 |
+
* Redistributions of source code must retain the above copyright
|
| 1345 |
+
notice, this list of conditions and the following disclaimer.
|
| 1346 |
+
* Redistributions in binary form must reproduce the above
|
| 1347 |
+
copyright notice, this list of conditions and the following
|
| 1348 |
+
disclaimer in the documentation and/or other materials provided
|
| 1349 |
+
with the distribution.
|
| 1350 |
+
* Neither the name of the Hiroshima University nor the names of
|
| 1351 |
+
its contributors may be used to endorse or promote products
|
| 1352 |
+
derived from this software without specific prior written
|
| 1353 |
+
permission.
|
| 1354 |
+
|
| 1355 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| 1356 |
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| 1357 |
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| 1358 |
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
| 1359 |
+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
| 1360 |
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
| 1361 |
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
| 1362 |
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
| 1363 |
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| 1364 |
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| 1365 |
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| 1366 |
+
|
| 1367 |
+
13. Some of the cuRAND library routines were derived from
|
| 1368 |
+
code developed by D. E. Shaw Research and are subject to
|
| 1369 |
+
the following license:
|
| 1370 |
+
|
| 1371 |
+
Copyright 2010-2011, D. E. Shaw Research.
|
| 1372 |
+
|
| 1373 |
+
All rights reserved.
|
| 1374 |
+
|
| 1375 |
+
Redistribution and use in source and binary forms, with or without
|
| 1376 |
+
modification, are permitted provided that the following conditions are
|
| 1377 |
+
met:
|
| 1378 |
+
* Redistributions of source code must retain the above copyright
|
| 1379 |
+
notice, this list of conditions, and the following disclaimer.
|
| 1380 |
+
* Redistributions in binary form must reproduce the above
|
| 1381 |
+
copyright notice, this list of conditions, and the following
|
| 1382 |
+
disclaimer in the documentation and/or other materials provided
|
| 1383 |
+
with the distribution.
|
| 1384 |
+
* Neither the name of D. E. Shaw Research nor the names of its
|
| 1385 |
+
contributors may be used to endorse or promote products derived
|
| 1386 |
+
from this software without specific prior written permission.
|
| 1387 |
+
|
| 1388 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| 1389 |
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| 1390 |
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| 1391 |
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
| 1392 |
+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
| 1393 |
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
| 1394 |
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
| 1395 |
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
| 1396 |
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| 1397 |
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| 1398 |
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| 1399 |
+
|
| 1400 |
+
14. Some of the Math library routines were written by or
|
| 1401 |
+
derived from code developed by Norbert Juffa and are
|
| 1402 |
+
subject to the following license:
|
| 1403 |
+
|
| 1404 |
+
Copyright (c) 2015-2017, Norbert Juffa
|
| 1405 |
+
All rights reserved.
|
| 1406 |
+
|
| 1407 |
+
Redistribution and use in source and binary forms, with or without
|
| 1408 |
+
modification, are permitted provided that the following conditions
|
| 1409 |
+
are met:
|
| 1410 |
+
|
| 1411 |
+
1. Redistributions of source code must retain the above copyright
|
| 1412 |
+
notice, this list of conditions and the following disclaimer.
|
| 1413 |
+
|
| 1414 |
+
2. Redistributions in binary form must reproduce the above copyright
|
| 1415 |
+
notice, this list of conditions and the following disclaimer in the
|
| 1416 |
+
documentation and/or other materials provided with the distribution.
|
| 1417 |
+
|
| 1418 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| 1419 |
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| 1420 |
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| 1421 |
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
| 1422 |
+
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
| 1423 |
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
| 1424 |
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
| 1425 |
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
| 1426 |
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| 1427 |
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| 1428 |
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| 1429 |
+
|
| 1430 |
+
15. Licensee's use of the lz4 third party component is
|
| 1431 |
+
subject to the following terms and conditions:
|
| 1432 |
+
|
| 1433 |
+
Copyright (C) 2011-2013, Yann Collet.
|
| 1434 |
+
BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
|
| 1435 |
+
|
| 1436 |
+
Redistribution and use in source and binary forms, with or without
|
| 1437 |
+
modification, are permitted provided that the following conditions are
|
| 1438 |
+
met:
|
| 1439 |
+
|
| 1440 |
+
* Redistributions of source code must retain the above copyright
|
| 1441 |
+
notice, this list of conditions and the following disclaimer.
|
| 1442 |
+
* Redistributions in binary form must reproduce the above
|
| 1443 |
+
copyright notice, this list of conditions and the following disclaimer
|
| 1444 |
+
in the documentation and/or other materials provided with the
|
| 1445 |
+
distribution.
|
| 1446 |
+
|
| 1447 |
+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
| 1448 |
+
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
| 1449 |
+
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
| 1450 |
+
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
| 1451 |
+
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
| 1452 |
+
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
| 1453 |
+
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
| 1454 |
+
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
| 1455 |
+
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
| 1456 |
+
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
| 1457 |
+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
| 1458 |
+
|
| 1459 |
+
16. The NPP library uses code from the Boost Math Toolkit,
|
| 1460 |
+
and is subject to the following license:
|
| 1461 |
+
|
| 1462 |
+
Boost Software License - Version 1.0 - August 17th, 2003
|
| 1463 |
+
. . . .
|
| 1464 |
+
|
| 1465 |
+
Permission is hereby granted, free of charge, to any person or
|
| 1466 |
+
organization obtaining a copy of the software and accompanying
|
| 1467 |
+
documentation covered by this license (the "Software") to use,
|
| 1468 |
+
reproduce, display, distribute, execute, and transmit the Software,
|
| 1469 |
+
and to prepare derivative works of the Software, and to permit
|
| 1470 |
+
third-parties to whom the Software is furnished to do so, all
|
| 1471 |
+
subject to the following:
|
| 1472 |
+
|
| 1473 |
+
The copyright notices in the Software and this entire statement,
|
| 1474 |
+
including the above license grant, this restriction and the following
|
| 1475 |
+
disclaimer, must be included in all copies of the Software, in whole
|
| 1476 |
+
or in part, and all derivative works of the Software, unless such
|
| 1477 |
+
copies or derivative works are solely in the form of machine-executable
|
| 1478 |
+
object code generated by a source language processor.
|
| 1479 |
+
|
| 1480 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
| 1481 |
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
| 1482 |
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE AND
|
| 1483 |
+
NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR
|
| 1484 |
+
ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE FOR ANY DAMAGES OR
|
| 1485 |
+
OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, ARISING
|
| 1486 |
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
| 1487 |
+
OTHER DEALINGS IN THE SOFTWARE.
|
| 1488 |
+
|
| 1489 |
+
17. Portions of the Nsight Eclipse Edition is subject to the
|
| 1490 |
+
following license:
|
| 1491 |
+
|
| 1492 |
+
The Eclipse Foundation makes available all content in this plug-in
|
| 1493 |
+
("Content"). Unless otherwise indicated below, the Content is provided
|
| 1494 |
+
to you under the terms and conditions of the Eclipse Public License
|
| 1495 |
+
Version 1.0 ("EPL"). A copy of the EPL is available at http://
|
| 1496 |
+
www.eclipse.org/legal/epl-v10.html. For purposes of the EPL, "Program"
|
| 1497 |
+
will mean the Content.
|
| 1498 |
+
|
| 1499 |
+
If you did not receive this Content directly from the Eclipse
|
| 1500 |
+
Foundation, the Content is being redistributed by another party
|
| 1501 |
+
("Redistributor") and different terms and conditions may apply to your
|
| 1502 |
+
use of any object code in the Content. Check the Redistributor's
|
| 1503 |
+
license that was provided with the Content. If no such license exists,
|
| 1504 |
+
contact the Redistributor. Unless otherwise indicated below, the terms
|
| 1505 |
+
and conditions of the EPL still apply to any source code in the
|
| 1506 |
+
Content and such source code may be obtained at http://www.eclipse.org.
|
| 1507 |
+
|
| 1508 |
+
18. Some of the cuBLAS library routines uses code from
|
| 1509 |
+
OpenAI, which is subject to the following license:
|
| 1510 |
+
|
| 1511 |
+
License URL
|
| 1512 |
+
https://github.com/openai/openai-gemm/blob/master/LICENSE
|
| 1513 |
+
|
| 1514 |
+
License Text
|
| 1515 |
+
The MIT License
|
| 1516 |
+
|
| 1517 |
+
Copyright (c) 2016 OpenAI (http://openai.com), 2016 Google Inc.
|
| 1518 |
+
|
| 1519 |
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
| 1520 |
+
of this software and associated documentation files (the "Software"), to deal
|
| 1521 |
+
in the Software without restriction, including without limitation the rights
|
| 1522 |
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
| 1523 |
+
copies of the Software, and to permit persons to whom the Software is
|
| 1524 |
+
furnished to do so, subject to the following conditions:
|
| 1525 |
+
|
| 1526 |
+
The above copyright notice and this permission notice shall be included in
|
| 1527 |
+
all copies or substantial portions of the Software.
|
| 1528 |
+
|
| 1529 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
| 1530 |
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 1531 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 1532 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 1533 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 1534 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
| 1535 |
+
THE SOFTWARE.
|
| 1536 |
+
|
| 1537 |
+
19. Licensee's use of the Visual Studio Setup Configuration
|
| 1538 |
+
Samples is subject to the following license:
|
| 1539 |
+
|
| 1540 |
+
The MIT License (MIT)
|
| 1541 |
+
Copyright (C) Microsoft Corporation. All rights reserved.
|
| 1542 |
+
|
| 1543 |
+
Permission is hereby granted, free of charge, to any person
|
| 1544 |
+
obtaining a copy of this software and associated documentation
|
| 1545 |
+
files (the "Software"), to deal in the Software without restriction,
|
| 1546 |
+
including without limitation the rights to use, copy, modify, merge,
|
| 1547 |
+
publish, distribute, sublicense, and/or sell copies of the Software,
|
| 1548 |
+
and to permit persons to whom the Software is furnished to do so,
|
| 1549 |
+
subject to the following conditions:
|
| 1550 |
+
|
| 1551 |
+
The above copyright notice and this permission notice shall be included
|
| 1552 |
+
in all copies or substantial portions of the Software.
|
| 1553 |
+
|
| 1554 |
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
| 1555 |
+
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
| 1556 |
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
| 1557 |
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
| 1558 |
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
| 1559 |
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
| 1560 |
+
|
| 1561 |
+
20. Licensee's use of linmath.h header for CPU functions for
|
| 1562 |
+
GL vector/matrix operations from lunarG is subject to the
|
| 1563 |
+
Apache License Version 2.0.
|
| 1564 |
+
|
| 1565 |
+
21. The DX12-CUDA sample uses the d3dx12.h header, which is
|
| 1566 |
+
subject to the MIT license .
|
| 1567 |
+
|
| 1568 |
+
-----------------
|
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/METADATA
ADDED
@@ -0,0 +1,44 @@
Metadata-Version: 2.2
Name: nvidia-curand-cu12
Version: 10.3.9.90
Summary: CURAND native runtime libraries
Home-page: https://developer.nvidia.com/cuda-zone
Author: Nvidia CUDA Installer Team
Author-email: compute_installer@nvidia.com
License: NVIDIA Proprietary Software
Keywords: cuda,nvidia,runtime,machine learning,deep learning
Classifier: Development Status :: 4 - Beta
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Education
Classifier: Intended Audience :: Science/Research
Classifier: License :: Other/Proprietary License
Classifier: Natural Language :: English
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Topic :: Scientific/Engineering
Classifier: Topic :: Scientific/Engineering :: Mathematics
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
Classifier: Topic :: Software Development
Classifier: Topic :: Software Development :: Libraries
Classifier: Operating System :: Microsoft :: Windows
Classifier: Operating System :: POSIX :: Linux
Requires-Python: >=3
License-File: License.txt
Dynamic: author
Dynamic: author-email
Dynamic: classifier
Dynamic: description
Dynamic: home-page
Dynamic: keywords
Dynamic: license
Dynamic: requires-python
Dynamic: summary

CURAND native runtime libraries
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/RECORD
ADDED
@@ -0,0 +1,32 @@
nvidia/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
nvidia/__pycache__/__init__.cpython-312.pyc,,
nvidia/curand/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
nvidia/curand/__pycache__/__init__.cpython-312.pyc,,
nvidia/curand/include/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
nvidia/curand/include/__pycache__/__init__.cpython-312.pyc,,
nvidia/curand/include/curand.h,sha256=strQ9idlRTQoBJy_hAbAT4pgkW6BKYg8p_nUjbb8BVw,44075
nvidia/curand/include/curand_discrete.h,sha256=2qD3BkI622XEu0444wVP7HeYkKAx0Rjr2HDhqU4SA7E,3486
nvidia/curand/include/curand_discrete2.h,sha256=ZrQTO5R9x83AMX88uq7M8M94DLSC5VEz0PAkfcwtQeg,10883
nvidia/curand/include/curand_globals.h,sha256=bES1Kx0NrATXk1DReMMkqWrB062nOnaAp39y22wViXU,3717
nvidia/curand/include/curand_kernel.h,sha256=SjfAeh13ybXIxiekcgczzua02kIAqETopJKRhYvCat8,53133
nvidia/curand/include/curand_lognormal.h,sha256=-X-iNkJSzWpAYYjogm689EJTZfzore9sxU7ObddljLk,28142
nvidia/curand/include/curand_mrg32k3a.h,sha256=ZVVREjGNsJQJ-3IzZZ_LKGtGteslicb8E0Aly49BKPs,170296
nvidia/curand/include/curand_mtgp32.h,sha256=Qhrmx0pHWF-P2Uu5bKwYE9ymEWq3c7qBzCITVMaKMfI,7845
nvidia/curand/include/curand_mtgp32_host.h,sha256=SXqzmSQkzTLSRJ4pojTg_TNCC3T-G89HdBK-boSDqr4,18274
nvidia/curand/include/curand_mtgp32_kernel.h,sha256=ajZnXr5ZXnQExElf6LPpigrrKPTmMIZbRyTEnJ-BDhw,13731
nvidia/curand/include/curand_mtgp32dc_p_11213.h,sha256=7_gGYUH47UugIAEt60vYH5nFa-QUwTpDwSEgLg9cZts,276889
nvidia/curand/include/curand_normal.h,sha256=lnmYVk2fn0oEVWOytdKhXrHL36GLCjMnB8OnZeCaYcA,26953
nvidia/curand/include/curand_normal_static.h,sha256=5K4iTC9AuSWCe1LVxuj_0y3BVjtp0bxO6hndv2rbmiw,4727
nvidia/curand/include/curand_philox4x32_x.h,sha256=T21IP-Rdg3_tSVU9Je4dLKuwEqE4ovfwi7r1hOY92Dw,7166
nvidia/curand/include/curand_poisson.h,sha256=KrhXOmO_D7aclnj8geIyHqdpSQwWHurS9V_pVtgzodM,25461
nvidia/curand/include/curand_precalc.h,sha256=I6NZdgT42fMm9qSCtP-rlOAqt4Zsqgal0ajktcPmEak,1392393
nvidia/curand/include/curand_uniform.h,sha256=gpmRgQu5r6ppgLTg60NXoDdVJS6wMUy6jC5bh8l04e8,17472
nvidia/curand/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
nvidia/curand/lib/__pycache__/__init__.cpython-312.pyc,,
nvidia/curand/lib/libcurand.so.10,sha256=-b6gOKJwO3IVcf1FopmomBQf2MsmSlkSY1yVEW9ZYP4,136749240
nvidia_curand_cu12-10.3.9.90.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
nvidia_curand_cu12-10.3.9.90.dist-info/License.txt,sha256=rW9YU_ugyg0VnQ9Y1JrkmDDC-Mk_epJki5zpCttMbM0,59262
nvidia_curand_cu12-10.3.9.90.dist-info/METADATA,sha256=fU3xSITD3i7JIsVG2ZXO5i-aDlIls-ry2JUVICEsv28,1684
nvidia_curand_cu12-10.3.9.90.dist-info/RECORD,,
nvidia_curand_cu12-10.3.9.90.dist-info/WHEEL,sha256=VtFLEVB-VX8niQT4kQ5pcQOOqiKvUvqfZe5V14HmU88,109
nvidia_curand_cu12-10.3.9.90.dist-info/top_level.txt,sha256=fTkAtiFuL16nUrB9ytDDtpytz2t0B4NvYTnRzwAhO14,7
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/WHEEL
ADDED
@@ -0,0 +1,5 @@
Wheel-Version: 1.0
Generator: setuptools (75.8.0)
Root-Is-Purelib: true
Tag: py3-none-manylinux_2_27_x86_64
Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/nvidia_curand_cu12-10.3.9.90.dist-info/top_level.txt
ADDED
@@ -0,0 +1 @@
nvidia