diff --git a/.gitignore b/.gitignore
index b641685e34f2d11700d9ad75ceaf21fa65f2f665..10a199b667ef32d75f52423f819da4dbb9058df8 100644
Binary files a/.gitignore and b/.gitignore differ
diff --git a/Dockerfile b/Dockerfile
index 165497eb893ca8aeae6f6f28191e906d973fa278..4aaa8cb4a808c4490e62cc3e95d0fc5a91c8e48c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,15 +1,33 @@
-FROM python:3.10-slim
+FROM python:3.13-slim
-# System dependencies for bitsandbytes
+# Install system dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
     build-essential \
     && rm -rf /var/lib/apt/lists/*
-WORKDIR /app
+# Hugging Face Spaces requires running as a non-root user (UID 1000)
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+    PATH=/home/user/.local/bin:$PATH
-COPY requirements.txt .
+WORKDIR $HOME/app
+
+# Copy requirements
+COPY --chown=user requirements.txt .
+
+# Install PyTorch with CUDA 12.6 (cu126) wheels, matching our local battle-tested environment
+RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu126 --upgrade
+
+# Install remaining requirements and Triton explicitly for the Linux cloud environment
 RUN pip install --no-cache-dir -r requirements.txt
+RUN pip install --no-cache-dir triton
+
+# Copy application files
+COPY --chown=user . .
-COPY . .
+# Expose the mandatory port required by Hugging Face Spaces
+EXPOSE 7860
-CMD ["bash"]
+# Launch the core FastAPI application on the expected HF Spaces port
+CMD ["python", "-m", "uvicorn", "src.api.server:app", "--host", "0.0.0.0", "--port", "7860"]
diff --git a/Include/Python.h b/Include/Python.h
new file mode 100644
index 0000000000000000000000000000000000000000..0f75de6cb42f42948307c93f2392cb3c74b5a2da
--- /dev/null
+++ b/Include/Python.h
@@ -0,0 +1,140 @@
+// Entry point of the Python C API.
+// C extensions should only #include <Python.h>, and not include directly
+// the other Python header files included by <Python.h>.
+
+#ifndef Py_PYTHON_H
+#define Py_PYTHON_H
+
+// Since this is a "meta-include" file, "#ifdef __cplusplus / extern "C" {"
+// is not needed.
+
+
+// Include Python header files
+#include "patchlevel.h"
+#include "pyconfig.h"
+#include "pymacconfig.h"
+
+
+// Include standard header files
+// When changing these files, remember to update Doc/extending/extending.rst.
+#include <assert.h>               // assert()
+#include <stdint.h>               // uintptr_t
+#include <limits.h>               // INT_MAX
+#include <math.h>                 // HUGE_VAL
+#include <stdarg.h>               // va_list
+#include <wchar.h>                // wchar_t
+#ifdef HAVE_SYS_TYPES_H
+#  include <sys/types.h>          // ssize_t
+#endif
+
+// The <errno.h>, <stdio.h>, <stdlib.h> and <string.h> headers are no longer used
+// by Python, but kept for the backward compatibility of existing third party C
+// extensions. They are not included by limited C API version 3.11 and newer.
+//
+// The <ctype.h> and <unistd.h> headers are not included by limited C API
+// version 3.13 and newer.
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
+#  include <errno.h>              // errno
+#  include <stdio.h>              // FILE*
+#  include <stdlib.h>             // getenv()
+#  include <string.h>             // memcpy()
+#endif
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030d0000
+#  include <ctype.h>              // tolower()
+#  ifndef MS_WINDOWS
+#    include <unistd.h>           // close()
+#  endif
+#endif
+
+// gh-111506: The free-threaded build is not compatible with the limited API
+// or the stable ABI.
+#if defined(Py_LIMITED_API) && defined(Py_GIL_DISABLED)
+#  error "The limited API is not currently supported in the free-threaded build"
+#endif
+
+#if defined(Py_GIL_DISABLED) && defined(_MSC_VER)
+#  include <intrin.h>             // __readgsqword()
+#endif
+
+#if defined(Py_GIL_DISABLED) && defined(__MINGW32__)
+#  include <intrin.h>             // __readgsqword()
+#endif
+
+// Include Python header files
+#include "pyport.h"
+#include "pymacro.h"
+#include "pymath.h"
+#include "pymem.h"
+#include "pytypedefs.h"
+#include "pybuffer.h"
+#include "pystats.h"
+#include "pyatomic.h"
+#include "lock.h"
+#include "object.h"
+#include "objimpl.h"
+#include "typeslots.h"
+#include "pyhash.h"
+#include "cpython/pydebug.h"
+#include "bytearrayobject.h"
+#include "bytesobject.h"
+#include "unicodeobject.h"
+#include "pyerrors.h"
+#include "longobject.h"
+#include "cpython/longintrepr.h"
+#include "boolobject.h"
+#include "floatobject.h"
+#include "complexobject.h"
+#include "rangeobject.h"
+#include "memoryobject.h"
+#include "tupleobject.h"
+#include "listobject.h"
+#include "dictobject.h"
+#include "cpython/odictobject.h"
+#include "enumobject.h"
+#include "setobject.h"
+#include "methodobject.h"
+#include "moduleobject.h"
+#include "monitoring.h"
+#include "cpython/funcobject.h"
+#include "cpython/classobject.h"
+#include "fileobject.h"
+#include "pycapsule.h"
+#include "cpython/code.h"
+#include "pyframe.h"
+#include "traceback.h"
+#include "sliceobject.h"
+#include "cpython/cellobject.h"
+#include "iterobject.h"
+#include "cpython/initconfig.h"
+#include "pystate.h"
+#include "cpython/genobject.h"
+#include "descrobject.h"
+#include "genericaliasobject.h"
+#include "warnings.h"
+#include "weakrefobject.h"
+#include "structseq.h"
+#include "cpython/picklebufobject.h"
+#include "cpython/pytime.h"
+#include "codecs.h"
+#include "pythread.h"
+#include "cpython/context.h"
+#include "modsupport.h"
+#include "compile.h"
+#include "pythonrun.h"
+#include "pylifecycle.h"
+#include "ceval.h"
+#include "sysmodule.h"
+#include "osmodule.h"
+#include "intrcheck.h"
+#include "import.h"
+#include "abstract.h"
+#include "bltinmodule.h"
+#include "critical_section.h"
+#include "cpython/pyctype.h"
+#include "pystrtod.h"
+#include "pystrcmp.h"
+#include "fileutils.h"
+#include "cpython/pyfpe.h"
+#include "cpython/tracemalloc.h"
+
+#endif /* !Py_PYTHON_H */
diff --git a/Include/abstract.h b/Include/abstract.h
new file mode 100644
index 0000000000000000000000000000000000000000..98e1bbe4100fc75ddc191a1045b782046ab247e6
--- /dev/null
+++ b/Include/abstract.h
@@ -0,0 +1,921 @@
+/* Abstract Object Interface (many thanks to Jim Fulton) */
+
+#ifndef Py_ABSTRACTOBJECT_H
+#define Py_ABSTRACTOBJECT_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* === Object Protocol ================================================== */
+
+/* Implemented elsewhere:
+
+   int PyObject_Print(PyObject *o, FILE *fp, int flags);
+
+   Print an object 'o' on file 'fp'. Returns -1 on error. The flags argument
+   is used to enable certain printing options. The only option currently
+   supported is Py_PRINT_RAW. By default (flags=0), PyObject_Print() formats
+   the object by calling PyObject_Repr(). If flags equals Py_PRINT_RAW, it
+   formats the object by calling PyObject_Str(). */
+
+
+/* Implemented elsewhere:
+
+   int PyObject_HasAttrString(PyObject *o, const char *attr_name);
+
+   Returns 1 if object 'o' has the attribute attr_name, and 0 otherwise.
+
+   This is equivalent to the Python expression: hasattr(o,attr_name).
+
+   This function always succeeds.
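+
+   An illustrative sketch only (editor's example, not upstream text; "obj"
+   and the attribute name are hypothetical):
+
+       if (PyObject_HasAttrString(obj, "close")) {
+           PyObject *res = PyObject_CallMethod(obj, "close", NULL);
+           Py_XDECREF(res);
+       }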
*/
+
+
+/* Implemented elsewhere:
+
+   PyObject* PyObject_GetAttrString(PyObject *o, const char *attr_name);
+
+   Retrieve an attribute named attr_name from object o.
+   Returns the attribute value on success, or NULL on failure.
+
+   This is the equivalent of the Python expression: o.attr_name. */
+
+
+/* Implemented elsewhere:
+
+   int PyObject_HasAttr(PyObject *o, PyObject *attr_name);
+
+   Returns 1 if o has the attribute attr_name, and 0 otherwise.
+
+   This is equivalent to the Python expression: hasattr(o,attr_name).
+
+   This function always succeeds. */
+
+
+/* Implemented elsewhere:
+
+   int PyObject_HasAttrStringWithError(PyObject *o, const char *attr_name);
+
+   Returns 1 if object 'o' has the attribute attr_name, and 0 otherwise.
+   This is equivalent to the Python expression: hasattr(o,attr_name).
+   Returns -1 on failure. */
+
+
+/* Implemented elsewhere:
+
+   int PyObject_HasAttrWithError(PyObject *o, PyObject *attr_name);
+
+   Returns 1 if o has the attribute attr_name, and 0 otherwise.
+   This is equivalent to the Python expression: hasattr(o,attr_name).
+   Returns -1 on failure. */
+
+
+/* Implemented elsewhere:
+
+   PyObject* PyObject_GetAttr(PyObject *o, PyObject *attr_name);
+
+   Retrieve an attribute named 'attr_name' from object 'o'.
+   Returns the attribute value on success, or NULL on failure.
+
+   This is the equivalent of the Python expression: o.attr_name. */
+
+
+/* Implemented elsewhere:
+
+   int PyObject_GetOptionalAttr(PyObject *obj, PyObject *attr_name, PyObject **result);
+
+   Variant of PyObject_GetAttr() which doesn't raise AttributeError
+   if the attribute is not found.
+
+   If the attribute is found, return 1 and set *result to a new strong
+   reference to the attribute.
+   If the attribute is not found, return 0 and set *result to NULL;
+   the AttributeError is silenced.
+   If an error other than AttributeError is raised, return -1 and
+   set *result to NULL.
+*/
+
+
+/* Implemented elsewhere:
+
+   int PyObject_GetOptionalAttrString(PyObject *obj, const char *attr_name, PyObject **result);
+
+   Variant of PyObject_GetAttrString() which doesn't raise AttributeError
+   if the attribute is not found.
+
+   If the attribute is found, return 1 and set *result to a new strong
+   reference to the attribute.
+   If the attribute is not found, return 0 and set *result to NULL;
+   the AttributeError is silenced.
+   If an error other than AttributeError is raised, return -1 and
+   set *result to NULL.
+*/
+
+
+/* Implemented elsewhere:
+
+   int PyObject_SetAttrString(PyObject *o, const char *attr_name, PyObject *v);
+
+   Set the value of the attribute named attr_name, for object 'o',
+   to the value 'v'. Raise an exception and return -1 on failure; return 0 on
+   success.
+
+   This is the equivalent of the Python statement o.attr_name=v. */
+
+
+/* Implemented elsewhere:
+
+   int PyObject_SetAttr(PyObject *o, PyObject *attr_name, PyObject *v);
+
+   Set the value of the attribute named attr_name, for object 'o', to the value
+   'v'. Raise an exception and return -1 on failure; return 0 on success.
+
+   This is the equivalent of the Python statement o.attr_name=v. */
+
+/* Implemented elsewhere:
+
+   int PyObject_DelAttrString(PyObject *o, const char *attr_name);
+
+   Delete attribute named attr_name, for object o. Returns
+   -1 on failure.
+
+   This is the equivalent of the Python statement: del o.attr_name.
+
+   Implemented as a macro in the limited C API 3.12 and older.
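+
+   An illustrative set/delete round trip (editor's sketch; "obj" and the
+   attribute name are hypothetical):
+
+       if (PyObject_SetAttrString(obj, "cache", Py_None) < 0) {
+           return NULL;   // exception already set
+       }
+       if (PyObject_DelAttrString(obj, "cache") < 0) {
+           return NULL;
+       }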
*/ +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 < 0x030d0000 +# define PyObject_DelAttrString(O, A) PyObject_SetAttrString((O), (A), NULL) +#endif + + +/* Implemented elsewhere: + + int PyObject_DelAttr(PyObject *o, PyObject *attr_name); + + Delete attribute named attr_name, for object o. Returns -1 + on failure. This is the equivalent of the Python + statement: del o.attr_name. + + Implemented as a macro in the limited C API 3.12 and older. */ +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 < 0x030d0000 +# define PyObject_DelAttr(O, A) PyObject_SetAttr((O), (A), NULL) +#endif + + +/* Implemented elsewhere: + + PyObject *PyObject_Repr(PyObject *o); + + Compute the string representation of object 'o'. Returns the + string representation on success, NULL on failure. + + This is the equivalent of the Python expression: repr(o). + + Called by the repr() built-in function. */ + + +/* Implemented elsewhere: + + PyObject *PyObject_Str(PyObject *o); + + Compute the string representation of object, o. Returns the + string representation on success, NULL on failure. + + This is the equivalent of the Python expression: str(o). + + Called by the str() and print() built-in functions. */ + + +/* Declared elsewhere + + PyAPI_FUNC(int) PyCallable_Check(PyObject *o); + + Determine if the object, o, is callable. Return 1 if the object is callable + and 0 otherwise. + + This function always succeeds. */ + + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03090000 +/* Call a callable Python object without any arguments */ +PyAPI_FUNC(PyObject *) PyObject_CallNoArgs(PyObject *func); +#endif + + +/* Call a callable Python object 'callable' with arguments given by the + tuple 'args' and keywords arguments given by the dictionary 'kwargs'. + + 'args' must not be NULL, use an empty tuple if no arguments are + needed. If no named arguments are needed, 'kwargs' can be NULL. + + This is the equivalent of the Python expression: + callable(*args, **kwargs). */ +PyAPI_FUNC(PyObject *) PyObject_Call(PyObject *callable, + PyObject *args, PyObject *kwargs); + + +/* Call a callable Python object 'callable', with arguments given by the + tuple 'args'. If no arguments are needed, then 'args' can be NULL. + + Returns the result of the call on success, or NULL on failure. + + This is the equivalent of the Python expression: + callable(*args). */ +PyAPI_FUNC(PyObject *) PyObject_CallObject(PyObject *callable, + PyObject *args); + +/* Call a callable Python object, callable, with a variable number of C + arguments. The C arguments are described using a mkvalue-style format + string. + + The format may be NULL, indicating that no arguments are provided. + + Returns the result of the call on success, or NULL on failure. + + This is the equivalent of the Python expression: + callable(arg1, arg2, ...). */ +PyAPI_FUNC(PyObject *) PyObject_CallFunction(PyObject *callable, + const char *format, ...); + +/* Call the method named 'name' of object 'obj' with a variable number of + C arguments. The C arguments are described by a mkvalue format string. + + The format can be NULL, indicating that no arguments are provided. + + Returns the result of the call on success, or NULL on failure. + + This is the equivalent of the Python expression: + obj.name(arg1, arg2, ...). */ +PyAPI_FUNC(PyObject *) PyObject_CallMethod(PyObject *obj, + const char *name, + const char *format, ...); + +/* Call a callable Python object 'callable' with a variable number of C + arguments. 
The C arguments are provided as PyObject* values, terminated + by a NULL. + + Returns the result of the call on success, or NULL on failure. + + This is the equivalent of the Python expression: + callable(arg1, arg2, ...). */ +PyAPI_FUNC(PyObject *) PyObject_CallFunctionObjArgs(PyObject *callable, + ...); + +/* Call the method named 'name' of object 'obj' with a variable number of + C arguments. The C arguments are provided as PyObject* values, terminated + by NULL. + + Returns the result of the call on success, or NULL on failure. + + This is the equivalent of the Python expression: obj.name(*args). */ + +PyAPI_FUNC(PyObject *) PyObject_CallMethodObjArgs( + PyObject *obj, + PyObject *name, + ...); + +/* Given a vectorcall nargsf argument, return the actual number of arguments. + * (For use outside the limited API, this is re-defined as a static inline + * function in cpython/abstract.h) + */ +PyAPI_FUNC(Py_ssize_t) PyVectorcall_NARGS(size_t nargsf); + +/* Call "callable" (which must support vectorcall) with positional arguments + "tuple" and keyword arguments "dict". "dict" may also be NULL */ +PyAPI_FUNC(PyObject *) PyVectorcall_Call(PyObject *callable, PyObject *tuple, PyObject *dict); + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030C0000 +#define PY_VECTORCALL_ARGUMENTS_OFFSET \ + (_Py_STATIC_CAST(size_t, 1) << (8 * sizeof(size_t) - 1)) + +/* Perform a PEP 590-style vector call on 'callable' */ +PyAPI_FUNC(PyObject *) PyObject_Vectorcall( + PyObject *callable, + PyObject *const *args, + size_t nargsf, + PyObject *kwnames); + +/* Call the method 'name' on args[0] with arguments in args[1..nargsf-1]. */ +PyAPI_FUNC(PyObject *) PyObject_VectorcallMethod( + PyObject *name, PyObject *const *args, + size_t nargsf, PyObject *kwnames); +#endif + +/* Implemented elsewhere: + + Py_hash_t PyObject_Hash(PyObject *o); + + Compute and return the hash, hash_value, of an object, o. On + failure, return -1. + + This is the equivalent of the Python expression: hash(o). */ + + +/* Implemented elsewhere: + + int PyObject_IsTrue(PyObject *o); + + Returns 1 if the object, o, is considered to be true, 0 if o is + considered to be false and -1 on failure. + + This is equivalent to the Python expression: not not o. */ + + +/* Implemented elsewhere: + + int PyObject_Not(PyObject *o); + + Returns 0 if the object, o, is considered to be true, 1 if o is + considered to be false and -1 on failure. + + This is equivalent to the Python expression: not o. */ + + +/* Get the type of an object. + + On success, returns a type object corresponding to the object type of object + 'o'. On failure, returns NULL. + + This is equivalent to the Python expression: type(o) */ +PyAPI_FUNC(PyObject *) PyObject_Type(PyObject *o); + + +/* Return the size of object 'o'. If the object 'o' provides both sequence and + mapping protocols, the sequence size is returned. + + On error, -1 is returned. + + This is the equivalent to the Python expression: len(o) */ +PyAPI_FUNC(Py_ssize_t) PyObject_Size(PyObject *o); + + +/* For DLL compatibility */ +#undef PyObject_Length +PyAPI_FUNC(Py_ssize_t) PyObject_Length(PyObject *o); +#define PyObject_Length PyObject_Size + +/* Return element of 'o' corresponding to the object 'key'. Return NULL + on failure. + + This is the equivalent of the Python expression: o[key] */ +PyAPI_FUNC(PyObject *) PyObject_GetItem(PyObject *o, PyObject *key); + + +/* Map the object 'key' to the value 'v' into 'o'. + + Raise an exception and return -1 on failure; return 0 on success. 
+ + This is the equivalent of the Python statement: o[key]=v. */ +PyAPI_FUNC(int) PyObject_SetItem(PyObject *o, PyObject *key, PyObject *v); + +/* Remove the mapping for the string 'key' from the object 'o'. + Returns -1 on failure. + + This is equivalent to the Python statement: del o[key]. */ +PyAPI_FUNC(int) PyObject_DelItemString(PyObject *o, const char *key); + +/* Delete the mapping for the object 'key' from the object 'o'. + Returns -1 on failure. + + This is the equivalent of the Python statement: del o[key]. */ +PyAPI_FUNC(int) PyObject_DelItem(PyObject *o, PyObject *key); + + +/* Takes an arbitrary object and returns the result of calling + obj.__format__(format_spec). */ +PyAPI_FUNC(PyObject *) PyObject_Format(PyObject *obj, + PyObject *format_spec); + + +/* ==== Iterators ================================================ */ + +/* Takes an object and returns an iterator for it. + This is typically a new iterator but if the argument is an iterator, this + returns itself. */ +PyAPI_FUNC(PyObject *) PyObject_GetIter(PyObject *); + +/* Takes an AsyncIterable object and returns an AsyncIterator for it. + This is typically a new iterator but if the argument is an AsyncIterator, + this returns itself. */ +PyAPI_FUNC(PyObject *) PyObject_GetAIter(PyObject *); + +/* Returns non-zero if the object 'obj' provides iterator protocols, and 0 otherwise. + + This function always succeeds. */ +PyAPI_FUNC(int) PyIter_Check(PyObject *); + +/* Returns non-zero if the object 'obj' provides AsyncIterator protocols, and 0 otherwise. + + This function always succeeds. */ +PyAPI_FUNC(int) PyAIter_Check(PyObject *); + +/* Takes an iterator object and calls its tp_iternext slot, + returning the next value. + + If the iterator is exhausted, this returns NULL without setting an + exception. + + NULL with an exception means an error occurred. */ +PyAPI_FUNC(PyObject *) PyIter_Next(PyObject *); + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030A0000 + +/* Takes generator, coroutine or iterator object and sends the value into it. + Returns: + - PYGEN_RETURN (0) if generator has returned. + 'result' parameter is filled with return value + - PYGEN_ERROR (-1) if exception was raised. + 'result' parameter is NULL + - PYGEN_NEXT (1) if generator has yielded. + 'result' parameter is filled with yielded value. */ +PyAPI_FUNC(PySendResult) PyIter_Send(PyObject *, PyObject *, PyObject **); +#endif + + +/* === Number Protocol ================================================== */ + +/* Returns 1 if the object 'o' provides numeric protocols, and 0 otherwise. + + This function always succeeds. */ +PyAPI_FUNC(int) PyNumber_Check(PyObject *o); + +/* Returns the result of adding o1 and o2, or NULL on failure. + + This is the equivalent of the Python expression: o1 + o2. */ +PyAPI_FUNC(PyObject *) PyNumber_Add(PyObject *o1, PyObject *o2); + +/* Returns the result of subtracting o2 from o1, or NULL on failure. + + This is the equivalent of the Python expression: o1 - o2. */ +PyAPI_FUNC(PyObject *) PyNumber_Subtract(PyObject *o1, PyObject *o2); + +/* Returns the result of multiplying o1 and o2, or NULL on failure. + + This is the equivalent of the Python expression: o1 * o2. */ +PyAPI_FUNC(PyObject *) PyNumber_Multiply(PyObject *o1, PyObject *o2); + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000 +/* This is the equivalent of the Python expression: o1 @ o2. 
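+
+   An illustrative sketch (editor's example) chaining the binary number
+   APIs above; every result is a new strong reference:
+
+       PyObject *sum = PyNumber_Add(a, b);           // a + b
+       if (sum == NULL) {
+           return NULL;
+       }
+       PyObject *scaled = PyNumber_Multiply(sum, c); // (a + b) * c
+       Py_DECREF(sum);
+       if (scaled == NULL) {
+           return NULL;
+       }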
*/ +PyAPI_FUNC(PyObject *) PyNumber_MatrixMultiply(PyObject *o1, PyObject *o2); +#endif + +/* Returns the result of dividing o1 by o2 giving an integral result, + or NULL on failure. + + This is the equivalent of the Python expression: o1 // o2. */ +PyAPI_FUNC(PyObject *) PyNumber_FloorDivide(PyObject *o1, PyObject *o2); + +/* Returns the result of dividing o1 by o2 giving a float result, or NULL on + failure. + + This is the equivalent of the Python expression: o1 / o2. */ +PyAPI_FUNC(PyObject *) PyNumber_TrueDivide(PyObject *o1, PyObject *o2); + +/* Returns the remainder of dividing o1 by o2, or NULL on failure. + + This is the equivalent of the Python expression: o1 % o2. */ +PyAPI_FUNC(PyObject *) PyNumber_Remainder(PyObject *o1, PyObject *o2); + +/* See the built-in function divmod. + + Returns NULL on failure. + + This is the equivalent of the Python expression: divmod(o1, o2). */ +PyAPI_FUNC(PyObject *) PyNumber_Divmod(PyObject *o1, PyObject *o2); + +/* See the built-in function pow. Returns NULL on failure. + + This is the equivalent of the Python expression: pow(o1, o2, o3), + where o3 is optional. */ +PyAPI_FUNC(PyObject *) PyNumber_Power(PyObject *o1, PyObject *o2, + PyObject *o3); + +/* Returns the negation of o on success, or NULL on failure. + + This is the equivalent of the Python expression: -o. */ +PyAPI_FUNC(PyObject *) PyNumber_Negative(PyObject *o); + +/* Returns the positive of o on success, or NULL on failure. + + This is the equivalent of the Python expression: +o. */ +PyAPI_FUNC(PyObject *) PyNumber_Positive(PyObject *o); + +/* Returns the absolute value of 'o', or NULL on failure. + + This is the equivalent of the Python expression: abs(o). */ +PyAPI_FUNC(PyObject *) PyNumber_Absolute(PyObject *o); + +/* Returns the bitwise negation of 'o' on success, or NULL on failure. + + This is the equivalent of the Python expression: ~o. */ +PyAPI_FUNC(PyObject *) PyNumber_Invert(PyObject *o); + +/* Returns the result of left shifting o1 by o2 on success, or NULL on failure. + + This is the equivalent of the Python expression: o1 << o2. */ +PyAPI_FUNC(PyObject *) PyNumber_Lshift(PyObject *o1, PyObject *o2); + +/* Returns the result of right shifting o1 by o2 on success, or NULL on + failure. + + This is the equivalent of the Python expression: o1 >> o2. */ +PyAPI_FUNC(PyObject *) PyNumber_Rshift(PyObject *o1, PyObject *o2); + +/* Returns the result of bitwise and of o1 and o2 on success, or NULL on + failure. + + This is the equivalent of the Python expression: o1 & o2. */ +PyAPI_FUNC(PyObject *) PyNumber_And(PyObject *o1, PyObject *o2); + +/* Returns the bitwise exclusive or of o1 by o2 on success, or NULL on failure. + + This is the equivalent of the Python expression: o1 ^ o2. */ +PyAPI_FUNC(PyObject *) PyNumber_Xor(PyObject *o1, PyObject *o2); + +/* Returns the result of bitwise or on o1 and o2 on success, or NULL on + failure. + + This is the equivalent of the Python expression: o1 | o2. */ +PyAPI_FUNC(PyObject *) PyNumber_Or(PyObject *o1, PyObject *o2); + +/* Returns 1 if obj is an index integer (has the nb_index slot of the + tp_as_number structure filled in), and 0 otherwise. */ +PyAPI_FUNC(int) PyIndex_Check(PyObject *); + +/* Returns the object 'o' converted to a Python int, or NULL with an exception + raised on failure. */ +PyAPI_FUNC(PyObject *) PyNumber_Index(PyObject *o); + +/* Returns the object 'o' converted to Py_ssize_t by going through + PyNumber_Index() first. 
+
+   If an overflow error occurs while converting the int to Py_ssize_t, then the
+   second argument 'exc' is the error-type to return. If it is NULL, then the
+   overflow error is cleared and the value is clipped. */
+PyAPI_FUNC(Py_ssize_t) PyNumber_AsSsize_t(PyObject *o, PyObject *exc);
+
+/* Returns the object 'o' converted to an integer object on success, or NULL
+   on failure.
+
+   This is the equivalent of the Python expression: int(o). */
+PyAPI_FUNC(PyObject *) PyNumber_Long(PyObject *o);
+
+/* Returns the object 'o' converted to a float object on success, or NULL
+   on failure.
+
+   This is the equivalent of the Python expression: float(o). */
+PyAPI_FUNC(PyObject *) PyNumber_Float(PyObject *o);
+
+
+/* --- In-place variants of (some of) the above number protocol functions -- */
+
+/* Returns the result of adding o2 to o1, possibly in-place, or NULL
+   on failure.
+
+   This is the equivalent of the Python expression: o1 += o2. */
+PyAPI_FUNC(PyObject *) PyNumber_InPlaceAdd(PyObject *o1, PyObject *o2);
+
+/* Returns the result of subtracting o2 from o1, possibly in-place or
+   NULL on failure.
+
+   This is the equivalent of the Python expression: o1 -= o2. */
+PyAPI_FUNC(PyObject *) PyNumber_InPlaceSubtract(PyObject *o1, PyObject *o2);
+
+/* Returns the result of multiplying o1 by o2, possibly in-place, or NULL on
+   failure.
+
+   This is the equivalent of the Python expression: o1 *= o2. */
+PyAPI_FUNC(PyObject *) PyNumber_InPlaceMultiply(PyObject *o1, PyObject *o2);
+
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000
+/* This is the equivalent of the Python expression: o1 @= o2. */
+PyAPI_FUNC(PyObject *) PyNumber_InPlaceMatrixMultiply(PyObject *o1, PyObject *o2);
+#endif
+
+/* Returns the result of dividing o1 by o2 giving an integral result, possibly
+   in-place, or NULL on failure.
+
+   This is the equivalent of the Python expression: o1 //= o2. */
+PyAPI_FUNC(PyObject *) PyNumber_InPlaceFloorDivide(PyObject *o1,
+                                                   PyObject *o2);
+
+/* Returns the result of dividing o1 by o2 giving a float result, possibly
+   in-place, or NULL on failure.
+
+   This is the equivalent of the Python expression: o1 /= o2. */
+PyAPI_FUNC(PyObject *) PyNumber_InPlaceTrueDivide(PyObject *o1,
+                                                  PyObject *o2);
+
+/* Returns the remainder of dividing o1 by o2, possibly in-place, or NULL on
+   failure.
+
+   This is the equivalent of the Python expression: o1 %= o2. */
+PyAPI_FUNC(PyObject *) PyNumber_InPlaceRemainder(PyObject *o1, PyObject *o2);
+
+/* Returns the result of raising o1 to the power of o2, possibly in-place,
+   or NULL on failure.
+
+   This is the equivalent of the Python expression: o1 **= o2,
+   or o1 = pow(o1, o2, o3) if o3 is present. */
+PyAPI_FUNC(PyObject *) PyNumber_InPlacePower(PyObject *o1, PyObject *o2,
+                                             PyObject *o3);
+
+/* Returns the result of left shifting o1 by o2, possibly in-place, or NULL
+   on failure.
+
+   This is the equivalent of the Python expression: o1 <<= o2. */
+PyAPI_FUNC(PyObject *) PyNumber_InPlaceLshift(PyObject *o1, PyObject *o2);
+
+/* Returns the result of right shifting o1 by o2, possibly in-place or NULL
+   on failure.
+
+   This is the equivalent of the Python expression: o1 >>= o2. */
+PyAPI_FUNC(PyObject *) PyNumber_InPlaceRshift(PyObject *o1, PyObject *o2);
+
+/* Returns the result of bitwise and of o1 and o2, possibly in-place, or NULL
+   on failure.
+
+   This is the equivalent of the Python expression: o1 &= o2.
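+
+   Note that "possibly in-place" still returns a new strong reference, so
+   the usual pattern (an illustrative editor's sketch) is:
+
+       PyObject *tmp = PyNumber_InPlaceAdd(total, item);   // total += item
+       if (tmp == NULL) {
+           return NULL;
+       }
+       Py_DECREF(total);
+       total = tmp;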
*/
+PyAPI_FUNC(PyObject *) PyNumber_InPlaceAnd(PyObject *o1, PyObject *o2);
+
+/* Returns the bitwise exclusive or of o1 by o2, possibly in-place, or NULL
+   on failure.
+
+   This is the equivalent of the Python expression: o1 ^= o2. */
+PyAPI_FUNC(PyObject *) PyNumber_InPlaceXor(PyObject *o1, PyObject *o2);
+
+/* Returns the result of bitwise or of o1 and o2, possibly in-place,
+   or NULL on failure.
+
+   This is the equivalent of the Python expression: o1 |= o2. */
+PyAPI_FUNC(PyObject *) PyNumber_InPlaceOr(PyObject *o1, PyObject *o2);
+
+/* Returns the integer n converted to a string with a base, with a base
+   marker of 0b, 0o or 0x prefixed if applicable.
+
+   If n is not an int object, it is converted with PyNumber_Index first. */
+PyAPI_FUNC(PyObject *) PyNumber_ToBase(PyObject *n, int base);
+
+
+/* === Sequence protocol ================================================ */
+
+/* Return 1 if the object provides sequence protocol, and zero
+   otherwise.
+
+   This function always succeeds. */
+PyAPI_FUNC(int) PySequence_Check(PyObject *o);
+
+/* Return the size of sequence object o, or -1 on failure. */
+PyAPI_FUNC(Py_ssize_t) PySequence_Size(PyObject *o);
+
+/* For DLL compatibility */
+#undef PySequence_Length
+PyAPI_FUNC(Py_ssize_t) PySequence_Length(PyObject *o);
+#define PySequence_Length PySequence_Size
+
+
+/* Return the concatenation of o1 and o2 on success, and NULL on failure.
+
+   This is the equivalent of the Python expression: o1 + o2. */
+PyAPI_FUNC(PyObject *) PySequence_Concat(PyObject *o1, PyObject *o2);
+
+/* Return the result of repeating sequence object 'o' 'count' times,
+   or NULL on failure.
+
+   This is the equivalent of the Python expression: o * count. */
+PyAPI_FUNC(PyObject *) PySequence_Repeat(PyObject *o, Py_ssize_t count);
+
+/* Return the ith element of o, or NULL on failure.
+
+   This is the equivalent of the Python expression: o[i]. */
+PyAPI_FUNC(PyObject *) PySequence_GetItem(PyObject *o, Py_ssize_t i);
+
+/* Return the slice of sequence object o between i1 and i2, or NULL on failure.
+
+   This is the equivalent of the Python expression: o[i1:i2]. */
+PyAPI_FUNC(PyObject *) PySequence_GetSlice(PyObject *o, Py_ssize_t i1, Py_ssize_t i2);
+
+/* Assign object 'v' to the ith element of the sequence 'o'. Raise an exception
+   and return -1 on failure; return 0 on success.
+
+   This is the equivalent of the Python statement o[i] = v. */
+PyAPI_FUNC(int) PySequence_SetItem(PyObject *o, Py_ssize_t i, PyObject *v);
+
+/* Delete the 'i'-th element of the sequence 'o'. Returns -1 on failure.
+
+   This is the equivalent of the Python statement: del o[i]. */
+PyAPI_FUNC(int) PySequence_DelItem(PyObject *o, Py_ssize_t i);
+
+/* Assign the sequence object 'v' to the slice in sequence object 'o',
+   from 'i1' to 'i2'. Returns -1 on failure.
+
+   This is the equivalent of the Python statement: o[i1:i2] = v. */
+PyAPI_FUNC(int) PySequence_SetSlice(PyObject *o, Py_ssize_t i1, Py_ssize_t i2,
+                                    PyObject *v);
+
+/* Delete the slice in sequence object 'o' from 'i1' to 'i2'.
+   Returns -1 on failure.
+
+   This is the equivalent of the Python statement: del o[i1:i2]. */
+PyAPI_FUNC(int) PySequence_DelSlice(PyObject *o, Py_ssize_t i1, Py_ssize_t i2);
+
+/* Returns the sequence 'o' as a tuple on success, and NULL on failure.
+
+   This is equivalent to the Python expression: tuple(o). */
+PyAPI_FUNC(PyObject *) PySequence_Tuple(PyObject *o);
+
+/* Returns the sequence 'o' as a list on success, and NULL on failure.
+   This is equivalent to the Python expression: list(o) */
+PyAPI_FUNC(PyObject *) PySequence_List(PyObject *o);
+
+/* Return the sequence 'o' as a list, unless it's already a tuple or list.
+
+   Use PySequence_Fast_GET_ITEM to access the members of this list, and
+   PySequence_Fast_GET_SIZE to get its length.
+
+   Returns NULL on failure. If the object does not support iteration, raises a
+   TypeError exception with 'm' as the message text. */
+PyAPI_FUNC(PyObject *) PySequence_Fast(PyObject *o, const char* m);
+
+/* Return the size of the sequence 'o', assuming that 'o' was returned by
+   PySequence_Fast and is not NULL. */
+#define PySequence_Fast_GET_SIZE(o) \
+    (PyList_Check(o) ? PyList_GET_SIZE(o) : PyTuple_GET_SIZE(o))
+
+/* Return the 'i'-th element of the sequence 'o', assuming that o was returned
+   by PySequence_Fast, and that i is within bounds. */
+#define PySequence_Fast_GET_ITEM(o, i)\
+    (PyList_Check(o) ? PyList_GET_ITEM((o), (i)) : PyTuple_GET_ITEM((o), (i)))
+
+/* Return a pointer to the underlying item array for
+   an object returned by PySequence_Fast */
+#define PySequence_Fast_ITEMS(sf) \
+    (PyList_Check(sf) ? ((PyListObject *)(sf))->ob_item \
+                      : ((PyTupleObject *)(sf))->ob_item)
+
+/* Return the number of occurrences of value in 'o', that is, return
+   the number of keys for which o[key] == value.
+
+   On failure, return -1. This is equivalent to the Python expression:
+   o.count(value). */
+PyAPI_FUNC(Py_ssize_t) PySequence_Count(PyObject *o, PyObject *value);
+
+/* Return 1 if 'ob' is in the sequence 'seq'; 0 if 'ob' is not in the sequence
+   'seq'; -1 on error.
+
+   Use __contains__ if possible, else _PySequence_IterSearch(). */
+PyAPI_FUNC(int) PySequence_Contains(PyObject *seq, PyObject *ob);
+
+/* For DLL-level backwards compatibility */
+#undef PySequence_In
+/* Determine if the sequence 'o' contains 'value'. If an item in 'o' is equal
+   to 'value', return 1, otherwise return 0. On error, return -1.
+
+   This is equivalent to the Python expression: value in o. */
+PyAPI_FUNC(int) PySequence_In(PyObject *o, PyObject *value);
+
+/* For source-level backwards compatibility */
+#define PySequence_In PySequence_Contains
+
+
+/* Return the first index for which o[i] == value.
+   On error, return -1.
+
+   This is equivalent to the Python expression: o.index(value). */
+PyAPI_FUNC(Py_ssize_t) PySequence_Index(PyObject *o, PyObject *value);
+
+
+/* --- In-place versions of some of the above Sequence functions --- */
+
+/* Append sequence 'o2' to sequence 'o1', in-place when possible. Return the
+   resulting object, which could be 'o1', or NULL on failure.
+
+   This is the equivalent of the Python expression: o1 += o2. */
+PyAPI_FUNC(PyObject *) PySequence_InPlaceConcat(PyObject *o1, PyObject *o2);
+
+/* Repeat sequence 'o' by 'count', in-place when possible. Return the resulting
+   object, which could be 'o', or NULL on failure.
+
+   This is the equivalent of the Python expression: o *= count. */
+PyAPI_FUNC(PyObject *) PySequence_InPlaceRepeat(PyObject *o, Py_ssize_t count);
+
+
+/* === Mapping protocol ================================================= */
+
+/* Return 1 if the object provides mapping protocol, and 0 otherwise.
+
+   This function always succeeds. */
+PyAPI_FUNC(int) PyMapping_Check(PyObject *o);
+
+/* Returns the number of keys in mapping object 'o' on success, and -1 on
+   failure. This is equivalent to the Python expression: len(o).
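+
+   For example (an illustrative editor's sketch; "mapping" is hypothetical):
+
+       Py_ssize_t n = PyMapping_Size(mapping);
+       if (n < 0) {
+           return NULL;    // not a mapping, or len() failed
+       }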
*/ +PyAPI_FUNC(Py_ssize_t) PyMapping_Size(PyObject *o); + +/* For DLL compatibility */ +#undef PyMapping_Length +PyAPI_FUNC(Py_ssize_t) PyMapping_Length(PyObject *o); +#define PyMapping_Length PyMapping_Size + + +/* Implemented as a macro: + + int PyMapping_DelItemString(PyObject *o, const char *key); + + Remove the mapping for the string 'key' from the mapping 'o'. Returns -1 on + failure. + + This is equivalent to the Python statement: del o[key]. */ +#define PyMapping_DelItemString(O, K) PyObject_DelItemString((O), (K)) + +/* Implemented as a macro: + + int PyMapping_DelItem(PyObject *o, PyObject *key); + + Remove the mapping for the object 'key' from the mapping object 'o'. + Returns -1 on failure. + + This is equivalent to the Python statement: del o[key]. */ +#define PyMapping_DelItem(O, K) PyObject_DelItem((O), (K)) + +/* On success, return 1 if the mapping object 'o' has the key 'key', + and 0 otherwise. + + This is equivalent to the Python expression: key in o. + + This function always succeeds. */ +PyAPI_FUNC(int) PyMapping_HasKeyString(PyObject *o, const char *key); + +/* Return 1 if the mapping object has the key 'key', and 0 otherwise. + + This is equivalent to the Python expression: key in o. + + This function always succeeds. */ +PyAPI_FUNC(int) PyMapping_HasKey(PyObject *o, PyObject *key); + +/* Return 1 if the mapping object has the key 'key', and 0 otherwise. + This is equivalent to the Python expression: key in o. + On failure, return -1. */ + +PyAPI_FUNC(int) PyMapping_HasKeyWithError(PyObject *o, PyObject *key); + +/* Return 1 if the mapping object has the key 'key', and 0 otherwise. + This is equivalent to the Python expression: key in o. + On failure, return -1. */ + +PyAPI_FUNC(int) PyMapping_HasKeyStringWithError(PyObject *o, const char *key); + +/* On success, return a list or tuple of the keys in mapping object 'o'. + On failure, return NULL. */ +PyAPI_FUNC(PyObject *) PyMapping_Keys(PyObject *o); + +/* On success, return a list or tuple of the values in mapping object 'o'. + On failure, return NULL. */ +PyAPI_FUNC(PyObject *) PyMapping_Values(PyObject *o); + +/* On success, return a list or tuple of the items in mapping object 'o', + where each item is a tuple containing a key-value pair. On failure, return + NULL. */ +PyAPI_FUNC(PyObject *) PyMapping_Items(PyObject *o); + +/* Return element of 'o' corresponding to the string 'key' or NULL on failure. + + This is the equivalent of the Python expression: o[key]. */ +PyAPI_FUNC(PyObject *) PyMapping_GetItemString(PyObject *o, + const char *key); + +/* Variants of PyObject_GetItem() and PyMapping_GetItemString() which don't + raise KeyError if the key is not found. + + If the key is found, return 1 and set *result to a new strong + reference to the corresponding value. + If the key is not found, return 0 and set *result to NULL; + the KeyError is silenced. + If an error other than KeyError is raised, return -1 and + set *result to NULL. +*/ +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030d0000 +PyAPI_FUNC(int) PyMapping_GetOptionalItem(PyObject *, PyObject *, PyObject **); +PyAPI_FUNC(int) PyMapping_GetOptionalItemString(PyObject *, const char *, PyObject **); +#endif + +/* Map the string 'key' to the value 'v' in the mapping 'o'. + Returns -1 on failure. + + This is the equivalent of the Python statement: o[key]=v. 
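+
+   A usage sketch (editor's illustration; "config" is a hypothetical
+   mapping):
+
+       PyObject *value = PyLong_FromLong(42);
+       if (value == NULL) {
+           return NULL;
+       }
+       int rc = PyMapping_SetItemString(config, "timeout", value);
+       Py_DECREF(value);   // the mapping holds its own reference
+       if (rc < 0) {
+           return NULL;
+       }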
*/ +PyAPI_FUNC(int) PyMapping_SetItemString(PyObject *o, const char *key, + PyObject *value); + +/* isinstance(object, typeorclass) */ +PyAPI_FUNC(int) PyObject_IsInstance(PyObject *object, PyObject *typeorclass); + +/* issubclass(object, typeorclass) */ +PyAPI_FUNC(int) PyObject_IsSubclass(PyObject *object, PyObject *typeorclass); + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_ABSTRACTOBJECT_H +# include "cpython/abstract.h" +# undef Py_CPYTHON_ABSTRACTOBJECT_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* Py_ABSTRACTOBJECT_H */ diff --git a/Include/bltinmodule.h b/Include/bltinmodule.h new file mode 100644 index 0000000000000000000000000000000000000000..868c9e6443bfc1d1d48fb0806af1bf21490fc44c --- /dev/null +++ b/Include/bltinmodule.h @@ -0,0 +1,14 @@ +#ifndef Py_BLTINMODULE_H +#define Py_BLTINMODULE_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_DATA(PyTypeObject) PyFilter_Type; +PyAPI_DATA(PyTypeObject) PyMap_Type; +PyAPI_DATA(PyTypeObject) PyZip_Type; + +#ifdef __cplusplus +} +#endif +#endif /* !Py_BLTINMODULE_H */ diff --git a/Include/boolobject.h b/Include/boolobject.h new file mode 100644 index 0000000000000000000000000000000000000000..b56e2baecaa36c54c11ea90ac50ffe9921b8b5e0 --- /dev/null +++ b/Include/boolobject.h @@ -0,0 +1,54 @@ +/* Boolean object interface */ + +#ifndef Py_BOOLOBJECT_H +#define Py_BOOLOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + + +// PyBool_Type is declared by object.h + +#define PyBool_Check(x) Py_IS_TYPE((x), &PyBool_Type) + +/* Py_False and Py_True are the only two bools in existence. */ + +/* Don't use these directly */ +PyAPI_DATA(PyLongObject) _Py_FalseStruct; +PyAPI_DATA(PyLongObject) _Py_TrueStruct; + +/* Use these macros */ +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030D0000 +# define Py_False Py_GetConstantBorrowed(Py_CONSTANT_FALSE) +# define Py_True Py_GetConstantBorrowed(Py_CONSTANT_TRUE) +#else +# define Py_False _PyObject_CAST(&_Py_FalseStruct) +# define Py_True _PyObject_CAST(&_Py_TrueStruct) +#endif + +// Test if an object is the True singleton, the same as "x is True" in Python. +PyAPI_FUNC(int) Py_IsTrue(PyObject *x); +#define Py_IsTrue(x) Py_Is((x), Py_True) + +// Test if an object is the False singleton, the same as "x is False" in Python. +PyAPI_FUNC(int) Py_IsFalse(PyObject *x); +#define Py_IsFalse(x) Py_Is((x), Py_False) + +/* Macros for returning Py_True or Py_False, respectively. + * Only treat Py_True and Py_False as immortal in the limited C API 3.12 + * and newer. */ +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 < 0x030c0000 +# define Py_RETURN_TRUE return Py_NewRef(Py_True) +# define Py_RETURN_FALSE return Py_NewRef(Py_False) +#else +# define Py_RETURN_TRUE return Py_True +# define Py_RETURN_FALSE return Py_False +#endif + +/* Function to return a bool from a C long */ +PyAPI_FUNC(PyObject *) PyBool_FromLong(long); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_BOOLOBJECT_H */ diff --git a/Include/bytearrayobject.h b/Include/bytearrayobject.h new file mode 100644 index 0000000000000000000000000000000000000000..3d53fdba6432672077aa97c2db0a58fc9cbc8414 --- /dev/null +++ b/Include/bytearrayobject.h @@ -0,0 +1,44 @@ +/* ByteArray object interface */ + +#ifndef Py_BYTEARRAYOBJECT_H +#define Py_BYTEARRAYOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +/* Type PyByteArrayObject represents a mutable array of bytes. + * The Python API is that of a sequence; + * the bytes are mapped to ints in [0, 256). + * Bytes are not characters; they may be used to encode characters. 
+ * The only way to go between bytes and str/unicode is via encoding
+ * and decoding.
+ * For the convenience of C programmers, the bytes type is considered
+ * to contain a char pointer, not an unsigned char pointer.
+ */
+
+/* Type object */
+PyAPI_DATA(PyTypeObject) PyByteArray_Type;
+PyAPI_DATA(PyTypeObject) PyByteArrayIter_Type;
+
+/* Type check macros */
+#define PyByteArray_Check(self) PyObject_TypeCheck((self), &PyByteArray_Type)
+#define PyByteArray_CheckExact(self) Py_IS_TYPE((self), &PyByteArray_Type)
+
+/* Direct API functions */
+PyAPI_FUNC(PyObject *) PyByteArray_FromObject(PyObject *);
+PyAPI_FUNC(PyObject *) PyByteArray_Concat(PyObject *, PyObject *);
+PyAPI_FUNC(PyObject *) PyByteArray_FromStringAndSize(const char *, Py_ssize_t);
+PyAPI_FUNC(Py_ssize_t) PyByteArray_Size(PyObject *);
+PyAPI_FUNC(char *) PyByteArray_AsString(PyObject *);
+PyAPI_FUNC(int) PyByteArray_Resize(PyObject *, Py_ssize_t);
+
+#ifndef Py_LIMITED_API
+#  define Py_CPYTHON_BYTEARRAYOBJECT_H
+#  include "cpython/bytearrayobject.h"
+#  undef Py_CPYTHON_BYTEARRAYOBJECT_H
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_BYTEARRAYOBJECT_H */
diff --git a/Include/bytesobject.h b/Include/bytesobject.h
new file mode 100644
index 0000000000000000000000000000000000000000..c5a24195be6bc3771c3b5b65e85106d516b029e1
--- /dev/null
+++ b/Include/bytesobject.h
@@ -0,0 +1,66 @@
+// Bytes object interface
+
+#ifndef Py_BYTESOBJECT_H
+#define Py_BYTESOBJECT_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+Type PyBytesObject represents a byte string. An extra zero byte is
+reserved at the end to ensure it is zero-terminated, but a size is
+present so strings with null bytes in them can be represented. This
+is an immutable object type.
+
+There are functions to create new bytes objects, to test
+an object for bytes-ness, and to get the
+byte string value. The latter function returns a null pointer
+if the object is not of the proper type.
+There is a variant that takes an explicit size as well as a
+variant that assumes a zero-terminated string. Note that none of the
+functions should be applied to a NULL pointer.
+*/
+
+PyAPI_DATA(PyTypeObject) PyBytes_Type;
+PyAPI_DATA(PyTypeObject) PyBytesIter_Type;
+
+#define PyBytes_Check(op) \
+    PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_BYTES_SUBCLASS)
+#define PyBytes_CheckExact(op) Py_IS_TYPE((op), &PyBytes_Type)
+
+PyAPI_FUNC(PyObject *) PyBytes_FromStringAndSize(const char *, Py_ssize_t);
+PyAPI_FUNC(PyObject *) PyBytes_FromString(const char *);
+PyAPI_FUNC(PyObject *) PyBytes_FromObject(PyObject *);
+PyAPI_FUNC(PyObject *) PyBytes_FromFormatV(const char*, va_list)
+                Py_GCC_ATTRIBUTE((format(printf, 1, 0)));
+PyAPI_FUNC(PyObject *) PyBytes_FromFormat(const char*, ...)
+                Py_GCC_ATTRIBUTE((format(printf, 1, 2)));
+PyAPI_FUNC(Py_ssize_t) PyBytes_Size(PyObject *);
+PyAPI_FUNC(char *) PyBytes_AsString(PyObject *);
+PyAPI_FUNC(PyObject *) PyBytes_Repr(PyObject *, int);
+PyAPI_FUNC(void) PyBytes_Concat(PyObject **, PyObject *);
+PyAPI_FUNC(void) PyBytes_ConcatAndDel(PyObject **, PyObject *);
+PyAPI_FUNC(PyObject *) PyBytes_DecodeEscape(const char *, Py_ssize_t,
+                                            const char *, Py_ssize_t,
+                                            const char *);
+
+/* Provides access to the internal data buffer and size of a bytes object.
+   Passing NULL as len parameter will force the string buffer to be
+   0-terminated (passing a string with embedded NUL characters will
+   cause an exception).
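+
+   An illustrative call (editor's sketch): borrow the buffer without
+   copying; the pointer is only valid while the bytes object stays alive:
+
+       char *buf;
+       Py_ssize_t size;
+       if (PyBytes_AsStringAndSize(bytes, &buf, &size) < 0) {
+           return NULL;
+       }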
*/
+PyAPI_FUNC(int) PyBytes_AsStringAndSize(
+    PyObject *obj,      /* bytes object */
+    char **s,           /* pointer to buffer variable */
+    Py_ssize_t *len     /* pointer to length variable or NULL */
+    );
+
+#ifndef Py_LIMITED_API
+#  define Py_CPYTHON_BYTESOBJECT_H
+#  include "cpython/bytesobject.h"
+#  undef Py_CPYTHON_BYTESOBJECT_H
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_BYTESOBJECT_H */
diff --git a/Include/ceval.h b/Include/ceval.h
new file mode 100644
index 0000000000000000000000000000000000000000..1ec746c3708220edddb42e00a55d3a04dbd0b465
--- /dev/null
+++ b/Include/ceval.h
@@ -0,0 +1,145 @@
+/* Interface to random parts in ceval.c */
+
+#ifndef Py_CEVAL_H
+#define Py_CEVAL_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+PyAPI_FUNC(PyObject *) PyEval_EvalCode(PyObject *, PyObject *, PyObject *);
+
+PyAPI_FUNC(PyObject *) PyEval_EvalCodeEx(PyObject *co,
+                                         PyObject *globals,
+                                         PyObject *locals,
+                                         PyObject *const *args, int argc,
+                                         PyObject *const *kwds, int kwdc,
+                                         PyObject *const *defs, int defc,
+                                         PyObject *kwdefs, PyObject *closure);
+
+PyAPI_FUNC(PyObject *) PyEval_GetBuiltins(void);
+PyAPI_FUNC(PyObject *) PyEval_GetGlobals(void);
+PyAPI_FUNC(PyObject *) PyEval_GetLocals(void);
+PyAPI_FUNC(PyFrameObject *) PyEval_GetFrame(void);
+
+PyAPI_FUNC(PyObject *) PyEval_GetFrameBuiltins(void);
+PyAPI_FUNC(PyObject *) PyEval_GetFrameGlobals(void);
+PyAPI_FUNC(PyObject *) PyEval_GetFrameLocals(void);
+
+PyAPI_FUNC(int) Py_AddPendingCall(int (*func)(void *), void *arg);
+PyAPI_FUNC(int) Py_MakePendingCalls(void);
+
+/* Protection against deeply nested recursive calls
+
+   In Python 3.0, this protection has two levels:
+   * normal anti-recursion protection is triggered when the recursion level
+     exceeds the current recursion limit. It raises a RecursionError, and sets
+     the "overflowed" flag in the thread state structure. This flag
+     temporarily *disables* the normal protection; this allows cleanup code
+     to potentially outgrow the recursion limit while processing the
+     RecursionError.
+   * "last chance" anti-recursion protection is triggered when the recursion
+     level exceeds "current recursion limit + 50". By construction, this
+     protection can only be triggered when the "overflowed" flag is set. It
+     means the cleanup code has itself gone into an infinite loop, or the
+     RecursionError has been mistakenly ignored. When this protection is
+     triggered, the interpreter aborts with a Fatal Error.
+
+   In addition, the "overflowed" flag is automatically reset when the
+   recursion level drops below "current recursion limit - 50". This heuristic
+   is meant to ensure that the normal anti-recursion protection doesn't get
+   disabled too long.
+
+   Please note: this scheme has its own limitations. See:
+   http://mail.python.org/pipermail/python-dev/2008-August/082106.html
+   for some observations.
+*/
+PyAPI_FUNC(void) Py_SetRecursionLimit(int);
+PyAPI_FUNC(int) Py_GetRecursionLimit(void);
+
+PyAPI_FUNC(int) Py_EnterRecursiveCall(const char *where);
+PyAPI_FUNC(void) Py_LeaveRecursiveCall(void);
+
+PyAPI_FUNC(const char *) PyEval_GetFuncName(PyObject *);
+PyAPI_FUNC(const char *) PyEval_GetFuncDesc(PyObject *);
+
+PyAPI_FUNC(PyObject *) PyEval_EvalFrame(PyFrameObject *);
+PyAPI_FUNC(PyObject *) PyEval_EvalFrameEx(PyFrameObject *f, int exc);
+
+/* Interface for threads.
+
+   A module that plans to do a blocking system call (or something else
+   that lasts a long time and doesn't touch Python data) can allow other
+   threads to run as follows:
+
+    ...preparations here...
+    Py_BEGIN_ALLOW_THREADS
+    ...blocking system call here...
+    Py_END_ALLOW_THREADS
+    ...interpret result here...
+
+   The Py_BEGIN_ALLOW_THREADS/Py_END_ALLOW_THREADS pair expands to a
+   {}-surrounded block.
+   To leave the block in the middle (e.g., with return), you must insert
+   a line containing Py_BLOCK_THREADS before the return, e.g.
+
+    if (...premature_exit...) {
+        Py_BLOCK_THREADS
+        PyErr_SetFromErrno(PyExc_OSError);
+        return NULL;
+    }
+
+   An alternative is:
+
+    Py_BLOCK_THREADS
+    if (...premature_exit...) {
+        PyErr_SetFromErrno(PyExc_OSError);
+        return NULL;
+    }
+    Py_UNBLOCK_THREADS
+
+   For convenience, the value of 'errno' is restored across
+   Py_END_ALLOW_THREADS and Py_BLOCK_THREADS.
+
+   WARNING: NEVER NEST CALLS TO Py_BEGIN_ALLOW_THREADS AND
+   Py_END_ALLOW_THREADS!!!
+
+   Note that not all candidates have yet been converted to use this
+   mechanism!
+*/
+
+PyAPI_FUNC(PyThreadState *) PyEval_SaveThread(void);
+PyAPI_FUNC(void) PyEval_RestoreThread(PyThreadState *);
+
+Py_DEPRECATED(3.9) PyAPI_FUNC(void) PyEval_InitThreads(void);
+
+PyAPI_FUNC(void) PyEval_AcquireThread(PyThreadState *tstate);
+PyAPI_FUNC(void) PyEval_ReleaseThread(PyThreadState *tstate);
+
+#define Py_BEGIN_ALLOW_THREADS { \
+                        PyThreadState *_save; \
+                        _save = PyEval_SaveThread();
+#define Py_BLOCK_THREADS        PyEval_RestoreThread(_save);
+#define Py_UNBLOCK_THREADS      _save = PyEval_SaveThread();
+#define Py_END_ALLOW_THREADS    PyEval_RestoreThread(_save); \
+                 }
+
+/* Masks and values used by FORMAT_VALUE opcode. */
+#define FVC_MASK      0x3
+#define FVC_NONE      0x0
+#define FVC_STR       0x1
+#define FVC_REPR      0x2
+#define FVC_ASCII     0x3
+#define FVS_MASK      0x4
+#define FVS_HAVE_SPEC 0x4
+
+#ifndef Py_LIMITED_API
+#  define Py_CPYTHON_CEVAL_H
+#  include "cpython/ceval.h"
+#  undef Py_CPYTHON_CEVAL_H
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_CEVAL_H */
diff --git a/Include/codecs.h b/Include/codecs.h
new file mode 100644
index 0000000000000000000000000000000000000000..512a3c723eca18344cca0b44fcf278fc79b364ac
--- /dev/null
+++ b/Include/codecs.h
@@ -0,0 +1,176 @@
+#ifndef Py_CODECREGISTRY_H
+#define Py_CODECREGISTRY_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* ------------------------------------------------------------------------
+
+   Python Codec Registry and support functions
+
+
+Written by Marc-Andre Lemburg (mal@lemburg.com).
+
+Copyright (c) Corporation for National Research Initiatives.
+
+   ------------------------------------------------------------------------ */
+
+/* Register a new codec search function.
+
+   As side effect, this tries to load the encodings package, if not
+   yet done, to make sure that it is always first in the list of
+   search functions.
+
+   The search_function's refcount is incremented by this function. */
+
+PyAPI_FUNC(int) PyCodec_Register(
+       PyObject *search_function
+       );
+
+/* Unregister a codec search function and clear the registry's cache.
+   If the search function is not registered, do nothing.
+   Return 0 on success. Raise an exception and return -1 on error. */
+
+PyAPI_FUNC(int) PyCodec_Unregister(
+       PyObject *search_function
+       );
+
+/* Codec registry encoding check API.
+
+   Returns 1/0 depending on whether there is a registered codec for
+   the given encoding.
+
+*/
+
+PyAPI_FUNC(int) PyCodec_KnownEncoding(
+       const char *encoding
+       );
+
+/* Generic codec based encoding API.
+
+   object is passed through the encoder function found for the given
+   encoding using the error handling method defined by errors. errors
+   may be NULL to use the default method defined for the codec.
+
+   Raises a LookupError in case no encoder can be found.
+
+ */
+
+PyAPI_FUNC(PyObject *) PyCodec_Encode(
+       PyObject *object,
+       const char *encoding,
+       const char *errors
+       );
+
+/* Generic codec based decoding API.
+
+   object is passed through the decoder function found for the given
+   encoding using the error handling method defined by errors. errors
+   may be NULL to use the default method defined for the codec.
+
+   Raises a LookupError in case no decoder can be found.
+
+ */
+
+PyAPI_FUNC(PyObject *) PyCodec_Decode(
+       PyObject *object,
+       const char *encoding,
+       const char *errors
+       );
+
+// --- Codec Lookup APIs --------------------------------------------------
+
+/* Codec registry lookup API.
+
+   Looks up the given encoding and returns a CodecInfo object with
+   function attributes which implement the different aspects of
+   processing the encoding.
+
+   The encoding string is converted to all lower-case characters before
+   being looked up. This makes encodings looked up through this mechanism
+   effectively case-insensitive.
+
+   If no codec is found, a KeyError is set and NULL returned.
+
+   As side effect, this tries to load the encodings package, if not
+   yet done. This is part of the lazy load strategy for the encodings
+   package.
+ */
+
+/* Get an encoder function for the given encoding. */
+
+PyAPI_FUNC(PyObject *) PyCodec_Encoder(const char *encoding);
+
+/* Get a decoder function for the given encoding. */
+
+PyAPI_FUNC(PyObject *) PyCodec_Decoder(const char *encoding);
+
+/* Get an IncrementalEncoder object for the given encoding. */
+
+PyAPI_FUNC(PyObject *) PyCodec_IncrementalEncoder(
+       const char *encoding,
+       const char *errors);
+
+/* Get an IncrementalDecoder object for the given encoding. */
+
+PyAPI_FUNC(PyObject *) PyCodec_IncrementalDecoder(
+       const char *encoding,
+       const char *errors);
+
+/* Get a StreamReader factory function for the given encoding. */
+
+PyAPI_FUNC(PyObject *) PyCodec_StreamReader(
+       const char *encoding,
+       PyObject *stream,
+       const char *errors);
+
+/* Get a StreamWriter factory function for the given encoding. */
+
+PyAPI_FUNC(PyObject *) PyCodec_StreamWriter(
+       const char *encoding,
+       PyObject *stream,
+       const char *errors);
+
+/* Unicode encoding error handling callback registry API */
+
+/* Register the error handling callback function error under the given
+   name. This callback will be called by a codec when it encounters
+   unencodable characters/undecodable bytes, if name is specified as the
+   errors parameter in the call to the encode/decode function.
+   Return 0 on success, -1 on error */
+PyAPI_FUNC(int) PyCodec_RegisterError(const char *name, PyObject *error);
+
+/* Lookup the error handling callback function registered under the given
+   name. As a special case NULL can be passed, in which case
+   the error handling callback for "strict" will be returned. */
+PyAPI_FUNC(PyObject *) PyCodec_LookupError(const char *name);
+
+/* raise exc as an exception */
+PyAPI_FUNC(PyObject *) PyCodec_StrictErrors(PyObject *exc);
+
+/* ignore the unicode error, skipping the faulty input */
+PyAPI_FUNC(PyObject *) PyCodec_IgnoreErrors(PyObject *exc);
+
+/* replace the unicode encode error with ? or U+FFFD */
+PyAPI_FUNC(PyObject *) PyCodec_ReplaceErrors(PyObject *exc);
+
+/* replace the unicode encode error with XML character references */
+PyAPI_FUNC(PyObject *) PyCodec_XMLCharRefReplaceErrors(PyObject *exc);
+
+/* replace the unicode encode error with backslash escapes (\x, \u and \U) */
+PyAPI_FUNC(PyObject *) PyCodec_BackslashReplaceErrors(PyObject *exc);
+
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000
+/* replace the unicode encode error with backslash escapes (\N, \x, \u and \U) */
+PyAPI_FUNC(PyObject *) PyCodec_NameReplaceErrors(PyObject *exc);
+#endif
+
+#ifndef Py_LIMITED_API
+PyAPI_DATA(const char *) Py_hexdigits;
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_CODECREGISTRY_H */
diff --git a/Include/compile.h b/Include/compile.h
new file mode 100644
index 0000000000000000000000000000000000000000..52d0bc76c9fca4485451ee4b18034059cf91cdb5
--- /dev/null
+++ b/Include/compile.h
@@ -0,0 +1,22 @@
+#ifndef Py_COMPILE_H
+#define Py_COMPILE_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* These definitions must match corresponding definitions in graminit.h. */
+#define Py_single_input 256
+#define Py_file_input 257
+#define Py_eval_input 258
+#define Py_func_type_input 345
+
+#ifndef Py_LIMITED_API
+#  define Py_CPYTHON_COMPILE_H
+#  include "cpython/compile.h"
+#  undef Py_CPYTHON_COMPILE_H
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_COMPILE_H */
diff --git a/Include/complexobject.h b/Include/complexobject.h
new file mode 100644
index 0000000000000000000000000000000000000000..ebe49a832f74141c0cc059e72a15c01edf937dba
--- /dev/null
+++ b/Include/complexobject.h
@@ -0,0 +1,30 @@
+/* Complex number structure */
+
+#ifndef Py_COMPLEXOBJECT_H
+#define Py_COMPLEXOBJECT_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Complex object interface */
+
+PyAPI_DATA(PyTypeObject) PyComplex_Type;
+
+#define PyComplex_Check(op) PyObject_TypeCheck((op), &PyComplex_Type)
+#define PyComplex_CheckExact(op) Py_IS_TYPE((op), &PyComplex_Type)
+
+PyAPI_FUNC(PyObject *) PyComplex_FromDoubles(double real, double imag);
+
+PyAPI_FUNC(double) PyComplex_RealAsDouble(PyObject *op);
+PyAPI_FUNC(double) PyComplex_ImagAsDouble(PyObject *op);
+
+#ifndef Py_LIMITED_API
+#  define Py_CPYTHON_COMPLEXOBJECT_H
+#  include "cpython/complexobject.h"
+#  undef Py_CPYTHON_COMPLEXOBJECT_H
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_COMPLEXOBJECT_H */
diff --git a/Include/cpython/abstract.h b/Include/cpython/abstract.h
new file mode 100644
index 0000000000000000000000000000000000000000..4e7b7a46703a6d08a3d73b16d1bfb7e164d53afb
--- /dev/null
+++ b/Include/cpython/abstract.h
@@ -0,0 +1,87 @@
+#ifndef Py_CPYTHON_ABSTRACTOBJECT_H
+#  error "this header file must not be included directly"
+#endif
+
+/* === Object Protocol ================================================== */
+
+/* Like PyObject_CallMethod(), but expect a _Py_Identifier*
+   as the method name. */
+PyAPI_FUNC(PyObject*) _PyObject_CallMethodId(
+    PyObject *obj,
+    _Py_Identifier *name,
+    const char *format, ...);
+
+/* Convert keyword arguments from the FASTCALL (stack: C array, kwnames: tuple)
+   format to a Python dictionary ("kwargs" dict).
+
+   The type of kwnames keys is not checked. The final function getting
+   arguments is responsible for checking if all keys are strings, for example
+   using PyArg_ParseTupleAndKeywords() or PyArg_ValidateKeywordArguments().
+
+   Duplicate keys are merged using the last value.
If duplicate keys must raise
+   an exception, the caller is responsible for performing an explicit
+   duplicate-key check on kwnames. */
+PyAPI_FUNC(PyObject*) _PyStack_AsDict(PyObject *const *values, PyObject *kwnames);
+
+
+/* === Vectorcall protocol (PEP 590) ============================= */
+
+// PyVectorcall_NARGS() is exported as a function for the stable ABI.
+// Here (when we are not using the stable ABI), the name is overridden to
+// call a static inline function for best performance.
+static inline Py_ssize_t
+_PyVectorcall_NARGS(size_t n)
+{
+    return n & ~PY_VECTORCALL_ARGUMENTS_OFFSET;
+}
+#define PyVectorcall_NARGS(n) _PyVectorcall_NARGS(n)
+
+PyAPI_FUNC(vectorcallfunc) PyVectorcall_Function(PyObject *callable);
+
+// Backwards compatibility aliases (PEP 590) for API that was provisional
+// in Python 3.8
+#define _PyObject_Vectorcall PyObject_Vectorcall
+#define _PyObject_VectorcallMethod PyObject_VectorcallMethod
+#define _PyObject_FastCallDict PyObject_VectorcallDict
+#define _PyVectorcall_Function PyVectorcall_Function
+#define _PyObject_CallOneArg PyObject_CallOneArg
+#define _PyObject_CallMethodNoArgs PyObject_CallMethodNoArgs
+#define _PyObject_CallMethodOneArg PyObject_CallMethodOneArg
+
+/* Same as PyObject_Vectorcall except that keyword arguments are passed as
+   dict, which may be NULL if there are no keyword arguments. */
+PyAPI_FUNC(PyObject *) PyObject_VectorcallDict(
+    PyObject *callable,
+    PyObject *const *args,
+    size_t nargsf,
+    PyObject *kwargs);
+
+PyAPI_FUNC(PyObject *) PyObject_CallOneArg(PyObject *func, PyObject *arg);
+
+static inline PyObject *
+PyObject_CallMethodNoArgs(PyObject *self, PyObject *name)
+{
+    size_t nargsf = 1 | PY_VECTORCALL_ARGUMENTS_OFFSET;
+    return PyObject_VectorcallMethod(name, &self, nargsf, _Py_NULL);
+}
+
+static inline PyObject *
+PyObject_CallMethodOneArg(PyObject *self, PyObject *name, PyObject *arg)
+{
+    PyObject *args[2] = {self, arg};
+    size_t nargsf = 2 | PY_VECTORCALL_ARGUMENTS_OFFSET;
+    assert(arg != NULL);
+    return PyObject_VectorcallMethod(name, args, nargsf, _Py_NULL);
+}
+
+/* Guess the size of object 'o' using len(o) or o.__length_hint__().
+   If neither of those return a non-negative value, then return the default
+   value. If one of the calls fails, this function returns -1. */
+PyAPI_FUNC(Py_ssize_t) PyObject_LengthHint(PyObject *o, Py_ssize_t);
+
+/* === Sequence protocol ================================================ */
+
+/* Assume tp_as_sequence and sq_item exist and that 'i' does not
+   need to be corrected for a negative index.
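+
+   As an illustrative sketch (not part of this header), a caller that has
+   already verified PySequence_Check(seq) and computed a valid non-negative
+   index might write:
+
+       PyObject *item = PySequence_ITEM(seq, i);  // new reference
+       if (item == NULL) { return NULL; }         // sq_item can still fail
+       Py_DECREF(item);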
*/ +#define PySequence_ITEM(o, i)\ + ( Py_TYPE(o)->tp_as_sequence->sq_item((o), (i)) ) diff --git a/Include/cpython/bytearrayobject.h b/Include/cpython/bytearrayobject.h new file mode 100644 index 0000000000000000000000000000000000000000..9ba176eb2d3ac2aa650f764d991c37c2d42bd925 --- /dev/null +++ b/Include/cpython/bytearrayobject.h @@ -0,0 +1,34 @@ +#ifndef Py_CPYTHON_BYTEARRAYOBJECT_H +# error "this header file must not be included directly" +#endif + +/* Object layout */ +typedef struct { + PyObject_VAR_HEAD + Py_ssize_t ob_alloc; /* How many bytes allocated in ob_bytes */ + char *ob_bytes; /* Physical backing buffer */ + char *ob_start; /* Logical start inside ob_bytes */ + Py_ssize_t ob_exports; /* How many buffer exports */ +} PyByteArrayObject; + +PyAPI_DATA(char) _PyByteArray_empty_string[]; + +/* Macros and static inline functions, trading safety for speed */ +#define _PyByteArray_CAST(op) \ + (assert(PyByteArray_Check(op)), _Py_CAST(PyByteArrayObject*, op)) + +static inline char* PyByteArray_AS_STRING(PyObject *op) +{ + PyByteArrayObject *self = _PyByteArray_CAST(op); + if (Py_SIZE(self)) { + return self->ob_start; + } + return _PyByteArray_empty_string; +} +#define PyByteArray_AS_STRING(self) PyByteArray_AS_STRING(_PyObject_CAST(self)) + +static inline Py_ssize_t PyByteArray_GET_SIZE(PyObject *op) { + PyByteArrayObject *self = _PyByteArray_CAST(op); + return Py_SIZE(self); +} +#define PyByteArray_GET_SIZE(self) PyByteArray_GET_SIZE(_PyObject_CAST(self)) diff --git a/Include/cpython/bytesobject.h b/Include/cpython/bytesobject.h new file mode 100644 index 0000000000000000000000000000000000000000..41537210b748a1c51b15f9226d14e33cdcfd43b2 --- /dev/null +++ b/Include/cpython/bytesobject.h @@ -0,0 +1,37 @@ +#ifndef Py_CPYTHON_BYTESOBJECT_H +# error "this header file must not be included directly" +#endif + +typedef struct { + PyObject_VAR_HEAD + Py_DEPRECATED(3.11) Py_hash_t ob_shash; + char ob_sval[1]; + + /* Invariants: + * ob_sval contains space for 'ob_size+1' elements. + * ob_sval[ob_size] == 0. + * ob_shash is the hash of the byte string or -1 if not computed yet. + */ +} PyBytesObject; + +PyAPI_FUNC(int) _PyBytes_Resize(PyObject **, Py_ssize_t); + +/* Macros and static inline functions, trading safety for speed */ +#define _PyBytes_CAST(op) \ + (assert(PyBytes_Check(op)), _Py_CAST(PyBytesObject*, op)) + +static inline char* PyBytes_AS_STRING(PyObject *op) +{ + return _PyBytes_CAST(op)->ob_sval; +} +#define PyBytes_AS_STRING(op) PyBytes_AS_STRING(_PyObject_CAST(op)) + +static inline Py_ssize_t PyBytes_GET_SIZE(PyObject *op) { + PyBytesObject *self = _PyBytes_CAST(op); + return Py_SIZE(self); +} +#define PyBytes_GET_SIZE(self) PyBytes_GET_SIZE(_PyObject_CAST(self)) + +/* _PyBytes_Join(sep, x) is like sep.join(x). sep must be PyBytesObject*, + x must be an iterable object. 
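+
+   A hedged usage sketch (the variable names here are illustrative only):
+
+       // joined = sep.join(parts); sep is bytes, parts yields bytes objects
+       PyObject *joined = _PyBytes_Join(sep, parts);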
*/
+PyAPI_FUNC(PyObject*) _PyBytes_Join(PyObject *sep, PyObject *x);
diff --git a/Include/cpython/cellobject.h b/Include/cpython/cellobject.h
new file mode 100644
index 0000000000000000000000000000000000000000..47a6a491497ea030863fb71b7b21db5560f77ed1
--- /dev/null
+++ b/Include/cpython/cellobject.h
@@ -0,0 +1,44 @@
+/* Cell object interface */
+
+#ifndef Py_LIMITED_API
+#ifndef Py_CELLOBJECT_H
+#define Py_CELLOBJECT_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct {
+    PyObject_HEAD
+    /* Content of the cell or NULL when empty */
+    PyObject *ob_ref;
+} PyCellObject;
+
+PyAPI_DATA(PyTypeObject) PyCell_Type;
+
+#define PyCell_Check(op) Py_IS_TYPE((op), &PyCell_Type)
+
+PyAPI_FUNC(PyObject *) PyCell_New(PyObject *);
+PyAPI_FUNC(PyObject *) PyCell_Get(PyObject *);
+PyAPI_FUNC(int) PyCell_Set(PyObject *, PyObject *);
+
+static inline PyObject* PyCell_GET(PyObject *op) {
+    PyCellObject *cell;
+    assert(PyCell_Check(op));
+    cell = _Py_CAST(PyCellObject*, op);
+    return cell->ob_ref;
+}
+#define PyCell_GET(op) PyCell_GET(_PyObject_CAST(op))
+
+static inline void PyCell_SET(PyObject *op, PyObject *value) {
+    PyCellObject *cell;
+    assert(PyCell_Check(op));
+    cell = _Py_CAST(PyCellObject*, op);
+    cell->ob_ref = value;
+}
+#define PyCell_SET(op, value) PyCell_SET(_PyObject_CAST(op), (value))
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_CELLOBJECT_H */
+#endif /* Py_LIMITED_API */
diff --git a/Include/cpython/ceval.h b/Include/cpython/ceval.h
new file mode 100644
index 0000000000000000000000000000000000000000..78f7405661662f8c6e6f0c76c3667d902ffbc837
--- /dev/null
+++ b/Include/cpython/ceval.h
@@ -0,0 +1,25 @@
+#ifndef Py_CPYTHON_CEVAL_H
+# error "this header file must not be included directly"
+#endif
+
+PyAPI_FUNC(void) PyEval_SetProfile(Py_tracefunc, PyObject *);
+PyAPI_FUNC(void) PyEval_SetProfileAllThreads(Py_tracefunc, PyObject *);
+PyAPI_FUNC(void) PyEval_SetTrace(Py_tracefunc, PyObject *);
+PyAPI_FUNC(void) PyEval_SetTraceAllThreads(Py_tracefunc, PyObject *);
+
+/* Look at the current frame's (if any) code's co_flags, and turn on
+   the corresponding compiler flags in cf->cf_flags. Return 1 if any
+   flag was set, else return 0.
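+
+   An illustrative sketch (not part of this header): an embedder that wants
+   __future__ imports active in the calling frame to carry over into newly
+   compiled code might write:
+
+       PyCompilerFlags cf = _PyCompilerFlags_INIT;
+       PyEval_MergeCompilerFlags(&cf);
+       PyObject *code = Py_CompileStringFlags(src, "<embedded>",
+                                              Py_file_input, &cf);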
*/ +PyAPI_FUNC(int) PyEval_MergeCompilerFlags(PyCompilerFlags *cf); + +PyAPI_FUNC(PyObject *) _PyEval_EvalFrameDefault(PyThreadState *tstate, struct _PyInterpreterFrame *f, int exc); + +PyAPI_FUNC(Py_ssize_t) PyUnstable_Eval_RequestCodeExtraIndex(freefunc); +// Old name -- remove when this API changes: +_Py_DEPRECATED_EXTERNALLY(3.12) static inline Py_ssize_t +_PyEval_RequestCodeExtraIndex(freefunc f) { + return PyUnstable_Eval_RequestCodeExtraIndex(f); +} + +PyAPI_FUNC(int) _PyEval_SliceIndex(PyObject *, Py_ssize_t *); +PyAPI_FUNC(int) _PyEval_SliceIndexNotNone(PyObject *, Py_ssize_t *); diff --git a/Include/cpython/classobject.h b/Include/cpython/classobject.h new file mode 100644 index 0000000000000000000000000000000000000000..d7c9ddd1336c46d8a57898cdcfcb0731c0cfb6a9 --- /dev/null +++ b/Include/cpython/classobject.h @@ -0,0 +1,71 @@ +/* Former class object interface -- now only bound methods are here */ + +/* Revealing some structures (not for general use) */ + +#ifndef Py_LIMITED_API +#ifndef Py_CLASSOBJECT_H +#define Py_CLASSOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + PyObject_HEAD + PyObject *im_func; /* The callable object implementing the method */ + PyObject *im_self; /* The instance it is bound to */ + PyObject *im_weakreflist; /* List of weak references */ + vectorcallfunc vectorcall; +} PyMethodObject; + +PyAPI_DATA(PyTypeObject) PyMethod_Type; + +#define PyMethod_Check(op) Py_IS_TYPE((op), &PyMethod_Type) + +PyAPI_FUNC(PyObject *) PyMethod_New(PyObject *, PyObject *); + +PyAPI_FUNC(PyObject *) PyMethod_Function(PyObject *); +PyAPI_FUNC(PyObject *) PyMethod_Self(PyObject *); + +#define _PyMethod_CAST(meth) \ + (assert(PyMethod_Check(meth)), _Py_CAST(PyMethodObject*, meth)) + +/* Static inline functions for direct access to these values. + Type checks are *not* done, so use with care. */ +static inline PyObject* PyMethod_GET_FUNCTION(PyObject *meth) { + return _PyMethod_CAST(meth)->im_func; +} +#define PyMethod_GET_FUNCTION(meth) PyMethod_GET_FUNCTION(_PyObject_CAST(meth)) + +static inline PyObject* PyMethod_GET_SELF(PyObject *meth) { + return _PyMethod_CAST(meth)->im_self; +} +#define PyMethod_GET_SELF(meth) PyMethod_GET_SELF(_PyObject_CAST(meth)) + +typedef struct { + PyObject_HEAD + PyObject *func; +} PyInstanceMethodObject; + +PyAPI_DATA(PyTypeObject) PyInstanceMethod_Type; + +#define PyInstanceMethod_Check(op) Py_IS_TYPE((op), &PyInstanceMethod_Type) + +PyAPI_FUNC(PyObject *) PyInstanceMethod_New(PyObject *); +PyAPI_FUNC(PyObject *) PyInstanceMethod_Function(PyObject *); + +#define _PyInstanceMethod_CAST(meth) \ + (assert(PyInstanceMethod_Check(meth)), \ + _Py_CAST(PyInstanceMethodObject*, meth)) + +/* Static inline function for direct access to these values. + Type checks are *not* done, so use with care. 
*/ +static inline PyObject* PyInstanceMethod_GET_FUNCTION(PyObject *meth) { + return _PyInstanceMethod_CAST(meth)->func; +} +#define PyInstanceMethod_GET_FUNCTION(meth) PyInstanceMethod_GET_FUNCTION(_PyObject_CAST(meth)) + +#ifdef __cplusplus +} +#endif +#endif // !Py_CLASSOBJECT_H +#endif // !Py_LIMITED_API diff --git a/Include/cpython/code.h b/Include/cpython/code.h new file mode 100644 index 0000000000000000000000000000000000000000..bd8afab8f93ca071b264880ef6b64f1f8424bc32 --- /dev/null +++ b/Include/cpython/code.h @@ -0,0 +1,358 @@ +/* Definitions for bytecode */ + +#ifndef Py_LIMITED_API +#ifndef Py_CODE_H +#define Py_CODE_H + +#ifdef __cplusplus +extern "C" { +#endif + +/* Count of all local monitoring events */ +#define _PY_MONITORING_LOCAL_EVENTS 10 +/* Count of all "real" monitoring events (not derived from other events) */ +#define _PY_MONITORING_UNGROUPED_EVENTS 15 +/* Count of all monitoring events */ +#define _PY_MONITORING_EVENTS 17 + +/* Tables of which tools are active for each monitored event. */ +typedef struct _Py_LocalMonitors { + uint8_t tools[_PY_MONITORING_LOCAL_EVENTS]; +} _Py_LocalMonitors; + +typedef struct _Py_GlobalMonitors { + uint8_t tools[_PY_MONITORING_UNGROUPED_EVENTS]; +} _Py_GlobalMonitors; + + +typedef struct { + PyObject *_co_code; + PyObject *_co_varnames; + PyObject *_co_cellvars; + PyObject *_co_freevars; +} _PyCoCached; + +/* Ancillary data structure used for instrumentation. + Line instrumentation creates this with sufficient + space for one entry per code unit. The total size + of the data will be `bytes_per_entry * Py_SIZE(code)` */ +typedef struct { + uint8_t bytes_per_entry; + uint8_t data[1]; +} _PyCoLineInstrumentationData; + + +typedef struct { + int size; + int capacity; + struct _PyExecutorObject *executors[1]; +} _PyExecutorArray; + +/* Main data structure used for instrumentation. + * This is allocated when needed for instrumentation + */ +typedef struct { + /* Monitoring specific to this code object */ + _Py_LocalMonitors local_monitors; + /* Monitoring that is active on this code object */ + _Py_LocalMonitors active_monitors; + /* The tools that are to be notified for events for the matching code unit */ + uint8_t *tools; + /* Information to support line events */ + _PyCoLineInstrumentationData *lines; + /* The tools that are to be notified for line events for the matching code unit */ + uint8_t *line_tools; + /* Information to support instruction events */ + /* The underlying instructions, which can themselves be instrumented */ + uint8_t *per_instruction_opcodes; + /* The tools that are to be notified for instruction events for the matching code unit */ + uint8_t *per_instruction_tools; +} _PyCoMonitoringData; + +// To avoid repeating ourselves in deepfreeze.py, all PyCodeObject members are +// defined in this macro: +#define _PyCode_DEF(SIZE) { \ + PyObject_VAR_HEAD \ + \ + /* Note only the following fields are used in hash and/or comparisons \ + * \ + * - co_name \ + * - co_argcount \ + * - co_posonlyargcount \ + * - co_kwonlyargcount \ + * - co_nlocals \ + * - co_stacksize \ + * - co_flags \ + * - co_firstlineno \ + * - co_consts \ + * - co_names \ + * - co_localsplusnames \ + * This is done to preserve the name and line number for tracebacks \ + * and debuggers; otherwise, constant de-duplication would collapse \ + * identical functions/lambdas defined on different lines. \ + */ \ + \ + /* These fields are set with provided values on new code objects. 
*/ \ + \ + /* The hottest fields (in the eval loop) are grouped here at the top. */ \ + PyObject *co_consts; /* list (constants used) */ \ + PyObject *co_names; /* list of strings (names used) */ \ + PyObject *co_exceptiontable; /* Byte string encoding exception handling \ + table */ \ + int co_flags; /* CO_..., see below */ \ + \ + /* The rest are not so impactful on performance. */ \ + int co_argcount; /* #arguments, except *args */ \ + int co_posonlyargcount; /* #positional only arguments */ \ + int co_kwonlyargcount; /* #keyword only arguments */ \ + int co_stacksize; /* #entries needed for evaluation stack */ \ + int co_firstlineno; /* first source line number */ \ + \ + /* redundant values (derived from co_localsplusnames and \ + co_localspluskinds) */ \ + int co_nlocalsplus; /* number of local + cell + free variables */ \ + int co_framesize; /* Size of frame in words */ \ + int co_nlocals; /* number of local variables */ \ + int co_ncellvars; /* total number of cell variables */ \ + int co_nfreevars; /* number of free variables */ \ + uint32_t co_version; /* version number */ \ + \ + PyObject *co_localsplusnames; /* tuple mapping offsets to names */ \ + PyObject *co_localspluskinds; /* Bytes mapping to local kinds (one byte \ + per variable) */ \ + PyObject *co_filename; /* unicode (where it was loaded from) */ \ + PyObject *co_name; /* unicode (name, for reference) */ \ + PyObject *co_qualname; /* unicode (qualname, for reference) */ \ + PyObject *co_linetable; /* bytes object that holds location info */ \ + PyObject *co_weakreflist; /* to support weakrefs to code objects */ \ + _PyExecutorArray *co_executors; /* executors from optimizer */ \ + _PyCoCached *_co_cached; /* cached co_* attributes */ \ + uintptr_t _co_instrumentation_version; /* current instrumentation version */ \ + _PyCoMonitoringData *_co_monitoring; /* Monitoring data */ \ + int _co_firsttraceable; /* index of first traceable instruction */ \ + /* Scratch space for extra data relating to the code object. \ + Type is a void* to keep the format private in codeobject.c to force \ + people to go through the proper APIs. */ \ + void *co_extra; \ + char co_code_adaptive[(SIZE)]; \ +} + +/* Bytecode object */ +struct PyCodeObject _PyCode_DEF(1); + +/* Masks for co_flags above */ +#define CO_OPTIMIZED 0x0001 +#define CO_NEWLOCALS 0x0002 +#define CO_VARARGS 0x0004 +#define CO_VARKEYWORDS 0x0008 +#define CO_NESTED 0x0010 +#define CO_GENERATOR 0x0020 + +/* The CO_COROUTINE flag is set for coroutine functions (defined with + ``async def`` keywords) */ +#define CO_COROUTINE 0x0080 +#define CO_ITERABLE_COROUTINE 0x0100 +#define CO_ASYNC_GENERATOR 0x0200 + +/* bpo-39562: These constant values are changed in Python 3.9 + to prevent collision with compiler flags. CO_FUTURE_ and PyCF_ + constants must be kept unique. PyCF_ constants can use bits from + 0x0100 to 0x10000. CO_FUTURE_ constants use bits starting at 0x20000. */ +#define CO_FUTURE_DIVISION 0x20000 +#define CO_FUTURE_ABSOLUTE_IMPORT 0x40000 /* do absolute imports by default */ +#define CO_FUTURE_WITH_STATEMENT 0x80000 +#define CO_FUTURE_PRINT_FUNCTION 0x100000 +#define CO_FUTURE_UNICODE_LITERALS 0x200000 + +#define CO_FUTURE_BARRY_AS_BDFL 0x400000 +#define CO_FUTURE_GENERATOR_STOP 0x800000 +#define CO_FUTURE_ANNOTATIONS 0x1000000 + +#define CO_NO_MONITORING_EVENTS 0x2000000 + +/* This should be defined if a future statement modifies the syntax. + For example, when a keyword is added. 
+*/ +#define PY_PARSER_REQUIRES_FUTURE_KEYWORD + +#define CO_MAXBLOCKS 21 /* Max static block nesting within a function */ + +PyAPI_DATA(PyTypeObject) PyCode_Type; + +#define PyCode_Check(op) Py_IS_TYPE((op), &PyCode_Type) + +static inline Py_ssize_t PyCode_GetNumFree(PyCodeObject *op) { + assert(PyCode_Check(op)); + return op->co_nfreevars; +} + +static inline int PyUnstable_Code_GetFirstFree(PyCodeObject *op) { + assert(PyCode_Check(op)); + return op->co_nlocalsplus - op->co_nfreevars; +} + +Py_DEPRECATED(3.13) static inline int PyCode_GetFirstFree(PyCodeObject *op) { + return PyUnstable_Code_GetFirstFree(op); +} + +/* Unstable public interface */ +PyAPI_FUNC(PyCodeObject *) PyUnstable_Code_New( + int, int, int, int, int, PyObject *, PyObject *, + PyObject *, PyObject *, PyObject *, PyObject *, + PyObject *, PyObject *, PyObject *, int, PyObject *, + PyObject *); + +PyAPI_FUNC(PyCodeObject *) PyUnstable_Code_NewWithPosOnlyArgs( + int, int, int, int, int, int, PyObject *, PyObject *, + PyObject *, PyObject *, PyObject *, PyObject *, + PyObject *, PyObject *, PyObject *, int, PyObject *, + PyObject *); + /* same as struct above */ +// Old names -- remove when this API changes: +_Py_DEPRECATED_EXTERNALLY(3.12) static inline PyCodeObject * +PyCode_New( + int a, int b, int c, int d, int e, PyObject *f, PyObject *g, + PyObject *h, PyObject *i, PyObject *j, PyObject *k, + PyObject *l, PyObject *m, PyObject *n, int o, PyObject *p, + PyObject *q) +{ + return PyUnstable_Code_New( + a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q); +} +_Py_DEPRECATED_EXTERNALLY(3.12) static inline PyCodeObject * +PyCode_NewWithPosOnlyArgs( + int a, int poac, int b, int c, int d, int e, PyObject *f, PyObject *g, + PyObject *h, PyObject *i, PyObject *j, PyObject *k, + PyObject *l, PyObject *m, PyObject *n, int o, PyObject *p, + PyObject *q) +{ + return PyUnstable_Code_NewWithPosOnlyArgs( + a, poac, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q); +} + +/* Creates a new empty code object with the specified source location. */ +PyAPI_FUNC(PyCodeObject *) +PyCode_NewEmpty(const char *filename, const char *funcname, int firstlineno); + +/* Return the line number associated with the specified bytecode index + in this code object. If you just need the line number of a frame, + use PyFrame_GetLineNumber() instead. */ +PyAPI_FUNC(int) PyCode_Addr2Line(PyCodeObject *, int); + +PyAPI_FUNC(int) PyCode_Addr2Location(PyCodeObject *, int, int *, int *, int *, int *); + +#define PY_FOREACH_CODE_EVENT(V) \ + V(CREATE) \ + V(DESTROY) + +typedef enum { + #define PY_DEF_EVENT(op) PY_CODE_EVENT_##op, + PY_FOREACH_CODE_EVENT(PY_DEF_EVENT) + #undef PY_DEF_EVENT +} PyCodeEvent; + + +/* + * A callback that is invoked for different events in a code object's lifecycle. + * + * The callback is invoked with a borrowed reference to co, after it is + * created and before it is destroyed. + * + * If the callback sets an exception, it must return -1. Otherwise + * it should return 0. + */ +typedef int (*PyCode_WatchCallback)( + PyCodeEvent event, + PyCodeObject* co); + +/* + * Register a per-interpreter callback that will be invoked for code object + * lifecycle events. + * + * Returns a handle that may be passed to PyCode_ClearWatcher on success, + * or -1 and sets an error if no more handles are available. + */ +PyAPI_FUNC(int) PyCode_AddWatcher(PyCode_WatchCallback callback); + +/* + * Clear the watcher associated with the watcher_id handle. + * + * Returns 0 on success or -1 if no watcher exists for the provided id. 
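+ *
+ * A minimal registration sketch (the callback name is hypothetical):
+ *
+ *     static int my_code_cb(PyCodeEvent event, PyCodeObject *co) {
+ *         return 0;  // observe only; return -1 with an exception on error
+ *     }
+ *     int wid = PyCode_AddWatcher(my_code_cb);
+ *     // ... later ...
+ *     PyCode_ClearWatcher(wid);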
+ */
+PyAPI_FUNC(int) PyCode_ClearWatcher(int watcher_id);
+
+/* for internal use only */
+struct _opaque {
+    int computed_line;
+    const uint8_t *lo_next;
+    const uint8_t *limit;
+};
+
+typedef struct _line_offsets {
+    int ar_start;
+    int ar_end;
+    int ar_line;
+    struct _opaque opaque;
+} PyCodeAddressRange;
+
+/* Update *bounds to describe the first and one-past-the-last instructions in the
+   same line as lasti. Return the number of that line.
+*/
+PyAPI_FUNC(int) _PyCode_CheckLineNumber(int lasti, PyCodeAddressRange *bounds);
+
+/* Create a comparable key used to compare constants taking into account the
+ * object type. It is used to make sure types are not coerced (e.g., float and
+ * complex) _and_ to distinguish 0.0 from -0.0 e.g. on IEEE platforms
+ *
+ * Return (type(obj), obj, ...): a tuple with variable size (at least 2 items)
+ * depending on the type and the value. The type is placed first so that
+ * bytes and str are never compared directly, which could raise a
+ * BytesWarning exception. */
+PyAPI_FUNC(PyObject*) _PyCode_ConstantKey(PyObject *obj);
+
+PyAPI_FUNC(PyObject*) PyCode_Optimize(PyObject *code, PyObject* consts,
+                                      PyObject *names, PyObject *lnotab);
+
+PyAPI_FUNC(int) PyUnstable_Code_GetExtra(
+    PyObject *code, Py_ssize_t index, void **extra);
+PyAPI_FUNC(int) PyUnstable_Code_SetExtra(
+    PyObject *code, Py_ssize_t index, void *extra);
+// Old names -- remove when this API changes:
+_Py_DEPRECATED_EXTERNALLY(3.12) static inline int
+_PyCode_GetExtra(PyObject *code, Py_ssize_t index, void **extra)
+{
+    return PyUnstable_Code_GetExtra(code, index, extra);
+}
+_Py_DEPRECATED_EXTERNALLY(3.12) static inline int
+_PyCode_SetExtra(PyObject *code, Py_ssize_t index, void *extra)
+{
+    return PyUnstable_Code_SetExtra(code, index, extra);
+}
+
+/* Equivalent to getattr(code, 'co_code') in Python.
+   Returns a strong reference to a bytes object. */
+PyAPI_FUNC(PyObject *) PyCode_GetCode(PyCodeObject *code);
+/* Equivalent to getattr(code, 'co_varnames') in Python. */
+PyAPI_FUNC(PyObject *) PyCode_GetVarnames(PyCodeObject *code);
+/* Equivalent to getattr(code, 'co_cellvars') in Python. */
+PyAPI_FUNC(PyObject *) PyCode_GetCellvars(PyCodeObject *code);
+/* Equivalent to getattr(code, 'co_freevars') in Python. */
+PyAPI_FUNC(PyObject *) PyCode_GetFreevars(PyCodeObject *code);
+
+typedef enum _PyCodeLocationInfoKind {
+    /* short forms are 0 to 9 */
+    PY_CODE_LOCATION_INFO_SHORT0 = 0,
+    /* one-line forms are 10 to 12 */
+    PY_CODE_LOCATION_INFO_ONE_LINE0 = 10,
+    PY_CODE_LOCATION_INFO_ONE_LINE1 = 11,
+    PY_CODE_LOCATION_INFO_ONE_LINE2 = 12,
+
+    PY_CODE_LOCATION_INFO_NO_COLUMNS = 13,
+    PY_CODE_LOCATION_INFO_LONG = 14,
+    PY_CODE_LOCATION_INFO_NONE = 15
+} _PyCodeLocationInfoKind;
+
+#ifdef __cplusplus
+}
+#endif
+#endif // !Py_CODE_H
+#endif // !Py_LIMITED_API
diff --git a/Include/cpython/compile.h b/Include/cpython/compile.h
new file mode 100644
index 0000000000000000000000000000000000000000..cfdb7080d45f2b166082df0be2f1508a3eef9946
--- /dev/null
+++ b/Include/cpython/compile.h
@@ -0,0 +1,50 @@
+#ifndef Py_CPYTHON_COMPILE_H
+# error "this header file must not be included directly"
+#endif
+
+/* Public interface */
+#define PyCF_MASK (CO_FUTURE_DIVISION | CO_FUTURE_ABSOLUTE_IMPORT | \
+                   CO_FUTURE_WITH_STATEMENT | CO_FUTURE_PRINT_FUNCTION | \
+                   CO_FUTURE_UNICODE_LITERALS | CO_FUTURE_BARRY_AS_BDFL | \
+                   CO_FUTURE_GENERATOR_STOP | CO_FUTURE_ANNOTATIONS)
+#define PyCF_MASK_OBSOLETE (CO_NESTED)
+
+/* bpo-39562: CO_FUTURE_ and PyCF_ constants must be kept unique.
+   PyCF_ constants can use bits from 0x0100 to 0x10000.
+ CO_FUTURE_ constants use bits starting at 0x20000. */ +#define PyCF_SOURCE_IS_UTF8 0x0100 +#define PyCF_DONT_IMPLY_DEDENT 0x0200 +#define PyCF_ONLY_AST 0x0400 +#define PyCF_IGNORE_COOKIE 0x0800 +#define PyCF_TYPE_COMMENTS 0x1000 +#define PyCF_ALLOW_TOP_LEVEL_AWAIT 0x2000 +#define PyCF_ALLOW_INCOMPLETE_INPUT 0x4000 +#define PyCF_OPTIMIZED_AST (0x8000 | PyCF_ONLY_AST) +#define PyCF_COMPILE_MASK (PyCF_ONLY_AST | PyCF_ALLOW_TOP_LEVEL_AWAIT | \ + PyCF_TYPE_COMMENTS | PyCF_DONT_IMPLY_DEDENT | \ + PyCF_ALLOW_INCOMPLETE_INPUT | PyCF_OPTIMIZED_AST) + +typedef struct { + int cf_flags; /* bitmask of CO_xxx flags relevant to future */ + int cf_feature_version; /* minor Python version (PyCF_ONLY_AST) */ +} PyCompilerFlags; + +#define _PyCompilerFlags_INIT \ + (PyCompilerFlags){.cf_flags = 0, .cf_feature_version = PY_MINOR_VERSION} + +/* Future feature support */ + +#define FUTURE_NESTED_SCOPES "nested_scopes" +#define FUTURE_GENERATORS "generators" +#define FUTURE_DIVISION "division" +#define FUTURE_ABSOLUTE_IMPORT "absolute_import" +#define FUTURE_WITH_STATEMENT "with_statement" +#define FUTURE_PRINT_FUNCTION "print_function" +#define FUTURE_UNICODE_LITERALS "unicode_literals" +#define FUTURE_BARRY_AS_BDFL "barry_as_FLUFL" +#define FUTURE_GENERATOR_STOP "generator_stop" +#define FUTURE_ANNOTATIONS "annotations" + +#define PY_INVALID_STACK_EFFECT INT_MAX +PyAPI_FUNC(int) PyCompile_OpcodeStackEffect(int opcode, int oparg); +PyAPI_FUNC(int) PyCompile_OpcodeStackEffectWithJump(int opcode, int oparg, int jump); diff --git a/Include/cpython/complexobject.h b/Include/cpython/complexobject.h new file mode 100644 index 0000000000000000000000000000000000000000..fbdc6a91fe895c0f7af0bff6a3836a1991709c20 --- /dev/null +++ b/Include/cpython/complexobject.h @@ -0,0 +1,33 @@ +#ifndef Py_CPYTHON_COMPLEXOBJECT_H +# error "this header file must not be included directly" +#endif + +typedef struct { + double real; + double imag; +} Py_complex; + +// Operations on complex numbers. +PyAPI_FUNC(Py_complex) _Py_c_sum(Py_complex, Py_complex); +PyAPI_FUNC(Py_complex) _Py_c_diff(Py_complex, Py_complex); +PyAPI_FUNC(Py_complex) _Py_c_neg(Py_complex); +PyAPI_FUNC(Py_complex) _Py_c_prod(Py_complex, Py_complex); +PyAPI_FUNC(Py_complex) _Py_c_quot(Py_complex, Py_complex); +PyAPI_FUNC(Py_complex) _Py_c_pow(Py_complex, Py_complex); +PyAPI_FUNC(double) _Py_c_abs(Py_complex); + + +/* Complex object interface */ + +/* +PyComplexObject represents a complex number with double-precision +real and imaginary parts. 
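+
+An illustrative round trip (not part of this header):
+
+    Py_complex c = {1.0, -2.0};
+    PyObject *z = PyComplex_FromCComplex(c);    // complex(1.0, -2.0)
+    double re = PyComplex_RealAsDouble(z);      // 1.0
+    Py_complex back = PyComplex_AsCComplex(z);  // {1.0, -2.0}
+    Py_DECREF(z);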
+*/ +typedef struct { + PyObject_HEAD + Py_complex cval; +} PyComplexObject; + +PyAPI_FUNC(PyObject *) PyComplex_FromCComplex(Py_complex); + +PyAPI_FUNC(Py_complex) PyComplex_AsCComplex(PyObject *op); diff --git a/Include/cpython/context.h b/Include/cpython/context.h new file mode 100644 index 0000000000000000000000000000000000000000..a3249fc29b082e4230488e9345948eaa7f502c09 --- /dev/null +++ b/Include/cpython/context.h @@ -0,0 +1,74 @@ +#ifndef Py_LIMITED_API +#ifndef Py_CONTEXT_H +#define Py_CONTEXT_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_DATA(PyTypeObject) PyContext_Type; +typedef struct _pycontextobject PyContext; + +PyAPI_DATA(PyTypeObject) PyContextVar_Type; +typedef struct _pycontextvarobject PyContextVar; + +PyAPI_DATA(PyTypeObject) PyContextToken_Type; +typedef struct _pycontexttokenobject PyContextToken; + + +#define PyContext_CheckExact(o) Py_IS_TYPE((o), &PyContext_Type) +#define PyContextVar_CheckExact(o) Py_IS_TYPE((o), &PyContextVar_Type) +#define PyContextToken_CheckExact(o) Py_IS_TYPE((o), &PyContextToken_Type) + + +PyAPI_FUNC(PyObject *) PyContext_New(void); +PyAPI_FUNC(PyObject *) PyContext_Copy(PyObject *); +PyAPI_FUNC(PyObject *) PyContext_CopyCurrent(void); + +PyAPI_FUNC(int) PyContext_Enter(PyObject *); +PyAPI_FUNC(int) PyContext_Exit(PyObject *); + + +/* Create a new context variable. + + default_value can be NULL. +*/ +PyAPI_FUNC(PyObject *) PyContextVar_New( + const char *name, PyObject *default_value); + + +/* Get a value for the variable. + + Returns -1 if an error occurred during lookup. + + Returns 0 if value either was or was not found. + + If value was found, *value will point to it. + If not, it will point to: + + - default_value, if not NULL; + - the default value of "var", if not NULL; + - NULL. + + '*value' will be a new ref, if not NULL. +*/ +PyAPI_FUNC(int) PyContextVar_Get( + PyObject *var, PyObject *default_value, PyObject **value); + + +/* Set a new value for the variable. + Returns NULL if an error occurs. +*/ +PyAPI_FUNC(PyObject *) PyContextVar_Set(PyObject *var, PyObject *value); + + +/* Reset a variable to its previous value. + Returns 0 on success, -1 on error. +*/ +PyAPI_FUNC(int) PyContextVar_Reset(PyObject *var, PyObject *token); + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_CONTEXT_H */ +#endif /* !Py_LIMITED_API */ diff --git a/Include/cpython/critical_section.h b/Include/cpython/critical_section.h new file mode 100644 index 0000000000000000000000000000000000000000..35db3fb6a59ce66c23aa5efa948ef76a3f489d4d --- /dev/null +++ b/Include/cpython/critical_section.h @@ -0,0 +1,134 @@ +#ifndef Py_CPYTHON_CRITICAL_SECTION_H +# error "this header file must not be included directly" +#endif + +// Python critical sections +// +// Conceptually, critical sections are a deadlock avoidance layer on top of +// per-object locks. These helpers, in combination with those locks, replace +// our usage of the global interpreter lock to provide thread-safety for +// otherwise thread-unsafe objects, such as dict. +// +// NOTE: These APIs are no-ops in non-free-threaded builds. +// +// Straightforward per-object locking could introduce deadlocks that were not +// present when running with the GIL. Threads may hold locks for multiple +// objects simultaneously because Python operations can nest. If threads were +// to acquire the same locks in different orders, they would deadlock. 
+//
+// One way to avoid deadlocks is to allow threads to hold only the lock (or
+// locks) for a single operation at a time (typically a single lock, but some
+// operations involve two locks). When a thread begins a nested operation it
+// could suspend the locks for any outer operation: before beginning the nested
+// operation, the locks for the outer operation are released and when the
+// nested operation completes, the locks for the outer operation are
+// reacquired.
+//
+// To improve performance, this API uses a variation of the above scheme.
+// Instead of immediately suspending locks any time a nested operation begins,
+// locks are only suspended if the thread would block. This reduces the number
+// of lock acquisitions and releases for nested operations, while still
+// avoiding deadlocks.
+//
+// Additionally, the locks for any active operation are suspended around
+// other potentially blocking operations, such as I/O. This is because the
+// interaction between locks and blocking operations can lead to deadlocks in
+// the same way as the interaction between multiple locks.
+//
+// Each thread's critical sections and their corresponding locks are tracked in
+// a stack in `PyThreadState.critical_section`. When a thread calls
+// `_PyThreadState_Detach()`, such as before a blocking I/O operation or when
+// waiting to acquire a lock, the thread suspends all of its active critical
+// sections, temporarily releasing the associated locks. When the thread calls
+// `_PyThreadState_Attach()`, it resumes the top-most (i.e., most recent)
+// critical section by reacquiring the associated lock or locks. See
+// `_PyCriticalSection_Resume()`.
+//
+// NOTE: Only the top-most critical section is guaranteed to be active.
+// Operations that need to lock two objects at once must use
+// `Py_BEGIN_CRITICAL_SECTION2()`. You *CANNOT* use nested critical sections
+// to lock more than one object at once, because the inner critical section
+// may suspend the outer critical sections. This API does not provide a way
+// to lock more than two objects at once (though it could be added later
+// if actually needed).
+//
+// NOTE: Critical sections implicitly behave like reentrant locks because
+// attempting to acquire the same lock will suspend any outer (earlier)
+// critical sections. However, they are less efficient for this use case than
+// purposefully designed reentrant locks.
+//
+// Example usage:
+//      Py_BEGIN_CRITICAL_SECTION(op);
+//      ...
+//      Py_END_CRITICAL_SECTION();
+//
+// To lock two objects at once:
+//      Py_BEGIN_CRITICAL_SECTION2(op1, op2);
+//      ...
+//      Py_END_CRITICAL_SECTION2();
+
+typedef struct PyCriticalSection PyCriticalSection;
+typedef struct PyCriticalSection2 PyCriticalSection2;
+
+PyAPI_FUNC(void)
+PyCriticalSection_Begin(PyCriticalSection *c, PyObject *op);
+
+PyAPI_FUNC(void)
+PyCriticalSection_End(PyCriticalSection *c);
+
+PyAPI_FUNC(void)
+PyCriticalSection2_Begin(PyCriticalSection2 *c, PyObject *a, PyObject *b);
+
+PyAPI_FUNC(void)
+PyCriticalSection2_End(PyCriticalSection2 *c);
+
+#ifndef Py_GIL_DISABLED
+# define Py_BEGIN_CRITICAL_SECTION(op)          \
+    {
+# define Py_END_CRITICAL_SECTION()              \
+    }
+# define Py_BEGIN_CRITICAL_SECTION2(a, b)       \
+    {
+# define Py_END_CRITICAL_SECTION2()             \
+    }
+#else /* !Py_GIL_DISABLED */
+
+// NOTE: the contents of this struct are private and may change between
+// Python releases without a deprecation period.
+struct PyCriticalSection {
+    // Tagged pointer to an outer active critical section (or 0).
+    uintptr_t _cs_prev;
+
+    // Mutex used to protect critical section
+    PyMutex *_cs_mutex;
+};
+
+// A critical section protected by two mutexes. Use
+// Py_BEGIN_CRITICAL_SECTION2 and Py_END_CRITICAL_SECTION2.
+// NOTE: the contents of this struct are private and may change between
+// Python releases without a deprecation period.
+struct PyCriticalSection2 {
+    PyCriticalSection _cs_base;
+
+    PyMutex *_cs_mutex2;
+};
+
+# define Py_BEGIN_CRITICAL_SECTION(op)                                  \
+    {                                                                   \
+        PyCriticalSection _py_cs;                                      \
+        PyCriticalSection_Begin(&_py_cs, _PyObject_CAST(op))
+
+# define Py_END_CRITICAL_SECTION()                                      \
+        PyCriticalSection_End(&_py_cs);                                \
+    }
+
+# define Py_BEGIN_CRITICAL_SECTION2(a, b)                               \
+    {                                                                   \
+        PyCriticalSection2 _py_cs2;                                    \
+        PyCriticalSection2_Begin(&_py_cs2, _PyObject_CAST(a), _PyObject_CAST(b))
+
+# define Py_END_CRITICAL_SECTION2()                                     \
+        PyCriticalSection2_End(&_py_cs2);                              \
+    }
+
+#endif
diff --git a/Include/cpython/descrobject.h b/Include/cpython/descrobject.h
new file mode 100644
index 0000000000000000000000000000000000000000..bbad8b59c225ab5a8e52493dcebd08ab7499e67e
--- /dev/null
+++ b/Include/cpython/descrobject.h
@@ -0,0 +1,62 @@
+#ifndef Py_CPYTHON_DESCROBJECT_H
+# error "this header file must not be included directly"
+#endif
+
+typedef PyObject *(*wrapperfunc)(PyObject *self, PyObject *args,
+                                 void *wrapped);
+
+typedef PyObject *(*wrapperfunc_kwds)(PyObject *self, PyObject *args,
+                                      void *wrapped, PyObject *kwds);
+
+struct wrapperbase {
+    const char *name;
+    int offset;
+    void *function;
+    wrapperfunc wrapper;
+    const char *doc;
+    int flags;
+    PyObject *name_strobj;
+};
+
+/* Flags for above struct */
+#define PyWrapperFlag_KEYWORDS 1 /* wrapper function takes keyword args */
+
+/* Various kinds of descriptor objects */
+
+typedef struct {
+    PyObject_HEAD
+    PyTypeObject *d_type;
+    PyObject *d_name;
+    PyObject *d_qualname;
+} PyDescrObject;
+
+#define PyDescr_COMMON PyDescrObject d_common
+
+#define PyDescr_TYPE(x) (((PyDescrObject *)(x))->d_type)
+#define PyDescr_NAME(x) (((PyDescrObject *)(x))->d_name)
+
+typedef struct {
+    PyDescr_COMMON;
+    PyMethodDef *d_method;
+    vectorcallfunc vectorcall;
+} PyMethodDescrObject;
+
+typedef struct {
+    PyDescr_COMMON;
+    PyMemberDef *d_member;
+} PyMemberDescrObject;
+
+typedef struct {
+    PyDescr_COMMON;
+    PyGetSetDef *d_getset;
+} PyGetSetDescrObject;
+
+typedef struct {
+    PyDescr_COMMON;
+    struct wrapperbase *d_base;
+    void *d_wrapped; /* This can be any function pointer */
+} PyWrapperDescrObject;
+
+PyAPI_FUNC(PyObject *) PyDescr_NewWrapper(PyTypeObject *,
+                                          struct wrapperbase *, void *);
+PyAPI_FUNC(int) PyDescr_IsData(PyObject *);
diff --git a/Include/cpython/dictobject.h b/Include/cpython/dictobject.h
new file mode 100644
index 0000000000000000000000000000000000000000..3fd23b9313c45361aea981a7d6cae99ab8aa52c3
--- /dev/null
+++ b/Include/cpython/dictobject.h
@@ -0,0 +1,102 @@
+#ifndef Py_CPYTHON_DICTOBJECT_H
+# error "this header file must not be included directly"
+#endif
+
+typedef struct _dictkeysobject PyDictKeysObject;
+typedef struct _dictvalues PyDictValues;
+
+/* The ma_values pointer is NULL for a combined table
+ * or points to an array of PyObject* for a split table
+ */
+typedef struct {
+    PyObject_HEAD
+
+    /* Number of items in the dictionary */
+    Py_ssize_t ma_used;
+
+    /* Dictionary version: globally unique; the value changes each time
+       the dictionary is modified */
+#ifdef Py_BUILD_CORE
+    /* Bits 0-7 are for dict watchers.
+ * Bits 8-11 are for the watched mutation counter (used by tier2 optimization) + * The remaining bits (12-63) are the actual version tag. */ + uint64_t ma_version_tag; +#else + Py_DEPRECATED(3.12) uint64_t ma_version_tag; +#endif + + PyDictKeysObject *ma_keys; + + /* If ma_values is NULL, the table is "combined": keys and values + are stored in ma_keys. + + If ma_values is not NULL, the table is split: + keys are stored in ma_keys and values are stored in ma_values */ + PyDictValues *ma_values; +} PyDictObject; + +PyAPI_FUNC(PyObject *) _PyDict_GetItem_KnownHash(PyObject *mp, PyObject *key, + Py_hash_t hash); +PyAPI_FUNC(PyObject *) _PyDict_GetItemStringWithError(PyObject *, const char *); +PyAPI_FUNC(PyObject *) PyDict_SetDefault( + PyObject *mp, PyObject *key, PyObject *defaultobj); + +// Inserts `key` with a value `default_value`, if `key` is not already present +// in the dictionary. If `result` is not NULL, then the value associated +// with `key` is returned in `*result` (either the existing value, or the now +// inserted `default_value`). +// Returns: +// -1 on error +// 0 if `key` was not present and `default_value` was inserted +// 1 if `key` was present and `default_value` was not inserted +PyAPI_FUNC(int) PyDict_SetDefaultRef(PyObject *mp, PyObject *key, PyObject *default_value, PyObject **result); + +/* Get the number of items of a dictionary. */ +static inline Py_ssize_t PyDict_GET_SIZE(PyObject *op) { + PyDictObject *mp; + assert(PyDict_Check(op)); + mp = _Py_CAST(PyDictObject*, op); +#ifdef Py_GIL_DISABLED + return _Py_atomic_load_ssize_relaxed(&mp->ma_used); +#else + return mp->ma_used; +#endif +} +#define PyDict_GET_SIZE(op) PyDict_GET_SIZE(_PyObject_CAST(op)) + +PyAPI_FUNC(int) PyDict_ContainsString(PyObject *mp, const char *key); + +PyAPI_FUNC(PyObject *) _PyDict_NewPresized(Py_ssize_t minused); + +PyAPI_FUNC(int) PyDict_Pop(PyObject *dict, PyObject *key, PyObject **result); +PyAPI_FUNC(int) PyDict_PopString(PyObject *dict, const char *key, PyObject **result); +PyAPI_FUNC(PyObject *) _PyDict_Pop(PyObject *dict, PyObject *key, PyObject *default_value); + +/* Dictionary watchers */ + +#define PY_FOREACH_DICT_EVENT(V) \ + V(ADDED) \ + V(MODIFIED) \ + V(DELETED) \ + V(CLONED) \ + V(CLEARED) \ + V(DEALLOCATED) + +typedef enum { + #define PY_DEF_EVENT(EVENT) PyDict_EVENT_##EVENT, + PY_FOREACH_DICT_EVENT(PY_DEF_EVENT) + #undef PY_DEF_EVENT +} PyDict_WatchEvent; + +// Callback to be invoked when a watched dict is cleared, dealloced, or modified. +// In clear/dealloc case, key and new_value will be NULL. Otherwise, new_value will be the +// new value for key, NULL if key is being deleted. 
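+// An illustrative sketch (callback and variable names are hypothetical):
+//
+//     static int my_dict_cb(PyDict_WatchEvent event, PyObject *dict,
+//                           PyObject *key, PyObject *new_value) {
+//         return 0;  // return -1 with an exception set on error
+//     }
+//     int wid = PyDict_AddWatcher(my_dict_cb);
+//     PyDict_Watch(wid, my_dict);  // mutations of my_dict now invoke my_dict_cb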
+typedef int(*PyDict_WatchCallback)(PyDict_WatchEvent event, PyObject* dict, PyObject* key, PyObject* new_value); + +// Register/unregister a dict-watcher callback +PyAPI_FUNC(int) PyDict_AddWatcher(PyDict_WatchCallback callback); +PyAPI_FUNC(int) PyDict_ClearWatcher(int watcher_id); + +// Mark given dictionary as "watched" (callback will be called if it is modified) +PyAPI_FUNC(int) PyDict_Watch(int watcher_id, PyObject* dict); +PyAPI_FUNC(int) PyDict_Unwatch(int watcher_id, PyObject* dict); diff --git a/Include/cpython/fileobject.h b/Include/cpython/fileobject.h new file mode 100644 index 0000000000000000000000000000000000000000..e2d89c522bdd1326b440421c76f4cc0f77d6ba87 --- /dev/null +++ b/Include/cpython/fileobject.h @@ -0,0 +1,16 @@ +#ifndef Py_CPYTHON_FILEOBJECT_H +# error "this header file must not be included directly" +#endif + +PyAPI_FUNC(char *) Py_UniversalNewlineFgets(char *, int, FILE*, PyObject *); + +/* The std printer acts as a preliminary sys.stderr until the new io + infrastructure is in place. */ +PyAPI_FUNC(PyObject *) PyFile_NewStdPrinter(int); +PyAPI_DATA(PyTypeObject) PyStdPrinter_Type; + +typedef PyObject * (*Py_OpenCodeHookFunction)(PyObject *, void *); + +PyAPI_FUNC(PyObject *) PyFile_OpenCode(const char *utf8path); +PyAPI_FUNC(PyObject *) PyFile_OpenCodeObject(PyObject *path); +PyAPI_FUNC(int) PyFile_SetOpenCodeHook(Py_OpenCodeHookFunction hook, void *userData); diff --git a/Include/cpython/fileutils.h b/Include/cpython/fileutils.h new file mode 100644 index 0000000000000000000000000000000000000000..b386ad107bde1ff9f522137167c10868766d2f30 --- /dev/null +++ b/Include/cpython/fileutils.h @@ -0,0 +1,8 @@ +#ifndef Py_CPYTHON_FILEUTILS_H +# error "this header file must not be included directly" +#endif + +// Used by _testcapi which must not use the internal C API +PyAPI_FUNC(FILE*) _Py_fopen_obj( + PyObject *path, + const char *mode); diff --git a/Include/cpython/floatobject.h b/Include/cpython/floatobject.h new file mode 100644 index 0000000000000000000000000000000000000000..127093098bfe642355383c0be1ba6e560b1f93cd --- /dev/null +++ b/Include/cpython/floatobject.h @@ -0,0 +1,27 @@ +#ifndef Py_CPYTHON_FLOATOBJECT_H +# error "this header file must not be included directly" +#endif + +typedef struct { + PyObject_HEAD + double ob_fval; +} PyFloatObject; + +#define _PyFloat_CAST(op) \ + (assert(PyFloat_Check(op)), _Py_CAST(PyFloatObject*, op)) + +// Static inline version of PyFloat_AsDouble() trading safety for speed. +// It doesn't check if op is a double object. 
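+// An illustrative guard (not part of this header):
+//     if (PyFloat_Check(op)) { double v = PyFloat_AS_DOUBLE(op); ... }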
+static inline double PyFloat_AS_DOUBLE(PyObject *op) { + return _PyFloat_CAST(op)->ob_fval; +} +#define PyFloat_AS_DOUBLE(op) PyFloat_AS_DOUBLE(_PyObject_CAST(op)) + + +PyAPI_FUNC(int) PyFloat_Pack2(double x, char *p, int le); +PyAPI_FUNC(int) PyFloat_Pack4(double x, char *p, int le); +PyAPI_FUNC(int) PyFloat_Pack8(double x, char *p, int le); + +PyAPI_FUNC(double) PyFloat_Unpack2(const char *p, int le); +PyAPI_FUNC(double) PyFloat_Unpack4(const char *p, int le); +PyAPI_FUNC(double) PyFloat_Unpack8(const char *p, int le); diff --git a/Include/cpython/frameobject.h b/Include/cpython/frameobject.h new file mode 100644 index 0000000000000000000000000000000000000000..dbbfbb5105ba7aa51840beff4a90b4e103d497da --- /dev/null +++ b/Include/cpython/frameobject.h @@ -0,0 +1,35 @@ +/* Frame object interface */ + +#ifndef Py_CPYTHON_FRAMEOBJECT_H +# error "this header file must not be included directly" +#endif + +/* Standard object interface */ + +PyAPI_FUNC(PyFrameObject *) PyFrame_New(PyThreadState *, PyCodeObject *, + PyObject *, PyObject *); + +/* The rest of the interface is specific for frame objects */ + +/* Conversions between "fast locals" and locals in dictionary */ + +PyAPI_FUNC(void) PyFrame_LocalsToFast(PyFrameObject *, int); + +/* -- Caveat emptor -- + * The concept of entry frames is an implementation detail of the CPython + * interpreter. This API is considered unstable and is provided for the + * convenience of debuggers, profilers and state-inspecting tools. Notice that + * this API can be changed in future minor versions if the underlying frame + * mechanism change or the concept of an 'entry frame' or its semantics becomes + * obsolete or outdated. */ + +PyAPI_FUNC(int) _PyFrame_IsEntryFrame(PyFrameObject *frame); + +PyAPI_FUNC(int) PyFrame_FastToLocalsWithError(PyFrameObject *f); +PyAPI_FUNC(void) PyFrame_FastToLocals(PyFrameObject *); + + +typedef struct { + PyObject_HEAD + PyFrameObject* frame; +} PyFrameLocalsProxyObject; diff --git a/Include/cpython/funcobject.h b/Include/cpython/funcobject.h new file mode 100644 index 0000000000000000000000000000000000000000..5433ba48eefc69220e7e031a164f3095644100ae --- /dev/null +++ b/Include/cpython/funcobject.h @@ -0,0 +1,184 @@ +/* Function object interface */ + +#ifndef Py_LIMITED_API +#ifndef Py_FUNCOBJECT_H +#define Py_FUNCOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + + +#define _Py_COMMON_FIELDS(PREFIX) \ + PyObject *PREFIX ## globals; \ + PyObject *PREFIX ## builtins; \ + PyObject *PREFIX ## name; \ + PyObject *PREFIX ## qualname; \ + PyObject *PREFIX ## code; /* A code object, the __code__ attribute */ \ + PyObject *PREFIX ## defaults; /* NULL or a tuple */ \ + PyObject *PREFIX ## kwdefaults; /* NULL or a dict */ \ + PyObject *PREFIX ## closure; /* NULL or a tuple of cell objects */ + +typedef struct { + _Py_COMMON_FIELDS(fc_) +} PyFrameConstructor; + +/* Function objects and code objects should not be confused with each other: + * + * Function objects are created by the execution of the 'def' statement. + * They reference a code object in their __code__ attribute, which is a + * purely syntactic object, i.e. nothing more than a compiled version of some + * source code lines. There is one code object per source code "fragment", + * but each code object can be referenced by zero or many function objects + * depending only on how many times the 'def' statement in the source was + * executed so far. 
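+ *
+ *  For example, a 'def' statement in a loop body creates a new function
+ *  object on every iteration, while all of those functions share the one
+ *  code object compiled from that 'def' statement's body.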
+ */ + +typedef struct { + PyObject_HEAD + _Py_COMMON_FIELDS(func_) + PyObject *func_doc; /* The __doc__ attribute, can be anything */ + PyObject *func_dict; /* The __dict__ attribute, a dict or NULL */ + PyObject *func_weakreflist; /* List of weak references */ + PyObject *func_module; /* The __module__ attribute, can be anything */ + PyObject *func_annotations; /* Annotations, a dict or NULL */ + PyObject *func_typeparams; /* Tuple of active type variables or NULL */ + vectorcallfunc vectorcall; + /* Version number for use by specializer. + * Can set to non-zero when we want to specialize. + * Will be set to zero if any of these change: + * defaults + * kwdefaults (only if the object changes, not the contents of the dict) + * code + * annotations + * vectorcall function pointer */ + uint32_t func_version; + + /* Invariant: + * func_closure contains the bindings for func_code->co_freevars, so + * PyTuple_Size(func_closure) == PyCode_GetNumFree(func_code) + * (func_closure may be NULL if PyCode_GetNumFree(func_code) == 0). + */ +} PyFunctionObject; + +#undef _Py_COMMON_FIELDS + +PyAPI_DATA(PyTypeObject) PyFunction_Type; + +#define PyFunction_Check(op) Py_IS_TYPE((op), &PyFunction_Type) + +PyAPI_FUNC(PyObject *) PyFunction_New(PyObject *, PyObject *); +PyAPI_FUNC(PyObject *) PyFunction_NewWithQualName(PyObject *, PyObject *, PyObject *); +PyAPI_FUNC(PyObject *) PyFunction_GetCode(PyObject *); +PyAPI_FUNC(PyObject *) PyFunction_GetGlobals(PyObject *); +PyAPI_FUNC(PyObject *) PyFunction_GetModule(PyObject *); +PyAPI_FUNC(PyObject *) PyFunction_GetDefaults(PyObject *); +PyAPI_FUNC(int) PyFunction_SetDefaults(PyObject *, PyObject *); +PyAPI_FUNC(void) PyFunction_SetVectorcall(PyFunctionObject *, vectorcallfunc); +PyAPI_FUNC(PyObject *) PyFunction_GetKwDefaults(PyObject *); +PyAPI_FUNC(int) PyFunction_SetKwDefaults(PyObject *, PyObject *); +PyAPI_FUNC(PyObject *) PyFunction_GetClosure(PyObject *); +PyAPI_FUNC(int) PyFunction_SetClosure(PyObject *, PyObject *); +PyAPI_FUNC(PyObject *) PyFunction_GetAnnotations(PyObject *); +PyAPI_FUNC(int) PyFunction_SetAnnotations(PyObject *, PyObject *); + +#define _PyFunction_CAST(func) \ + (assert(PyFunction_Check(func)), _Py_CAST(PyFunctionObject*, func)) + +/* Static inline functions for direct access to these values. + Type checks are *not* done, so use with care. 
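+
+   An illustrative use (not part of this header), after a successful
+   PyFunction_Check(func):
+
+       PyObject *code = PyFunction_GET_CODE(func);  // borrowed reference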
*/
+static inline PyObject* PyFunction_GET_CODE(PyObject *func) {
+    return _PyFunction_CAST(func)->func_code;
+}
+#define PyFunction_GET_CODE(func) PyFunction_GET_CODE(_PyObject_CAST(func))
+
+static inline PyObject* PyFunction_GET_GLOBALS(PyObject *func) {
+    return _PyFunction_CAST(func)->func_globals;
+}
+#define PyFunction_GET_GLOBALS(func) PyFunction_GET_GLOBALS(_PyObject_CAST(func))
+
+static inline PyObject* PyFunction_GET_MODULE(PyObject *func) {
+    return _PyFunction_CAST(func)->func_module;
+}
+#define PyFunction_GET_MODULE(func) PyFunction_GET_MODULE(_PyObject_CAST(func))
+
+static inline PyObject* PyFunction_GET_DEFAULTS(PyObject *func) {
+    return _PyFunction_CAST(func)->func_defaults;
+}
+#define PyFunction_GET_DEFAULTS(func) PyFunction_GET_DEFAULTS(_PyObject_CAST(func))
+
+static inline PyObject* PyFunction_GET_KW_DEFAULTS(PyObject *func) {
+    return _PyFunction_CAST(func)->func_kwdefaults;
+}
+#define PyFunction_GET_KW_DEFAULTS(func) PyFunction_GET_KW_DEFAULTS(_PyObject_CAST(func))
+
+static inline PyObject* PyFunction_GET_CLOSURE(PyObject *func) {
+    return _PyFunction_CAST(func)->func_closure;
+}
+#define PyFunction_GET_CLOSURE(func) PyFunction_GET_CLOSURE(_PyObject_CAST(func))
+
+static inline PyObject* PyFunction_GET_ANNOTATIONS(PyObject *func) {
+    return _PyFunction_CAST(func)->func_annotations;
+}
+#define PyFunction_GET_ANNOTATIONS(func) PyFunction_GET_ANNOTATIONS(_PyObject_CAST(func))
+
+/* The classmethod and staticmethod types live here, too */
+PyAPI_DATA(PyTypeObject) PyClassMethod_Type;
+PyAPI_DATA(PyTypeObject) PyStaticMethod_Type;
+
+PyAPI_FUNC(PyObject *) PyClassMethod_New(PyObject *);
+PyAPI_FUNC(PyObject *) PyStaticMethod_New(PyObject *);
+
+#define PY_FOREACH_FUNC_EVENT(V) \
+    V(CREATE)                    \
+    V(DESTROY)                   \
+    V(MODIFY_CODE)               \
+    V(MODIFY_DEFAULTS)           \
+    V(MODIFY_KWDEFAULTS)
+
+typedef enum {
+    #define PY_DEF_EVENT(EVENT) PyFunction_EVENT_##EVENT,
+    PY_FOREACH_FUNC_EVENT(PY_DEF_EVENT)
+    #undef PY_DEF_EVENT
+} PyFunction_WatchEvent;
+
+/*
+ * A callback that is invoked for different events in a function's lifecycle.
+ *
+ * The callback is invoked with a borrowed reference to func, after it is
+ * created and before it is modified or destroyed. The callback should not
+ * modify func.
+ *
+ * When a function's code object, defaults, or kwdefaults are modified the
+ * callback will be invoked with the respective event and new_value will
+ * contain a borrowed reference to the new value that is about to be stored in
+ * the function. Otherwise the third argument is NULL.
+ *
+ * If the callback returns with an exception set, it must return -1. Otherwise
+ * it should return 0.
+ */
+typedef int (*PyFunction_WatchCallback)(
+    PyFunction_WatchEvent event,
+    PyFunctionObject *func,
+    PyObject *new_value);
+
+/*
+ * Register a per-interpreter callback that will be invoked for function lifecycle
+ * events.
+ *
+ * Returns a handle that may be passed to PyFunction_ClearWatcher on success,
+ * or -1 and sets an error if no more handles are available.
+ */
+PyAPI_FUNC(int) PyFunction_AddWatcher(PyFunction_WatchCallback callback);
+
+/*
+ * Clear the watcher associated with the watcher_id handle.
+ *
+ * Returns 0 on success or -1 if no watcher exists for the supplied id.
+ */ +PyAPI_FUNC(int) PyFunction_ClearWatcher(int watcher_id); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_FUNCOBJECT_H */ +#endif /* Py_LIMITED_API */ diff --git a/Include/cpython/genobject.h b/Include/cpython/genobject.h new file mode 100644 index 0000000000000000000000000000000000000000..49e46c277d75ae8bc96580c9adeb96e41dae2ae8 --- /dev/null +++ b/Include/cpython/genobject.h @@ -0,0 +1,83 @@ +/* Generator object interface */ + +#ifndef Py_LIMITED_API +#ifndef Py_GENOBJECT_H +#define Py_GENOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +/* --- Generators --------------------------------------------------------- */ + +/* _PyGenObject_HEAD defines the initial segment of generator + and coroutine objects. */ +#define _PyGenObject_HEAD(prefix) \ + PyObject_HEAD \ + /* List of weak reference. */ \ + PyObject *prefix##_weakreflist; \ + /* Name of the generator. */ \ + PyObject *prefix##_name; \ + /* Qualified name of the generator. */ \ + PyObject *prefix##_qualname; \ + _PyErr_StackItem prefix##_exc_state; \ + PyObject *prefix##_origin_or_finalizer; \ + char prefix##_hooks_inited; \ + char prefix##_closed; \ + char prefix##_running_async; \ + /* The frame */ \ + int8_t prefix##_frame_state; \ + PyObject *prefix##_iframe[1]; \ + +typedef struct { + /* The gi_ prefix is intended to remind of generator-iterator. */ + _PyGenObject_HEAD(gi) +} PyGenObject; + +PyAPI_DATA(PyTypeObject) PyGen_Type; + +#define PyGen_Check(op) PyObject_TypeCheck((op), &PyGen_Type) +#define PyGen_CheckExact(op) Py_IS_TYPE((op), &PyGen_Type) + +PyAPI_FUNC(PyObject *) PyGen_New(PyFrameObject *); +PyAPI_FUNC(PyObject *) PyGen_NewWithQualName(PyFrameObject *, + PyObject *name, PyObject *qualname); +PyAPI_FUNC(PyCodeObject *) PyGen_GetCode(PyGenObject *gen); + + +/* --- PyCoroObject ------------------------------------------------------- */ + +typedef struct { + _PyGenObject_HEAD(cr) +} PyCoroObject; + +PyAPI_DATA(PyTypeObject) PyCoro_Type; + +#define PyCoro_CheckExact(op) Py_IS_TYPE((op), &PyCoro_Type) +PyAPI_FUNC(PyObject *) PyCoro_New(PyFrameObject *, + PyObject *name, PyObject *qualname); + + +/* --- Asynchronous Generators -------------------------------------------- */ + +typedef struct { + _PyGenObject_HEAD(ag) +} PyAsyncGenObject; + +PyAPI_DATA(PyTypeObject) PyAsyncGen_Type; +PyAPI_DATA(PyTypeObject) _PyAsyncGenASend_Type; + +PyAPI_FUNC(PyObject *) PyAsyncGen_New(PyFrameObject *, + PyObject *name, PyObject *qualname); + +#define PyAsyncGen_CheckExact(op) Py_IS_TYPE((op), &PyAsyncGen_Type) + +#define PyAsyncGenASend_CheckExact(op) Py_IS_TYPE((op), &_PyAsyncGenASend_Type) + + +#undef _PyGenObject_HEAD + +#ifdef __cplusplus +} +#endif +#endif /* !Py_GENOBJECT_H */ +#endif /* Py_LIMITED_API */ diff --git a/Include/cpython/import.h b/Include/cpython/import.h new file mode 100644 index 0000000000000000000000000000000000000000..7daf0b84fcf71bf172db2f7b60ff0e260053da73 --- /dev/null +++ b/Include/cpython/import.h @@ -0,0 +1,25 @@ +#ifndef Py_CPYTHON_IMPORT_H +# error "this header file must not be included directly" +#endif + +PyMODINIT_FUNC PyInit__imp(void); + +struct _inittab { + const char *name; /* ASCII encoded string */ + PyObject* (*initfunc)(void); +}; +// This is not used after Py_Initialize() is called. 
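+// An illustrative embedding sketch (module name and init function are
+// hypothetical):
+//
+//     static struct _inittab extra[] = {
+//         {"spam", PyInit_spam},
+//         {NULL, NULL},
+//     };
+//     PyImport_ExtendInittab(extra);  // must happen before Py_Initialize()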
+PyAPI_DATA(struct _inittab *) PyImport_Inittab; +PyAPI_FUNC(int) PyImport_ExtendInittab(struct _inittab *newtab); + +struct _frozen { + const char *name; /* ASCII encoded string */ + const unsigned char *code; + int size; + int is_package; +}; + +/* Embedding apps may change this pointer to point to their favorite + collection of frozen modules: */ + +PyAPI_DATA(const struct _frozen *) PyImport_FrozenModules; diff --git a/Include/cpython/initconfig.h b/Include/cpython/initconfig.h new file mode 100644 index 0000000000000000000000000000000000000000..5da5ef9e5431b1f79bb5134a35f79ce95d90fea6 --- /dev/null +++ b/Include/cpython/initconfig.h @@ -0,0 +1,274 @@ +#ifndef Py_PYCORECONFIG_H +#define Py_PYCORECONFIG_H +#ifndef Py_LIMITED_API +#ifdef __cplusplus +extern "C" { +#endif + +/* --- PyStatus ----------------------------------------------- */ + +typedef struct { + enum { + _PyStatus_TYPE_OK=0, + _PyStatus_TYPE_ERROR=1, + _PyStatus_TYPE_EXIT=2 + } _type; + const char *func; + const char *err_msg; + int exitcode; +} PyStatus; + +PyAPI_FUNC(PyStatus) PyStatus_Ok(void); +PyAPI_FUNC(PyStatus) PyStatus_Error(const char *err_msg); +PyAPI_FUNC(PyStatus) PyStatus_NoMemory(void); +PyAPI_FUNC(PyStatus) PyStatus_Exit(int exitcode); +PyAPI_FUNC(int) PyStatus_IsError(PyStatus err); +PyAPI_FUNC(int) PyStatus_IsExit(PyStatus err); +PyAPI_FUNC(int) PyStatus_Exception(PyStatus err); + +/* --- PyWideStringList ------------------------------------------------ */ + +typedef struct { + /* If length is greater than zero, items must be non-NULL + and all items strings must be non-NULL */ + Py_ssize_t length; + wchar_t **items; +} PyWideStringList; + +PyAPI_FUNC(PyStatus) PyWideStringList_Append(PyWideStringList *list, + const wchar_t *item); +PyAPI_FUNC(PyStatus) PyWideStringList_Insert(PyWideStringList *list, + Py_ssize_t index, + const wchar_t *item); + + +/* --- PyPreConfig ----------------------------------------------- */ + +typedef struct PyPreConfig { + int _config_init; /* _PyConfigInitEnum value */ + + /* Parse Py_PreInitializeFromBytesArgs() arguments? + See PyConfig.parse_argv */ + int parse_argv; + + /* If greater than 0, enable isolated mode: sys.path contains + neither the script's directory nor the user's site-packages directory. + + Set to 1 by the -I command line option. If set to -1 (default), inherit + Py_IsolatedFlag value. */ + int isolated; + + /* If greater than 0: use environment variables. + Set to 0 by -E command line option. If set to -1 (default), it is + set to !Py_IgnoreEnvironmentFlag. */ + int use_environment; + + /* Set the LC_CTYPE locale to the user preferred locale? If equals to 0, + set coerce_c_locale and coerce_c_locale_warn to 0. */ + int configure_locale; + + /* Coerce the LC_CTYPE locale if it's equal to "C"? (PEP 538) + + Set to 0 by PYTHONCOERCECLOCALE=0. Set to 1 by PYTHONCOERCECLOCALE=1. + Set to 2 if the user preferred LC_CTYPE locale is "C". + + If it is equal to 1, LC_CTYPE locale is read to decide if it should be + coerced or not (ex: PYTHONCOERCECLOCALE=1). Internally, it is set to 2 + if the LC_CTYPE locale must be coerced. + + Disable by default (set to 0). Set it to -1 to let Python decide if it + should be enabled or not. */ + int coerce_c_locale; + + /* Emit a warning if the LC_CTYPE locale is coerced? + + Set to 1 by PYTHONCOERCECLOCALE=warn. + + Disable by default (set to 0). Set it to -1 to let Python decide if it + should be enabled or not. 
*/ + int coerce_c_locale_warn; + +#ifdef MS_WINDOWS + /* If greater than 1, use the "mbcs" encoding instead of the UTF-8 + encoding for the filesystem encoding. + + Set to 1 if the PYTHONLEGACYWINDOWSFSENCODING environment variable is + set to a non-empty string. If set to -1 (default), inherit + Py_LegacyWindowsFSEncodingFlag value. + + See PEP 529 for more details. */ + int legacy_windows_fs_encoding; +#endif + + /* Enable UTF-8 mode? (PEP 540) + + Disabled by default (equals to 0). + + Set to 1 by "-X utf8" and "-X utf8=1" command line options. + Set to 1 by PYTHONUTF8=1 environment variable. + + Set to 0 by "-X utf8=0" and PYTHONUTF8=0. + + If equals to -1, it is set to 1 if the LC_CTYPE locale is "C" or + "POSIX", otherwise it is set to 0. Inherits the Py_UTF8Mode value. */ + int utf8_mode; + + /* If non-zero, enable the Python Development Mode. + + Set to 1 by the -X dev command line option. Set by the PYTHONDEVMODE + environment variable. */ + int dev_mode; + + /* Memory allocator: PYTHONMALLOC env var. + See PyMemAllocatorName for valid values. */ + int allocator; +} PyPreConfig; + +PyAPI_FUNC(void) PyPreConfig_InitPythonConfig(PyPreConfig *config); +PyAPI_FUNC(void) PyPreConfig_InitIsolatedConfig(PyPreConfig *config); + + +/* --- PyConfig ---------------------------------------------- */ + +/* This structure is best documented in the Doc/c-api/init_config.rst file. */ +typedef struct PyConfig { + int _config_init; /* _PyConfigInitEnum value */ + + int isolated; + int use_environment; + int dev_mode; + int install_signal_handlers; + int use_hash_seed; + unsigned long hash_seed; + int faulthandler; + int tracemalloc; + int perf_profiling; + int import_time; + int code_debug_ranges; + int show_ref_count; + int dump_refs; + wchar_t *dump_refs_file; + int malloc_stats; + wchar_t *filesystem_encoding; + wchar_t *filesystem_errors; + wchar_t *pycache_prefix; + int parse_argv; + PyWideStringList orig_argv; + PyWideStringList argv; + PyWideStringList xoptions; + PyWideStringList warnoptions; + int site_import; + int bytes_warning; + int warn_default_encoding; + int inspect; + int interactive; + int optimization_level; + int parser_debug; + int write_bytecode; + int verbose; + int quiet; + int user_site_directory; + int configure_c_stdio; + int buffered_stdio; + wchar_t *stdio_encoding; + wchar_t *stdio_errors; +#ifdef MS_WINDOWS + int legacy_windows_stdio; +#endif + wchar_t *check_hash_pycs_mode; + int use_frozen_modules; + int safe_path; + int int_max_str_digits; + + int cpu_count; +#ifdef Py_GIL_DISABLED + int enable_gil; +#endif + + /* --- Path configuration inputs ------------ */ + int pathconfig_warnings; + wchar_t *program_name; + wchar_t *pythonpath_env; + wchar_t *home; + wchar_t *platlibdir; + + /* --- Path configuration outputs ----------- */ + int module_search_paths_set; + PyWideStringList module_search_paths; + wchar_t *stdlib_dir; + wchar_t *executable; + wchar_t *base_executable; + wchar_t *prefix; + wchar_t *base_prefix; + wchar_t *exec_prefix; + wchar_t *base_exec_prefix; + + /* --- Parameter only used by Py_Main() ---------- */ + int skip_source_first_line; + wchar_t *run_command; + wchar_t *run_module; + wchar_t *run_filename; + + /* --- Set by Py_Main() -------------------------- */ + wchar_t *sys_path_0; + + /* --- Private fields ---------------------------- */ + + // Install importlib? If equals to 0, importlib is not initialized at all. + // Needed by freeze_importlib.
+ int _install_importlib; + + // If equal to 0, stop Python initialization before the "main" phase. + int _init_main; + + // If non-zero, we believe we're running from a source tree. + int _is_python_build; + +#ifdef Py_STATS + // If non-zero, turns on statistics gathering. + int _pystats; +#endif + +#ifdef Py_DEBUG + // If not empty, import a non-__main__ module before site.py is executed. + // PYTHON_PRESITE=package.module or -X presite=package.module + wchar_t *run_presite; +#endif +} PyConfig; + +PyAPI_FUNC(void) PyConfig_InitPythonConfig(PyConfig *config); +PyAPI_FUNC(void) PyConfig_InitIsolatedConfig(PyConfig *config); +PyAPI_FUNC(void) PyConfig_Clear(PyConfig *); +PyAPI_FUNC(PyStatus) PyConfig_SetString( + PyConfig *config, + wchar_t **config_str, + const wchar_t *str); +PyAPI_FUNC(PyStatus) PyConfig_SetBytesString( + PyConfig *config, + wchar_t **config_str, + const char *str); +PyAPI_FUNC(PyStatus) PyConfig_Read(PyConfig *config); +PyAPI_FUNC(PyStatus) PyConfig_SetBytesArgv( + PyConfig *config, + Py_ssize_t argc, + char * const *argv); +PyAPI_FUNC(PyStatus) PyConfig_SetArgv(PyConfig *config, + Py_ssize_t argc, + wchar_t * const *argv); +PyAPI_FUNC(PyStatus) PyConfig_SetWideStringList(PyConfig *config, + PyWideStringList *list, + Py_ssize_t length, wchar_t **items); + + +/* --- Helper functions --------------------------------------- */ + +/* Get the original command line arguments, before Python modified them. + + See also PyConfig.orig_argv. */ +PyAPI_FUNC(void) Py_GetArgcArgv(int *argc, wchar_t ***argv); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_LIMITED_API */ +#endif /* !Py_PYCORECONFIG_H */ diff --git a/Include/cpython/listobject.h b/Include/cpython/listobject.h new file mode 100644 index 0000000000000000000000000000000000000000..49f5e8d6d1a0d6c2863925daaac16c07b6066948 --- /dev/null +++ b/Include/cpython/listobject.h @@ -0,0 +1,53 @@ +#ifndef Py_CPYTHON_LISTOBJECT_H +# error "this header file must not be included directly" +#endif + +typedef struct { + PyObject_VAR_HEAD + /* Vector of pointers to list elements. list[0] is ob_item[0], etc. */ + PyObject **ob_item; + + /* ob_item contains space for 'allocated' elements. The number + * currently in use is ob_size. + * Invariants: + * 0 <= ob_size <= allocated + * len(list) == ob_size + * ob_item == NULL implies ob_size == allocated == 0 + * list.sort() temporarily sets allocated to -1 to detect mutations. + * + * Items must normally not be NULL, except during construction when + * the list is not yet visible outside the function that builds it. + */ + Py_ssize_t allocated; +} PyListObject; + +/* Cast argument to PyListObject* type. 
*/ +#define _PyList_CAST(op) \ + (assert(PyList_Check(op)), _Py_CAST(PyListObject*, (op))) + +// Macros and static inline functions, trading safety for speed + +static inline Py_ssize_t PyList_GET_SIZE(PyObject *op) { + PyListObject *list = _PyList_CAST(op); +#ifdef Py_GIL_DISABLED + return _Py_atomic_load_ssize_relaxed(&(_PyVarObject_CAST(list)->ob_size)); +#else + return Py_SIZE(list); +#endif +} +#define PyList_GET_SIZE(op) PyList_GET_SIZE(_PyObject_CAST(op)) + +#define PyList_GET_ITEM(op, index) (_PyList_CAST(op)->ob_item[(index)]) + +static inline void +PyList_SET_ITEM(PyObject *op, Py_ssize_t index, PyObject *value) { + PyListObject *list = _PyList_CAST(op); + assert(0 <= index); + assert(index < list->allocated); + list->ob_item[index] = value; +} +#define PyList_SET_ITEM(op, index, value) \ + PyList_SET_ITEM(_PyObject_CAST(op), (index), _PyObject_CAST(value)) + +PyAPI_FUNC(int) PyList_Extend(PyObject *self, PyObject *iterable); +PyAPI_FUNC(int) PyList_Clear(PyObject *self); diff --git a/Include/cpython/lock.h b/Include/cpython/lock.h new file mode 100644 index 0000000000000000000000000000000000000000..8ee03e82f74dfdc747a6af65b3baf7f3095792b7 --- /dev/null +++ b/Include/cpython/lock.h @@ -0,0 +1,63 @@ +#ifndef Py_CPYTHON_LOCK_H +# error "this header file must not be included directly" +#endif + +#define _Py_UNLOCKED 0 +#define _Py_LOCKED 1 + +// A mutex that occupies one byte. The lock can be zero initialized to +// represent the unlocked state. +// +// Typical initialization: +// PyMutex m = (PyMutex){0}; +// +// Or initialize as global variables: +// static PyMutex m; +// +// Typical usage: +// PyMutex_Lock(&m); +// ... +// PyMutex_Unlock(&m); +// +// The contents of the PyMutex are not part of the public API, but are +// described to aid in understanding the implementation and debugging. Only +// the two least significant bits are used. The remaining bits are always zero: +// 0b00: unlocked +// 0b01: locked +// 0b10: unlocked and has parked threads +// 0b11: locked and has parked threads +typedef struct PyMutex { + uint8_t _bits; // (private) +} PyMutex; + +// exported function for locking the mutex +PyAPI_FUNC(void) PyMutex_Lock(PyMutex *m); + +// exported function for unlocking the mutex +PyAPI_FUNC(void) PyMutex_Unlock(PyMutex *m); + +// Locks the mutex. +// +// If the mutex is currently locked, the calling thread will be parked until +// the mutex is unlocked. If the current thread holds the GIL, then the GIL +// will be released while the thread is parked. +static inline void +_PyMutex_Lock(PyMutex *m) +{ + uint8_t expected = _Py_UNLOCKED; + if (!_Py_atomic_compare_exchange_uint8(&m->_bits, &expected, _Py_LOCKED)) { + PyMutex_Lock(m); + } +} +#define PyMutex_Lock _PyMutex_Lock + +// Unlocks the mutex. +static inline void +_PyMutex_Unlock(PyMutex *m) +{ + uint8_t expected = _Py_LOCKED; + if (!_Py_atomic_compare_exchange_uint8(&m->_bits, &expected, _Py_UNLOCKED)) { + PyMutex_Unlock(m); + } +} +#define PyMutex_Unlock _PyMutex_Unlock diff --git a/Include/cpython/longintrepr.h b/Include/cpython/longintrepr.h new file mode 100644 index 0000000000000000000000000000000000000000..3246908ba982e217d1b3987dbae9884f075c35ad --- /dev/null +++ b/Include/cpython/longintrepr.h @@ -0,0 +1,146 @@ +#ifndef Py_LIMITED_API +#ifndef Py_LONGINTREPR_H +#define Py_LONGINTREPR_H +#ifdef __cplusplus +extern "C" { +#endif + + +/* This is published for the benefit of "friends" marshal.c and _decimal.c. */ + +/* Parameters of the integer representation. 
There are two different + sets of parameters: one set for 30-bit digits, stored in an unsigned 32-bit + integer type, and one set for 15-bit digits with each digit stored in an + unsigned short. The value of PYLONG_BITS_IN_DIGIT, defined either at + configure time or in pyport.h, is used to decide which digit size to use. + + Type 'digit' should be able to hold 2*PyLong_BASE-1, and type 'twodigits' + should be an unsigned integer type able to hold all integers up to + PyLong_BASE*PyLong_BASE-1. x_sub assumes that 'digit' is an unsigned type, + and that overflow is handled by taking the result modulo 2**N for some N > + PyLong_SHIFT. The majority of the code doesn't care about the precise + value of PyLong_SHIFT, but there are some notable exceptions: + + - PyLong_{As,From}ByteArray require that PyLong_SHIFT be at least 8 + + - long_hash() requires that PyLong_SHIFT is *strictly* less than the number + of bits in an unsigned long, as do the PyLong <-> long (or unsigned long) + conversion functions + + - the Python int <-> size_t/Py_ssize_t conversion functions expect that + PyLong_SHIFT is strictly less than the number of bits in a size_t + + - the marshal code currently expects that PyLong_SHIFT is a multiple of 15 + + - NSMALLNEGINTS and NSMALLPOSINTS should be small enough to fit in a single + digit; with the current values this forces PyLong_SHIFT >= 9 + + The values 15 and 30 should fit all of the above requirements, on any + platform. +*/ + +#if PYLONG_BITS_IN_DIGIT == 30 +typedef uint32_t digit; +typedef int32_t sdigit; /* signed variant of digit */ +typedef uint64_t twodigits; +typedef int64_t stwodigits; /* signed variant of twodigits */ +#define PyLong_SHIFT 30 +#define _PyLong_DECIMAL_SHIFT 9 /* max(e such that 10**e fits in a digit) */ +#define _PyLong_DECIMAL_BASE ((digit)1000000000) /* 10 ** DECIMAL_SHIFT */ +#elif PYLONG_BITS_IN_DIGIT == 15 +typedef unsigned short digit; +typedef short sdigit; /* signed variant of digit */ +typedef unsigned long twodigits; +typedef long stwodigits; /* signed variant of twodigits */ +#define PyLong_SHIFT 15 +#define _PyLong_DECIMAL_SHIFT 4 /* max(e such that 10**e fits in a digit) */ +#define _PyLong_DECIMAL_BASE ((digit)10000) /* 10 ** DECIMAL_SHIFT */ +#else +#error "PYLONG_BITS_IN_DIGIT should be 15 or 30" +#endif +#define PyLong_BASE ((digit)1 << PyLong_SHIFT) +#define PyLong_MASK ((digit)(PyLong_BASE - 1)) + +/* Long integer representation. + + Long integers are made up of a number of 30- or 15-bit digits, depending on + the platform. The number of digits (ndigits) is stored in the high bits of + the lv_tag field (lv_tag >> _PyLong_NON_SIZE_BITS). + + The absolute value of a number is equal to + SUM(for i=0 through ndigits-1) ob_digit[i] * 2**(PyLong_SHIFT*i) + + The sign of the value is stored in the lower 2 bits of lv_tag. + + - 0: Positive + - 1: Zero + - 2: Negative + + The third lowest bit of lv_tag is reserved for an immortality flag, but is + not currently used. + + In a normalized number, ob_digit[ndigits-1] (the most significant + digit) is never zero. Also, in all cases, for all valid i, + 0 <= ob_digit[i] <= PyLong_MASK. + + The allocation function takes care of allocating extra memory + so that ob_digit[0] ... ob_digit[ndigits-1] are actually available. + We always allocate memory for at least one digit, so accessing ob_digit[0] + is always safe. However, in the case ndigits == 0, the contents of + ob_digit[0] may be undefined.
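+ +   Worked example (illustrative, assuming 30-bit digits): the value 2**40 + 7 +   is stored with ndigits == 2, ob_digit[0] == 7 and ob_digit[1] == 1024, +   because 2**40 + 7 == 7 + 1024 * 2**30; the low two bits of lv_tag are 0 +   (positive).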
+*/ + +typedef struct _PyLongValue { + uintptr_t lv_tag; /* Number of digits, sign and flags */ + digit ob_digit[1]; +} _PyLongValue; + +struct _longobject { + PyObject_HEAD + _PyLongValue long_value; +}; + +PyAPI_FUNC(PyLongObject*) _PyLong_New(Py_ssize_t); + +// Return a copy of src. +PyAPI_FUNC(PyObject*) _PyLong_Copy(PyLongObject *src); + +PyAPI_FUNC(PyLongObject*) _PyLong_FromDigits( + int negative, + Py_ssize_t digit_count, + digit *digits); + + +/* Inline some internals for speed. These should be in pycore_long.h + * if user code didn't need them inlined. */ + +#define _PyLong_SIGN_MASK 3 +#define _PyLong_NON_SIZE_BITS 3 + + +static inline int +_PyLong_IsCompact(const PyLongObject* op) { + assert(PyType_HasFeature((op)->ob_base.ob_type, Py_TPFLAGS_LONG_SUBCLASS)); + return op->long_value.lv_tag < (2 << _PyLong_NON_SIZE_BITS); +} + +#define PyUnstable_Long_IsCompact _PyLong_IsCompact + +static inline Py_ssize_t +_PyLong_CompactValue(const PyLongObject *op) +{ + Py_ssize_t sign; + assert(PyType_HasFeature((op)->ob_base.ob_type, Py_TPFLAGS_LONG_SUBCLASS)); + assert(PyUnstable_Long_IsCompact(op)); + sign = 1 - (op->long_value.lv_tag & _PyLong_SIGN_MASK); + return sign * (Py_ssize_t)op->long_value.ob_digit[0]; +} + +#define PyUnstable_Long_CompactValue _PyLong_CompactValue + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_LONGINTREPR_H */ +#endif /* Py_LIMITED_API */ diff --git a/Include/cpython/longobject.h b/Include/cpython/longobject.h new file mode 100644 index 0000000000000000000000000000000000000000..0d49242ff6808cac1e24d1780b51945b3caf1869 --- /dev/null +++ b/Include/cpython/longobject.h @@ -0,0 +1,116 @@ +#ifndef Py_CPYTHON_LONGOBJECT_H +# error "this header file must not be included directly" +#endif + +PyAPI_FUNC(PyObject*) PyLong_FromUnicodeObject(PyObject *u, int base); + +#define Py_ASNATIVEBYTES_DEFAULTS -1 +#define Py_ASNATIVEBYTES_BIG_ENDIAN 0 +#define Py_ASNATIVEBYTES_LITTLE_ENDIAN 1 +#define Py_ASNATIVEBYTES_NATIVE_ENDIAN 3 +#define Py_ASNATIVEBYTES_UNSIGNED_BUFFER 4 +#define Py_ASNATIVEBYTES_REJECT_NEGATIVE 8 +#define Py_ASNATIVEBYTES_ALLOW_INDEX 16 + +/* PyLong_AsNativeBytes: Copy the integer value to a native variable. + buffer points to the first byte of the variable. + n_bytes is the number of bytes available in the buffer. Pass 0 to request + the required size for the value. + flags is a bitfield of the following flags: + * 1 - little endian + * 2 - native endian + * 4 - unsigned destination (e.g. don't reject copying 255 into one byte) + * 8 - raise an exception for negative inputs + * 16 - call __index__ on non-int types + If flags is -1 (all bits set), native endian is used, value truncation + behaves most like C (allows negative inputs and allow MSB set), and non-int + objects will raise a TypeError. + Big endian mode will write the most significant byte into the address + directly referenced by buffer; little endian will write the least significant + byte into that address. + + If an exception is raised, returns a negative value. + Otherwise, returns the number of bytes that are required to store the value. + To check that the full value is represented, ensure that the return value is + equal or less than n_bytes. + All n_bytes are guaranteed to be written (unless an exception occurs), and + so ignoring a positive return value is the equivalent of a downcast in C. + In cases where the full value could not be represented, the returned value + may be larger than necessary - this function is not an accurate way to + calculate the bit length of an integer object. 
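+ +   Usage sketch (illustrative; "obj" is assumed to be a Python int): + +       int64_t value; +       Py_ssize_t n = PyLong_AsNativeBytes(obj, &value, sizeof(value), -1); +       if (n < 0) { +           // error: an exception is set +       } +       else if ((size_t)n > sizeof(value)) { +           // the value did not fit and was truncated, like a C downcast +       }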
+ */ +PyAPI_FUNC(Py_ssize_t) PyLong_AsNativeBytes(PyObject* v, void* buffer, + Py_ssize_t n_bytes, int flags); + +/* PyLong_FromNativeBytes: Create an int value from a native integer + n_bytes is the number of bytes to read from the buffer. Passing 0 will + always produce the zero int. + PyLong_FromUnsignedNativeBytes always produces a non-negative int. + flags is the same as for PyLong_AsNativeBytes, but only supports selecting + the endianness or forcing an unsigned buffer. + + Returns the int object, or NULL with an exception set. */ +PyAPI_FUNC(PyObject*) PyLong_FromNativeBytes(const void* buffer, size_t n_bytes, + int flags); +PyAPI_FUNC(PyObject*) PyLong_FromUnsignedNativeBytes(const void* buffer, + size_t n_bytes, int flags); + +PyAPI_FUNC(int) PyUnstable_Long_IsCompact(const PyLongObject* op); +PyAPI_FUNC(Py_ssize_t) PyUnstable_Long_CompactValue(const PyLongObject* op); + +// _PyLong_Sign. Return 0 if v is 0, -1 if v < 0, +1 if v > 0. +// v must not be NULL, and must be a normalized long. +// There are no error cases. +PyAPI_FUNC(int) _PyLong_Sign(PyObject *v); + +/* _PyLong_NumBits. Return the number of bits needed to represent the + absolute value of a long. For example, this returns 1 for 1 and -1, 2 + for 2 and -2, and 2 for 3 and -3. It returns 0 for 0. + v must not be NULL, and must be a normalized long. + (size_t)-1 is returned and OverflowError set if the true result doesn't + fit in a size_t. +*/ +PyAPI_FUNC(size_t) _PyLong_NumBits(PyObject *v); + +/* _PyLong_FromByteArray: View the n unsigned bytes as a binary integer in + base 256, and return a Python int with the same numeric value. + If n is 0, the integer is 0. Else: + If little_endian is 1/true, bytes[n-1] is the MSB and bytes[0] the LSB; + else (little_endian is 0/false) bytes[0] is the MSB and bytes[n-1] the + LSB. + If is_signed is 0/false, view the bytes as a non-negative integer. + If is_signed is 1/true, view the bytes as a 2's-complement integer, + non-negative if bit 0x80 of the MSB is clear, negative if set. + Error returns: + + Return NULL with the appropriate exception set if there's not + enough memory to create the Python int. +*/ +PyAPI_FUNC(PyObject *) _PyLong_FromByteArray( + const unsigned char* bytes, size_t n, + int little_endian, int is_signed); + +/* _PyLong_AsByteArray: Convert the least-significant 8*n bits of long + v to a base-256 integer, stored in array bytes. Normally return 0, + return -1 on error. + If little_endian is 1/true, store the MSB at bytes[n-1] and the LSB at + bytes[0]; else (little_endian is 0/false) store the MSB at bytes[0] and + the LSB at bytes[n-1]. + If is_signed is 0/false, it's an error if v < 0; else (v >= 0) n bytes + are filled and there's nothing special about bit 0x80 of the MSB. + If is_signed is 1/true, bytes is filled with the 2's-complement + representation of v's value. Bit 0x80 of the MSB is the sign bit. + Error returns (-1): + + is_signed is 0 and v < 0. TypeError is set in this case, and bytes + isn't altered. + + n isn't big enough to hold the full mathematical value of v. For + example, if is_signed is 0 and there are more digits in the v than + fit in n; or if is_signed is 1, v < 0, and n is just 1 bit shy of + being large enough to hold a sign bit. OverflowError is set in this + case, but bytes holds the least-significant n bytes of the true value. 
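+ +   Usage sketch (illustrative): store v into 8 little-endian signed bytes, +   passing with_exceptions=1 so that failures set an exception: + +       unsigned char buf[8]; +       if (_PyLong_AsByteArray((PyLongObject *)v, buf, sizeof(buf), +                               1, 1, 1) < 0) { +           // error: OverflowError or TypeError is set +       }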
+*/ +PyAPI_FUNC(int) _PyLong_AsByteArray(PyLongObject* v, + unsigned char* bytes, size_t n, + int little_endian, int is_signed, int with_exceptions); + +/* For use by the gcd function in mathmodule.c */ +PyAPI_FUNC(PyObject *) _PyLong_GCD(PyObject *, PyObject *); diff --git a/Include/cpython/memoryobject.h b/Include/cpython/memoryobject.h new file mode 100644 index 0000000000000000000000000000000000000000..961161b70f20580b9d1c1408b3b58aebf01214ec --- /dev/null +++ b/Include/cpython/memoryobject.h @@ -0,0 +1,50 @@ +#ifndef Py_CPYTHON_MEMORYOBJECT_H +# error "this header file must not be included directly" +#endif + +/* The structs are declared here so that macros can work, but they shouldn't + be considered public. Don't access their fields directly, use the macros + and functions instead! */ +#define _Py_MANAGED_BUFFER_RELEASED 0x001 /* access to exporter blocked */ +#define _Py_MANAGED_BUFFER_FREE_FORMAT 0x002 /* free format */ + +typedef struct { + PyObject_HEAD + int flags; /* state flags */ + Py_ssize_t exports; /* number of direct memoryview exports */ + Py_buffer master; /* snapshot buffer obtained from the original exporter */ +} _PyManagedBufferObject; + + +/* memoryview state flags */ +#define _Py_MEMORYVIEW_RELEASED 0x001 /* access to master buffer blocked */ +#define _Py_MEMORYVIEW_C 0x002 /* C-contiguous layout */ +#define _Py_MEMORYVIEW_FORTRAN 0x004 /* Fortran contiguous layout */ +#define _Py_MEMORYVIEW_SCALAR 0x008 /* scalar: ndim = 0 */ +#define _Py_MEMORYVIEW_PIL 0x010 /* PIL-style layout */ +#define _Py_MEMORYVIEW_RESTRICTED 0x020 /* Disallow new references to the memoryview's buffer */ + +typedef struct { + PyObject_VAR_HEAD + _PyManagedBufferObject *mbuf; /* managed buffer */ + Py_hash_t hash; /* hash value for read-only views */ + int flags; /* state flags */ + Py_ssize_t exports; /* number of buffer re-exports */ + Py_buffer view; /* private copy of the exporter's view */ + PyObject *weakreflist; + Py_ssize_t ob_array[1]; /* shape, strides, suboffsets */ +} PyMemoryViewObject; + +#define _PyMemoryView_CAST(op) _Py_CAST(PyMemoryViewObject*, op) + +/* Get a pointer to the memoryview's private copy of the exporter's buffer. */ +static inline Py_buffer* PyMemoryView_GET_BUFFER(PyObject *op) { + return (&_PyMemoryView_CAST(op)->view); +} +#define PyMemoryView_GET_BUFFER(op) PyMemoryView_GET_BUFFER(_PyObject_CAST(op)) + +/* Get a pointer to the exporting object (this may be NULL!). 
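+   For example (illustrative), memoryviews created with +   PyMemoryView_FromMemory() or PyMemoryView_FromBuffer() have no exporting +   object, so this returns NULL for them.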
*/ +static inline PyObject* PyMemoryView_GET_BASE(PyObject *op) { + return _PyMemoryView_CAST(op)->view.obj; +} +#define PyMemoryView_GET_BASE(op) PyMemoryView_GET_BASE(_PyObject_CAST(op)) diff --git a/Include/cpython/methodobject.h b/Include/cpython/methodobject.h new file mode 100644 index 0000000000000000000000000000000000000000..d541e1549480417fbce8933e222821ef8aab6247 --- /dev/null +++ b/Include/cpython/methodobject.h @@ -0,0 +1,66 @@ +#ifndef Py_CPYTHON_METHODOBJECT_H +# error "this header file must not be included directly" +#endif + +// PyCFunctionObject structure + +typedef struct { + PyObject_HEAD + PyMethodDef *m_ml; /* Description of the C function to call */ + PyObject *m_self; /* Passed as 'self' arg to the C func, can be NULL */ + PyObject *m_module; /* The __module__ attribute, can be anything */ + PyObject *m_weakreflist; /* List of weak references */ + vectorcallfunc vectorcall; +} PyCFunctionObject; + +#define _PyCFunctionObject_CAST(func) \ + (assert(PyCFunction_Check(func)), \ + _Py_CAST(PyCFunctionObject*, (func))) + + +// PyCMethodObject structure + +typedef struct { + PyCFunctionObject func; + PyTypeObject *mm_class; /* Class that defines this method */ +} PyCMethodObject; + +#define _PyCMethodObject_CAST(func) \ + (assert(PyCMethod_Check(func)), \ + _Py_CAST(PyCMethodObject*, (func))) + +PyAPI_DATA(PyTypeObject) PyCMethod_Type; + +#define PyCMethod_CheckExact(op) Py_IS_TYPE((op), &PyCMethod_Type) +#define PyCMethod_Check(op) PyObject_TypeCheck((op), &PyCMethod_Type) + + +/* Static inline functions for direct access to these values. + Type checks are *not* done, so use with care. */ +static inline PyCFunction PyCFunction_GET_FUNCTION(PyObject *func) { + return _PyCFunctionObject_CAST(func)->m_ml->ml_meth; +} +#define PyCFunction_GET_FUNCTION(func) PyCFunction_GET_FUNCTION(_PyObject_CAST(func)) + +static inline PyObject* PyCFunction_GET_SELF(PyObject *func_obj) { + PyCFunctionObject *func = _PyCFunctionObject_CAST(func_obj); + if (func->m_ml->ml_flags & METH_STATIC) { + return _Py_NULL; + } + return func->m_self; +} +#define PyCFunction_GET_SELF(func) PyCFunction_GET_SELF(_PyObject_CAST(func)) + +static inline int PyCFunction_GET_FLAGS(PyObject *func) { + return _PyCFunctionObject_CAST(func)->m_ml->ml_flags; +} +#define PyCFunction_GET_FLAGS(func) PyCFunction_GET_FLAGS(_PyObject_CAST(func)) + +static inline PyTypeObject* PyCFunction_GET_CLASS(PyObject *func_obj) { + PyCFunctionObject *func = _PyCFunctionObject_CAST(func_obj); + if (func->m_ml->ml_flags & METH_METHOD) { + return _PyCMethodObject_CAST(func)->mm_class; + } + return _Py_NULL; +} +#define PyCFunction_GET_CLASS(func) PyCFunction_GET_CLASS(_PyObject_CAST(func)) diff --git a/Include/cpython/modsupport.h b/Include/cpython/modsupport.h new file mode 100644 index 0000000000000000000000000000000000000000..d3b88f58c82ca3e923cb7756871113f1dfcafec7 --- /dev/null +++ b/Include/cpython/modsupport.h @@ -0,0 +1,26 @@ +#ifndef Py_CPYTHON_MODSUPPORT_H +# error "this header file must not be included directly" +#endif + +// A data structure that can be used to run initialization code once in a +// thread-safe manner. The C++11 equivalent is std::call_once. +typedef struct { + uint8_t v; +} _PyOnceFlag; + +typedef struct _PyArg_Parser { + const char *format; + const char * const *keywords; + const char *fname; + const char *custom_msg; + _PyOnceFlag once; /* atomic one-time initialization flag */ + int is_kwtuple_owned; /* does this parser own the kwtuple object? 
*/ + int pos; /* number of positional-only arguments */ + int min; /* minimal number of arguments */ + int max; /* maximal number of positional arguments */ + PyObject *kwtuple; /* tuple of keyword parameter names */ + struct _PyArg_Parser *next; +} _PyArg_Parser; + +PyAPI_FUNC(int) _PyArg_ParseTupleAndKeywordsFast(PyObject *, PyObject *, + struct _PyArg_Parser *, ...); diff --git a/Include/cpython/monitoring.h b/Include/cpython/monitoring.h new file mode 100644 index 0000000000000000000000000000000000000000..797ba51246b1c63e5ee3a765c7742751e5acd002 --- /dev/null +++ b/Include/cpython/monitoring.h @@ -0,0 +1,250 @@ +#ifndef Py_CPYTHON_MONITORING_H +# error "this header file must not be included directly" +#endif + +/* Local events. + * These require bytecode instrumentation */ + +#define PY_MONITORING_EVENT_PY_START 0 +#define PY_MONITORING_EVENT_PY_RESUME 1 +#define PY_MONITORING_EVENT_PY_RETURN 2 +#define PY_MONITORING_EVENT_PY_YIELD 3 +#define PY_MONITORING_EVENT_CALL 4 +#define PY_MONITORING_EVENT_LINE 5 +#define PY_MONITORING_EVENT_INSTRUCTION 6 +#define PY_MONITORING_EVENT_JUMP 7 +#define PY_MONITORING_EVENT_BRANCH 8 +#define PY_MONITORING_EVENT_STOP_ITERATION 9 + +#define PY_MONITORING_IS_INSTRUMENTED_EVENT(ev) \ + ((ev) < _PY_MONITORING_LOCAL_EVENTS) + +/* Other events, mainly exceptions */ + +#define PY_MONITORING_EVENT_RAISE 10 +#define PY_MONITORING_EVENT_EXCEPTION_HANDLED 11 +#define PY_MONITORING_EVENT_PY_UNWIND 12 +#define PY_MONITORING_EVENT_PY_THROW 13 +#define PY_MONITORING_EVENT_RERAISE 14 + + +/* Ancillary events */ + +#define PY_MONITORING_EVENT_C_RETURN 15 +#define PY_MONITORING_EVENT_C_RAISE 16 + + +typedef struct _PyMonitoringState { + uint8_t active; + uint8_t opaque; +} PyMonitoringState; + + +PyAPI_FUNC(int) +PyMonitoring_EnterScope(PyMonitoringState *state_array, uint64_t *version, + const uint8_t *event_types, Py_ssize_t length); + +PyAPI_FUNC(int) +PyMonitoring_ExitScope(void); + + +PyAPI_FUNC(int) +_PyMonitoring_FirePyStartEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset); + +PyAPI_FUNC(int) +_PyMonitoring_FirePyResumeEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset); + +PyAPI_FUNC(int) +_PyMonitoring_FirePyReturnEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset, + PyObject *retval); + +PyAPI_FUNC(int) +_PyMonitoring_FirePyYieldEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset, + PyObject *retval); + +PyAPI_FUNC(int) +_PyMonitoring_FireCallEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset, + PyObject* callable, PyObject *arg0); + +PyAPI_FUNC(int) +_PyMonitoring_FireLineEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset, + int lineno); + +PyAPI_FUNC(int) +_PyMonitoring_FireJumpEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset, + PyObject *target_offset); + +PyAPI_FUNC(int) +_PyMonitoring_FireBranchEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset, + PyObject *target_offset); + +PyAPI_FUNC(int) +_PyMonitoring_FireCReturnEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset, + PyObject *retval); + +PyAPI_FUNC(int) +_PyMonitoring_FirePyThrowEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset); + +PyAPI_FUNC(int) +_PyMonitoring_FireRaiseEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset); + +PyAPI_FUNC(int) +_PyMonitoring_FireReraiseEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset); + +PyAPI_FUNC(int) +_PyMonitoring_FireExceptionHandledEvent(PyMonitoringState *state, 
PyObject *codelike, int32_t offset); + +PyAPI_FUNC(int) +_PyMonitoring_FireCRaiseEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset); + +PyAPI_FUNC(int) +_PyMonitoring_FirePyUnwindEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset); + +PyAPI_FUNC(int) +_PyMonitoring_FireStopIterationEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset, PyObject *value); + + +#define _PYMONITORING_IF_ACTIVE(STATE, X) \ + if ((STATE)->active) { \ + return (X); \ + } \ + else { \ + return 0; \ + } + +static inline int +PyMonitoring_FirePyStartEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset) +{ + _PYMONITORING_IF_ACTIVE( + state, + _PyMonitoring_FirePyStartEvent(state, codelike, offset)); +} + +static inline int +PyMonitoring_FirePyResumeEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset) +{ + _PYMONITORING_IF_ACTIVE( + state, + _PyMonitoring_FirePyResumeEvent(state, codelike, offset)); +} + +static inline int +PyMonitoring_FirePyReturnEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset, + PyObject *retval) +{ + _PYMONITORING_IF_ACTIVE( + state, + _PyMonitoring_FirePyReturnEvent(state, codelike, offset, retval)); +} + +static inline int +PyMonitoring_FirePyYieldEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset, + PyObject *retval) +{ + _PYMONITORING_IF_ACTIVE( + state, + _PyMonitoring_FirePyYieldEvent(state, codelike, offset, retval)); +} + +static inline int +PyMonitoring_FireCallEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset, + PyObject* callable, PyObject *arg0) +{ + _PYMONITORING_IF_ACTIVE( + state, + _PyMonitoring_FireCallEvent(state, codelike, offset, callable, arg0)); +} + +static inline int +PyMonitoring_FireLineEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset, + int lineno) +{ + _PYMONITORING_IF_ACTIVE( + state, + _PyMonitoring_FireLineEvent(state, codelike, offset, lineno)); +} + +static inline int +PyMonitoring_FireJumpEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset, + PyObject *target_offset) +{ + _PYMONITORING_IF_ACTIVE( + state, + _PyMonitoring_FireJumpEvent(state, codelike, offset, target_offset)); +} + +static inline int +PyMonitoring_FireBranchEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset, + PyObject *target_offset) +{ + _PYMONITORING_IF_ACTIVE( + state, + _PyMonitoring_FireBranchEvent(state, codelike, offset, target_offset)); +} + +static inline int +PyMonitoring_FireCReturnEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset, + PyObject *retval) +{ + _PYMONITORING_IF_ACTIVE( + state, + _PyMonitoring_FireCReturnEvent(state, codelike, offset, retval)); +} + +static inline int +PyMonitoring_FirePyThrowEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset) +{ + _PYMONITORING_IF_ACTIVE( + state, + _PyMonitoring_FirePyThrowEvent(state, codelike, offset)); +} + +static inline int +PyMonitoring_FireRaiseEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset) +{ + _PYMONITORING_IF_ACTIVE( + state, + _PyMonitoring_FireRaiseEvent(state, codelike, offset)); +} + +static inline int +PyMonitoring_FireReraiseEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset) +{ + _PYMONITORING_IF_ACTIVE( + state, + _PyMonitoring_FireReraiseEvent(state, codelike, offset)); +} + +static inline int +PyMonitoring_FireExceptionHandledEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset) +{ + _PYMONITORING_IF_ACTIVE( + state, + 
_PyMonitoring_FireExceptionHandledEvent(state, codelike, offset)); +} + +static inline int +PyMonitoring_FireCRaiseEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset) +{ + _PYMONITORING_IF_ACTIVE( + state, + _PyMonitoring_FireCRaiseEvent(state, codelike, offset)); +} + +static inline int +PyMonitoring_FirePyUnwindEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset) +{ + _PYMONITORING_IF_ACTIVE( + state, + _PyMonitoring_FirePyUnwindEvent(state, codelike, offset)); +} + +static inline int +PyMonitoring_FireStopIterationEvent(PyMonitoringState *state, PyObject *codelike, int32_t offset, PyObject *value) +{ + _PYMONITORING_IF_ACTIVE( + state, + _PyMonitoring_FireStopIterationEvent(state, codelike, offset, value)); +} + +#undef _PYMONITORING_IF_ACTIVE diff --git a/Include/cpython/object.h b/Include/cpython/object.h new file mode 100644 index 0000000000000000000000000000000000000000..6cb2c40fe2eb71b38716245f977666e26dd928a6 --- /dev/null +++ b/Include/cpython/object.h @@ -0,0 +1,525 @@ +#ifndef Py_CPYTHON_OBJECT_H +# error "this header file must not be included directly" +#endif + +PyAPI_FUNC(void) _Py_NewReference(PyObject *op); +PyAPI_FUNC(void) _Py_NewReferenceNoTotal(PyObject *op); +PyAPI_FUNC(void) _Py_ResurrectReference(PyObject *op); + +#ifdef Py_REF_DEBUG +/* These are useful as debugging aids when chasing down refleaks. */ +PyAPI_FUNC(Py_ssize_t) _Py_GetGlobalRefTotal(void); +# define _Py_GetRefTotal() _Py_GetGlobalRefTotal() +PyAPI_FUNC(Py_ssize_t) _Py_GetLegacyRefTotal(void); +PyAPI_FUNC(Py_ssize_t) _PyInterpreterState_GetRefTotal(PyInterpreterState *); +#endif + + +/********************* String Literals ****************************************/ +/* This structure helps managing static strings. The basic usage goes like this: + Instead of doing + + r = PyObject_CallMethod(o, "foo", "args", ...); + + do + + _Py_IDENTIFIER(foo); + ... + r = _PyObject_CallMethodId(o, &PyId_foo, "args", ...); + + PyId_foo is a static variable, either on block level or file level. On first + usage, the string "foo" is interned, and the structures are linked. On interpreter + shutdown, all strings are released. + + Alternatively, _Py_static_string allows choosing the variable name. + _PyUnicode_FromId returns a borrowed reference to the interned string. + _PyObject_{Get,Set,Has}AttrId are __getattr__ versions using _Py_Identifier*. +*/ +typedef struct _Py_Identifier { + const char* string; + // Index in PyInterpreterState.unicode.ids.array. It is process-wide + // unique and must be initialized to -1. + Py_ssize_t index; + // Hidden PyMutex struct for non free-threaded build. + struct { + uint8_t v; + } mutex; +} _Py_Identifier; + +#ifndef Py_BUILD_CORE +// For now we are keeping _Py_IDENTIFIER for continued use +// in non-builtin extensions (and naughty PyPI modules). + +#define _Py_static_string_init(value) { .string = (value), .index = -1 } +#define _Py_static_string(varname, value) static _Py_Identifier varname = _Py_static_string_init(value) +#define _Py_IDENTIFIER(varname) _Py_static_string(PyId_##varname, #varname) + +#endif /* !Py_BUILD_CORE */ + + +typedef struct { + /* Number implementations must check *both* + arguments for proper type and implement the necessary conversions + in the slot functions themselves. 
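+ +       Illustrative sketch of such a check in an nb_add slot ("MyNum_Check" +       and "mynum_add" are hypothetical): + +           static PyObject * +           mynum_add(PyObject *a, PyObject *b) +           { +               if (!MyNum_Check(a) || !MyNum_Check(b)) { +                   Py_RETURN_NOTIMPLEMENTED; +               } +               // ... perform the actual addition ... +           }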
*/ + + binaryfunc nb_add; + binaryfunc nb_subtract; + binaryfunc nb_multiply; + binaryfunc nb_remainder; + binaryfunc nb_divmod; + ternaryfunc nb_power; + unaryfunc nb_negative; + unaryfunc nb_positive; + unaryfunc nb_absolute; + inquiry nb_bool; + unaryfunc nb_invert; + binaryfunc nb_lshift; + binaryfunc nb_rshift; + binaryfunc nb_and; + binaryfunc nb_xor; + binaryfunc nb_or; + unaryfunc nb_int; + void *nb_reserved; /* the slot formerly known as nb_long */ + unaryfunc nb_float; + + binaryfunc nb_inplace_add; + binaryfunc nb_inplace_subtract; + binaryfunc nb_inplace_multiply; + binaryfunc nb_inplace_remainder; + ternaryfunc nb_inplace_power; + binaryfunc nb_inplace_lshift; + binaryfunc nb_inplace_rshift; + binaryfunc nb_inplace_and; + binaryfunc nb_inplace_xor; + binaryfunc nb_inplace_or; + + binaryfunc nb_floor_divide; + binaryfunc nb_true_divide; + binaryfunc nb_inplace_floor_divide; + binaryfunc nb_inplace_true_divide; + + unaryfunc nb_index; + + binaryfunc nb_matrix_multiply; + binaryfunc nb_inplace_matrix_multiply; +} PyNumberMethods; + +typedef struct { + lenfunc sq_length; + binaryfunc sq_concat; + ssizeargfunc sq_repeat; + ssizeargfunc sq_item; + void *was_sq_slice; + ssizeobjargproc sq_ass_item; + void *was_sq_ass_slice; + objobjproc sq_contains; + + binaryfunc sq_inplace_concat; + ssizeargfunc sq_inplace_repeat; +} PySequenceMethods; + +typedef struct { + lenfunc mp_length; + binaryfunc mp_subscript; + objobjargproc mp_ass_subscript; +} PyMappingMethods; + +typedef PySendResult (*sendfunc)(PyObject *iter, PyObject *value, PyObject **result); + +typedef struct { + unaryfunc am_await; + unaryfunc am_aiter; + unaryfunc am_anext; + sendfunc am_send; +} PyAsyncMethods; + +typedef struct { + getbufferproc bf_getbuffer; + releasebufferproc bf_releasebuffer; +} PyBufferProcs; + +/* Allow printfunc in the tp_vectorcall_offset slot for + * backwards-compatibility */ +typedef Py_ssize_t printfunc; + +// If this structure is modified, Doc/includes/typestruct.h should be updated +// as well. +struct _typeobject { + PyObject_VAR_HEAD + const char *tp_name; /* For printing, in format "<module>.<name>"
*/ + Py_ssize_t tp_basicsize, tp_itemsize; /* For allocation */ + + /* Methods to implement standard operations */ + + destructor tp_dealloc; + Py_ssize_t tp_vectorcall_offset; + getattrfunc tp_getattr; + setattrfunc tp_setattr; + PyAsyncMethods *tp_as_async; /* formerly known as tp_compare (Python 2) + or tp_reserved (Python 3) */ + reprfunc tp_repr; + + /* Method suites for standard classes */ + + PyNumberMethods *tp_as_number; + PySequenceMethods *tp_as_sequence; + PyMappingMethods *tp_as_mapping; + + /* More standard operations (here for binary compatibility) */ + + hashfunc tp_hash; + ternaryfunc tp_call; + reprfunc tp_str; + getattrofunc tp_getattro; + setattrofunc tp_setattro; + + /* Functions to access object as input/output buffer */ + PyBufferProcs *tp_as_buffer; + + /* Flags to define presence of optional/expanded features */ + unsigned long tp_flags; + + const char *tp_doc; /* Documentation string */ + + /* Assigned meaning in release 2.0 */ + /* call function for all accessible objects */ + traverseproc tp_traverse; + + /* delete references to contained objects */ + inquiry tp_clear; + + /* Assigned meaning in release 2.1 */ + /* rich comparisons */ + richcmpfunc tp_richcompare; + + /* weak reference enabler */ + Py_ssize_t tp_weaklistoffset; + + /* Iterators */ + getiterfunc tp_iter; + iternextfunc tp_iternext; + + /* Attribute descriptor and subclassing stuff */ + PyMethodDef *tp_methods; + PyMemberDef *tp_members; + PyGetSetDef *tp_getset; + // Strong reference on a heap type, borrowed reference on a static type + PyTypeObject *tp_base; + PyObject *tp_dict; + descrgetfunc tp_descr_get; + descrsetfunc tp_descr_set; + Py_ssize_t tp_dictoffset; + initproc tp_init; + allocfunc tp_alloc; + newfunc tp_new; + freefunc tp_free; /* Low-level free-memory routine */ + inquiry tp_is_gc; /* For PyObject_IS_GC */ + PyObject *tp_bases; + PyObject *tp_mro; /* method resolution order */ + PyObject *tp_cache; /* no longer used */ + void *tp_subclasses; /* for static builtin types this is an index */ + PyObject *tp_weaklist; /* not used for static builtin types */ + destructor tp_del; + + /* Type attribute cache version tag. Added in version 2.6 */ + unsigned int tp_version_tag; + + destructor tp_finalize; + vectorcallfunc tp_vectorcall; + + /* bitset of which type-watchers care about this type */ + unsigned char tp_watched; + uint16_t tp_versions_used; +}; + +/* This struct is used by the specializer + * It should be treated as an opaque blob + * by code other than the specializer and interpreter. */ +struct _specialization_cache { + // In order to avoid bloating the bytecode with lots of inline caches, the + // members of this structure have a somewhat unique contract. They are set + // by the specialization machinery, and are invalidated by PyType_Modified. + // The rules for using them are as follows: + // - If getitem is non-NULL, then it is the same Python function that + // PyType_Lookup(cls, "__getitem__") would return. + // - If getitem is NULL, then getitem_version is meaningless. + // - If getitem->func_version == getitem_version, then getitem can be called + // with two positional arguments and no keyword arguments, and has neither + // *args nor **kwargs (as required by BINARY_SUBSCR_GETITEM): + PyObject *getitem; + uint32_t getitem_version; + PyObject *init; +}; + +/* The *real* layout of a type object when allocated on the heap */ +typedef struct _heaptypeobject { + /* Note: there's a dependency on the order of these members + in slotptr() in typeobject.c . 
*/ + PyTypeObject ht_type; + PyAsyncMethods as_async; + PyNumberMethods as_number; + PyMappingMethods as_mapping; + PySequenceMethods as_sequence; /* as_sequence comes after as_mapping, + so that the mapping wins when both + the mapping and the sequence define + a given operator (e.g. __getitem__). + see add_operators() in typeobject.c . */ + PyBufferProcs as_buffer; + PyObject *ht_name, *ht_slots, *ht_qualname; + struct _dictkeysobject *ht_cached_keys; + PyObject *ht_module; + char *_ht_tpname; // Storage for "tp_name"; see PyType_FromModuleAndSpec + struct _specialization_cache _spec_cache; // For use by the specializer. + /* here are optional user slots, followed by the members. */ +} PyHeapTypeObject; + +PyAPI_FUNC(const char *) _PyType_Name(PyTypeObject *); +PyAPI_FUNC(PyObject *) _PyType_Lookup(PyTypeObject *, PyObject *); +PyAPI_FUNC(PyObject *) _PyType_LookupRef(PyTypeObject *, PyObject *); +PyAPI_FUNC(PyObject *) PyType_GetDict(PyTypeObject *); + +PyAPI_FUNC(int) PyObject_Print(PyObject *, FILE *, int); +PyAPI_FUNC(void) _Py_BreakPoint(void); +PyAPI_FUNC(void) _PyObject_Dump(PyObject *); + +PyAPI_FUNC(PyObject*) _PyObject_GetAttrId(PyObject *, _Py_Identifier *); + +PyAPI_FUNC(PyObject **) _PyObject_GetDictPtr(PyObject *); +PyAPI_FUNC(void) PyObject_CallFinalizer(PyObject *); +PyAPI_FUNC(int) PyObject_CallFinalizerFromDealloc(PyObject *); + +PyAPI_FUNC(void) PyUnstable_Object_ClearWeakRefsNoCallbacks(PyObject *); + +/* Same as PyObject_Generic{Get,Set}Attr, but passing the attributes + dict as the last parameter. */ +PyAPI_FUNC(PyObject *) +_PyObject_GenericGetAttrWithDict(PyObject *, PyObject *, PyObject *, int); +PyAPI_FUNC(int) +_PyObject_GenericSetAttrWithDict(PyObject *, PyObject *, + PyObject *, PyObject *); + +PyAPI_FUNC(PyObject *) _PyObject_FunctionStr(PyObject *); + +/* Safely decref `dst` and set `dst` to `src`. + * + * As in the case of Py_CLEAR, "the obvious" code can be deadly: + * + * Py_DECREF(dst); + * dst = src; + * + * The safe way is: + * + * Py_SETREF(dst, src); + * + * That arranges to set `dst` to `src` _before_ decref'ing, so that any code + * triggered as a side-effect of `dst` getting torn down no longer believes + * `dst` points to a valid object. + * + * Temporary variables are used to only evaluate macro arguments once and so + * avoid the duplication of side effects. _Py_TYPEOF() or memcpy() is used to + * avoid a miscompilation caused by type punning. See Py_CLEAR() comment for + * implementation details about type punning. + * + * The memcpy() implementation does not emit a compiler warning if 'src' does + * not have the same type as 'dst': any pointer type is accepted for 'src'. + */ +#ifdef _Py_TYPEOF +#define Py_SETREF(dst, src) \ + do { \ + _Py_TYPEOF(dst)* _tmp_dst_ptr = &(dst); \ + _Py_TYPEOF(dst) _tmp_old_dst = (*_tmp_dst_ptr); \ + *_tmp_dst_ptr = (src); \ + Py_DECREF(_tmp_old_dst); \ + } while (0) +#else +#define Py_SETREF(dst, src) \ + do { \ + PyObject **_tmp_dst_ptr = _Py_CAST(PyObject**, &(dst)); \ + PyObject *_tmp_old_dst = (*_tmp_dst_ptr); \ + PyObject *_tmp_src = _PyObject_CAST(src); \ + memcpy(_tmp_dst_ptr, &_tmp_src, sizeof(PyObject*)); \ + Py_DECREF(_tmp_old_dst); \ + } while (0) +#endif + +/* Py_XSETREF() is a variant of Py_SETREF() that uses Py_XDECREF() instead of + * Py_DECREF().
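+ * + * Illustrative use (the "cached" field is hypothetical): the old value may + * legitimately be NULL here, which Py_DECREF() would not accept: + * + *     Py_XSETREF(self->cached, Py_NewRef(value));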
+ */ +#ifdef _Py_TYPEOF +#define Py_XSETREF(dst, src) \ + do { \ + _Py_TYPEOF(dst)* _tmp_dst_ptr = &(dst); \ + _Py_TYPEOF(dst) _tmp_old_dst = (*_tmp_dst_ptr); \ + *_tmp_dst_ptr = (src); \ + Py_XDECREF(_tmp_old_dst); \ + } while (0) +#else +#define Py_XSETREF(dst, src) \ + do { \ + PyObject **_tmp_dst_ptr = _Py_CAST(PyObject**, &(dst)); \ + PyObject *_tmp_old_dst = (*_tmp_dst_ptr); \ + PyObject *_tmp_src = _PyObject_CAST(src); \ + memcpy(_tmp_dst_ptr, &_tmp_src, sizeof(PyObject*)); \ + Py_XDECREF(_tmp_old_dst); \ + } while (0) +#endif + + +/* Define a pair of assertion macros: + _PyObject_ASSERT_FROM(), _PyObject_ASSERT_WITH_MSG() and _PyObject_ASSERT(). + + These work like the regular C assert(), in that they will abort the + process with a message on stderr if the given condition fails to hold, + but compile away to nothing if NDEBUG is defined. + + However, before aborting, Python will also try to call _PyObject_Dump() on + the given object. This may be of use when investigating bugs in which a + particular object is corrupt (e.g. a buggy tp_visit method in an extension + module breaking the garbage collector), to help locate the broken objects. + + The WITH_MSG variant allows you to supply an additional message that Python + will attempt to print to stderr, after the object dump. */ +#ifdef NDEBUG + /* No debugging: compile away the assertions: */ +# define _PyObject_ASSERT_FROM(obj, expr, msg, filename, lineno, func) \ + ((void)0) +#else + /* With debugging: generate checks: */ +# define _PyObject_ASSERT_FROM(obj, expr, msg, filename, lineno, func) \ + ((expr) \ + ? (void)(0) \ + : _PyObject_AssertFailed((obj), Py_STRINGIFY(expr), \ + (msg), (filename), (lineno), (func))) +#endif + +#define _PyObject_ASSERT_WITH_MSG(obj, expr, msg) \ + _PyObject_ASSERT_FROM((obj), expr, (msg), __FILE__, __LINE__, __func__) +#define _PyObject_ASSERT(obj, expr) \ + _PyObject_ASSERT_WITH_MSG((obj), expr, NULL) + +#define _PyObject_ASSERT_FAILED_MSG(obj, msg) \ + _PyObject_AssertFailed((obj), NULL, (msg), __FILE__, __LINE__, __func__) + +/* Declare and define _PyObject_AssertFailed() even when NDEBUG is defined, + to avoid causing compiler/linker errors when building extensions without + NDEBUG against a Python built with NDEBUG defined. + + msg, expr and function can be NULL. */ +PyAPI_FUNC(void) _Py_NO_RETURN _PyObject_AssertFailed( + PyObject *obj, + const char *expr, + const char *msg, + const char *file, + int line, + const char *function); + + +/* Trashcan mechanism, thanks to Christian Tismer. + +When deallocating a container object, it's possible to trigger an unbounded +chain of deallocations, as each Py_DECREF in turn drops the refcount on "the +next" object in the chain to 0. This can easily lead to stack overflows, +especially in threads (which typically have less stack space to work with). + +A container object can avoid this by bracketing the body of its tp_dealloc +function with a pair of macros: + +static void +mytype_dealloc(mytype *p) +{ + ... declarations go here ... + + PyObject_GC_UnTrack(p); // must untrack first + Py_TRASHCAN_BEGIN(p, mytype_dealloc) + ... The body of the deallocator goes here, including all calls ... + ... to Py_DECREF on contained objects. ... + Py_TRASHCAN_END // there should be no code after this +} + +CAUTION: Never return from the middle of the body! If the body needs to +"get out early", put a label immediately before the Py_TRASHCAN_END +call, and goto it.
Else the call-depth counter (see below) will stay +above 0 forever, and the trashcan will never get emptied. + +How it works: The BEGIN macro increments a call-depth counter. So long +as this counter is small, the body of the deallocator is run directly without +further ado. But if the counter gets large, it instead adds p to a list of +objects to be deallocated later, skips the body of the deallocator, and +resumes execution after the END macro. The tp_dealloc routine then returns +without deallocating anything (and so unbounded call-stack depth is avoided). + +When the call stack finishes unwinding again, code generated by the END macro +notices this, and calls another routine to deallocate all the objects that +may have been added to the list of deferred deallocations. In effect, a +chain of N deallocations is broken into (N-1)/(Py_TRASHCAN_HEADROOM-1) pieces, +with the call stack never exceeding a depth of Py_TRASHCAN_HEADROOM. + +Since the tp_dealloc of a subclass typically calls the tp_dealloc of the base +class, we need to ensure that the trashcan is only triggered on the tp_dealloc +of the actual class being deallocated. Otherwise we might end up with a +partially-deallocated object. To check this, the tp_dealloc function must be +passed as second argument to Py_TRASHCAN_BEGIN(). +*/ + +/* Python 3.9 private API, invoked by the macros below. */ +PyAPI_FUNC(int) _PyTrash_begin(PyThreadState *tstate, PyObject *op); +PyAPI_FUNC(void) _PyTrash_end(PyThreadState *tstate); + +PyAPI_FUNC(void) _PyTrash_thread_deposit_object(PyThreadState *tstate, PyObject *op); +PyAPI_FUNC(void) _PyTrash_thread_destroy_chain(PyThreadState *tstate); + + +/* Python 3.10 private API, invoked by the Py_TRASHCAN_BEGIN(). */ + +/* To avoid raising recursion errors during dealloc trigger trashcan before we reach + * recursion limit. To avoid trashing, we don't attempt to empty the trashcan until + * we have headroom above the trigger limit */ +#define Py_TRASHCAN_HEADROOM 50 + +#define Py_TRASHCAN_BEGIN(op, dealloc) \ +do { \ + PyThreadState *tstate = PyThreadState_Get(); \ + if (tstate->c_recursion_remaining <= Py_TRASHCAN_HEADROOM && Py_TYPE(op)->tp_dealloc == (destructor)dealloc) { \ + _PyTrash_thread_deposit_object(tstate, (PyObject *)op); \ + break; \ + } \ + tstate->c_recursion_remaining--; + /* The body of the deallocator is here. */ +#define Py_TRASHCAN_END \ + tstate->c_recursion_remaining++; \ + if (tstate->delete_later && tstate->c_recursion_remaining > (Py_TRASHCAN_HEADROOM*2)) { \ + _PyTrash_thread_destroy_chain(tstate); \ + } \ +} while (0); + + +PyAPI_FUNC(void *) PyObject_GetItemData(PyObject *obj); + +PyAPI_FUNC(int) PyObject_VisitManagedDict(PyObject *obj, visitproc visit, void *arg); +PyAPI_FUNC(int) _PyObject_SetManagedDict(PyObject *obj, PyObject *new_dict); +PyAPI_FUNC(void) PyObject_ClearManagedDict(PyObject *obj); + +#define TYPE_MAX_WATCHERS 8 + +typedef int(*PyType_WatchCallback)(PyTypeObject *); +PyAPI_FUNC(int) PyType_AddWatcher(PyType_WatchCallback callback); +PyAPI_FUNC(int) PyType_ClearWatcher(int watcher_id); +PyAPI_FUNC(int) PyType_Watch(int watcher_id, PyObject *type); +PyAPI_FUNC(int) PyType_Unwatch(int watcher_id, PyObject *type); + +/* Attempt to assign a version tag to the given type. + * + * Returns 1 if the type already had a valid version tag or a new one was + * assigned, or 0 if a new tag could not be assigned. 
+ */ +PyAPI_FUNC(int) PyUnstable_Type_AssignVersionTag(PyTypeObject *type); + + +typedef enum { + PyRefTracer_CREATE = 0, + PyRefTracer_DESTROY = 1, +} PyRefTracerEvent; + +typedef int (*PyRefTracer)(PyObject *, PyRefTracerEvent event, void *); +PyAPI_FUNC(int) PyRefTracer_SetTracer(PyRefTracer tracer, void *data); +PyAPI_FUNC(PyRefTracer) PyRefTracer_GetTracer(void**); diff --git a/Include/cpython/objimpl.h b/Include/cpython/objimpl.h new file mode 100644 index 0000000000000000000000000000000000000000..e0c2ce286f13ce38c3308c17e1cdc2308bf76edf --- /dev/null +++ b/Include/cpython/objimpl.h @@ -0,0 +1,104 @@ +#ifndef Py_CPYTHON_OBJIMPL_H +# error "this header file must not be included directly" +#endif + +static inline size_t _PyObject_SIZE(PyTypeObject *type) { + return _Py_STATIC_CAST(size_t, type->tp_basicsize); +} + +/* _PyObject_VAR_SIZE returns the number of bytes (as size_t) allocated for a + vrbl-size object with nitems items, exclusive of gc overhead (if any). The + value is rounded up to the closest multiple of sizeof(void *), in order to + ensure that pointer fields at the end of the object are correctly aligned + for the platform (this is of special importance for subclasses of, e.g., + str or int, so that pointers can be stored after the embedded data). + + Note that there's no memory wastage in doing this, as malloc has to + return (at worst) pointer-aligned memory anyway. +*/ +#if ((SIZEOF_VOID_P - 1) & SIZEOF_VOID_P) != 0 +# error "_PyObject_VAR_SIZE requires SIZEOF_VOID_P be a power of 2" +#endif + +static inline size_t _PyObject_VAR_SIZE(PyTypeObject *type, Py_ssize_t nitems) { + size_t size = _Py_STATIC_CAST(size_t, type->tp_basicsize); + size += _Py_STATIC_CAST(size_t, nitems) * _Py_STATIC_CAST(size_t, type->tp_itemsize); + return _Py_SIZE_ROUND_UP(size, SIZEOF_VOID_P); +} + + +/* This example code implements an object constructor with a custom + allocator, where PyObject_New is inlined, and shows the important + distinction between two steps (at least): + 1) the actual allocation of the object storage; + 2) the initialization of the Python specific fields + in this storage with PyObject_{Init, InitVar}. + + PyObject * + YourObject_New(...) + { + PyObject *op; + + op = (PyObject *) Your_Allocator(_PyObject_SIZE(YourTypeStruct)); + if (op == NULL) { + return PyErr_NoMemory(); + } + + PyObject_Init(op, &YourTypeStruct); + + op->ob_field = value; + ... + return op; + } + + Note that in C++, the use of the new operator usually implies that + the 1st step is performed automatically for you, so in a C++ class + constructor you would start directly with PyObject_Init/InitVar. */ + + +typedef struct { + /* user context passed as the first argument to the 2 functions */ + void *ctx; + + /* allocate an arena of size bytes */ + void* (*alloc) (void *ctx, size_t size); + + /* free an arena */ + void (*free) (void *ctx, void *ptr, size_t size); +} PyObjectArenaAllocator; + +/* Get the arena allocator. */ +PyAPI_FUNC(void) PyObject_GetArenaAllocator(PyObjectArenaAllocator *allocator); + +/* Set the arena allocator. 
*/ +PyAPI_FUNC(void) PyObject_SetArenaAllocator(PyObjectArenaAllocator *allocator); + + +/* Test if an object implements the garbage collector protocol */ +PyAPI_FUNC(int) PyObject_IS_GC(PyObject *obj); + + +// Test if a type supports weak references +PyAPI_FUNC(int) PyType_SUPPORTS_WEAKREFS(PyTypeObject *type); + +PyAPI_FUNC(PyObject **) PyObject_GET_WEAKREFS_LISTPTR(PyObject *op); + +PyAPI_FUNC(PyObject *) PyUnstable_Object_GC_NewWithExtraData(PyTypeObject *, + size_t); + + +/* Visit all live GC-capable objects, similar to gc.get_objects(None). The + * supplied callback is called on every such object with the void* arg set + * to the supplied arg. Returning 0 from the callback ends iteration, returning + * 1 allows iteration to continue. Returning any other value may result in + * undefined behaviour. + * + * If new objects are (de)allocated by the callback it is undefined if they + * will be visited. + + * Garbage collection is disabled during operation. Explicitly running a + * collection in the callback may lead to undefined behaviour e.g. visiting the + * same objects multiple times or not at all. + */ +typedef int (*gcvisitobjects_t)(PyObject*, void*); +PyAPI_FUNC(void) PyUnstable_GC_VisitObjects(gcvisitobjects_t callback, void* arg); diff --git a/Include/cpython/odictobject.h b/Include/cpython/odictobject.h new file mode 100644 index 0000000000000000000000000000000000000000..3822d554868c10e3910f13aad4272f2973726eed --- /dev/null +++ b/Include/cpython/odictobject.h @@ -0,0 +1,43 @@ +#ifndef Py_ODICTOBJECT_H +#define Py_ODICTOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + + +/* OrderedDict */ +/* This API is optional and mostly redundant. */ + +#ifndef Py_LIMITED_API + +typedef struct _odictobject PyODictObject; + +PyAPI_DATA(PyTypeObject) PyODict_Type; +PyAPI_DATA(PyTypeObject) PyODictIter_Type; +PyAPI_DATA(PyTypeObject) PyODictKeys_Type; +PyAPI_DATA(PyTypeObject) PyODictItems_Type; +PyAPI_DATA(PyTypeObject) PyODictValues_Type; + +#define PyODict_Check(op) PyObject_TypeCheck((op), &PyODict_Type) +#define PyODict_CheckExact(op) Py_IS_TYPE((op), &PyODict_Type) +#define PyODict_SIZE(op) PyDict_GET_SIZE((op)) + +PyAPI_FUNC(PyObject *) PyODict_New(void); +PyAPI_FUNC(int) PyODict_SetItem(PyObject *od, PyObject *key, PyObject *item); +PyAPI_FUNC(int) PyODict_DelItem(PyObject *od, PyObject *key); + +/* wrappers around PyDict* functions */ +#define PyODict_GetItem(od, key) PyDict_GetItem(_PyObject_CAST(od), (key)) +#define PyODict_GetItemWithError(od, key) \ + PyDict_GetItemWithError(_PyObject_CAST(od), (key)) +#define PyODict_Contains(od, key) PyDict_Contains(_PyObject_CAST(od), (key)) +#define PyODict_Size(od) PyDict_Size(_PyObject_CAST(od)) +#define PyODict_GetItemString(od, key) \ + PyDict_GetItemString(_PyObject_CAST(od), (key)) + +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_ODICTOBJECT_H */ diff --git a/Include/cpython/picklebufobject.h b/Include/cpython/picklebufobject.h new file mode 100644 index 0000000000000000000000000000000000000000..f3cbaeef919518cd3154ce6cd4634210aebf42de --- /dev/null +++ b/Include/cpython/picklebufobject.h @@ -0,0 +1,31 @@ +/* PickleBuffer object. This is built-in for ease of use from third-party + * C extensions. 
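+ * + * Usage sketch (illustrative; "obj" is assumed to export a buffer): + * + *     PyObject *pb = PyPickleBuffer_FromObject(obj); + *     if (pb == NULL) { + *         return NULL;  // exception set + *     } + *     const Py_buffer *view = PyPickleBuffer_GetBuffer(pb);  // NULL if released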
+ */
+
+#ifndef Py_PICKLEBUFOBJECT_H
+#define Py_PICKLEBUFOBJECT_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_LIMITED_API
+
+PyAPI_DATA(PyTypeObject) PyPickleBuffer_Type;
+
+#define PyPickleBuffer_Check(op) Py_IS_TYPE((op), &PyPickleBuffer_Type)
+
+/* Create a PickleBuffer redirecting to the given buffer-enabled object */
+PyAPI_FUNC(PyObject *) PyPickleBuffer_FromObject(PyObject *);
+/* Get the PickleBuffer's underlying view to the original object
+ * (NULL if released)
+ */
+PyAPI_FUNC(const Py_buffer *) PyPickleBuffer_GetBuffer(PyObject *);
+/* Release the PickleBuffer. Returns 0 on success, -1 on error. */
+PyAPI_FUNC(int) PyPickleBuffer_Release(PyObject *);
+
+#endif /* !Py_LIMITED_API */
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_PICKLEBUFOBJECT_H */
diff --git a/Include/cpython/pthread_stubs.h b/Include/cpython/pthread_stubs.h
new file mode 100644
index 0000000000000000000000000000000000000000..e542eaa5bff0cf0322441f18e05da6dba35ab417
--- /dev/null
+++ b/Include/cpython/pthread_stubs.h
@@ -0,0 +1,105 @@
+#ifndef Py_CPYTHON_PTRHEAD_STUBS_H
+#define Py_CPYTHON_PTRHEAD_STUBS_H
+
+#if !defined(HAVE_PTHREAD_STUBS)
+# error "this header file requires stubbed pthreads."
+#endif
+
+#ifndef _POSIX_THREADS
+# define _POSIX_THREADS 1
+#endif
+
+/* Minimal pthread stubs for CPython.
+ *
+ * The stubs implement the minimum pthread API for CPython.
+ * - pthread_create() fails.
+ * - pthread_exit() calls exit(0).
+ * - pthread_key_*() functions implement minimal TSS without destructor.
+ * - all other functions do nothing and return 0.
+ */
+
+#ifdef __wasi__
+// WASI's <bits/alltypes.h> provides type definitions when __NEED_ is set.
+// The header file can be included multiple times.
+//
+// <pthread.h> may also define these macros.
+# ifndef __NEED_pthread_cond_t
+#  define __NEED_pthread_cond_t 1
+# endif
+# ifndef __NEED_pthread_condattr_t
+#  define __NEED_pthread_condattr_t 1
+# endif
+# ifndef __NEED_pthread_mutex_t
+#  define __NEED_pthread_mutex_t 1
+# endif
+# ifndef __NEED_pthread_mutexattr_t
+#  define __NEED_pthread_mutexattr_t 1
+# endif
+# ifndef __NEED_pthread_key_t
+#  define __NEED_pthread_key_t 1
+# endif
+# ifndef __NEED_pthread_t
+#  define __NEED_pthread_t 1
+# endif
+# ifndef __NEED_pthread_attr_t
+#  define __NEED_pthread_attr_t 1
+# endif
+# include <bits/alltypes.h>
+#else
+typedef struct { void *__x; } pthread_cond_t;
+typedef struct { unsigned __attr; } pthread_condattr_t;
+typedef struct { void *__x; } pthread_mutex_t;
+typedef struct { unsigned __attr; } pthread_mutexattr_t;
+typedef unsigned pthread_key_t;
+typedef unsigned pthread_t;
+typedef struct { unsigned __attr; } pthread_attr_t;
+#endif
+
+// mutex
+PyAPI_FUNC(int) pthread_mutex_init(pthread_mutex_t *restrict mutex,
+                                   const pthread_mutexattr_t *restrict attr);
+PyAPI_FUNC(int) pthread_mutex_destroy(pthread_mutex_t *mutex);
+PyAPI_FUNC(int) pthread_mutex_trylock(pthread_mutex_t *mutex);
+PyAPI_FUNC(int) pthread_mutex_lock(pthread_mutex_t *mutex);
+PyAPI_FUNC(int) pthread_mutex_unlock(pthread_mutex_t *mutex);
+
+// condition
+PyAPI_FUNC(int) pthread_cond_init(pthread_cond_t *restrict cond,
+                                  const pthread_condattr_t *restrict attr);
+PyAPI_FUNC(int) pthread_cond_destroy(pthread_cond_t *cond);
+PyAPI_FUNC(int) pthread_cond_wait(pthread_cond_t *restrict cond,
+                                  pthread_mutex_t *restrict mutex);
+PyAPI_FUNC(int) pthread_cond_timedwait(pthread_cond_t *restrict cond,
+                                       pthread_mutex_t *restrict mutex,
+                                       const struct timespec *restrict abstime);
+PyAPI_FUNC(int) pthread_cond_signal(pthread_cond_t *cond);
+PyAPI_FUNC(int) pthread_condattr_init(pthread_condattr_t *attr);
+PyAPI_FUNC(int) pthread_condattr_setclock(
+    pthread_condattr_t *attr, clockid_t clock_id);
+
+// pthread
+PyAPI_FUNC(int) pthread_create(pthread_t *restrict thread,
+                               const pthread_attr_t *restrict attr,
+                               void *(*start_routine)(void *),
+                               void *restrict arg);
+PyAPI_FUNC(int) pthread_detach(pthread_t thread);
+PyAPI_FUNC(int) pthread_join(pthread_t thread, void** value_ptr);
+PyAPI_FUNC(pthread_t) pthread_self(void);
+PyAPI_FUNC(int) pthread_exit(void *retval) __attribute__ ((__noreturn__));
+PyAPI_FUNC(int) pthread_attr_init(pthread_attr_t *attr);
+PyAPI_FUNC(int) pthread_attr_setstacksize(pthread_attr_t *attr, size_t stacksize);
+PyAPI_FUNC(int) pthread_attr_destroy(pthread_attr_t *attr);
+
+
+// pthread_key
+#ifndef PTHREAD_KEYS_MAX
+# define PTHREAD_KEYS_MAX 128
+#endif
+
+PyAPI_FUNC(int) pthread_key_create(pthread_key_t *key,
+                                   void (*destr_function)(void *));
+PyAPI_FUNC(int) pthread_key_delete(pthread_key_t key);
+PyAPI_FUNC(void *) pthread_getspecific(pthread_key_t key);
+PyAPI_FUNC(int) pthread_setspecific(pthread_key_t key, const void *value);
+
+#endif // Py_CPYTHON_PTRHEAD_STUBS_H
diff --git a/Include/cpython/pyatomic.h b/Include/cpython/pyatomic.h
new file mode 100644
index 0000000000000000000000000000000000000000..28029859d3df2b826c27812fc845ba3016b884c0
--- /dev/null
+++ b/Include/cpython/pyatomic.h
@@ -0,0 +1,569 @@
+// This header provides cross-platform low-level atomic operations
+// similar to C11 atomics.
+//
+// Operations are sequentially consistent unless they have a suffix indicating
+// otherwise. If in doubt, prefer the sequentially consistent operations.
+//
+// The "_relaxed" suffix for load and store operations indicates the "relaxed"
+// memory order. They don't provide synchronization, but (roughly speaking)
+// guarantee somewhat sane behavior for races instead of undefined behavior.
+// In practice, they correspond to "normal" hardware load and store
+// instructions, so they are almost as inexpensive as plain loads and stores
+// in C.
+//
+// Note that atomic read-modify-write operations like _Py_atomic_add_* return
+// the previous value of the atomic variable, not the new value.
+//
+// See https://en.cppreference.com/w/c/atomic for more information on C11
+// atomics.
+// See https://www.open-std.org/jtc1/sc22/wg21/docs/papers/2020/p2055r0.pdf
+// "A Relaxed Guide to memory_order_relaxed" for discussion of and common
+// usage of relaxed atomics.
+//
+// Functions with pseudo Python code:
+//
+// def _Py_atomic_load(obj):
+//     return obj  # sequential consistency
+//
+// def _Py_atomic_load_relaxed(obj):
+//     return obj  # relaxed consistency
+//
+// def _Py_atomic_store(obj, value):
+//     obj = value  # sequential consistency
+//
+// def _Py_atomic_store_relaxed(obj, value):
+//     obj = value  # relaxed consistency
+//
+// def _Py_atomic_exchange(obj, value):
+//     # sequential consistency
+//     old_obj = obj
+//     obj = value
+//     return old_obj
+//
+// def _Py_atomic_compare_exchange(obj, expected, desired):
+//     # sequential consistency
+//     if obj == expected:
+//         obj = desired
+//         return True
+//     else:
+//         expected = obj
+//         return False
+//
+// def _Py_atomic_add(obj, value):
+//     # sequential consistency
+//     old_obj = obj
+//     obj += value
+//     return old_obj
+//
+// def _Py_atomic_and(obj, value):
+//     # sequential consistency
+//     old_obj = obj
+//     obj &= value
+//     return old_obj
+//
+// def _Py_atomic_or(obj, value):
+//     # sequential consistency
+//     old_obj = obj
+//     obj |= value
+//     return old_obj
+//
+// Other functions:
+//
+// def _Py_atomic_load_ptr_acquire(obj):
+//     return obj  # acquire
+//
+// def _Py_atomic_store_ptr_release(obj, value):
+//     obj = value  # release
+//
+// def _Py_atomic_fence_seq_cst():
+//     # sequential consistency
+//     ...
+//
+// def _Py_atomic_fence_release():
+//     # release
+//     ...
+
+#ifndef Py_CPYTHON_ATOMIC_H
+# error "this header file must not be included directly"
+#endif
+
+// --- _Py_atomic_add --------------------------------------------------------
+// Atomically adds `value` to `obj` and returns the previous value
+
+static inline int
+_Py_atomic_add_int(int *obj, int value);
+
+static inline int8_t
+_Py_atomic_add_int8(int8_t *obj, int8_t value);
+
+static inline int16_t
+_Py_atomic_add_int16(int16_t *obj, int16_t value);
+
+static inline int32_t
+_Py_atomic_add_int32(int32_t *obj, int32_t value);
+
+static inline int64_t
+_Py_atomic_add_int64(int64_t *obj, int64_t value);
+
+static inline intptr_t
+_Py_atomic_add_intptr(intptr_t *obj, intptr_t value);
+
+static inline unsigned int
+_Py_atomic_add_uint(unsigned int *obj, unsigned int value);
+
+static inline uint8_t
+_Py_atomic_add_uint8(uint8_t *obj, uint8_t value);
+
+static inline uint16_t
+_Py_atomic_add_uint16(uint16_t *obj, uint16_t value);
+
+static inline uint32_t
+_Py_atomic_add_uint32(uint32_t *obj, uint32_t value);
+
+static inline uint64_t
+_Py_atomic_add_uint64(uint64_t *obj, uint64_t value);
+
+static inline uintptr_t
+_Py_atomic_add_uintptr(uintptr_t *obj, uintptr_t value);
+
+static inline Py_ssize_t
+_Py_atomic_add_ssize(Py_ssize_t *obj, Py_ssize_t value);
+
+
+// --- _Py_atomic_compare_exchange -------------------------------------------
+// Performs an atomic compare-and-exchange.
+//
+// - If `*obj` and `*expected` are equal, store `desired` into `*obj`
+//   and return 1 (success).
+// - Otherwise, store the `*obj` current value into `*expected`
+//   and return 0 (failure).
+//
+// These correspond to the C11 atomic_compare_exchange_strong() function.
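/*
 * [Editorial sketch, not part of the upstream header] A typical consumer of
 * the compare-exchange API declared just below is a retry loop that
 * recomputes the desired value from the observed one until the swap
 * succeeds. The helper name is hypothetical and the code is kept under
 * `#if 0` purely as illustration.
 */
#if 0
static inline int
example_atomic_max_int(int *obj, int value)
{
    int old = _Py_atomic_load_int_relaxed(obj);
    // On failure, the compare-exchange writes the current value of *obj back
    // into `old`, so each iteration re-tests against fresh data.
    while (old < value
           && !_Py_atomic_compare_exchange_int(obj, &old, value)) {
    }
    return old;
}
#endif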
+ +static inline int +_Py_atomic_compare_exchange_int(int *obj, int *expected, int desired); + +static inline int +_Py_atomic_compare_exchange_int8(int8_t *obj, int8_t *expected, int8_t desired); + +static inline int +_Py_atomic_compare_exchange_int16(int16_t *obj, int16_t *expected, int16_t desired); + +static inline int +_Py_atomic_compare_exchange_int32(int32_t *obj, int32_t *expected, int32_t desired); + +static inline int +_Py_atomic_compare_exchange_int64(int64_t *obj, int64_t *expected, int64_t desired); + +static inline int +_Py_atomic_compare_exchange_intptr(intptr_t *obj, intptr_t *expected, intptr_t desired); + +static inline int +_Py_atomic_compare_exchange_uint(unsigned int *obj, unsigned int *expected, unsigned int desired); + +static inline int +_Py_atomic_compare_exchange_uint8(uint8_t *obj, uint8_t *expected, uint8_t desired); + +static inline int +_Py_atomic_compare_exchange_uint16(uint16_t *obj, uint16_t *expected, uint16_t desired); + +static inline int +_Py_atomic_compare_exchange_uint32(uint32_t *obj, uint32_t *expected, uint32_t desired); + +static inline int +_Py_atomic_compare_exchange_uint64(uint64_t *obj, uint64_t *expected, uint64_t desired); + +static inline int +_Py_atomic_compare_exchange_uintptr(uintptr_t *obj, uintptr_t *expected, uintptr_t desired); + +static inline int +_Py_atomic_compare_exchange_ssize(Py_ssize_t *obj, Py_ssize_t *expected, Py_ssize_t desired); + +// NOTE: `obj` and `expected` are logically `void**` types, but we use `void*` +// so that we can pass types like `PyObject**` without a cast. +static inline int +_Py_atomic_compare_exchange_ptr(void *obj, void *expected, void *value); + + +// --- _Py_atomic_exchange --------------------------------------------------- +// Atomically replaces `*obj` with `value` and returns the previous value of `*obj`. + +static inline int +_Py_atomic_exchange_int(int *obj, int value); + +static inline int8_t +_Py_atomic_exchange_int8(int8_t *obj, int8_t value); + +static inline int16_t +_Py_atomic_exchange_int16(int16_t *obj, int16_t value); + +static inline int32_t +_Py_atomic_exchange_int32(int32_t *obj, int32_t value); + +static inline int64_t +_Py_atomic_exchange_int64(int64_t *obj, int64_t value); + +static inline intptr_t +_Py_atomic_exchange_intptr(intptr_t *obj, intptr_t value); + +static inline unsigned int +_Py_atomic_exchange_uint(unsigned int *obj, unsigned int value); + +static inline uint8_t +_Py_atomic_exchange_uint8(uint8_t *obj, uint8_t value); + +static inline uint16_t +_Py_atomic_exchange_uint16(uint16_t *obj, uint16_t value); + +static inline uint32_t +_Py_atomic_exchange_uint32(uint32_t *obj, uint32_t value); + +static inline uint64_t +_Py_atomic_exchange_uint64(uint64_t *obj, uint64_t value); + +static inline uintptr_t +_Py_atomic_exchange_uintptr(uintptr_t *obj, uintptr_t value); + +static inline Py_ssize_t +_Py_atomic_exchange_ssize(Py_ssize_t *obj, Py_ssize_t value); + +static inline void * +_Py_atomic_exchange_ptr(void *obj, void *value); + + +// --- _Py_atomic_and -------------------------------------------------------- +// Performs `*obj &= value` atomically and returns the previous value of `*obj`. 
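/*
 * [Editorial sketch, not part of the upstream header] The fetch-and /
 * fetch-or operations declared below are commonly used on flag words:
 * because the previous value is returned, the caller can tell whether a bit
 * actually changed. Hypothetical helper, illustration only.
 */
#if 0
static inline int
example_test_and_clear_bit(uint8_t *flags, uint8_t bit)
{
    // _Py_atomic_and_uint8() returns the value *before* the AND, so this
    // call actually cleared the bit only if it was set in the old value.
    uint8_t prev = _Py_atomic_and_uint8(flags, (uint8_t)~bit);
    return (prev & bit) != 0;
}
#endif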
+ +static inline uint8_t +_Py_atomic_and_uint8(uint8_t *obj, uint8_t value); + +static inline uint16_t +_Py_atomic_and_uint16(uint16_t *obj, uint16_t value); + +static inline uint32_t +_Py_atomic_and_uint32(uint32_t *obj, uint32_t value); + +static inline uint64_t +_Py_atomic_and_uint64(uint64_t *obj, uint64_t value); + +static inline uintptr_t +_Py_atomic_and_uintptr(uintptr_t *obj, uintptr_t value); + + +// --- _Py_atomic_or --------------------------------------------------------- +// Performs `*obj |= value` atomically and returns the previous value of `*obj`. + +static inline uint8_t +_Py_atomic_or_uint8(uint8_t *obj, uint8_t value); + +static inline uint16_t +_Py_atomic_or_uint16(uint16_t *obj, uint16_t value); + +static inline uint32_t +_Py_atomic_or_uint32(uint32_t *obj, uint32_t value); + +static inline uint64_t +_Py_atomic_or_uint64(uint64_t *obj, uint64_t value); + +static inline uintptr_t +_Py_atomic_or_uintptr(uintptr_t *obj, uintptr_t value); + + +// --- _Py_atomic_load ------------------------------------------------------- +// Atomically loads `*obj` (sequential consistency) + +static inline int +_Py_atomic_load_int(const int *obj); + +static inline int8_t +_Py_atomic_load_int8(const int8_t *obj); + +static inline int16_t +_Py_atomic_load_int16(const int16_t *obj); + +static inline int32_t +_Py_atomic_load_int32(const int32_t *obj); + +static inline int64_t +_Py_atomic_load_int64(const int64_t *obj); + +static inline intptr_t +_Py_atomic_load_intptr(const intptr_t *obj); + +static inline uint8_t +_Py_atomic_load_uint8(const uint8_t *obj); + +static inline uint16_t +_Py_atomic_load_uint16(const uint16_t *obj); + +static inline uint32_t +_Py_atomic_load_uint32(const uint32_t *obj); + +static inline uint64_t +_Py_atomic_load_uint64(const uint64_t *obj); + +static inline uintptr_t +_Py_atomic_load_uintptr(const uintptr_t *obj); + +static inline unsigned int +_Py_atomic_load_uint(const unsigned int *obj); + +static inline Py_ssize_t +_Py_atomic_load_ssize(const Py_ssize_t *obj); + +static inline void * +_Py_atomic_load_ptr(const void *obj); + + +// --- _Py_atomic_load_relaxed ----------------------------------------------- +// Loads `*obj` (relaxed consistency, i.e., no ordering) + +static inline int +_Py_atomic_load_int_relaxed(const int *obj); + +static inline int8_t +_Py_atomic_load_int8_relaxed(const int8_t *obj); + +static inline int16_t +_Py_atomic_load_int16_relaxed(const int16_t *obj); + +static inline int32_t +_Py_atomic_load_int32_relaxed(const int32_t *obj); + +static inline int64_t +_Py_atomic_load_int64_relaxed(const int64_t *obj); + +static inline intptr_t +_Py_atomic_load_intptr_relaxed(const intptr_t *obj); + +static inline uint8_t +_Py_atomic_load_uint8_relaxed(const uint8_t *obj); + +static inline uint16_t +_Py_atomic_load_uint16_relaxed(const uint16_t *obj); + +static inline uint32_t +_Py_atomic_load_uint32_relaxed(const uint32_t *obj); + +static inline uint64_t +_Py_atomic_load_uint64_relaxed(const uint64_t *obj); + +static inline uintptr_t +_Py_atomic_load_uintptr_relaxed(const uintptr_t *obj); + +static inline unsigned int +_Py_atomic_load_uint_relaxed(const unsigned int *obj); + +static inline Py_ssize_t +_Py_atomic_load_ssize_relaxed(const Py_ssize_t *obj); + +static inline void * +_Py_atomic_load_ptr_relaxed(const void *obj); + +static inline unsigned long long +_Py_atomic_load_ullong_relaxed(const unsigned long long *obj); + +// --- _Py_atomic_store ------------------------------------------------------ +// Atomically performs `*obj = value` (sequential 
consistency) + +static inline void +_Py_atomic_store_int(int *obj, int value); + +static inline void +_Py_atomic_store_int8(int8_t *obj, int8_t value); + +static inline void +_Py_atomic_store_int16(int16_t *obj, int16_t value); + +static inline void +_Py_atomic_store_int32(int32_t *obj, int32_t value); + +static inline void +_Py_atomic_store_int64(int64_t *obj, int64_t value); + +static inline void +_Py_atomic_store_intptr(intptr_t *obj, intptr_t value); + +static inline void +_Py_atomic_store_uint8(uint8_t *obj, uint8_t value); + +static inline void +_Py_atomic_store_uint16(uint16_t *obj, uint16_t value); + +static inline void +_Py_atomic_store_uint32(uint32_t *obj, uint32_t value); + +static inline void +_Py_atomic_store_uint64(uint64_t *obj, uint64_t value); + +static inline void +_Py_atomic_store_uintptr(uintptr_t *obj, uintptr_t value); + +static inline void +_Py_atomic_store_uint(unsigned int *obj, unsigned int value); + +static inline void +_Py_atomic_store_ptr(void *obj, void *value); + +static inline void +_Py_atomic_store_ssize(Py_ssize_t* obj, Py_ssize_t value); + + +// --- _Py_atomic_store_relaxed ---------------------------------------------- +// Stores `*obj = value` (relaxed consistency, i.e., no ordering) + +static inline void +_Py_atomic_store_int_relaxed(int *obj, int value); + +static inline void +_Py_atomic_store_int8_relaxed(int8_t *obj, int8_t value); + +static inline void +_Py_atomic_store_int16_relaxed(int16_t *obj, int16_t value); + +static inline void +_Py_atomic_store_int32_relaxed(int32_t *obj, int32_t value); + +static inline void +_Py_atomic_store_int64_relaxed(int64_t *obj, int64_t value); + +static inline void +_Py_atomic_store_intptr_relaxed(intptr_t *obj, intptr_t value); + +static inline void +_Py_atomic_store_uint8_relaxed(uint8_t* obj, uint8_t value); + +static inline void +_Py_atomic_store_uint16_relaxed(uint16_t *obj, uint16_t value); + +static inline void +_Py_atomic_store_uint32_relaxed(uint32_t *obj, uint32_t value); + +static inline void +_Py_atomic_store_uint64_relaxed(uint64_t *obj, uint64_t value); + +static inline void +_Py_atomic_store_uintptr_relaxed(uintptr_t *obj, uintptr_t value); + +static inline void +_Py_atomic_store_uint_relaxed(unsigned int *obj, unsigned int value); + +static inline void +_Py_atomic_store_ptr_relaxed(void *obj, void *value); + +static inline void +_Py_atomic_store_ssize_relaxed(Py_ssize_t *obj, Py_ssize_t value); + +static inline void +_Py_atomic_store_ullong_relaxed(unsigned long long *obj, + unsigned long long value); + + +// --- _Py_atomic_load_ptr_acquire / _Py_atomic_store_ptr_release ------------ + +// Loads `*obj` (acquire operation) +static inline void * +_Py_atomic_load_ptr_acquire(const void *obj); + +static inline uintptr_t +_Py_atomic_load_uintptr_acquire(const uintptr_t *obj); + +// Stores `*obj = value` (release operation) +static inline void +_Py_atomic_store_ptr_release(void *obj, void *value); + +static inline void +_Py_atomic_store_uintptr_release(uintptr_t *obj, uintptr_t value); + +static inline void +_Py_atomic_store_ssize_release(Py_ssize_t *obj, Py_ssize_t value); + +static inline void +_Py_atomic_store_int_release(int *obj, int value); + +static inline int +_Py_atomic_load_int_acquire(const int *obj); + +static inline void +_Py_atomic_store_uint32_release(uint32_t *obj, uint32_t value); + +static inline void +_Py_atomic_store_uint64_release(uint64_t *obj, uint64_t value); + +static inline uint64_t +_Py_atomic_load_uint64_acquire(const uint64_t *obj); + +static inline uint32_t 
+_Py_atomic_load_uint32_acquire(const uint32_t *obj);
+
+static inline Py_ssize_t
+_Py_atomic_load_ssize_acquire(const Py_ssize_t *obj);
+
+
+// --- _Py_atomic_fence ------------------------------------------------------
+
+// Sequential consistency fence. C11 fences have complex semantics. When
+// possible, use the atomic operations on variables defined above, which
+// generally do not require explicit use of a fence.
+// See https://en.cppreference.com/w/cpp/atomic/atomic_thread_fence
+static inline void _Py_atomic_fence_seq_cst(void);
+
+// Acquire fence
+static inline void _Py_atomic_fence_acquire(void);
+
+// Release fence
+static inline void _Py_atomic_fence_release(void);
+
+
+#ifndef _Py_USE_GCC_BUILTIN_ATOMICS
+#  if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8))
+#    define _Py_USE_GCC_BUILTIN_ATOMICS 1
+#  elif defined(__clang__)
+#    if __has_builtin(__atomic_load)
+#      define _Py_USE_GCC_BUILTIN_ATOMICS 1
+#    endif
+#  endif
+#endif
+
+#if _Py_USE_GCC_BUILTIN_ATOMICS
+#  define Py_ATOMIC_GCC_H
+#  include "pyatomic_gcc.h"
+#  undef Py_ATOMIC_GCC_H
+#elif __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_ATOMICS__)
+#  define Py_ATOMIC_STD_H
+#  include "pyatomic_std.h"
+#  undef Py_ATOMIC_STD_H
+#elif defined(_MSC_VER)
+#  define Py_ATOMIC_MSC_H
+#  include "pyatomic_msc.h"
+#  undef Py_ATOMIC_MSC_H
+#else
+#  error "no available pyatomic implementation for this platform/compiler"
+#endif
+
+
+// --- aliases ---------------------------------------------------------------
+
+#if SIZEOF_LONG == 8
+# define _Py_atomic_load_ulong(p) \
+    _Py_atomic_load_uint64((uint64_t *)p)
+# define _Py_atomic_load_ulong_relaxed(p) \
+    _Py_atomic_load_uint64_relaxed((uint64_t *)p)
+# define _Py_atomic_store_ulong(p, v) \
+    _Py_atomic_store_uint64((uint64_t *)p, v)
+# define _Py_atomic_store_ulong_relaxed(p, v) \
+    _Py_atomic_store_uint64_relaxed((uint64_t *)p, v)
+#elif SIZEOF_LONG == 4
+# define _Py_atomic_load_ulong(p) \
+    _Py_atomic_load_uint32((uint32_t *)p)
+# define _Py_atomic_load_ulong_relaxed(p) \
+    _Py_atomic_load_uint32_relaxed((uint32_t *)p)
+# define _Py_atomic_store_ulong(p, v) \
+    _Py_atomic_store_uint32((uint32_t *)p, v)
+# define _Py_atomic_store_ulong_relaxed(p, v) \
+    _Py_atomic_store_uint32_relaxed((uint32_t *)p, v)
+#else
+# error "long must be 4 or 8 bytes in size"
+#endif  // SIZEOF_LONG
diff --git a/Include/cpython/pyatomic_gcc.h b/Include/cpython/pyatomic_gcc.h
new file mode 100644
index 0000000000000000000000000000000000000000..ef09954d53ac1d61819856ed5c59a1fca1f1da28
--- /dev/null
+++ b/Include/cpython/pyatomic_gcc.h
@@ -0,0 +1,551 @@
+// This is the implementation of Python atomic operations using GCC's built-in
+// functions that match the C11 memory model. This implementation is preferred
+// for GCC compatible compilers, such as Clang. These functions are available
+// in GCC 4.8+ without needing to compile with --std=c11 or --std=gnu11.
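/*
 * [Editorial sketch, not part of the upstream header] The wrappers in this
 * file are thin veneers over the __atomic_* builtins; outside CPython the
 * builtins can be exercised directly. Illustration only (assumes GCC 4.8+
 * or Clang).
 */
#if 0
static int
example_fetch_increment(int *counter)
{
    // Like _Py_atomic_add_int(), __atomic_fetch_add() returns the value the
    // counter held *before* the increment.
    return __atomic_fetch_add(counter, 1, __ATOMIC_SEQ_CST);
}
#endif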
+ +#ifndef Py_ATOMIC_GCC_H +# error "this header file must not be included directly" +#endif + + +// --- _Py_atomic_add -------------------------------------------------------- + +static inline int +_Py_atomic_add_int(int *obj, int value) +{ return __atomic_fetch_add(obj, value, __ATOMIC_SEQ_CST); } + +static inline int8_t +_Py_atomic_add_int8(int8_t *obj, int8_t value) +{ return __atomic_fetch_add(obj, value, __ATOMIC_SEQ_CST); } + +static inline int16_t +_Py_atomic_add_int16(int16_t *obj, int16_t value) +{ return __atomic_fetch_add(obj, value, __ATOMIC_SEQ_CST); } + +static inline int32_t +_Py_atomic_add_int32(int32_t *obj, int32_t value) +{ return __atomic_fetch_add(obj, value, __ATOMIC_SEQ_CST); } + +static inline int64_t +_Py_atomic_add_int64(int64_t *obj, int64_t value) +{ return __atomic_fetch_add(obj, value, __ATOMIC_SEQ_CST); } + +static inline intptr_t +_Py_atomic_add_intptr(intptr_t *obj, intptr_t value) +{ return __atomic_fetch_add(obj, value, __ATOMIC_SEQ_CST); } + +static inline unsigned int +_Py_atomic_add_uint(unsigned int *obj, unsigned int value) +{ return __atomic_fetch_add(obj, value, __ATOMIC_SEQ_CST); } + +static inline uint8_t +_Py_atomic_add_uint8(uint8_t *obj, uint8_t value) +{ return __atomic_fetch_add(obj, value, __ATOMIC_SEQ_CST); } + +static inline uint16_t +_Py_atomic_add_uint16(uint16_t *obj, uint16_t value) +{ return __atomic_fetch_add(obj, value, __ATOMIC_SEQ_CST); } + +static inline uint32_t +_Py_atomic_add_uint32(uint32_t *obj, uint32_t value) +{ return __atomic_fetch_add(obj, value, __ATOMIC_SEQ_CST); } + +static inline uint64_t +_Py_atomic_add_uint64(uint64_t *obj, uint64_t value) +{ return __atomic_fetch_add(obj, value, __ATOMIC_SEQ_CST); } + +static inline uintptr_t +_Py_atomic_add_uintptr(uintptr_t *obj, uintptr_t value) +{ return __atomic_fetch_add(obj, value, __ATOMIC_SEQ_CST); } + +static inline Py_ssize_t +_Py_atomic_add_ssize(Py_ssize_t *obj, Py_ssize_t value) +{ return __atomic_fetch_add(obj, value, __ATOMIC_SEQ_CST); } + + +// --- _Py_atomic_compare_exchange ------------------------------------------- + +static inline int +_Py_atomic_compare_exchange_int(int *obj, int *expected, int desired) +{ return __atomic_compare_exchange_n(obj, expected, desired, 0, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); } + +static inline int +_Py_atomic_compare_exchange_int8(int8_t *obj, int8_t *expected, int8_t desired) +{ return __atomic_compare_exchange_n(obj, expected, desired, 0, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); } + +static inline int +_Py_atomic_compare_exchange_int16(int16_t *obj, int16_t *expected, int16_t desired) +{ return __atomic_compare_exchange_n(obj, expected, desired, 0, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); } + +static inline int +_Py_atomic_compare_exchange_int32(int32_t *obj, int32_t *expected, int32_t desired) +{ return __atomic_compare_exchange_n(obj, expected, desired, 0, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); } + +static inline int +_Py_atomic_compare_exchange_int64(int64_t *obj, int64_t *expected, int64_t desired) +{ return __atomic_compare_exchange_n(obj, expected, desired, 0, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); } + +static inline int +_Py_atomic_compare_exchange_intptr(intptr_t *obj, intptr_t *expected, intptr_t desired) +{ return __atomic_compare_exchange_n(obj, expected, desired, 0, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); } + +static inline int +_Py_atomic_compare_exchange_uint(unsigned int *obj, unsigned int *expected, unsigned int desired) +{ return __atomic_compare_exchange_n(obj, expected, desired, 0, + __ATOMIC_SEQ_CST, 
__ATOMIC_SEQ_CST); } + +static inline int +_Py_atomic_compare_exchange_uint8(uint8_t *obj, uint8_t *expected, uint8_t desired) +{ return __atomic_compare_exchange_n(obj, expected, desired, 0, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); } + +static inline int +_Py_atomic_compare_exchange_uint16(uint16_t *obj, uint16_t *expected, uint16_t desired) +{ return __atomic_compare_exchange_n(obj, expected, desired, 0, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); } + +static inline int +_Py_atomic_compare_exchange_uint32(uint32_t *obj, uint32_t *expected, uint32_t desired) +{ return __atomic_compare_exchange_n(obj, expected, desired, 0, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); } + +static inline int +_Py_atomic_compare_exchange_uint64(uint64_t *obj, uint64_t *expected, uint64_t desired) +{ return __atomic_compare_exchange_n(obj, expected, desired, 0, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); } + +static inline int +_Py_atomic_compare_exchange_uintptr(uintptr_t *obj, uintptr_t *expected, uintptr_t desired) +{ return __atomic_compare_exchange_n(obj, expected, desired, 0, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); } + +static inline int +_Py_atomic_compare_exchange_ssize(Py_ssize_t *obj, Py_ssize_t *expected, Py_ssize_t desired) +{ return __atomic_compare_exchange_n(obj, expected, desired, 0, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); } + +static inline int +_Py_atomic_compare_exchange_ptr(void *obj, void *expected, void *desired) +{ return __atomic_compare_exchange_n((void **)obj, (void **)expected, desired, 0, + __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); } + + +// --- _Py_atomic_exchange --------------------------------------------------- + +static inline int +_Py_atomic_exchange_int(int *obj, int value) +{ return __atomic_exchange_n(obj, value, __ATOMIC_SEQ_CST); } + +static inline int8_t +_Py_atomic_exchange_int8(int8_t *obj, int8_t value) +{ return __atomic_exchange_n(obj, value, __ATOMIC_SEQ_CST); } + +static inline int16_t +_Py_atomic_exchange_int16(int16_t *obj, int16_t value) +{ return __atomic_exchange_n(obj, value, __ATOMIC_SEQ_CST); } + +static inline int32_t +_Py_atomic_exchange_int32(int32_t *obj, int32_t value) +{ return __atomic_exchange_n(obj, value, __ATOMIC_SEQ_CST); } + +static inline int64_t +_Py_atomic_exchange_int64(int64_t *obj, int64_t value) +{ return __atomic_exchange_n(obj, value, __ATOMIC_SEQ_CST); } + +static inline intptr_t +_Py_atomic_exchange_intptr(intptr_t *obj, intptr_t value) +{ return __atomic_exchange_n(obj, value, __ATOMIC_SEQ_CST); } + +static inline unsigned int +_Py_atomic_exchange_uint(unsigned int *obj, unsigned int value) +{ return __atomic_exchange_n(obj, value, __ATOMIC_SEQ_CST); } + +static inline uint8_t +_Py_atomic_exchange_uint8(uint8_t *obj, uint8_t value) +{ return __atomic_exchange_n(obj, value, __ATOMIC_SEQ_CST); } + +static inline uint16_t +_Py_atomic_exchange_uint16(uint16_t *obj, uint16_t value) +{ return __atomic_exchange_n(obj, value, __ATOMIC_SEQ_CST); } + +static inline uint32_t +_Py_atomic_exchange_uint32(uint32_t *obj, uint32_t value) +{ return __atomic_exchange_n(obj, value, __ATOMIC_SEQ_CST); } + +static inline uint64_t +_Py_atomic_exchange_uint64(uint64_t *obj, uint64_t value) +{ return __atomic_exchange_n(obj, value, __ATOMIC_SEQ_CST); } + +static inline uintptr_t +_Py_atomic_exchange_uintptr(uintptr_t *obj, uintptr_t value) +{ return __atomic_exchange_n(obj, value, __ATOMIC_SEQ_CST); } + +static inline Py_ssize_t +_Py_atomic_exchange_ssize(Py_ssize_t *obj, Py_ssize_t value) +{ return __atomic_exchange_n(obj, value, __ATOMIC_SEQ_CST); } + +static inline void * 
+_Py_atomic_exchange_ptr(void *obj, void *value) +{ return __atomic_exchange_n((void **)obj, value, __ATOMIC_SEQ_CST); } + + +// --- _Py_atomic_and -------------------------------------------------------- + +static inline uint8_t +_Py_atomic_and_uint8(uint8_t *obj, uint8_t value) +{ return __atomic_fetch_and(obj, value, __ATOMIC_SEQ_CST); } + +static inline uint16_t +_Py_atomic_and_uint16(uint16_t *obj, uint16_t value) +{ return __atomic_fetch_and(obj, value, __ATOMIC_SEQ_CST); } + +static inline uint32_t +_Py_atomic_and_uint32(uint32_t *obj, uint32_t value) +{ return __atomic_fetch_and(obj, value, __ATOMIC_SEQ_CST); } + +static inline uint64_t +_Py_atomic_and_uint64(uint64_t *obj, uint64_t value) +{ return __atomic_fetch_and(obj, value, __ATOMIC_SEQ_CST); } + +static inline uintptr_t +_Py_atomic_and_uintptr(uintptr_t *obj, uintptr_t value) +{ return __atomic_fetch_and(obj, value, __ATOMIC_SEQ_CST); } + + +// --- _Py_atomic_or --------------------------------------------------------- + +static inline uint8_t +_Py_atomic_or_uint8(uint8_t *obj, uint8_t value) +{ return __atomic_fetch_or(obj, value, __ATOMIC_SEQ_CST); } + +static inline uint16_t +_Py_atomic_or_uint16(uint16_t *obj, uint16_t value) +{ return __atomic_fetch_or(obj, value, __ATOMIC_SEQ_CST); } + +static inline uint32_t +_Py_atomic_or_uint32(uint32_t *obj, uint32_t value) +{ return __atomic_fetch_or(obj, value, __ATOMIC_SEQ_CST); } + +static inline uint64_t +_Py_atomic_or_uint64(uint64_t *obj, uint64_t value) +{ return __atomic_fetch_or(obj, value, __ATOMIC_SEQ_CST); } + +static inline uintptr_t +_Py_atomic_or_uintptr(uintptr_t *obj, uintptr_t value) +{ return __atomic_fetch_or(obj, value, __ATOMIC_SEQ_CST); } + + +// --- _Py_atomic_load ------------------------------------------------------- + +static inline int +_Py_atomic_load_int(const int *obj) +{ return __atomic_load_n(obj, __ATOMIC_SEQ_CST); } + +static inline int8_t +_Py_atomic_load_int8(const int8_t *obj) +{ return __atomic_load_n(obj, __ATOMIC_SEQ_CST); } + +static inline int16_t +_Py_atomic_load_int16(const int16_t *obj) +{ return __atomic_load_n(obj, __ATOMIC_SEQ_CST); } + +static inline int32_t +_Py_atomic_load_int32(const int32_t *obj) +{ return __atomic_load_n(obj, __ATOMIC_SEQ_CST); } + +static inline int64_t +_Py_atomic_load_int64(const int64_t *obj) +{ return __atomic_load_n(obj, __ATOMIC_SEQ_CST); } + +static inline intptr_t +_Py_atomic_load_intptr(const intptr_t *obj) +{ return __atomic_load_n(obj, __ATOMIC_SEQ_CST); } + +static inline uint8_t +_Py_atomic_load_uint8(const uint8_t *obj) +{ return __atomic_load_n(obj, __ATOMIC_SEQ_CST); } + +static inline uint16_t +_Py_atomic_load_uint16(const uint16_t *obj) +{ return __atomic_load_n(obj, __ATOMIC_SEQ_CST); } + +static inline uint32_t +_Py_atomic_load_uint32(const uint32_t *obj) +{ return __atomic_load_n(obj, __ATOMIC_SEQ_CST); } + +static inline uint64_t +_Py_atomic_load_uint64(const uint64_t *obj) +{ return __atomic_load_n(obj, __ATOMIC_SEQ_CST); } + +static inline uintptr_t +_Py_atomic_load_uintptr(const uintptr_t *obj) +{ return __atomic_load_n(obj, __ATOMIC_SEQ_CST); } + +static inline unsigned int +_Py_atomic_load_uint(const unsigned int *obj) +{ return __atomic_load_n(obj, __ATOMIC_SEQ_CST); } + +static inline Py_ssize_t +_Py_atomic_load_ssize(const Py_ssize_t *obj) +{ return __atomic_load_n(obj, __ATOMIC_SEQ_CST); } + +static inline void * +_Py_atomic_load_ptr(const void *obj) +{ return (void *)__atomic_load_n((void * const *)obj, __ATOMIC_SEQ_CST); } + + +// --- _Py_atomic_load_relaxed 
----------------------------------------------- + +static inline int +_Py_atomic_load_int_relaxed(const int *obj) +{ return __atomic_load_n(obj, __ATOMIC_RELAXED); } + +static inline int8_t +_Py_atomic_load_int8_relaxed(const int8_t *obj) +{ return __atomic_load_n(obj, __ATOMIC_RELAXED); } + +static inline int16_t +_Py_atomic_load_int16_relaxed(const int16_t *obj) +{ return __atomic_load_n(obj, __ATOMIC_RELAXED); } + +static inline int32_t +_Py_atomic_load_int32_relaxed(const int32_t *obj) +{ return __atomic_load_n(obj, __ATOMIC_RELAXED); } + +static inline int64_t +_Py_atomic_load_int64_relaxed(const int64_t *obj) +{ return __atomic_load_n(obj, __ATOMIC_RELAXED); } + +static inline intptr_t +_Py_atomic_load_intptr_relaxed(const intptr_t *obj) +{ return __atomic_load_n(obj, __ATOMIC_RELAXED); } + +static inline uint8_t +_Py_atomic_load_uint8_relaxed(const uint8_t *obj) +{ return __atomic_load_n(obj, __ATOMIC_RELAXED); } + +static inline uint16_t +_Py_atomic_load_uint16_relaxed(const uint16_t *obj) +{ return __atomic_load_n(obj, __ATOMIC_RELAXED); } + +static inline uint32_t +_Py_atomic_load_uint32_relaxed(const uint32_t *obj) +{ return __atomic_load_n(obj, __ATOMIC_RELAXED); } + +static inline uint64_t +_Py_atomic_load_uint64_relaxed(const uint64_t *obj) +{ return __atomic_load_n(obj, __ATOMIC_RELAXED); } + +static inline uintptr_t +_Py_atomic_load_uintptr_relaxed(const uintptr_t *obj) +{ return __atomic_load_n(obj, __ATOMIC_RELAXED); } + +static inline unsigned int +_Py_atomic_load_uint_relaxed(const unsigned int *obj) +{ return __atomic_load_n(obj, __ATOMIC_RELAXED); } + +static inline Py_ssize_t +_Py_atomic_load_ssize_relaxed(const Py_ssize_t *obj) +{ return __atomic_load_n(obj, __ATOMIC_RELAXED); } + +static inline void * +_Py_atomic_load_ptr_relaxed(const void *obj) +{ return (void *)__atomic_load_n((void * const *)obj, __ATOMIC_RELAXED); } + +static inline unsigned long long +_Py_atomic_load_ullong_relaxed(const unsigned long long *obj) +{ return __atomic_load_n(obj, __ATOMIC_RELAXED); } + + +// --- _Py_atomic_store ------------------------------------------------------ + +static inline void +_Py_atomic_store_int(int *obj, int value) +{ __atomic_store_n(obj, value, __ATOMIC_SEQ_CST); } + +static inline void +_Py_atomic_store_int8(int8_t *obj, int8_t value) +{ __atomic_store_n(obj, value, __ATOMIC_SEQ_CST); } + +static inline void +_Py_atomic_store_int16(int16_t *obj, int16_t value) +{ __atomic_store_n(obj, value, __ATOMIC_SEQ_CST); } + +static inline void +_Py_atomic_store_int32(int32_t *obj, int32_t value) +{ __atomic_store_n(obj, value, __ATOMIC_SEQ_CST); } + +static inline void +_Py_atomic_store_int64(int64_t *obj, int64_t value) +{ __atomic_store_n(obj, value, __ATOMIC_SEQ_CST); } + +static inline void +_Py_atomic_store_intptr(intptr_t *obj, intptr_t value) +{ __atomic_store_n(obj, value, __ATOMIC_SEQ_CST); } + +static inline void +_Py_atomic_store_uint8(uint8_t *obj, uint8_t value) +{ __atomic_store_n(obj, value, __ATOMIC_SEQ_CST); } + +static inline void +_Py_atomic_store_uint16(uint16_t *obj, uint16_t value) +{ __atomic_store_n(obj, value, __ATOMIC_SEQ_CST); } + +static inline void +_Py_atomic_store_uint32(uint32_t *obj, uint32_t value) +{ __atomic_store_n(obj, value, __ATOMIC_SEQ_CST); } + +static inline void +_Py_atomic_store_uint64(uint64_t *obj, uint64_t value) +{ __atomic_store_n(obj, value, __ATOMIC_SEQ_CST); } + +static inline void +_Py_atomic_store_uintptr(uintptr_t *obj, uintptr_t value) +{ __atomic_store_n(obj, value, __ATOMIC_SEQ_CST); } + +static inline void 
+_Py_atomic_store_uint(unsigned int *obj, unsigned int value) +{ __atomic_store_n(obj, value, __ATOMIC_SEQ_CST); } + +static inline void +_Py_atomic_store_ptr(void *obj, void *value) +{ __atomic_store_n((void **)obj, value, __ATOMIC_SEQ_CST); } + +static inline void +_Py_atomic_store_ssize(Py_ssize_t *obj, Py_ssize_t value) +{ __atomic_store_n(obj, value, __ATOMIC_SEQ_CST); } + + +// --- _Py_atomic_store_relaxed ---------------------------------------------- + +static inline void +_Py_atomic_store_int_relaxed(int *obj, int value) +{ __atomic_store_n(obj, value, __ATOMIC_RELAXED); } + +static inline void +_Py_atomic_store_int8_relaxed(int8_t *obj, int8_t value) +{ __atomic_store_n(obj, value, __ATOMIC_RELAXED); } + +static inline void +_Py_atomic_store_int16_relaxed(int16_t *obj, int16_t value) +{ __atomic_store_n(obj, value, __ATOMIC_RELAXED); } + +static inline void +_Py_atomic_store_int32_relaxed(int32_t *obj, int32_t value) +{ __atomic_store_n(obj, value, __ATOMIC_RELAXED); } + +static inline void +_Py_atomic_store_int64_relaxed(int64_t *obj, int64_t value) +{ __atomic_store_n(obj, value, __ATOMIC_RELAXED); } + +static inline void +_Py_atomic_store_intptr_relaxed(intptr_t *obj, intptr_t value) +{ __atomic_store_n(obj, value, __ATOMIC_RELAXED); } + +static inline void +_Py_atomic_store_uint8_relaxed(uint8_t *obj, uint8_t value) +{ __atomic_store_n(obj, value, __ATOMIC_RELAXED); } + +static inline void +_Py_atomic_store_uint16_relaxed(uint16_t *obj, uint16_t value) +{ __atomic_store_n(obj, value, __ATOMIC_RELAXED); } + +static inline void +_Py_atomic_store_uint32_relaxed(uint32_t *obj, uint32_t value) +{ __atomic_store_n(obj, value, __ATOMIC_RELAXED); } + +static inline void +_Py_atomic_store_uint64_relaxed(uint64_t *obj, uint64_t value) +{ __atomic_store_n(obj, value, __ATOMIC_RELAXED); } + +static inline void +_Py_atomic_store_uintptr_relaxed(uintptr_t *obj, uintptr_t value) +{ __atomic_store_n(obj, value, __ATOMIC_RELAXED); } + +static inline void +_Py_atomic_store_uint_relaxed(unsigned int *obj, unsigned int value) +{ __atomic_store_n(obj, value, __ATOMIC_RELAXED); } + +static inline void +_Py_atomic_store_ptr_relaxed(void *obj, void *value) +{ __atomic_store_n((void **)obj, value, __ATOMIC_RELAXED); } + +static inline void +_Py_atomic_store_ssize_relaxed(Py_ssize_t *obj, Py_ssize_t value) +{ __atomic_store_n(obj, value, __ATOMIC_RELAXED); } + +static inline void +_Py_atomic_store_ullong_relaxed(unsigned long long *obj, + unsigned long long value) +{ __atomic_store_n(obj, value, __ATOMIC_RELAXED); } + + +// --- _Py_atomic_load_ptr_acquire / _Py_atomic_store_ptr_release ------------ + +static inline void * +_Py_atomic_load_ptr_acquire(const void *obj) +{ return (void *)__atomic_load_n((void * const *)obj, __ATOMIC_ACQUIRE); } + +static inline uintptr_t +_Py_atomic_load_uintptr_acquire(const uintptr_t *obj) +{ return (uintptr_t)__atomic_load_n(obj, __ATOMIC_ACQUIRE); } + +static inline void +_Py_atomic_store_ptr_release(void *obj, void *value) +{ __atomic_store_n((void **)obj, value, __ATOMIC_RELEASE); } + +static inline void +_Py_atomic_store_uintptr_release(uintptr_t *obj, uintptr_t value) +{ __atomic_store_n(obj, value, __ATOMIC_RELEASE); } + +static inline void +_Py_atomic_store_int_release(int *obj, int value) +{ __atomic_store_n(obj, value, __ATOMIC_RELEASE); } + +static inline void +_Py_atomic_store_ssize_release(Py_ssize_t *obj, Py_ssize_t value) +{ __atomic_store_n(obj, value, __ATOMIC_RELEASE); } + +static inline int +_Py_atomic_load_int_acquire(const int *obj) +{ return 
__atomic_load_n(obj, __ATOMIC_ACQUIRE); }
+
+static inline void
+_Py_atomic_store_uint32_release(uint32_t *obj, uint32_t value)
+{ __atomic_store_n(obj, value, __ATOMIC_RELEASE); }
+
+static inline void
+_Py_atomic_store_uint64_release(uint64_t *obj, uint64_t value)
+{ __atomic_store_n(obj, value, __ATOMIC_RELEASE); }
+
+static inline uint64_t
+_Py_atomic_load_uint64_acquire(const uint64_t *obj)
+{ return __atomic_load_n(obj, __ATOMIC_ACQUIRE); }
+
+static inline uint32_t
+_Py_atomic_load_uint32_acquire(const uint32_t *obj)
+{ return __atomic_load_n(obj, __ATOMIC_ACQUIRE); }
+
+static inline Py_ssize_t
+_Py_atomic_load_ssize_acquire(const Py_ssize_t *obj)
+{ return __atomic_load_n(obj, __ATOMIC_ACQUIRE); }
+
+// --- _Py_atomic_fence ------------------------------------------------------
+
+static inline void
+_Py_atomic_fence_seq_cst(void)
+{ __atomic_thread_fence(__ATOMIC_SEQ_CST); }
+
+static inline void
+_Py_atomic_fence_acquire(void)
+{ __atomic_thread_fence(__ATOMIC_ACQUIRE); }
+
+static inline void
+_Py_atomic_fence_release(void)
+{ __atomic_thread_fence(__ATOMIC_RELEASE); }
diff --git a/Include/cpython/pyatomic_msc.h b/Include/cpython/pyatomic_msc.h
new file mode 100644
index 0000000000000000000000000000000000000000..84da21bdcbff4f1ed3bb549db4dfd7350b5dfffe
--- /dev/null
+++ b/Include/cpython/pyatomic_msc.h
@@ -0,0 +1,1095 @@
+// This is the implementation of Python atomic operations for MSVC if the
+// compiler does not support C11 or C++11 atomics.
+//
+// MSVC intrinsics are defined on char, short, long, __int64, and pointer
+// types. Note that long and int are both 32-bits even on 64-bit Windows,
+// so operations on int are cast to long.
+//
+// The volatile keyword has additional memory ordering semantics on MSVC. On
+// x86 and x86-64, volatile accesses have acquire-release semantics. On ARM64,
+// volatile accesses behave like C11's memory_order_relaxed.
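/*
 * [Editorial sketch, not part of the upstream header] On Windows, int and
 * long are both 32 bits, which is why the int wrappers below can route
 * through the long-based Interlocked intrinsics. Hypothetical helper,
 * illustration only.
 */
#if 0
static int
example_add_int(int *obj, int value)
{
    // Safe because sizeof(int) == sizeof(long) == 4 on supported Windows
    // targets; _InterlockedExchangeAdd returns the pre-add value.
    return (int)_InterlockedExchangeAdd((volatile long *)obj, (long)value);
}
#endif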
+
+#ifndef Py_ATOMIC_MSC_H
+# error "this header file must not be included directly"
+#endif
+
+#include <intrin.h>
+
+#define _Py_atomic_ASSERT_ARG_TYPE(TYPE) \
+    Py_BUILD_ASSERT(sizeof(*obj) == sizeof(TYPE))
+
+
+// --- _Py_atomic_add --------------------------------------------------------
+
+static inline int8_t
+_Py_atomic_add_int8(int8_t *obj, int8_t value)
+{
+    _Py_atomic_ASSERT_ARG_TYPE(char);
+    return (int8_t)_InterlockedExchangeAdd8((volatile char *)obj, (char)value);
+}
+
+static inline int16_t
+_Py_atomic_add_int16(int16_t *obj, int16_t value)
+{
+    _Py_atomic_ASSERT_ARG_TYPE(short);
+    return (int16_t)_InterlockedExchangeAdd16((volatile short *)obj, (short)value);
+}
+
+static inline int32_t
+_Py_atomic_add_int32(int32_t *obj, int32_t value)
+{
+    _Py_atomic_ASSERT_ARG_TYPE(long);
+    return (int32_t)_InterlockedExchangeAdd((volatile long *)obj, (long)value);
+}
+
+static inline int64_t
+_Py_atomic_add_int64(int64_t *obj, int64_t value)
+{
+#if defined(_M_X64) || defined(_M_ARM64)
+    _Py_atomic_ASSERT_ARG_TYPE(__int64);
+    return (int64_t)_InterlockedExchangeAdd64((volatile __int64 *)obj, (__int64)value);
+#else
+    int64_t old_value = _Py_atomic_load_int64_relaxed(obj);
+    for (;;) {
+        int64_t new_value = old_value + value;
+        if (_Py_atomic_compare_exchange_int64(obj, &old_value, new_value)) {
+            return old_value;
+        }
+    }
+#endif
+}
+
+
+static inline uint8_t
+_Py_atomic_add_uint8(uint8_t *obj, uint8_t value)
+{
+    return (uint8_t)_Py_atomic_add_int8((int8_t *)obj, (int8_t)value);
+}
+
+static inline uint16_t
+_Py_atomic_add_uint16(uint16_t *obj, uint16_t value)
+{
+    return (uint16_t)_Py_atomic_add_int16((int16_t *)obj, (int16_t)value);
+}
+
+static inline uint32_t
+_Py_atomic_add_uint32(uint32_t *obj, uint32_t value)
+{
+    return (uint32_t)_Py_atomic_add_int32((int32_t *)obj, (int32_t)value);
+}
+
+static inline int
+_Py_atomic_add_int(int *obj, int value)
+{
+    _Py_atomic_ASSERT_ARG_TYPE(int32_t);
+    return (int)_Py_atomic_add_int32((int32_t *)obj, (int32_t)value);
+}
+
+static inline unsigned int
+_Py_atomic_add_uint(unsigned int *obj, unsigned int value)
+{
+    _Py_atomic_ASSERT_ARG_TYPE(int32_t);
+    return (unsigned int)_Py_atomic_add_int32((int32_t *)obj, (int32_t)value);
+}
+
+static inline uint64_t
+_Py_atomic_add_uint64(uint64_t *obj, uint64_t value)
+{
+    return (uint64_t)_Py_atomic_add_int64((int64_t *)obj, (int64_t)value);
+}
+
+static inline intptr_t
+_Py_atomic_add_intptr(intptr_t *obj, intptr_t value)
+{
+#if SIZEOF_VOID_P == 8
+    _Py_atomic_ASSERT_ARG_TYPE(int64_t);
+    return (intptr_t)_Py_atomic_add_int64((int64_t *)obj, (int64_t)value);
+#else
+    _Py_atomic_ASSERT_ARG_TYPE(int32_t);
+    return (intptr_t)_Py_atomic_add_int32((int32_t *)obj, (int32_t)value);
+#endif
+}
+
+static inline uintptr_t
+_Py_atomic_add_uintptr(uintptr_t *obj, uintptr_t value)
+{
+    _Py_atomic_ASSERT_ARG_TYPE(intptr_t);
+    return (uintptr_t)_Py_atomic_add_intptr((intptr_t *)obj, (intptr_t)value);
+}
+
+static inline Py_ssize_t
+_Py_atomic_add_ssize(Py_ssize_t *obj, Py_ssize_t value)
+{
+    _Py_atomic_ASSERT_ARG_TYPE(intptr_t);
+    return (Py_ssize_t)_Py_atomic_add_intptr((intptr_t *)obj, (intptr_t)value);
+}
+
+
+// --- _Py_atomic_compare_exchange -------------------------------------------
+
+static inline int
+_Py_atomic_compare_exchange_int8(int8_t *obj, int8_t *expected, int8_t value)
+{
+    _Py_atomic_ASSERT_ARG_TYPE(char);
+    int8_t initial = (int8_t)_InterlockedCompareExchange8(
+                                       (volatile char *)obj,
+                                       (char)value,
+                                       (char)*expected);
+    if (initial == *expected) {
+        return 1;
+    }
+    *expected = initial;
+    return 0;
+}
+
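/*
 * [Editorial sketch, not part of the upstream header] Where no native 64-bit
 * intrinsic exists (32-bit x86), read-modify-write operations are emulated
 * with a compare-exchange loop, as in _Py_atomic_add_int64() above. Any RMW
 * operation fits the same pattern; the xor helper below is hypothetical.
 */
#if 0
static int64_t
example_fetch_xor_int64(int64_t *obj, int64_t value)
{
    int64_t old_value = _Py_atomic_load_int64_relaxed(obj);
    for (;;) {
        int64_t new_value = old_value ^ value;
        // On failure, old_value is refreshed with the current contents and
        // the desired value is recomputed before retrying.
        if (_Py_atomic_compare_exchange_int64(obj, &old_value, new_value)) {
            return old_value;
        }
    }
}
#endif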
+static inline int +_Py_atomic_compare_exchange_int16(int16_t *obj, int16_t *expected, int16_t value) +{ + _Py_atomic_ASSERT_ARG_TYPE(short); + int16_t initial = (int16_t)_InterlockedCompareExchange16( + (volatile short *)obj, + (short)value, + (short)*expected); + if (initial == *expected) { + return 1; + } + *expected = initial; + return 0; +} + +static inline int +_Py_atomic_compare_exchange_int32(int32_t *obj, int32_t *expected, int32_t value) +{ + _Py_atomic_ASSERT_ARG_TYPE(long); + int32_t initial = (int32_t)_InterlockedCompareExchange( + (volatile long *)obj, + (long)value, + (long)*expected); + if (initial == *expected) { + return 1; + } + *expected = initial; + return 0; +} + +static inline int +_Py_atomic_compare_exchange_int64(int64_t *obj, int64_t *expected, int64_t value) +{ + _Py_atomic_ASSERT_ARG_TYPE(__int64); + int64_t initial = (int64_t)_InterlockedCompareExchange64( + (volatile __int64 *)obj, + (__int64)value, + (__int64)*expected); + if (initial == *expected) { + return 1; + } + *expected = initial; + return 0; +} + +static inline int +_Py_atomic_compare_exchange_ptr(void *obj, void *expected, void *value) +{ + void *initial = _InterlockedCompareExchangePointer( + (void**)obj, + value, + *(void**)expected); + if (initial == *(void**)expected) { + return 1; + } + *(void**)expected = initial; + return 0; +} + + +static inline int +_Py_atomic_compare_exchange_uint8(uint8_t *obj, uint8_t *expected, uint8_t value) +{ + return _Py_atomic_compare_exchange_int8((int8_t *)obj, + (int8_t *)expected, + (int8_t)value); +} + +static inline int +_Py_atomic_compare_exchange_uint16(uint16_t *obj, uint16_t *expected, uint16_t value) +{ + return _Py_atomic_compare_exchange_int16((int16_t *)obj, + (int16_t *)expected, + (int16_t)value); +} + +static inline int +_Py_atomic_compare_exchange_uint32(uint32_t *obj, uint32_t *expected, uint32_t value) +{ + return _Py_atomic_compare_exchange_int32((int32_t *)obj, + (int32_t *)expected, + (int32_t)value); +} + +static inline int +_Py_atomic_compare_exchange_int(int *obj, int *expected, int value) +{ + _Py_atomic_ASSERT_ARG_TYPE(int32_t); + return _Py_atomic_compare_exchange_int32((int32_t *)obj, + (int32_t *)expected, + (int32_t)value); +} + +static inline int +_Py_atomic_compare_exchange_uint(unsigned int *obj, unsigned int *expected, unsigned int value) +{ + _Py_atomic_ASSERT_ARG_TYPE(int32_t); + return _Py_atomic_compare_exchange_int32((int32_t *)obj, + (int32_t *)expected, + (int32_t)value); +} + +static inline int +_Py_atomic_compare_exchange_uint64(uint64_t *obj, uint64_t *expected, uint64_t value) +{ + return _Py_atomic_compare_exchange_int64((int64_t *)obj, + (int64_t *)expected, + (int64_t)value); +} + +static inline int +_Py_atomic_compare_exchange_intptr(intptr_t *obj, intptr_t *expected, intptr_t value) +{ + _Py_atomic_ASSERT_ARG_TYPE(void*); + return _Py_atomic_compare_exchange_ptr((void**)obj, + (void**)expected, + (void*)value); +} + +static inline int +_Py_atomic_compare_exchange_uintptr(uintptr_t *obj, uintptr_t *expected, uintptr_t value) +{ + _Py_atomic_ASSERT_ARG_TYPE(void*); + return _Py_atomic_compare_exchange_ptr((void**)obj, + (void**)expected, + (void*)value); +} + +static inline int +_Py_atomic_compare_exchange_ssize(Py_ssize_t *obj, Py_ssize_t *expected, Py_ssize_t value) +{ + _Py_atomic_ASSERT_ARG_TYPE(void*); + return _Py_atomic_compare_exchange_ptr((void**)obj, + (void**)expected, + (void*)value); +} + + +// --- _Py_atomic_exchange --------------------------------------------------- + +static inline int8_t 
+_Py_atomic_exchange_int8(int8_t *obj, int8_t value) +{ + _Py_atomic_ASSERT_ARG_TYPE(char); + return (int8_t)_InterlockedExchange8((volatile char *)obj, (char)value); +} + +static inline int16_t +_Py_atomic_exchange_int16(int16_t *obj, int16_t value) +{ + _Py_atomic_ASSERT_ARG_TYPE(short); + return (int16_t)_InterlockedExchange16((volatile short *)obj, (short)value); +} + +static inline int32_t +_Py_atomic_exchange_int32(int32_t *obj, int32_t value) +{ + _Py_atomic_ASSERT_ARG_TYPE(long); + return (int32_t)_InterlockedExchange((volatile long *)obj, (long)value); +} + +static inline int64_t +_Py_atomic_exchange_int64(int64_t *obj, int64_t value) +{ +#if defined(_M_X64) || defined(_M_ARM64) + _Py_atomic_ASSERT_ARG_TYPE(__int64); + return (int64_t)_InterlockedExchange64((volatile __int64 *)obj, (__int64)value); +#else + int64_t old_value = _Py_atomic_load_int64_relaxed(obj); + for (;;) { + if (_Py_atomic_compare_exchange_int64(obj, &old_value, value)) { + return old_value; + } + } +#endif +} + +static inline void* +_Py_atomic_exchange_ptr(void *obj, void *value) +{ + return (void*)_InterlockedExchangePointer((void * volatile *)obj, (void *)value); +} + + +static inline uint8_t +_Py_atomic_exchange_uint8(uint8_t *obj, uint8_t value) +{ + return (uint8_t)_Py_atomic_exchange_int8((int8_t *)obj, + (int8_t)value); +} + +static inline uint16_t +_Py_atomic_exchange_uint16(uint16_t *obj, uint16_t value) +{ + return (uint16_t)_Py_atomic_exchange_int16((int16_t *)obj, + (int16_t)value); +} + +static inline uint32_t +_Py_atomic_exchange_uint32(uint32_t *obj, uint32_t value) +{ + return (uint32_t)_Py_atomic_exchange_int32((int32_t *)obj, + (int32_t)value); +} + +static inline int +_Py_atomic_exchange_int(int *obj, int value) +{ + _Py_atomic_ASSERT_ARG_TYPE(int32_t); + return (int)_Py_atomic_exchange_int32((int32_t *)obj, + (int32_t)value); +} + +static inline unsigned int +_Py_atomic_exchange_uint(unsigned int *obj, unsigned int value) +{ + _Py_atomic_ASSERT_ARG_TYPE(int32_t); + return (unsigned int)_Py_atomic_exchange_int32((int32_t *)obj, + (int32_t)value); +} + +static inline uint64_t +_Py_atomic_exchange_uint64(uint64_t *obj, uint64_t value) +{ + return (uint64_t)_Py_atomic_exchange_int64((int64_t *)obj, + (int64_t)value); +} + +static inline intptr_t +_Py_atomic_exchange_intptr(intptr_t *obj, intptr_t value) +{ + _Py_atomic_ASSERT_ARG_TYPE(void*); + return (intptr_t)_Py_atomic_exchange_ptr((void**)obj, + (void*)value); +} + +static inline uintptr_t +_Py_atomic_exchange_uintptr(uintptr_t *obj, uintptr_t value) +{ + _Py_atomic_ASSERT_ARG_TYPE(void*); + return (uintptr_t)_Py_atomic_exchange_ptr((void**)obj, + (void*)value); +} + +static inline Py_ssize_t +_Py_atomic_exchange_ssize(Py_ssize_t *obj, Py_ssize_t value) +{ + _Py_atomic_ASSERT_ARG_TYPE(void*); + return (Py_ssize_t)_Py_atomic_exchange_ptr((void**)obj, + (void*)value); +} + + +// --- _Py_atomic_and -------------------------------------------------------- + +static inline uint8_t +_Py_atomic_and_uint8(uint8_t *obj, uint8_t value) +{ + _Py_atomic_ASSERT_ARG_TYPE(char); + return (uint8_t)_InterlockedAnd8((volatile char *)obj, (char)value); +} + +static inline uint16_t +_Py_atomic_and_uint16(uint16_t *obj, uint16_t value) +{ + _Py_atomic_ASSERT_ARG_TYPE(short); + return (uint16_t)_InterlockedAnd16((volatile short *)obj, (short)value); +} + +static inline uint32_t +_Py_atomic_and_uint32(uint32_t *obj, uint32_t value) +{ + _Py_atomic_ASSERT_ARG_TYPE(long); + return (uint32_t)_InterlockedAnd((volatile long *)obj, (long)value); +} + +static inline 
uint64_t +_Py_atomic_and_uint64(uint64_t *obj, uint64_t value) +{ +#if defined(_M_X64) || defined(_M_ARM64) + _Py_atomic_ASSERT_ARG_TYPE(__int64); + return (uint64_t)_InterlockedAnd64((volatile __int64 *)obj, (__int64)value); +#else + uint64_t old_value = _Py_atomic_load_uint64_relaxed(obj); + for (;;) { + uint64_t new_value = old_value & value; + if (_Py_atomic_compare_exchange_uint64(obj, &old_value, new_value)) { + return old_value; + } + } +#endif +} + +static inline uintptr_t +_Py_atomic_and_uintptr(uintptr_t *obj, uintptr_t value) +{ +#if SIZEOF_VOID_P == 8 + _Py_atomic_ASSERT_ARG_TYPE(uint64_t); + return (uintptr_t)_Py_atomic_and_uint64((uint64_t *)obj, + (uint64_t)value); +#else + _Py_atomic_ASSERT_ARG_TYPE(uint32_t); + return (uintptr_t)_Py_atomic_and_uint32((uint32_t *)obj, + (uint32_t)value); +#endif +} + + +// --- _Py_atomic_or --------------------------------------------------------- + +static inline uint8_t +_Py_atomic_or_uint8(uint8_t *obj, uint8_t value) +{ + _Py_atomic_ASSERT_ARG_TYPE(char); + return (uint8_t)_InterlockedOr8((volatile char *)obj, (char)value); +} + +static inline uint16_t +_Py_atomic_or_uint16(uint16_t *obj, uint16_t value) +{ + _Py_atomic_ASSERT_ARG_TYPE(short); + return (uint16_t)_InterlockedOr16((volatile short *)obj, (short)value); +} + +static inline uint32_t +_Py_atomic_or_uint32(uint32_t *obj, uint32_t value) +{ + _Py_atomic_ASSERT_ARG_TYPE(long); + return (uint32_t)_InterlockedOr((volatile long *)obj, (long)value); +} + +static inline uint64_t +_Py_atomic_or_uint64(uint64_t *obj, uint64_t value) +{ +#if defined(_M_X64) || defined(_M_ARM64) + _Py_atomic_ASSERT_ARG_TYPE(__int64); + return (uint64_t)_InterlockedOr64((volatile __int64 *)obj, (__int64)value); +#else + uint64_t old_value = _Py_atomic_load_uint64_relaxed(obj); + for (;;) { + uint64_t new_value = old_value | value; + if (_Py_atomic_compare_exchange_uint64(obj, &old_value, new_value)) { + return old_value; + } + } +#endif +} + + +static inline uintptr_t +_Py_atomic_or_uintptr(uintptr_t *obj, uintptr_t value) +{ +#if SIZEOF_VOID_P == 8 + _Py_atomic_ASSERT_ARG_TYPE(uint64_t); + return (uintptr_t)_Py_atomic_or_uint64((uint64_t *)obj, + (uint64_t)value); +#else + _Py_atomic_ASSERT_ARG_TYPE(uint32_t); + return (uintptr_t)_Py_atomic_or_uint32((uint32_t *)obj, + (uint32_t)value); +#endif +} + + +// --- _Py_atomic_load ------------------------------------------------------- + +static inline uint8_t +_Py_atomic_load_uint8(const uint8_t *obj) +{ +#if defined(_M_X64) || defined(_M_IX86) + return *(volatile uint8_t *)obj; +#elif defined(_M_ARM64) + return (uint8_t)__ldar8((unsigned __int8 volatile *)obj); +#else +# error "no implementation of _Py_atomic_load_uint8" +#endif +} + +static inline uint16_t +_Py_atomic_load_uint16(const uint16_t *obj) +{ +#if defined(_M_X64) || defined(_M_IX86) + return *(volatile uint16_t *)obj; +#elif defined(_M_ARM64) + return (uint16_t)__ldar16((unsigned __int16 volatile *)obj); +#else +# error "no implementation of _Py_atomic_load_uint16" +#endif +} + +static inline uint32_t +_Py_atomic_load_uint32(const uint32_t *obj) +{ +#if defined(_M_X64) || defined(_M_IX86) + return *(volatile uint32_t *)obj; +#elif defined(_M_ARM64) + return (uint32_t)__ldar32((unsigned __int32 volatile *)obj); +#else +# error "no implementation of _Py_atomic_load_uint32" +#endif +} + +static inline uint64_t +_Py_atomic_load_uint64(const uint64_t *obj) +{ +#if defined(_M_X64) || defined(_M_IX86) + return *(volatile uint64_t *)obj; +#elif defined(_M_ARM64) + return (uint64_t)__ldar64((unsigned 
__int64 volatile *)obj); +#else +# error "no implementation of _Py_atomic_load_uint64" +#endif +} + +static inline int8_t +_Py_atomic_load_int8(const int8_t *obj) +{ + return (int8_t)_Py_atomic_load_uint8((const uint8_t *)obj); +} + +static inline int16_t +_Py_atomic_load_int16(const int16_t *obj) +{ + return (int16_t)_Py_atomic_load_uint16((const uint16_t *)obj); +} + +static inline int32_t +_Py_atomic_load_int32(const int32_t *obj) +{ + return (int32_t)_Py_atomic_load_uint32((const uint32_t *)obj); +} + +static inline int +_Py_atomic_load_int(const int *obj) +{ + _Py_atomic_ASSERT_ARG_TYPE(uint32_t); + return (int)_Py_atomic_load_uint32((uint32_t *)obj); +} + +static inline unsigned int +_Py_atomic_load_uint(const unsigned int *obj) +{ + _Py_atomic_ASSERT_ARG_TYPE(uint32_t); + return (unsigned int)_Py_atomic_load_uint32((uint32_t *)obj); +} + +static inline int64_t +_Py_atomic_load_int64(const int64_t *obj) +{ + return (int64_t)_Py_atomic_load_uint64((const uint64_t *)obj); +} + +static inline void* +_Py_atomic_load_ptr(const void *obj) +{ +#if SIZEOF_VOID_P == 8 + return (void*)_Py_atomic_load_uint64((const uint64_t *)obj); +#else + return (void*)_Py_atomic_load_uint32((const uint32_t *)obj); +#endif +} + +static inline intptr_t +_Py_atomic_load_intptr(const intptr_t *obj) +{ + _Py_atomic_ASSERT_ARG_TYPE(void*); + return (intptr_t)_Py_atomic_load_ptr((void*)obj); +} + +static inline uintptr_t +_Py_atomic_load_uintptr(const uintptr_t *obj) +{ + _Py_atomic_ASSERT_ARG_TYPE(void*); + return (uintptr_t)_Py_atomic_load_ptr((void*)obj); +} + +static inline Py_ssize_t +_Py_atomic_load_ssize(const Py_ssize_t *obj) +{ + _Py_atomic_ASSERT_ARG_TYPE(void*); + return (Py_ssize_t)_Py_atomic_load_ptr((void*)obj); +} + + +// --- _Py_atomic_load_relaxed ----------------------------------------------- + +static inline int +_Py_atomic_load_int_relaxed(const int *obj) +{ + return *(volatile int *)obj; +} + +static inline int8_t +_Py_atomic_load_int8_relaxed(const int8_t *obj) +{ + return *(volatile int8_t *)obj; +} + +static inline int16_t +_Py_atomic_load_int16_relaxed(const int16_t *obj) +{ + return *(volatile int16_t *)obj; +} + +static inline int32_t +_Py_atomic_load_int32_relaxed(const int32_t *obj) +{ + return *(volatile int32_t *)obj; +} + +static inline int64_t +_Py_atomic_load_int64_relaxed(const int64_t *obj) +{ + return *(volatile int64_t *)obj; +} + +static inline intptr_t +_Py_atomic_load_intptr_relaxed(const intptr_t *obj) +{ + return *(volatile intptr_t *)obj; +} + +static inline uint8_t +_Py_atomic_load_uint8_relaxed(const uint8_t *obj) +{ + return *(volatile uint8_t *)obj; +} + +static inline uint16_t +_Py_atomic_load_uint16_relaxed(const uint16_t *obj) +{ + return *(volatile uint16_t *)obj; +} + +static inline uint32_t +_Py_atomic_load_uint32_relaxed(const uint32_t *obj) +{ + return *(volatile uint32_t *)obj; +} + +static inline uint64_t +_Py_atomic_load_uint64_relaxed(const uint64_t *obj) +{ + return *(volatile uint64_t *)obj; +} + +static inline uintptr_t +_Py_atomic_load_uintptr_relaxed(const uintptr_t *obj) +{ + return *(volatile uintptr_t *)obj; +} + +static inline unsigned int +_Py_atomic_load_uint_relaxed(const unsigned int *obj) +{ + return *(volatile unsigned int *)obj; +} + +static inline Py_ssize_t +_Py_atomic_load_ssize_relaxed(const Py_ssize_t *obj) +{ + return *(volatile Py_ssize_t *)obj; +} + +static inline void* +_Py_atomic_load_ptr_relaxed(const void *obj) +{ + return *(void * volatile *)obj; +} + +static inline unsigned long long +_Py_atomic_load_ullong_relaxed(const 
unsigned long long *obj) +{ + return *(volatile unsigned long long *)obj; +} + + +// --- _Py_atomic_store ------------------------------------------------------ + +static inline void +_Py_atomic_store_int(int *obj, int value) +{ + (void)_Py_atomic_exchange_int(obj, value); +} + +static inline void +_Py_atomic_store_int8(int8_t *obj, int8_t value) +{ + (void)_Py_atomic_exchange_int8(obj, value); +} + +static inline void +_Py_atomic_store_int16(int16_t *obj, int16_t value) +{ + (void)_Py_atomic_exchange_int16(obj, value); +} + +static inline void +_Py_atomic_store_int32(int32_t *obj, int32_t value) +{ + (void)_Py_atomic_exchange_int32(obj, value); +} + +static inline void +_Py_atomic_store_int64(int64_t *obj, int64_t value) +{ + (void)_Py_atomic_exchange_int64(obj, value); +} + +static inline void +_Py_atomic_store_intptr(intptr_t *obj, intptr_t value) +{ + (void)_Py_atomic_exchange_intptr(obj, value); +} + +static inline void +_Py_atomic_store_uint8(uint8_t *obj, uint8_t value) +{ + (void)_Py_atomic_exchange_uint8(obj, value); +} + +static inline void +_Py_atomic_store_uint16(uint16_t *obj, uint16_t value) +{ + (void)_Py_atomic_exchange_uint16(obj, value); +} + +static inline void +_Py_atomic_store_uint32(uint32_t *obj, uint32_t value) +{ + (void)_Py_atomic_exchange_uint32(obj, value); +} + +static inline void +_Py_atomic_store_uint64(uint64_t *obj, uint64_t value) +{ + (void)_Py_atomic_exchange_uint64(obj, value); +} + +static inline void +_Py_atomic_store_uintptr(uintptr_t *obj, uintptr_t value) +{ + (void)_Py_atomic_exchange_uintptr(obj, value); +} + +static inline void +_Py_atomic_store_uint(unsigned int *obj, unsigned int value) +{ + (void)_Py_atomic_exchange_uint(obj, value); +} + +static inline void +_Py_atomic_store_ptr(void *obj, void *value) +{ + (void)_Py_atomic_exchange_ptr(obj, value); +} + +static inline void +_Py_atomic_store_ssize(Py_ssize_t *obj, Py_ssize_t value) +{ + (void)_Py_atomic_exchange_ssize(obj, value); +} + + +// --- _Py_atomic_store_relaxed ---------------------------------------------- + +static inline void +_Py_atomic_store_int_relaxed(int *obj, int value) +{ + *(volatile int *)obj = value; +} + +static inline void +_Py_atomic_store_int8_relaxed(int8_t *obj, int8_t value) +{ + *(volatile int8_t *)obj = value; +} + +static inline void +_Py_atomic_store_int16_relaxed(int16_t *obj, int16_t value) +{ + *(volatile int16_t *)obj = value; +} + +static inline void +_Py_atomic_store_int32_relaxed(int32_t *obj, int32_t value) +{ + *(volatile int32_t *)obj = value; +} + +static inline void +_Py_atomic_store_int64_relaxed(int64_t *obj, int64_t value) +{ + *(volatile int64_t *)obj = value; +} + +static inline void +_Py_atomic_store_intptr_relaxed(intptr_t *obj, intptr_t value) +{ + *(volatile intptr_t *)obj = value; +} + +static inline void +_Py_atomic_store_uint8_relaxed(uint8_t *obj, uint8_t value) +{ + *(volatile uint8_t *)obj = value; +} + +static inline void +_Py_atomic_store_uint16_relaxed(uint16_t *obj, uint16_t value) +{ + *(volatile uint16_t *)obj = value; +} + +static inline void +_Py_atomic_store_uint32_relaxed(uint32_t *obj, uint32_t value) +{ + *(volatile uint32_t *)obj = value; +} + +static inline void +_Py_atomic_store_uint64_relaxed(uint64_t *obj, uint64_t value) +{ + *(volatile uint64_t *)obj = value; +} + +static inline void +_Py_atomic_store_uintptr_relaxed(uintptr_t *obj, uintptr_t value) +{ + *(volatile uintptr_t *)obj = value; +} + +static inline void +_Py_atomic_store_uint_relaxed(unsigned int *obj, unsigned int value) +{ + *(volatile unsigned int 
*)obj = value; +} + +static inline void +_Py_atomic_store_ptr_relaxed(void *obj, void* value) +{ + *(void * volatile *)obj = value; +} + +static inline void +_Py_atomic_store_ssize_relaxed(Py_ssize_t *obj, Py_ssize_t value) +{ + *(volatile Py_ssize_t *)obj = value; +} + +static inline void +_Py_atomic_store_ullong_relaxed(unsigned long long *obj, + unsigned long long value) +{ + *(volatile unsigned long long *)obj = value; +} + + +// --- _Py_atomic_load_ptr_acquire / _Py_atomic_store_ptr_release ------------ + +static inline void * +_Py_atomic_load_ptr_acquire(const void *obj) +{ +#if defined(_M_X64) || defined(_M_IX86) + return *(void * volatile *)obj; +#elif defined(_M_ARM64) + return (void *)__ldar64((unsigned __int64 volatile *)obj); +#else +# error "no implementation of _Py_atomic_load_ptr_acquire" +#endif +} + +static inline uintptr_t +_Py_atomic_load_uintptr_acquire(const uintptr_t *obj) +{ +#if defined(_M_X64) || defined(_M_IX86) + return *(uintptr_t volatile *)obj; +#elif defined(_M_ARM64) + return (uintptr_t)__ldar64((unsigned __int64 volatile *)obj); +#else +# error "no implementation of _Py_atomic_load_uintptr_acquire" +#endif +} + +static inline void +_Py_atomic_store_ptr_release(void *obj, void *value) +{ +#if defined(_M_X64) || defined(_M_IX86) + *(void * volatile *)obj = value; +#elif defined(_M_ARM64) + __stlr64((unsigned __int64 volatile *)obj, (uintptr_t)value); +#else +# error "no implementation of _Py_atomic_store_ptr_release" +#endif +} + +static inline void +_Py_atomic_store_uintptr_release(uintptr_t *obj, uintptr_t value) +{ +#if defined(_M_X64) || defined(_M_IX86) + *(uintptr_t volatile *)obj = value; +#elif defined(_M_ARM64) + _Py_atomic_ASSERT_ARG_TYPE(unsigned __int64); + __stlr64((unsigned __int64 volatile *)obj, (unsigned __int64)value); +#else +# error "no implementation of _Py_atomic_store_uintptr_release" +#endif +} + +static inline void +_Py_atomic_store_int_release(int *obj, int value) +{ +#if defined(_M_X64) || defined(_M_IX86) + *(int volatile *)obj = value; +#elif defined(_M_ARM64) + _Py_atomic_ASSERT_ARG_TYPE(unsigned __int32); + __stlr32((unsigned __int32 volatile *)obj, (unsigned __int32)value); +#else +# error "no implementation of _Py_atomic_store_int_release" +#endif +} + +static inline void +_Py_atomic_store_ssize_release(Py_ssize_t *obj, Py_ssize_t value) +{ +#if defined(_M_X64) || defined(_M_IX86) + *(Py_ssize_t volatile *)obj = value; +#elif defined(_M_ARM64) + __stlr64((unsigned __int64 volatile *)obj, (unsigned __int64)value); +#else +# error "no implementation of _Py_atomic_store_ssize_release" +#endif +} + +static inline int +_Py_atomic_load_int_acquire(const int *obj) +{ +#if defined(_M_X64) || defined(_M_IX86) + return *(int volatile *)obj; +#elif defined(_M_ARM64) + _Py_atomic_ASSERT_ARG_TYPE(unsigned __int32); + return (int)__ldar32((unsigned __int32 volatile *)obj); +#else +# error "no implementation of _Py_atomic_load_int_acquire" +#endif +} + +static inline void +_Py_atomic_store_uint32_release(uint32_t *obj, uint32_t value) +{ +#if defined(_M_X64) || defined(_M_IX86) + *(uint32_t volatile *)obj = value; +#elif defined(_M_ARM64) + _Py_atomic_ASSERT_ARG_TYPE(unsigned __int32); + __stlr32((unsigned __int32 volatile *)obj, (unsigned __int32)value); +#else +# error "no implementation of _Py_atomic_store_uint32_release" +#endif +} + +static inline void +_Py_atomic_store_uint64_release(uint64_t *obj, uint64_t value) +{ +#if defined(_M_X64) || defined(_M_IX86) + *(uint64_t volatile *)obj = value; +#elif defined(_M_ARM64) + 
_Py_atomic_ASSERT_ARG_TYPE(unsigned __int64); + __stlr64((unsigned __int64 volatile *)obj, (unsigned __int64)value); +#else +# error "no implementation of _Py_atomic_store_uint64_release" +#endif +} + +static inline uint64_t +_Py_atomic_load_uint64_acquire(const uint64_t *obj) +{ +#if defined(_M_X64) || defined(_M_IX86) + return *(uint64_t volatile *)obj; +#elif defined(_M_ARM64) + _Py_atomic_ASSERT_ARG_TYPE(__int64); + return (uint64_t)__ldar64((unsigned __int64 volatile *)obj); +#else +# error "no implementation of _Py_atomic_load_uint64_acquire" +#endif +} + +static inline uint32_t +_Py_atomic_load_uint32_acquire(const uint32_t *obj) +{ +#if defined(_M_X64) || defined(_M_IX86) + return *(uint32_t volatile *)obj; +#elif defined(_M_ARM64) + return (uint32_t)__ldar32((uint32_t volatile *)obj); +#else +# error "no implementation of _Py_atomic_load_uint32_acquire" +#endif +} + +static inline Py_ssize_t +_Py_atomic_load_ssize_acquire(const Py_ssize_t *obj) +{ +#if defined(_M_X64) || defined(_M_IX86) + return *(Py_ssize_t volatile *)obj; +#elif defined(_M_ARM64) + return (Py_ssize_t)__ldar64((unsigned __int64 volatile *)obj); +#else +# error "no implementation of _Py_atomic_load_ssize_acquire" +#endif +} + +// --- _Py_atomic_fence ------------------------------------------------------ + + static inline void +_Py_atomic_fence_seq_cst(void) +{ +#if defined(_M_ARM64) + __dmb(_ARM64_BARRIER_ISH); +#elif defined(_M_X64) + __faststorefence(); +#elif defined(_M_IX86) + _mm_mfence(); +#else +# error "no implementation of _Py_atomic_fence_seq_cst" +#endif +} + + static inline void +_Py_atomic_fence_acquire(void) +{ +#if defined(_M_ARM64) + __dmb(_ARM64_BARRIER_ISHLD); +#elif defined(_M_X64) || defined(_M_IX86) + _ReadBarrier(); +#else +# error "no implementation of _Py_atomic_fence_acquire" +#endif +} + + static inline void +_Py_atomic_fence_release(void) +{ +#if defined(_M_ARM64) + __dmb(_ARM64_BARRIER_ISH); +#elif defined(_M_X64) || defined(_M_IX86) + _ReadWriteBarrier(); +#else +# error "no implementation of _Py_atomic_fence_release" +#endif +} + +#undef _Py_atomic_ASSERT_ARG_TYPE diff --git a/Include/cpython/pyatomic_std.h b/Include/cpython/pyatomic_std.h new file mode 100644 index 0000000000000000000000000000000000000000..7c71e94c68f8e6d16664201bd71f8538c10d361d --- /dev/null +++ b/Include/cpython/pyatomic_std.h @@ -0,0 +1,976 @@ +// This is the implementation of Python atomic operations using C++11 or C11 +// atomics. Note that the pyatomic_gcc.h implementation is preferred for GCC +// compatible compilers, even if they support C++11 atomics. 
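+//
+// A minimal usage sketch (illustrative only; `counter` is a hypothetical
+// variable, not part of this header):
+//
+//      static int counter;
+//      (void)_Py_atomic_add_int(&counter, 1);       // fetch-and-add; returns the old value
+//      int seen = _Py_atomic_load_int(&counter);    // sequentially consistent load
+//      _Py_atomic_store_int_relaxed(&counter, 0);   // relaxed store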
+ +#ifndef Py_ATOMIC_STD_H +# error "this header file must not be included directly" +#endif + +#ifdef __cplusplus +extern "C++" { +# include <atomic> +} +# define _Py_USING_STD using namespace std +# define _Atomic(tp) atomic<tp> +#else +# define _Py_USING_STD +# include <stdatomic.h> +#endif + + +// --- _Py_atomic_add -------------------------------------------------------- + +static inline int +_Py_atomic_add_int(int *obj, int value) +{ + _Py_USING_STD; + return atomic_fetch_add((_Atomic(int)*)obj, value); +} + +static inline int8_t +_Py_atomic_add_int8(int8_t *obj, int8_t value) +{ + _Py_USING_STD; + return atomic_fetch_add((_Atomic(int8_t)*)obj, value); +} + +static inline int16_t +_Py_atomic_add_int16(int16_t *obj, int16_t value) +{ + _Py_USING_STD; + return atomic_fetch_add((_Atomic(int16_t)*)obj, value); +} + +static inline int32_t +_Py_atomic_add_int32(int32_t *obj, int32_t value) +{ + _Py_USING_STD; + return atomic_fetch_add((_Atomic(int32_t)*)obj, value); +} + +static inline int64_t +_Py_atomic_add_int64(int64_t *obj, int64_t value) +{ + _Py_USING_STD; + return atomic_fetch_add((_Atomic(int64_t)*)obj, value); +} + +static inline intptr_t +_Py_atomic_add_intptr(intptr_t *obj, intptr_t value) +{ + _Py_USING_STD; + return atomic_fetch_add((_Atomic(intptr_t)*)obj, value); +} + +static inline unsigned int +_Py_atomic_add_uint(unsigned int *obj, unsigned int value) +{ + _Py_USING_STD; + return atomic_fetch_add((_Atomic(unsigned int)*)obj, value); +} + +static inline uint8_t +_Py_atomic_add_uint8(uint8_t *obj, uint8_t value) +{ + _Py_USING_STD; + return atomic_fetch_add((_Atomic(uint8_t)*)obj, value); +} + +static inline uint16_t +_Py_atomic_add_uint16(uint16_t *obj, uint16_t value) +{ + _Py_USING_STD; + return atomic_fetch_add((_Atomic(uint16_t)*)obj, value); +} + +static inline uint32_t +_Py_atomic_add_uint32(uint32_t *obj, uint32_t value) +{ + _Py_USING_STD; + return atomic_fetch_add((_Atomic(uint32_t)*)obj, value); +} + +static inline uint64_t +_Py_atomic_add_uint64(uint64_t *obj, uint64_t value) +{ + _Py_USING_STD; + return atomic_fetch_add((_Atomic(uint64_t)*)obj, value); +} + +static inline uintptr_t +_Py_atomic_add_uintptr(uintptr_t *obj, uintptr_t value) +{ + _Py_USING_STD; + return atomic_fetch_add((_Atomic(uintptr_t)*)obj, value); +} + +static inline Py_ssize_t +_Py_atomic_add_ssize(Py_ssize_t *obj, Py_ssize_t value) +{ + _Py_USING_STD; + return atomic_fetch_add((_Atomic(Py_ssize_t)*)obj, value); +} + + +// --- _Py_atomic_compare_exchange ------------------------------------------- + +static inline int +_Py_atomic_compare_exchange_int(int *obj, int *expected, int desired) +{ + _Py_USING_STD; + return atomic_compare_exchange_strong((_Atomic(int)*)obj, + expected, desired); +} + +static inline int +_Py_atomic_compare_exchange_int8(int8_t *obj, int8_t *expected, int8_t desired) +{ + _Py_USING_STD; + return atomic_compare_exchange_strong((_Atomic(int8_t)*)obj, + expected, desired); +} + +static inline int +_Py_atomic_compare_exchange_int16(int16_t *obj, int16_t *expected, int16_t desired) +{ + _Py_USING_STD; + return atomic_compare_exchange_strong((_Atomic(int16_t)*)obj, + expected, desired); +} + +static inline int +_Py_atomic_compare_exchange_int32(int32_t *obj, int32_t *expected, int32_t desired) +{ + _Py_USING_STD; + return atomic_compare_exchange_strong((_Atomic(int32_t)*)obj, + expected, desired); +} + +static inline int +_Py_atomic_compare_exchange_int64(int64_t *obj, int64_t *expected, int64_t desired) +{ + _Py_USING_STD; + return atomic_compare_exchange_strong((_Atomic(int64_t)*)obj, + expected, 
desired); +} + +static inline int +_Py_atomic_compare_exchange_intptr(intptr_t *obj, intptr_t *expected, intptr_t desired) +{ + _Py_USING_STD; + return atomic_compare_exchange_strong((_Atomic(intptr_t)*)obj, + expected, desired); +} + +static inline int +_Py_atomic_compare_exchange_uint(unsigned int *obj, unsigned int *expected, unsigned int desired) +{ + _Py_USING_STD; + return atomic_compare_exchange_strong((_Atomic(unsigned int)*)obj, + expected, desired); +} + +static inline int +_Py_atomic_compare_exchange_uint8(uint8_t *obj, uint8_t *expected, uint8_t desired) +{ + _Py_USING_STD; + return atomic_compare_exchange_strong((_Atomic(uint8_t)*)obj, + expected, desired); +} + +static inline int +_Py_atomic_compare_exchange_uint16(uint16_t *obj, uint16_t *expected, uint16_t desired) +{ + _Py_USING_STD; + return atomic_compare_exchange_strong((_Atomic(uint16_t)*)obj, + expected, desired); +} + +static inline int +_Py_atomic_compare_exchange_uint32(uint32_t *obj, uint32_t *expected, uint32_t desired) +{ + _Py_USING_STD; + return atomic_compare_exchange_strong((_Atomic(uint32_t)*)obj, + expected, desired); +} + +static inline int +_Py_atomic_compare_exchange_uint64(uint64_t *obj, uint64_t *expected, uint64_t desired) +{ + _Py_USING_STD; + return atomic_compare_exchange_strong((_Atomic(uint64_t)*)obj, + expected, desired); +} + +static inline int +_Py_atomic_compare_exchange_uintptr(uintptr_t *obj, uintptr_t *expected, uintptr_t desired) +{ + _Py_USING_STD; + return atomic_compare_exchange_strong((_Atomic(uintptr_t)*)obj, + expected, desired); +} + +static inline int +_Py_atomic_compare_exchange_ssize(Py_ssize_t *obj, Py_ssize_t *expected, Py_ssize_t desired) +{ + _Py_USING_STD; + return atomic_compare_exchange_strong((_Atomic(Py_ssize_t)*)obj, + expected, desired); +} + +static inline int +_Py_atomic_compare_exchange_ptr(void *obj, void *expected, void *desired) +{ + _Py_USING_STD; + return atomic_compare_exchange_strong((_Atomic(void *)*)obj, + (void **)expected, desired); +} + + +// --- _Py_atomic_exchange --------------------------------------------------- + +static inline int +_Py_atomic_exchange_int(int *obj, int value) +{ + _Py_USING_STD; + return atomic_exchange((_Atomic(int)*)obj, value); +} + +static inline int8_t +_Py_atomic_exchange_int8(int8_t *obj, int8_t value) +{ + _Py_USING_STD; + return atomic_exchange((_Atomic(int8_t)*)obj, value); +} + +static inline int16_t +_Py_atomic_exchange_int16(int16_t *obj, int16_t value) +{ + _Py_USING_STD; + return atomic_exchange((_Atomic(int16_t)*)obj, value); +} + +static inline int32_t +_Py_atomic_exchange_int32(int32_t *obj, int32_t value) +{ + _Py_USING_STD; + return atomic_exchange((_Atomic(int32_t)*)obj, value); +} + +static inline int64_t +_Py_atomic_exchange_int64(int64_t *obj, int64_t value) +{ + _Py_USING_STD; + return atomic_exchange((_Atomic(int64_t)*)obj, value); +} + +static inline intptr_t +_Py_atomic_exchange_intptr(intptr_t *obj, intptr_t value) +{ + _Py_USING_STD; + return atomic_exchange((_Atomic(intptr_t)*)obj, value); +} + +static inline unsigned int +_Py_atomic_exchange_uint(unsigned int *obj, unsigned int value) +{ + _Py_USING_STD; + return atomic_exchange((_Atomic(unsigned int)*)obj, value); +} + +static inline uint8_t +_Py_atomic_exchange_uint8(uint8_t *obj, uint8_t value) +{ + _Py_USING_STD; + return atomic_exchange((_Atomic(uint8_t)*)obj, value); +} + +static inline uint16_t +_Py_atomic_exchange_uint16(uint16_t *obj, uint16_t value) +{ + _Py_USING_STD; + return atomic_exchange((_Atomic(uint16_t)*)obj, value); +} + 
+static inline uint32_t +_Py_atomic_exchange_uint32(uint32_t *obj, uint32_t value) +{ + _Py_USING_STD; + return atomic_exchange((_Atomic(uint32_t)*)obj, value); +} + +static inline uint64_t +_Py_atomic_exchange_uint64(uint64_t *obj, uint64_t value) +{ + _Py_USING_STD; + return atomic_exchange((_Atomic(uint64_t)*)obj, value); +} + +static inline uintptr_t +_Py_atomic_exchange_uintptr(uintptr_t *obj, uintptr_t value) +{ + _Py_USING_STD; + return atomic_exchange((_Atomic(uintptr_t)*)obj, value); +} + +static inline Py_ssize_t +_Py_atomic_exchange_ssize(Py_ssize_t *obj, Py_ssize_t value) +{ + _Py_USING_STD; + return atomic_exchange((_Atomic(Py_ssize_t)*)obj, value); +} + +static inline void* +_Py_atomic_exchange_ptr(void *obj, void *value) +{ + _Py_USING_STD; + return atomic_exchange((_Atomic(void *)*)obj, value); +} + + +// --- _Py_atomic_and -------------------------------------------------------- + +static inline uint8_t +_Py_atomic_and_uint8(uint8_t *obj, uint8_t value) +{ + _Py_USING_STD; + return atomic_fetch_and((_Atomic(uint8_t)*)obj, value); +} + +static inline uint16_t +_Py_atomic_and_uint16(uint16_t *obj, uint16_t value) +{ + _Py_USING_STD; + return atomic_fetch_and((_Atomic(uint16_t)*)obj, value); +} + +static inline uint32_t +_Py_atomic_and_uint32(uint32_t *obj, uint32_t value) +{ + _Py_USING_STD; + return atomic_fetch_and((_Atomic(uint32_t)*)obj, value); +} + +static inline uint64_t +_Py_atomic_and_uint64(uint64_t *obj, uint64_t value) +{ + _Py_USING_STD; + return atomic_fetch_and((_Atomic(uint64_t)*)obj, value); +} + +static inline uintptr_t +_Py_atomic_and_uintptr(uintptr_t *obj, uintptr_t value) +{ + _Py_USING_STD; + return atomic_fetch_and((_Atomic(uintptr_t)*)obj, value); +} + + +// --- _Py_atomic_or --------------------------------------------------------- + +static inline uint8_t +_Py_atomic_or_uint8(uint8_t *obj, uint8_t value) +{ + _Py_USING_STD; + return atomic_fetch_or((_Atomic(uint8_t)*)obj, value); +} + +static inline uint16_t +_Py_atomic_or_uint16(uint16_t *obj, uint16_t value) +{ + _Py_USING_STD; + return atomic_fetch_or((_Atomic(uint16_t)*)obj, value); +} + +static inline uint32_t +_Py_atomic_or_uint32(uint32_t *obj, uint32_t value) +{ + _Py_USING_STD; + return atomic_fetch_or((_Atomic(uint32_t)*)obj, value); +} + +static inline uint64_t +_Py_atomic_or_uint64(uint64_t *obj, uint64_t value) +{ + _Py_USING_STD; + return atomic_fetch_or((_Atomic(uint64_t)*)obj, value); +} + +static inline uintptr_t +_Py_atomic_or_uintptr(uintptr_t *obj, uintptr_t value) +{ + _Py_USING_STD; + return atomic_fetch_or((_Atomic(uintptr_t)*)obj, value); +} + + +// --- _Py_atomic_load ------------------------------------------------------- + +static inline int +_Py_atomic_load_int(const int *obj) +{ + _Py_USING_STD; + return atomic_load((const _Atomic(int)*)obj); +} + +static inline int8_t +_Py_atomic_load_int8(const int8_t *obj) +{ + _Py_USING_STD; + return atomic_load((const _Atomic(int8_t)*)obj); +} + +static inline int16_t +_Py_atomic_load_int16(const int16_t *obj) +{ + _Py_USING_STD; + return atomic_load((const _Atomic(int16_t)*)obj); +} + +static inline int32_t +_Py_atomic_load_int32(const int32_t *obj) +{ + _Py_USING_STD; + return atomic_load((const _Atomic(int32_t)*)obj); +} + +static inline int64_t +_Py_atomic_load_int64(const int64_t *obj) +{ + _Py_USING_STD; + return atomic_load((const _Atomic(int64_t)*)obj); +} + +static inline intptr_t +_Py_atomic_load_intptr(const intptr_t *obj) +{ + _Py_USING_STD; + return atomic_load((const _Atomic(intptr_t)*)obj); +} + +static inline 
uint8_t +_Py_atomic_load_uint8(const uint8_t *obj) +{ + _Py_USING_STD; + return atomic_load((const _Atomic(uint8_t)*)obj); +} + +static inline uint16_t +_Py_atomic_load_uint16(const uint16_t *obj) +{ + _Py_USING_STD; + return atomic_load((const _Atomic(uint16_t)*)obj); +} + +static inline uint32_t +_Py_atomic_load_uint32(const uint32_t *obj) +{ + _Py_USING_STD; + return atomic_load((const _Atomic(uint32_t)*)obj); +} + +static inline uint64_t +_Py_atomic_load_uint64(const uint64_t *obj) +{ + _Py_USING_STD; + return atomic_load((const _Atomic(uint64_t)*)obj); +} + +static inline uintptr_t +_Py_atomic_load_uintptr(const uintptr_t *obj) +{ + _Py_USING_STD; + return atomic_load((const _Atomic(uintptr_t)*)obj); +} + +static inline unsigned int +_Py_atomic_load_uint(const unsigned int *obj) +{ + _Py_USING_STD; + return atomic_load((const _Atomic(unsigned int)*)obj); +} + +static inline Py_ssize_t +_Py_atomic_load_ssize(const Py_ssize_t *obj) +{ + _Py_USING_STD; + return atomic_load((const _Atomic(Py_ssize_t)*)obj); +} + +static inline void* +_Py_atomic_load_ptr(const void *obj) +{ + _Py_USING_STD; + return atomic_load((const _Atomic(void*)*)obj); +} + + +// --- _Py_atomic_load_relaxed ----------------------------------------------- + +static inline int +_Py_atomic_load_int_relaxed(const int *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(int)*)obj, + memory_order_relaxed); +} + +static inline int8_t +_Py_atomic_load_int8_relaxed(const int8_t *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(int8_t)*)obj, + memory_order_relaxed); +} + +static inline int16_t +_Py_atomic_load_int16_relaxed(const int16_t *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(int16_t)*)obj, + memory_order_relaxed); +} + +static inline int32_t +_Py_atomic_load_int32_relaxed(const int32_t *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(int32_t)*)obj, + memory_order_relaxed); +} + +static inline int64_t +_Py_atomic_load_int64_relaxed(const int64_t *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(int64_t)*)obj, + memory_order_relaxed); +} + +static inline intptr_t +_Py_atomic_load_intptr_relaxed(const intptr_t *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(intptr_t)*)obj, + memory_order_relaxed); +} + +static inline uint8_t +_Py_atomic_load_uint8_relaxed(const uint8_t *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(uint8_t)*)obj, + memory_order_relaxed); +} + +static inline uint16_t +_Py_atomic_load_uint16_relaxed(const uint16_t *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(uint16_t)*)obj, + memory_order_relaxed); +} + +static inline uint32_t +_Py_atomic_load_uint32_relaxed(const uint32_t *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(uint32_t)*)obj, + memory_order_relaxed); +} + +static inline uint64_t +_Py_atomic_load_uint64_relaxed(const uint64_t *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(uint64_t)*)obj, + memory_order_relaxed); +} + +static inline uintptr_t +_Py_atomic_load_uintptr_relaxed(const uintptr_t *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(uintptr_t)*)obj, + memory_order_relaxed); +} + +static inline unsigned int +_Py_atomic_load_uint_relaxed(const unsigned int *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(unsigned int)*)obj, + memory_order_relaxed); +} + +static inline Py_ssize_t +_Py_atomic_load_ssize_relaxed(const Py_ssize_t *obj) 
+{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(Py_ssize_t)*)obj, + memory_order_relaxed); +} + +static inline void* +_Py_atomic_load_ptr_relaxed(const void *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(void*)*)obj, + memory_order_relaxed); +} + +static inline unsigned long long +_Py_atomic_load_ullong_relaxed(const unsigned long long *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(unsigned long long)*)obj, + memory_order_relaxed); +} + + +// --- _Py_atomic_store ------------------------------------------------------ + +static inline void +_Py_atomic_store_int(int *obj, int value) +{ + _Py_USING_STD; + atomic_store((_Atomic(int)*)obj, value); +} + +static inline void +_Py_atomic_store_int8(int8_t *obj, int8_t value) +{ + _Py_USING_STD; + atomic_store((_Atomic(int8_t)*)obj, value); +} + +static inline void +_Py_atomic_store_int16(int16_t *obj, int16_t value) +{ + _Py_USING_STD; + atomic_store((_Atomic(int16_t)*)obj, value); +} + +static inline void +_Py_atomic_store_int32(int32_t *obj, int32_t value) +{ + _Py_USING_STD; + atomic_store((_Atomic(int32_t)*)obj, value); +} + +static inline void +_Py_atomic_store_int64(int64_t *obj, int64_t value) +{ + _Py_USING_STD; + atomic_store((_Atomic(int64_t)*)obj, value); +} + +static inline void +_Py_atomic_store_intptr(intptr_t *obj, intptr_t value) +{ + _Py_USING_STD; + atomic_store((_Atomic(intptr_t)*)obj, value); +} + +static inline void +_Py_atomic_store_uint8(uint8_t *obj, uint8_t value) +{ + _Py_USING_STD; + atomic_store((_Atomic(uint8_t)*)obj, value); +} + +static inline void +_Py_atomic_store_uint16(uint16_t *obj, uint16_t value) +{ + _Py_USING_STD; + atomic_store((_Atomic(uint16_t)*)obj, value); +} + +static inline void +_Py_atomic_store_uint32(uint32_t *obj, uint32_t value) +{ + _Py_USING_STD; + atomic_store((_Atomic(uint32_t)*)obj, value); +} + +static inline void +_Py_atomic_store_uint64(uint64_t *obj, uint64_t value) +{ + _Py_USING_STD; + atomic_store((_Atomic(uint64_t)*)obj, value); +} + +static inline void +_Py_atomic_store_uintptr(uintptr_t *obj, uintptr_t value) +{ + _Py_USING_STD; + atomic_store((_Atomic(uintptr_t)*)obj, value); +} + +static inline void +_Py_atomic_store_uint(unsigned int *obj, unsigned int value) +{ + _Py_USING_STD; + atomic_store((_Atomic(unsigned int)*)obj, value); +} + +static inline void +_Py_atomic_store_ptr(void *obj, void *value) +{ + _Py_USING_STD; + atomic_store((_Atomic(void*)*)obj, value); +} + +static inline void +_Py_atomic_store_ssize(Py_ssize_t *obj, Py_ssize_t value) +{ + _Py_USING_STD; + atomic_store((_Atomic(Py_ssize_t)*)obj, value); +} + + +// --- _Py_atomic_store_relaxed ---------------------------------------------- + +static inline void +_Py_atomic_store_int_relaxed(int *obj, int value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(int)*)obj, value, + memory_order_relaxed); +} + +static inline void +_Py_atomic_store_int8_relaxed(int8_t *obj, int8_t value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(int8_t)*)obj, value, + memory_order_relaxed); +} + +static inline void +_Py_atomic_store_int16_relaxed(int16_t *obj, int16_t value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(int16_t)*)obj, value, + memory_order_relaxed); +} + +static inline void +_Py_atomic_store_int32_relaxed(int32_t *obj, int32_t value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(int32_t)*)obj, value, + memory_order_relaxed); +} + +static inline void +_Py_atomic_store_int64_relaxed(int64_t *obj, int64_t value) +{ + _Py_USING_STD; + 
atomic_store_explicit((_Atomic(int64_t)*)obj, value, + memory_order_relaxed); +} + +static inline void +_Py_atomic_store_intptr_relaxed(intptr_t *obj, intptr_t value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(intptr_t)*)obj, value, + memory_order_relaxed); +} + +static inline void +_Py_atomic_store_uint8_relaxed(uint8_t *obj, uint8_t value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(uint8_t)*)obj, value, + memory_order_relaxed); +} + +static inline void +_Py_atomic_store_uint16_relaxed(uint16_t *obj, uint16_t value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(uint16_t)*)obj, value, + memory_order_relaxed); +} + +static inline void +_Py_atomic_store_uint32_relaxed(uint32_t *obj, uint32_t value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(uint32_t)*)obj, value, + memory_order_relaxed); +} + +static inline void +_Py_atomic_store_uint64_relaxed(uint64_t *obj, uint64_t value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(uint64_t)*)obj, value, + memory_order_relaxed); +} + +static inline void +_Py_atomic_store_uintptr_relaxed(uintptr_t *obj, uintptr_t value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(uintptr_t)*)obj, value, + memory_order_relaxed); +} + +static inline void +_Py_atomic_store_uint_relaxed(unsigned int *obj, unsigned int value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(unsigned int)*)obj, value, + memory_order_relaxed); +} + +static inline void +_Py_atomic_store_ptr_relaxed(void *obj, void *value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(void*)*)obj, value, + memory_order_relaxed); +} + +static inline void +_Py_atomic_store_ssize_relaxed(Py_ssize_t *obj, Py_ssize_t value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(Py_ssize_t)*)obj, value, + memory_order_relaxed); +} + +static inline void +_Py_atomic_store_ullong_relaxed(unsigned long long *obj, + unsigned long long value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(unsigned long long)*)obj, value, + memory_order_relaxed); +} + + +// --- _Py_atomic_load_ptr_acquire / _Py_atomic_store_ptr_release ------------ + +static inline void * +_Py_atomic_load_ptr_acquire(const void *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(void*)*)obj, + memory_order_acquire); +} + +static inline uintptr_t +_Py_atomic_load_uintptr_acquire(const uintptr_t *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(uintptr_t)*)obj, + memory_order_acquire); +} + +static inline void +_Py_atomic_store_ptr_release(void *obj, void *value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(void*)*)obj, value, + memory_order_release); +} + +static inline void +_Py_atomic_store_uintptr_release(uintptr_t *obj, uintptr_t value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(uintptr_t)*)obj, value, + memory_order_release); +} + +static inline void +_Py_atomic_store_int_release(int *obj, int value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(int)*)obj, value, + memory_order_release); +} + +static inline void +_Py_atomic_store_ssize_release(Py_ssize_t *obj, Py_ssize_t value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(Py_ssize_t)*)obj, value, + memory_order_release); +} + +static inline int +_Py_atomic_load_int_acquire(const int *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(int)*)obj, + memory_order_acquire); +} + +static inline void +_Py_atomic_store_uint32_release(uint32_t *obj, uint32_t value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(uint32_t)*)obj, value, + 
memory_order_release); +} + +static inline void +_Py_atomic_store_uint64_release(uint64_t *obj, uint64_t value) +{ + _Py_USING_STD; + atomic_store_explicit((_Atomic(uint64_t)*)obj, value, + memory_order_release); +} + +static inline uint64_t +_Py_atomic_load_uint64_acquire(const uint64_t *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(uint64_t)*)obj, + memory_order_acquire); +} + +static inline uint32_t +_Py_atomic_load_uint32_acquire(const uint32_t *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(uint32_t)*)obj, + memory_order_acquire); +} + +static inline Py_ssize_t +_Py_atomic_load_ssize_acquire(const Py_ssize_t *obj) +{ + _Py_USING_STD; + return atomic_load_explicit((const _Atomic(Py_ssize_t)*)obj, + memory_order_acquire); +} + + +// --- _Py_atomic_fence ------------------------------------------------------ + + static inline void +_Py_atomic_fence_seq_cst(void) +{ + _Py_USING_STD; + atomic_thread_fence(memory_order_seq_cst); +} + + static inline void +_Py_atomic_fence_acquire(void) +{ + _Py_USING_STD; + atomic_thread_fence(memory_order_acquire); +} + + static inline void +_Py_atomic_fence_release(void) +{ + _Py_USING_STD; + atomic_thread_fence(memory_order_release); +} diff --git a/Include/cpython/pyctype.h b/Include/cpython/pyctype.h new file mode 100644 index 0000000000000000000000000000000000000000..729d93275e6c5365fb26fd521b1ff58de22b5fdb --- /dev/null +++ b/Include/cpython/pyctype.h @@ -0,0 +1,39 @@ +#ifndef Py_LIMITED_API +#ifndef PYCTYPE_H +#define PYCTYPE_H +#ifdef __cplusplus +extern "C" { +#endif + +#define PY_CTF_LOWER 0x01 +#define PY_CTF_UPPER 0x02 +#define PY_CTF_ALPHA (PY_CTF_LOWER|PY_CTF_UPPER) +#define PY_CTF_DIGIT 0x04 +#define PY_CTF_ALNUM (PY_CTF_ALPHA|PY_CTF_DIGIT) +#define PY_CTF_SPACE 0x08 +#define PY_CTF_XDIGIT 0x10 + +PyAPI_DATA(const unsigned int) _Py_ctype_table[256]; + +/* Unlike their C counterparts, the following macros are not meant to + * handle an int with any of the values [EOF, 0-UCHAR_MAX]. The argument + * must be a signed/unsigned char. 
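+ *
+ * For example (illustrative):
+ *
+ *      char c = '7';
+ *      Py_ISDIGIT(c);              // OK: the argument is a char
+ *      int ch = fgetc(fp);
+ *      Py_ISDIGIT(ch);             // not supported: ch may be EOF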
*/ +#define Py_ISLOWER(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_LOWER) +#define Py_ISUPPER(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_UPPER) +#define Py_ISALPHA(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_ALPHA) +#define Py_ISDIGIT(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_DIGIT) +#define Py_ISXDIGIT(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_XDIGIT) +#define Py_ISALNUM(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_ALNUM) +#define Py_ISSPACE(c) (_Py_ctype_table[Py_CHARMASK(c)] & PY_CTF_SPACE) + +PyAPI_DATA(const unsigned char) _Py_ctype_tolower[256]; +PyAPI_DATA(const unsigned char) _Py_ctype_toupper[256]; + +#define Py_TOLOWER(c) (_Py_ctype_tolower[Py_CHARMASK(c)]) +#define Py_TOUPPER(c) (_Py_ctype_toupper[Py_CHARMASK(c)]) + +#ifdef __cplusplus +} +#endif +#endif /* !PYCTYPE_H */ +#endif /* !Py_LIMITED_API */ diff --git a/Include/cpython/pydebug.h b/Include/cpython/pydebug.h new file mode 100644 index 0000000000000000000000000000000000000000..f6ebd99ed7e2ff2755d7fdd1d11fa77ff374088b --- /dev/null +++ b/Include/cpython/pydebug.h @@ -0,0 +1,38 @@ +#ifndef Py_LIMITED_API +#ifndef Py_PYDEBUG_H +#define Py_PYDEBUG_H +#ifdef __cplusplus +extern "C" { +#endif + +Py_DEPRECATED(3.12) PyAPI_DATA(int) Py_DebugFlag; +Py_DEPRECATED(3.12) PyAPI_DATA(int) Py_VerboseFlag; +Py_DEPRECATED(3.12) PyAPI_DATA(int) Py_QuietFlag; +Py_DEPRECATED(3.12) PyAPI_DATA(int) Py_InteractiveFlag; +Py_DEPRECATED(3.12) PyAPI_DATA(int) Py_InspectFlag; +Py_DEPRECATED(3.12) PyAPI_DATA(int) Py_OptimizeFlag; +Py_DEPRECATED(3.12) PyAPI_DATA(int) Py_NoSiteFlag; +Py_DEPRECATED(3.12) PyAPI_DATA(int) Py_BytesWarningFlag; +Py_DEPRECATED(3.12) PyAPI_DATA(int) Py_FrozenFlag; +Py_DEPRECATED(3.12) PyAPI_DATA(int) Py_IgnoreEnvironmentFlag; +Py_DEPRECATED(3.12) PyAPI_DATA(int) Py_DontWriteBytecodeFlag; +Py_DEPRECATED(3.12) PyAPI_DATA(int) Py_NoUserSiteDirectory; +Py_DEPRECATED(3.12) PyAPI_DATA(int) Py_UnbufferedStdioFlag; +Py_DEPRECATED(3.12) PyAPI_DATA(int) Py_HashRandomizationFlag; +Py_DEPRECATED(3.12) PyAPI_DATA(int) Py_IsolatedFlag; + +#ifdef MS_WINDOWS +Py_DEPRECATED(3.12) PyAPI_DATA(int) Py_LegacyWindowsFSEncodingFlag; +Py_DEPRECATED(3.12) PyAPI_DATA(int) Py_LegacyWindowsStdioFlag; +#endif + +/* this is a wrapper around getenv() that pays attention to + Py_IgnoreEnvironmentFlag. It should be used for getting variables like + PYTHONPATH and PYTHONHOME from the environment */ +PyAPI_FUNC(char*) Py_GETENV(const char *name); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_PYDEBUG_H */ +#endif /* Py_LIMITED_API */ diff --git a/Include/cpython/pyerrors.h b/Include/cpython/pyerrors.h new file mode 100644 index 0000000000000000000000000000000000000000..b36b4681f5dddb58de67f1cb1f2c4b1b3720369a --- /dev/null +++ b/Include/cpython/pyerrors.h @@ -0,0 +1,131 @@ +#ifndef Py_CPYTHON_ERRORS_H +# error "this header file must not be included directly" +#endif + +/* Error objects */ + +/* PyException_HEAD defines the initial segment of every exception class. 
*/ +#define PyException_HEAD PyObject_HEAD PyObject *dict;\ + PyObject *args; PyObject *notes; PyObject *traceback;\ + PyObject *context; PyObject *cause;\ + char suppress_context; + +typedef struct { + PyException_HEAD +} PyBaseExceptionObject; + +typedef struct { + PyException_HEAD + PyObject *msg; + PyObject *excs; +} PyBaseExceptionGroupObject; + +typedef struct { + PyException_HEAD + PyObject *msg; + PyObject *filename; + PyObject *lineno; + PyObject *offset; + PyObject *end_lineno; + PyObject *end_offset; + PyObject *text; + PyObject *print_file_and_line; +} PySyntaxErrorObject; + +typedef struct { + PyException_HEAD + PyObject *msg; + PyObject *name; + PyObject *path; + PyObject *name_from; +} PyImportErrorObject; + +typedef struct { + PyException_HEAD + PyObject *encoding; + PyObject *object; + Py_ssize_t start; + Py_ssize_t end; + PyObject *reason; +} PyUnicodeErrorObject; + +typedef struct { + PyException_HEAD + PyObject *code; +} PySystemExitObject; + +typedef struct { + PyException_HEAD + PyObject *myerrno; + PyObject *strerror; + PyObject *filename; + PyObject *filename2; +#ifdef MS_WINDOWS + PyObject *winerror; +#endif + Py_ssize_t written; /* only for BlockingIOError, -1 otherwise */ +} PyOSErrorObject; + +typedef struct { + PyException_HEAD + PyObject *value; +} PyStopIterationObject; + +typedef struct { + PyException_HEAD + PyObject *name; +} PyNameErrorObject; + +typedef struct { + PyException_HEAD + PyObject *obj; + PyObject *name; +} PyAttributeErrorObject; + +/* Compatibility typedefs */ +typedef PyOSErrorObject PyEnvironmentErrorObject; +#ifdef MS_WINDOWS +typedef PyOSErrorObject PyWindowsErrorObject; +#endif + +/* Context manipulation (PEP 3134) */ + +PyAPI_FUNC(void) _PyErr_ChainExceptions1(PyObject *); + +/* In exceptions.c */ + +PyAPI_FUNC(PyObject*) PyUnstable_Exc_PrepReraiseStar( + PyObject *orig, + PyObject *excs); + +/* In signalmodule.c */ + +PyAPI_FUNC(int) PySignal_SetWakeupFd(int fd); + +/* Support for adding program text to SyntaxErrors */ + +PyAPI_FUNC(void) PyErr_SyntaxLocationObject( + PyObject *filename, + int lineno, + int col_offset); + +PyAPI_FUNC(void) PyErr_RangedSyntaxLocationObject( + PyObject *filename, + int lineno, + int col_offset, + int end_lineno, + int end_col_offset); + +PyAPI_FUNC(PyObject *) PyErr_ProgramTextObject( + PyObject *filename, + int lineno); + +PyAPI_FUNC(void) _Py_NO_RETURN _Py_FatalErrorFunc( + const char *func, + const char *message); + +PyAPI_FUNC(void) PyErr_FormatUnraisable(const char *, ...); + +PyAPI_DATA(PyObject *) PyExc_PythonFinalizationError; + +#define Py_FatalError(message) _Py_FatalErrorFunc(__func__, (message)) diff --git a/Include/cpython/pyfpe.h b/Include/cpython/pyfpe.h new file mode 100644 index 0000000000000000000000000000000000000000..cc2def63aa5527f4ac192aa663a14353a88e1774 --- /dev/null +++ b/Include/cpython/pyfpe.h @@ -0,0 +1,15 @@ +#ifndef Py_PYFPE_H +#define Py_PYFPE_H +/* Header excluded from the stable API */ +#ifndef Py_LIMITED_API + +/* These macros used to do something when Python was built with --with-fpectl, + * but support for that was dropped in 3.7. We continue to define them though, + * to avoid breaking API users. 
+ */ + +#define PyFPE_START_PROTECT(err_string, leave_stmt) +#define PyFPE_END_PROTECT(v) + +#endif /* !defined(Py_LIMITED_API) */ +#endif /* !Py_PYFPE_H */ diff --git a/Include/cpython/pyframe.h b/Include/cpython/pyframe.h new file mode 100644 index 0000000000000000000000000000000000000000..eeafbb17a56badddb542d8d13cbcd799ea1403eb --- /dev/null +++ b/Include/cpython/pyframe.h @@ -0,0 +1,45 @@ +#ifndef Py_CPYTHON_PYFRAME_H +# error "this header file must not be included directly" +#endif + +PyAPI_DATA(PyTypeObject) PyFrame_Type; +PyAPI_DATA(PyTypeObject) PyFrameLocalsProxy_Type; + +#define PyFrame_Check(op) Py_IS_TYPE((op), &PyFrame_Type) +#define PyFrameLocalsProxy_Check(op) Py_IS_TYPE((op), &PyFrameLocalsProxy_Type) + +PyAPI_FUNC(PyFrameObject *) PyFrame_GetBack(PyFrameObject *frame); +PyAPI_FUNC(PyObject *) PyFrame_GetLocals(PyFrameObject *frame); + +PyAPI_FUNC(PyObject *) PyFrame_GetGlobals(PyFrameObject *frame); +PyAPI_FUNC(PyObject *) PyFrame_GetBuiltins(PyFrameObject *frame); + +PyAPI_FUNC(PyObject *) PyFrame_GetGenerator(PyFrameObject *frame); +PyAPI_FUNC(int) PyFrame_GetLasti(PyFrameObject *frame); +PyAPI_FUNC(PyObject*) PyFrame_GetVar(PyFrameObject *frame, PyObject *name); +PyAPI_FUNC(PyObject*) PyFrame_GetVarString(PyFrameObject *frame, const char *name); + +/* The following functions are for use by debuggers and other tools + * implementing custom frame evaluators with PEP 523. */ + +struct _PyInterpreterFrame; + +/* Returns the code object of the frame (strong reference). + * Does not raise an exception. */ +PyAPI_FUNC(PyObject *) PyUnstable_InterpreterFrame_GetCode(struct _PyInterpreterFrame *frame); + +/* Returns a byte offset into the last executed instruction. + * Does not raise an exception. */ +PyAPI_FUNC(int) PyUnstable_InterpreterFrame_GetLasti(struct _PyInterpreterFrame *frame); + +/* Returns the currently executing line number, or -1 if there is no line number. + * Does not raise an exception. */ +PyAPI_FUNC(int) PyUnstable_InterpreterFrame_GetLine(struct _PyInterpreterFrame *frame); + +#define PyUnstable_EXECUTABLE_KIND_SKIP 0 +#define PyUnstable_EXECUTABLE_KIND_PY_FUNCTION 1 +#define PyUnstable_EXECUTABLE_KIND_BUILTIN_FUNCTION 3 +#define PyUnstable_EXECUTABLE_KIND_METHOD_DESCRIPTOR 4 +#define PyUnstable_EXECUTABLE_KINDS 5 + +PyAPI_DATA(const PyTypeObject *) const PyUnstable_ExecutableKinds[PyUnstable_EXECUTABLE_KINDS+1]; diff --git a/Include/cpython/pyhash.h b/Include/cpython/pyhash.h new file mode 100644 index 0000000000000000000000000000000000000000..825c034a8d8474843102de497f4f973125e0674c --- /dev/null +++ b/Include/cpython/pyhash.h @@ -0,0 +1,47 @@ +#ifndef Py_CPYTHON_HASH_H +# error "this header file must not be included directly" +#endif + +/* Prime multiplier used in string and various other hashes. */ +#define PyHASH_MULTIPLIER 1000003UL /* 0xf4243 */ + +/* Parameters used for the numeric hash implementation. See notes for + _Py_HashDouble in Python/pyhash.c. Numeric hashes are based on + reduction modulo the prime 2**_PyHASH_BITS - 1. 
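+
+   A worked example (illustrative): with PyHASH_BITS == 61 on 64-bit builds
+   the modulus is 2**61 - 1, so in Python hash(2**61) == hash(1) == 1.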
*/ + +#if SIZEOF_VOID_P >= 8 +# define PyHASH_BITS 61 +#else +# define PyHASH_BITS 31 +#endif + +#define PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1) +#define PyHASH_INF 314159 +#define PyHASH_IMAG PyHASH_MULTIPLIER + +/* Aliases kept for backward compatibility with Python 3.12 */ +#define _PyHASH_MULTIPLIER PyHASH_MULTIPLIER +#define _PyHASH_BITS PyHASH_BITS +#define _PyHASH_MODULUS PyHASH_MODULUS +#define _PyHASH_INF PyHASH_INF +#define _PyHASH_IMAG PyHASH_IMAG + +/* Helpers for hash functions */ +PyAPI_FUNC(Py_hash_t) _Py_HashDouble(PyObject *, double); + +// Kept for backward compatibility +#define _Py_HashPointer Py_HashPointer + + +/* hash function definition */ +typedef struct { + Py_hash_t (*const hash)(const void *, Py_ssize_t); + const char *name; + const int hash_bits; + const int seed_bits; +} PyHash_FuncDef; + +PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void); + +PyAPI_FUNC(Py_hash_t) Py_HashPointer(const void *ptr); +PyAPI_FUNC(Py_hash_t) PyObject_GenericHash(PyObject *); diff --git a/Include/cpython/pylifecycle.h b/Include/cpython/pylifecycle.h new file mode 100644 index 0000000000000000000000000000000000000000..e46dfe59ec463044b06d3065dd3e4bfb26821e38 --- /dev/null +++ b/Include/cpython/pylifecycle.h @@ -0,0 +1,92 @@ +#ifndef Py_CPYTHON_PYLIFECYCLE_H +# error "this header file must not be included directly" +#endif + +/* Py_FrozenMain is kept out of the Limited API until documented and present + in all builds of Python */ +PyAPI_FUNC(int) Py_FrozenMain(int argc, char **argv); + +/* PEP 432 Multi-phase initialization API (Private while provisional!) */ + +PyAPI_FUNC(PyStatus) Py_PreInitialize( + const PyPreConfig *src_config); +PyAPI_FUNC(PyStatus) Py_PreInitializeFromBytesArgs( + const PyPreConfig *src_config, + Py_ssize_t argc, + char **argv); +PyAPI_FUNC(PyStatus) Py_PreInitializeFromArgs( + const PyPreConfig *src_config, + Py_ssize_t argc, + wchar_t **argv); + + +/* Initialization and finalization */ + +PyAPI_FUNC(PyStatus) Py_InitializeFromConfig( + const PyConfig *config); + +// Python 3.8 provisional API (PEP 587) +PyAPI_FUNC(PyStatus) _Py_InitializeMain(void); + +PyAPI_FUNC(int) Py_RunMain(void); + + +PyAPI_FUNC(void) _Py_NO_RETURN Py_ExitStatusException(PyStatus err); + +PyAPI_FUNC(int) Py_FdIsInteractive(FILE *, const char *); + +/* --- PyInterpreterConfig ------------------------------------ */ + +#define PyInterpreterConfig_DEFAULT_GIL (0) +#define PyInterpreterConfig_SHARED_GIL (1) +#define PyInterpreterConfig_OWN_GIL (2) + +typedef struct { + // XXX "allow_object_sharing"? "own_objects"? + int use_main_obmalloc; + int allow_fork; + int allow_exec; + int allow_threads; + int allow_daemon_threads; + int check_multi_interp_extensions; + int gil; +} PyInterpreterConfig; + +#define _PyInterpreterConfig_INIT \ + { \ + .use_main_obmalloc = 0, \ + .allow_fork = 0, \ + .allow_exec = 0, \ + .allow_threads = 1, \ + .allow_daemon_threads = 0, \ + .check_multi_interp_extensions = 1, \ + .gil = PyInterpreterConfig_OWN_GIL, \ + } + +// gh-117649: The free-threaded build does not currently support single-phase +// init extensions in subinterpreters. For now, we ensure that +// `check_multi_interp_extensions` is always `1`, even in the legacy config. 
+#ifdef Py_GIL_DISABLED +# define _PyInterpreterConfig_LEGACY_CHECK_MULTI_INTERP_EXTENSIONS 1 +#else +# define _PyInterpreterConfig_LEGACY_CHECK_MULTI_INTERP_EXTENSIONS 0 +#endif + +#define _PyInterpreterConfig_LEGACY_INIT \ + { \ + .use_main_obmalloc = 1, \ + .allow_fork = 1, \ + .allow_exec = 1, \ + .allow_threads = 1, \ + .allow_daemon_threads = 1, \ + .check_multi_interp_extensions = _PyInterpreterConfig_LEGACY_CHECK_MULTI_INTERP_EXTENSIONS, \ + .gil = PyInterpreterConfig_SHARED_GIL, \ + } + +PyAPI_FUNC(PyStatus) Py_NewInterpreterFromConfig( + PyThreadState **tstate_p, + const PyInterpreterConfig *config); + +typedef void (*atexit_datacallbackfunc)(void *); +PyAPI_FUNC(int) PyUnstable_AtExit( + PyInterpreterState *, atexit_datacallbackfunc, void *); diff --git a/Include/cpython/pymem.h b/Include/cpython/pymem.h new file mode 100644 index 0000000000000000000000000000000000000000..76b3221f7b9f39f50fa60ef6b5777eed5e00dc04 --- /dev/null +++ b/Include/cpython/pymem.h @@ -0,0 +1,84 @@ +#ifndef Py_CPYTHON_PYMEM_H +# error "this header file must not be included directly" +#endif + +typedef enum { + /* PyMem_RawMalloc(), PyMem_RawRealloc() and PyMem_RawFree() */ + PYMEM_DOMAIN_RAW, + + /* PyMem_Malloc(), PyMem_Realloc() and PyMem_Free() */ + PYMEM_DOMAIN_MEM, + + /* PyObject_Malloc(), PyObject_Realloc() and PyObject_Free() */ + PYMEM_DOMAIN_OBJ +} PyMemAllocatorDomain; + +typedef enum { + PYMEM_ALLOCATOR_NOT_SET = 0, + PYMEM_ALLOCATOR_DEFAULT = 1, + PYMEM_ALLOCATOR_DEBUG = 2, + PYMEM_ALLOCATOR_MALLOC = 3, + PYMEM_ALLOCATOR_MALLOC_DEBUG = 4, +#ifdef WITH_PYMALLOC + PYMEM_ALLOCATOR_PYMALLOC = 5, + PYMEM_ALLOCATOR_PYMALLOC_DEBUG = 6, +#endif +#ifdef WITH_MIMALLOC + PYMEM_ALLOCATOR_MIMALLOC = 7, + PYMEM_ALLOCATOR_MIMALLOC_DEBUG = 8, +#endif +} PyMemAllocatorName; + + +typedef struct { + /* user context passed as the first argument to the 4 functions */ + void *ctx; + + /* allocate a memory block */ + void* (*malloc) (void *ctx, size_t size); + + /* allocate a memory block initialized by zeros */ + void* (*calloc) (void *ctx, size_t nelem, size_t elsize); + + /* allocate or resize a memory block */ + void* (*realloc) (void *ctx, void *ptr, size_t new_size); + + /* release a memory block */ + void (*free) (void *ctx, void *ptr); +} PyMemAllocatorEx; + +/* Get the memory block allocator of the specified domain. */ +PyAPI_FUNC(void) PyMem_GetAllocator(PyMemAllocatorDomain domain, + PyMemAllocatorEx *allocator); + +/* Set the memory block allocator of the specified domain. + + The new allocator must return a distinct non-NULL pointer when requesting + zero bytes. + + For the PYMEM_DOMAIN_RAW domain, the allocator must be thread-safe: the GIL + is not held when the allocator is called. + + If the new allocator is not a hook (don't call the previous allocator), the + PyMem_SetupDebugHooks() function must be called to reinstall the debug hooks + on top of the new allocator. */ +PyAPI_FUNC(void) PyMem_SetAllocator(PyMemAllocatorDomain domain, + PyMemAllocatorEx *allocator); + +/* Setup hooks to detect bugs in the following Python memory allocator + functions: + + - PyMem_RawMalloc(), PyMem_RawRealloc(), PyMem_RawFree() + - PyMem_Malloc(), PyMem_Realloc(), PyMem_Free() + - PyObject_Malloc(), PyObject_Realloc() and PyObject_Free() + + Newly allocated memory is filled with the byte 0xCB, freed memory is filled + with the byte 0xDB. 
Additional checks: + + - detect API violations, ex: PyObject_Free() called on a buffer allocated + by PyMem_Malloc() + - detect write before the start of the buffer (buffer underflow) + - detect write after the end of the buffer (buffer overflow) + + The function does nothing if Python is not compiled in debug mode. */ +PyAPI_FUNC(void) PyMem_SetupDebugHooks(void); diff --git a/Include/cpython/pystate.h b/Include/cpython/pystate.h new file mode 100644 index 0000000000000000000000000000000000000000..f005729fff11b6841b6754e23c15770a87ad8609 --- /dev/null +++ b/Include/cpython/pystate.h @@ -0,0 +1,277 @@ +#ifndef Py_CPYTHON_PYSTATE_H +# error "this header file must not be included directly" +#endif + + +/* private interpreter helpers */ + +PyAPI_FUNC(int) _PyInterpreterState_RequiresIDRef(PyInterpreterState *); +PyAPI_FUNC(void) _PyInterpreterState_RequireIDRef(PyInterpreterState *, int); + +PyAPI_FUNC(PyObject *) PyUnstable_InterpreterState_GetMainModule(PyInterpreterState *); + +/* State unique per thread */ + +/* Py_tracefunc returns -1 when raising an exception, or 0 for success. */ +typedef int (*Py_tracefunc)(PyObject *, PyFrameObject *, int, PyObject *); + +/* The following values are used for 'what' for tracefunc functions + * + * To add a new kind of trace event, also update "trace_init" in + * Python/sysmodule.c to define the Python level event name + */ +#define PyTrace_CALL 0 +#define PyTrace_EXCEPTION 1 +#define PyTrace_LINE 2 +#define PyTrace_RETURN 3 +#define PyTrace_C_CALL 4 +#define PyTrace_C_EXCEPTION 5 +#define PyTrace_C_RETURN 6 +#define PyTrace_OPCODE 7 + +typedef struct _err_stackitem { + /* This struct represents a single execution context where we might + * be currently handling an exception. It is a per-coroutine state + * (coroutine in the computer science sense, including the thread + * and generators). + * + * This is used as an entry on the exception stack, where each + * entry indicates if it is currently handling an exception. + * This ensures that the exception state is not impacted + * by "yields" from an except handler. The thread + * always has an entry (the bottom-most one). + */ + + /* The exception currently being handled in this context, if any. */ + PyObject *exc_value; + + struct _err_stackitem *previous_item; + +} _PyErr_StackItem; + +typedef struct _stack_chunk { + struct _stack_chunk *previous; + size_t size; + size_t top; + PyObject * data[1]; /* Variable sized */ +} _PyStackChunk; + +struct _ts { + /* See Python/ceval.c for comments explaining most fields */ + + PyThreadState *prev; + PyThreadState *next; + PyInterpreterState *interp; + + /* The global instrumentation version in high bits, plus flags indicating + when to break out of the interpreter loop in lower bits. See details in + pycore_ceval.h. */ + uintptr_t eval_breaker; + + struct { + /* Has been initialized to a safe state. + + In order to be effective, this must be set to 0 during or right + after allocation. */ + unsigned int initialized:1; + + /* Has been bound to an OS thread. */ + unsigned int bound:1; + /* Has been unbound from its OS thread. */ + unsigned int unbound:1; + /* Has been bound as current for the GILState API. */ + unsigned int bound_gilstate:1; + /* Currently in use (maybe holds the GIL). */ + unsigned int active:1; + /* Currently holds the GIL. 
*/ + unsigned int holds_gil:1; + + /* various stages of finalization */ + unsigned int finalizing:1; + unsigned int cleared:1; + unsigned int finalized:1; + + /* padding to align to 4 bytes */ + unsigned int :23; + } _status; +#ifdef Py_BUILD_CORE +# define _PyThreadState_WHENCE_NOTSET -1 +# define _PyThreadState_WHENCE_UNKNOWN 0 +# define _PyThreadState_WHENCE_INIT 1 +# define _PyThreadState_WHENCE_FINI 2 +# define _PyThreadState_WHENCE_THREADING 3 +# define _PyThreadState_WHENCE_GILSTATE 4 +# define _PyThreadState_WHENCE_EXEC 5 +#endif + int _whence; + + /* Thread state (_Py_THREAD_ATTACHED, _Py_THREAD_DETACHED, _Py_THREAD_SUSPENDED). + See Include/internal/pycore_pystate.h for more details. */ + int state; + + int py_recursion_remaining; + int py_recursion_limit; + + int c_recursion_remaining; + int recursion_headroom; /* Allow 50 more calls to handle any errors. */ + + /* 'tracing' keeps track of the execution depth when tracing/profiling. + This is to prevent the actual trace/profile code from being recorded in + the trace/profile. */ + int tracing; + int what_event; /* The event currently being monitored, if any. */ + + /* Pointer to currently executing frame. */ + struct _PyInterpreterFrame *current_frame; + + Py_tracefunc c_profilefunc; + Py_tracefunc c_tracefunc; + PyObject *c_profileobj; + PyObject *c_traceobj; + + /* The exception currently being raised */ + PyObject *current_exception; + + /* Pointer to the top of the exception stack for the exceptions + * we may be currently handling. (See _PyErr_StackItem above.) + * This is never NULL. */ + _PyErr_StackItem *exc_info; + + PyObject *dict; /* Stores per-thread state */ + + int gilstate_counter; + + PyObject *async_exc; /* Asynchronous exception to raise */ + unsigned long thread_id; /* Thread id where this tstate was created */ + + /* Native thread id where this tstate was created. This will be 0 except on + * those platforms that have the notion of native thread id, for which the + * macro PY_HAVE_THREAD_NATIVE_ID is then defined. + */ + unsigned long native_thread_id; + + PyObject *delete_later; + + /* Tagged pointer to top-most critical section, or zero if there is no + * active critical section. Critical sections are only used in + * `--disable-gil` builds (i.e., when Py_GIL_DISABLED is defined to 1). In the + * default build, this field is always zero. + */ + uintptr_t critical_section; + + int coroutine_origin_tracking_depth; + + PyObject *async_gen_firstiter; + PyObject *async_gen_finalizer; + + PyObject *context; + uint64_t context_ver; + + /* Unique thread state id. */ + uint64_t id; + + _PyStackChunk *datastack_chunk; + PyObject **datastack_top; + PyObject **datastack_limit; + /* XXX signal handlers should also be here */ + + /* The following fields are here to avoid allocation during init. + The data is exposed through PyThreadState pointer fields. + These fields should not be accessed directly outside of init. + This is indicated by an underscore prefix on the field names. + + All other PyInterpreterState pointer fields are populated when + needed and default to NULL. + */ + // Note some fields do not have a leading underscore for backward + // compatibility. See https://bugs.python.org/issue45953#msg412046. + + /* The thread's exception stack entry. (Always the last entry.) 
*/ + _PyErr_StackItem exc_state; + + PyObject *previous_executor; + + uint64_t dict_global_version; + + /* Used to store/retrieve `threading.local` keys/values for this thread */ + PyObject *threading_local_key; + + /* Used by `threading.local`s to remove keys/values for dying threads. + The PyThreadObject must hold the only reference to this value. + */ + PyObject *threading_local_sentinel; +}; + +#ifdef Py_DEBUG + // A debug build is likely built with low optimization level which implies + // higher stack memory usage than a release build: use a lower limit. +# define Py_C_RECURSION_LIMIT 500 +#elif defined(__s390x__) +# define Py_C_RECURSION_LIMIT 800 +#elif defined(_WIN32) && defined(_M_ARM64) +# define Py_C_RECURSION_LIMIT 1000 +#elif defined(_WIN32) +# define Py_C_RECURSION_LIMIT 3000 +#elif defined(__ANDROID__) + // On an ARM64 emulator, API level 34 was OK with 10000, but API level 21 + // crashed in test_compiler_recursion_limit. +# define Py_C_RECURSION_LIMIT 3000 +#elif defined(_Py_ADDRESS_SANITIZER) +# define Py_C_RECURSION_LIMIT 4000 +#elif defined(__wasi__) + // Based on wasmtime 16. +# define Py_C_RECURSION_LIMIT 5000 +#else + // This value is duplicated in Lib/test/support/__init__.py +# define Py_C_RECURSION_LIMIT 10000 +#endif + + +/* other API */ + +/* Similar to PyThreadState_Get(), but don't issue a fatal error + * if it is NULL. */ +PyAPI_FUNC(PyThreadState *) PyThreadState_GetUnchecked(void); + +// Alias kept for backward compatibility +#define _PyThreadState_UncheckedGet PyThreadState_GetUnchecked + + +// Disable tracing and profiling. +PyAPI_FUNC(void) PyThreadState_EnterTracing(PyThreadState *tstate); + +// Reset tracing and profiling: enable them if a trace function or a profile +// function is set, otherwise disable them. +PyAPI_FUNC(void) PyThreadState_LeaveTracing(PyThreadState *tstate); + +/* PyGILState */ + +/* Helper/diagnostic function - return 1 if the current thread + currently holds the GIL, 0 otherwise. + + The function returns 1 if _PyGILState_check_enabled is non-zero. */ +PyAPI_FUNC(int) PyGILState_Check(void); + +/* The implementation of sys._current_frames(). Returns a dict mapping + thread id to that thread's current frame. +*/ +PyAPI_FUNC(PyObject*) _PyThread_CurrentFrames(void); + +/* Routines for advanced debuggers, requested by David Beazley. + Don't use unless you know what you are doing! */ +PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_Main(void); +PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_Head(void); +PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_Next(PyInterpreterState *); +PyAPI_FUNC(PyThreadState *) PyInterpreterState_ThreadHead(PyInterpreterState *); +PyAPI_FUNC(PyThreadState *) PyThreadState_Next(PyThreadState *); +PyAPI_FUNC(void) PyThreadState_DeleteCurrent(void); + +/* Frame evaluation API */ + +typedef PyObject* (*_PyFrameEvalFunction)(PyThreadState *tstate, struct _PyInterpreterFrame *, int); + +PyAPI_FUNC(_PyFrameEvalFunction) _PyInterpreterState_GetEvalFrameFunc( + PyInterpreterState *interp); +PyAPI_FUNC(void) _PyInterpreterState_SetEvalFrameFunc( + PyInterpreterState *interp, + _PyFrameEvalFunction eval_frame); diff --git a/Include/cpython/pystats.h b/Include/cpython/pystats.h new file mode 100644 index 0000000000000000000000000000000000000000..378c2760ec3f5577909f2e489b9adaaad1a63390 --- /dev/null +++ b/Include/cpython/pystats.h @@ -0,0 +1,175 @@ +// Statistics on Python performance. +// +// API: +// +// - _Py_INCREF_STAT_INC() and _Py_DECREF_STAT_INC() used by Py_INCREF() +// and Py_DECREF(). 
+// - _Py_stats variable +// +// Functions of the sys module: +// +// - sys._stats_on() +// - sys._stats_off() +// - sys._stats_clear() +// - sys._stats_dump() +// +// Python must be built with ./configure --enable-pystats to define the +// Py_STATS macro. +// +// Define _PY_INTERPRETER macro to increment interpreter_increfs and +// interpreter_decrefs. Otherwise, increment increfs and decrefs. +// +// The number of incref operations counted by `incref` and +// `interpreter_incref` is the number of increment operations, which is +// not equal to the total of all reference counts. A single increment +// operation may increase the reference count of an object by more than +// one. For example, see `_Py_RefcntAdd`. + +#ifndef Py_CPYTHON_PYSTATS_H +# error "this header file must not be included directly" +#endif + +#define PYSTATS_MAX_UOP_ID 512 + +#define SPECIALIZATION_FAILURE_KINDS 36 + +/* Stats for determining who is calling PyEval_EvalFrame */ +#define EVAL_CALL_TOTAL 0 +#define EVAL_CALL_VECTOR 1 +#define EVAL_CALL_GENERATOR 2 +#define EVAL_CALL_LEGACY 3 +#define EVAL_CALL_FUNCTION_VECTORCALL 4 +#define EVAL_CALL_BUILD_CLASS 5 +#define EVAL_CALL_SLOT 6 +#define EVAL_CALL_FUNCTION_EX 7 +#define EVAL_CALL_API 8 +#define EVAL_CALL_METHOD 9 + +#define EVAL_CALL_KINDS 10 + +typedef struct _specialization_stats { + uint64_t success; + uint64_t failure; + uint64_t hit; + uint64_t deferred; + uint64_t miss; + uint64_t deopt; + uint64_t failure_kinds[SPECIALIZATION_FAILURE_KINDS]; +} SpecializationStats; + +typedef struct _opcode_stats { + SpecializationStats specialization; + uint64_t execution_count; + uint64_t pair_count[256]; +} OpcodeStats; + +typedef struct _call_stats { + uint64_t inlined_py_calls; + uint64_t pyeval_calls; + uint64_t frames_pushed; + uint64_t frame_objects_created; + uint64_t eval_calls[EVAL_CALL_KINDS]; +} CallStats; + +typedef struct _object_stats { + uint64_t increfs; + uint64_t decrefs; + uint64_t interpreter_increfs; + uint64_t interpreter_decrefs; + uint64_t allocations; + uint64_t allocations512; + uint64_t allocations4k; + uint64_t allocations_big; + uint64_t frees; + uint64_t to_freelist; + uint64_t from_freelist; + uint64_t inline_values; + uint64_t dict_materialized_on_request; + uint64_t dict_materialized_new_key; + uint64_t dict_materialized_too_big; + uint64_t dict_materialized_str_subclass; + uint64_t type_cache_hits; + uint64_t type_cache_misses; + uint64_t type_cache_dunder_hits; + uint64_t type_cache_dunder_misses; + uint64_t type_cache_collisions; + /* Temporary value used during GC */ + uint64_t object_visits; +} ObjectStats; + +typedef struct _gc_stats { + uint64_t collections; + uint64_t object_visits; + uint64_t objects_collected; +} GCStats; + +typedef struct _uop_stats { + uint64_t execution_count; + uint64_t miss; + uint64_t pair_count[PYSTATS_MAX_UOP_ID + 1]; +} UOpStats; + +#define _Py_UOP_HIST_SIZE 32 + +typedef struct _optimization_stats { + uint64_t attempts; + uint64_t traces_created; + uint64_t traces_executed; + uint64_t uops_executed; + uint64_t trace_stack_overflow; + uint64_t trace_stack_underflow; + uint64_t trace_too_long; + uint64_t trace_too_short; + uint64_t inner_loop; + uint64_t recursive_call; + uint64_t low_confidence; + uint64_t executors_invalidated; + UOpStats opcode[PYSTATS_MAX_UOP_ID + 1]; + uint64_t unsupported_opcode[256]; + uint64_t trace_length_hist[_Py_UOP_HIST_SIZE]; + uint64_t trace_run_length_hist[_Py_UOP_HIST_SIZE]; + uint64_t optimized_trace_length_hist[_Py_UOP_HIST_SIZE]; + uint64_t optimizer_attempts; + 
uint64_t optimizer_successes; + uint64_t optimizer_failure_reason_no_memory; + uint64_t remove_globals_builtins_changed; + uint64_t remove_globals_incorrect_keys; + uint64_t error_in_opcode[PYSTATS_MAX_UOP_ID + 1]; +} OptimizationStats; + +typedef struct _rare_event_stats { + /* Setting an object's class, obj.__class__ = ... */ + uint64_t set_class; + /* Setting the bases of a class, cls.__bases__ = ... */ + uint64_t set_bases; + /* Setting the PEP 523 frame eval function, _PyInterpreterState_SetFrameEvalFunc() */ + uint64_t set_eval_frame_func; + /* Modifying the builtins, __builtins__.__dict__[var] = ... */ + uint64_t builtin_dict; + /* Modifying a function, e.g. func.__defaults__ = ..., etc. */ + uint64_t func_modification; + /* Modifying a dict that is being watched */ + uint64_t watched_dict_modification; + uint64_t watched_globals_modification; +} RareEventStats; + +typedef struct _stats { + OpcodeStats opcode_stats[256]; + CallStats call_stats; + ObjectStats object_stats; + OptimizationStats optimization_stats; + RareEventStats rare_event_stats; + GCStats *gc_stats; +} PyStats; + + +// Export for shared extensions like 'math' +PyAPI_DATA(PyStats*) _Py_stats; + +#ifdef _PY_INTERPRETER +# define _Py_INCREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.interpreter_increfs++; } while (0) +# define _Py_DECREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.interpreter_decrefs++; } while (0) +#else +# define _Py_INCREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.increfs++; } while (0) +# define _Py_DECREF_STAT_INC() do { if (_Py_stats) _Py_stats->object_stats.decrefs++; } while (0) +#endif diff --git a/Include/cpython/pythonrun.h b/Include/cpython/pythonrun.h new file mode 100644 index 0000000000000000000000000000000000000000..edc40952254029f8c8f2927f4e3324a14d40fba6 --- /dev/null +++ b/Include/cpython/pythonrun.h @@ -0,0 +1,96 @@ +#ifndef Py_CPYTHON_PYTHONRUN_H +# error "this header file must not be included directly" +#endif + +PyAPI_FUNC(int) PyRun_SimpleStringFlags(const char *, PyCompilerFlags *); +PyAPI_FUNC(int) PyRun_AnyFileExFlags( + FILE *fp, + const char *filename, /* decoded from the filesystem encoding */ + int closeit, + PyCompilerFlags *flags); +PyAPI_FUNC(int) PyRun_SimpleFileExFlags( + FILE *fp, + const char *filename, /* decoded from the filesystem encoding */ + int closeit, + PyCompilerFlags *flags); +PyAPI_FUNC(int) PyRun_InteractiveOneFlags( + FILE *fp, + const char *filename, /* decoded from the filesystem encoding */ + PyCompilerFlags *flags); +PyAPI_FUNC(int) PyRun_InteractiveOneObject( + FILE *fp, + PyObject *filename, + PyCompilerFlags *flags); +PyAPI_FUNC(int) PyRun_InteractiveLoopFlags( + FILE *fp, + const char *filename, /* decoded from the filesystem encoding */ + PyCompilerFlags *flags); + + +PyAPI_FUNC(PyObject *) PyRun_StringFlags(const char *, int, PyObject *, + PyObject *, PyCompilerFlags *); + +PyAPI_FUNC(PyObject *) PyRun_FileExFlags( + FILE *fp, + const char *filename, /* decoded from the filesystem encoding */ + int start, + PyObject *globals, + PyObject *locals, + int closeit, + PyCompilerFlags *flags); + + +PyAPI_FUNC(PyObject *) Py_CompileStringExFlags( + const char *str, + const char *filename, /* decoded from the filesystem encoding */ + int start, + PyCompilerFlags *flags, + int optimize); +PyAPI_FUNC(PyObject *) Py_CompileStringObject( + const char *str, + PyObject *filename, int start, + PyCompilerFlags *flags, + int optimize); + +#define Py_CompileString(str, p, s) Py_CompileStringExFlags((str), (p), (s), NULL, -1) 
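+
+/* For example (a sketch; `globals` and `locals` stand for caller-provided
+   dictionaries), a snippet can be compiled once and evaluated later:
+
+       PyObject *code = Py_CompileString("x = 1 + 1", "<embed>", Py_file_input);
+       PyObject *result = code ? PyEval_EvalCode(code, globals, locals) : NULL;
+*/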
+#define Py_CompileStringFlags(str, p, s, f) Py_CompileStringExFlags((str), (p), (s), (f), -1) + +/* A function flavor is also exported by libpython. It is required when + libpython is accessed directly rather than through the header files, which + define the macros below. On Windows, for example, PyAPI_FUNC() uses dllexport to + export functions in pythonXX.dll. */ +PyAPI_FUNC(PyObject *) PyRun_String(const char *str, int s, PyObject *g, PyObject *l); +PyAPI_FUNC(int) PyRun_AnyFile(FILE *fp, const char *name); +PyAPI_FUNC(int) PyRun_AnyFileEx(FILE *fp, const char *name, int closeit); +PyAPI_FUNC(int) PyRun_AnyFileFlags(FILE *, const char *, PyCompilerFlags *); +PyAPI_FUNC(int) PyRun_SimpleString(const char *s); +PyAPI_FUNC(int) PyRun_SimpleFile(FILE *f, const char *p); +PyAPI_FUNC(int) PyRun_SimpleFileEx(FILE *f, const char *p, int c); +PyAPI_FUNC(int) PyRun_InteractiveOne(FILE *f, const char *p); +PyAPI_FUNC(int) PyRun_InteractiveLoop(FILE *f, const char *p); +PyAPI_FUNC(PyObject *) PyRun_File(FILE *fp, const char *p, int s, PyObject *g, PyObject *l); +PyAPI_FUNC(PyObject *) PyRun_FileEx(FILE *fp, const char *p, int s, PyObject *g, PyObject *l, int c); +PyAPI_FUNC(PyObject *) PyRun_FileFlags(FILE *fp, const char *p, int s, PyObject *g, PyObject *l, PyCompilerFlags *flags); + +/* Use macros for a bunch of old variants */ +#define PyRun_String(str, s, g, l) PyRun_StringFlags((str), (s), (g), (l), NULL) +#define PyRun_AnyFile(fp, name) PyRun_AnyFileExFlags((fp), (name), 0, NULL) +#define PyRun_AnyFileEx(fp, name, closeit) \ PyRun_AnyFileExFlags((fp), (name), (closeit), NULL) +#define PyRun_AnyFileFlags(fp, name, flags) \ PyRun_AnyFileExFlags((fp), (name), 0, (flags)) +#define PyRun_SimpleString(s) PyRun_SimpleStringFlags((s), NULL) +#define PyRun_SimpleFile(f, p) PyRun_SimpleFileExFlags((f), (p), 0, NULL) +#define PyRun_SimpleFileEx(f, p, c) PyRun_SimpleFileExFlags((f), (p), (c), NULL) +#define PyRun_InteractiveOne(f, p) PyRun_InteractiveOneFlags((f), (p), NULL) +#define PyRun_InteractiveLoop(f, p) PyRun_InteractiveLoopFlags((f), (p), NULL) +#define PyRun_File(fp, p, s, g, l) \ PyRun_FileExFlags((fp), (p), (s), (g), (l), 0, NULL) +#define PyRun_FileEx(fp, p, s, g, l, c) \ PyRun_FileExFlags((fp), (p), (s), (g), (l), (c), NULL) +#define PyRun_FileFlags(fp, p, s, g, l, flags) \ PyRun_FileExFlags((fp), (p), (s), (g), (l), 0, (flags)) + +/* Stuff with no proper home (yet) */ +PyAPI_FUNC(char *) PyOS_Readline(FILE *, FILE *, const char *); +PyAPI_DATA(char) *(*PyOS_ReadlineFunctionPointer)(FILE *, FILE *, const char *); diff --git a/Include/cpython/pythread.h b/Include/cpython/pythread.h new file mode 100644 index 0000000000000000000000000000000000000000..e658b35bd90700eb1dabad03497c2b52452db9ff --- /dev/null +++ b/Include/cpython/pythread.h @@ -0,0 +1,43 @@ +#ifndef Py_CPYTHON_PYTHREAD_H +# error "this header file must not be included directly" +#endif + +// PY_TIMEOUT_MAX is the highest usable value (in microseconds) of PY_TIMEOUT_T +// type, and depends on the system threading API. +// +// NOTE: this isn't the same value as `_thread.TIMEOUT_MAX`. The _thread module +// exposes a higher-level API, with timeouts expressed in seconds and +// floating-point numbers allowed. +PyAPI_DATA(const long long) PY_TIMEOUT_MAX; + +#define PYTHREAD_INVALID_THREAD_ID ((unsigned long)-1) + +#ifdef HAVE_PTHREAD_H + /* Darwin needs pthread.h to know the type name pthread_key_t.
*/ +# include <pthread.h> +# define NATIVE_TSS_KEY_T pthread_key_t +#elif defined(NT_THREADS) + /* On Windows, the native TSS key type is DWORD, + but we hardcode unsigned long here to avoid depending on an extra include. + */ +# define NATIVE_TSS_KEY_T unsigned long +#elif defined(HAVE_PTHREAD_STUBS) +# include "pthread_stubs.h" +# define NATIVE_TSS_KEY_T pthread_key_t +#else +# error "Require native threads. See https://bugs.python.org/issue31370" +#endif + +/* When Py_LIMITED_API is not defined, the type layout of Py_tss_t is + exposed to allow static allocation by API clients. Even in this case, + TSS keys must be handled through the API functions to preserve compatibility. +*/ +struct _Py_tss_t { + int _is_initialized; + NATIVE_TSS_KEY_T _key; +}; + +#undef NATIVE_TSS_KEY_T + +/* When statically allocated, a Py_tss_t must be initialized with Py_tss_NEEDS_INIT. */ +#define Py_tss_NEEDS_INIT {0} diff --git a/Include/cpython/pytime.h b/Include/cpython/pytime.h new file mode 100644 index 0000000000000000000000000000000000000000..5c68110aeedb867ba90c8cab047fed1489e083ff --- /dev/null +++ b/Include/cpython/pytime.h @@ -0,0 +1,27 @@ +// PyTime_t C API: see Doc/c-api/time.rst for the documentation. + +#ifndef Py_LIMITED_API +#ifndef Py_PYTIME_H +#define Py_PYTIME_H +#ifdef __cplusplus +extern "C" { +#endif + +typedef int64_t PyTime_t; +#define PyTime_MIN INT64_MIN +#define PyTime_MAX INT64_MAX + +PyAPI_FUNC(double) PyTime_AsSecondsDouble(PyTime_t t); +PyAPI_FUNC(int) PyTime_Monotonic(PyTime_t *result); +PyAPI_FUNC(int) PyTime_PerfCounter(PyTime_t *result); +PyAPI_FUNC(int) PyTime_Time(PyTime_t *result); + +PyAPI_FUNC(int) PyTime_MonotonicRaw(PyTime_t *result); +PyAPI_FUNC(int) PyTime_PerfCounterRaw(PyTime_t *result); +PyAPI_FUNC(int) PyTime_TimeRaw(PyTime_t *result); + +#ifdef __cplusplus +} +#endif +#endif /* Py_PYTIME_H */ +#endif /* Py_LIMITED_API */ diff --git a/Include/cpython/setobject.h b/Include/cpython/setobject.h new file mode 100644 index 0000000000000000000000000000000000000000..89565cb29212fc172d82e2b5cf52295dcd3a6184 --- /dev/null +++ b/Include/cpython/setobject.h @@ -0,0 +1,71 @@ +#ifndef Py_CPYTHON_SETOBJECT_H +# error "this header file must not be included directly" +#endif + +/* There are three kinds of entries in the table: + +1. Unused: key == NULL and hash == 0 +2. Dummy: key == dummy and hash == -1 +3. Active: key != NULL and key != dummy and hash != -1 + +The hash field of Unused slots is always zero. + +The hash field of Dummy slots is set to -1, +meaning that dummy entries can be detected by +either entry->key==dummy or by entry->hash==-1. +*/ + +#define PySet_MINSIZE 8 + +typedef struct { + PyObject *key; + Py_hash_t hash; /* Cached hash code of the key */ +} setentry; + +/* The SetObject data structure is shared by set and frozenset objects. + +Invariant for sets: + - hash is -1 + +Invariants for frozensets: + - data is immutable. + - hash is the hash of the frozenset or -1 if not computed yet. + +*/ + +typedef struct { + PyObject_HEAD + + Py_ssize_t fill; /* Number of active and dummy entries */ + Py_ssize_t used; /* Number of active entries */ + + /* The table contains mask + 1 slots, and that's a power of 2. + * We store the mask instead of the size because the mask is more + * frequently needed. + */ + Py_ssize_t mask; + + /* The table points to a fixed-size smalltable for small tables + * or to additional malloc'ed memory for bigger tables. + * The table pointer is never NULL, which saves us from repeated + * runtime null-tests.
+ */ + setentry *table; + Py_hash_t hash; /* Only used by frozenset objects */ + Py_ssize_t finger; /* Search finger for pop() */ + + setentry smalltable[PySet_MINSIZE]; + PyObject *weakreflist; /* List of weak references */ +} PySetObject; + +#define _PySet_CAST(so) \ + (assert(PyAnySet_Check(so)), _Py_CAST(PySetObject*, so)) + +static inline Py_ssize_t PySet_GET_SIZE(PyObject *so) { +#ifdef Py_GIL_DISABLED + return _Py_atomic_load_ssize_relaxed(&(_PySet_CAST(so)->used)); +#else + return _PySet_CAST(so)->used; +#endif +} +#define PySet_GET_SIZE(so) PySet_GET_SIZE(_PyObject_CAST(so)) diff --git a/Include/cpython/sysmodule.h b/Include/cpython/sysmodule.h new file mode 100644 index 0000000000000000000000000000000000000000..a3ac07f538a94f32892d8d98872b8cbced812063 --- /dev/null +++ b/Include/cpython/sysmodule.h @@ -0,0 +1,22 @@ +#ifndef Py_CPYTHON_SYSMODULE_H +# error "this header file must not be included directly" +#endif + +typedef int(*Py_AuditHookFunction)(const char *, PyObject *, void *); + +PyAPI_FUNC(int) PySys_AddAuditHook(Py_AuditHookFunction, void*); + +typedef struct { + FILE* perf_map; + PyThread_type_lock map_lock; +} PerfMapState; + +PyAPI_FUNC(int) PyUnstable_PerfMapState_Init(void); +PyAPI_FUNC(int) PyUnstable_WritePerfMapEntry( + const void *code_addr, + unsigned int code_size, + const char *entry_name); +PyAPI_FUNC(void) PyUnstable_PerfMapState_Fini(void); +PyAPI_FUNC(int) PyUnstable_CopyPerfMapFile(const char* parent_filename); +PyAPI_FUNC(int) PyUnstable_PerfTrampoline_CompileCode(PyCodeObject *); +PyAPI_FUNC(int) PyUnstable_PerfTrampoline_SetPersistAfterFork(int enable); diff --git a/Include/cpython/traceback.h b/Include/cpython/traceback.h new file mode 100644 index 0000000000000000000000000000000000000000..81c51944f136f29396699dbcf8a3ddbd5c21b75a --- /dev/null +++ b/Include/cpython/traceback.h @@ -0,0 +1,13 @@ +#ifndef Py_CPYTHON_TRACEBACK_H +# error "this header file must not be included directly" +#endif + +typedef struct _traceback PyTracebackObject; + +struct _traceback { + PyObject_HEAD + PyTracebackObject *tb_next; + PyFrameObject *tb_frame; + int tb_lasti; + int tb_lineno; +}; diff --git a/Include/cpython/tracemalloc.h b/Include/cpython/tracemalloc.h new file mode 100644 index 0000000000000000000000000000000000000000..6d094291ae2e906451675b94836919b63a7c29f0 --- /dev/null +++ b/Include/cpython/tracemalloc.h @@ -0,0 +1,32 @@ +#ifndef Py_LIMITED_API +#ifndef Py_TRACEMALLOC_H +#define Py_TRACEMALLOC_H +#ifdef __cplusplus +extern "C" { +#endif + +/* Track an allocated memory block in the tracemalloc module. + Return 0 on success, return -1 on error (failed to allocate memory to store + the trace). + + Return -2 if tracemalloc is disabled. + + If memory block is already tracked, update the existing trace. */ +PyAPI_FUNC(int) PyTraceMalloc_Track( + unsigned int domain, + uintptr_t ptr, + size_t size); + +/* Untrack an allocated memory block in the tracemalloc module. + Do nothing if the block was not tracked. + + Return -2 if tracemalloc is disabled, otherwise return 0. 
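+
+   For example (a sketch; my_alloc()/my_free() and the domain value are
+   hypothetical), a custom allocator would pair the two calls:
+
+       #define MY_DOMAIN 12345
+       void *ptr = my_alloc(size);
+       if (ptr != NULL) {
+           (void)PyTraceMalloc_Track(MY_DOMAIN, (uintptr_t)ptr, size);
+       }
+       ...
+       (void)PyTraceMalloc_Untrack(MY_DOMAIN, (uintptr_t)ptr);
+       my_free(ptr);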
*/ +PyAPI_FUNC(int) PyTraceMalloc_Untrack( + unsigned int domain, + uintptr_t ptr); + +#ifdef __cplusplus +} +#endif +#endif // !Py_TRACEMALLOC_H +#endif // !Py_LIMITED_API diff --git a/Include/cpython/tupleobject.h b/Include/cpython/tupleobject.h new file mode 100644 index 0000000000000000000000000000000000000000..e530c8beda44ab3b409948001085d90c21709312 --- /dev/null +++ b/Include/cpython/tupleobject.h @@ -0,0 +1,38 @@ +#ifndef Py_CPYTHON_TUPLEOBJECT_H +# error "this header file must not be included directly" +#endif + +typedef struct { + PyObject_VAR_HEAD + /* ob_item contains space for 'ob_size' elements. + Items must normally not be NULL, except during construction when + the tuple is not yet visible outside the function that builds it. */ + PyObject *ob_item[1]; +} PyTupleObject; + +PyAPI_FUNC(int) _PyTuple_Resize(PyObject **, Py_ssize_t); + +/* Cast argument to PyTupleObject* type. */ +#define _PyTuple_CAST(op) \ + (assert(PyTuple_Check(op)), _Py_CAST(PyTupleObject*, (op))) + +// Macros and static inline functions, trading safety for speed + +static inline Py_ssize_t PyTuple_GET_SIZE(PyObject *op) { + PyTupleObject *tuple = _PyTuple_CAST(op); + return Py_SIZE(tuple); +} +#define PyTuple_GET_SIZE(op) PyTuple_GET_SIZE(_PyObject_CAST(op)) + +#define PyTuple_GET_ITEM(op, index) (_PyTuple_CAST(op)->ob_item[(index)]) + +/* Function *only* to be used to fill in brand new tuples */ +static inline void +PyTuple_SET_ITEM(PyObject *op, Py_ssize_t index, PyObject *value) { + PyTupleObject *tuple = _PyTuple_CAST(op); + assert(0 <= index); + assert(index < Py_SIZE(tuple)); + tuple->ob_item[index] = value; +} +#define PyTuple_SET_ITEM(op, index, value) \ + PyTuple_SET_ITEM(_PyObject_CAST(op), (index), _PyObject_CAST(value)) diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h new file mode 100644 index 0000000000000000000000000000000000000000..d9b54bce83202daf4297e5009caafcf0e22eb018 --- /dev/null +++ b/Include/cpython/unicodeobject.h @@ -0,0 +1,703 @@ +#ifndef Py_CPYTHON_UNICODEOBJECT_H +# error "this header file must not be included directly" +#endif + +/* Py_UNICODE was the native Unicode storage format (code unit) used by + Python and represents a single Unicode element in the Unicode type. + With PEP 393, Py_UNICODE is deprecated and replaced with a + typedef to wchar_t. */ +Py_DEPRECATED(3.13) typedef wchar_t PY_UNICODE_TYPE; +Py_DEPRECATED(3.13) typedef wchar_t Py_UNICODE; + + +/* --- Internal Unicode Operations ---------------------------------------- */ + +// Static inline functions to work with surrogates +static inline int Py_UNICODE_IS_SURROGATE(Py_UCS4 ch) { + return (0xD800 <= ch && ch <= 0xDFFF); +} +static inline int Py_UNICODE_IS_HIGH_SURROGATE(Py_UCS4 ch) { + return (0xD800 <= ch && ch <= 0xDBFF); +} +static inline int Py_UNICODE_IS_LOW_SURROGATE(Py_UCS4 ch) { + return (0xDC00 <= ch && ch <= 0xDFFF); +} + +// Join two surrogate characters and return a single Py_UCS4 value. +static inline Py_UCS4 Py_UNICODE_JOIN_SURROGATES(Py_UCS4 high, Py_UCS4 low) { + assert(Py_UNICODE_IS_HIGH_SURROGATE(high)); + assert(Py_UNICODE_IS_LOW_SURROGATE(low)); + return 0x10000 + (((high & 0x03FF) << 10) | (low & 0x03FF)); +} + +// High surrogate = top 10 bits added to 0xD800. +// The character must be in the range [U+10000; U+10ffff]. +static inline Py_UCS4 Py_UNICODE_HIGH_SURROGATE(Py_UCS4 ch) { + assert(0x10000 <= ch && ch <= 0x10ffff); + return (0xD800 - (0x10000 >> 10) + (ch >> 10)); +} + +// Low surrogate = bottom 10 bits added to 0xDC00. 
+// The character must be in the range [U+10000; U+10ffff]. +static inline Py_UCS4 Py_UNICODE_LOW_SURROGATE(Py_UCS4 ch) { + assert(0x10000 <= ch && ch <= 0x10ffff); + return (0xDC00 + (ch & 0x3FF)); +} + + +/* --- Unicode Type ------------------------------------------------------- */ + +/* ASCII-only strings created through PyUnicode_New use the PyASCIIObject + structure. state.ascii and state.compact are set, and the data + immediately follow the structure. utf8_length can be found + in the length field; the utf8 pointer is equal to the data pointer. */ +typedef struct { + /* There are 4 forms of Unicode strings: + + - compact ascii: + + * structure = PyASCIIObject + * test: PyUnicode_IS_COMPACT_ASCII(op) + * kind = PyUnicode_1BYTE_KIND + * compact = 1 + * ascii = 1 + * (length is the length of the utf8) + * (data starts just after the structure) + * (since ASCII is decoded from UTF-8, the utf8 string is the data) + + - compact: + + * structure = PyCompactUnicodeObject + * test: PyUnicode_IS_COMPACT(op) && !PyUnicode_IS_ASCII(op) + * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or + PyUnicode_4BYTE_KIND + * compact = 1 + * ascii = 0 + * utf8 is not shared with data + * utf8_length = 0 if utf8 is NULL + * (data starts just after the structure) + + - legacy string: + + * structure = PyUnicodeObject structure + * test: !PyUnicode_IS_COMPACT(op) + * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or + PyUnicode_4BYTE_KIND + * compact = 0 + * data.any is not NULL + * utf8 is shared and utf8_length = length with data.any if ascii = 1 + * utf8_length = 0 if utf8 is NULL + + Compact strings use only one memory block (structure + characters), + whereas legacy strings use one block for the structure and one block + for characters. + + Legacy strings are created by subclasses of Unicode. + + See also _PyUnicode_CheckConsistency(). + */ + PyObject_HEAD + Py_ssize_t length; /* Number of code points in the string */ + Py_hash_t hash; /* Hash value; -1 if not set */ + struct { + /* If interned is non-zero, the two references from the + dictionary to this object are *not* counted in ob_refcnt. + The possible values here are: + 0: Not Interned + 1: Interned + 2: Interned and Immortal + 3: Interned, Immortal, and Static + This categorization allows the runtime to determine the right + cleanup mechanism at runtime shutdown. */ + unsigned int interned:2; + /* Character size: + + - PyUnicode_1BYTE_KIND (1): + + * character type = Py_UCS1 (8 bits, unsigned) + * all characters are in the range U+0000-U+00FF (latin1) + * if ascii is set, all characters are in the range U+0000-U+007F + (ASCII), otherwise at least one character is in the range + U+0080-U+00FF + + - PyUnicode_2BYTE_KIND (2): + + * character type = Py_UCS2 (16 bits, unsigned) + * all characters are in the range U+0000-U+FFFF (BMP) + * at least one character is in the range U+0100-U+FFFF + + - PyUnicode_4BYTE_KIND (4): + + * character type = Py_UCS4 (32 bits, unsigned) + * all characters are in the range U+0000-U+10FFFF + * at least one character is in the range U+10000-U+10FFFF + */ + unsigned int kind:3; + /* Compact is with respect to the allocation scheme. Compact unicode + objects only require one memory block while non-compact objects use + one block for the PyUnicodeObject struct and another for its data + buffer. */ + unsigned int compact:1; + /* The string only contains characters in the range U+0000-U+007F (ASCII) + and the kind is PyUnicode_1BYTE_KIND. If ascii is set and compact is + set, use the PyASCIIObject structure.
*/ + unsigned int ascii:1; + /* The object is statically allocated. */ + unsigned int statically_allocated:1; + /* Padding to ensure that PyUnicode_DATA() is always aligned to + 4 bytes (see issue #19537 on m68k). */ + unsigned int :24; + } state; +} PyASCIIObject; + +/* Non-ASCII strings allocated through PyUnicode_New use the + PyCompactUnicodeObject structure. state.compact is set, and the data + immediately follow the structure. */ +typedef struct { + PyASCIIObject _base; + Py_ssize_t utf8_length; /* Number of bytes in utf8, excluding the + * terminating \0. */ + char *utf8; /* UTF-8 representation (null-terminated) */ +} PyCompactUnicodeObject; + +/* Object format for Unicode subclasses. */ +typedef struct { + PyCompactUnicodeObject _base; + union { + void *any; + Py_UCS1 *latin1; + Py_UCS2 *ucs2; + Py_UCS4 *ucs4; + } data; /* Canonical, smallest-form Unicode buffer */ +} PyUnicodeObject; + + +#define _PyASCIIObject_CAST(op) \ + (assert(PyUnicode_Check(op)), \ + _Py_CAST(PyASCIIObject*, (op))) +#define _PyCompactUnicodeObject_CAST(op) \ + (assert(PyUnicode_Check(op)), \ + _Py_CAST(PyCompactUnicodeObject*, (op))) +#define _PyUnicodeObject_CAST(op) \ + (assert(PyUnicode_Check(op)), \ + _Py_CAST(PyUnicodeObject*, (op))) + + +/* --- Flexible String Representation Helper Macros (PEP 393) -------------- */ + +/* Values for PyASCIIObject.state: */ + +/* Interning state. */ +#define SSTATE_NOT_INTERNED 0 +#define SSTATE_INTERNED_MORTAL 1 +#define SSTATE_INTERNED_IMMORTAL 2 +#define SSTATE_INTERNED_IMMORTAL_STATIC 3 + +/* Use only if you know it's a string */ +static inline unsigned int PyUnicode_CHECK_INTERNED(PyObject *op) { + return _PyASCIIObject_CAST(op)->state.interned; +} +#define PyUnicode_CHECK_INTERNED(op) PyUnicode_CHECK_INTERNED(_PyObject_CAST(op)) + +/* For backward compatibility */ +static inline unsigned int PyUnicode_IS_READY(PyObject* Py_UNUSED(op)) { + return 1; +} +#define PyUnicode_IS_READY(op) PyUnicode_IS_READY(_PyObject_CAST(op)) + +/* Return true if the string contains only ASCII characters, or 0 if not. The + string may be compact (PyUnicode_IS_COMPACT_ASCII) or not, but must be + ready. */ +static inline unsigned int PyUnicode_IS_ASCII(PyObject *op) { + return _PyASCIIObject_CAST(op)->state.ascii; +} +#define PyUnicode_IS_ASCII(op) PyUnicode_IS_ASCII(_PyObject_CAST(op)) + +/* Return true if the string is compact or 0 if not. + No type checks or Ready calls are performed. */ +static inline unsigned int PyUnicode_IS_COMPACT(PyObject *op) { + return _PyASCIIObject_CAST(op)->state.compact; +} +#define PyUnicode_IS_COMPACT(op) PyUnicode_IS_COMPACT(_PyObject_CAST(op)) + +/* Return true if the string is a compact ASCII string (use PyASCIIObject + structure), or 0 if not. No type checks or Ready calls are performed. */ +static inline int PyUnicode_IS_COMPACT_ASCII(PyObject *op) { + return (_PyASCIIObject_CAST(op)->state.ascii && PyUnicode_IS_COMPACT(op)); +} +#define PyUnicode_IS_COMPACT_ASCII(op) PyUnicode_IS_COMPACT_ASCII(_PyObject_CAST(op)) + +enum PyUnicode_Kind { +/* Return values of the PyUnicode_KIND() function: */ + PyUnicode_1BYTE_KIND = 1, + PyUnicode_2BYTE_KIND = 2, + PyUnicode_4BYTE_KIND = 4 +}; + +// PyUnicode_KIND(): Return one of the PyUnicode_*_KIND values defined above. +// +// gh-89653: Converting this macro to a static inline function would introduce +// new compiler warnings on "kind < PyUnicode_KIND(str)" (compare signed and +// unsigned numbers) where kind type is an int or on +// "unsigned int kind = PyUnicode_KIND(str)" (cast signed to unsigned). 
+#define PyUnicode_KIND(op) _Py_RVALUE(_PyASCIIObject_CAST(op)->state.kind) + +/* Return a void pointer to the raw unicode buffer. */ +static inline void* _PyUnicode_COMPACT_DATA(PyObject *op) { + if (PyUnicode_IS_ASCII(op)) { + return _Py_STATIC_CAST(void*, (_PyASCIIObject_CAST(op) + 1)); + } + return _Py_STATIC_CAST(void*, (_PyCompactUnicodeObject_CAST(op) + 1)); +} + +static inline void* _PyUnicode_NONCOMPACT_DATA(PyObject *op) { + void *data; + assert(!PyUnicode_IS_COMPACT(op)); + data = _PyUnicodeObject_CAST(op)->data.any; + assert(data != NULL); + return data; +} + +static inline void* PyUnicode_DATA(PyObject *op) { + if (PyUnicode_IS_COMPACT(op)) { + return _PyUnicode_COMPACT_DATA(op); + } + return _PyUnicode_NONCOMPACT_DATA(op); +} +#define PyUnicode_DATA(op) PyUnicode_DATA(_PyObject_CAST(op)) + +/* Return pointers to the canonical representation cast to unsigned char, + Py_UCS2, or Py_UCS4 for direct character access. + No checks are performed; use PyUnicode_KIND() first to ensure + these will work correctly. */ + +#define PyUnicode_1BYTE_DATA(op) _Py_STATIC_CAST(Py_UCS1*, PyUnicode_DATA(op)) +#define PyUnicode_2BYTE_DATA(op) _Py_STATIC_CAST(Py_UCS2*, PyUnicode_DATA(op)) +#define PyUnicode_4BYTE_DATA(op) _Py_STATIC_CAST(Py_UCS4*, PyUnicode_DATA(op)) + +/* Returns the length of the unicode string. */ +static inline Py_ssize_t PyUnicode_GET_LENGTH(PyObject *op) { + return _PyASCIIObject_CAST(op)->length; +} +#define PyUnicode_GET_LENGTH(op) PyUnicode_GET_LENGTH(_PyObject_CAST(op)) + +/* Write into the canonical representation. This function does not do any sanity + checks and is intended for usage in loops. The caller should cache the + kind and data pointers obtained from other function calls. + index is the index in the string (starts at 0) and value is the new + code point value which should be written to that location. */ +static inline void PyUnicode_WRITE(int kind, void *data, + Py_ssize_t index, Py_UCS4 value) +{ + assert(index >= 0); + if (kind == PyUnicode_1BYTE_KIND) { + assert(value <= 0xffU); + _Py_STATIC_CAST(Py_UCS1*, data)[index] = _Py_STATIC_CAST(Py_UCS1, value); + } + else if (kind == PyUnicode_2BYTE_KIND) { + assert(value <= 0xffffU); + _Py_STATIC_CAST(Py_UCS2*, data)[index] = _Py_STATIC_CAST(Py_UCS2, value); + } + else { + assert(kind == PyUnicode_4BYTE_KIND); + assert(value <= 0x10ffffU); + _Py_STATIC_CAST(Py_UCS4*, data)[index] = value; + } +} +#define PyUnicode_WRITE(kind, data, index, value) \ + PyUnicode_WRITE(_Py_STATIC_CAST(int, kind), _Py_CAST(void*, data), \ + (index), _Py_STATIC_CAST(Py_UCS4, value)) + +/* Read a code point from the string's canonical representation. No checks + or ready calls are performed. */ +static inline Py_UCS4 PyUnicode_READ(int kind, + const void *data, Py_ssize_t index) +{ + assert(index >= 0); + if (kind == PyUnicode_1BYTE_KIND) { + return _Py_STATIC_CAST(const Py_UCS1*, data)[index]; + } + if (kind == PyUnicode_2BYTE_KIND) { + return _Py_STATIC_CAST(const Py_UCS2*, data)[index]; + } + assert(kind == PyUnicode_4BYTE_KIND); + return _Py_STATIC_CAST(const Py_UCS4*, data)[index]; +} +#define PyUnicode_READ(kind, data, index) \ + PyUnicode_READ(_Py_STATIC_CAST(int, kind), \ + _Py_STATIC_CAST(const void*, data), \ + (index)) + +/* PyUnicode_READ_CHAR() is less efficient than PyUnicode_READ() because it + calls PyUnicode_KIND() and might call it twice. For single reads, use + PyUnicode_READ_CHAR; for multiple consecutive reads, callers should + cache the kind and use PyUnicode_READ instead.
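+
+   A sketch of that cached-read pattern (`str` is assumed to be a PyObject*
+   pointing to a unicode object):
+
+       Py_ssize_t i, len = PyUnicode_GET_LENGTH(str);
+       int kind = PyUnicode_KIND(str);
+       const void *data = PyUnicode_DATA(str);
+       for (i = 0; i < len; i++) {
+           Py_UCS4 ch = PyUnicode_READ(kind, data, i);
+           ...
+       }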
*/ +static inline Py_UCS4 PyUnicode_READ_CHAR(PyObject *unicode, Py_ssize_t index) +{ + int kind; + + assert(index >= 0); + // Tolerate reading the NUL character at str[len(str)] + assert(index <= PyUnicode_GET_LENGTH(unicode)); + + kind = PyUnicode_KIND(unicode); + if (kind == PyUnicode_1BYTE_KIND) { + return PyUnicode_1BYTE_DATA(unicode)[index]; + } + if (kind == PyUnicode_2BYTE_KIND) { + return PyUnicode_2BYTE_DATA(unicode)[index]; + } + assert(kind == PyUnicode_4BYTE_KIND); + return PyUnicode_4BYTE_DATA(unicode)[index]; +} +#define PyUnicode_READ_CHAR(unicode, index) \ + PyUnicode_READ_CHAR(_PyObject_CAST(unicode), (index)) + +/* Return a maximum character value which is suitable for creating another + string based on op. This is always an approximation but more efficient + than iterating over the string. */ +static inline Py_UCS4 PyUnicode_MAX_CHAR_VALUE(PyObject *op) +{ + int kind; + + if (PyUnicode_IS_ASCII(op)) { + return 0x7fU; + } + + kind = PyUnicode_KIND(op); + if (kind == PyUnicode_1BYTE_KIND) { + return 0xffU; + } + if (kind == PyUnicode_2BYTE_KIND) { + return 0xffffU; + } + assert(kind == PyUnicode_4BYTE_KIND); + return 0x10ffffU; +} +#define PyUnicode_MAX_CHAR_VALUE(op) \ + PyUnicode_MAX_CHAR_VALUE(_PyObject_CAST(op)) + + +/* === Public API ========================================================= */ + +/* With PEP 393, this is the recommended way to allocate a new unicode object. + This function will allocate the object and its buffer in a single memory + block. Objects created using this function are not resizable. */ +PyAPI_FUNC(PyObject*) PyUnicode_New( + Py_ssize_t size, /* Number of code points in the new string */ + Py_UCS4 maxchar /* maximum code point value in the string */ + ); + +/* For backward compatibility */ +static inline int PyUnicode_READY(PyObject* Py_UNUSED(op)) +{ + return 0; +} +#define PyUnicode_READY(op) PyUnicode_READY(_PyObject_CAST(op)) + +/* Copy characters from one unicode object into another. This function performs + character conversion when necessary and falls back to memcpy() if possible. + + Fail if *to* is too small (smaller than *how_many* or smaller than + len(from)-from_start), or if kind(from[from_start:from_start+how_many]) > + kind(to), or if *to* has more than 1 reference. + + Return the number of written characters, or return -1 and raise an exception + on error. + + Pseudo-code: + + how_many = min(how_many, len(from) - from_start) + to[to_start:to_start+how_many] = from[from_start:from_start+how_many] + return how_many + + Note: The function doesn't write a terminating null character. + */ +PyAPI_FUNC(Py_ssize_t) PyUnicode_CopyCharacters( + PyObject *to, + Py_ssize_t to_start, + PyObject *from, + Py_ssize_t from_start, + Py_ssize_t how_many + ); + +/* Fill a string with a character: write fill_char into + unicode[start:start+length]. + + Fail if fill_char is bigger than the string maximum character, or if the + string has more than 1 reference. + + Return the number of written characters, or return -1 and raise an exception + on error. */ +PyAPI_FUNC(Py_ssize_t) PyUnicode_Fill( + PyObject *unicode, + Py_ssize_t start, + Py_ssize_t length, + Py_UCS4 fill_char + ); + +/* Create a new string from a buffer of Py_UCS1, Py_UCS2 or Py_UCS4 characters. + Scan the string to find the maximum character.
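+
+   For example (a sketch), building "Hi" followed by U+2603 from UCS2 data:
+
+       const Py_UCS2 buf[3] = {0x48, 0x69, 0x2603};
+       PyObject *s = PyUnicode_FromKindAndData(PyUnicode_2BYTE_KIND, buf, 3);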
*/ +PyAPI_FUNC(PyObject*) PyUnicode_FromKindAndData( + int kind, + const void *buffer, + Py_ssize_t size); + + +/* --- _PyUnicodeWriter API ----------------------------------------------- */ + +typedef struct { + PyObject *buffer; + void *data; + int kind; + Py_UCS4 maxchar; + Py_ssize_t size; + Py_ssize_t pos; + + /* minimum number of allocated characters (default: 0) */ + Py_ssize_t min_length; + + /* minimum character (default: 127, ASCII) */ + Py_UCS4 min_char; + + /* If non-zero, overallocate the buffer (default: 0). */ + unsigned char overallocate; + + /* If readonly is 1, buffer is a shared string (cannot be modified) + and size is set to 0. */ + unsigned char readonly; +} _PyUnicodeWriter; + +// Initialize a Unicode writer. +// +// By default, the minimum buffer size is 0 characters and overallocation is +// disabled. Set min_length, min_char and overallocate attributes to control +// the allocation of the buffer. +PyAPI_FUNC(void) +_PyUnicodeWriter_Init(_PyUnicodeWriter *writer); + +/* Prepare the buffer to write 'length' characters + with the specified maximum character. + + Return 0 on success, raise an exception and return -1 on error. */ +#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR) \ + (((MAXCHAR) <= (WRITER)->maxchar \ + && (LENGTH) <= (WRITER)->size - (WRITER)->pos) \ + ? 0 \ + : (((LENGTH) == 0) \ + ? 0 \ + : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR)))) + +/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro + instead. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer, + Py_ssize_t length, Py_UCS4 maxchar); + +/* Prepare the buffer to have at least the kind KIND. + For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will + support characters in the range U+0000-U+FFFF. + + Return 0 on success, raise an exception and return -1 on error. */ +#define _PyUnicodeWriter_PrepareKind(WRITER, KIND) \ + ((KIND) <= (WRITER)->kind \ + ? 0 \ + : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND))) + +/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind() + macro instead. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer, + int kind); + +/* Append a Unicode character. + Return 0 on success, raise an exception and return -1 on error. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer, + Py_UCS4 ch + ); + +/* Append a Unicode string. + Return 0 on success, raise an exception and return -1 on error. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer, + PyObject *str /* Unicode string */ + ); + +/* Append a substring of a Unicode string. + Return 0 on success, raise an exception and return -1 on error. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer, + PyObject *str, /* Unicode string */ + Py_ssize_t start, + Py_ssize_t end + ); + +/* Append an ASCII-encoded byte string. + Return 0 on success, raise an exception and return -1 on error. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer, + const char *str, /* ASCII-encoded byte string */ + Py_ssize_t len /* number of bytes, or -1 if unknown */ + ); + +/* Append a latin1-encoded byte string. + Return 0 on success, raise an exception and return -1 on error. */ +PyAPI_FUNC(int) +_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer, + const char *str, /* latin1-encoded byte string */ + Py_ssize_t len /* length in bytes */ + ); + +/* Get the value of the writer as a Unicode string.
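+   A typical life cycle, as a sketch:
+
+       _PyUnicodeWriter writer;
+       _PyUnicodeWriter_Init(&writer);
+       if (_PyUnicodeWriter_WriteASCIIString(&writer, "abc", 3) < 0) {
+           _PyUnicodeWriter_Dealloc(&writer);
+           return NULL;
+       }
+       return _PyUnicodeWriter_Finish(&writer);
+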
Clear the + buffer of the writer. Raise an exception and return NULL + on error. */ +PyAPI_FUNC(PyObject *) +_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer); + +/* Deallocate memory of a writer (clear its internal buffer). */ +PyAPI_FUNC(void) +_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer); + + +/* --- Manage the default encoding ---------------------------------------- */ + +/* Returns a pointer to the default encoding (UTF-8) of the + Unicode object unicode. + + Like PyUnicode_AsUTF8AndSize(), this also caches the UTF-8 representation + in the unicodeobject. + + _PyUnicode_AsString is a #define for PyUnicode_AsUTF8 to + support the previous internal function with the same behaviour. + + Use of this API is DEPRECATED since no size information can be + extracted from the returned data. +*/ + +PyAPI_FUNC(const char *) PyUnicode_AsUTF8(PyObject *unicode); + +// Alias kept for backward compatibility +#define _PyUnicode_AsString PyUnicode_AsUTF8 + + +/* === Characters Type APIs =============================================== */ + +/* These should not be used directly. Use the Py_UNICODE_IS* and + Py_UNICODE_TO* macros instead. + + These APIs are implemented in Objects/unicodectype.c. + +*/ + +PyAPI_FUNC(int) _PyUnicode_IsLowercase( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsUppercase( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsTitlecase( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsWhitespace( + const Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsLinebreak( + const Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_ToDecimalDigit( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_ToDigit( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(double) _PyUnicode_ToNumeric( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsDecimalDigit( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsDigit( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsNumeric( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsPrintable( + Py_UCS4 ch /* Unicode character */ + ); + +PyAPI_FUNC(int) _PyUnicode_IsAlpha( + Py_UCS4 ch /* Unicode character */ + ); + +// Helper array used by Py_UNICODE_ISSPACE(). +PyAPI_DATA(const unsigned char) _Py_ascii_whitespace[]; + +// Since splitting on whitespace is an important use case, and +// whitespace in most situations is solely ASCII whitespace, we +// optimize for the common case by using a quick look-up table +// _Py_ascii_whitespace (see below) with an inlined check. 
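+//
+// For example (a sketch, reusing the cached kind/data pattern from
+// PyUnicode_READ above), skipping leading whitespace in a scan:
+//
+//     while (i < len && Py_UNICODE_ISSPACE(PyUnicode_READ(kind, data, i))) {
+//         i++;
+//     }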
+static inline int Py_UNICODE_ISSPACE(Py_UCS4 ch) { + if (ch < 128) { + return _Py_ascii_whitespace[ch]; + } + return _PyUnicode_IsWhitespace(ch); +} + +#define Py_UNICODE_ISLOWER(ch) _PyUnicode_IsLowercase(ch) +#define Py_UNICODE_ISUPPER(ch) _PyUnicode_IsUppercase(ch) +#define Py_UNICODE_ISTITLE(ch) _PyUnicode_IsTitlecase(ch) +#define Py_UNICODE_ISLINEBREAK(ch) _PyUnicode_IsLinebreak(ch) + +#define Py_UNICODE_TOLOWER(ch) _PyUnicode_ToLowercase(ch) +#define Py_UNICODE_TOUPPER(ch) _PyUnicode_ToUppercase(ch) +#define Py_UNICODE_TOTITLE(ch) _PyUnicode_ToTitlecase(ch) + +#define Py_UNICODE_ISDECIMAL(ch) _PyUnicode_IsDecimalDigit(ch) +#define Py_UNICODE_ISDIGIT(ch) _PyUnicode_IsDigit(ch) +#define Py_UNICODE_ISNUMERIC(ch) _PyUnicode_IsNumeric(ch) +#define Py_UNICODE_ISPRINTABLE(ch) _PyUnicode_IsPrintable(ch) + +#define Py_UNICODE_TODECIMAL(ch) _PyUnicode_ToDecimalDigit(ch) +#define Py_UNICODE_TODIGIT(ch) _PyUnicode_ToDigit(ch) +#define Py_UNICODE_TONUMERIC(ch) _PyUnicode_ToNumeric(ch) + +#define Py_UNICODE_ISALPHA(ch) _PyUnicode_IsAlpha(ch) + +static inline int Py_UNICODE_ISALNUM(Py_UCS4 ch) { + return (Py_UNICODE_ISALPHA(ch) + || Py_UNICODE_ISDECIMAL(ch) + || Py_UNICODE_ISDIGIT(ch) + || Py_UNICODE_ISNUMERIC(ch)); +} + + +/* === Misc functions ===================================================== */ + +// Return an interned Unicode object for an Identifier; may fail if there is no +// memory. +PyAPI_FUNC(PyObject*) _PyUnicode_FromId(_Py_Identifier*); diff --git a/Include/cpython/warnings.h b/Include/cpython/warnings.h new file mode 100644 index 0000000000000000000000000000000000000000..8731fd2e96b716fd87d988d40320e0ab4b15651f --- /dev/null +++ b/Include/cpython/warnings.h @@ -0,0 +1,26 @@ +#ifndef Py_CPYTHON_WARNINGS_H +# error "this header file must not be included directly" +#endif + +PyAPI_FUNC(int) PyErr_WarnExplicitObject( + PyObject *category, + PyObject *message, + PyObject *filename, + int lineno, + PyObject *module, + PyObject *registry); + +PyAPI_FUNC(int) PyErr_WarnExplicitFormat( + PyObject *category, + const char *filename, int lineno, + const char *module, PyObject *registry, + const char *format, ...); + +// DEPRECATED: Use PyErr_WarnEx() instead. +#define PyErr_Warn(category, msg) PyErr_WarnEx((category), (msg), 1) + +int _PyErr_WarnExplicitObjectWithContext( + PyObject *category, + PyObject *message, + PyObject *filename, + int lineno); diff --git a/Include/cpython/weakrefobject.h b/Include/cpython/weakrefobject.h new file mode 100644 index 0000000000000000000000000000000000000000..28acf7265a08563af3de1deaf71cea08a6ec08d7 --- /dev/null +++ b/Include/cpython/weakrefobject.h @@ -0,0 +1,63 @@ +#ifndef Py_CPYTHON_WEAKREFOBJECT_H +# error "this header file must not be included directly" +#endif + +/* PyWeakReference is the base struct for the Python ReferenceType, ProxyType, + * and CallableProxyType. + */ +struct _PyWeakReference { + PyObject_HEAD + + /* The object to which this is a weak reference, or Py_None if none. + * Note that this is a stealth reference: wr_object's refcount is + * not incremented to reflect this pointer. + */ + PyObject *wr_object; + + /* A callable to invoke when wr_object dies, or NULL if none. */ + PyObject *wr_callback; + + /* A cache for wr_object's hash code. As usual for hashes, this is -1 + * if the hash code isn't known yet. + */ + Py_hash_t hash; + + /* If wr_object is weakly referenced, wr_object has a doubly-linked NULL- + * terminated list of weak references to it. These are the list pointers. 
+ * If wr_object goes away, wr_object is set to Py_None, and these pointers + * have no meaning then. + */ + PyWeakReference *wr_prev; + PyWeakReference *wr_next; + vectorcallfunc vectorcall; + +#ifdef Py_GIL_DISABLED + /* Pointer to the lock used when clearing in free-threaded builds. + * Normally this can be derived from wr_object, but in some cases we need + * to lock after wr_object has been set to Py_None. + */ + PyMutex *weakrefs_lock; +#endif +}; + +PyAPI_FUNC(void) _PyWeakref_ClearRef(PyWeakReference *self); + +Py_DEPRECATED(3.13) static inline PyObject* PyWeakref_GET_OBJECT(PyObject *ref_obj) +{ + PyWeakReference *ref; + PyObject *obj; + assert(PyWeakref_Check(ref_obj)); + ref = _Py_CAST(PyWeakReference*, ref_obj); + obj = ref->wr_object; + // Explanation for the Py_REFCNT() check: when a weakref's target is part + // of a long chain of deallocations which triggers the trashcan mechanism, + // clearing the weakrefs can be delayed long after the target's refcount + // has dropped to zero. In the meantime, code accessing the weakref will + // be able to "see" the target object even though it is supposed to be + // unreachable. See issue gh-60806. + if (Py_REFCNT(obj) > 0) { + return obj; + } + return Py_None; +} +#define PyWeakref_GET_OBJECT(ref) PyWeakref_GET_OBJECT(_PyObject_CAST(ref)) diff --git a/Include/critical_section.h b/Include/critical_section.h new file mode 100644 index 0000000000000000000000000000000000000000..3b37615a8b17e2a457b2043c7f24165984e8a4ef --- /dev/null +++ b/Include/critical_section.h @@ -0,0 +1,16 @@ +#ifndef Py_CRITICAL_SECTION_H +#define Py_CRITICAL_SECTION_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_CRITICAL_SECTION_H +# include "cpython/critical_section.h" +# undef Py_CPYTHON_CRITICAL_SECTION_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_CRITICAL_SECTION_H */ diff --git a/Include/datetime.h b/Include/datetime.h new file mode 100644 index 0000000000000000000000000000000000000000..b78cc0e8e2e5accd5952438b9fb09c056cc057b7 --- /dev/null +++ b/Include/datetime.h @@ -0,0 +1,267 @@ +/* datetime.h + */ +#ifndef Py_LIMITED_API +#ifndef DATETIME_H +#define DATETIME_H +#ifdef __cplusplus +extern "C" { +#endif + +/* Fields are packed into successive bytes, each viewed as unsigned and + * big-endian, unless otherwise noted: + * + * byte offset + * 0 year 2 bytes, 1-9999 + * 2 month 1 byte, 1-12 + * 3 day 1 byte, 1-31 + * 4 hour 1 byte, 0-23 + * 5 minute 1 byte, 0-59 + * 6 second 1 byte, 0-59 + * 7 usecond 3 bytes, 0-999999 + * 10 + */ + +/* # of bytes for year, month, and day. */ +#define _PyDateTime_DATE_DATASIZE 4 + +/* # of bytes for hour, minute, second, and usecond. */ +#define _PyDateTime_TIME_DATASIZE 6 + +/* # of bytes for year, month, day, hour, minute, second, and usecond. */ +#define _PyDateTime_DATETIME_DATASIZE 10 + + +typedef struct +{ + PyObject_HEAD + Py_hash_t hashcode; /* -1 when unknown */ + int days; /* -MAX_DELTA_DAYS <= days <= MAX_DELTA_DAYS */ + int seconds; /* 0 <= seconds < 24*3600 is invariant */ + int microseconds; /* 0 <= microseconds < 1000000 is invariant */ +} PyDateTime_Delta; + +typedef struct +{ + PyObject_HEAD /* a pure abstract base class */ +} PyDateTime_TZInfo; + + +/* The datetime and time types have hashcodes, and an optional tzinfo member, + * present if and only if hastzinfo is true. 
+ */ +#define _PyTZINFO_HEAD \ + PyObject_HEAD \ + Py_hash_t hashcode; \ + char hastzinfo; /* boolean flag */ + +/* No _PyDateTime_BaseTZInfo is allocated; it's just to have something + * convenient to cast to, when getting at the hastzinfo member of objects + * starting with _PyTZINFO_HEAD. + */ +typedef struct +{ + _PyTZINFO_HEAD +} _PyDateTime_BaseTZInfo; + +/* All time objects are of PyDateTime_TimeType, but that can be allocated + * in two ways, with or without a tzinfo member. Without is the same as + * tzinfo == None, but consumes less memory. _PyDateTime_BaseTime is an + * internal struct used to allocate the right amount of space for the + * "without" case. + */ +#define _PyDateTime_TIMEHEAD \ + _PyTZINFO_HEAD \ + unsigned char data[_PyDateTime_TIME_DATASIZE]; + +typedef struct +{ + _PyDateTime_TIMEHEAD +} _PyDateTime_BaseTime; /* hastzinfo false */ + +typedef struct +{ + _PyDateTime_TIMEHEAD + unsigned char fold; + PyObject *tzinfo; +} PyDateTime_Time; /* hastzinfo true */ + + +/* All datetime objects are of PyDateTime_DateTimeType, but that can be + * allocated in two ways too, just like for time objects above. In addition, + * the plain date type is a base class for datetime, so it must also have + * a hastzinfo member (although it's unused there). + */ +typedef struct +{ + _PyTZINFO_HEAD + unsigned char data[_PyDateTime_DATE_DATASIZE]; +} PyDateTime_Date; + +#define _PyDateTime_DATETIMEHEAD \ + _PyTZINFO_HEAD \ + unsigned char data[_PyDateTime_DATETIME_DATASIZE]; + +typedef struct +{ + _PyDateTime_DATETIMEHEAD +} _PyDateTime_BaseDateTime; /* hastzinfo false */ + +typedef struct +{ + _PyDateTime_DATETIMEHEAD + unsigned char fold; + PyObject *tzinfo; +} PyDateTime_DateTime; /* hastzinfo true */ + + +/* Apply for date and datetime instances. */ + +// o is a pointer to a time or a datetime object. +#define _PyDateTime_HAS_TZINFO(o) (((_PyDateTime_BaseTZInfo *)(o))->hastzinfo) + +#define PyDateTime_GET_YEAR(o) ((((PyDateTime_Date*)(o))->data[0] << 8) | \ + ((PyDateTime_Date*)(o))->data[1]) +#define PyDateTime_GET_MONTH(o) (((PyDateTime_Date*)(o))->data[2]) +#define PyDateTime_GET_DAY(o) (((PyDateTime_Date*)(o))->data[3]) + +#define PyDateTime_DATE_GET_HOUR(o) (((PyDateTime_DateTime*)(o))->data[4]) +#define PyDateTime_DATE_GET_MINUTE(o) (((PyDateTime_DateTime*)(o))->data[5]) +#define PyDateTime_DATE_GET_SECOND(o) (((PyDateTime_DateTime*)(o))->data[6]) +#define PyDateTime_DATE_GET_MICROSECOND(o) \ + ((((PyDateTime_DateTime*)(o))->data[7] << 16) | \ + (((PyDateTime_DateTime*)(o))->data[8] << 8) | \ + ((PyDateTime_DateTime*)(o))->data[9]) +#define PyDateTime_DATE_GET_FOLD(o) (((PyDateTime_DateTime*)(o))->fold) +#define PyDateTime_DATE_GET_TZINFO(o) (_PyDateTime_HAS_TZINFO((o)) ? \ + ((PyDateTime_DateTime *)(o))->tzinfo : Py_None) + +/* Apply for time instances. */ +#define PyDateTime_TIME_GET_HOUR(o) (((PyDateTime_Time*)(o))->data[0]) +#define PyDateTime_TIME_GET_MINUTE(o) (((PyDateTime_Time*)(o))->data[1]) +#define PyDateTime_TIME_GET_SECOND(o) (((PyDateTime_Time*)(o))->data[2]) +#define PyDateTime_TIME_GET_MICROSECOND(o) \ + ((((PyDateTime_Time*)(o))->data[3] << 16) | \ + (((PyDateTime_Time*)(o))->data[4] << 8) | \ + ((PyDateTime_Time*)(o))->data[5]) +#define PyDateTime_TIME_GET_FOLD(o) (((PyDateTime_Time*)(o))->fold) +#define PyDateTime_TIME_GET_TZINFO(o) (_PyDateTime_HAS_TZINFO(o) ? 
\ + ((PyDateTime_Time *)(o))->tzinfo : Py_None) + +/* Apply for time delta instances */ +#define PyDateTime_DELTA_GET_DAYS(o) (((PyDateTime_Delta*)(o))->days) +#define PyDateTime_DELTA_GET_SECONDS(o) (((PyDateTime_Delta*)(o))->seconds) +#define PyDateTime_DELTA_GET_MICROSECONDS(o) \ + (((PyDateTime_Delta*)(o))->microseconds) + + +/* Define structure for C API. */ +typedef struct { + /* type objects */ + PyTypeObject *DateType; + PyTypeObject *DateTimeType; + PyTypeObject *TimeType; + PyTypeObject *DeltaType; + PyTypeObject *TZInfoType; + + /* singletons */ + PyObject *TimeZone_UTC; + + /* constructors */ + PyObject *(*Date_FromDate)(int, int, int, PyTypeObject*); + PyObject *(*DateTime_FromDateAndTime)(int, int, int, int, int, int, int, + PyObject*, PyTypeObject*); + PyObject *(*Time_FromTime)(int, int, int, int, PyObject*, PyTypeObject*); + PyObject *(*Delta_FromDelta)(int, int, int, int, PyTypeObject*); + PyObject *(*TimeZone_FromTimeZone)(PyObject *offset, PyObject *name); + + /* constructors for the DB API */ + PyObject *(*DateTime_FromTimestamp)(PyObject*, PyObject*, PyObject*); + PyObject *(*Date_FromTimestamp)(PyObject*, PyObject*); + + /* PEP 495 constructors */ + PyObject *(*DateTime_FromDateAndTimeAndFold)(int, int, int, int, int, int, int, + PyObject*, int, PyTypeObject*); + PyObject *(*Time_FromTimeAndFold)(int, int, int, int, PyObject*, int, PyTypeObject*); + +} PyDateTime_CAPI; + +#define PyDateTime_CAPSULE_NAME "datetime.datetime_CAPI" + + +/* This block is only used as part of the public API and should not be + * included in _datetimemodule.c, which does not use the C API capsule. + * See bpo-35081 for more details. + * */ +#ifndef _PY_DATETIME_IMPL +/* Define global variable for the C API and a macro for setting it. */ +static PyDateTime_CAPI *PyDateTimeAPI = NULL; + +#define PyDateTime_IMPORT \ + PyDateTimeAPI = (PyDateTime_CAPI *)PyCapsule_Import(PyDateTime_CAPSULE_NAME, 0) + +/* Macro for access to the UTC singleton */ +#define PyDateTime_TimeZone_UTC PyDateTimeAPI->TimeZone_UTC + +/* Macros for type checking when not building the Python core. */ +#define PyDate_Check(op) PyObject_TypeCheck((op), PyDateTimeAPI->DateType) +#define PyDate_CheckExact(op) Py_IS_TYPE((op), PyDateTimeAPI->DateType) + +#define PyDateTime_Check(op) PyObject_TypeCheck((op), PyDateTimeAPI->DateTimeType) +#define PyDateTime_CheckExact(op) Py_IS_TYPE((op), PyDateTimeAPI->DateTimeType) + +#define PyTime_Check(op) PyObject_TypeCheck((op), PyDateTimeAPI->TimeType) +#define PyTime_CheckExact(op) Py_IS_TYPE((op), PyDateTimeAPI->TimeType) + +#define PyDelta_Check(op) PyObject_TypeCheck((op), PyDateTimeAPI->DeltaType) +#define PyDelta_CheckExact(op) Py_IS_TYPE((op), PyDateTimeAPI->DeltaType) + +#define PyTZInfo_Check(op) PyObject_TypeCheck((op), PyDateTimeAPI->TZInfoType) +#define PyTZInfo_CheckExact(op) Py_IS_TYPE((op), PyDateTimeAPI->TZInfoType) + + +/* Macros for accessing constructors in a simplified fashion. 
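+
+   For example (a sketch; PyDateTime_IMPORT must have been executed first):
+
+       PyObject *d = PyDate_FromDate(2024, 1, 1);
+       PyObject *dt = PyDateTime_FromDateAndTime(2024, 1, 1, 12, 30, 0, 0);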
*/ +#define PyDate_FromDate(year, month, day) \ + PyDateTimeAPI->Date_FromDate((year), (month), (day), PyDateTimeAPI->DateType) + +#define PyDateTime_FromDateAndTime(year, month, day, hour, min, sec, usec) \ + PyDateTimeAPI->DateTime_FromDateAndTime((year), (month), (day), (hour), \ + (min), (sec), (usec), Py_None, PyDateTimeAPI->DateTimeType) + +#define PyDateTime_FromDateAndTimeAndFold(year, month, day, hour, min, sec, usec, fold) \ + PyDateTimeAPI->DateTime_FromDateAndTimeAndFold((year), (month), (day), (hour), \ + (min), (sec), (usec), Py_None, (fold), PyDateTimeAPI->DateTimeType) + +#define PyTime_FromTime(hour, minute, second, usecond) \ + PyDateTimeAPI->Time_FromTime((hour), (minute), (second), (usecond), \ + Py_None, PyDateTimeAPI->TimeType) + +#define PyTime_FromTimeAndFold(hour, minute, second, usecond, fold) \ + PyDateTimeAPI->Time_FromTimeAndFold((hour), (minute), (second), (usecond), \ + Py_None, (fold), PyDateTimeAPI->TimeType) + +#define PyDelta_FromDSU(days, seconds, useconds) \ + PyDateTimeAPI->Delta_FromDelta((days), (seconds), (useconds), 1, \ + PyDateTimeAPI->DeltaType) + +#define PyTimeZone_FromOffset(offset) \ + PyDateTimeAPI->TimeZone_FromTimeZone((offset), NULL) + +#define PyTimeZone_FromOffsetAndName(offset, name) \ + PyDateTimeAPI->TimeZone_FromTimeZone((offset), (name)) + +/* Macros supporting the DB API. */ +#define PyDateTime_FromTimestamp(args) \ + PyDateTimeAPI->DateTime_FromTimestamp( \ + (PyObject*) (PyDateTimeAPI->DateTimeType), (args), NULL) + +#define PyDate_FromTimestamp(args) \ + PyDateTimeAPI->Date_FromTimestamp( \ + (PyObject*) (PyDateTimeAPI->DateType), (args)) + +#endif /* !defined(_PY_DATETIME_IMPL) */ + +#ifdef __cplusplus +} +#endif +#endif +#endif /* !Py_LIMITED_API */ diff --git a/Include/descrobject.h b/Include/descrobject.h new file mode 100644 index 0000000000000000000000000000000000000000..fd66d17b497a31f295d5791c49fb879fad8883cf --- /dev/null +++ b/Include/descrobject.h @@ -0,0 +1,100 @@ +/* Descriptors */ +#ifndef Py_DESCROBJECT_H +#define Py_DESCROBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +typedef PyObject *(*getter)(PyObject *, void *); +typedef int (*setter)(PyObject *, PyObject *, void *); + +struct PyGetSetDef { + const char *name; + getter get; + setter set; + const char *doc; + void *closure; +}; + +PyAPI_DATA(PyTypeObject) PyClassMethodDescr_Type; +PyAPI_DATA(PyTypeObject) PyGetSetDescr_Type; +PyAPI_DATA(PyTypeObject) PyMemberDescr_Type; +PyAPI_DATA(PyTypeObject) PyMethodDescr_Type; +PyAPI_DATA(PyTypeObject) PyWrapperDescr_Type; +PyAPI_DATA(PyTypeObject) PyDictProxy_Type; +PyAPI_DATA(PyTypeObject) PyProperty_Type; + +PyAPI_FUNC(PyObject *) PyDescr_NewMethod(PyTypeObject *, PyMethodDef *); +PyAPI_FUNC(PyObject *) PyDescr_NewClassMethod(PyTypeObject *, PyMethodDef *); +PyAPI_FUNC(PyObject *) PyDescr_NewMember(PyTypeObject *, PyMemberDef *); +PyAPI_FUNC(PyObject *) PyDescr_NewGetSet(PyTypeObject *, PyGetSetDef *); + +PyAPI_FUNC(PyObject *) PyDictProxy_New(PyObject *); +PyAPI_FUNC(PyObject *) PyWrapper_New(PyObject *, PyObject *); + + +/* An array of PyMemberDef structures defines the name, type and offset + of selected members of a C structure. These can be read by + PyMember_GetOne() and set by PyMember_SetOne() (except if their READONLY + flag is set). The array must be terminated with an entry whose name + pointer is NULL. */ +struct PyMemberDef { + const char *name; + int type; + Py_ssize_t offset; + int flags; + const char *doc; +}; + +// These constants used to be in structmember.h, not prefixed by Py_. 
+// (structmember.h now has aliases to the new names.) + +/* Types */ +#define Py_T_SHORT 0 +#define Py_T_INT 1 +#define Py_T_LONG 2 +#define Py_T_FLOAT 3 +#define Py_T_DOUBLE 4 +#define Py_T_STRING 5 +#define _Py_T_OBJECT 6 // Deprecated, use Py_T_OBJECT_EX instead +/* the ordering here is weird for binary compatibility */ +#define Py_T_CHAR 7 /* 1-character string */ +#define Py_T_BYTE 8 /* 8-bit signed int */ +/* unsigned variants: */ +#define Py_T_UBYTE 9 +#define Py_T_USHORT 10 +#define Py_T_UINT 11 +#define Py_T_ULONG 12 + +/* Added by Jack: strings contained in the structure */ +#define Py_T_STRING_INPLACE 13 + +/* Added by Lillo: bools contained in the structure (assumed char) */ +#define Py_T_BOOL 14 + +#define Py_T_OBJECT_EX 16 +#define Py_T_LONGLONG 17 +#define Py_T_ULONGLONG 18 + +#define Py_T_PYSSIZET 19 /* Py_ssize_t */ +#define _Py_T_NONE 20 // Deprecated. Value is always None. + +/* Flags */ +#define Py_READONLY 1 +#define Py_AUDIT_READ 2 // Added in 3.10, harmless no-op before that +#define _Py_WRITE_RESTRICTED 4 // Deprecated, no-op. Do not reuse the value. +#define Py_RELATIVE_OFFSET 8 + +PyAPI_FUNC(PyObject *) PyMember_GetOne(const char *, PyMemberDef *); +PyAPI_FUNC(int) PyMember_SetOne(char *, PyMemberDef *, PyObject *); + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_DESCROBJECT_H +# include "cpython/descrobject.h" +# undef Py_CPYTHON_DESCROBJECT_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_DESCROBJECT_H */ diff --git a/Include/dictobject.h b/Include/dictobject.h new file mode 100644 index 0000000000000000000000000000000000000000..1bbeec1ab699e7581d592bce151186582b6c3c82 --- /dev/null +++ b/Include/dictobject.h @@ -0,0 +1,108 @@ +#ifndef Py_DICTOBJECT_H +#define Py_DICTOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +/* Dictionary object type -- mapping from hashable object to object */ + +/* The distribution includes a separate file, Objects/dictnotes.txt, + describing explorations into dictionary design and optimization. + It covers typical dictionary use patterns, the parameters for + tuning dictionaries, and several ideas for possible optimizations. +*/ + +PyAPI_DATA(PyTypeObject) PyDict_Type; + +#define PyDict_Check(op) \ + PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_DICT_SUBCLASS) +#define PyDict_CheckExact(op) Py_IS_TYPE((op), &PyDict_Type) + +PyAPI_FUNC(PyObject *) PyDict_New(void); +PyAPI_FUNC(PyObject *) PyDict_GetItem(PyObject *mp, PyObject *key); +PyAPI_FUNC(PyObject *) PyDict_GetItemWithError(PyObject *mp, PyObject *key); +PyAPI_FUNC(int) PyDict_SetItem(PyObject *mp, PyObject *key, PyObject *item); +PyAPI_FUNC(int) PyDict_DelItem(PyObject *mp, PyObject *key); +PyAPI_FUNC(void) PyDict_Clear(PyObject *mp); +PyAPI_FUNC(int) PyDict_Next( + PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value); +PyAPI_FUNC(PyObject *) PyDict_Keys(PyObject *mp); +PyAPI_FUNC(PyObject *) PyDict_Values(PyObject *mp); +PyAPI_FUNC(PyObject *) PyDict_Items(PyObject *mp); +PyAPI_FUNC(Py_ssize_t) PyDict_Size(PyObject *mp); +PyAPI_FUNC(PyObject *) PyDict_Copy(PyObject *mp); +PyAPI_FUNC(int) PyDict_Contains(PyObject *mp, PyObject *key); + +/* PyDict_Update(mp, other) is equivalent to PyDict_Merge(mp, other, 1). */ +PyAPI_FUNC(int) PyDict_Update(PyObject *mp, PyObject *other); + +/* PyDict_Merge updates/merges from a mapping object (an object that + supports PyMapping_Keys() and PyObject_GetItem()). If override is true, + the last occurrence of a key wins, else the first. The Python + dict.update(other) is equivalent to PyDict_Merge(dict, other, 1). 
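+
+   A short sketch of the override semantics (illustrative only; assume
+   dicts a = {'k': 1} and b = {'k': 2} were built with the functions above):
+
+       PyDict_Merge(a, b, 0);   // override=0: the existing a['k'] == 1 is kept
+       PyDict_Merge(a, b, 1);   // override=1: a['k'] becomes 2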
+*/ +PyAPI_FUNC(int) PyDict_Merge(PyObject *mp, + PyObject *other, + int override); + +/* PyDict_MergeFromSeq2 updates/merges from an iterable object producing + iterable objects of length 2. If override is true, the last occurrence + of a key wins, else the first. The Python dict constructor dict(seq2) + is equivalent to dict={}; PyDict_MergeFromSeq(dict, seq2, 1). +*/ +PyAPI_FUNC(int) PyDict_MergeFromSeq2(PyObject *d, + PyObject *seq2, + int override); + +PyAPI_FUNC(PyObject *) PyDict_GetItemString(PyObject *dp, const char *key); +PyAPI_FUNC(int) PyDict_SetItemString(PyObject *dp, const char *key, PyObject *item); +PyAPI_FUNC(int) PyDict_DelItemString(PyObject *dp, const char *key); + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030D0000 +// Return the object from dictionary *op* which has a key *key*. +// - If the key is present, set *result to a new strong reference to the value +// and return 1. +// - If the key is missing, set *result to NULL and return 0 . +// - On error, raise an exception and return -1. +PyAPI_FUNC(int) PyDict_GetItemRef(PyObject *mp, PyObject *key, PyObject **result); +PyAPI_FUNC(int) PyDict_GetItemStringRef(PyObject *mp, const char *key, PyObject **result); +#endif + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030A0000 +PyAPI_FUNC(PyObject *) PyObject_GenericGetDict(PyObject *, void *); +#endif + +/* Dictionary (keys, values, items) views */ + +PyAPI_DATA(PyTypeObject) PyDictKeys_Type; +PyAPI_DATA(PyTypeObject) PyDictValues_Type; +PyAPI_DATA(PyTypeObject) PyDictItems_Type; + +#define PyDictKeys_Check(op) PyObject_TypeCheck((op), &PyDictKeys_Type) +#define PyDictValues_Check(op) PyObject_TypeCheck((op), &PyDictValues_Type) +#define PyDictItems_Check(op) PyObject_TypeCheck((op), &PyDictItems_Type) +/* This excludes Values, since they are not sets. */ +# define PyDictViewSet_Check(op) \ + (PyDictKeys_Check(op) || PyDictItems_Check(op)) + +/* Dictionary (key, value, items) iterators */ + +PyAPI_DATA(PyTypeObject) PyDictIterKey_Type; +PyAPI_DATA(PyTypeObject) PyDictIterValue_Type; +PyAPI_DATA(PyTypeObject) PyDictIterItem_Type; + +PyAPI_DATA(PyTypeObject) PyDictRevIterKey_Type; +PyAPI_DATA(PyTypeObject) PyDictRevIterItem_Type; +PyAPI_DATA(PyTypeObject) PyDictRevIterValue_Type; + + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_DICTOBJECT_H +# include "cpython/dictobject.h" +# undef Py_CPYTHON_DICTOBJECT_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_DICTOBJECT_H */ diff --git a/Include/dynamic_annotations.h b/Include/dynamic_annotations.h new file mode 100644 index 0000000000000000000000000000000000000000..4d4def9bf8983e21209b598fc7cba728ca8c1d4c --- /dev/null +++ b/Include/dynamic_annotations.h @@ -0,0 +1,499 @@ +/* Copyright (c) 2008-2009, Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * --- + * Author: Kostya Serebryany + * Copied to CPython by Jeffrey Yasskin, with all macros renamed to + * start with _Py_ to avoid colliding with users embedding Python, and + * with deprecated macros removed. + */ + +/* This file defines dynamic annotations for use with dynamic analysis + tool such as valgrind, PIN, etc. + + Dynamic annotation is a source code annotation that affects + the generated code (that is, the annotation is not a comment). + Each such annotation is attached to a particular + instruction and/or to a particular object (address) in the program. + + The annotations that should be used by users are macros in all upper-case + (e.g., _Py_ANNOTATE_NEW_MEMORY). + + Actual implementation of these macros may differ depending on the + dynamic analysis tool being used. + + See https://code.google.com/p/data-race-test/ for more information. + + This file supports the following dynamic analysis tools: + - None (DYNAMIC_ANNOTATIONS_ENABLED is not defined or zero). + Macros are defined empty. + - ThreadSanitizer, Helgrind, DRD (DYNAMIC_ANNOTATIONS_ENABLED is 1). + Macros are defined as calls to non-inlinable empty functions + that are intercepted by Valgrind. */ + +#ifndef __DYNAMIC_ANNOTATIONS_H__ +#define __DYNAMIC_ANNOTATIONS_H__ + +#ifndef DYNAMIC_ANNOTATIONS_ENABLED +# define DYNAMIC_ANNOTATIONS_ENABLED 0 +#endif + +#if DYNAMIC_ANNOTATIONS_ENABLED != 0 + + /* ------------------------------------------------------------- + Annotations useful when implementing condition variables such as CondVar, + using conditional critical sections (Await/LockWhen) and when constructing + user-defined synchronization mechanisms. + + The annotations _Py_ANNOTATE_HAPPENS_BEFORE() and + _Py_ANNOTATE_HAPPENS_AFTER() can be used to define happens-before arcs in + user-defined synchronization mechanisms: the race detector will infer an + arc from the former to the latter when they share the same argument + pointer. + + Example 1 (reference counting): + + void Unref() { + _Py_ANNOTATE_HAPPENS_BEFORE(&refcount_); + if (AtomicDecrementByOne(&refcount_) == 0) { + _Py_ANNOTATE_HAPPENS_AFTER(&refcount_); + delete this; + } + } + + Example 2 (message queue): + + void MyQueue::Put(Type *e) { + MutexLock lock(&mu_); + _Py_ANNOTATE_HAPPENS_BEFORE(e); + PutElementIntoMyQueue(e); + } + + Type *MyQueue::Get() { + MutexLock lock(&mu_); + Type *e = GetElementFromMyQueue(); + _Py_ANNOTATE_HAPPENS_AFTER(e); + return e; + } + + Note: when possible, please use the existing reference counting and message + queue implementations instead of inventing new ones. */ + + /* Report that wait on the condition variable at address "cv" has succeeded + and the lock at address "lock" is held. */ +#define _Py_ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) \ + AnnotateCondVarWait(__FILE__, __LINE__, cv, lock) + + /* Report that wait on the condition variable at "cv" has succeeded. Variant + w/o lock. 
*/ +#define _Py_ANNOTATE_CONDVAR_WAIT(cv) \ + AnnotateCondVarWait(__FILE__, __LINE__, cv, NULL) + + /* Report that we are about to signal on the condition variable at address + "cv". */ +#define _Py_ANNOTATE_CONDVAR_SIGNAL(cv) \ + AnnotateCondVarSignal(__FILE__, __LINE__, cv) + + /* Report that we are about to signal_all on the condition variable at "cv". */ +#define _Py_ANNOTATE_CONDVAR_SIGNAL_ALL(cv) \ + AnnotateCondVarSignalAll(__FILE__, __LINE__, cv) + + /* Annotations for user-defined synchronization mechanisms. */ +#define _Py_ANNOTATE_HAPPENS_BEFORE(obj) _Py_ANNOTATE_CONDVAR_SIGNAL(obj) +#define _Py_ANNOTATE_HAPPENS_AFTER(obj) _Py_ANNOTATE_CONDVAR_WAIT(obj) + + /* Report that the bytes in the range [pointer, pointer+size) are about + to be published safely. The race checker will create a happens-before + arc from the call _Py_ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) to + subsequent accesses to this memory. + Note: this annotation may not work properly if the race detector uses + sampling, i.e. does not observe all memory accesses. + */ +#define _Py_ANNOTATE_PUBLISH_MEMORY_RANGE(pointer, size) \ + AnnotatePublishMemoryRange(__FILE__, __LINE__, pointer, size) + + /* Instruct the tool to create a happens-before arc between mu->Unlock() and + mu->Lock(). This annotation may slow down the race detector and hide real + races. Normally it is used only when it would be difficult to annotate each + of the mutex's critical sections individually using the annotations above. + This annotation makes sense only for hybrid race detectors. For pure + happens-before detectors this is a no-op. For more details see + https://code.google.com/p/data-race-test/wiki/PureHappensBeforeVsHybrid . */ +#define _Py_ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) \ + AnnotateMutexIsUsedAsCondVar(__FILE__, __LINE__, mu) + + /* ------------------------------------------------------------- + Annotations useful when defining memory allocators, or when memory that + was protected in one way starts to be protected in another. */ + + /* Report that a new memory at "address" of size "size" has been allocated. + This might be used when the memory has been retrieved from a free list and + is about to be reused, or when the locking discipline for a variable + changes. */ +#define _Py_ANNOTATE_NEW_MEMORY(address, size) \ + AnnotateNewMemory(__FILE__, __LINE__, address, size) + + /* ------------------------------------------------------------- + Annotations useful when defining FIFO queues that transfer data between + threads. */ + + /* Report that the producer-consumer queue (such as ProducerConsumerQueue) at + address "pcq" has been created. The _Py_ANNOTATE_PCQ_* annotations should + be used only for FIFO queues. For non-FIFO queues use + _Py_ANNOTATE_HAPPENS_BEFORE (for put) and _Py_ANNOTATE_HAPPENS_AFTER (for + get). */ +#define _Py_ANNOTATE_PCQ_CREATE(pcq) \ + AnnotatePCQCreate(__FILE__, __LINE__, pcq) + + /* Report that the queue at address "pcq" is about to be destroyed. */ +#define _Py_ANNOTATE_PCQ_DESTROY(pcq) \ + AnnotatePCQDestroy(__FILE__, __LINE__, pcq) + + /* Report that we are about to put an element into a FIFO queue at address + "pcq". */ +#define _Py_ANNOTATE_PCQ_PUT(pcq) \ + AnnotatePCQPut(__FILE__, __LINE__, pcq) + + /* Report that we've just got an element from a FIFO queue at address "pcq". */ +#define _Py_ANNOTATE_PCQ_GET(pcq) \ + AnnotatePCQGet(__FILE__, __LINE__, pcq) + + /* ------------------------------------------------------------- + Annotations that suppress errors. 
It is usually better to express the + program's synchronization using the other annotations, but these can + be used when all else fails. */ + + /* Report that we may have a benign race at "pointer", with size + "sizeof(*(pointer))". "pointer" must be a non-void* pointer. Insert at the + point where "pointer" has been allocated, preferably close to the point + where the race happens. See also _Py_ANNOTATE_BENIGN_RACE_STATIC. */ +#define _Py_ANNOTATE_BENIGN_RACE(pointer, description) \ + AnnotateBenignRaceSized(__FILE__, __LINE__, pointer, \ + sizeof(*(pointer)), description) + + /* Same as _Py_ANNOTATE_BENIGN_RACE(address, description), but applies to + the memory range [address, address+size). */ +#define _Py_ANNOTATE_BENIGN_RACE_SIZED(address, size, description) \ + AnnotateBenignRaceSized(__FILE__, __LINE__, address, size, description) + + /* Request the analysis tool to ignore all reads in the current thread + until _Py_ANNOTATE_IGNORE_READS_END is called. + Useful to ignore intentional racey reads, while still checking + other reads and all writes. + See also _Py_ANNOTATE_UNPROTECTED_READ. */ +#define _Py_ANNOTATE_IGNORE_READS_BEGIN() \ + AnnotateIgnoreReadsBegin(__FILE__, __LINE__) + + /* Stop ignoring reads. */ +#define _Py_ANNOTATE_IGNORE_READS_END() \ + AnnotateIgnoreReadsEnd(__FILE__, __LINE__) + + /* Similar to _Py_ANNOTATE_IGNORE_READS_BEGIN, but ignore writes. */ +#define _Py_ANNOTATE_IGNORE_WRITES_BEGIN() \ + AnnotateIgnoreWritesBegin(__FILE__, __LINE__) + + /* Stop ignoring writes. */ +#define _Py_ANNOTATE_IGNORE_WRITES_END() \ + AnnotateIgnoreWritesEnd(__FILE__, __LINE__) + + /* Start ignoring all memory accesses (reads and writes). */ +#define _Py_ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() \ + do {\ + _Py_ANNOTATE_IGNORE_READS_BEGIN();\ + _Py_ANNOTATE_IGNORE_WRITES_BEGIN();\ + }while(0)\ + + /* Stop ignoring all memory accesses. */ +#define _Py_ANNOTATE_IGNORE_READS_AND_WRITES_END() \ + do {\ + _Py_ANNOTATE_IGNORE_WRITES_END();\ + _Py_ANNOTATE_IGNORE_READS_END();\ + }while(0)\ + + /* Similar to _Py_ANNOTATE_IGNORE_READS_BEGIN, but ignore synchronization events: + RWLOCK* and CONDVAR*. */ +#define _Py_ANNOTATE_IGNORE_SYNC_BEGIN() \ + AnnotateIgnoreSyncBegin(__FILE__, __LINE__) + + /* Stop ignoring sync events. */ +#define _Py_ANNOTATE_IGNORE_SYNC_END() \ + AnnotateIgnoreSyncEnd(__FILE__, __LINE__) + + + /* Enable (enable!=0) or disable (enable==0) race detection for all threads. + This annotation could be useful if you want to skip expensive race analysis + during some period of program execution, e.g. during initialization. */ +#define _Py_ANNOTATE_ENABLE_RACE_DETECTION(enable) \ + AnnotateEnableRaceDetection(__FILE__, __LINE__, enable) + + /* ------------------------------------------------------------- + Annotations useful for debugging. */ + + /* Request to trace every access to "address". */ +#define _Py_ANNOTATE_TRACE_MEMORY(address) \ + AnnotateTraceMemory(__FILE__, __LINE__, address) + + /* Report the current thread name to a race detector. */ +#define _Py_ANNOTATE_THREAD_NAME(name) \ + AnnotateThreadName(__FILE__, __LINE__, name) + + /* ------------------------------------------------------------- + Annotations useful when implementing locks. They are not + normally needed by modules that merely use locks. + The "lock" argument is a pointer to the lock object. */ + + /* Report that a lock has been created at address "lock". 
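+
+   An illustrative lifecycle for a custom reader/writer lock (a sketch;
+   "my_lock" is a hypothetical lock object, not part of this header):
+
+       _Py_ANNOTATE_RWLOCK_CREATE(&my_lock);
+       _Py_ANNOTATE_RWLOCK_ACQUIRED(&my_lock, 1);   // is_w=1: writer side
+       _Py_ANNOTATE_RWLOCK_RELEASED(&my_lock, 1);
+       _Py_ANNOTATE_RWLOCK_DESTROY(&my_lock);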
*/ +#define _Py_ANNOTATE_RWLOCK_CREATE(lock) \ + AnnotateRWLockCreate(__FILE__, __LINE__, lock) + + /* Report that the lock at address "lock" is about to be destroyed. */ +#define _Py_ANNOTATE_RWLOCK_DESTROY(lock) \ + AnnotateRWLockDestroy(__FILE__, __LINE__, lock) + + /* Report that the lock at address "lock" has been acquired. + is_w=1 for writer lock, is_w=0 for reader lock. */ +#define _Py_ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) \ + AnnotateRWLockAcquired(__FILE__, __LINE__, lock, is_w) + + /* Report that the lock at address "lock" is about to be released. */ +#define _Py_ANNOTATE_RWLOCK_RELEASED(lock, is_w) \ + AnnotateRWLockReleased(__FILE__, __LINE__, lock, is_w) + + /* ------------------------------------------------------------- + Annotations useful when implementing barriers. They are not + normally needed by modules that merely use barriers. + The "barrier" argument is a pointer to the barrier object. */ + + /* Report that the "barrier" has been initialized with initial "count". + If 'reinitialization_allowed' is true, initialization is allowed to happen + multiple times w/o calling barrier_destroy() */ +#define _Py_ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) \ + AnnotateBarrierInit(__FILE__, __LINE__, barrier, count, \ + reinitialization_allowed) + + /* Report that we are about to enter barrier_wait("barrier"). */ +#define _Py_ANNOTATE_BARRIER_WAIT_BEFORE(barrier) \ + AnnotateBarrierWaitBefore(__FILE__, __LINE__, barrier) + + /* Report that we just exited barrier_wait("barrier"). */ +#define _Py_ANNOTATE_BARRIER_WAIT_AFTER(barrier) \ + AnnotateBarrierWaitAfter(__FILE__, __LINE__, barrier) + + /* Report that the "barrier" has been destroyed. */ +#define _Py_ANNOTATE_BARRIER_DESTROY(barrier) \ + AnnotateBarrierDestroy(__FILE__, __LINE__, barrier) + + /* ------------------------------------------------------------- + Annotations useful for testing race detectors. */ + + /* Report that we expect a race on the variable at "address". + Use only in unit tests for a race detector. */ +#define _Py_ANNOTATE_EXPECT_RACE(address, description) \ + AnnotateExpectRace(__FILE__, __LINE__, address, description) + + /* A no-op. Insert where you like to test the interceptors. */ +#define _Py_ANNOTATE_NO_OP(arg) \ + AnnotateNoOp(__FILE__, __LINE__, arg) + + /* Force the race detector to flush its state. The actual effect depends on + * the implementation of the detector. 
*/ +#define _Py_ANNOTATE_FLUSH_STATE() \ + AnnotateFlushState(__FILE__, __LINE__) + + +#else /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */ + +#define _Py_ANNOTATE_RWLOCK_CREATE(lock) /* empty */ +#define _Py_ANNOTATE_RWLOCK_DESTROY(lock) /* empty */ +#define _Py_ANNOTATE_RWLOCK_ACQUIRED(lock, is_w) /* empty */ +#define _Py_ANNOTATE_RWLOCK_RELEASED(lock, is_w) /* empty */ +#define _Py_ANNOTATE_BARRIER_INIT(barrier, count, reinitialization_allowed) /* */ +#define _Py_ANNOTATE_BARRIER_WAIT_BEFORE(barrier) /* empty */ +#define _Py_ANNOTATE_BARRIER_WAIT_AFTER(barrier) /* empty */ +#define _Py_ANNOTATE_BARRIER_DESTROY(barrier) /* empty */ +#define _Py_ANNOTATE_CONDVAR_LOCK_WAIT(cv, lock) /* empty */ +#define _Py_ANNOTATE_CONDVAR_WAIT(cv) /* empty */ +#define _Py_ANNOTATE_CONDVAR_SIGNAL(cv) /* empty */ +#define _Py_ANNOTATE_CONDVAR_SIGNAL_ALL(cv) /* empty */ +#define _Py_ANNOTATE_HAPPENS_BEFORE(obj) /* empty */ +#define _Py_ANNOTATE_HAPPENS_AFTER(obj) /* empty */ +#define _Py_ANNOTATE_PUBLISH_MEMORY_RANGE(address, size) /* empty */ +#define _Py_ANNOTATE_UNPUBLISH_MEMORY_RANGE(address, size) /* empty */ +#define _Py_ANNOTATE_SWAP_MEMORY_RANGE(address, size) /* empty */ +#define _Py_ANNOTATE_PCQ_CREATE(pcq) /* empty */ +#define _Py_ANNOTATE_PCQ_DESTROY(pcq) /* empty */ +#define _Py_ANNOTATE_PCQ_PUT(pcq) /* empty */ +#define _Py_ANNOTATE_PCQ_GET(pcq) /* empty */ +#define _Py_ANNOTATE_NEW_MEMORY(address, size) /* empty */ +#define _Py_ANNOTATE_EXPECT_RACE(address, description) /* empty */ +#define _Py_ANNOTATE_BENIGN_RACE(address, description) /* empty */ +#define _Py_ANNOTATE_BENIGN_RACE_SIZED(address, size, description) /* empty */ +#define _Py_ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX(mu) /* empty */ +#define _Py_ANNOTATE_MUTEX_IS_USED_AS_CONDVAR(mu) /* empty */ +#define _Py_ANNOTATE_TRACE_MEMORY(arg) /* empty */ +#define _Py_ANNOTATE_THREAD_NAME(name) /* empty */ +#define _Py_ANNOTATE_IGNORE_READS_BEGIN() /* empty */ +#define _Py_ANNOTATE_IGNORE_READS_END() /* empty */ +#define _Py_ANNOTATE_IGNORE_WRITES_BEGIN() /* empty */ +#define _Py_ANNOTATE_IGNORE_WRITES_END() /* empty */ +#define _Py_ANNOTATE_IGNORE_READS_AND_WRITES_BEGIN() /* empty */ +#define _Py_ANNOTATE_IGNORE_READS_AND_WRITES_END() /* empty */ +#define _Py_ANNOTATE_IGNORE_SYNC_BEGIN() /* empty */ +#define _Py_ANNOTATE_IGNORE_SYNC_END() /* empty */ +#define _Py_ANNOTATE_ENABLE_RACE_DETECTION(enable) /* empty */ +#define _Py_ANNOTATE_NO_OP(arg) /* empty */ +#define _Py_ANNOTATE_FLUSH_STATE() /* empty */ + +#endif /* DYNAMIC_ANNOTATIONS_ENABLED */ + +/* Use the macros above rather than using these functions directly. 
*/ +#ifdef __cplusplus +extern "C" { +#endif +void AnnotateRWLockCreate(const char *file, int line, + const volatile void *lock); +void AnnotateRWLockDestroy(const char *file, int line, + const volatile void *lock); +void AnnotateRWLockAcquired(const char *file, int line, + const volatile void *lock, long is_w); +void AnnotateRWLockReleased(const char *file, int line, + const volatile void *lock, long is_w); +void AnnotateBarrierInit(const char *file, int line, + const volatile void *barrier, long count, + long reinitialization_allowed); +void AnnotateBarrierWaitBefore(const char *file, int line, + const volatile void *barrier); +void AnnotateBarrierWaitAfter(const char *file, int line, + const volatile void *barrier); +void AnnotateBarrierDestroy(const char *file, int line, + const volatile void *barrier); +void AnnotateCondVarWait(const char *file, int line, + const volatile void *cv, + const volatile void *lock); +void AnnotateCondVarSignal(const char *file, int line, + const volatile void *cv); +void AnnotateCondVarSignalAll(const char *file, int line, + const volatile void *cv); +void AnnotatePublishMemoryRange(const char *file, int line, + const volatile void *address, + long size); +void AnnotateUnpublishMemoryRange(const char *file, int line, + const volatile void *address, + long size); +void AnnotatePCQCreate(const char *file, int line, + const volatile void *pcq); +void AnnotatePCQDestroy(const char *file, int line, + const volatile void *pcq); +void AnnotatePCQPut(const char *file, int line, + const volatile void *pcq); +void AnnotatePCQGet(const char *file, int line, + const volatile void *pcq); +void AnnotateNewMemory(const char *file, int line, + const volatile void *address, + long size); +void AnnotateExpectRace(const char *file, int line, + const volatile void *address, + const char *description); +void AnnotateBenignRace(const char *file, int line, + const volatile void *address, + const char *description); +void AnnotateBenignRaceSized(const char *file, int line, + const volatile void *address, + long size, + const char *description); +void AnnotateMutexIsUsedAsCondVar(const char *file, int line, + const volatile void *mu); +void AnnotateTraceMemory(const char *file, int line, + const volatile void *arg); +void AnnotateThreadName(const char *file, int line, + const char *name); +void AnnotateIgnoreReadsBegin(const char *file, int line); +void AnnotateIgnoreReadsEnd(const char *file, int line); +void AnnotateIgnoreWritesBegin(const char *file, int line); +void AnnotateIgnoreWritesEnd(const char *file, int line); +void AnnotateEnableRaceDetection(const char *file, int line, int enable); +void AnnotateNoOp(const char *file, int line, + const volatile void *arg); +void AnnotateFlushState(const char *file, int line); + +/* Return non-zero value if running under valgrind. + + If "valgrind.h" is included into dynamic_annotations.c, + the regular valgrind mechanism will be used. + See http://valgrind.org/docs/manual/manual-core-adv.html about + RUNNING_ON_VALGRIND and other valgrind "client requests". + The file "valgrind.h" may be obtained by doing + svn co svn://svn.valgrind.org/valgrind/trunk/include + + If for some reason you can't use "valgrind.h" or want to fake valgrind, + there are two ways to make this function return non-zero: + - Use environment variable: export RUNNING_ON_VALGRIND=1 + - Make your tool intercept the function RunningOnValgrind() and + change its return value. 
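+
+   A minimal check, as a sketch (illustrative only):
+
+       if (RunningOnValgrind()) {
+           fprintf(stderr, "running under a valgrind-based tool\n");
+       }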
+ */
+int RunningOnValgrind(void);
+
+#ifdef __cplusplus
+}
+#endif
+
+#if DYNAMIC_ANNOTATIONS_ENABLED != 0 && defined(__cplusplus)
+
+  /* _Py_ANNOTATE_UNPROTECTED_READ is the preferred way to annotate racey reads.
+
+     Instead of doing
+        _Py_ANNOTATE_IGNORE_READS_BEGIN();
+        ... = x;
+        _Py_ANNOTATE_IGNORE_READS_END();
+     one can use
+        ... = _Py_ANNOTATE_UNPROTECTED_READ(x); */
+  template <typename T>
+  inline T _Py_ANNOTATE_UNPROTECTED_READ(const volatile T &x) {
+    _Py_ANNOTATE_IGNORE_READS_BEGIN();
+    T res = x;
+    _Py_ANNOTATE_IGNORE_READS_END();
+    return res;
+  }
+  /* Apply _Py_ANNOTATE_BENIGN_RACE_SIZED to a static variable. */
+#define _Py_ANNOTATE_BENIGN_RACE_STATIC(static_var, description) \
+  namespace { \
+    class static_var ## _annotator { \
+     public: \
+      static_var ## _annotator() { \
+        _Py_ANNOTATE_BENIGN_RACE_SIZED(&static_var, \
+                                       sizeof(static_var), \
+                                       # static_var ": " description); \
+      } \
+    }; \
+    static static_var ## _annotator the ## static_var ## _annotator;\
+  }
+#else /* DYNAMIC_ANNOTATIONS_ENABLED == 0 */
+
+#define _Py_ANNOTATE_UNPROTECTED_READ(x) (x)
+#define _Py_ANNOTATE_BENIGN_RACE_STATIC(static_var, description) /* empty */
+
+#endif /* DYNAMIC_ANNOTATIONS_ENABLED */
+
+#endif /* __DYNAMIC_ANNOTATIONS_H__ */
diff --git a/Include/enumobject.h b/Include/enumobject.h
new file mode 100644
index 0000000000000000000000000000000000000000..c14dbfc8c37e7c9316b9cca0a5969ee925d729d8
--- /dev/null
+++ b/Include/enumobject.h
@@ -0,0 +1,17 @@
+#ifndef Py_ENUMOBJECT_H
+#define Py_ENUMOBJECT_H
+
+/* Enumerate Object */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+PyAPI_DATA(PyTypeObject) PyEnum_Type;
+PyAPI_DATA(PyTypeObject) PyReversed_Type;
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* !Py_ENUMOBJECT_H */
diff --git a/Include/errcode.h b/Include/errcode.h
new file mode 100644
index 0000000000000000000000000000000000000000..dac5cf068c99d6d4ab1b678c48da3e6f1e2660ee
--- /dev/null
+++ b/Include/errcode.h
@@ -0,0 +1,45 @@
+// Error codes passed around between file input, tokenizer, parser and
+// interpreter. This is necessary so we can turn them into Python
+// exceptions at a higher level. Note that some errors have a
+// slightly different meaning when passed from the tokenizer to the
+// parser than when passed from the parser to the interpreter; e.g.
+// the parser only returns E_EOF when it hits EOF immediately, and it
+// never returns E_OK.
+// +// The public PyRun_InteractiveOneObjectEx() function can return E_EOF, +// same as its variants: +// +// * PyRun_InteractiveOneObject() +// * PyRun_InteractiveOneFlags() +// * PyRun_InteractiveOne() + +#ifndef Py_ERRCODE_H +#define Py_ERRCODE_H +#ifdef __cplusplus +extern "C" { +#endif + +#define E_OK 10 /* No error */ +#define E_EOF 11 /* End Of File */ +#define E_INTR 12 /* Interrupted */ +#define E_TOKEN 13 /* Bad token */ +#define E_SYNTAX 14 /* Syntax error */ +#define E_NOMEM 15 /* Ran out of memory */ +#define E_DONE 16 /* Parsing complete */ +#define E_ERROR 17 /* Execution error */ +#define E_TABSPACE 18 /* Inconsistent mixing of tabs and spaces */ +#define E_OVERFLOW 19 /* Node had too many children */ +#define E_TOODEEP 20 /* Too many indentation levels */ +#define E_DEDENT 21 /* No matching outer block for dedent */ +#define E_DECODE 22 /* Error in decoding into Unicode */ +#define E_EOFS 23 /* EOF in triple-quoted string */ +#define E_EOLS 24 /* EOL in single-quoted string */ +#define E_LINECONT 25 /* Unexpected characters after a line continuation */ +#define E_BADSINGLE 27 /* Ill-formed single statement input */ +#define E_INTERACT_STOP 28 /* Interactive mode stopped tokenization */ +#define E_COLUMNOVERFLOW 29 /* Column offset overflow */ + +#ifdef __cplusplus +} +#endif +#endif /* !Py_ERRCODE_H */ diff --git a/Include/exports.h b/Include/exports.h new file mode 100644 index 0000000000000000000000000000000000000000..ce601216f171563df33cec3e2ed5cbe42aad5a00 --- /dev/null +++ b/Include/exports.h @@ -0,0 +1,108 @@ +#ifndef Py_EXPORTS_H +#define Py_EXPORTS_H + +/* Declarations for symbol visibility. + + PyAPI_FUNC(type): Declares a public Python API function and return type + PyAPI_DATA(type): Declares public Python data and its type + PyMODINIT_FUNC: A Python module init function. If these functions are + inside the Python core, they are private to the core. + If in an extension module, it may be declared with + external linkage depending on the platform. + + As a number of platforms support/require "__declspec(dllimport/dllexport)", + we support a HAVE_DECLSPEC_DLL macro to save duplication. +*/ + +/* + All windows ports, except cygwin, are handled in PC/pyconfig.h. + + Cygwin is the only other autoconf platform requiring special + linkage handling and it uses __declspec(). +*/ +#if defined(__CYGWIN__) +# define HAVE_DECLSPEC_DLL +#endif + +#if defined(_WIN32) || defined(__CYGWIN__) + #if defined(Py_ENABLE_SHARED) + #define Py_IMPORTED_SYMBOL __declspec(dllimport) + #define Py_EXPORTED_SYMBOL __declspec(dllexport) + #define Py_LOCAL_SYMBOL + #else + #define Py_IMPORTED_SYMBOL + #define Py_EXPORTED_SYMBOL + #define Py_LOCAL_SYMBOL + #endif +#else +/* + * If we only ever used gcc >= 5, we could use __has_attribute(visibility) + * as a cross-platform way to determine if visibility is supported. However, + * we may still need to support gcc >= 4, as some Ubuntu LTS and Centos versions + * have 4 < gcc < 5. + */ + #ifndef __has_attribute + #define __has_attribute(x) 0 // Compatibility with non-clang compilers. 
+ #endif + #if (defined(__GNUC__) && (__GNUC__ >= 4)) ||\ + (defined(__clang__) && __has_attribute(visibility)) + #define Py_IMPORTED_SYMBOL __attribute__ ((visibility ("default"))) + #define Py_EXPORTED_SYMBOL __attribute__ ((visibility ("default"))) + #define Py_LOCAL_SYMBOL __attribute__ ((visibility ("hidden"))) + #else + #define Py_IMPORTED_SYMBOL + #define Py_EXPORTED_SYMBOL + #define Py_LOCAL_SYMBOL + #endif +#endif + +/* only get special linkage if built as shared or platform is Cygwin */ +#if defined(Py_ENABLE_SHARED) || defined(__CYGWIN__) +# if defined(HAVE_DECLSPEC_DLL) +# if defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_MODULE) +# define PyAPI_FUNC(RTYPE) Py_EXPORTED_SYMBOL RTYPE +# define PyAPI_DATA(RTYPE) extern Py_EXPORTED_SYMBOL RTYPE + /* module init functions inside the core need no external linkage */ + /* except for Cygwin to handle embedding */ +# if defined(__CYGWIN__) +# define PyMODINIT_FUNC Py_EXPORTED_SYMBOL PyObject* +# else /* __CYGWIN__ */ +# define PyMODINIT_FUNC PyObject* +# endif /* __CYGWIN__ */ +# else /* Py_BUILD_CORE */ + /* Building an extension module, or an embedded situation */ + /* public Python functions and data are imported */ + /* Under Cygwin, auto-import functions to prevent compilation */ + /* failures similar to those described at the bottom of 4.1: */ + /* http://docs.python.org/extending/windows.html#a-cookbook-approach */ +# if !defined(__CYGWIN__) +# define PyAPI_FUNC(RTYPE) Py_IMPORTED_SYMBOL RTYPE +# endif /* !__CYGWIN__ */ +# define PyAPI_DATA(RTYPE) extern Py_IMPORTED_SYMBOL RTYPE + /* module init functions outside the core must be exported */ +# if defined(__cplusplus) +# define PyMODINIT_FUNC extern "C" Py_EXPORTED_SYMBOL PyObject* +# else /* __cplusplus */ +# define PyMODINIT_FUNC Py_EXPORTED_SYMBOL PyObject* +# endif /* __cplusplus */ +# endif /* Py_BUILD_CORE */ +# endif /* HAVE_DECLSPEC_DLL */ +#endif /* Py_ENABLE_SHARED */ + +/* If no external linkage macros defined by now, create defaults */ +#ifndef PyAPI_FUNC +# define PyAPI_FUNC(RTYPE) Py_EXPORTED_SYMBOL RTYPE +#endif +#ifndef PyAPI_DATA +# define PyAPI_DATA(RTYPE) extern Py_EXPORTED_SYMBOL RTYPE +#endif +#ifndef PyMODINIT_FUNC +# if defined(__cplusplus) +# define PyMODINIT_FUNC extern "C" Py_EXPORTED_SYMBOL PyObject* +# else /* __cplusplus */ +# define PyMODINIT_FUNC Py_EXPORTED_SYMBOL PyObject* +# endif /* __cplusplus */ +#endif + + +#endif /* Py_EXPORTS_H */ diff --git a/Include/fileobject.h b/Include/fileobject.h new file mode 100644 index 0000000000000000000000000000000000000000..6a6d11409497fab0fac0b20851af8786fd09545a --- /dev/null +++ b/Include/fileobject.h @@ -0,0 +1,41 @@ +/* File object interface (what's left of it -- see io.py) */ + +#ifndef Py_FILEOBJECT_H +#define Py_FILEOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +#define PY_STDIOTEXTMODE "b" + +PyAPI_FUNC(PyObject *) PyFile_FromFd(int, const char *, const char *, int, + const char *, const char *, + const char *, int); +PyAPI_FUNC(PyObject *) PyFile_GetLine(PyObject *, int); +PyAPI_FUNC(int) PyFile_WriteObject(PyObject *, PyObject *, int); +PyAPI_FUNC(int) PyFile_WriteString(const char *, PyObject *); +PyAPI_FUNC(int) PyObject_AsFileDescriptor(PyObject *); + +/* The default encoding used by the platform file system APIs + If non-NULL, this is different than the default encoding for strings +*/ +Py_DEPRECATED(3.12) PyAPI_DATA(const char *) Py_FileSystemDefaultEncoding; +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03060000 +Py_DEPRECATED(3.12) PyAPI_DATA(const char *) 
Py_FileSystemDefaultEncodeErrors;
+#endif
+Py_DEPRECATED(3.12) PyAPI_DATA(int) Py_HasFileSystemDefaultEncoding;
+
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03070000
+Py_DEPRECATED(3.12) PyAPI_DATA(int) Py_UTF8Mode;
+#endif
+
+#ifndef Py_LIMITED_API
+# define Py_CPYTHON_FILEOBJECT_H
+# include "cpython/fileobject.h"
+# undef Py_CPYTHON_FILEOBJECT_H
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_FILEOBJECT_H */
diff --git a/Include/fileutils.h b/Include/fileutils.h
new file mode 100644
index 0000000000000000000000000000000000000000..1509198e45f0cae4ae81a883890ea542c275cf81
--- /dev/null
+++ b/Include/fileutils.h
@@ -0,0 +1,62 @@
+#ifndef Py_FILEUTILS_H
+#define Py_FILEUTILS_H
+
+/*******************************
+ * stat() and fstat() fiddling *
+ *******************************/
+
+#ifdef HAVE_SYS_STAT_H
+# include <sys/stat.h>    // S_ISREG()
+#elif defined(HAVE_STAT_H)
+# include <stat.h>        // S_ISREG()
+#endif
+
+#ifndef S_IFMT
+   // VisualAge C/C++ Failed to Define MountType Field in sys/stat.h.
+# define S_IFMT 0170000
+#endif
+#ifndef S_IFLNK
+   // Windows doesn't define S_IFLNK, but posixmodule.c maps
+   // IO_REPARSE_TAG_SYMLINK to S_IFLNK.
+# define S_IFLNK 0120000
+#endif
+#ifndef S_ISREG
+# define S_ISREG(x) (((x) & S_IFMT) == S_IFREG)
+#endif
+#ifndef S_ISDIR
+# define S_ISDIR(x) (((x) & S_IFMT) == S_IFDIR)
+#endif
+#ifndef S_ISCHR
+# define S_ISCHR(x) (((x) & S_IFMT) == S_IFCHR)
+#endif
+#ifndef S_ISLNK
+# define S_ISLNK(x) (((x) & S_IFMT) == S_IFLNK)
+#endif
+
+
+// Move this down here since some C++ #include's don't like to be included
+// inside an extern "C".
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000
+PyAPI_FUNC(wchar_t *) Py_DecodeLocale(
+    const char *arg,
+    size_t *size);
+
+PyAPI_FUNC(char*) Py_EncodeLocale(
+    const wchar_t *text,
+    size_t *error_pos);
+#endif
+
+#ifndef Py_LIMITED_API
+# define Py_CPYTHON_FILEUTILS_H
+# include "cpython/fileutils.h"
+# undef Py_CPYTHON_FILEUTILS_H
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_FILEUTILS_H */
diff --git a/Include/floatobject.h b/Include/floatobject.h
new file mode 100644
index 0000000000000000000000000000000000000000..8963c16832a4bc6f7f38b9db2327e76ba44464e5
--- /dev/null
+++ b/Include/floatobject.h
@@ -0,0 +1,54 @@
+
+/* Float object interface */
+
+/*
+PyFloatObject represents a (double precision) floating-point number.
+*/
+
+#ifndef Py_FLOATOBJECT_H
+#define Py_FLOATOBJECT_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+PyAPI_DATA(PyTypeObject) PyFloat_Type;
+
+#define PyFloat_Check(op) PyObject_TypeCheck(op, &PyFloat_Type)
+#define PyFloat_CheckExact(op) Py_IS_TYPE((op), &PyFloat_Type)
+
+#define Py_RETURN_NAN return PyFloat_FromDouble(Py_NAN)
+
+#define Py_RETURN_INF(sign)                          \
+    do {                                             \
+        if (copysign(1., sign) == 1.) {              \
+            return PyFloat_FromDouble(Py_HUGE_VAL);  \
+        }                                            \
+        else {                                       \
+            return PyFloat_FromDouble(-Py_HUGE_VAL); \
+        }                                            \
+    } while(0)
+
+PyAPI_FUNC(double) PyFloat_GetMax(void);
+PyAPI_FUNC(double) PyFloat_GetMin(void);
+PyAPI_FUNC(PyObject*) PyFloat_GetInfo(void);
+
+/* Return Python float from string PyObject. */
+PyAPI_FUNC(PyObject*) PyFloat_FromString(PyObject*);
+
+/* Return Python float from C double. */
+PyAPI_FUNC(PyObject*) PyFloat_FromDouble(double);
+
+/* Extract C double from Python float. The macro version trades safety for
+   speed.
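+
+   A minimal error-checking sketch (illustrative; "obj" is a hypothetical
+   PyObject pointer): PyFloat_AsDouble() returns -1.0 with an exception
+   set on failure, so a -1.0 result must be disambiguated.
+
+       double x = PyFloat_AsDouble(obj);
+       if (x == -1.0 && PyErr_Occurred()) {
+           return NULL;   // conversion failed
+       }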
*/ +PyAPI_FUNC(double) PyFloat_AsDouble(PyObject*); + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_FLOATOBJECT_H +# include "cpython/floatobject.h" +# undef Py_CPYTHON_FLOATOBJECT_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_FLOATOBJECT_H */ diff --git a/Include/frameobject.h b/Include/frameobject.h new file mode 100644 index 0000000000000000000000000000000000000000..adb628f6314fcfc6bff80b4bb09483158354b91f --- /dev/null +++ b/Include/frameobject.h @@ -0,0 +1,20 @@ +/* Frame object interface */ + +#ifndef Py_FRAMEOBJECT_H +#define Py_FRAMEOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +#include "pyframe.h" + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_FRAMEOBJECT_H +# include "cpython/frameobject.h" +# undef Py_CPYTHON_FRAMEOBJECT_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_FRAMEOBJECT_H */ diff --git a/Include/genericaliasobject.h b/Include/genericaliasobject.h new file mode 100644 index 0000000000000000000000000000000000000000..cf002976b27cd7712ee80bf348f55c5c562bc12d --- /dev/null +++ b/Include/genericaliasobject.h @@ -0,0 +1,14 @@ +// Implementation of PEP 585: support list[int] etc. +#ifndef Py_GENERICALIASOBJECT_H +#define Py_GENERICALIASOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_FUNC(PyObject *) Py_GenericAlias(PyObject *, PyObject *); +PyAPI_DATA(PyTypeObject) Py_GenericAliasType; + +#ifdef __cplusplus +} +#endif +#endif /* !Py_GENERICALIASOBJECT_H */ diff --git a/Include/import.h b/Include/import.h new file mode 100644 index 0000000000000000000000000000000000000000..24b23b9119196fcb5908ae70e035a884543bcc05 --- /dev/null +++ b/Include/import.h @@ -0,0 +1,103 @@ +/* Module definition and import interface */ + +#ifndef Py_IMPORT_H +#define Py_IMPORT_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_FUNC(long) PyImport_GetMagicNumber(void); +PyAPI_FUNC(const char *) PyImport_GetMagicTag(void); +PyAPI_FUNC(PyObject *) PyImport_ExecCodeModule( + const char *name, /* UTF-8 encoded string */ + PyObject *co + ); +PyAPI_FUNC(PyObject *) PyImport_ExecCodeModuleEx( + const char *name, /* UTF-8 encoded string */ + PyObject *co, + const char *pathname /* decoded from the filesystem encoding */ + ); +PyAPI_FUNC(PyObject *) PyImport_ExecCodeModuleWithPathnames( + const char *name, /* UTF-8 encoded string */ + PyObject *co, + const char *pathname, /* decoded from the filesystem encoding */ + const char *cpathname /* decoded from the filesystem encoding */ + ); +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 +PyAPI_FUNC(PyObject *) PyImport_ExecCodeModuleObject( + PyObject *name, + PyObject *co, + PyObject *pathname, + PyObject *cpathname + ); +#endif +PyAPI_FUNC(PyObject *) PyImport_GetModuleDict(void); +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03070000 +PyAPI_FUNC(PyObject *) PyImport_GetModule(PyObject *name); +#endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 +PyAPI_FUNC(PyObject *) PyImport_AddModuleObject( + PyObject *name + ); +#endif +PyAPI_FUNC(PyObject *) PyImport_AddModule( + const char *name /* UTF-8 encoded string */ + ); +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030d0000 +PyAPI_FUNC(PyObject *) PyImport_AddModuleRef( + const char *name /* UTF-8 encoded string */ + ); +#endif +PyAPI_FUNC(PyObject *) PyImport_ImportModule( + const char *name /* UTF-8 encoded string */ + ); +Py_DEPRECATED(3.13) PyAPI_FUNC(PyObject *) PyImport_ImportModuleNoBlock( + const char *name /* UTF-8 encoded string */ + ); +PyAPI_FUNC(PyObject *) PyImport_ImportModuleLevel( + const char *name, /* 
UTF-8 encoded string */ + PyObject *globals, + PyObject *locals, + PyObject *fromlist, + int level + ); +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000 +PyAPI_FUNC(PyObject *) PyImport_ImportModuleLevelObject( + PyObject *name, + PyObject *globals, + PyObject *locals, + PyObject *fromlist, + int level + ); +#endif + +#define PyImport_ImportModuleEx(n, g, l, f) \ + PyImport_ImportModuleLevel((n), (g), (l), (f), 0) + +PyAPI_FUNC(PyObject *) PyImport_GetImporter(PyObject *path); +PyAPI_FUNC(PyObject *) PyImport_Import(PyObject *name); +PyAPI_FUNC(PyObject *) PyImport_ReloadModule(PyObject *m); +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 +PyAPI_FUNC(int) PyImport_ImportFrozenModuleObject( + PyObject *name + ); +#endif +PyAPI_FUNC(int) PyImport_ImportFrozenModule( + const char *name /* UTF-8 encoded string */ + ); + +PyAPI_FUNC(int) PyImport_AppendInittab( + const char *name, /* ASCII encoded string */ + PyObject* (*initfunc)(void) + ); + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_IMPORT_H +# include "cpython/import.h" +# undef Py_CPYTHON_IMPORT_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_IMPORT_H */ diff --git a/Include/internal/mimalloc/mimalloc.h b/Include/internal/mimalloc/mimalloc.h new file mode 100644 index 0000000000000000000000000000000000000000..821129e7690b1b267dfe802a8c8a8b913f7299ea --- /dev/null +++ b/Include/internal/mimalloc/mimalloc.h @@ -0,0 +1,565 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_H +#define MIMALLOC_H + +#define MI_MALLOC_VERSION 212 // major + 2 digits minor + +// ------------------------------------------------------ +// Compiler specific attributes +// ------------------------------------------------------ + +#ifdef __cplusplus + #if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 + #define mi_attr_noexcept noexcept + #else + #define mi_attr_noexcept throw() + #endif +#else + #define mi_attr_noexcept +#endif + +#if defined(__cplusplus) && (__cplusplus >= 201703) + #define mi_decl_nodiscard [[nodiscard]] +#elif (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) // includes clang, icc, and clang-cl + #define mi_decl_nodiscard __attribute__((warn_unused_result)) +#elif defined(_HAS_NODISCARD) + #define mi_decl_nodiscard _NODISCARD +#elif (_MSC_VER >= 1700) + #define mi_decl_nodiscard _Check_return_ +#else + #define mi_decl_nodiscard +#endif + +#if defined(_MSC_VER) || defined(__MINGW32__) + #if !defined(MI_SHARED_LIB) + #define mi_decl_export + #elif defined(MI_SHARED_LIB_EXPORT) + #define mi_decl_export __declspec(dllexport) + #else + #define mi_decl_export __declspec(dllimport) + #endif + #if defined(__MINGW32__) + #define mi_decl_restrict + #define mi_attr_malloc __attribute__((malloc)) + #else + #if (_MSC_VER >= 1900) && !defined(__EDG__) + #define mi_decl_restrict __declspec(allocator) __declspec(restrict) + #else + #define mi_decl_restrict __declspec(restrict) + #endif + #define mi_attr_malloc + #endif + #define mi_cdecl __cdecl + #define mi_attr_alloc_size(s) + #define mi_attr_alloc_size2(s1,s2) + #define mi_attr_alloc_align(p) +#elif defined(__GNUC__) // includes clang and icc + #if 
defined(MI_SHARED_LIB) && defined(MI_SHARED_LIB_EXPORT)
+    #define mi_decl_export              __attribute__((visibility("default")))
+  #else
+    #define mi_decl_export
+  #endif
+  #define mi_cdecl                      // leads to warnings... __attribute__((cdecl))
+  #define mi_decl_restrict
+  #define mi_attr_malloc                __attribute__((malloc))
+  #if (defined(__clang_major__) && (__clang_major__ < 4)) || (__GNUC__ < 5)
+    #define mi_attr_alloc_size(s)
+    #define mi_attr_alloc_size2(s1,s2)
+    #define mi_attr_alloc_align(p)
+  #elif defined(__INTEL_COMPILER)
+    #define mi_attr_alloc_size(s)       __attribute__((alloc_size(s)))
+    #define mi_attr_alloc_size2(s1,s2)  __attribute__((alloc_size(s1,s2)))
+    #define mi_attr_alloc_align(p)
+  #else
+    #define mi_attr_alloc_size(s)       __attribute__((alloc_size(s)))
+    #define mi_attr_alloc_size2(s1,s2)  __attribute__((alloc_size(s1,s2)))
+    #define mi_attr_alloc_align(p)      __attribute__((alloc_align(p)))
+  #endif
+#else
+  #define mi_cdecl
+  #define mi_decl_export
+  #define mi_decl_restrict
+  #define mi_attr_malloc
+  #define mi_attr_alloc_size(s)
+  #define mi_attr_alloc_size2(s1,s2)
+  #define mi_attr_alloc_align(p)
+#endif
+
+// ------------------------------------------------------
+// Includes
+// ------------------------------------------------------
+
+#include <stddef.h>     // size_t
+#include <stdbool.h>    // bool
+#include <stdint.h>     // INTPTR_MAX
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// ------------------------------------------------------
+// Standard malloc interface
+// ------------------------------------------------------
+
+mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
+mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc(size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2);
+mi_decl_nodiscard mi_decl_export void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2);
+mi_decl_export void* mi_expand(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2);
+
+mi_decl_export void mi_free(void* p) mi_attr_noexcept;
+mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexcept mi_attr_malloc;
+mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept mi_attr_malloc;
+mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept mi_attr_malloc;
+
+// ------------------------------------------------------
+// Extended functionality
+// ------------------------------------------------------
+#define MI_SMALL_WSIZE_MAX  (128)
+#define MI_SMALL_SIZE_MAX   (MI_SMALL_WSIZE_MAX*sizeof(void*))
+
+mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
+mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
+mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
+
+mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2);
+mi_decl_nodiscard mi_decl_export void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3);
+mi_decl_nodiscard mi_decl_export void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2);
+
+mi_decl_nodiscard mi_decl_export
size_t mi_usable_size(const void* p) mi_attr_noexcept; +mi_decl_nodiscard mi_decl_export size_t mi_good_size(size_t size) mi_attr_noexcept; + + +// ------------------------------------------------------ +// Internals +// ------------------------------------------------------ + +typedef void (mi_cdecl mi_deferred_free_fun)(bool force, unsigned long long heartbeat, void* arg); +mi_decl_export void mi_register_deferred_free(mi_deferred_free_fun* deferred_free, void* arg) mi_attr_noexcept; + +typedef void (mi_cdecl mi_output_fun)(const char* msg, void* arg); +mi_decl_export void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept; + +typedef void (mi_cdecl mi_error_fun)(int err, void* arg); +mi_decl_export void mi_register_error(mi_error_fun* fun, void* arg); + +mi_decl_export void mi_collect(bool force) mi_attr_noexcept; +mi_decl_export int mi_version(void) mi_attr_noexcept; +mi_decl_export void mi_stats_reset(void) mi_attr_noexcept; +mi_decl_export void mi_stats_merge(void) mi_attr_noexcept; +mi_decl_export void mi_stats_print(void* out) mi_attr_noexcept; // backward compatibility: `out` is ignored and should be NULL +mi_decl_export void mi_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; + +mi_decl_export void mi_process_init(void) mi_attr_noexcept; +mi_decl_export void mi_thread_init(void) mi_attr_noexcept; +mi_decl_export void mi_thread_done(void) mi_attr_noexcept; +mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; + +mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, + size_t* current_rss, size_t* peak_rss, + size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept; + +// ------------------------------------------------------------------------------------- +// Aligned allocation +// Note that `alignment` always follows `size` for consistency with unaligned +// allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`. 
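+//
+// For example (an illustrative sketch):
+//
+//   void* p = mi_malloc_aligned(1024, 64);   // 1 KiB block, 64-byte aligned
+//   mi_free(p);
+//
+// Note the argument order: C11 aligned_alloc(64, 1024) would take the
+// alignment first.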
+// ------------------------------------------------------------------------------------- + +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2) mi_attr_alloc_align(3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(1,2); +mi_decl_nodiscard mi_decl_export void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(2) mi_attr_alloc_align(3); +mi_decl_nodiscard mi_decl_export void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(2); + + +// ------------------------------------------------------------------------------------- +// Heaps: first-class, but can only allocate from the same thread that created it. +// ------------------------------------------------------------------------------------- + +struct mi_heap_s; +typedef struct mi_heap_s mi_heap_t; + +mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new(void); +mi_decl_export void mi_heap_delete(mi_heap_t* heap); +mi_decl_export void mi_heap_destroy(mi_heap_t* heap); +mi_decl_export mi_heap_t* mi_heap_set_default(mi_heap_t* heap); +mi_decl_export mi_heap_t* mi_heap_get_default(void); +mi_decl_export mi_heap_t* mi_heap_get_backing(void); +mi_decl_export void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept; + +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); + +mi_decl_nodiscard mi_decl_export void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3); +mi_decl_nodiscard mi_decl_export void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(3,4); +mi_decl_nodiscard mi_decl_export void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept 
mi_attr_alloc_size(3); + +mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept mi_attr_malloc; +mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept mi_attr_malloc; +mi_decl_nodiscard mi_decl_export mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept mi_attr_malloc; + +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3) mi_attr_alloc_align(4); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size2(2, 3); +mi_decl_nodiscard mi_decl_export void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(3) mi_attr_alloc_align(4); +mi_decl_nodiscard mi_decl_export void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(3); + + +// -------------------------------------------------------------------------------- +// Zero initialized re-allocation. +// Only valid on memory that was originally allocated with zero initialization too. +// e.g. `mi_calloc`, `mi_zalloc`, `mi_zalloc_aligned` etc. 
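+// An illustrative sketch:
+//
+//   int* p = (int*)mi_zalloc(100 * sizeof(int));   // zero-initialized
+//   p = (int*)mi_rezalloc(p, 200 * sizeof(int));   // old data kept, new tail is zero
+//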
+// --------------------------------------------------------------------------------
+
+mi_decl_nodiscard mi_decl_export void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(2);
+mi_decl_nodiscard mi_decl_export void* mi_recalloc(void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3);
+
+mi_decl_nodiscard mi_decl_export void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(2) mi_attr_alloc_align(3);
+mi_decl_nodiscard mi_decl_export void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(2);
+mi_decl_nodiscard mi_decl_export void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_alloc_size2(2,3) mi_attr_alloc_align(4);
+mi_decl_nodiscard mi_decl_export void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size2(2,3);
+
+mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept mi_attr_alloc_size(3);
+mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t newcount, size_t size) mi_attr_noexcept mi_attr_alloc_size2(3,4);
+
+mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept mi_attr_alloc_size(3) mi_attr_alloc_align(4);
+mi_decl_nodiscard mi_decl_export void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size(3);
+mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept mi_attr_alloc_size2(3,4) mi_attr_alloc_align(5);
+mi_decl_nodiscard mi_decl_export void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_alloc_size2(3,4);
+
+
+// ------------------------------------------------------
+// Analysis
+// ------------------------------------------------------
+
+mi_decl_export bool mi_heap_contains_block(mi_heap_t* heap, const void* p);
+mi_decl_export bool mi_heap_check_owned(mi_heap_t* heap, const void* p);
+mi_decl_export bool mi_check_owned(const void* p);
+
+// An area of heap space contains blocks of a single size.
+typedef struct mi_heap_area_s {
+  void*  blocks;          // start of the area containing heap blocks
+  size_t reserved;        // bytes reserved for this area (virtual)
+  size_t committed;       // current available bytes for this area
+  size_t used;            // number of allocated blocks
+  size_t block_size;      // size in bytes of each block
+  size_t full_block_size; // size in bytes of a full block including padding and metadata.
+} mi_heap_area_t;
+
+typedef bool (mi_cdecl mi_block_visit_fun)(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg);
+
+mi_decl_export bool mi_heap_visit_blocks(const mi_heap_t* heap, bool visit_all_blocks, mi_block_visit_fun* visitor, void* arg);
+
+// Experimental
+mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept;
+mi_decl_nodiscard mi_decl_export bool mi_is_redirected(void) mi_attr_noexcept;
+
+mi_decl_export int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept;
+mi_decl_export int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept;
+
+mi_decl_export int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept;
+mi_decl_export bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept;
+
+mi_decl_export void mi_debug_show_arenas(void) mi_attr_noexcept;
+
+// Experimental: heaps associated with specific memory arenas
+typedef int mi_arena_id_t;
+mi_decl_export void* mi_arena_area(mi_arena_id_t arena_id, size_t* size);
+mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;
+mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;
+mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;
+
+#if MI_MALLOC_VERSION >= 182
+// Create a heap that only allocates in the specified arena
+mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id);
+#endif
+
+// deprecated
+mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept;
+
+
+// ------------------------------------------------------
+// Convenience
+// ------------------------------------------------------
+
+#define mi_malloc_tp(tp)               ((tp*)mi_malloc(sizeof(tp)))
+#define mi_zalloc_tp(tp)               ((tp*)mi_zalloc(sizeof(tp)))
+#define mi_calloc_tp(tp,n)             ((tp*)mi_calloc(n,sizeof(tp)))
+#define mi_mallocn_tp(tp,n)            ((tp*)mi_mallocn(n,sizeof(tp)))
+#define mi_reallocn_tp(p,tp,n)         ((tp*)mi_reallocn(p,n,sizeof(tp)))
+#define mi_recalloc_tp(p,tp,n)         ((tp*)mi_recalloc(p,n,sizeof(tp)))
+
+#define mi_heap_malloc_tp(hp,tp)       ((tp*)mi_heap_malloc(hp,sizeof(tp)))
+#define mi_heap_zalloc_tp(hp,tp)       ((tp*)mi_heap_zalloc(hp,sizeof(tp)))
+#define mi_heap_calloc_tp(hp,tp,n)     ((tp*)mi_heap_calloc(hp,n,sizeof(tp)))
+#define mi_heap_mallocn_tp(hp,tp,n)    ((tp*)mi_heap_mallocn(hp,n,sizeof(tp)))
+#define mi_heap_reallocn_tp(hp,p,tp,n) ((tp*)mi_heap_reallocn(hp,p,n,sizeof(tp)))
+#define mi_heap_recalloc_tp(hp,p,tp,n) ((tp*)mi_heap_recalloc(hp,p,n,sizeof(tp)))
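+
+// For example, a short usage sketch of the typed convenience macros
+// (`point_t` is illustrative only, not part of the upstream header):
+//
+//   typedef struct point_s { int x, y; } point_t;
+//   point_t* p  = mi_malloc_tp(point_t);        // (point_t*)mi_malloc(sizeof(point_t))
+//   point_t* ps = mi_calloc_tp(point_t, 8);     // 8 zero-initialized points
+//   ps = mi_reallocn_tp(ps, point_t, 16);       // grow to 16 points
+//   mi_free(p); mi_free(ps);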
+// ------------------------------------------------------
+// Options
+// ------------------------------------------------------
+
+typedef enum mi_option_e {
+  // stable options
+  mi_option_show_errors,              // print error messages
+  mi_option_show_stats,               // print statistics on termination
+  mi_option_verbose,                  // print verbose messages
+  // the following options are experimental (see src/options.h)
+  mi_option_eager_commit,             // eager commit segments? (after `eager_commit_delay` segments) (=1)
+  mi_option_arena_eager_commit,       // eager commit arenas? Use 2 to enable just on overcommit systems (=2)
+  mi_option_purge_decommits,          // should a memory purge decommit (or only reset) (=1)
+  mi_option_allow_large_os_pages,     // allow large (2MiB) OS pages, implies eager commit
+  mi_option_reserve_huge_os_pages,    // reserve N huge OS pages (1GiB/page) at startup
+  mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node
+  mi_option_reserve_os_memory,        // reserve specified amount of OS memory in an arena at startup
+  mi_option_deprecated_segment_cache,
+  mi_option_deprecated_page_reset,
+  mi_option_abandoned_page_purge,     // immediately purge delayed purges on thread termination
+  mi_option_deprecated_segment_reset,
+  mi_option_eager_commit_delay,
+  mi_option_purge_delay,              // memory purging is delayed by N milliseconds; use 0 for immediate purging or -1 for no purging at all.
+  mi_option_use_numa_nodes,           // 0 = use all available numa nodes, otherwise use at most N nodes.
+  mi_option_limit_os_alloc,           // 1 = do not use OS memory for allocation (but only programmatically reserved arenas)
+  mi_option_os_tag,                   // tag used for OS logging (macOS only for now)
+  mi_option_max_errors,               // issue at most N error messages
+  mi_option_max_warnings,             // issue at most N warning messages
+  mi_option_max_segment_reclaim,
+  mi_option_destroy_on_exit,          // if set, release all memory on exit; sometimes used for dynamic unloading but can be unsafe.
+  mi_option_arena_reserve,            // initial memory size in KiB for arena reservation (1GiB on 64-bit)
+  mi_option_arena_purge_mult,
+  mi_option_purge_extend_delay,
+  _mi_option_last,
+  // legacy option names
+  mi_option_large_os_pages = mi_option_allow_large_os_pages,
+  mi_option_eager_region_commit = mi_option_arena_eager_commit,
+  mi_option_reset_decommits = mi_option_purge_decommits,
+  mi_option_reset_delay = mi_option_purge_delay,
+  mi_option_abandoned_page_reset = mi_option_abandoned_page_purge
+} mi_option_t;
+
+
+mi_decl_nodiscard mi_decl_export bool mi_option_is_enabled(mi_option_t option);
+mi_decl_export void mi_option_enable(mi_option_t option);
+mi_decl_export void mi_option_disable(mi_option_t option);
+mi_decl_export void mi_option_set_enabled(mi_option_t option, bool enable);
+mi_decl_export void mi_option_set_enabled_default(mi_option_t option, bool enable);
+
+mi_decl_nodiscard mi_decl_export long mi_option_get(mi_option_t option);
+mi_decl_nodiscard mi_decl_export long mi_option_get_clamp(mi_option_t option, long min, long max);
+mi_decl_nodiscard mi_decl_export size_t mi_option_get_size(mi_option_t option);
+mi_decl_export void mi_option_set(mi_option_t option, long value);
+mi_decl_export void mi_option_set_default(mi_option_t option, long value);
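+
+// For example, an illustrative sketch (not part of the upstream header): options
+// can be tuned programmatically before first use; the same options can also be
+// set through environment variables mirroring the option names (e.g.
+// MIMALLOC_SHOW_STATS, MIMALLOC_PURGE_DELAY).
+//
+//   mi_option_enable(mi_option_show_stats);    // print statistics on termination
+//   mi_option_set(mi_option_purge_delay, 0);   // purge freed OS memory immediately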
+// -------------------------------------------------------------------------------------------------------
+// "mi" prefixed implementations of various posix, Unix, Windows, and C++ allocation functions.
+// (This can be convenient when providing overrides of these functions as done in `mimalloc-override.h`.)
+// note: we use `mi_cfree` as "checked free" and it checks if the pointer is in our heap before free-ing.
+// -------------------------------------------------------------------------------------------------------
+
+mi_decl_export void mi_cfree(void* p) mi_attr_noexcept;
+mi_decl_export void* mi__expand(void* p, size_t newsize) mi_attr_noexcept;
+mi_decl_nodiscard mi_decl_export size_t mi_malloc_size(const void* p) mi_attr_noexcept;
+mi_decl_nodiscard mi_decl_export size_t mi_malloc_good_size(size_t size) mi_attr_noexcept;
+mi_decl_nodiscard mi_decl_export size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept;
+
+mi_decl_export int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept;
+mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_memalign(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1);
+mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_valloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
+mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_pvalloc(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
+mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(2) mi_attr_alloc_align(1);
+
+mi_decl_nodiscard mi_decl_export void* mi_reallocarray(void* p, size_t count, size_t size) mi_attr_noexcept mi_attr_alloc_size2(2,3);
+mi_decl_nodiscard mi_decl_export int mi_reallocarr(void* p, size_t count, size_t size) mi_attr_noexcept;
+mi_decl_nodiscard mi_decl_export void* mi_aligned_recalloc(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept;
+mi_decl_nodiscard mi_decl_export void* mi_aligned_offset_recalloc(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept;
+
+mi_decl_nodiscard mi_decl_export mi_decl_restrict unsigned short* mi_wcsdup(const unsigned short* s) mi_attr_noexcept mi_attr_malloc;
+mi_decl_nodiscard mi_decl_export mi_decl_restrict unsigned char* mi_mbsdup(const unsigned char* s) mi_attr_noexcept mi_attr_malloc;
+mi_decl_export int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept;
+mi_decl_export int mi_wdupenv_s(unsigned short** buf, size_t* size, const unsigned short* name) mi_attr_noexcept;
+
+mi_decl_export void mi_free_size(void* p, size_t size) mi_attr_noexcept;
+mi_decl_export void mi_free_size_aligned(void* p, size_t size, size_t alignment) mi_attr_noexcept;
+mi_decl_export void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept;
+
+// The `mi_new` wrappers implement C++ semantics on out-of-memory instead of directly returning `NULL`.
+// (and call `std::get_new_handler` and potentially raise a `std::bad_alloc` exception).
+mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new(size_t size) mi_attr_malloc mi_attr_alloc_size(1);
+mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_aligned(size_t size, size_t alignment) mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2);
+mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_nothrow(size_t size) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
+mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2);
+mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_n(size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(1, 2);
+mi_decl_nodiscard mi_decl_export void* mi_new_realloc(void* p, size_t newsize) mi_attr_alloc_size(2);
+mi_decl_nodiscard mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, size_t size) mi_attr_alloc_size2(2, 3);
+
+mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) mi_attr_malloc mi_attr_alloc_size(2);
+mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(2, 3);
+
+#ifdef __cplusplus
+}
+#endif
+
+// ---------------------------------------------------------------------------------------------
+// Implement the C++ std::allocator interface for use in STL containers.
+// (note: see `mimalloc-new-delete.h` for overriding the new/delete operators globally)
+// ---------------------------------------------------------------------------------------------
+#ifdef __cplusplus
+
+#include <cstddef>     // std::size_t
+#include <cstdint>     // PTRDIFF_MAX
+#if (__cplusplus >= 201103L) || (_MSC_VER > 1900)  // C++11
+#include <type_traits> // std::true_type
+#include <utility>     // std::forward
+#endif
+
+template<class T> struct _mi_stl_allocator_common {
+  typedef T                 value_type;
+  typedef std::size_t       size_type;
+  typedef std::ptrdiff_t    difference_type;
+  typedef value_type&       reference;
+  typedef value_type const& const_reference;
+  typedef value_type*       pointer;
+  typedef value_type const* const_pointer;
+
+  #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900))  // C++11
+  using propagate_on_container_copy_assignment = std::true_type;
+  using propagate_on_container_move_assignment = std::true_type;
+  using propagate_on_container_swap            = std::true_type;
+  template <class U, class ...Args> void construct(U* p, Args&& ...args) { ::new(p) U(std::forward<Args>(args)...); }
+  template <class U> void destroy(U* p) mi_attr_noexcept { p->~U(); }
+  #else
+  void construct(pointer p, value_type const& val) { ::new(p) value_type(val); }
+  void destroy(pointer p) { p->~value_type(); }
+  #endif
+
+  size_type     max_size() const mi_attr_noexcept { return (PTRDIFF_MAX/sizeof(value_type)); }
+  pointer       address(reference x) const        { return &x; }
+  const_pointer address(const_reference x) const  { return &x; }
+};
+
+template<class T> struct mi_stl_allocator : public _mi_stl_allocator_common<T> {
+  using typename _mi_stl_allocator_common<T>::size_type;
+  using typename _mi_stl_allocator_common<T>::value_type;
+  using typename _mi_stl_allocator_common<T>::pointer;
+  template <class U> struct rebind { typedef mi_stl_allocator<U> other; };
+
+  mi_stl_allocator()                                             mi_attr_noexcept = default;
+  mi_stl_allocator(const mi_stl_allocator&)                      mi_attr_noexcept = default;
+  template<class U> mi_stl_allocator(const mi_stl_allocator<U>&) mi_attr_noexcept { }
+  mi_stl_allocator select_on_container_copy_construction() const { return *this; }
+  void deallocate(T* p, size_type) { mi_free(p); }
+
+  #if (__cplusplus >= 201703L)  // C++17
+  mi_decl_nodiscard T* allocate(size_type count) { return static_cast<T*>(mi_new_n(count, sizeof(T))); }
+  mi_decl_nodiscard T* allocate(size_type count, const void*) { return allocate(count); }
+  #else
+  mi_decl_nodiscard pointer allocate(size_type count, const void* = 0) { return static_cast<pointer>(mi_new_n(count, sizeof(value_type))); }
+  #endif
+
+  #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900))  // C++11
+  using is_always_equal = std::true_type;
+  #endif
+};
+
+template<class T1, class T2> bool operator==(const mi_stl_allocator<T1>& , const mi_stl_allocator<T2>& ) mi_attr_noexcept { return true; }
+template<class T1, class T2> bool operator!=(const mi_stl_allocator<T1>& , const mi_stl_allocator<T2>& ) mi_attr_noexcept { return false; }
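+
+// For example, a minimal usage sketch (not part of the upstream header): any
+// STL container can be redirected to mimalloc by passing the allocator as a
+// template argument.
+//
+//   std::vector<int, mi_stl_allocator<int>> v;   // vector storage uses mi_malloc/mi_free
+//   v.push_back(42);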
+
+
+#if (__cplusplus >= 201103L) || (_MSC_VER >= 1900)  // C++11
+#define MI_HAS_HEAP_STL_ALLOCATOR 1
+
+#include <memory>      // std::shared_ptr
+
+// Common base class for STL allocators in a specific heap
+template<class T, bool _mi_destroy> struct _mi_heap_stl_allocator_common : public _mi_stl_allocator_common<T> {
+  using typename _mi_stl_allocator_common<T>::size_type;
+  using typename _mi_stl_allocator_common<T>::value_type;
+  using typename _mi_stl_allocator_common<T>::pointer;
+
+  _mi_heap_stl_allocator_common(mi_heap_t* hp) : heap(hp) { }    /* will not delete nor destroy the passed in heap */
+
+  #if (__cplusplus >= 201703L)  // C++17
+  mi_decl_nodiscard T* allocate(size_type count) { return static_cast<T*>(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(T))); }
+  mi_decl_nodiscard T* allocate(size_type count, const void*) { return allocate(count); }
+  #else
+  mi_decl_nodiscard pointer allocate(size_type count, const void* = 0) { return static_cast<pointer>(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(value_type))); }
+  #endif
+
+  #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900))  // C++11
+  using is_always_equal = std::false_type;
+  #endif
+
+  void collect(bool force) { mi_heap_collect(this->heap.get(), force); }
+  template<class U> bool is_equal(const _mi_heap_stl_allocator_common<U, _mi_destroy>& x) const { return (this->heap == x.heap); }
+
+protected:
+  std::shared_ptr<mi_heap_t> heap;
+  template<class U, bool D> friend struct _mi_heap_stl_allocator_common;
+
+  _mi_heap_stl_allocator_common() {
+    mi_heap_t* hp = mi_heap_new();
+    this->heap.reset(hp, (_mi_destroy ? &heap_destroy : &heap_delete));  /* calls heap_delete/destroy when the refcount drops to zero */
+  }
+  _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { }
+  template<class U> _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common<U, _mi_destroy>& x) mi_attr_noexcept : heap(x.heap) { }
+
+private:
+  static void heap_delete(mi_heap_t* hp)  { if (hp != NULL) { mi_heap_delete(hp); } }
+  static void heap_destroy(mi_heap_t* hp) { if (hp != NULL) { mi_heap_destroy(hp); } }
+};
+
+// STL allocator allocation in a specific heap
+template<class T> struct mi_heap_stl_allocator : public _mi_heap_stl_allocator_common<T, false> {
+  using typename _mi_heap_stl_allocator_common<T, false>::size_type;
+  mi_heap_stl_allocator() : _mi_heap_stl_allocator_common<T, false>() { }        // creates fresh heap that is deleted when the destructor is called
+  mi_heap_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common<T, false>(hp) { }  // no delete nor destroy on the passed in heap
+  template<class U> mi_heap_stl_allocator(const mi_heap_stl_allocator<U>& x) mi_attr_noexcept : _mi_heap_stl_allocator_common<T, false>(x) { }
+
+  mi_heap_stl_allocator select_on_container_copy_construction() const { return *this; }
+  void deallocate(T* p, size_type) { mi_free(p); }
+  template<class U> struct rebind { typedef mi_heap_stl_allocator<U> other; };
+};
+
+template<class T1, class T2> bool operator==(const mi_heap_stl_allocator<T1>& x, const mi_heap_stl_allocator<T2>& y) mi_attr_noexcept { return (x.is_equal(y)); }
+template<class T1, class T2> bool operator!=(const mi_heap_stl_allocator<T1>& x, const mi_heap_stl_allocator<T2>& y) mi_attr_noexcept { return (!x.is_equal(y)); }
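+
+// For example, an illustrative sketch (not part of the upstream header) of a
+// container bound to its own first-class heap; the heap is reference-counted by
+// the allocator copies and deleted together with the last copy:
+//
+//   mi_heap_stl_allocator<int> a;                      // creates a fresh backing heap
+//   std::vector<int, mi_heap_stl_allocator<int>> v(a);
+//   v.resize(1000);                                    // all storage comes from a's heap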
+
+
+// STL allocator allocation in a specific heap, where `free` does nothing and
+// the heap is destroyed in one go on destruction -- use with care!
+template<class T> struct mi_heap_destroy_stl_allocator : public _mi_heap_stl_allocator_common<T, true> {
+  using typename _mi_heap_stl_allocator_common<T, true>::size_type;
+  mi_heap_destroy_stl_allocator() : _mi_heap_stl_allocator_common<T, true>() { } // creates fresh heap that is destroyed when the destructor is called
+  mi_heap_destroy_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common<T, true>(hp) { } // no delete nor destroy on the passed in heap
+  template<class U> mi_heap_destroy_stl_allocator(const mi_heap_destroy_stl_allocator<U>& x) mi_attr_noexcept : _mi_heap_stl_allocator_common<T, true>(x) { }
+
+  mi_heap_destroy_stl_allocator select_on_container_copy_construction() const { return *this; }
+  void deallocate(T*, size_type) { /* do nothing as we destroy the heap on destruct. */ }
+  template<class U> struct rebind { typedef mi_heap_destroy_stl_allocator<U> other; };
+};
+
+template<class T1, class T2> bool operator==(const mi_heap_destroy_stl_allocator<T1>& x, const mi_heap_destroy_stl_allocator<T2>& y) mi_attr_noexcept { return (x.is_equal(y)); }
+template<class T1, class T2> bool operator!=(const mi_heap_destroy_stl_allocator<T1>& x, const mi_heap_destroy_stl_allocator<T2>& y) mi_attr_noexcept { return (!x.is_equal(y)); }
+
+#endif // C++11
+
+#endif // __cplusplus
+
+#endif
diff --git a/Include/internal/mimalloc/mimalloc/atomic.h b/Include/internal/mimalloc/mimalloc/atomic.h
new file mode 100644
index 0000000000000000000000000000000000000000..a46a7676ad20b8feb830f6429364dfce176aae0f
--- /dev/null
+++ b/Include/internal/mimalloc/mimalloc/atomic.h
@@ -0,0 +1,392 @@
+/* ----------------------------------------------------------------------------
+Copyright (c) 2018-2023 Microsoft Research, Daan Leijen
+This is free software; you can redistribute it and/or modify it under the
+terms of the MIT license. A copy of the license can be found in the file
+"LICENSE" at the root of this distribution.
+-----------------------------------------------------------------------------*/
+#pragma once
+#ifndef MIMALLOC_ATOMIC_H
+#define MIMALLOC_ATOMIC_H
+
+// --------------------------------------------------------------------------------------------
+// Atomics
+// We need to be portable between C, C++, and MSVC.
+// We base the primitives on the C/C++ atomics and create a minimal wrapper for MSVC in C compilation mode.
+// This is why we try to use only `uintptr_t` and `<type>*` as atomic types.
+// To gain better insight in the range of used atomics, we use explicitly named memory order operations
+// instead of passing the memory order as a parameter.
+// -----------------------------------------------------------------------------------------------
+
+#if defined(__cplusplus)
+// Use C++ atomics
+#include <atomic>
+#define  _Atomic(tp)            std::atomic<tp>
+#define  mi_atomic(name)        std::atomic_##name
+#define  mi_memory_order(name)  std::memory_order_##name
+#if (__cplusplus >= 202002L)    // c++20, see issue #571
+ #define MI_ATOMIC_VAR_INIT(x)  x
+#elif !defined(ATOMIC_VAR_INIT)
+ #define MI_ATOMIC_VAR_INIT(x)  x
+#else
+ #define MI_ATOMIC_VAR_INIT(x)  ATOMIC_VAR_INIT(x)
+#endif
+#elif defined(_MSC_VER)
+// Use MSVC C wrapper for C11 atomics
+#define  _Atomic(tp)            tp
+#define  MI_ATOMIC_VAR_INIT(x)  x
+#define  mi_atomic(name)        mi_atomic_##name
+#define  mi_memory_order(name)  mi_memory_order_##name
+#else
+// Use C11 atomics
+#include <stdatomic.h>
+#define  mi_atomic(name)        atomic_##name
+#define  mi_memory_order(name)  memory_order_##name
+#if (__STDC_VERSION__ >= 201710L)  // c17, see issue #735
+ #define MI_ATOMIC_VAR_INIT(x)  x
+#elif !defined(ATOMIC_VAR_INIT)
+ #define MI_ATOMIC_VAR_INIT(x)  x
+#else
+ #define MI_ATOMIC_VAR_INIT(x)  ATOMIC_VAR_INIT(x)
+#endif
+#endif
+
+// Various defines for all used memory orders in mimalloc
+#define mi_atomic_cas_weak(p,expected,desired,mem_success,mem_fail)  \
+  mi_atomic(compare_exchange_weak_explicit)(p,expected,desired,mem_success,mem_fail)
+
+#define mi_atomic_cas_strong(p,expected,desired,mem_success,mem_fail)  \
+  mi_atomic(compare_exchange_strong_explicit)(p,expected,desired,mem_success,mem_fail)
+
+#define mi_atomic_load_acquire(p)                mi_atomic(load_explicit)(p,mi_memory_order(acquire))
+#define mi_atomic_load_relaxed(p)                mi_atomic(load_explicit)(p,mi_memory_order(relaxed))
+#define mi_atomic_store_release(p,x)             mi_atomic(store_explicit)(p,x,mi_memory_order(release))
+#define mi_atomic_store_relaxed(p,x)             mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed))
+#define mi_atomic_exchange_release(p,x)          mi_atomic(exchange_explicit)(p,x,mi_memory_order(release))
+#define mi_atomic_exchange_acq_rel(p,x)          mi_atomic(exchange_explicit)(p,x,mi_memory_order(acq_rel))
+#define mi_atomic_cas_weak_release(p,exp,des)    mi_atomic_cas_weak(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed))
+#define mi_atomic_cas_weak_acq_rel(p,exp,des)    mi_atomic_cas_weak(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire))
+#define mi_atomic_cas_strong_release(p,exp,des)  mi_atomic_cas_strong(p,exp,des,mi_memory_order(release),mi_memory_order(relaxed))
+#define mi_atomic_cas_strong_acq_rel(p,exp,des)  mi_atomic_cas_strong(p,exp,des,mi_memory_order(acq_rel),mi_memory_order(acquire))
+
+#define mi_atomic_add_relaxed(p,x)               mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(relaxed))
+#define mi_atomic_sub_relaxed(p,x)               mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(relaxed))
+#define mi_atomic_add_acq_rel(p,x)               mi_atomic(fetch_add_explicit)(p,x,mi_memory_order(acq_rel))
+#define mi_atomic_sub_acq_rel(p,x)               mi_atomic(fetch_sub_explicit)(p,x,mi_memory_order(acq_rel))
+#define mi_atomic_and_acq_rel(p,x)               mi_atomic(fetch_and_explicit)(p,x,mi_memory_order(acq_rel))
+#define mi_atomic_or_acq_rel(p,x)                mi_atomic(fetch_or_explicit)(p,x,mi_memory_order(acq_rel))
+
+#define mi_atomic_increment_relaxed(p)           mi_atomic_add_relaxed(p,(uintptr_t)1)
+#define mi_atomic_decrement_relaxed(p)           mi_atomic_sub_relaxed(p,(uintptr_t)1)
+#define mi_atomic_increment_acq_rel(p)           mi_atomic_add_acq_rel(p,(uintptr_t)1)
+#define mi_atomic_decrement_acq_rel(p)           mi_atomic_sub_acq_rel(p,(uintptr_t)1)
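+
+// For example, a usage sketch of the named-order macros (`counter` is
+// illustrative only, not part of the upstream header):
+//
+//   static _Atomic(uintptr_t) counter;
+//   mi_atomic_increment_relaxed(&counter);              // no ordering needed for a statistic
+//   uintptr_t seen = mi_atomic_load_acquire(&counter);  // pairs with a release store elsewhere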
+
+static inline void mi_atomic_yield(void);
+static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add);
+static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub);
+
+
+#if defined(__cplusplus) || !defined(_MSC_VER)
+
+// In C++/C11 atomics we have polymorphic atomics so can use the typed `ptr` variants (where `tp` is the type of atomic value)
+// We use these macros so we can provide a typed wrapper in MSVC in C compilation mode as well
+#define mi_atomic_load_ptr_acquire(tp,p)                mi_atomic_load_acquire(p)
+#define mi_atomic_load_ptr_relaxed(tp,p)                mi_atomic_load_relaxed(p)
+
+// In C++ we need to add casts to help resolve templates if NULL is passed
+#if defined(__cplusplus)
+#define mi_atomic_store_ptr_release(tp,p,x)             mi_atomic_store_release(p,(tp*)x)
+#define mi_atomic_store_ptr_relaxed(tp,p,x)             mi_atomic_store_relaxed(p,(tp*)x)
+#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des)    mi_atomic_cas_weak_release(p,exp,(tp*)des)
+#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des)    mi_atomic_cas_weak_acq_rel(p,exp,(tp*)des)
+#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des)  mi_atomic_cas_strong_release(p,exp,(tp*)des)
+#define mi_atomic_exchange_ptr_release(tp,p,x)          mi_atomic_exchange_release(p,(tp*)x)
+#define mi_atomic_exchange_ptr_acq_rel(tp,p,x)          mi_atomic_exchange_acq_rel(p,(tp*)x)
+#else
+#define mi_atomic_store_ptr_release(tp,p,x)             mi_atomic_store_release(p,x)
+#define mi_atomic_store_ptr_relaxed(tp,p,x)             mi_atomic_store_relaxed(p,x)
+#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des)    mi_atomic_cas_weak_release(p,exp,des)
+#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des)    mi_atomic_cas_weak_acq_rel(p,exp,des)
+#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des)  mi_atomic_cas_strong_release(p,exp,des)
+#define mi_atomic_exchange_ptr_release(tp,p,x)          mi_atomic_exchange_release(p,x)
+#define mi_atomic_exchange_ptr_acq_rel(tp,p,x)          mi_atomic_exchange_acq_rel(p,x)
+#endif
+
+// These are used by the statistics
+static inline int64_t mi_atomic_addi64_relaxed(volatile int64_t* p, int64_t add) {
+  return mi_atomic(fetch_add_explicit)((_Atomic(int64_t)*)p, add, mi_memory_order(relaxed));
+}
+static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) {
+  int64_t current = mi_atomic_load_relaxed((_Atomic(int64_t)*)p);
+  while (current < x && !mi_atomic_cas_weak_release((_Atomic(int64_t)*)p, &current, x)) { /* nothing */ };
+}
+
+// Used by timers
+#define mi_atomic_loadi64_acquire(p)            mi_atomic(load_explicit)(p,mi_memory_order(acquire))
+#define mi_atomic_loadi64_relaxed(p)            mi_atomic(load_explicit)(p,mi_memory_order(relaxed))
+#define mi_atomic_storei64_release(p,x)         mi_atomic(store_explicit)(p,x,mi_memory_order(release))
+#define mi_atomic_storei64_relaxed(p,x)         mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed))
+
+#define mi_atomic_casi64_strong_acq_rel(p,e,d)  mi_atomic_cas_strong_acq_rel(p,e,d)
+#define mi_atomic_addi64_acq_rel(p,i)           mi_atomic_add_acq_rel(p,i)
+
+
+#elif defined(_MSC_VER)
+
+// MSVC C compilation wrapper that uses Interlocked operations to model C11 atomics.
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include <intrin.h>
+#ifdef _WIN64
+typedef LONG64   msc_intptr_t;
+#define MI_64(f) f##64
+#else
+typedef LONG     msc_intptr_t;
+#define MI_64(f) f
+#endif
+
+typedef enum mi_memory_order_e {
+  mi_memory_order_relaxed,
+  mi_memory_order_consume,
+  mi_memory_order_acquire,
+  mi_memory_order_release,
+  mi_memory_order_acq_rel,
+  mi_memory_order_seq_cst
+} mi_memory_order;
+
+static inline uintptr_t mi_atomic_fetch_add_explicit(_Atomic(uintptr_t)*p, uintptr_t add, mi_memory_order mo) {
+  (void)(mo);
+  return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, (msc_intptr_t)add);
+}
+static inline uintptr_t mi_atomic_fetch_sub_explicit(_Atomic(uintptr_t)*p, uintptr_t sub, mi_memory_order mo) {
+  (void)(mo);
+  return (uintptr_t)MI_64(_InterlockedExchangeAdd)((volatile msc_intptr_t*)p, -((msc_intptr_t)sub));
+}
+static inline uintptr_t mi_atomic_fetch_and_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
+  (void)(mo);
+  return (uintptr_t)MI_64(_InterlockedAnd)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
+}
+static inline uintptr_t mi_atomic_fetch_or_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
+  (void)(mo);
+  return (uintptr_t)MI_64(_InterlockedOr)((volatile msc_intptr_t*)p, (msc_intptr_t)x);
+}
+static inline bool mi_atomic_compare_exchange_strong_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) {
+  (void)(mo1); (void)(mo2);
+  uintptr_t read = (uintptr_t)MI_64(_InterlockedCompareExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)desired, (msc_intptr_t)(*expected));
+  if (read == *expected) {
+    return true;
+  }
+  else {
+    *expected = read;
+    return false;
+  }
+}
+static inline bool mi_atomic_compare_exchange_weak_explicit(_Atomic(uintptr_t)*p, uintptr_t* expected, uintptr_t desired, mi_memory_order mo1, mi_memory_order mo2) {
+  return mi_atomic_compare_exchange_strong_explicit(p, expected, desired, mo1, mo2);
+}
+static inline uintptr_t mi_atomic_exchange_explicit(_Atomic(uintptr_t)*p, uintptr_t exchange, mi_memory_order mo) {
+  (void)(mo);
+  return (uintptr_t)MI_64(_InterlockedExchange)((volatile msc_intptr_t*)p, (msc_intptr_t)exchange);
+}
+static inline void mi_atomic_thread_fence(mi_memory_order mo) {
+  (void)(mo);
+  _Atomic(uintptr_t) x = 0;
+  mi_atomic_exchange_explicit(&x, 1, mo);
+}
+static inline uintptr_t mi_atomic_load_explicit(_Atomic(uintptr_t) const* p, mi_memory_order mo) {
+  (void)(mo);
+#if defined(_M_IX86) || defined(_M_X64)
+  return *p;
+#else
+  uintptr_t x = *p;
+  if (mo > mi_memory_order_relaxed) {
+    while (!mi_atomic_compare_exchange_weak_explicit((_Atomic(uintptr_t)*)p, &x, x, mo, mi_memory_order_relaxed)) { /* nothing */ };
+  }
+  return x;
+#endif
+}
+static inline void mi_atomic_store_explicit(_Atomic(uintptr_t)*p, uintptr_t x, mi_memory_order mo) {
+  (void)(mo);
+#if defined(_M_IX86) || defined(_M_X64)
+  *p = x;
+#else
+  mi_atomic_exchange_explicit(p, x, mo);
+#endif
+}
+static inline int64_t mi_atomic_loadi64_explicit(_Atomic(int64_t)*p, mi_memory_order mo) {
+  (void)(mo);
+#if defined(_M_X64)
+  return *p;
+#else
+  int64_t old = *p;
+  int64_t x = old;
+  while ((old = InterlockedCompareExchange64(p, x, old)) != x) {
+    x = old;
+  }
+  return x;
+#endif
+}
+static inline void mi_atomic_storei64_explicit(_Atomic(int64_t)*p, int64_t x, mi_memory_order mo) {
+  (void)(mo);
+#if defined(x_M_IX86) || defined(_M_X64)
+  *p = x;
+#else
InterlockedExchange64(p, x); +#endif +} + +// These are used by the statistics +static inline int64_t mi_atomic_addi64_relaxed(volatile _Atomic(int64_t)*p, int64_t add) { +#ifdef _WIN64 + return (int64_t)mi_atomic_addi((int64_t*)p, add); +#else + int64_t current; + int64_t sum; + do { + current = *p; + sum = current + add; + } while (_InterlockedCompareExchange64(p, sum, current) != current); + return current; +#endif +} +static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t x) { + int64_t current; + do { + current = *p; + } while (current < x && _InterlockedCompareExchange64(p, x, current) != current); +} + +static inline void mi_atomic_addi64_acq_rel(volatile _Atomic(int64_t*)p, int64_t i) { + mi_atomic_addi64_relaxed(p, i); +} + +static inline bool mi_atomic_casi64_strong_acq_rel(volatile _Atomic(int64_t*)p, int64_t* exp, int64_t des) { + int64_t read = _InterlockedCompareExchange64(p, des, *exp); + if (read == *exp) { + return true; + } + else { + *exp = read; + return false; + } +} + +// The pointer macros cast to `uintptr_t`. +#define mi_atomic_load_ptr_acquire(tp,p) (tp*)mi_atomic_load_acquire((_Atomic(uintptr_t)*)(p)) +#define mi_atomic_load_ptr_relaxed(tp,p) (tp*)mi_atomic_load_relaxed((_Atomic(uintptr_t)*)(p)) +#define mi_atomic_store_ptr_release(tp,p,x) mi_atomic_store_release((_Atomic(uintptr_t)*)(p),(uintptr_t)(x)) +#define mi_atomic_store_ptr_relaxed(tp,p,x) mi_atomic_store_relaxed((_Atomic(uintptr_t)*)(p),(uintptr_t)(x)) +#define mi_atomic_cas_ptr_weak_release(tp,p,exp,des) mi_atomic_cas_weak_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) +#define mi_atomic_cas_ptr_weak_acq_rel(tp,p,exp,des) mi_atomic_cas_weak_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) +#define mi_atomic_cas_ptr_strong_release(tp,p,exp,des) mi_atomic_cas_strong_release((_Atomic(uintptr_t)*)(p),(uintptr_t*)exp,(uintptr_t)des) +#define mi_atomic_exchange_ptr_release(tp,p,x) (tp*)mi_atomic_exchange_release((_Atomic(uintptr_t)*)(p),(uintptr_t)x) +#define mi_atomic_exchange_ptr_acq_rel(tp,p,x) (tp*)mi_atomic_exchange_acq_rel((_Atomic(uintptr_t)*)(p),(uintptr_t)x) + +#define mi_atomic_loadi64_acquire(p) mi_atomic(loadi64_explicit)(p,mi_memory_order(acquire)) +#define mi_atomic_loadi64_relaxed(p) mi_atomic(loadi64_explicit)(p,mi_memory_order(relaxed)) +#define mi_atomic_storei64_release(p,x) mi_atomic(storei64_explicit)(p,x,mi_memory_order(release)) +#define mi_atomic_storei64_relaxed(p,x) mi_atomic(storei64_explicit)(p,x,mi_memory_order(relaxed)) + + +#endif + + +// Atomically add a signed value; returns the previous value. +static inline intptr_t mi_atomic_addi(_Atomic(intptr_t)*p, intptr_t add) { + return (intptr_t)mi_atomic_add_acq_rel((_Atomic(uintptr_t)*)p, (uintptr_t)add); +} + +// Atomically subtract a signed value; returns the previous value. 
+static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) {
+  return (intptr_t)mi_atomic_addi(p, -sub);
+}
+
+typedef _Atomic(uintptr_t) mi_atomic_once_t;
+
+// Returns true only on the first invocation
+static inline bool mi_atomic_once( mi_atomic_once_t* once ) {
+  if (mi_atomic_load_relaxed(once) != 0) return false;     // quick test
+  uintptr_t expected = 0;
+  return mi_atomic_cas_strong_acq_rel(once, &expected, (uintptr_t)1); // try to set to 1
+}
+
+typedef _Atomic(uintptr_t) mi_atomic_guard_t;
+
+// Allows only one thread to execute at a time
+#define mi_atomic_guard(guard) \
+  uintptr_t _mi_guard_expected = 0; \
+  for(bool _mi_guard_once = true; \
+      _mi_guard_once && mi_atomic_cas_strong_acq_rel(guard,&_mi_guard_expected,(uintptr_t)1); \
+      (mi_atomic_store_release(guard,(uintptr_t)0), _mi_guard_once = false) )
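+
+// For example, an illustrative sketch of one-time and mutually exclusive
+// initialization (`init_once`, `init_guard`, and `do_init` are illustrative
+// only, not part of the upstream header):
+//
+//   static mi_atomic_once_t  init_once;
+//   static mi_atomic_guard_t init_guard;
+//   if (mi_atomic_once(&init_once)) { do_init(); }  // runs in exactly one thread, once
+//   mi_atomic_guard(&init_guard) {
+//     /* at most one thread enters here; concurrent threads skip the block */
+//   }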
+
+
+// Yield
+#if defined(__cplusplus)
+#include <thread>
+static inline void mi_atomic_yield(void) {
+  std::this_thread::yield();
+}
+#elif defined(_WIN32)
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+static inline void mi_atomic_yield(void) {
+  YieldProcessor();
+}
+#elif defined(__SSE2__)
+#include <emmintrin.h>
+static inline void mi_atomic_yield(void) {
+  _mm_pause();
+}
+#elif (defined(__GNUC__) || defined(__clang__)) && \
+      (defined(__x86_64__) || defined(__i386__) || \
+       defined(__aarch64__) || defined(__arm__) || \
+       defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__POWERPC__))
+#if defined(__x86_64__) || defined(__i386__)
+static inline void mi_atomic_yield(void) {
+  __asm__ volatile ("pause" ::: "memory");
+}
+#elif defined(__aarch64__)
+static inline void mi_atomic_yield(void) {
+  __asm__ volatile("wfe");
+}
+#elif defined(__arm__)
+#if __ARM_ARCH >= 7
+static inline void mi_atomic_yield(void) {
+  __asm__ volatile("yield" ::: "memory");
+}
+#else
+static inline void mi_atomic_yield(void) {
+  __asm__ volatile ("nop" ::: "memory");
+}
+#endif
+#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__POWERPC__)
+#ifdef __APPLE__
+static inline void mi_atomic_yield(void) {
+  __asm__ volatile ("or r27,r27,r27" ::: "memory");
+}
+#else
+static inline void mi_atomic_yield(void) {
+  __asm__ __volatile__ ("or 27,27,27" ::: "memory");
+}
+#endif
+#endif
+#elif defined(__sun)
+// Fallback for other archs
+#include <synch.h>
+static inline void mi_atomic_yield(void) {
+  smt_pause();
+}
+#elif defined(__wasi__)
+#include <sched.h>
+static inline void mi_atomic_yield(void) {
+  sched_yield();
+}
+#else
+#include <unistd.h>
+static inline void mi_atomic_yield(void) {
+  sleep(0);
+}
+#endif
+
+
+#endif // __MIMALLOC_ATOMIC_H
diff --git a/Include/internal/mimalloc/mimalloc/internal.h b/Include/internal/mimalloc/mimalloc/internal.h
new file mode 100644
index 0000000000000000000000000000000000000000..1c16152d914509ffb6d6b8da4d778a318b84a45c
--- /dev/null
+++ b/Include/internal/mimalloc/mimalloc/internal.h
@@ -0,0 +1,969 @@
+/* ----------------------------------------------------------------------------
+Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
+This is free software; you can redistribute it and/or modify it under the
+terms of the MIT license. A copy of the license can be found in the file
+"LICENSE" at the root of this distribution.
+-----------------------------------------------------------------------------*/
+#pragma once
+#ifndef MIMALLOC_INTERNAL_H
+#define MIMALLOC_INTERNAL_H
+
+
+// --------------------------------------------------------------------------
+// This file contains the internal APIs of mimalloc and various utility
+// functions and macros.
+// --------------------------------------------------------------------------
+
+#include "types.h"
+#include "track.h"
+
+#if (MI_DEBUG>0)
+#define mi_trace_message(...) _mi_trace_message(__VA_ARGS__)
+#else
+#define mi_trace_message(...)
+#endif
+
+#if defined(__EMSCRIPTEN__) && !defined(__wasi__)
+#define __wasi__
+#endif
+
+#if defined(__cplusplus)
+#define mi_decl_externc extern "C"
+#else
+#define mi_decl_externc
+#endif
+
+// pthreads
+#if !defined(_WIN32) && !defined(__wasi__)
+#define MI_USE_PTHREADS
+#include <pthread.h>
+#endif
+
+// "options.c"
+void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message);
+void _mi_fprintf(mi_output_fun* out, void* arg, const char* fmt, ...);
+void _mi_warning_message(const char* fmt, ...);
+void _mi_verbose_message(const char* fmt, ...);
+void _mi_trace_message(const char* fmt, ...);
+void _mi_options_init(void);
+void _mi_error_message(int err, const char* fmt, ...);
+
+// random.c
+void _mi_random_init(mi_random_ctx_t* ctx);
+void _mi_random_init_weak(mi_random_ctx_t* ctx);
+void _mi_random_reinit_if_weak(mi_random_ctx_t * ctx);
+void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx);
+uintptr_t _mi_random_next(mi_random_ctx_t* ctx);
+uintptr_t _mi_heap_random_next(mi_heap_t* heap);
+uintptr_t _mi_os_random_weak(uintptr_t extra_seed);
+static inline uintptr_t _mi_random_shuffle(uintptr_t x);
+
+// init.c
+extern mi_decl_cache_align mi_stats_t _mi_stats_main;
+extern mi_decl_cache_align const mi_page_t _mi_page_empty;
+bool _mi_is_main_thread(void);
+size_t _mi_current_thread_count(void);
+bool _mi_preloading(void); // true while the C runtime is not initialized yet
+mi_threadid_t _mi_thread_id(void) mi_attr_noexcept;
+mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap
+void _mi_thread_done(mi_heap_t* heap);
+void _mi_thread_data_collect(void);
+void _mi_tld_init(mi_tld_t* tld, mi_heap_t* bheap);
+
+// os.c
+void _mi_os_init(void); // called from process init
+void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats);
+void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* stats);
+void _mi_os_free_ex(void* p, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* stats);
+
+size_t _mi_os_page_size(void);
+size_t _mi_os_good_alloc_size(size_t size);
+bool _mi_os_has_overcommit(void);
+bool _mi_os_has_virtual_reserve(void);
+
+bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats);
+bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats);
+bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
+bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);
+bool _mi_os_protect(void* addr, size_t size);
+bool _mi_os_unprotect(void* addr, size_t size);
+bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats);
+
+void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats);
+void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats);
+
+void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size);
+bool _mi_os_use_large_page(size_t size, size_t alignment);
+size_t _mi_os_large_page_size(void);
+
+void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid);
+
+// arena.c
+mi_arena_id_t
_mi_arena_id_none(void); +void _mi_arena_free(void* p, size_t size, size_t still_committed_size, mi_memid_t memid, mi_stats_t* stats); +void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); +bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id); +bool _mi_arena_contains(const void* p); +void _mi_arena_collect(bool force_purge, mi_stats_t* stats); +void _mi_arena_unsafe_destroy_all(mi_stats_t* stats); + +// "segment-map.c" +void _mi_segment_map_allocated_at(const mi_segment_t* segment); +void _mi_segment_map_freed_at(const mi_segment_t* segment); + +// "segment.c" +extern mi_abandoned_pool_t _mi_abandoned_default; // global abandoned pool +mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); +void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld); +void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); +bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld); +void _mi_segment_thread_collect(mi_segments_tld_t* tld); +bool _mi_abandoned_pool_visit_blocks(mi_abandoned_pool_t* pool, uint8_t page_tag, bool visit_blocks, mi_block_visit_fun* visitor, void* arg); + + +#if MI_HUGE_PAGE_ABANDON +void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); +#else +void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); +#endif + +uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page +void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld); +void _mi_abandoned_await_readers(mi_abandoned_pool_t *pool); +void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld); + +// "page.c" +void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc; + +void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; // free the page if there are no other pages with many free blocks +void _mi_page_unfull(mi_page_t* page); +void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); // free the page +void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread... 
+void _mi_heap_delayed_free_all(mi_heap_t* heap); +bool _mi_heap_delayed_free_partial(mi_heap_t* heap); +void _mi_heap_collect_retired(mi_heap_t* heap, bool force); + +void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never); +bool _mi_page_try_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never); +size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append); +void _mi_deferred_free(mi_heap_t* heap, bool force); + +void _mi_page_free_collect(mi_page_t* page,bool force); +void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page); // callback from segments + +size_t _mi_bin_size(uint8_t bin); // for stats +uint8_t _mi_bin(size_t size); // for stats + +// "heap.c" +void _mi_heap_init_ex(mi_heap_t* heap, mi_tld_t* tld, mi_arena_id_t arena_id, bool no_reclaim, uint8_t tag); +void _mi_heap_destroy_pages(mi_heap_t* heap); +void _mi_heap_collect_abandon(mi_heap_t* heap); +void _mi_heap_set_default_direct(mi_heap_t* heap); +bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid); +void _mi_heap_unsafe_destroy_all(void); +void _mi_heap_area_init(mi_heap_area_t* area, mi_page_t* page); +bool _mi_heap_area_visit_blocks(const mi_heap_area_t* area, mi_page_t *page, mi_block_visit_fun* visitor, void* arg); + +// "stats.c" +void _mi_stats_done(mi_stats_t* stats); +mi_msecs_t _mi_clock_now(void); +mi_msecs_t _mi_clock_end(mi_msecs_t start); +mi_msecs_t _mi_clock_start(void); + +// "alloc.c" +void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept; // called from `_mi_malloc_generic` +void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept; +void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned` +void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept; +mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p); +bool _mi_free_delayed_block(mi_block_t* block); +void _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration +void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size); + +// option.c, c primitives +char _mi_toupper(char c); +int _mi_strnicmp(const char* s, const char* t, size_t n); +void _mi_strlcpy(char* dest, const char* src, size_t dest_size); +void _mi_strlcat(char* dest, const char* src, size_t dest_size); +size_t _mi_strlen(const char* s); +size_t _mi_strnlen(const char* s, size_t max_len); + + +#if MI_DEBUG>1 +bool _mi_page_is_valid(mi_page_t* page); +#endif + + +// ------------------------------------------------------ +// Branches +// ------------------------------------------------------ + +#if defined(__GNUC__) || defined(__clang__) +#define mi_unlikely(x) (__builtin_expect(!!(x),false)) +#define mi_likely(x) (__builtin_expect(!!(x),true)) +#elif (defined(__cplusplus) && (__cplusplus >= 202002L)) || (defined(_MSVC_LANG) && _MSVC_LANG >= 202002L) +#define mi_unlikely(x) (x) [[unlikely]] +#define mi_likely(x) (x) [[likely]] +#else +#define mi_unlikely(x) (x) +#define mi_likely(x) (x) +#endif + +#ifndef __has_builtin +#define __has_builtin(x) 0 +#endif + + +/* ----------------------------------------------------------- + Error codes passed to `_mi_fatal_error` + All are recoverable but EFAULT is a serious error and aborts by default in 
secure mode.
+  For portability define undefined error codes using common Unix codes:
+
+----------------------------------------------------------- */
+#include <errno.h>
+#ifndef EAGAIN         // double free
+#define EAGAIN (11)
+#endif
+#ifndef ENOMEM         // out of memory
+#define ENOMEM (12)
+#endif
+#ifndef EFAULT         // corrupted free-list or meta-data
+#define EFAULT (14)
+#endif
+#ifndef EINVAL         // trying to free an invalid pointer
+#define EINVAL (22)
+#endif
+#ifndef EOVERFLOW      // count*size overflow
+#define EOVERFLOW (75)
+#endif
+
+
+/* -----------------------------------------------------------
+  Inlined definitions
+----------------------------------------------------------- */
+#define MI_UNUSED(x)     (void)(x)
+#if (MI_DEBUG>0)
+#define MI_UNUSED_RELEASE(x)
+#else
+#define MI_UNUSED_RELEASE(x)  MI_UNUSED(x)
+#endif
+
+#define MI_INIT4(x)   x(),x(),x(),x()
+#define MI_INIT8(x)   MI_INIT4(x),MI_INIT4(x)
+#define MI_INIT16(x)  MI_INIT8(x),MI_INIT8(x)
+#define MI_INIT32(x)  MI_INIT16(x),MI_INIT16(x)
+#define MI_INIT64(x)  MI_INIT32(x),MI_INIT32(x)
+#define MI_INIT128(x) MI_INIT64(x),MI_INIT64(x)
+#define MI_INIT256(x) MI_INIT128(x),MI_INIT128(x)
+
+
+#include <string.h>
+// initialize a local variable to zero; use memset as compilers optimize constant sized memset's
+#define _mi_memzero_var(x)  memset(&x,0,sizeof(x))
+
+// Is `x` a power of two? (0 is considered a power of two)
+static inline bool _mi_is_power_of_two(uintptr_t x) {
+  return ((x & (x - 1)) == 0);
+}
+
+// Is a pointer aligned?
+static inline bool _mi_is_aligned(void* p, size_t alignment) {
+  mi_assert_internal(alignment != 0);
+  return (((uintptr_t)p % alignment) == 0);
+}
+
+// Align upwards
+static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) {
+  mi_assert_internal(alignment != 0);
+  uintptr_t mask = alignment - 1;
+  if ((alignment & mask) == 0) {  // power of two?
+    return ((sz + mask) & ~mask);
+  }
+  else {
+    return (((sz + mask)/alignment)*alignment);
+  }
+}
+
+// Align downwards
+static inline uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) {
+  mi_assert_internal(alignment != 0);
+  uintptr_t mask = alignment - 1;
+  if ((alignment & mask) == 0) { // power of two?
+    return (sz & ~mask);
+  }
+  else {
+    return ((sz / alignment) * alignment);
+  }
+}
+
+// Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`.
+static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) {
+  mi_assert_internal(divider != 0);
+  return (divider == 0 ? size : ((size + divider - 1) / divider));
+}
+
+// Is memory zero initialized?
+static inline bool mi_mem_is_zero(const void* p, size_t size) {
+  for (size_t i = 0; i < size; i++) {
+    if (((uint8_t*)p)[i] != 0) return false;
+  }
+  return true;
+}
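+
+// For example, with a power-of-two alignment the mask form in `_mi_align_up`
+// and `_mi_align_down` above computes:
+//   _mi_align_up(13, 8)   == (13 + 7) & ~7 == 16
+//   _mi_align_down(13, 8) == 13 & ~7       == 8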
+
+
+// Align a byte size to a size in _machine words_,
+// i.e. byte size == `wsize*sizeof(void*)`.
+static inline size_t _mi_wsize_from_size(size_t size) {
+  mi_assert_internal(size <= SIZE_MAX - sizeof(uintptr_t));
+  return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t);
+}
+
+// Overflow detecting multiply
+#if __has_builtin(__builtin_umul_overflow) || (defined(__GNUC__) && (__GNUC__ >= 5))
+#include <limits.h>    // UINT_MAX, ULONG_MAX
+#if defined(_CLOCK_T)  // for Illumos
+#undef _CLOCK_T
+#endif
+static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
+  #if (SIZE_MAX == ULONG_MAX)
+  return __builtin_umull_overflow(count, size, (unsigned long *)total);
+  #elif (SIZE_MAX == UINT_MAX)
+  return __builtin_umul_overflow(count, size, (unsigned int *)total);
+  #else
+  return __builtin_umulll_overflow(count, size, (unsigned long long *)total);
+  #endif
+}
+#else /* __builtin_umul_overflow is unavailable */
+static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
+  #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t)))  // sqrt(SIZE_MAX)
+  *total = count * size;
+  // note: gcc/clang optimize this to directly check the overflow flag
+  return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW) && size > 0 && (SIZE_MAX / size) < count);
+}
+#endif
+
+// Safe multiply `count*size` into `total`; return `true` on overflow.
+static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* total) {
+  if (count==1) {  // quick check for the case where count is one (common for C++ allocators)
+    *total = size;
+    return false;
+  }
+  else if mi_unlikely(mi_mul_overflow(count, size, total)) {
+    #if MI_DEBUG > 0
+    _mi_error_message(EOVERFLOW, "allocation request is too large (%zu * %zu bytes)\n", count, size);
+    #endif
+    *total = SIZE_MAX;
+    return true;
+  }
+  else return false;
+}
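+
+// For example, an illustrative sketch (not part of the upstream header) of the
+// intended call pattern when sizing an array allocation from an untrusted count:
+//
+//   size_t total;
+//   if (mi_count_size_overflow(count, sizeof(uint32_t), &total)) return NULL;
+//   void* arr = mi_malloc(total);  // safe: total == count*sizeof(uint32_t) without wrap-around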
+
+
+/*----------------------------------------------------------------------------------------
+  Heap functions
+------------------------------------------------------------------------------------------- */
+
+extern const mi_heap_t _mi_heap_empty;  // read-only empty heap, initial value of the thread local default heap
+
+static inline bool mi_heap_is_backing(const mi_heap_t* heap) {
+  return (heap->tld->heap_backing == heap);
+}
+
+static inline bool mi_heap_is_initialized(mi_heap_t* heap) {
+  mi_assert_internal(heap != NULL);
+  return (heap != &_mi_heap_empty);
+}
+
+static inline uintptr_t _mi_ptr_cookie(const void* p) {
+  extern mi_heap_t _mi_heap_main;
+  mi_assert_internal(_mi_heap_main.cookie != 0);
+  return ((uintptr_t)p ^ _mi_heap_main.cookie);
+}
+
+/* -----------------------------------------------------------
+  Pages
+----------------------------------------------------------- */
+
+static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t size) {
+  mi_assert_internal(size <= (MI_SMALL_SIZE_MAX + MI_PADDING_SIZE));
+  const size_t idx = _mi_wsize_from_size(size);
+  mi_assert_internal(idx < MI_PAGES_DIRECT);
+  return heap->pages_free_direct[idx];
+}
+
+// Segment that contains the pointer
+// Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE),
+// and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it;
+// therefore we align one byte before `p`.
+static inline mi_segment_t* _mi_ptr_segment(const void* p) {
+  mi_assert_internal(p != NULL);
+  return (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK);
+}
+
+static inline mi_page_t* mi_slice_to_page(mi_slice_t* s) {
+  mi_assert_internal(s->slice_offset == 0 && s->slice_count > 0);
+  return (mi_page_t*)(s);
+}
+
+static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) {
+  mi_assert_internal(p->slice_offset == 0 && p->slice_count > 0);
+  return (mi_slice_t*)(p);
+}
+
+// Segment belonging to a page
+static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) {
+  mi_segment_t* segment = _mi_ptr_segment(page);
+  mi_assert_internal(segment == NULL || ((mi_slice_t*)page >= segment->slices && (mi_slice_t*)page < segment->slices + segment->slice_entries));
+  return segment;
+}
+
+static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) {
+  mi_slice_t* start = (mi_slice_t*)((uint8_t*)slice - slice->slice_offset);
+  mi_assert_internal(start >= _mi_ptr_segment(slice)->slices);
+  mi_assert_internal(start->slice_offset == 0);
+  mi_assert_internal(start + start->slice_count > slice);
+  return start;
+}
+
+// Get the page containing the pointer (performance critical as it is called in mi_free)
+static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) {
+  mi_assert_internal(p > (void*)segment);
+  ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment;
+  mi_assert_internal(diff > 0 && diff <= (ptrdiff_t)MI_SEGMENT_SIZE);
+  size_t idx = (size_t)diff >> MI_SEGMENT_SLICE_SHIFT;
+  mi_assert_internal(idx <= segment->slice_entries);
+  mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx];
+  mi_slice_t* slice = mi_slice_first(slice0); // adjust to the block that holds the page data
+  mi_assert_internal(slice->slice_offset == 0);
+  mi_assert_internal(slice >= segment->slices && slice < segment->slices + segment->slice_entries);
+  return mi_slice_to_page(slice);
+}
+
+// Quick page start for initialized pages
+static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) {
+  return _mi_segment_page_start(segment, page, page_size);
+}
+
+// Get the page containing the pointer
+static inline mi_page_t* _mi_ptr_page(void* p) {
+  return _mi_segment_page_of(_mi_ptr_segment(p), p);
+}
+
+// Get the block size of a page (special case for huge objects)
+static inline size_t mi_page_block_size(const mi_page_t* page) {
+  const size_t bsize = page->xblock_size;
+  mi_assert_internal(bsize > 0);
+  if mi_likely(bsize < MI_HUGE_BLOCK_SIZE) {
+    return bsize;
+  }
+  else {
+    size_t psize;
+    _mi_segment_page_start(_mi_page_segment(page), page, &psize);
+    return psize;
+  }
+}
+
+static inline bool mi_page_is_huge(const mi_page_t* page) {
+  return (_mi_page_segment(page)->kind == MI_SEGMENT_HUGE);
+}
+
+// Get the usable block size of a page without fixed padding.
+// This may still include internal padding due to alignment and rounding up size classes.
+static inline size_t mi_page_usable_block_size(const mi_page_t* page) { + return mi_page_block_size(page) - MI_PADDING_SIZE; +} + +// size of a segment +static inline size_t mi_segment_size(mi_segment_t* segment) { + return segment->segment_slices * MI_SEGMENT_SLICE_SIZE; +} + +static inline uint8_t* mi_segment_end(mi_segment_t* segment) { + return (uint8_t*)segment + mi_segment_size(segment); +} + +// Thread free access +static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) { + return (mi_block_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & ~3); +} + +static inline mi_delayed_t mi_page_thread_free_flag(const mi_page_t* page) { + return (mi_delayed_t)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & 3); +} + +// Heap access +static inline mi_heap_t* mi_page_heap(const mi_page_t* page) { + return (mi_heap_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xheap)); +} + +static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) { + mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING); + mi_atomic_store_release(&page->xheap,(uintptr_t)heap); +} + +// Thread free flag helpers +static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) { + return (mi_block_t*)(tf & ~0x03); +} +static inline mi_delayed_t mi_tf_delayed(mi_thread_free_t tf) { + return (mi_delayed_t)(tf & 0x03); +} +static inline mi_thread_free_t mi_tf_make(mi_block_t* block, mi_delayed_t delayed) { + return (mi_thread_free_t)((uintptr_t)block | (uintptr_t)delayed); +} +static inline mi_thread_free_t mi_tf_set_delayed(mi_thread_free_t tf, mi_delayed_t delayed) { + return mi_tf_make(mi_tf_block(tf),delayed); +} +static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t* block) { + return mi_tf_make(block, mi_tf_delayed(tf)); +} + +// are all blocks in a page freed? +// note: needs up-to-date used count, (as the `xthread_free` list may not be empty). see `_mi_page_collect_free`. +static inline bool mi_page_all_free(const mi_page_t* page) { + mi_assert_internal(page != NULL); + return (page->used == 0); +} + +// are there any available blocks? +static inline bool mi_page_has_any_available(const mi_page_t* page) { + mi_assert_internal(page != NULL && page->reserved > 0); + return (page->used < page->reserved || (mi_page_thread_free(page) != NULL)); +} + +// are there immediately available blocks, i.e. blocks available on the free list. +static inline bool mi_page_immediate_available(const mi_page_t* page) { + mi_assert_internal(page != NULL); + return (page->free != NULL); +} + +// is more than 7/8th of a page in use? 
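+// For example (illustrative): a page with `reserved == 128` blocks has
+// `frac == 16`, so it counts as mostly used once `used >= 112`, i.e. when
+// at most 1/8th of the blocks remain unused.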
+static inline bool mi_page_mostly_used(const mi_page_t* page) {
+  if (page==NULL) return true;
+  uint16_t frac = page->reserved / 8U;
+  return (page->reserved - page->used <= frac);
+}
+
+static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) {
+  return &((mi_heap_t*)heap)->pages[_mi_bin(size)];
+}
+
+
+
+//-----------------------------------------------------------
+// Page flags
+//-----------------------------------------------------------
+static inline bool mi_page_is_in_full(const mi_page_t* page) {
+  return page->flags.x.in_full;
+}
+
+static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) {
+  page->flags.x.in_full = in_full;
+}
+
+static inline bool mi_page_has_aligned(const mi_page_t* page) {
+  return page->flags.x.has_aligned;
+}
+
+static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
+  page->flags.x.has_aligned = has_aligned;
+}
+
+
+/* -------------------------------------------------------------------
+Encoding/Decoding the free list next pointers
+
+This is to protect against buffer overflow exploits where the
+free list is mutated. Many hardened allocators xor the next pointer `p`
+with a secret key `k1`, as `p^k1`. This prevents overwriting with known
+values but might be still too weak: if the attacker can guess
+the pointer `p` this can reveal `k1` (since `p^k1^p == k1`).
+Moreover, if multiple blocks can be read as well, the attacker can
+xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot
+about the pointers (and subsequently `k1`).
+
+Instead mimalloc uses an extra key `k2` and encodes as `((p^k2)<<<k1)+k1`,
+i.e. a xor with `k2`, a left-rotation over `k1`, and an addition of `k1`.
+Since these operations are not associative, the above approaches do not
+work so well any more even if the `p` can be guessed. Both keys are
+random and unique per page.
+
+We also pass a separate `null` value to be used as `NULL` or otherwise
+`(k2<<<k1)+k1` for an empty list.
+------------------------------------------------------------------- */
+
+static inline bool mi_is_in_same_segment(const void* p, const void* q) {
+  return (_mi_ptr_segment(p) == _mi_ptr_segment(q));
+}
+
+static inline bool mi_is_in_same_page(const void* p, const void* q) {
+  mi_segment_t* segment = _mi_ptr_segment(p);
+  if (_mi_ptr_segment(q) != segment) return false;
+  // `q` may be invalid, so do not dereference it; just check that it lies
+  // inside the page area of `p`
+  mi_page_t* page = _mi_segment_page_of(segment, p);
+  size_t psize;
+  uint8_t* start = _mi_segment_page_start(segment, page, &psize);
+  return (start <= (uint8_t*)q && (uint8_t*)q < start + psize);
+}
+
+static inline uintptr_t mi_rotl(uintptr_t x, uintptr_t shift) {
+  shift %= MI_INTPTR_BITS;
+  return (shift==0 ? x : ((x << shift) | (x >> (MI_INTPTR_BITS - shift))));
+}
+static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) {
+  shift %= MI_INTPTR_BITS;
+  return (shift==0 ? x : ((x >> shift) | (x << (MI_INTPTR_BITS - shift))));
+}
+
+static inline void* mi_ptr_decode(const void* null, const mi_encoded_t x, const uintptr_t* keys) {
+  void* p = (void*)(mi_rotr(x - keys[0], keys[0]) ^ keys[1]);
+  return (p==null ? NULL : p);
+}
+
+static inline mi_encoded_t mi_ptr_encode(const void* null, const void* p, const uintptr_t* keys) {
+  uintptr_t x = (uintptr_t)(p==NULL ? null : p);
+  return mi_rotl(x ^ keys[1], keys[0]) + keys[0];
+}
+
+static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, const uintptr_t* keys ) {
+  mi_track_mem_defined(block,sizeof(mi_block_t));
+  mi_block_t* next;
+  #ifdef MI_ENCODE_FREELIST
+  next = (mi_block_t*)mi_ptr_decode(null, mi_atomic_load_relaxed((_Atomic(mi_encoded_t)*)&block->next), keys);
+  #else
+  MI_UNUSED(keys); MI_UNUSED(null);
+  next = (mi_block_t*)mi_atomic_load_relaxed((_Atomic(mi_encoded_t)*)&block->next);
+  #endif
+  mi_track_mem_noaccess(block,sizeof(mi_block_t));
+  return next;
+}
+
+static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) {
+  mi_track_mem_undefined(block,sizeof(mi_block_t));
+  #ifdef MI_ENCODE_FREELIST
+  mi_atomic_store_relaxed(&block->next, mi_ptr_encode(null, next, keys));
+  #else
+  MI_UNUSED(keys); MI_UNUSED(null);
+  mi_atomic_store_relaxed(&block->next, (mi_encoded_t)next);
+  #endif
+  mi_track_mem_noaccess(block,sizeof(mi_block_t));
+}
+
+static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) {
+  #ifdef MI_ENCODE_FREELIST
+  mi_block_t* next = mi_block_nextx(page,block,page->keys);
+  // check for free list corruption: is `next` at least in the same page?
+  // TODO: check if `next` is `page->block_size` aligned?
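+  // (illustrative) decoding is the exact inverse of mi_ptr_encode above:
+  // mi_ptr_decode(null, mi_ptr_encode(null, p, keys), keys) == p, while a
+  // block whose `next` field was overwritten decodes to an essentially
+  // random address, which the same-page check below catches cheaply.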
+ if mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next)) { + _mi_error_message(EFAULT, "corrupted free list entry of size %zub at %p: value 0x%zx\n", mi_page_block_size(page), block, (uintptr_t)next); + next = NULL; + } + return next; + #else + MI_UNUSED(page); + return mi_block_nextx(page,block,NULL); + #endif +} + +static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) { + #ifdef MI_ENCODE_FREELIST + mi_block_set_nextx(page,block,next, page->keys); + #else + MI_UNUSED(page); + mi_block_set_nextx(page,block,next,NULL); + #endif +} + + +// ------------------------------------------------------------------- +// commit mask +// ------------------------------------------------------------------- + +static inline void mi_commit_mask_create_empty(mi_commit_mask_t* cm) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + cm->mask[i] = 0; + } +} + +static inline void mi_commit_mask_create_full(mi_commit_mask_t* cm) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + cm->mask[i] = ~((size_t)0); + } +} + +static inline bool mi_commit_mask_is_empty(const mi_commit_mask_t* cm) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + if (cm->mask[i] != 0) return false; + } + return true; +} + +static inline bool mi_commit_mask_is_full(const mi_commit_mask_t* cm) { + for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) { + if (cm->mask[i] != ~((size_t)0)) return false; + } + return true; +} + +// defined in `segment.c`: +size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total); +size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx); + +#define mi_commit_mask_foreach(cm,idx,count) \ + idx = 0; \ + while ((count = _mi_commit_mask_next_run(cm,&idx)) > 0) { + +#define mi_commit_mask_foreach_end() \ + idx += count; \ + } + + + +/* ----------------------------------------------------------- + memory id's +----------------------------------------------------------- */ + +static inline mi_memid_t _mi_memid_create(mi_memkind_t memkind) { + mi_memid_t memid; + _mi_memzero_var(memid); + memid.memkind = memkind; + return memid; +} + +static inline mi_memid_t _mi_memid_none(void) { + return _mi_memid_create(MI_MEM_NONE); +} + +static inline mi_memid_t _mi_memid_create_os(bool committed, bool is_zero, bool is_large) { + mi_memid_t memid = _mi_memid_create(MI_MEM_OS); + memid.initially_committed = committed; + memid.initially_zero = is_zero; + memid.is_pinned = is_large; + return memid; +} + + +// ------------------------------------------------------------------- +// Fast "random" shuffle +// ------------------------------------------------------------------- + +static inline uintptr_t _mi_random_shuffle(uintptr_t x) { + if (x==0) { x = 17; } // ensure we don't get stuck in generating zeros +#if (MI_INTPTR_SIZE==8) + // by Sebastiano Vigna, see: + x ^= x >> 30; + x *= 0xbf58476d1ce4e5b9UL; + x ^= x >> 27; + x *= 0x94d049bb133111ebUL; + x ^= x >> 31; +#elif (MI_INTPTR_SIZE==4) + // by Chris Wellons, see: + x ^= x >> 16; + x *= 0x7feb352dUL; + x ^= x >> 15; + x *= 0x846ca68bUL; + x ^= x >> 16; +#endif + return x; +} + +// ------------------------------------------------------------------- +// Optimize numa node access for the common case (= one node) +// ------------------------------------------------------------------- + +int _mi_os_numa_node_get(mi_os_tld_t* tld); +size_t _mi_os_numa_node_count_get(void); + +extern _Atomic(size_t) _mi_numa_node_count; +static inline int 
_mi_os_numa_node(mi_os_tld_t* tld) { + if mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1) { return 0; } + else return _mi_os_numa_node_get(tld); +} +static inline size_t _mi_os_numa_node_count(void) { + const size_t count = mi_atomic_load_relaxed(&_mi_numa_node_count); + if mi_likely(count > 0) { return count; } + else return _mi_os_numa_node_count_get(); +} + + + +// ----------------------------------------------------------------------- +// Count bits: trailing or leading zeros (with MI_INTPTR_BITS on all zero) +// ----------------------------------------------------------------------- + +#if defined(__GNUC__) + +#include // LONG_MAX +#define MI_HAVE_FAST_BITSCAN +static inline size_t mi_clz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; +#if (INTPTR_MAX == LONG_MAX) + return __builtin_clzl(x); +#else + return __builtin_clzll(x); +#endif +} +static inline size_t mi_ctz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; +#if (INTPTR_MAX == LONG_MAX) + return __builtin_ctzl(x); +#else + return __builtin_ctzll(x); +#endif +} + +#elif defined(_MSC_VER) + +#include // LONG_MAX +#include // BitScanReverse64 +#define MI_HAVE_FAST_BITSCAN +static inline size_t mi_clz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; + unsigned long idx; +#if (INTPTR_MAX == LONG_MAX) + _BitScanReverse(&idx, x); +#else + _BitScanReverse64(&idx, x); +#endif + return ((MI_INTPTR_BITS - 1) - idx); +} +static inline size_t mi_ctz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; + unsigned long idx; +#if (INTPTR_MAX == LONG_MAX) + _BitScanForward(&idx, x); +#else + _BitScanForward64(&idx, x); +#endif + return idx; +} + +#else +static inline size_t mi_ctz32(uint32_t x) { + // de Bruijn multiplication, see + static const unsigned char debruijn[32] = { + 0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8, + 31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9 + }; + if (x==0) return 32; + return debruijn[((x & -(int32_t)x) * 0x077CB531UL) >> 27]; +} +static inline size_t mi_clz32(uint32_t x) { + // de Bruijn multiplication, see + static const uint8_t debruijn[32] = { + 31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1, + 23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0 + }; + if (x==0) return 32; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + return debruijn[(uint32_t)(x * 0x07C4ACDDUL) >> 27]; +} + +static inline size_t mi_clz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; +#if (MI_INTPTR_BITS <= 32) + return mi_clz32((uint32_t)x); +#else + size_t count = mi_clz32((uint32_t)(x >> 32)); + if (count < 32) return count; + return (32 + mi_clz32((uint32_t)x)); +#endif +} +static inline size_t mi_ctz(uintptr_t x) { + if (x==0) return MI_INTPTR_BITS; +#if (MI_INTPTR_BITS <= 32) + return mi_ctz32((uint32_t)x); +#else + size_t count = mi_ctz32((uint32_t)x); + if (count < 32) return count; + return (32 + mi_ctz32((uint32_t)(x>>32))); +#endif +} + +#endif + +// "bit scan reverse": Return index of the highest bit (or MI_INTPTR_BITS if `x` is zero) +static inline size_t mi_bsr(uintptr_t x) { + return (x==0 ? MI_INTPTR_BITS : MI_INTPTR_BITS - 1 - mi_clz(x)); +} + + +// --------------------------------------------------------------------------------- +// Provide our own `_mi_memcpy` for potential performance optimizations. +// +// For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if +// we happen to run on x86/x64 cpu's that have "fast short rep movsb" (FSRM) support +// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017). 
See also issue #201 and pr #253. +// --------------------------------------------------------------------------------- + +#if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) +#include +extern bool _mi_cpu_has_fsrm; +static inline void _mi_memcpy(void* dst, const void* src, size_t n) { + if (_mi_cpu_has_fsrm) { + __movsb((unsigned char*)dst, (const unsigned char*)src, n); + } + else { + memcpy(dst, src, n); + } +} +static inline void _mi_memzero(void* dst, size_t n) { + if (_mi_cpu_has_fsrm) { + __stosb((unsigned char*)dst, 0, n); + } + else { + memset(dst, 0, n); + } +} +#else +static inline void _mi_memcpy(void* dst, const void* src, size_t n) { + memcpy(dst, src, n); +} +static inline void _mi_memzero(void* dst, size_t n) { + memset(dst, 0, n); +} +#endif + +// ------------------------------------------------------------------------------- +// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned +// This is used for example in `mi_realloc`. +// ------------------------------------------------------------------------------- + +#if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) +// On GCC/CLang we provide a hint that the pointers are word aligned. +static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) { + mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0)); + void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE); + const void* asrc = __builtin_assume_aligned(src, MI_INTPTR_SIZE); + _mi_memcpy(adst, asrc, n); +} + +static inline void _mi_memzero_aligned(void* dst, size_t n) { + mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0); + void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE); + _mi_memzero(adst, n); +} +#else +// Default fallback on `_mi_memcpy` +static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) { + mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0)); + _mi_memcpy(dst, src, n); +} + +static inline void _mi_memzero_aligned(void* dst, size_t n) { + mi_assert_internal((uintptr_t)dst % MI_INTPTR_SIZE == 0); + _mi_memzero(dst, n); +} +#endif + + +#endif diff --git a/Include/internal/mimalloc/mimalloc/prim.h b/Include/internal/mimalloc/mimalloc/prim.h new file mode 100644 index 0000000000000000000000000000000000000000..322ab29e6b41c24bfaaa94c6d5af103f71bfc285 --- /dev/null +++ b/Include/internal/mimalloc/mimalloc/prim.h @@ -0,0 +1,329 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_PRIM_H +#define MIMALLOC_PRIM_H + + +// -------------------------------------------------------------------------- +// This file specifies the primitive portability API. +// Each OS/host needs to implement these primitives, see `src/prim` +// for implementations on Window, macOS, WASI, and Linux/Unix. +// +// note: on all primitive functions, we always have result parameters != NUL, and: +// addr != NULL and page aligned +// size > 0 and page aligned +// return value is an error code an int where 0 is success. 
+// -------------------------------------------------------------------------- + +// OS memory configuration +typedef struct mi_os_mem_config_s { + size_t page_size; // 4KiB + size_t large_page_size; // 2MiB + size_t alloc_granularity; // smallest allocation size (on Windows 64KiB) + bool has_overcommit; // can we reserve more memory than can be actually committed? + bool must_free_whole; // must allocated blocks be freed as a whole (false for mmap, true for VirtualAlloc) + bool has_virtual_reserve; // supports virtual address space reservation? (if true we can reserve virtual address space without using commit or physical memory) +} mi_os_mem_config_t; + +// Initialize +void _mi_prim_mem_init( mi_os_mem_config_t* config ); + +// Free OS memory +int _mi_prim_free(void* addr, size_t size ); + +// Allocate OS memory. Return NULL on error. +// The `try_alignment` is just a hint and the returned pointer does not have to be aligned. +// If `commit` is false, the virtual memory range only needs to be reserved (with no access) +// which will later be committed explicitly using `_mi_prim_commit`. +// `is_zero` is set to true if the memory was zero initialized (as on most OS's) +// pre: !commit => !allow_large +// try_alignment >= _mi_os_page_size() and a power of 2 +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr); + +// Commit memory. Returns error code or 0 on success. +// For example, on Linux this would make the memory PROT_READ|PROT_WRITE. +// `is_zero` is set to true if the memory was zero initialized (e.g. on Windows) +int _mi_prim_commit(void* addr, size_t size, bool* is_zero); + +// Decommit memory. Returns error code or 0 on success. The `needs_recommit` result is true +// if the memory would need to be re-committed. For example, on Windows this is always true, +// but on Linux we could use MADV_DONTNEED to decommit which does not need a recommit. +// pre: needs_recommit != NULL +int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit); + +// Reset memory. The range keeps being accessible but the content might be reset. +// Returns error code or 0 on success. +int _mi_prim_reset(void* addr, size_t size); + +// Protect memory. Returns error code or 0 on success. +int _mi_prim_protect(void* addr, size_t size, bool protect); + +// Allocate huge (1GiB) pages possibly associated with a NUMA node. +// `is_zero` is set to true if the memory was zero initialized (as on most OS's) +// pre: size > 0 and a multiple of 1GiB. +// numa_node is either negative (don't care), or a numa node number. +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr); + +// Return the current NUMA node +size_t _mi_prim_numa_node(void); + +// Return the number of logical NUMA nodes +size_t _mi_prim_numa_node_count(void); + +// Clock ticks +mi_msecs_t _mi_prim_clock_now(void); + +// Return process information (only for statistics) +typedef struct mi_process_info_s { + mi_msecs_t elapsed; + mi_msecs_t utime; + mi_msecs_t stime; + size_t current_rss; + size_t peak_rss; + size_t current_commit; + size_t peak_commit; + size_t page_faults; +} mi_process_info_t; + +void _mi_prim_process_info(mi_process_info_t* pinfo); + +// Default stderr output. (only for warnings etc. with verbose enabled) +// msg != NULL && _mi_strlen(msg) > 0 +void _mi_prim_out_stderr( const char* msg ); + +// Get an environment variable. 
(only for options) +// name != NULL, result != NULL, result_size >= 64 +bool _mi_prim_getenv(const char* name, char* result, size_t result_size); + + +// Fill a buffer with strong randomness; return `false` on error or if +// there is no strong randomization available. +bool _mi_prim_random_buf(void* buf, size_t buf_len); + +// Called on the first thread start, and should ensure `_mi_thread_done` is called on thread termination. +void _mi_prim_thread_init_auto_done(void); + +// Called on process exit and may take action to clean up resources associated with the thread auto done. +void _mi_prim_thread_done_auto_done(void); + +// Called when the default heap for a thread changes +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap); + + +//------------------------------------------------------------------- +// Thread id: `_mi_prim_thread_id()` +// +// Getting the thread id should be performant as it is called in the +// fast path of `_mi_free` and we specialize for various platforms as +// inlined definitions. Regular code should call `init.c:_mi_thread_id()`. +// We only require _mi_prim_thread_id() to return a unique id +// for each thread (unequal to zero). +//------------------------------------------------------------------- + +// defined in `init.c`; do not use these directly +extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from +extern bool _mi_process_is_initialized; // has mi_process_init been called? + +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept; + +#ifdef MI_PRIM_THREAD_ID + +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { + return MI_PRIM_THREAD_ID(); +} + +#elif defined(_WIN32) + +#define WIN32_LEAN_AND_MEAN +#include +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { + // Windows: works on Intel and ARM in both 32- and 64-bit + return (uintptr_t)NtCurrentTeb(); +} + +// We use assembly for a fast thread id on the main platforms. The TLS layout depends on +// both the OS and libc implementation so we use specific tests for each main platform. +// If you test on another platform and it works please send a PR :-) +// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register. 
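+//
+// (illustrative) the id returned here is what mimalloc stores in
+// `segment->thread_id`; the free path can then compare it against the
+// calling thread's `_mi_prim_thread_id()` to take the fast local-free path
+// only when the calling thread owns the segment.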
+#elif defined(__GNUC__) && ( \ + (defined(__GLIBC__) && (defined(__x86_64__) || defined(__i386__) || (defined(__arm__) && __ARM_ARCH >= 7) || defined(__aarch64__))) \ + || (defined(__APPLE__) && (defined(__x86_64__) || defined(__aarch64__))) \ + || (defined(__BIONIC__) && (defined(__x86_64__) || defined(__i386__) || (defined(__arm__) && __ARM_ARCH >= 7) || defined(__aarch64__))) \ + || (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ + || (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \ + ) + +static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept { + void* res; + const size_t ofs = (slot*sizeof(void*)); + #if defined(__i386__) + __asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86 32-bit always uses GS + #elif defined(__APPLE__) && defined(__x86_64__) + __asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 macOSX uses GS + #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) + __asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x32 ABI + #elif defined(__x86_64__) + __asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : ); // x86_64 Linux, BSD uses FS + #elif defined(__arm__) + void** tcb; MI_UNUSED(ofs); + __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); + res = tcb[slot]; + #elif defined(__aarch64__) + void** tcb; MI_UNUSED(ofs); + #if defined(__APPLE__) // M1, issue #343 + __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb)); + #else + __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); + #endif + res = tcb[slot]; + #endif + return res; +} + +// setting a tls slot is only used on macOS for now +static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexcept { + const size_t ofs = (slot*sizeof(void*)); + #if defined(__i386__) + __asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // 32-bit always uses GS + #elif defined(__APPLE__) && defined(__x86_64__) + __asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 macOS uses GS + #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4) + __asm__("movl %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x32 ABI + #elif defined(__x86_64__) + __asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : ); // x86_64 Linux, BSD uses FS + #elif defined(__arm__) + void** tcb; MI_UNUSED(ofs); + __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb)); + tcb[slot] = value; + #elif defined(__aarch64__) + void** tcb; MI_UNUSED(ofs); + #if defined(__APPLE__) // M1, issue #343 + __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb)); + #else + __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb)); + #endif + tcb[slot] = value; + #endif +} + +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { + #if defined(__BIONIC__) + // issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id + // see: https://github.com/aosp-mirror/platform_bionic/blob/c44b1d0676ded732df4b3b21c5f798eacae93228/libc/platform/bionic/tls_defines.h#L86 + return (uintptr_t)mi_prim_tls_slot(1); + #else + // in all our other targets, slot 0 is the thread id + // glibc: https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/x86_64/nptl/tls.h + // apple: https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L36 + return (uintptr_t)mi_prim_tls_slot(0); + #endif +} + +#else + +// otherwise use portable C, taking the address of 
a thread local variable (this is still very fast on most platforms). +static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept { + return (uintptr_t)&_mi_heap_default; +} + +#endif + + + +/* ---------------------------------------------------------------------------------------- +The thread local default heap: `_mi_prim_get_default_heap()` +This is inlined here as it is on the fast path for allocation functions. + +On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a +__thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures +that the storage will always be available (allocated on the thread stacks). + +On some platforms though we cannot use that when overriding `malloc` since the underlying +TLS implementation (or the loader) will call itself `malloc` on a first access and recurse. +We try to circumvent this in an efficient way: +- macOSX : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On OSX, the + loader itself calls `malloc` even before the modules are initialized. +- OpenBSD: we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS). +- DragonFly: defaults are working but seem slow compared to freeBSD (see PR #323) +------------------------------------------------------------------------------------------- */ + +static inline mi_heap_t* mi_prim_get_default_heap(void); + +#if defined(MI_MALLOC_OVERRIDE) +#if defined(__APPLE__) // macOS + #define MI_TLS_SLOT 89 // seems unused? + // #define MI_TLS_RECURSE_GUARD 1 + // other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89) + // see +#elif defined(__OpenBSD__) + // use end bytes of a name; goes wrong if anyone uses names > 23 characters (ptrhread specifies 16) + // see + #define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24) + // #elif defined(__DragonFly__) + // #warning "mimalloc is not working correctly on DragonFly yet." + // #define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) +#elif defined(__ANDROID__) + // See issue #381 + #define MI_TLS_PTHREAD +#endif +#endif + + +#if defined(MI_TLS_SLOT) + +static inline mi_heap_t* mi_prim_get_default_heap(void) { + mi_heap_t* heap = (mi_heap_t*)mi_prim_tls_slot(MI_TLS_SLOT); + if mi_unlikely(heap == NULL) { + #ifdef __GNUC__ + __asm(""); // prevent conditional load of the address of _mi_heap_empty + #endif + heap = (mi_heap_t*)&_mi_heap_empty; + } + return heap; +} + +#elif defined(MI_TLS_PTHREAD_SLOT_OFS) + +static inline mi_heap_t** mi_prim_tls_pthread_heap_slot(void) { + pthread_t self = pthread_self(); + #if defined(__DragonFly__) + if (self==NULL) return NULL; + #endif + return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS); +} + +static inline mi_heap_t* mi_prim_get_default_heap(void) { + mi_heap_t** pheap = mi_prim_tls_pthread_heap_slot(); + if mi_unlikely(pheap == NULL) return _mi_heap_main_get(); + mi_heap_t* heap = *pheap; + if mi_unlikely(heap == NULL) return (mi_heap_t*)&_mi_heap_empty; + return heap; +} + +#elif defined(MI_TLS_PTHREAD) + +extern pthread_key_t _mi_heap_default_key; +static inline mi_heap_t* mi_prim_get_default_heap(void) { + mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); + return (mi_unlikely(heap == NULL) ? 
(mi_heap_t*)&_mi_heap_empty : heap); +} + +#else // default using a thread local variable; used on most platforms. + +static inline mi_heap_t* mi_prim_get_default_heap(void) { + #if defined(MI_TLS_RECURSE_GUARD) + if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get(); + #endif + return _mi_heap_default; +} + +#endif // mi_prim_get_default_heap() + + + +#endif // MIMALLOC_PRIM_H diff --git a/Include/internal/mimalloc/mimalloc/track.h b/Include/internal/mimalloc/mimalloc/track.h new file mode 100644 index 0000000000000000000000000000000000000000..fa1a048d846a9cfcb42bea37813be445847bf344 --- /dev/null +++ b/Include/internal/mimalloc/mimalloc/track.h @@ -0,0 +1,147 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_TRACK_H +#define MIMALLOC_TRACK_H + +/* ------------------------------------------------------------------------------------------------------ +Track memory ranges with macros for tools like Valgrind address sanitizer, or other memory checkers. +These can be defined for tracking allocation: + + #define mi_track_malloc_size(p,reqsize,size,zero) + #define mi_track_free_size(p,_size) + +The macros are set up such that the size passed to `mi_track_free_size` +always matches the size of `mi_track_malloc_size`. (currently, `size == mi_usable_size(p)`). +The `reqsize` is what the user requested, and `size >= reqsize`. +The `size` is either byte precise (and `size==reqsize`) if `MI_PADDING` is enabled, +or otherwise it is the usable block size which may be larger than the original request. +Use `_mi_block_size_of(void* p)` to get the full block size that was allocated (including padding etc). +The `zero` parameter is `true` if the allocated block is zero initialized. + +Optional: + + #define mi_track_align(p,alignedp,offset,size) + #define mi_track_resize(p,oldsize,newsize) + #define mi_track_init() + +The `mi_track_align` is called right after a `mi_track_malloc` for aligned pointers in a block. +The corresponding `mi_track_free` still uses the block start pointer and original size (corresponding to the `mi_track_malloc`). +The `mi_track_resize` is currently unused but could be called on reallocations within a block. +`mi_track_init` is called at program start. 
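+
+For example (illustrative): without MI_PADDING, a 10-byte allocation whose
+block has a usable size of 16 is reported as
+`mi_track_malloc_size(p, 10, 16, zero)`, and the later free reports
+`mi_track_free_size(p, 16)` with that same usable size.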
+ +The following macros are for tools like asan and valgrind to track whether memory is +defined, undefined, or not accessible at all: + + #define mi_track_mem_defined(p,size) + #define mi_track_mem_undefined(p,size) + #define mi_track_mem_noaccess(p,size) + +-------------------------------------------------------------------------------------------------------*/ + +#if MI_TRACK_VALGRIND +// valgrind tool + +#define MI_TRACK_ENABLED 1 +#define MI_TRACK_HEAP_DESTROY 1 // track free of individual blocks on heap_destroy +#define MI_TRACK_TOOL "valgrind" + +#include +#include + +#define mi_track_malloc_size(p,reqsize,size,zero) VALGRIND_MALLOCLIKE_BLOCK(p,size,MI_PADDING_SIZE /*red zone*/,zero) +#define mi_track_free_size(p,_size) VALGRIND_FREELIKE_BLOCK(p,MI_PADDING_SIZE /*red zone*/) +#define mi_track_resize(p,oldsize,newsize) VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/) +#define mi_track_mem_defined(p,size) VALGRIND_MAKE_MEM_DEFINED(p,size) +#define mi_track_mem_undefined(p,size) VALGRIND_MAKE_MEM_UNDEFINED(p,size) +#define mi_track_mem_noaccess(p,size) VALGRIND_MAKE_MEM_NOACCESS(p,size) + +#elif MI_TRACK_ASAN +// address sanitizer + +#define MI_TRACK_ENABLED 1 +#define MI_TRACK_HEAP_DESTROY 0 +#define MI_TRACK_TOOL "asan" + +#include + +#define mi_track_malloc_size(p,reqsize,size,zero) ASAN_UNPOISON_MEMORY_REGION(p,size) +#define mi_track_free_size(p,size) ASAN_POISON_MEMORY_REGION(p,size) +#define mi_track_mem_defined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size) +#define mi_track_mem_undefined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size) +#define mi_track_mem_noaccess(p,size) ASAN_POISON_MEMORY_REGION(p,size) + +#elif MI_TRACK_ETW +// windows event tracing + +#define MI_TRACK_ENABLED 1 +#define MI_TRACK_HEAP_DESTROY 1 +#define MI_TRACK_TOOL "ETW" + +#define WIN32_LEAN_AND_MEAN +#include +#include "../src/prim/windows/etw.h" + +#define mi_track_init() EventRegistermicrosoft_windows_mimalloc(); +#define mi_track_malloc_size(p,reqsize,size,zero) EventWriteETW_MI_ALLOC((UINT64)(p), size) +#define mi_track_free_size(p,size) EventWriteETW_MI_FREE((UINT64)(p), size) + +#else +// no tracking + +#define MI_TRACK_ENABLED 0 +#define MI_TRACK_HEAP_DESTROY 0 +#define MI_TRACK_TOOL "none" + +#define mi_track_malloc_size(p,reqsize,size,zero) +#define mi_track_free_size(p,_size) + +#endif + +// ------------------- +// Utility definitions + +#ifndef mi_track_resize +#define mi_track_resize(p,oldsize,newsize) mi_track_free_size(p,oldsize); mi_track_malloc(p,newsize,false) +#endif + +#ifndef mi_track_align +#define mi_track_align(p,alignedp,offset,size) mi_track_mem_noaccess(p,offset) +#endif + +#ifndef mi_track_init +#define mi_track_init() +#endif + +#ifndef mi_track_mem_defined +#define mi_track_mem_defined(p,size) +#endif + +#ifndef mi_track_mem_undefined +#define mi_track_mem_undefined(p,size) +#endif + +#ifndef mi_track_mem_noaccess +#define mi_track_mem_noaccess(p,size) +#endif + + +#if MI_PADDING +#define mi_track_malloc(p,reqsize,zero) \ + if ((p)!=NULL) { \ + mi_assert_internal(mi_usable_size(p)==(reqsize)); \ + mi_track_malloc_size(p,reqsize,reqsize,zero); \ + } +#else +#define mi_track_malloc(p,reqsize,zero) \ + if ((p)!=NULL) { \ + mi_assert_internal(mi_usable_size(p)>=(reqsize)); \ + mi_track_malloc_size(p,reqsize,mi_usable_size(p),zero); \ + } +#endif + +#endif diff --git a/Include/internal/mimalloc/mimalloc/types.h b/Include/internal/mimalloc/mimalloc/types.h new file mode 100644 index 
0000000000000000000000000000000000000000..70c600e920bad198bcf8810ea068c59471433b9f --- /dev/null +++ b/Include/internal/mimalloc/mimalloc/types.h @@ -0,0 +1,721 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ +#pragma once +#ifndef MIMALLOC_TYPES_H +#define MIMALLOC_TYPES_H + +// -------------------------------------------------------------------------- +// This file contains the main type definitions for mimalloc: +// mi_heap_t : all data for a thread-local heap, contains +// lists of all managed heap pages. +// mi_segment_t : a larger chunk of memory (32GiB) from where pages +// are allocated. +// mi_page_t : a mimalloc page (usually 64KiB or 512KiB) from +// where objects are allocated. +// -------------------------------------------------------------------------- + + +#include // ptrdiff_t +#include // uintptr_t, uint16_t, etc +#include "atomic.h" // _Atomic + +#ifdef _MSC_VER +#pragma warning(disable:4214) // bitfield is not int +#endif + +// Minimal alignment necessary. On most platforms 16 bytes are needed +// due to SSE registers for example. This must be at least `sizeof(void*)` +#ifndef MI_MAX_ALIGN_SIZE +#define MI_MAX_ALIGN_SIZE 16 // sizeof(max_align_t) +#endif + +#define MI_CACHE_LINE 64 +#if defined(_MSC_VER) +#pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths) +#pragma warning(disable:26812) // unscoped enum warning +#define mi_decl_noinline __declspec(noinline) +#define mi_decl_thread __declspec(thread) +#define mi_decl_cache_align __declspec(align(MI_CACHE_LINE)) +#elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc +#define mi_decl_noinline __attribute__((noinline)) +#define mi_decl_thread __thread +#define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE))) +#else +#define mi_decl_noinline +#define mi_decl_thread __thread // hope for the best :-) +#define mi_decl_cache_align +#endif + +// ------------------------------------------------------ +// Variants +// ------------------------------------------------------ + +// Define NDEBUG in the release version to disable assertions. +// #define NDEBUG + +// Define MI_TRACK_ to enable tracking support +// #define MI_TRACK_VALGRIND 1 +// #define MI_TRACK_ASAN 1 +// #define MI_TRACK_ETW 1 + +// Define MI_STAT as 1 to maintain statistics; set it to 2 to have detailed statistics (but costs some performance). +// #define MI_STAT 1 + +// Define MI_SECURE to enable security mitigations +// #define MI_SECURE 1 // guard page around metadata +// #define MI_SECURE 2 // guard page around each mimalloc page +// #define MI_SECURE 3 // encode free lists (detect corrupted free list (buffer overflow), and invalid pointer free) +// #define MI_SECURE 4 // checks for double free. (may be more expensive) + +#if !defined(MI_SECURE) +#define MI_SECURE 0 +#endif + +// Define MI_DEBUG for debug mode +// #define MI_DEBUG 1 // basic assertion checks and statistics, check double free, corrupted free list, and invalid pointer free. 
+// #define MI_DEBUG 2  // + internal assertion checks
+// #define MI_DEBUG 3  // + extensive internal invariant checking (cmake -DMI_DEBUG_FULL=ON)
+#if !defined(MI_DEBUG)
+#if !defined(NDEBUG) || defined(_DEBUG)
+#define MI_DEBUG 2
+#else
+#define MI_DEBUG 0
+#endif
+#endif
+
+// Reserve extra padding at the end of each block to be more resilient against heap block overflows.
+// The padding can detect buffer overflow on free.
+#if !defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1 || (MI_TRACK_VALGRIND || MI_TRACK_ASAN || MI_TRACK_ETW))
+#define MI_PADDING 1
+#endif
+
+// Check padding bytes; allows byte-precise buffer overflow detection
+#if !defined(MI_PADDING_CHECK) && MI_PADDING && (MI_SECURE>=3 || MI_DEBUG>=1)
+#define MI_PADDING_CHECK 1
+#endif
+
+
+// Encoded free lists allow detection of corrupted free lists
+// and can detect buffer overflows, modify after free, and double `free`s.
+#if (MI_SECURE>=3 || MI_DEBUG>=1)
+#define MI_ENCODE_FREELIST 1
+#endif
+
+
+// We used to abandon huge pages and eagerly deallocate them if freed from
+// another thread, but that makes it impossible to visit them during a heap
+// walk or include them in a `mi_heap_destroy`. We therefore instead
+// reset/decommit the huge blocks if freed from another thread so most memory
+// is available until it gets properly freed by the owning thread.
+// #define MI_HUGE_PAGE_ABANDON 1
+
+
+// ------------------------------------------------------
+// Platform specific values
+// ------------------------------------------------------
+
+// ------------------------------------------------------
+// Size of a pointer.
+// We assume that `sizeof(void*)==sizeof(intptr_t)`
+// and it holds for all platforms we know of.
+//
+// However, the C standard only requires that:
+//  p == (void*)((intptr_t)p))
+// but we also need:
+//  i == (intptr_t)((void*)i)
+// or otherwise one might define an intptr_t type that is larger than a pointer...
+// ------------------------------------------------------
+
+#if INTPTR_MAX > INT64_MAX
+# define MI_INTPTR_SHIFT (4)  // assume 128-bit (as on arm CHERI for example)
+#elif INTPTR_MAX == INT64_MAX
+# define MI_INTPTR_SHIFT (3)
+#elif INTPTR_MAX == INT32_MAX
+# define MI_INTPTR_SHIFT (2)
+#else
+#error platform pointers must be 32, 64, or 128 bits
+#endif
+
+#if SIZE_MAX == UINT64_MAX
+# define MI_SIZE_SHIFT (3)
+typedef int64_t mi_ssize_t;
+#elif SIZE_MAX == UINT32_MAX
+# define MI_SIZE_SHIFT (2)
+typedef int32_t mi_ssize_t;
+#else
+#error platform objects must be 32 or 64 bits
+#endif
+
+#if (SIZE_MAX/2) > LONG_MAX
+# define MI_ZU(x) x##ULL
+# define MI_ZI(x) x##LL
+#else
+# define MI_ZU(x) x##UL
+# define MI_ZI(x) x##L
+#endif
+
+#define MI_INTPTR_SIZE (1<<MI_INTPTR_SHIFT)
+#define MI_INTPTR_BITS (MI_INTPTR_SIZE*8)
+
+#define MI_SIZE_SIZE (1<<MI_SIZE_SHIFT)
+#define MI_SIZE_BITS (MI_SIZE_SIZE*8)
+
+#define MI_KiB (MI_ZU(1024))
+#define MI_MiB (MI_KiB*MI_KiB)
+#define MI_GiB (MI_MiB*MI_KiB)
+
+
+// ------------------------------------------------------
+// Main internal data-structures
+// ------------------------------------------------------
+
+// Main tuning parameters for segment and page sizes
+// Sizes for 64-bit (usually divide by two for 32-bit)
+#define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT)  // 64KiB (32KiB on 32-bit)
+
+#if MI_INTPTR_SIZE > 4
+#define MI_SEGMENT_SHIFT ( 9 + MI_SEGMENT_SLICE_SHIFT) // 32MiB
+#else
+#define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 4MiB on 32-bit
+#endif
+
+#define MI_SMALL_PAGE_SHIFT (MI_SEGMENT_SLICE_SHIFT) // 64KiB
+#define MI_MEDIUM_PAGE_SHIFT ( 3 + MI_SMALL_PAGE_SHIFT) // 512KiB
+
+
+// Derived constants
+#define MI_SEGMENT_SIZE (MI_ZU(1)<<MI_SEGMENT_SHIFT)
+#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE
+#define MI_SEGMENT_MASK ((uintptr_t)(MI_SEGMENT_ALIGN - 1))
+#define MI_SEGMENT_SLICE_SIZE (MI_ZU(1)<<MI_SEGMENT_SLICE_SHIFT)
+#define MI_SLICES_PER_SEGMENT (MI_SEGMENT_SIZE / MI_SEGMENT_SLICE_SIZE) // 512
+
+#define MI_SMALL_PAGE_SIZE (MI_ZU(1)<<MI_SMALL_PAGE_SHIFT)
+#define MI_MEDIUM_PAGE_SIZE (MI_ZU(1)<<MI_MEDIUM_PAGE_SHIFT)
+
+#define MI_SMALL_OBJ_SIZE_MAX (MI_SMALL_PAGE_SIZE/4)   // 16KiB on 64-bit
+#define MI_MEDIUM_OBJ_SIZE_MAX (MI_MEDIUM_PAGE_SIZE/4) // 128KiB on 64-bit
+#define MI_MEDIUM_OBJ_WSIZE_MAX (MI_MEDIUM_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
+#define MI_LARGE_OBJ_SIZE_MAX (MI_SEGMENT_SIZE/2)      // 16MiB on 64-bit
+#define MI_LARGE_OBJ_WSIZE_MAX (MI_LARGE_OBJ_SIZE_MAX/MI_INTPTR_SIZE)
+
+// Maximum number of size classes. (spaced exponentially in 12.5% increments)
+#define MI_BIN_HUGE (73U)
+
+#if (MI_MEDIUM_OBJ_WSIZE_MAX >= 655360)
+#error "mimalloc internal: define more bins"
+#endif
+
+// Maximum slice offset (15)
+#define MI_MAX_SLICE_OFFSET ((MI_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1)
+
+// Used as a special value to encode block sizes in 32 bits.
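+// (illustrative) on 64-bit the arithmetic above works out as: slices are
+// 2^(13+3) = 64KiB, segments are 2^(9+16) = 32MiB, so a segment holds
+// 32MiB/64KiB = 512 slices; and since a page's `xblock_size` is only 32 bits,
+// block sizes too large for it are represented by the sentinel below and
+// recomputed from the segment page size (see `mi_page_block_size`).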
+#define MI_HUGE_BLOCK_SIZE ((uint32_t)(2*MI_GiB)) + +// blocks up to this size are always allocated aligned +#define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE) + +// Alignments over MI_ALIGNMENT_MAX are allocated in dedicated huge page segments +#define MI_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1) + + +// ------------------------------------------------------ +// Mimalloc pages contain allocated blocks +// ------------------------------------------------------ + +// The free lists use encoded next fields +// (Only actually encodes when MI_ENCODED_FREELIST is defined.) +typedef uintptr_t mi_encoded_t; + +// thread id's +typedef size_t mi_threadid_t; + +// free lists contain blocks +typedef struct mi_block_s { + _Atomic(mi_encoded_t) next; +} mi_block_t; + + +// The delayed flags are used for efficient multi-threaded free-ing +typedef enum mi_delayed_e { + MI_USE_DELAYED_FREE = 0, // push on the owning heap thread delayed list + MI_DELAYED_FREEING = 1, // temporary: another thread is accessing the owning heap + MI_NO_DELAYED_FREE = 2, // optimize: push on page local thread free queue if another block is already in the heap thread delayed free list + MI_NEVER_DELAYED_FREE = 3 // sticky, only resets on page reclaim +} mi_delayed_t; + + +// The `in_full` and `has_aligned` page flags are put in a union to efficiently +// test if both are false (`full_aligned == 0`) in the `mi_free` routine. +#if !MI_TSAN +typedef union mi_page_flags_s { + uint8_t full_aligned; + struct { + uint8_t in_full : 1; + uint8_t has_aligned : 1; + } x; +} mi_page_flags_t; +#else +// under thread sanitizer, use a byte for each flag to suppress warning, issue #130 +typedef union mi_page_flags_s { + uint16_t full_aligned; + struct { + uint8_t in_full; + uint8_t has_aligned; + } x; +} mi_page_flags_t; +#endif + +// Thread free list. +// We use the bottom 2 bits of the pointer for mi_delayed_t flags +typedef uintptr_t mi_thread_free_t; + +// A page contains blocks of one specific size (`block_size`). +// Each page has three list of free blocks: +// `free` for blocks that can be allocated, +// `local_free` for freed blocks that are not yet available to `mi_malloc` +// `thread_free` for freed blocks by other threads +// The `local_free` and `thread_free` lists are migrated to the `free` list +// when it is exhausted. The separate `local_free` list is necessary to +// implement a monotonic heartbeat. The `thread_free` list is needed for +// avoiding atomic operations in the common case. +// +// +// `used - |thread_free|` == actual blocks that are in use (alive) +// `used - |thread_free| + |free| + |local_free| == capacity` +// +// We don't count `freed` (as |free|) but use `used` to reduce +// the number of memory accesses in the `mi_page_all_free` function(s). +// +// Notes: +// - Access is optimized for `mi_free` and `mi_page_alloc` (in `alloc.c`) +// - Using `uint16_t` does not seem to slow things down +// - The size is 8 words on 64-bit which helps the page index calculations +// (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10 +// and 12 are still good for address calculation) +// - To limit the structure size, the `xblock_size` is 32-bits only; for +// blocks > MI_HUGE_BLOCK_SIZE the size is determined from the segment page size +// - `thread_free` uses the bottom bits as a delayed-free flags to optimize +// concurrent frees where only the first concurrent free adds to the owning +// heap `thread_delayed_free` list (see `alloc.c:mi_free_block_mt`). 
+// The invariant is that no-delayed-free is only set if there is +// at least one block that will be added, or as already been added, to +// the owning heap `thread_delayed_free` list. This guarantees that pages +// will be freed correctly even if only other threads free blocks. +typedef struct mi_page_s { + // "owned" by the segment + uint32_t slice_count; // slices in this page (0 if not a page) + uint32_t slice_offset; // distance from the actual page data slice (0 if a page) + uint8_t is_committed : 1; // `true` if the page virtual memory is committed + uint8_t is_zero_init : 1; // `true` if the page was initially zero initialized + uint8_t use_qsbr : 1; // delay page freeing using qsbr + uint8_t tag : 4; // tag from the owning heap + uint8_t debug_offset; // number of bytes to preserve when filling freed or uninitialized memory + + // layout like this to optimize access in `mi_malloc` and `mi_free` + uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` + uint16_t reserved; // number of blocks reserved in memory + mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits) + uint8_t free_is_zero : 1; // `true` if the blocks in the free list are zero initialized + uint8_t retire_expire : 7; // expiration count for retired blocks + + mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) + uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) + uint32_t xblock_size; // size available in each block (always `>0`) + mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) + + #if (MI_ENCODE_FREELIST || MI_PADDING) + uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) or padding canary + #endif + + _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads + _Atomic(uintptr_t) xheap; + + struct mi_page_s* next; // next page owned by this thread with the same `block_size` + struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` + +#ifdef Py_GIL_DISABLED + struct llist_node qsbr_node; + uint64_t qsbr_goal; +#endif + + // 64-bit 9 words, 32-bit 12 words, (+2 for secure) + #if MI_INTPTR_SIZE==8 && !defined(Py_GIL_DISABLED) + uintptr_t padding[1]; + #endif +} mi_page_t; + + + +// ------------------------------------------------------ +// Mimalloc segments contain mimalloc pages +// ------------------------------------------------------ + +typedef enum mi_page_kind_e { + MI_PAGE_SMALL, // small blocks go into 64KiB pages inside a segment + MI_PAGE_MEDIUM, // medium blocks go into medium pages inside a segment + MI_PAGE_LARGE, // larger blocks go into a page of just one block + MI_PAGE_HUGE, // huge blocks (> 16 MiB) are put into a single page in a single segment. +} mi_page_kind_t; + +typedef enum mi_segment_kind_e { + MI_SEGMENT_NORMAL, // MI_SEGMENT_SIZE size with pages inside. + MI_SEGMENT_HUGE, // > MI_LARGE_SIZE_MAX segment with just one huge page inside. +} mi_segment_kind_t; + +// ------------------------------------------------------ +// A segment holds a commit mask where a bit is set if +// the corresponding MI_COMMIT_SIZE area is committed. +// The MI_COMMIT_SIZE must be a multiple of the slice +// size. If it is equal we have the most fine grained +// decommit (but setting it higher can be more efficient). 
+// The MI_MINIMAL_COMMIT_SIZE is the minimal amount that will +// be committed in one go which can be set higher than +// MI_COMMIT_SIZE for efficiency (while the decommit mask +// is still tracked in fine-grained MI_COMMIT_SIZE chunks) +// ------------------------------------------------------ + +#define MI_MINIMAL_COMMIT_SIZE (1*MI_SEGMENT_SLICE_SIZE) +#define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB +#define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) +#define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS +#define MI_COMMIT_MASK_FIELD_COUNT (MI_COMMIT_MASK_BITS / MI_COMMIT_MASK_FIELD_BITS) + +#if (MI_COMMIT_MASK_BITS != (MI_COMMIT_MASK_FIELD_COUNT * MI_COMMIT_MASK_FIELD_BITS)) +#error "the segment size must be exactly divisible by the (commit size * size_t bits)" +#endif + +typedef struct mi_commit_mask_s { + size_t mask[MI_COMMIT_MASK_FIELD_COUNT]; +} mi_commit_mask_t; + +typedef mi_page_t mi_slice_t; +typedef int64_t mi_msecs_t; + + +// Memory can reside in arena's, direct OS allocated, or statically allocated. The memid keeps track of this. +typedef enum mi_memkind_e { + MI_MEM_NONE, // not allocated + MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example) + MI_MEM_STATIC, // allocated in a static area and should not be freed (for arena meta data for example) + MI_MEM_OS, // allocated from the OS + MI_MEM_OS_HUGE, // allocated as huge os pages + MI_MEM_OS_REMAP, // allocated in a remapable area (i.e. using `mremap`) + MI_MEM_ARENA // allocated from an arena (the usual case) +} mi_memkind_t; + +static inline bool mi_memkind_is_os(mi_memkind_t memkind) { + return (memkind >= MI_MEM_OS && memkind <= MI_MEM_OS_REMAP); +} + +typedef struct mi_memid_os_info { + void* base; // actual base address of the block (used for offset aligned allocations) + size_t alignment; // alignment at allocation +} mi_memid_os_info_t; + +typedef struct mi_memid_arena_info { + size_t block_index; // index in the arena + mi_arena_id_t id; // arena id (>= 1) + bool is_exclusive; // the arena can only be used for specific arena allocations +} mi_memid_arena_info_t; + +typedef struct mi_memid_s { + union { + mi_memid_os_info_t os; // only used for MI_MEM_OS + mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA + } mem; + bool is_pinned; // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large OS pages) + bool initially_committed;// `true` if the memory was originally allocated as committed + bool initially_zero; // `true` if the memory was originally zero initialized + mi_memkind_t memkind; +} mi_memid_t; + + +// Segments are large allocated memory blocks (8mb on 64 bit) from +// the OS. Inside segments we allocated fixed size _pages_ that +// contain blocks. +typedef struct mi_segment_s { + // constant fields + mi_memid_t memid; // memory id for arena allocation + bool allow_decommit; + bool allow_purge; + size_t segment_size; + + // segment fields + mi_msecs_t purge_expire; + mi_commit_mask_t purge_mask; + mi_commit_mask_t commit_mask; + + _Atomic(struct mi_segment_s*) abandoned_next; + + // from here is zero initialized + struct mi_segment_s* next; // the list of freed segments in the cache (must be first field, see `segment.c:mi_segment_init`) + + size_t abandoned; // abandoned pages (i.e. 
the original owning thread stopped) (`abandoned <= used`) + size_t abandoned_visits; // count how often this segment is visited in the abandoned list (to force reclaim it it is too long) + size_t used; // count of pages in use + uintptr_t cookie; // verify addresses in debug mode: `mi_ptr_cookie(segment) == segment->cookie` + + size_t segment_slices; // for huge segments this may be different from `MI_SLICES_PER_SEGMENT` + size_t segment_info_slices; // initial slices we are using segment info and possible guard pages. + + // layout like this to optimize access in `mi_free` + mi_segment_kind_t kind; + size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT` + _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment + + mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one more for huge blocks with large alignment +} mi_segment_t; + +typedef uintptr_t mi_tagged_segment_t; + +// Segments unowned by any thread are put in a shared pool +typedef struct mi_abandoned_pool_s { + // This is a list of visited abandoned pages that were full at the time. + // this list migrates to `abandoned` when that becomes NULL. The use of + // this list reduces contention and the rate at which segments are visited. + mi_decl_cache_align _Atomic(mi_segment_t*) abandoned_visited; // = NULL + + // The abandoned page list (tagged as it supports pop) + mi_decl_cache_align _Atomic(mi_tagged_segment_t) abandoned; // = NULL + + // Maintain these for debug purposes (these counts may be a bit off) + mi_decl_cache_align _Atomic(size_t) abandoned_count; + mi_decl_cache_align _Atomic(size_t) abandoned_visited_count; + + // We also maintain a count of current readers of the abandoned list + // in order to prevent resetting/decommitting segment memory if it might + // still be read. + mi_decl_cache_align _Atomic(size_t) abandoned_readers; // = 0 +} mi_abandoned_pool_t; + + +// ------------------------------------------------------ +// Heaps +// Provide first-class heaps to allocate from. +// A heap just owns a set of pages for allocation and +// can only be allocate/reallocate from the thread that created it. +// Freeing blocks can be done from any thread though. +// Per thread, the segments are shared among its heaps. +// Per thread, there is always a default heap that is +// used for allocation; it is initialized to statically +// point to an empty heap to avoid initialization checks +// in the fast path. +// ------------------------------------------------------ + +// Thread local data +typedef struct mi_tld_s mi_tld_t; + +// Pages of a certain block size are held in a queue. +typedef struct mi_page_queue_s { + mi_page_t* first; + mi_page_t* last; + size_t block_size; +} mi_page_queue_t; + +#define MI_BIN_FULL (MI_BIN_HUGE+1) + +// Random context +typedef struct mi_random_cxt_s { + uint32_t input[16]; + uint32_t output[16]; + int output_available; + bool weak; +} mi_random_ctx_t; + + +// In debug mode there is a padding structure at the end of the blocks to check for buffer overflows +#if (MI_PADDING) +typedef struct mi_padding_s { + uint32_t canary; // encoded block value to check validity of the padding (in case of overflow) + uint32_t delta; // padding bytes before the block. 
(mi_usable_size(p) - delta == exact allocated bytes) +} mi_padding_t; +#define MI_PADDING_SIZE (sizeof(mi_padding_t)) +#define MI_PADDING_WSIZE ((MI_PADDING_SIZE + MI_INTPTR_SIZE - 1) / MI_INTPTR_SIZE) +#else +#define MI_PADDING_SIZE 0 +#define MI_PADDING_WSIZE 0 +#endif + +#define MI_PAGES_DIRECT (MI_SMALL_WSIZE_MAX + MI_PADDING_WSIZE + 1) + + +// A heap owns a set of pages. +struct mi_heap_s { + mi_tld_t* tld; + mi_page_t* pages_free_direct[MI_PAGES_DIRECT]; // optimize: array where every entry points a page with possibly free blocks in the corresponding queue for that size. + mi_page_queue_t pages[MI_BIN_FULL + 1]; // queue of pages for each size class (or "bin") + _Atomic(mi_block_t*) thread_delayed_free; + mi_threadid_t thread_id; // thread this heap belongs too + mi_arena_id_t arena_id; // arena id if the heap belongs to a specific arena (or 0) + uintptr_t cookie; // random cookie to verify pointers (see `_mi_ptr_cookie`) + uintptr_t keys[2]; // two random keys used to encode the `thread_delayed_free` list + mi_random_ctx_t random; // random number context used for secure allocation + size_t page_count; // total number of pages in the `pages` queues. + size_t page_retired_min; // smallest retired index (retired pages are fully free, but still in the page queues) + size_t page_retired_max; // largest retired index into the `pages` array. + mi_heap_t* next; // list of heaps per thread + bool no_reclaim; // `true` if this heap should not reclaim abandoned pages + uint8_t tag; // custom identifier for this heap + uint8_t debug_offset; // number of bytes to preserve when filling freed or uninitialized memory + bool page_use_qsbr; // should freeing pages be delayed using QSBR +}; + + + +// ------------------------------------------------------ +// Debug +// ------------------------------------------------------ + +#if !defined(MI_DEBUG_UNINIT) +#define MI_DEBUG_UNINIT (0xD0) +#endif +#if !defined(MI_DEBUG_FREED) +#define MI_DEBUG_FREED (0xDF) +#endif +#if !defined(MI_DEBUG_PADDING) +#define MI_DEBUG_PADDING (0xDE) +#endif + +#if (MI_DEBUG) +// use our own assertion to print without memory allocation +void _mi_assert_fail(const char* assertion, const char* fname, unsigned int line, const char* func ); +#define mi_assert(expr) ((expr) ? 
(void)0 : _mi_assert_fail(#expr,__FILE__,__LINE__,__func__)) +#else +#define mi_assert(x) +#endif + +#if (MI_DEBUG>1) +#define mi_assert_internal mi_assert +#else +#define mi_assert_internal(x) +#endif + +#if (MI_DEBUG>2) +#define mi_assert_expensive mi_assert +#else +#define mi_assert_expensive(x) +#endif + +// ------------------------------------------------------ +// Statistics +// ------------------------------------------------------ + +#ifndef MI_STAT +#if (MI_DEBUG>0) +#define MI_STAT 2 +#else +#define MI_STAT 0 +#endif +#endif + +typedef struct mi_stat_count_s { + int64_t allocated; + int64_t freed; + int64_t peak; + int64_t current; +} mi_stat_count_t; + +typedef struct mi_stat_counter_s { + int64_t total; + int64_t count; +} mi_stat_counter_t; + +typedef struct mi_stats_s { + mi_stat_count_t segments; + mi_stat_count_t pages; + mi_stat_count_t reserved; + mi_stat_count_t committed; + mi_stat_count_t reset; + mi_stat_count_t purged; + mi_stat_count_t page_committed; + mi_stat_count_t segments_abandoned; + mi_stat_count_t pages_abandoned; + mi_stat_count_t threads; + mi_stat_count_t normal; + mi_stat_count_t huge; + mi_stat_count_t large; + mi_stat_count_t malloc; + mi_stat_count_t segments_cache; + mi_stat_counter_t pages_extended; + mi_stat_counter_t mmap_calls; + mi_stat_counter_t commit_calls; + mi_stat_counter_t reset_calls; + mi_stat_counter_t purge_calls; + mi_stat_counter_t page_no_retire; + mi_stat_counter_t searches; + mi_stat_counter_t normal_count; + mi_stat_counter_t huge_count; + mi_stat_counter_t large_count; +#if MI_STAT>1 + mi_stat_count_t normal_bins[MI_BIN_HUGE+1]; +#endif +} mi_stats_t; + + +void _mi_stat_increase(mi_stat_count_t* stat, size_t amount); +void _mi_stat_decrease(mi_stat_count_t* stat, size_t amount); +void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount); + +#if (MI_STAT) +#define mi_stat_increase(stat,amount) _mi_stat_increase( &(stat), amount) +#define mi_stat_decrease(stat,amount) _mi_stat_decrease( &(stat), amount) +#define mi_stat_counter_increase(stat,amount) _mi_stat_counter_increase( &(stat), amount) +#else +#define mi_stat_increase(stat,amount) (void)0 +#define mi_stat_decrease(stat,amount) (void)0 +#define mi_stat_counter_increase(stat,amount) (void)0 +#endif + +#define mi_heap_stat_counter_increase(heap,stat,amount) mi_stat_counter_increase( (heap)->tld->stats.stat, amount) +#define mi_heap_stat_increase(heap,stat,amount) mi_stat_increase( (heap)->tld->stats.stat, amount) +#define mi_heap_stat_decrease(heap,stat,amount) mi_stat_decrease( (heap)->tld->stats.stat, amount) + +// ------------------------------------------------------ +// Thread Local data +// ------------------------------------------------------ + +// A "span" is an available range of slices. The span queues keep +// track of slice spans of at most the given `slice_count` (but more than the previous size class). 
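+// Added illustrative note (not from the original header): mi_segment_bin() +// maps a slice count to a bin index, so a hypothetical first-fit search for +// a span of at least `needed` slices would start at the bin for `needed` and +// walk upward through the queues, roughly: +// +//   for (size_t bin = mi_segment_bin(needed); bin <= MI_SEGMENT_BIN_MAX; bin++) { +//     for (mi_slice_t* s = spans[bin].first; s != NULL; s = /* next in queue */) { +//       if (/* s spans >= needed slices */) { /* take this span */ } +//     } +//   }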
+typedef struct mi_span_queue_s { + mi_slice_t* first; + mi_slice_t* last; + size_t slice_count; +} mi_span_queue_t; + +#define MI_SEGMENT_BIN_MAX (35) // 35 == mi_segment_bin(MI_SLICES_PER_SEGMENT) + +// OS thread local data +typedef struct mi_os_tld_s { + size_t region_idx; // start point for next allocation + mi_stats_t* stats; // points to tld stats +} mi_os_tld_t; + + +// Segments thread local data +typedef struct mi_segments_tld_s { + mi_span_queue_t spans[MI_SEGMENT_BIN_MAX+1]; // free slice spans inside segments + size_t count; // current number of segments; + size_t peak_count; // peak number of segments + size_t current_size; // current size of all segments + size_t peak_size; // peak size of all segments + mi_stats_t* stats; // points to tld stats + mi_os_tld_t* os; // points to os stats + mi_abandoned_pool_t* abandoned; // pool of abandoned segments +} mi_segments_tld_t; + +// Thread local data +struct mi_tld_s { + unsigned long long heartbeat; // monotonic heartbeat count + bool recurse; // true if deferred was called; used to prevent infinite recursion. + mi_heap_t* heap_backing; // backing heap of this thread (cannot be deleted) + mi_heap_t* heaps; // list of heaps in this thread (so we can abandon all when the thread terminates) + mi_segments_tld_t segments; // segment tld + mi_os_tld_t os; // os tld + mi_stats_t stats; // statistics +}; + +#endif diff --git a/Include/internal/pycore_abstract.h b/Include/internal/pycore_abstract.h new file mode 100644 index 0000000000000000000000000000000000000000..3cc0afac4bd5b45490d3edd8e92062bec837da8a --- /dev/null +++ b/Include/internal/pycore_abstract.h @@ -0,0 +1,61 @@ +#ifndef Py_INTERNAL_ABSTRACT_H +#define Py_INTERNAL_ABSTRACT_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +// Fast inlined version of PyIndex_Check() +static inline int +_PyIndex_Check(PyObject *obj) +{ + PyNumberMethods *tp_as_number = Py_TYPE(obj)->tp_as_number; + return (tp_as_number != NULL && tp_as_number->nb_index != NULL); +} + +PyObject *_PyNumber_PowerNoMod(PyObject *lhs, PyObject *rhs); +PyObject *_PyNumber_InPlacePowerNoMod(PyObject *lhs, PyObject *rhs); + +extern int _PyObject_HasLen(PyObject *o); + +/* === Sequence protocol ================================================ */ + +#define PY_ITERSEARCH_COUNT 1 +#define PY_ITERSEARCH_INDEX 2 +#define PY_ITERSEARCH_CONTAINS 3 + +/* Iterate over seq. + + Result depends on the operation: + + PY_ITERSEARCH_COUNT: return # of times obj appears in seq; -1 if + error. + PY_ITERSEARCH_INDEX: return 0-based index of first occurrence of + obj in seq; set ValueError and return -1 if none found; + also return -1 on error. + PY_ITERSEARCH_CONTAINS: return 1 if obj in seq, else 0; -1 on + error. */ +extern Py_ssize_t _PySequence_IterSearch(PyObject *seq, + PyObject *obj, int operation); + +/* === Mapping protocol ================================================= */ + +extern int _PyObject_RealIsInstance(PyObject *inst, PyObject *cls); + +extern int _PyObject_RealIsSubclass(PyObject *derived, PyObject *cls); + +// Convert Python int to Py_ssize_t. Do nothing if the argument is None. +// Export for '_bisect' shared extension. +PyAPI_FUNC(int) _Py_convert_optional_to_ssize_t(PyObject *, void *); + +// Same as PyNumber_Index() but can return an instance of a subclass of int. +// Export for 'math' shared extension. 
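+// Added illustrative example (an assumption, not from this header): since +// bool subclasses int, _PyNumber_Index() may return a bool argument unchanged, +// whereas the public PyNumber_Index() normalizes the result to an exact int.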
+PyAPI_FUNC(PyObject*) _PyNumber_Index(PyObject *o); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_ABSTRACT_H */ diff --git a/Include/internal/pycore_asdl.h b/Include/internal/pycore_asdl.h new file mode 100644 index 0000000000000000000000000000000000000000..afeada88d13e24ceb69ebc07456964b26ba8e3c1 --- /dev/null +++ b/Include/internal/pycore_asdl.h @@ -0,0 +1,112 @@ +#ifndef Py_INTERNAL_ASDL_H +#define Py_INTERNAL_ASDL_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_pyarena.h" // _PyArena_Malloc() + +typedef PyObject * identifier; +typedef PyObject * string; +typedef PyObject * object; +typedef PyObject * constant; + +/* It would be nice if the code generated by asdl_c.py was completely + independent of Python, but it is a goal that requires too much work + at this stage. So, for example, I'll represent identifiers as + interned Python strings. +*/ + +#define _ASDL_SEQ_HEAD \ + Py_ssize_t size; \ + void **elements; + +typedef struct { + _ASDL_SEQ_HEAD +} asdl_seq; + +typedef struct { + _ASDL_SEQ_HEAD + void *typed_elements[1]; +} asdl_generic_seq; + +typedef struct { + _ASDL_SEQ_HEAD + PyObject *typed_elements[1]; +} asdl_identifier_seq; + +typedef struct { + _ASDL_SEQ_HEAD + int typed_elements[1]; +} asdl_int_seq; + +asdl_generic_seq *_Py_asdl_generic_seq_new(Py_ssize_t size, PyArena *arena); +asdl_identifier_seq *_Py_asdl_identifier_seq_new(Py_ssize_t size, PyArena *arena); +asdl_int_seq *_Py_asdl_int_seq_new(Py_ssize_t size, PyArena *arena); + + +#define GENERATE_ASDL_SEQ_CONSTRUCTOR(NAME, TYPE) \ +asdl_ ## NAME ## _seq *_Py_asdl_ ## NAME ## _seq_new(Py_ssize_t size, PyArena *arena) \ +{ \ + asdl_ ## NAME ## _seq *seq = NULL; \ + size_t n; \ + /* check size is sane */ \ + if (size < 0 || \ + (size && (((size_t)size - 1) > (SIZE_MAX / sizeof(void *))))) { \ + PyErr_NoMemory(); \ + return NULL; \ + } \ + n = (size ? (sizeof(TYPE *) * (size - 1)) : 0); \ + /* check if size can be added safely */ \ + if (n > SIZE_MAX - sizeof(asdl_ ## NAME ## _seq)) { \ + PyErr_NoMemory(); \ + return NULL; \ + } \ + n += sizeof(asdl_ ## NAME ## _seq); \ + seq = (asdl_ ## NAME ## _seq *)_PyArena_Malloc(arena, n); \ + if (!seq) { \ + PyErr_NoMemory(); \ + return NULL; \ + } \ + memset(seq, 0, n); \ + seq->size = size; \ + seq->elements = (void**)seq->typed_elements; \ + return seq; \ +} + +#define asdl_seq_GET_UNTYPED(S, I) _Py_RVALUE((S)->elements[(I)]) +#define asdl_seq_GET(S, I) _Py_RVALUE((S)->typed_elements[(I)]) +#define asdl_seq_LEN(S) _Py_RVALUE(((S) == NULL ?
0 : (S)->size)) + +#ifdef Py_DEBUG +# define asdl_seq_SET(S, I, V) \ + do { \ + Py_ssize_t _asdl_i = (I); \ + assert((S) != NULL); \ + assert(0 <= _asdl_i && _asdl_i < (S)->size); \ + (S)->typed_elements[_asdl_i] = (V); \ + } while (0) +#else +# define asdl_seq_SET(S, I, V) _Py_RVALUE((S)->typed_elements[(I)] = (V)) +#endif + +#ifdef Py_DEBUG +# define asdl_seq_SET_UNTYPED(S, I, V) \ + do { \ + Py_ssize_t _asdl_i = (I); \ + assert((S) != NULL); \ + assert(0 <= _asdl_i && _asdl_i < (S)->size); \ + (S)->elements[_asdl_i] = (V); \ + } while (0) +#else +# define asdl_seq_SET_UNTYPED(S, I, V) _Py_RVALUE((S)->elements[(I)] = (V)) +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_ASDL_H */ diff --git a/Include/internal/pycore_ast.h b/Include/internal/pycore_ast.h new file mode 100644 index 0000000000000000000000000000000000000000..f5bf1205a82be98eb6e8c96c45ff540ebc559c3b --- /dev/null +++ b/Include/internal/pycore_ast.h @@ -0,0 +1,926 @@ +// File automatically generated by Parser/asdl_c.py. + +#ifndef Py_INTERNAL_AST_H +#define Py_INTERNAL_AST_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_asdl.h" // _ASDL_SEQ_HEAD + +typedef struct _mod *mod_ty; + +typedef struct _stmt *stmt_ty; + +typedef struct _expr *expr_ty; + +typedef enum _expr_context { Load=1, Store=2, Del=3 } expr_context_ty; + +typedef enum _boolop { And=1, Or=2 } boolop_ty; + +typedef enum _operator { Add=1, Sub=2, Mult=3, MatMult=4, Div=5, Mod=6, Pow=7, + LShift=8, RShift=9, BitOr=10, BitXor=11, BitAnd=12, + FloorDiv=13 } operator_ty; + +typedef enum _unaryop { Invert=1, Not=2, UAdd=3, USub=4 } unaryop_ty; + +typedef enum _cmpop { Eq=1, NotEq=2, Lt=3, LtE=4, Gt=5, GtE=6, Is=7, IsNot=8, + In=9, NotIn=10 } cmpop_ty; + +typedef struct _comprehension *comprehension_ty; + +typedef struct _excepthandler *excepthandler_ty; + +typedef struct _arguments *arguments_ty; + +typedef struct _arg *arg_ty; + +typedef struct _keyword *keyword_ty; + +typedef struct _alias *alias_ty; + +typedef struct _withitem *withitem_ty; + +typedef struct _match_case *match_case_ty; + +typedef struct _pattern *pattern_ty; + +typedef struct _type_ignore *type_ignore_ty; + +typedef struct _type_param *type_param_ty; + + +typedef struct { + _ASDL_SEQ_HEAD + mod_ty typed_elements[1]; +} asdl_mod_seq; + +asdl_mod_seq *_Py_asdl_mod_seq_new(Py_ssize_t size, PyArena *arena); + +typedef struct { + _ASDL_SEQ_HEAD + stmt_ty typed_elements[1]; +} asdl_stmt_seq; + +asdl_stmt_seq *_Py_asdl_stmt_seq_new(Py_ssize_t size, PyArena *arena); + +typedef struct { + _ASDL_SEQ_HEAD + expr_ty typed_elements[1]; +} asdl_expr_seq; + +asdl_expr_seq *_Py_asdl_expr_seq_new(Py_ssize_t size, PyArena *arena); + +typedef struct { + _ASDL_SEQ_HEAD + comprehension_ty typed_elements[1]; +} asdl_comprehension_seq; + +asdl_comprehension_seq *_Py_asdl_comprehension_seq_new(Py_ssize_t size, PyArena + *arena); + +typedef struct { + _ASDL_SEQ_HEAD + excepthandler_ty typed_elements[1]; +} asdl_excepthandler_seq; + +asdl_excepthandler_seq *_Py_asdl_excepthandler_seq_new(Py_ssize_t size, PyArena + *arena); + +typedef struct { + _ASDL_SEQ_HEAD + arguments_ty typed_elements[1]; +} asdl_arguments_seq; + +asdl_arguments_seq *_Py_asdl_arguments_seq_new(Py_ssize_t size, PyArena *arena); + +typedef struct { + _ASDL_SEQ_HEAD + arg_ty typed_elements[1]; +} asdl_arg_seq; + +asdl_arg_seq *_Py_asdl_arg_seq_new(Py_ssize_t size, PyArena *arena); + +typedef struct { + _ASDL_SEQ_HEAD + keyword_ty 
typed_elements[1]; +} asdl_keyword_seq; + +asdl_keyword_seq *_Py_asdl_keyword_seq_new(Py_ssize_t size, PyArena *arena); + +typedef struct { + _ASDL_SEQ_HEAD + alias_ty typed_elements[1]; +} asdl_alias_seq; + +asdl_alias_seq *_Py_asdl_alias_seq_new(Py_ssize_t size, PyArena *arena); + +typedef struct { + _ASDL_SEQ_HEAD + withitem_ty typed_elements[1]; +} asdl_withitem_seq; + +asdl_withitem_seq *_Py_asdl_withitem_seq_new(Py_ssize_t size, PyArena *arena); + +typedef struct { + _ASDL_SEQ_HEAD + match_case_ty typed_elements[1]; +} asdl_match_case_seq; + +asdl_match_case_seq *_Py_asdl_match_case_seq_new(Py_ssize_t size, PyArena + *arena); + +typedef struct { + _ASDL_SEQ_HEAD + pattern_ty typed_elements[1]; +} asdl_pattern_seq; + +asdl_pattern_seq *_Py_asdl_pattern_seq_new(Py_ssize_t size, PyArena *arena); + +typedef struct { + _ASDL_SEQ_HEAD + type_ignore_ty typed_elements[1]; +} asdl_type_ignore_seq; + +asdl_type_ignore_seq *_Py_asdl_type_ignore_seq_new(Py_ssize_t size, PyArena + *arena); + +typedef struct { + _ASDL_SEQ_HEAD + type_param_ty typed_elements[1]; +} asdl_type_param_seq; + +asdl_type_param_seq *_Py_asdl_type_param_seq_new(Py_ssize_t size, PyArena + *arena); + + +enum _mod_kind {Module_kind=1, Interactive_kind=2, Expression_kind=3, + FunctionType_kind=4}; +struct _mod { + enum _mod_kind kind; + union { + struct { + asdl_stmt_seq *body; + asdl_type_ignore_seq *type_ignores; + } Module; + + struct { + asdl_stmt_seq *body; + } Interactive; + + struct { + expr_ty body; + } Expression; + + struct { + asdl_expr_seq *argtypes; + expr_ty returns; + } FunctionType; + + } v; +}; + +enum _stmt_kind {FunctionDef_kind=1, AsyncFunctionDef_kind=2, ClassDef_kind=3, + Return_kind=4, Delete_kind=5, Assign_kind=6, + TypeAlias_kind=7, AugAssign_kind=8, AnnAssign_kind=9, + For_kind=10, AsyncFor_kind=11, While_kind=12, If_kind=13, + With_kind=14, AsyncWith_kind=15, Match_kind=16, + Raise_kind=17, Try_kind=18, TryStar_kind=19, Assert_kind=20, + Import_kind=21, ImportFrom_kind=22, Global_kind=23, + Nonlocal_kind=24, Expr_kind=25, Pass_kind=26, Break_kind=27, + Continue_kind=28}; +struct _stmt { + enum _stmt_kind kind; + union { + struct { + identifier name; + arguments_ty args; + asdl_stmt_seq *body; + asdl_expr_seq *decorator_list; + expr_ty returns; + string type_comment; + asdl_type_param_seq *type_params; + } FunctionDef; + + struct { + identifier name; + arguments_ty args; + asdl_stmt_seq *body; + asdl_expr_seq *decorator_list; + expr_ty returns; + string type_comment; + asdl_type_param_seq *type_params; + } AsyncFunctionDef; + + struct { + identifier name; + asdl_expr_seq *bases; + asdl_keyword_seq *keywords; + asdl_stmt_seq *body; + asdl_expr_seq *decorator_list; + asdl_type_param_seq *type_params; + } ClassDef; + + struct { + expr_ty value; + } Return; + + struct { + asdl_expr_seq *targets; + } Delete; + + struct { + asdl_expr_seq *targets; + expr_ty value; + string type_comment; + } Assign; + + struct { + expr_ty name; + asdl_type_param_seq *type_params; + expr_ty value; + } TypeAlias; + + struct { + expr_ty target; + operator_ty op; + expr_ty value; + } AugAssign; + + struct { + expr_ty target; + expr_ty annotation; + expr_ty value; + int simple; + } AnnAssign; + + struct { + expr_ty target; + expr_ty iter; + asdl_stmt_seq *body; + asdl_stmt_seq *orelse; + string type_comment; + } For; + + struct { + expr_ty target; + expr_ty iter; + asdl_stmt_seq *body; + asdl_stmt_seq *orelse; + string type_comment; + } AsyncFor; + + struct { + expr_ty test; + asdl_stmt_seq *body; + asdl_stmt_seq *orelse; + } 
While; + + struct { + expr_ty test; + asdl_stmt_seq *body; + asdl_stmt_seq *orelse; + } If; + + struct { + asdl_withitem_seq *items; + asdl_stmt_seq *body; + string type_comment; + } With; + + struct { + asdl_withitem_seq *items; + asdl_stmt_seq *body; + string type_comment; + } AsyncWith; + + struct { + expr_ty subject; + asdl_match_case_seq *cases; + } Match; + + struct { + expr_ty exc; + expr_ty cause; + } Raise; + + struct { + asdl_stmt_seq *body; + asdl_excepthandler_seq *handlers; + asdl_stmt_seq *orelse; + asdl_stmt_seq *finalbody; + } Try; + + struct { + asdl_stmt_seq *body; + asdl_excepthandler_seq *handlers; + asdl_stmt_seq *orelse; + asdl_stmt_seq *finalbody; + } TryStar; + + struct { + expr_ty test; + expr_ty msg; + } Assert; + + struct { + asdl_alias_seq *names; + } Import; + + struct { + identifier module; + asdl_alias_seq *names; + int level; + } ImportFrom; + + struct { + asdl_identifier_seq *names; + } Global; + + struct { + asdl_identifier_seq *names; + } Nonlocal; + + struct { + expr_ty value; + } Expr; + + } v; + int lineno; + int col_offset; + int end_lineno; + int end_col_offset; +}; + +enum _expr_kind {BoolOp_kind=1, NamedExpr_kind=2, BinOp_kind=3, UnaryOp_kind=4, + Lambda_kind=5, IfExp_kind=6, Dict_kind=7, Set_kind=8, + ListComp_kind=9, SetComp_kind=10, DictComp_kind=11, + GeneratorExp_kind=12, Await_kind=13, Yield_kind=14, + YieldFrom_kind=15, Compare_kind=16, Call_kind=17, + FormattedValue_kind=18, JoinedStr_kind=19, Constant_kind=20, + Attribute_kind=21, Subscript_kind=22, Starred_kind=23, + Name_kind=24, List_kind=25, Tuple_kind=26, Slice_kind=27}; +struct _expr { + enum _expr_kind kind; + union { + struct { + boolop_ty op; + asdl_expr_seq *values; + } BoolOp; + + struct { + expr_ty target; + expr_ty value; + } NamedExpr; + + struct { + expr_ty left; + operator_ty op; + expr_ty right; + } BinOp; + + struct { + unaryop_ty op; + expr_ty operand; + } UnaryOp; + + struct { + arguments_ty args; + expr_ty body; + } Lambda; + + struct { + expr_ty test; + expr_ty body; + expr_ty orelse; + } IfExp; + + struct { + asdl_expr_seq *keys; + asdl_expr_seq *values; + } Dict; + + struct { + asdl_expr_seq *elts; + } Set; + + struct { + expr_ty elt; + asdl_comprehension_seq *generators; + } ListComp; + + struct { + expr_ty elt; + asdl_comprehension_seq *generators; + } SetComp; + + struct { + expr_ty key; + expr_ty value; + asdl_comprehension_seq *generators; + } DictComp; + + struct { + expr_ty elt; + asdl_comprehension_seq *generators; + } GeneratorExp; + + struct { + expr_ty value; + } Await; + + struct { + expr_ty value; + } Yield; + + struct { + expr_ty value; + } YieldFrom; + + struct { + expr_ty left; + asdl_int_seq *ops; + asdl_expr_seq *comparators; + } Compare; + + struct { + expr_ty func; + asdl_expr_seq *args; + asdl_keyword_seq *keywords; + } Call; + + struct { + expr_ty value; + int conversion; + expr_ty format_spec; + } FormattedValue; + + struct { + asdl_expr_seq *values; + } JoinedStr; + + struct { + constant value; + string kind; + } Constant; + + struct { + expr_ty value; + identifier attr; + expr_context_ty ctx; + } Attribute; + + struct { + expr_ty value; + expr_ty slice; + expr_context_ty ctx; + } Subscript; + + struct { + expr_ty value; + expr_context_ty ctx; + } Starred; + + struct { + identifier id; + expr_context_ty ctx; + } Name; + + struct { + asdl_expr_seq *elts; + expr_context_ty ctx; + } List; + + struct { + asdl_expr_seq *elts; + expr_context_ty ctx; + } Tuple; + + struct { + expr_ty lower; + expr_ty upper; + expr_ty step; + } Slice; + + } v; + int 
lineno; + int col_offset; + int end_lineno; + int end_col_offset; +}; + +struct _comprehension { + expr_ty target; + expr_ty iter; + asdl_expr_seq *ifs; + int is_async; +}; + +enum _excepthandler_kind {ExceptHandler_kind=1}; +struct _excepthandler { + enum _excepthandler_kind kind; + union { + struct { + expr_ty type; + identifier name; + asdl_stmt_seq *body; + } ExceptHandler; + + } v; + int lineno; + int col_offset; + int end_lineno; + int end_col_offset; +}; + +struct _arguments { + asdl_arg_seq *posonlyargs; + asdl_arg_seq *args; + arg_ty vararg; + asdl_arg_seq *kwonlyargs; + asdl_expr_seq *kw_defaults; + arg_ty kwarg; + asdl_expr_seq *defaults; +}; + +struct _arg { + identifier arg; + expr_ty annotation; + string type_comment; + int lineno; + int col_offset; + int end_lineno; + int end_col_offset; +}; + +struct _keyword { + identifier arg; + expr_ty value; + int lineno; + int col_offset; + int end_lineno; + int end_col_offset; +}; + +struct _alias { + identifier name; + identifier asname; + int lineno; + int col_offset; + int end_lineno; + int end_col_offset; +}; + +struct _withitem { + expr_ty context_expr; + expr_ty optional_vars; +}; + +struct _match_case { + pattern_ty pattern; + expr_ty guard; + asdl_stmt_seq *body; +}; + +enum _pattern_kind {MatchValue_kind=1, MatchSingleton_kind=2, + MatchSequence_kind=3, MatchMapping_kind=4, + MatchClass_kind=5, MatchStar_kind=6, MatchAs_kind=7, + MatchOr_kind=8}; +struct _pattern { + enum _pattern_kind kind; + union { + struct { + expr_ty value; + } MatchValue; + + struct { + constant value; + } MatchSingleton; + + struct { + asdl_pattern_seq *patterns; + } MatchSequence; + + struct { + asdl_expr_seq *keys; + asdl_pattern_seq *patterns; + identifier rest; + } MatchMapping; + + struct { + expr_ty cls; + asdl_pattern_seq *patterns; + asdl_identifier_seq *kwd_attrs; + asdl_pattern_seq *kwd_patterns; + } MatchClass; + + struct { + identifier name; + } MatchStar; + + struct { + pattern_ty pattern; + identifier name; + } MatchAs; + + struct { + asdl_pattern_seq *patterns; + } MatchOr; + + } v; + int lineno; + int col_offset; + int end_lineno; + int end_col_offset; +}; + +enum _type_ignore_kind {TypeIgnore_kind=1}; +struct _type_ignore { + enum _type_ignore_kind kind; + union { + struct { + int lineno; + string tag; + } TypeIgnore; + + } v; +}; + +enum _type_param_kind {TypeVar_kind=1, ParamSpec_kind=2, TypeVarTuple_kind=3}; +struct _type_param { + enum _type_param_kind kind; + union { + struct { + identifier name; + expr_ty bound; + expr_ty default_value; + } TypeVar; + + struct { + identifier name; + expr_ty default_value; + } ParamSpec; + + struct { + identifier name; + expr_ty default_value; + } TypeVarTuple; + + } v; + int lineno; + int col_offset; + int end_lineno; + int end_col_offset; +}; + + +// Note: these macros affect function definitions, not only call sites. 
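+// Added usage sketch (not part of the generated header): building a minimal +// module holding a single `pass` statement with the constructors declared +// below; _PyArena_New() is assumed from pycore_pyarena.h. +// +//   PyArena *arena = _PyArena_New();                        // assumed helper +//   asdl_stmt_seq *body = _Py_asdl_stmt_seq_new(1, arena); +//   asdl_seq_SET(body, 0, _PyAST_Pass(1, 0, 1, 4, arena));  // line 1, cols 0-4 +//   mod_ty mod = _PyAST_Module(body, NULL, arena);          // type_ignores omitted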
+mod_ty _PyAST_Module(asdl_stmt_seq * body, asdl_type_ignore_seq * type_ignores, + PyArena *arena); +mod_ty _PyAST_Interactive(asdl_stmt_seq * body, PyArena *arena); +mod_ty _PyAST_Expression(expr_ty body, PyArena *arena); +mod_ty _PyAST_FunctionType(asdl_expr_seq * argtypes, expr_ty returns, PyArena + *arena); +stmt_ty _PyAST_FunctionDef(identifier name, arguments_ty args, asdl_stmt_seq * + body, asdl_expr_seq * decorator_list, expr_ty + returns, string type_comment, asdl_type_param_seq * + type_params, int lineno, int col_offset, int + end_lineno, int end_col_offset, PyArena *arena); +stmt_ty _PyAST_AsyncFunctionDef(identifier name, arguments_ty args, + asdl_stmt_seq * body, asdl_expr_seq * + decorator_list, expr_ty returns, string + type_comment, asdl_type_param_seq * + type_params, int lineno, int col_offset, int + end_lineno, int end_col_offset, PyArena *arena); +stmt_ty _PyAST_ClassDef(identifier name, asdl_expr_seq * bases, + asdl_keyword_seq * keywords, asdl_stmt_seq * body, + asdl_expr_seq * decorator_list, asdl_type_param_seq * + type_params, int lineno, int col_offset, int + end_lineno, int end_col_offset, PyArena *arena); +stmt_ty _PyAST_Return(expr_ty value, int lineno, int col_offset, int + end_lineno, int end_col_offset, PyArena *arena); +stmt_ty _PyAST_Delete(asdl_expr_seq * targets, int lineno, int col_offset, int + end_lineno, int end_col_offset, PyArena *arena); +stmt_ty _PyAST_Assign(asdl_expr_seq * targets, expr_ty value, string + type_comment, int lineno, int col_offset, int end_lineno, + int end_col_offset, PyArena *arena); +stmt_ty _PyAST_TypeAlias(expr_ty name, asdl_type_param_seq * type_params, + expr_ty value, int lineno, int col_offset, int + end_lineno, int end_col_offset, PyArena *arena); +stmt_ty _PyAST_AugAssign(expr_ty target, operator_ty op, expr_ty value, int + lineno, int col_offset, int end_lineno, int + end_col_offset, PyArena *arena); +stmt_ty _PyAST_AnnAssign(expr_ty target, expr_ty annotation, expr_ty value, int + simple, int lineno, int col_offset, int end_lineno, + int end_col_offset, PyArena *arena); +stmt_ty _PyAST_For(expr_ty target, expr_ty iter, asdl_stmt_seq * body, + asdl_stmt_seq * orelse, string type_comment, int lineno, int + col_offset, int end_lineno, int end_col_offset, PyArena + *arena); +stmt_ty _PyAST_AsyncFor(expr_ty target, expr_ty iter, asdl_stmt_seq * body, + asdl_stmt_seq * orelse, string type_comment, int + lineno, int col_offset, int end_lineno, int + end_col_offset, PyArena *arena); +stmt_ty _PyAST_While(expr_ty test, asdl_stmt_seq * body, asdl_stmt_seq * + orelse, int lineno, int col_offset, int end_lineno, int + end_col_offset, PyArena *arena); +stmt_ty _PyAST_If(expr_ty test, asdl_stmt_seq * body, asdl_stmt_seq * orelse, + int lineno, int col_offset, int end_lineno, int + end_col_offset, PyArena *arena); +stmt_ty _PyAST_With(asdl_withitem_seq * items, asdl_stmt_seq * body, string + type_comment, int lineno, int col_offset, int end_lineno, + int end_col_offset, PyArena *arena); +stmt_ty _PyAST_AsyncWith(asdl_withitem_seq * items, asdl_stmt_seq * body, + string type_comment, int lineno, int col_offset, int + end_lineno, int end_col_offset, PyArena *arena); +stmt_ty _PyAST_Match(expr_ty subject, asdl_match_case_seq * cases, int lineno, + int col_offset, int end_lineno, int end_col_offset, + PyArena *arena); +stmt_ty _PyAST_Raise(expr_ty exc, expr_ty cause, int lineno, int col_offset, + int end_lineno, int end_col_offset, PyArena *arena); +stmt_ty _PyAST_Try(asdl_stmt_seq * body, asdl_excepthandler_seq * handlers, + 
asdl_stmt_seq * orelse, asdl_stmt_seq * finalbody, int + lineno, int col_offset, int end_lineno, int end_col_offset, + PyArena *arena); +stmt_ty _PyAST_TryStar(asdl_stmt_seq * body, asdl_excepthandler_seq * handlers, + asdl_stmt_seq * orelse, asdl_stmt_seq * finalbody, int + lineno, int col_offset, int end_lineno, int + end_col_offset, PyArena *arena); +stmt_ty _PyAST_Assert(expr_ty test, expr_ty msg, int lineno, int col_offset, + int end_lineno, int end_col_offset, PyArena *arena); +stmt_ty _PyAST_Import(asdl_alias_seq * names, int lineno, int col_offset, int + end_lineno, int end_col_offset, PyArena *arena); +stmt_ty _PyAST_ImportFrom(identifier module, asdl_alias_seq * names, int level, + int lineno, int col_offset, int end_lineno, int + end_col_offset, PyArena *arena); +stmt_ty _PyAST_Global(asdl_identifier_seq * names, int lineno, int col_offset, + int end_lineno, int end_col_offset, PyArena *arena); +stmt_ty _PyAST_Nonlocal(asdl_identifier_seq * names, int lineno, int + col_offset, int end_lineno, int end_col_offset, PyArena + *arena); +stmt_ty _PyAST_Expr(expr_ty value, int lineno, int col_offset, int end_lineno, + int end_col_offset, PyArena *arena); +stmt_ty _PyAST_Pass(int lineno, int col_offset, int end_lineno, int + end_col_offset, PyArena *arena); +stmt_ty _PyAST_Break(int lineno, int col_offset, int end_lineno, int + end_col_offset, PyArena *arena); +stmt_ty _PyAST_Continue(int lineno, int col_offset, int end_lineno, int + end_col_offset, PyArena *arena); +expr_ty _PyAST_BoolOp(boolop_ty op, asdl_expr_seq * values, int lineno, int + col_offset, int end_lineno, int end_col_offset, PyArena + *arena); +expr_ty _PyAST_NamedExpr(expr_ty target, expr_ty value, int lineno, int + col_offset, int end_lineno, int end_col_offset, + PyArena *arena); +expr_ty _PyAST_BinOp(expr_ty left, operator_ty op, expr_ty right, int lineno, + int col_offset, int end_lineno, int end_col_offset, + PyArena *arena); +expr_ty _PyAST_UnaryOp(unaryop_ty op, expr_ty operand, int lineno, int + col_offset, int end_lineno, int end_col_offset, PyArena + *arena); +expr_ty _PyAST_Lambda(arguments_ty args, expr_ty body, int lineno, int + col_offset, int end_lineno, int end_col_offset, PyArena + *arena); +expr_ty _PyAST_IfExp(expr_ty test, expr_ty body, expr_ty orelse, int lineno, + int col_offset, int end_lineno, int end_col_offset, + PyArena *arena); +expr_ty _PyAST_Dict(asdl_expr_seq * keys, asdl_expr_seq * values, int lineno, + int col_offset, int end_lineno, int end_col_offset, PyArena + *arena); +expr_ty _PyAST_Set(asdl_expr_seq * elts, int lineno, int col_offset, int + end_lineno, int end_col_offset, PyArena *arena); +expr_ty _PyAST_ListComp(expr_ty elt, asdl_comprehension_seq * generators, int + lineno, int col_offset, int end_lineno, int + end_col_offset, PyArena *arena); +expr_ty _PyAST_SetComp(expr_ty elt, asdl_comprehension_seq * generators, int + lineno, int col_offset, int end_lineno, int + end_col_offset, PyArena *arena); +expr_ty _PyAST_DictComp(expr_ty key, expr_ty value, asdl_comprehension_seq * + generators, int lineno, int col_offset, int end_lineno, + int end_col_offset, PyArena *arena); +expr_ty _PyAST_GeneratorExp(expr_ty elt, asdl_comprehension_seq * generators, + int lineno, int col_offset, int end_lineno, int + end_col_offset, PyArena *arena); +expr_ty _PyAST_Await(expr_ty value, int lineno, int col_offset, int end_lineno, + int end_col_offset, PyArena *arena); +expr_ty _PyAST_Yield(expr_ty value, int lineno, int col_offset, int end_lineno, + int end_col_offset, PyArena *arena); +expr_ty 
_PyAST_YieldFrom(expr_ty value, int lineno, int col_offset, int + end_lineno, int end_col_offset, PyArena *arena); +expr_ty _PyAST_Compare(expr_ty left, asdl_int_seq * ops, asdl_expr_seq * + comparators, int lineno, int col_offset, int end_lineno, + int end_col_offset, PyArena *arena); +expr_ty _PyAST_Call(expr_ty func, asdl_expr_seq * args, asdl_keyword_seq * + keywords, int lineno, int col_offset, int end_lineno, int + end_col_offset, PyArena *arena); +expr_ty _PyAST_FormattedValue(expr_ty value, int conversion, expr_ty + format_spec, int lineno, int col_offset, int + end_lineno, int end_col_offset, PyArena *arena); +expr_ty _PyAST_JoinedStr(asdl_expr_seq * values, int lineno, int col_offset, + int end_lineno, int end_col_offset, PyArena *arena); +expr_ty _PyAST_Constant(constant value, string kind, int lineno, int + col_offset, int end_lineno, int end_col_offset, PyArena + *arena); +expr_ty _PyAST_Attribute(expr_ty value, identifier attr, expr_context_ty ctx, + int lineno, int col_offset, int end_lineno, int + end_col_offset, PyArena *arena); +expr_ty _PyAST_Subscript(expr_ty value, expr_ty slice, expr_context_ty ctx, int + lineno, int col_offset, int end_lineno, int + end_col_offset, PyArena *arena); +expr_ty _PyAST_Starred(expr_ty value, expr_context_ty ctx, int lineno, int + col_offset, int end_lineno, int end_col_offset, PyArena + *arena); +expr_ty _PyAST_Name(identifier id, expr_context_ty ctx, int lineno, int + col_offset, int end_lineno, int end_col_offset, PyArena + *arena); +expr_ty _PyAST_List(asdl_expr_seq * elts, expr_context_ty ctx, int lineno, int + col_offset, int end_lineno, int end_col_offset, PyArena + *arena); +expr_ty _PyAST_Tuple(asdl_expr_seq * elts, expr_context_ty ctx, int lineno, int + col_offset, int end_lineno, int end_col_offset, PyArena + *arena); +expr_ty _PyAST_Slice(expr_ty lower, expr_ty upper, expr_ty step, int lineno, + int col_offset, int end_lineno, int end_col_offset, + PyArena *arena); +comprehension_ty _PyAST_comprehension(expr_ty target, expr_ty iter, + asdl_expr_seq * ifs, int is_async, + PyArena *arena); +excepthandler_ty _PyAST_ExceptHandler(expr_ty type, identifier name, + asdl_stmt_seq * body, int lineno, int + col_offset, int end_lineno, int + end_col_offset, PyArena *arena); +arguments_ty _PyAST_arguments(asdl_arg_seq * posonlyargs, asdl_arg_seq * args, + arg_ty vararg, asdl_arg_seq * kwonlyargs, + asdl_expr_seq * kw_defaults, arg_ty kwarg, + asdl_expr_seq * defaults, PyArena *arena); +arg_ty _PyAST_arg(identifier arg, expr_ty annotation, string type_comment, int + lineno, int col_offset, int end_lineno, int end_col_offset, + PyArena *arena); +keyword_ty _PyAST_keyword(identifier arg, expr_ty value, int lineno, int + col_offset, int end_lineno, int end_col_offset, + PyArena *arena); +alias_ty _PyAST_alias(identifier name, identifier asname, int lineno, int + col_offset, int end_lineno, int end_col_offset, PyArena + *arena); +withitem_ty _PyAST_withitem(expr_ty context_expr, expr_ty optional_vars, + PyArena *arena); +match_case_ty _PyAST_match_case(pattern_ty pattern, expr_ty guard, + asdl_stmt_seq * body, PyArena *arena); +pattern_ty _PyAST_MatchValue(expr_ty value, int lineno, int col_offset, int + end_lineno, int end_col_offset, PyArena *arena); +pattern_ty _PyAST_MatchSingleton(constant value, int lineno, int col_offset, + int end_lineno, int end_col_offset, PyArena + *arena); +pattern_ty _PyAST_MatchSequence(asdl_pattern_seq * patterns, int lineno, int + col_offset, int end_lineno, int end_col_offset, + PyArena *arena); +pattern_ty 
_PyAST_MatchMapping(asdl_expr_seq * keys, asdl_pattern_seq * + patterns, identifier rest, int lineno, int + col_offset, int end_lineno, int end_col_offset, + PyArena *arena); +pattern_ty _PyAST_MatchClass(expr_ty cls, asdl_pattern_seq * patterns, + asdl_identifier_seq * kwd_attrs, asdl_pattern_seq + * kwd_patterns, int lineno, int col_offset, int + end_lineno, int end_col_offset, PyArena *arena); +pattern_ty _PyAST_MatchStar(identifier name, int lineno, int col_offset, int + end_lineno, int end_col_offset, PyArena *arena); +pattern_ty _PyAST_MatchAs(pattern_ty pattern, identifier name, int lineno, int + col_offset, int end_lineno, int end_col_offset, + PyArena *arena); +pattern_ty _PyAST_MatchOr(asdl_pattern_seq * patterns, int lineno, int + col_offset, int end_lineno, int end_col_offset, + PyArena *arena); +type_ignore_ty _PyAST_TypeIgnore(int lineno, string tag, PyArena *arena); +type_param_ty _PyAST_TypeVar(identifier name, expr_ty bound, expr_ty + default_value, int lineno, int col_offset, int + end_lineno, int end_col_offset, PyArena *arena); +type_param_ty _PyAST_ParamSpec(identifier name, expr_ty default_value, int + lineno, int col_offset, int end_lineno, int + end_col_offset, PyArena *arena); +type_param_ty _PyAST_TypeVarTuple(identifier name, expr_ty default_value, int + lineno, int col_offset, int end_lineno, int + end_col_offset, PyArena *arena); + + +PyObject* PyAST_mod2obj(mod_ty t); +mod_ty PyAST_obj2mod(PyObject* ast, PyArena* arena, int mode); +int PyAST_Check(PyObject* obj); + +extern int _PyAST_Validate(mod_ty); + +/* _PyAST_ExprAsUnicode is defined in ast_unparse.c */ +extern PyObject* _PyAST_ExprAsUnicode(expr_ty); + +/* Return a borrowed reference to the first literal string in the + sequence of statements, or NULL if it doesn't start with a literal string. + Doesn't set an exception. */ +extern PyObject* _PyAST_GetDocString(asdl_stmt_seq *); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_AST_H */ diff --git a/Include/internal/pycore_ast_state.h b/Include/internal/pycore_ast_state.h new file mode 100644 index 0000000000000000000000000000000000000000..09ae95465495c01e32a78d00eac8989786762b08 --- /dev/null +++ b/Include/internal/pycore_ast_state.h @@ -0,0 +1,268 @@ +// File automatically generated by Parser/asdl_c.py.
+ +#ifndef Py_INTERNAL_AST_STATE_H +#define Py_INTERNAL_AST_STATE_H + +#include "pycore_lock.h" // _PyOnceFlag + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +struct ast_state { + _PyOnceFlag once; + int finalized; + PyObject *AST_type; + PyObject *Add_singleton; + PyObject *Add_type; + PyObject *And_singleton; + PyObject *And_type; + PyObject *AnnAssign_type; + PyObject *Assert_type; + PyObject *Assign_type; + PyObject *AsyncFor_type; + PyObject *AsyncFunctionDef_type; + PyObject *AsyncWith_type; + PyObject *Attribute_type; + PyObject *AugAssign_type; + PyObject *Await_type; + PyObject *BinOp_type; + PyObject *BitAnd_singleton; + PyObject *BitAnd_type; + PyObject *BitOr_singleton; + PyObject *BitOr_type; + PyObject *BitXor_singleton; + PyObject *BitXor_type; + PyObject *BoolOp_type; + PyObject *Break_type; + PyObject *Call_type; + PyObject *ClassDef_type; + PyObject *Compare_type; + PyObject *Constant_type; + PyObject *Continue_type; + PyObject *Del_singleton; + PyObject *Del_type; + PyObject *Delete_type; + PyObject *DictComp_type; + PyObject *Dict_type; + PyObject *Div_singleton; + PyObject *Div_type; + PyObject *Eq_singleton; + PyObject *Eq_type; + PyObject *ExceptHandler_type; + PyObject *Expr_type; + PyObject *Expression_type; + PyObject *FloorDiv_singleton; + PyObject *FloorDiv_type; + PyObject *For_type; + PyObject *FormattedValue_type; + PyObject *FunctionDef_type; + PyObject *FunctionType_type; + PyObject *GeneratorExp_type; + PyObject *Global_type; + PyObject *GtE_singleton; + PyObject *GtE_type; + PyObject *Gt_singleton; + PyObject *Gt_type; + PyObject *IfExp_type; + PyObject *If_type; + PyObject *ImportFrom_type; + PyObject *Import_type; + PyObject *In_singleton; + PyObject *In_type; + PyObject *Interactive_type; + PyObject *Invert_singleton; + PyObject *Invert_type; + PyObject *IsNot_singleton; + PyObject *IsNot_type; + PyObject *Is_singleton; + PyObject *Is_type; + PyObject *JoinedStr_type; + PyObject *LShift_singleton; + PyObject *LShift_type; + PyObject *Lambda_type; + PyObject *ListComp_type; + PyObject *List_type; + PyObject *Load_singleton; + PyObject *Load_type; + PyObject *LtE_singleton; + PyObject *LtE_type; + PyObject *Lt_singleton; + PyObject *Lt_type; + PyObject *MatMult_singleton; + PyObject *MatMult_type; + PyObject *MatchAs_type; + PyObject *MatchClass_type; + PyObject *MatchMapping_type; + PyObject *MatchOr_type; + PyObject *MatchSequence_type; + PyObject *MatchSingleton_type; + PyObject *MatchStar_type; + PyObject *MatchValue_type; + PyObject *Match_type; + PyObject *Mod_singleton; + PyObject *Mod_type; + PyObject *Module_type; + PyObject *Mult_singleton; + PyObject *Mult_type; + PyObject *Name_type; + PyObject *NamedExpr_type; + PyObject *Nonlocal_type; + PyObject *NotEq_singleton; + PyObject *NotEq_type; + PyObject *NotIn_singleton; + PyObject *NotIn_type; + PyObject *Not_singleton; + PyObject *Not_type; + PyObject *Or_singleton; + PyObject *Or_type; + PyObject *ParamSpec_type; + PyObject *Pass_type; + PyObject *Pow_singleton; + PyObject *Pow_type; + PyObject *RShift_singleton; + PyObject *RShift_type; + PyObject *Raise_type; + PyObject *Return_type; + PyObject *SetComp_type; + PyObject *Set_type; + PyObject *Slice_type; + PyObject *Starred_type; + PyObject *Store_singleton; + PyObject *Store_type; + PyObject *Sub_singleton; + PyObject *Sub_type; + PyObject *Subscript_type; + PyObject *TryStar_type; + PyObject *Try_type; + PyObject *Tuple_type; + PyObject *TypeAlias_type; + 
PyObject *TypeIgnore_type; + PyObject *TypeVarTuple_type; + PyObject *TypeVar_type; + PyObject *UAdd_singleton; + PyObject *UAdd_type; + PyObject *USub_singleton; + PyObject *USub_type; + PyObject *UnaryOp_type; + PyObject *While_type; + PyObject *With_type; + PyObject *YieldFrom_type; + PyObject *Yield_type; + PyObject *__dict__; + PyObject *__doc__; + PyObject *__match_args__; + PyObject *__module__; + PyObject *_attributes; + PyObject *_fields; + PyObject *alias_type; + PyObject *annotation; + PyObject *arg; + PyObject *arg_type; + PyObject *args; + PyObject *argtypes; + PyObject *arguments_type; + PyObject *asname; + PyObject *ast; + PyObject *attr; + PyObject *bases; + PyObject *body; + PyObject *boolop_type; + PyObject *bound; + PyObject *cases; + PyObject *cause; + PyObject *cls; + PyObject *cmpop_type; + PyObject *col_offset; + PyObject *comparators; + PyObject *comprehension_type; + PyObject *context_expr; + PyObject *conversion; + PyObject *ctx; + PyObject *decorator_list; + PyObject *default_value; + PyObject *defaults; + PyObject *elt; + PyObject *elts; + PyObject *end_col_offset; + PyObject *end_lineno; + PyObject *exc; + PyObject *excepthandler_type; + PyObject *expr_context_type; + PyObject *expr_type; + PyObject *finalbody; + PyObject *format_spec; + PyObject *func; + PyObject *generators; + PyObject *guard; + PyObject *handlers; + PyObject *id; + PyObject *ifs; + PyObject *is_async; + PyObject *items; + PyObject *iter; + PyObject *key; + PyObject *keys; + PyObject *keyword_type; + PyObject *keywords; + PyObject *kind; + PyObject *kw_defaults; + PyObject *kwarg; + PyObject *kwd_attrs; + PyObject *kwd_patterns; + PyObject *kwonlyargs; + PyObject *left; + PyObject *level; + PyObject *lineno; + PyObject *lower; + PyObject *match_case_type; + PyObject *mod_type; + PyObject *module; + PyObject *msg; + PyObject *name; + PyObject *names; + PyObject *op; + PyObject *operand; + PyObject *operator_type; + PyObject *ops; + PyObject *optional_vars; + PyObject *orelse; + PyObject *pattern; + PyObject *pattern_type; + PyObject *patterns; + PyObject *posonlyargs; + PyObject *rest; + PyObject *returns; + PyObject *right; + PyObject *simple; + PyObject *slice; + PyObject *step; + PyObject *stmt_type; + PyObject *subject; + PyObject *tag; + PyObject *target; + PyObject *targets; + PyObject *test; + PyObject *type; + PyObject *type_comment; + PyObject *type_ignore_type; + PyObject *type_ignores; + PyObject *type_param_type; + PyObject *type_params; + PyObject *unaryop_type; + PyObject *upper; + PyObject *value; + PyObject *values; + PyObject *vararg; + PyObject *withitem_type; +}; + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_AST_STATE_H */ + diff --git a/Include/internal/pycore_atexit.h b/Include/internal/pycore_atexit.h new file mode 100644 index 0000000000000000000000000000000000000000..72c66a059395009b6985158b42221ef9c7774d5c --- /dev/null +++ b/Include/internal/pycore_atexit.h @@ -0,0 +1,67 @@ +#ifndef Py_INTERNAL_ATEXIT_H +#define Py_INTERNAL_ATEXIT_H + +#include "pycore_lock.h" // PyMutex + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + + +//############### +// runtime atexit + +typedef void (*atexit_callbackfunc)(void); + +struct _atexit_runtime_state { + PyMutex mutex; +#define NEXITFUNCS 32 + atexit_callbackfunc callbacks[NEXITFUNCS]; + int ncallbacks; +}; + + +//################### +// interpreter atexit + +typedef void (*atexit_datacallbackfunc)(void *); + +typedef struct atexit_callback 
{ + atexit_datacallbackfunc func; + void *data; + struct atexit_callback *next; +} atexit_callback; + +typedef struct { + PyObject *func; + PyObject *args; + PyObject *kwargs; +} atexit_py_callback; + +struct atexit_state { + atexit_callback *ll_callbacks; + // Kept for ABI compatibility--do not use! (See GH-127791.) + atexit_callback *last_ll_callback; + + // XXX The rest of the state could be moved to the atexit module state + // and a low-level callback added for it during module exec. + // For the moment we leave it here. + atexit_py_callback **callbacks; + int ncallbacks; + int callback_len; +}; + +// Export for '_interpchannels' shared extension +PyAPI_FUNC(int) _Py_AtExit( + PyInterpreterState *interp, + atexit_datacallbackfunc func, + void *data); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_ATEXIT_H */ diff --git a/Include/internal/pycore_backoff.h b/Include/internal/pycore_backoff.h new file mode 100644 index 0000000000000000000000000000000000000000..0bcca1e769b341f021f97aafb2a5a33512f5d47f --- /dev/null +++ b/Include/internal/pycore_backoff.h @@ -0,0 +1,145 @@ + +#ifndef Py_INTERNAL_BACKOFF_H +#define Py_INTERNAL_BACKOFF_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include <assert.h> +#include <stdbool.h> +#include <stdint.h> + + +typedef struct { + union { + struct { + uint16_t backoff : 4; + uint16_t value : 12; + }; + uint16_t as_counter; // For printf("%#x", ...) + }; +} _Py_BackoffCounter; + + +/* 16-bit countdown counters using exponential backoff. + + These are used by the adaptive specializer to count down until + it is time to specialize an instruction. If specialization fails + the counter is reset using exponential backoff. + + Another use is for the Tier 2 optimizer to decide when to create + a new Tier 2 trace (executor). Again, exponential backoff is used. + + The 16-bit counter is structured as a 12-bit unsigned 'value' + and a 4-bit 'backoff' field. When resetting the counter, the + backoff field is incremented (until it reaches a limit) and the + value is set to a bit mask representing the value 2**backoff - 1. + The maximum backoff is 12 (the number of value bits). + + There is an exceptional value which must not be updated, 0xFFFF.
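+ + Worked example (added illustration): a counter made by + make_backoff_counter(15, 4) counts down 15, 14, ..., 0 under + advance_backoff_counter(); backoff_counter_triggers() fires at 0, and + restart_backoff_counter() then restarts it at value 31 with backoff 5, + then 63, 127, ... capped at 2**12 - 1.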
+*/ + +#define UNREACHABLE_BACKOFF 0xFFFF + +static inline bool +is_unreachable_backoff_counter(_Py_BackoffCounter counter) +{ + return counter.as_counter == UNREACHABLE_BACKOFF; +} + +static inline _Py_BackoffCounter +make_backoff_counter(uint16_t value, uint16_t backoff) +{ + assert(backoff <= 15); + assert(value <= 0xFFF); + _Py_BackoffCounter result; + result.value = value; + result.backoff = backoff; + return result; +} + +static inline _Py_BackoffCounter +forge_backoff_counter(uint16_t counter) +{ + _Py_BackoffCounter result; + result.as_counter = counter; + return result; +} + +static inline _Py_BackoffCounter +restart_backoff_counter(_Py_BackoffCounter counter) +{ + assert(!is_unreachable_backoff_counter(counter)); + if (counter.backoff < 12) { + return make_backoff_counter((1 << (counter.backoff + 1)) - 1, counter.backoff + 1); + } + else { + return make_backoff_counter((1 << 12) - 1, 12); + } +} + +static inline _Py_BackoffCounter +pause_backoff_counter(_Py_BackoffCounter counter) +{ + return make_backoff_counter(counter.value | 1, counter.backoff); +} + +static inline _Py_BackoffCounter +advance_backoff_counter(_Py_BackoffCounter counter) +{ + if (!is_unreachable_backoff_counter(counter)) { + return make_backoff_counter((counter.value - 1) & 0xFFF, counter.backoff); + } + else { + return counter; + } +} + +static inline bool +backoff_counter_triggers(_Py_BackoffCounter counter) +{ + return counter.value == 0; +} + +/* Initial JUMP_BACKWARD counter. + * This determines when we create a trace for a loop. +* Backoff sequence 16, 32, 64, 128, 256, 512, 1024, 2048, 4096. */ +#define JUMP_BACKWARD_INITIAL_VALUE 16 +#define JUMP_BACKWARD_INITIAL_BACKOFF 4 +static inline _Py_BackoffCounter +initial_jump_backoff_counter(void) +{ + return make_backoff_counter(JUMP_BACKWARD_INITIAL_VALUE, + JUMP_BACKWARD_INITIAL_BACKOFF); +} + +/* Initial exit temperature. + * Must be larger than ADAPTIVE_COOLDOWN_VALUE, + * otherwise when a side exit warms up we may construct + * a new trace before the Tier 1 code has properly re-specialized. + * Backoff sequence 64, 128, 256, 512, 1024, 2048, 4096. */ +#define COLD_EXIT_INITIAL_VALUE 64 +#define COLD_EXIT_INITIAL_BACKOFF 6 + +static inline _Py_BackoffCounter +initial_temperature_backoff_counter(void) +{ + return make_backoff_counter(COLD_EXIT_INITIAL_VALUE, + COLD_EXIT_INITIAL_BACKOFF); +} + +/* Unreachable backoff counter. */ +static inline _Py_BackoffCounter +initial_unreachable_backoff_counter(void) +{ + return forge_backoff_counter(UNREACHABLE_BACKOFF); +} + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_BACKOFF_H */ diff --git a/Include/internal/pycore_bitutils.h b/Include/internal/pycore_bitutils.h new file mode 100644 index 0000000000000000000000000000000000000000..50f69377523818ff1177b648a6efb6427b51ff98 --- /dev/null +++ b/Include/internal/pycore_bitutils.h @@ -0,0 +1,186 @@ +/* Bit and bytes utilities. + + Bytes swap functions, reverse order of bytes: + + - _Py_bswap16(uint16_t) + - _Py_bswap32(uint32_t) + - _Py_bswap64(uint64_t) +*/ + +#ifndef Py_INTERNAL_BITUTILS_H +#define Py_INTERNAL_BITUTILS_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#if defined(__GNUC__) \ + && ((__GNUC__ >= 5) || (__GNUC__ == 4) && (__GNUC_MINOR__ >= 8)) + /* __builtin_bswap16() is available since GCC 4.8, + __builtin_bswap32() is available since GCC 4.3, + __builtin_bswap64() is available since GCC 4.3. 
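+ + As a concrete check (added illustration): byte swapping reverses byte + order, e.g. _Py_bswap32(0x12345678) == 0x78563412.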
*/ +# define _PY_HAVE_BUILTIN_BSWAP +#endif + +#ifdef _MSC_VER +# include <intrin.h> // _byteswap_uint64() +#endif + + +static inline uint16_t +_Py_bswap16(uint16_t word) +{ +#if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap16) + return __builtin_bswap16(word); +#elif defined(_MSC_VER) + Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned short)); + return _byteswap_ushort(word); +#else + // Portable implementation which doesn't rely on circular bit shift + return ( ((word & UINT16_C(0x00FF)) << 8) + | ((word & UINT16_C(0xFF00)) >> 8)); +#endif +} + +static inline uint32_t +_Py_bswap32(uint32_t word) +{ +#if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap32) + return __builtin_bswap32(word); +#elif defined(_MSC_VER) + Py_BUILD_ASSERT(sizeof(word) == sizeof(unsigned long)); + return _byteswap_ulong(word); +#else + // Portable implementation which doesn't rely on circular bit shift + return ( ((word & UINT32_C(0x000000FF)) << 24) + | ((word & UINT32_C(0x0000FF00)) << 8) + | ((word & UINT32_C(0x00FF0000)) >> 8) + | ((word & UINT32_C(0xFF000000)) >> 24)); +#endif +} + +static inline uint64_t +_Py_bswap64(uint64_t word) +{ +#if defined(_PY_HAVE_BUILTIN_BSWAP) || _Py__has_builtin(__builtin_bswap64) + return __builtin_bswap64(word); +#elif defined(_MSC_VER) + return _byteswap_uint64(word); +#else + // Portable implementation which doesn't rely on circular bit shift + return ( ((word & UINT64_C(0x00000000000000FF)) << 56) + | ((word & UINT64_C(0x000000000000FF00)) << 40) + | ((word & UINT64_C(0x0000000000FF0000)) << 24) + | ((word & UINT64_C(0x00000000FF000000)) << 8) + | ((word & UINT64_C(0x000000FF00000000)) >> 8) + | ((word & UINT64_C(0x0000FF0000000000)) >> 24) + | ((word & UINT64_C(0x00FF000000000000)) >> 40) + | ((word & UINT64_C(0xFF00000000000000)) >> 56)); +#endif +} + + +// Population count: count the number of 1's in 'x' +// (number of bits set to 1), also known as the Hamming weight. +// +// Implementation note: to keep the implementation simple, CPUID is not used +// to test whether the x86 POPCNT instruction can be used. For example, Visual +// Studio's __popcnt() is not used for this reason. The clang and GCC builtin +// function can use the x86 POPCNT instruction if the target architecture has +// SSE4a or newer. +static inline int +_Py_popcount32(uint32_t x) +{ +#if (defined(__clang__) || defined(__GNUC__)) + +#if SIZEOF_INT >= 4 + Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned int)); + return __builtin_popcount(x); +#else + // The C standard guarantees that unsigned long will always be big enough + // to hold a uint32_t value without losing information. + Py_BUILD_ASSERT(sizeof(x) <= sizeof(unsigned long)); + return __builtin_popcountl(x); +#endif + +#else + // 32-bit SWAR (SIMD Within A Register) popcount + + // Binary: 0 1 0 1 ... + const uint32_t M1 = 0x55555555; + // Binary: 00 11 00 11 ... + const uint32_t M2 = 0x33333333; + // Binary: 0000 1111 0000 1111 ... + const uint32_t M4 = 0x0F0F0F0F; + + // Put count of each 2 bits into those 2 bits + x = x - ((x >> 1) & M1); + // Put count of each 4 bits into those 4 bits + x = (x & M2) + ((x >> 2) & M2); + // Put count of each 8 bits into those 8 bits + x = (x + (x >> 4)) & M4; + // Sum of the 4 byte counts. + // Take care when considering changes to the next line. Portability and + // correctness are delicate here, thanks to C's "integer promotions" (C99 +// §6.3.1.1p2).
On machines where the `int` type has width greater than 32 + // bits, `x` will be promoted to an `int`, and following C's "usual + // arithmetic conversions" (C99 §6.3.1.8), the multiplication will be + // performed as a multiplication of two `unsigned int` operands. In this + // case it's critical that we cast back to `uint32_t` in order to keep only + // the least significant 32 bits. On machines where the `int` type has + // width no greater than 32, the multiplication is of two 32-bit unsigned + // integer types, and the (uint32_t) cast is a no-op. In both cases, we + // avoid the risk of undefined behaviour due to overflow of a + // multiplication of signed integer types. + return (uint32_t)(x * 0x01010101U) >> 24; +#endif +} + + +// Return the index of the most significant 1 bit in 'x'. This is the smallest +// integer k such that x < 2**k. Equivalent to floor(log2(x)) + 1 for x != 0. +static inline int +_Py_bit_length(unsigned long x) +{ +#if (defined(__clang__) || defined(__GNUC__)) + if (x != 0) { + // __builtin_clzl() is available since GCC 3.4. + // Undefined behavior for x == 0. + return (int)sizeof(unsigned long) * 8 - __builtin_clzl(x); + } + else { + return 0; + } +#elif defined(_MSC_VER) + // _BitScanReverse() is documented to search 32 bits. + Py_BUILD_ASSERT(sizeof(unsigned long) <= 4); + unsigned long msb; + if (_BitScanReverse(&msb, x)) { + return (int)msb + 1; + } + else { + return 0; + } +#else + const int BIT_LENGTH_TABLE[32] = { + 0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5 + }; + int msb = 0; + while (x >= 32) { + msb += 6; + x >>= 6; + } + msb += BIT_LENGTH_TABLE[x]; + return msb; +#endif +} + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_BITUTILS_H */ diff --git a/Include/internal/pycore_blocks_output_buffer.h b/Include/internal/pycore_blocks_output_buffer.h new file mode 100644 index 0000000000000000000000000000000000000000..573e10359b7bd271c2da4c9674a643b8e738ae41 --- /dev/null +++ b/Include/internal/pycore_blocks_output_buffer.h @@ -0,0 +1,321 @@ +/* + _BlocksOutputBuffer is used to maintain an output buffer + that has unpredictable size. Suitable for compression/decompression + API (bz2/lzma/zlib) that has stream->next_out and stream->avail_out: + + stream->next_out: points to the next output position. + stream->avail_out: the number of available bytes left in the buffer. + + It maintains a list of bytes objects, so there is no overhead of resizing + the buffer. + + Usage: + + 1, Initialize the struct instance like this: + _BlocksOutputBuffer buffer = {.list = NULL}; + Set .list to NULL for _BlocksOutputBuffer_OnError() + + 2, Initialize the buffer using one of these functions: + _BlocksOutputBuffer_InitAndGrow() + _BlocksOutputBuffer_InitWithSize() + + 3, If (avail_out == 0), grow the buffer: + _BlocksOutputBuffer_Grow() + + 4, Get the current outputted data size: + _BlocksOutputBuffer_GetDataSize() + + 5, Finish the buffer, and return a bytes object: + _BlocksOutputBuffer_Finish() + + 6, Clean up the buffer when an error occurs: + _BlocksOutputBuffer_OnError() +*/ + +#ifndef Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H +#define Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H +#ifdef __cplusplus +extern "C" { +#endif + +#include "Python.h" + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +typedef struct { + // List of bytes objects + PyObject *list; + // Total allocated size of all blocks + Py_ssize_t allocated; + // Max length of the buffer, negative number means unlimited length.
+ Py_ssize_t max_length; +} _BlocksOutputBuffer; + +static const char unable_allocate_msg[] = "Unable to allocate output buffer."; + +/* In a 32-bit build, the max block size should be <= INT32_MAX. */ +#define OUTPUT_BUFFER_MAX_BLOCK_SIZE (256*1024*1024) + +/* Block size sequence */ +#define KB (1024) +#define MB (1024*1024) +static const Py_ssize_t BUFFER_BLOCK_SIZE[] = + { 32*KB, 64*KB, 256*KB, 1*MB, 4*MB, 8*MB, 16*MB, 16*MB, + 32*MB, 32*MB, 32*MB, 32*MB, 64*MB, 64*MB, 128*MB, 128*MB, + OUTPUT_BUFFER_MAX_BLOCK_SIZE }; +#undef KB +#undef MB + +/* According to the block sizes defined by BUFFER_BLOCK_SIZE, the total + allocated size grows in these steps: + 1 32 KB +32 KB + 2 96 KB +64 KB + 3 352 KB +256 KB + 4 1.34 MB +1 MB + 5 5.34 MB +4 MB + 6 13.34 MB +8 MB + 7 29.34 MB +16 MB + 8 45.34 MB +16 MB + 9 77.34 MB +32 MB + 10 109.34 MB +32 MB + 11 141.34 MB +32 MB + 12 173.34 MB +32 MB + 13 237.34 MB +64 MB + 14 301.34 MB +64 MB + 15 429.34 MB +128 MB + 16 557.34 MB +128 MB + 17 813.34 MB +256 MB + 18 1069.34 MB +256 MB + 19 1325.34 MB +256 MB + 20 1581.34 MB +256 MB + 21 1837.34 MB +256 MB + 22 2093.34 MB +256 MB + ... +*/ + +/* Initialize the buffer, and grow the buffer. + + max_length: Max length of the buffer, -1 for unlimited length. + + On success, return allocated size (>=0) + On failure, return -1 +*/ +static inline Py_ssize_t +_BlocksOutputBuffer_InitAndGrow(_BlocksOutputBuffer *buffer, + const Py_ssize_t max_length, + void **next_out) +{ + PyObject *b; + Py_ssize_t block_size; + + // ensure .list was set to NULL + assert(buffer->list == NULL); + + // get block size + if (0 <= max_length && max_length < BUFFER_BLOCK_SIZE[0]) { + block_size = max_length; + } else { + block_size = BUFFER_BLOCK_SIZE[0]; + } + + // the first block + b = PyBytes_FromStringAndSize(NULL, block_size); + if (b == NULL) { + return -1; + } + + // create the list + buffer->list = PyList_New(1); + if (buffer->list == NULL) { + Py_DECREF(b); + return -1; + } + PyList_SET_ITEM(buffer->list, 0, b); + + // set variables + buffer->allocated = block_size; + buffer->max_length = max_length; + + *next_out = PyBytes_AS_STRING(b); + return block_size; +} + +/* Initialize the buffer, with an initial size. + + Check the block size limit in the outer wrapper function. For example, some + libs accept UINT32_MAX as the maximum block size, so init_size should be + <= it. + + On success, return allocated size (>=0) + On failure, return -1 +*/ +static inline Py_ssize_t +_BlocksOutputBuffer_InitWithSize(_BlocksOutputBuffer *buffer, + const Py_ssize_t init_size, + void **next_out) +{ + PyObject *b; + + // ensure .list was set to NULL + assert(buffer->list == NULL); + + // the first block + b = PyBytes_FromStringAndSize(NULL, init_size); + if (b == NULL) { + PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); + return -1; + } + + // create the list + buffer->list = PyList_New(1); + if (buffer->list == NULL) { + Py_DECREF(b); + return -1; + } + PyList_SET_ITEM(buffer->list, 0, b); + + // set variables + buffer->allocated = init_size; + buffer->max_length = -1; + + *next_out = PyBytes_AS_STRING(b); + return init_size; +} + +/* Grow the buffer. The avail_out must be 0; check it before calling.
+ + On success, return allocated size (>=0) + On failure, return -1 +*/ +static inline Py_ssize_t +_BlocksOutputBuffer_Grow(_BlocksOutputBuffer *buffer, + void **next_out, + const Py_ssize_t avail_out) +{ + PyObject *b; + const Py_ssize_t list_len = Py_SIZE(buffer->list); + Py_ssize_t block_size; + + // ensure no gaps in the data + if (avail_out != 0) { + PyErr_SetString(PyExc_SystemError, + "avail_out is non-zero in _BlocksOutputBuffer_Grow()."); + return -1; + } + + // get block size + if (list_len < (Py_ssize_t) Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE)) { + block_size = BUFFER_BLOCK_SIZE[list_len]; + } else { + block_size = BUFFER_BLOCK_SIZE[Py_ARRAY_LENGTH(BUFFER_BLOCK_SIZE) - 1]; + } + + // check max_length + if (buffer->max_length >= 0) { + // if (rest == 0), should not grow the buffer. + Py_ssize_t rest = buffer->max_length - buffer->allocated; + assert(rest > 0); + + // block_size of the last block + if (block_size > rest) { + block_size = rest; + } + } + + // check buffer->allocated overflow + if (block_size > PY_SSIZE_T_MAX - buffer->allocated) { + PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); + return -1; + } + + // create the block + b = PyBytes_FromStringAndSize(NULL, block_size); + if (b == NULL) { + PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); + return -1; + } + if (PyList_Append(buffer->list, b) < 0) { + Py_DECREF(b); + return -1; + } + Py_DECREF(b); + + // set variables + buffer->allocated += block_size; + + *next_out = PyBytes_AS_STRING(b); + return block_size; +} + +/* Return the current outputted data size. */ +static inline Py_ssize_t +_BlocksOutputBuffer_GetDataSize(_BlocksOutputBuffer *buffer, + const Py_ssize_t avail_out) +{ + return buffer->allocated - avail_out; +} + +/* Finish the buffer. + + Return a bytes object on success + Return NULL on failure +*/ +static inline PyObject * +_BlocksOutputBuffer_Finish(_BlocksOutputBuffer *buffer, + const Py_ssize_t avail_out) +{ + PyObject *result, *block; + const Py_ssize_t list_len = Py_SIZE(buffer->list); + + // fast path for single block + if ((list_len == 1 && avail_out == 0) || + (list_len == 2 && Py_SIZE(PyList_GET_ITEM(buffer->list, 1)) == avail_out)) + { + block = PyList_GET_ITEM(buffer->list, 0); + Py_INCREF(block); + + Py_CLEAR(buffer->list); + return block; + } + + // final bytes object + result = PyBytes_FromStringAndSize(NULL, buffer->allocated - avail_out); + if (result == NULL) { + PyErr_SetString(PyExc_MemoryError, unable_allocate_msg); + return NULL; + } + + // memory copy + if (list_len > 0) { + char *posi = PyBytes_AS_STRING(result); + + // blocks except the last one + Py_ssize_t i = 0; + for (; i < list_len-1; i++) { + block = PyList_GET_ITEM(buffer->list, i); + memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block)); + posi += Py_SIZE(block); + } + // the last block + block = PyList_GET_ITEM(buffer->list, i); + memcpy(posi, PyBytes_AS_STRING(block), Py_SIZE(block) - avail_out); + } else { + assert(Py_SIZE(result) == 0); + } + + Py_CLEAR(buffer->list); + return result; +} + +/* Clean up the buffer when an error occurred. 
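+
+   Safe to call at any point after step 1 of the usage sequence above: if
+   .list is still NULL (or was already cleared), Py_CLEAR() is a no-op.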
*/
+static inline void
+_BlocksOutputBuffer_OnError(_BlocksOutputBuffer *buffer)
+{
+    Py_CLEAR(buffer->list);
+}
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* Py_INTERNAL_BLOCKS_OUTPUT_BUFFER_H */
diff --git a/Include/internal/pycore_brc.h b/Include/internal/pycore_brc.h
new file mode 100644
index 0000000000000000000000000000000000000000..3453d83b57ca97e5dcc0ca4c98a1ced4e8b0fc1c
--- /dev/null
+++ b/Include/internal/pycore_brc.h
@@ -0,0 +1,74 @@
+#ifndef Py_INTERNAL_BRC_H
+#define Py_INTERNAL_BRC_H
+
+#include <stdint.h>
+#include "pycore_llist.h"         // struct llist_node
+#include "pycore_lock.h"          // PyMutex
+#include "pycore_object_stack.h"  // _PyObjectStack
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#ifdef Py_GIL_DISABLED
+
+// Prime number to avoid correlations with memory addresses.
+#define _Py_BRC_NUM_BUCKETS 257
+
+// Hash table bucket
+struct _brc_bucket {
+    // Mutex protects both the bucket and thread state queues in this bucket.
+    PyMutex mutex;
+
+    // Linked list of _PyThreadStateImpl objects hashed to this bucket.
+    struct llist_node root;
+};
+
+// Per-interpreter biased reference counting state
+struct _brc_state {
+    // Hash table of thread states by thread-id. Thread states within a bucket
+    // are chained using a doubly-linked list.
+    struct _brc_bucket table[_Py_BRC_NUM_BUCKETS];
+};
+
+// Per-thread biased reference counting state
+struct _brc_thread_state {
+    // Linked-list of thread states per hash bucket
+    struct llist_node bucket_node;
+
+    // Thread-id as determined by _PyThread_Id()
+    uintptr_t tid;
+
+    // Objects with refcounts to be merged (protected by bucket mutex)
+    _PyObjectStack objects_to_merge;
+
+    // Local stack of objects to be merged (not accessed by other threads)
+    _PyObjectStack local_objects_to_merge;
+};
+
+// Initialize/finalize the per-thread biased reference counting state
+void _Py_brc_init_thread(PyThreadState *tstate);
+void _Py_brc_remove_thread(PyThreadState *tstate);
+
+// Initialize per-interpreter state
+void _Py_brc_init_state(PyInterpreterState *interp);
+
+void _Py_brc_after_fork(PyInterpreterState *interp);
+
+// Enqueues an object to be merged by its owning thread (tid). This
+// steals a reference to the object.
+void _Py_brc_queue_object(PyObject *ob);
+
+// Merge the refcounts of queued objects for the current thread.
+void _Py_brc_merge_refcounts(PyThreadState *tstate);
+
+#endif /* Py_GIL_DISABLED */
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_BRC_H */
diff --git a/Include/internal/pycore_bytes_methods.h b/Include/internal/pycore_bytes_methods.h
new file mode 100644
index 0000000000000000000000000000000000000000..059dc2599bbd77e861dbf0649372281e306de75e
--- /dev/null
+++ b/Include/internal/pycore_bytes_methods.h
@@ -0,0 +1,82 @@
+#ifndef Py_LIMITED_API
+#ifndef Py_BYTES_CTYPE_H
+#define Py_BYTES_CTYPE_H
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+/*
+ * The internal implementation behind PyBytes (bytes) and PyByteArray (bytearray)
+ * methods of the given names; they operate on ASCII byte strings.
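+ *
+ * For example, both bytes.isdigit() and bytearray.isdigit() forward to
+ * _Py_bytes_isdigit(), passing the object's underlying buffer and length.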
+ */ +extern PyObject* _Py_bytes_isspace(const char *cptr, Py_ssize_t len); +extern PyObject* _Py_bytes_isalpha(const char *cptr, Py_ssize_t len); +extern PyObject* _Py_bytes_isalnum(const char *cptr, Py_ssize_t len); +extern PyObject* _Py_bytes_isascii(const char *cptr, Py_ssize_t len); +extern PyObject* _Py_bytes_isdigit(const char *cptr, Py_ssize_t len); +extern PyObject* _Py_bytes_islower(const char *cptr, Py_ssize_t len); +extern PyObject* _Py_bytes_isupper(const char *cptr, Py_ssize_t len); +extern PyObject* _Py_bytes_istitle(const char *cptr, Py_ssize_t len); + +/* These store their len sized answer in the given preallocated *result arg. */ +extern void _Py_bytes_lower(char *result, const char *cptr, Py_ssize_t len); +extern void _Py_bytes_upper(char *result, const char *cptr, Py_ssize_t len); +extern void _Py_bytes_title(char *result, const char *s, Py_ssize_t len); +extern void _Py_bytes_capitalize(char *result, const char *s, Py_ssize_t len); +extern void _Py_bytes_swapcase(char *result, const char *s, Py_ssize_t len); + +extern PyObject *_Py_bytes_find(const char *str, Py_ssize_t len, PyObject *sub, + Py_ssize_t start, Py_ssize_t end); +extern PyObject *_Py_bytes_index(const char *str, Py_ssize_t len, PyObject *sub, + Py_ssize_t start, Py_ssize_t end); +extern PyObject *_Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *sub, + Py_ssize_t start, Py_ssize_t end); +extern PyObject *_Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *sub, + Py_ssize_t start, Py_ssize_t end); +extern PyObject *_Py_bytes_count(const char *str, Py_ssize_t len, PyObject *sub, + Py_ssize_t start, Py_ssize_t end); +extern int _Py_bytes_contains(const char *str, Py_ssize_t len, PyObject *arg); +extern PyObject *_Py_bytes_startswith(const char *str, Py_ssize_t len, + PyObject *subobj, Py_ssize_t start, + Py_ssize_t end); +extern PyObject *_Py_bytes_endswith(const char *str, Py_ssize_t len, + PyObject *subobj, Py_ssize_t start, + Py_ssize_t end); + +/* The maketrans() static method. */ +extern PyObject* _Py_bytes_maketrans(Py_buffer *frm, Py_buffer *to); + +/* Shared __doc__ strings. 
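+
+   Each one is defined exactly once via PyDoc_STRVAR_shared() (see the end of
+   this header) and then shared by the bytes and bytearray method tables,
+   rather than duplicating the text in each object file.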
*/
+extern const char _Py_isspace__doc__[];
+extern const char _Py_isalpha__doc__[];
+extern const char _Py_isalnum__doc__[];
+extern const char _Py_isascii__doc__[];
+extern const char _Py_isdigit__doc__[];
+extern const char _Py_islower__doc__[];
+extern const char _Py_isupper__doc__[];
+extern const char _Py_istitle__doc__[];
+extern const char _Py_lower__doc__[];
+extern const char _Py_upper__doc__[];
+extern const char _Py_title__doc__[];
+extern const char _Py_capitalize__doc__[];
+extern const char _Py_swapcase__doc__[];
+extern const char _Py_count__doc__[];
+extern const char _Py_find__doc__[];
+extern const char _Py_index__doc__[];
+extern const char _Py_rfind__doc__[];
+extern const char _Py_rindex__doc__[];
+extern const char _Py_startswith__doc__[];
+extern const char _Py_endswith__doc__[];
+extern const char _Py_maketrans__doc__[];
+extern const char _Py_expandtabs__doc__[];
+extern const char _Py_ljust__doc__[];
+extern const char _Py_rjust__doc__[];
+extern const char _Py_center__doc__[];
+extern const char _Py_zfill__doc__[];
+
+/* this is needed because some docs are shared from the .o, not static */
+#define PyDoc_STRVAR_shared(name,str) const char name[] = PyDoc_STR(str)
+
+#endif /* !Py_BYTES_CTYPE_H */
+#endif /* !Py_LIMITED_API */
diff --git a/Include/internal/pycore_bytesobject.h b/Include/internal/pycore_bytesobject.h
new file mode 100644
index 0000000000000000000000000000000000000000..8c922a4fb3037ae30b6aa5380dac229026eac263
--- /dev/null
+++ b/Include/internal/pycore_bytesobject.h
@@ -0,0 +1,152 @@
+#ifndef Py_INTERNAL_BYTESOBJECT_H
+#define Py_INTERNAL_BYTESOBJECT_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+extern PyObject* _PyBytes_FormatEx(
+    const char *format,
+    Py_ssize_t format_len,
+    PyObject *args,
+    int use_bytearray);
+
+extern PyObject* _PyBytes_FromHex(
+    PyObject *string,
+    int use_bytearray);
+
+// Helper for PyBytes_DecodeEscape that detects invalid escape chars.
+// Export for test_peg_generator.
+PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape2(const char *, Py_ssize_t,
+                                             const char *,
+                                             int *, const char **);
+// Export for binary compatibility.
+PyAPI_FUNC(PyObject*) _PyBytes_DecodeEscape(const char *, Py_ssize_t,
+                                            const char *, const char **);
+
+
+// Substring Search.
+//
+// Returns the index of the first occurrence of
+// a substring ("needle") in a larger text ("haystack").
+// If the needle is not found, return -1.
+// If the needle is found, add offset to the index.
+//
+// Export for 'mmap' shared extension.
+PyAPI_FUNC(Py_ssize_t)
+_PyBytes_Find(const char *haystack, Py_ssize_t len_haystack,
+              const char *needle, Py_ssize_t len_needle,
+              Py_ssize_t offset);
+
+// Same as above, but search right-to-left.
+// Export for 'mmap' shared extension.
+PyAPI_FUNC(Py_ssize_t)
+_PyBytes_ReverseFind(const char *haystack, Py_ssize_t len_haystack,
+                     const char *needle, Py_ssize_t len_needle,
+                     Py_ssize_t offset);
+
+
+// Helper function to implement the repeat and inplace repeat methods on a
+// buffer.
+//
+// len_dest is assumed to be an integer multiple of len_src.
+// If src equals dest, then assume the operation is inplace.
+//
+// This method repeatedly doubles the number of bytes copied to reduce
+// the number of invocations of memcpy.
+//
+// Export for 'array' shared extension.
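+//
+// For example (a sketch): repeating a 3-byte pattern into a 24-byte
+// destination copies the 3 source bytes once, then doubles in place,
+// 3 -> 6 -> 12 -> 24, so 4 memcpy calls suffice instead of 8.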
+PyAPI_FUNC(void)
+_PyBytes_Repeat(char* dest, Py_ssize_t len_dest,
+                const char* src, Py_ssize_t len_src);
+
+/* --- _PyBytesWriter ----------------------------------------------------- */
+
+/* The _PyBytesWriter structure is big: it contains an embedded "stack buffer".
+   A _PyBytesWriter variable must be declared last among a function's local
+   variables to optimize the memory allocation on the stack. */
+typedef struct {
+    /* bytes, bytearray or NULL (when the small buffer is used) */
+    PyObject *buffer;
+
+    /* Number of allocated bytes. */
+    Py_ssize_t allocated;
+
+    /* Minimum number of allocated bytes,
+       incremented by _PyBytesWriter_Prepare() */
+    Py_ssize_t min_size;
+
+    /* If non-zero, use a bytearray instead of a bytes object for buffer. */
+    int use_bytearray;
+
+    /* If non-zero, overallocate the buffer (default: 0).
+       This flag must be zero if use_bytearray is non-zero. */
+    int overallocate;
+
+    /* Stack buffer */
+    int use_small_buffer;
+    char small_buffer[512];
+} _PyBytesWriter;
+
+/* Initialize a bytes writer
+
+   By default, the overallocation is disabled. Set the overallocate attribute
+   to control the allocation of the buffer.
+
+   Export _PyBytesWriter API for '_pickle' shared extension. */
+PyAPI_FUNC(void) _PyBytesWriter_Init(_PyBytesWriter *writer);
+
+/* Get the buffer content and reset the writer.
+   Return a bytes object, or a bytearray object if use_bytearray is non-zero.
+   Raise an exception and return NULL on error. */
+PyAPI_FUNC(PyObject *) _PyBytesWriter_Finish(_PyBytesWriter *writer,
+                                             void *str);
+
+/* Deallocate memory of a writer (clear its internal buffer). */
+PyAPI_FUNC(void) _PyBytesWriter_Dealloc(_PyBytesWriter *writer);
+
+/* Allocate the buffer to write size bytes.
+   Return the pointer to the beginning of buffer data.
+   Raise an exception and return NULL on error. */
+PyAPI_FUNC(void*) _PyBytesWriter_Alloc(_PyBytesWriter *writer,
+                                       Py_ssize_t size);
+
+/* Ensure that the buffer is large enough to write *size* bytes.
+   Add size to the writer minimum size (min_size attribute).
+
+   str is the current pointer inside the buffer.
+   Return the updated current pointer inside the buffer.
+   Raise an exception and return NULL on error. */
+PyAPI_FUNC(void*) _PyBytesWriter_Prepare(_PyBytesWriter *writer,
+                                         void *str,
+                                         Py_ssize_t size);
+
+/* Resize the buffer to make it larger.
+   The new buffer may be larger than size bytes because of overallocation.
+   Return the updated current pointer inside the buffer.
+   Raise an exception and return NULL on error.
+
+   Note: size must be greater than the number of allocated bytes in the writer.
+
+   This function doesn't use the writer minimum size (min_size attribute).
+
+   See also _PyBytesWriter_Prepare().
+   */
+PyAPI_FUNC(void*) _PyBytesWriter_Resize(_PyBytesWriter *writer,
+                                        void *str,
+                                        Py_ssize_t size);
+
+/* Write bytes.
+   Raise an exception and return NULL on error.
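+
+   Typical writer usage (a minimal sketch):
+
+       char *str;
+       _PyBytesWriter writer;
+
+       _PyBytesWriter_Init(&writer);
+       str = _PyBytesWriter_Alloc(&writer, 3);
+       if (str == NULL) goto error;
+       str = _PyBytesWriter_WriteBytes(&writer, str, "abc", 3);
+       if (str == NULL) goto error;
+       return _PyBytesWriter_Finish(&writer, str);
+   error:
+       _PyBytesWriter_Dealloc(&writer);
+       return NULL;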
*/ +PyAPI_FUNC(void*) _PyBytesWriter_WriteBytes(_PyBytesWriter *writer, + void *str, + const void *bytes, + Py_ssize_t size); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_BYTESOBJECT_H */ diff --git a/Include/internal/pycore_call.h b/Include/internal/pycore_call.h new file mode 100644 index 0000000000000000000000000000000000000000..c92028a01299e2d8090b37fe85947678579aca3e --- /dev/null +++ b/Include/internal/pycore_call.h @@ -0,0 +1,205 @@ +#ifndef Py_INTERNAL_CALL_H +#define Py_INTERNAL_CALL_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_identifier.h" // _Py_Identifier +#include "pycore_pystate.h" // _PyThreadState_GET() + +/* Suggested size (number of positional arguments) for arrays of PyObject* + allocated on a C stack to avoid allocating memory on the heap memory. Such + array is used to pass positional arguments to call functions of the + PyObject_Vectorcall() family. + + The size is chosen to not abuse the C stack and so limit the risk of stack + overflow. The size is also chosen to allow using the small stack for most + function calls of the Python standard library. On 64-bit CPU, it allocates + 40 bytes on the stack. */ +#define _PY_FASTCALL_SMALL_STACK 5 + + +// Export for 'math' shared extension, used via _PyObject_VectorcallTstate() +// static inline function. +PyAPI_FUNC(PyObject*) _Py_CheckFunctionResult( + PyThreadState *tstate, + PyObject *callable, + PyObject *result, + const char *where); + +extern PyObject* _PyObject_Call_Prepend( + PyThreadState *tstate, + PyObject *callable, + PyObject *obj, + PyObject *args, + PyObject *kwargs); + +extern PyObject* _PyObject_VectorcallDictTstate( + PyThreadState *tstate, + PyObject *callable, + PyObject *const *args, + size_t nargsf, + PyObject *kwargs); + +extern PyObject* _PyObject_Call( + PyThreadState *tstate, + PyObject *callable, + PyObject *args, + PyObject *kwargs); + +extern PyObject * _PyObject_CallMethodFormat( + PyThreadState *tstate, + PyObject *callable, + const char *format, + ...); + +// Export for 'array' shared extension +PyAPI_FUNC(PyObject*) _PyObject_CallMethod( + PyObject *obj, + PyObject *name, + const char *format, ...); + +extern PyObject* _PyObject_CallMethodIdObjArgs( + PyObject *obj, + _Py_Identifier *name, + ...); + +static inline PyObject * +_PyObject_VectorcallMethodId( + _Py_Identifier *name, PyObject *const *args, + size_t nargsf, PyObject *kwnames) +{ + PyObject *oname = _PyUnicode_FromId(name); /* borrowed */ + if (!oname) { + return _Py_NULL; + } + return PyObject_VectorcallMethod(oname, args, nargsf, kwnames); +} + +static inline PyObject * +_PyObject_CallMethodIdNoArgs(PyObject *self, _Py_Identifier *name) +{ + size_t nargsf = 1 | PY_VECTORCALL_ARGUMENTS_OFFSET; + return _PyObject_VectorcallMethodId(name, &self, nargsf, _Py_NULL); +} + +static inline PyObject * +_PyObject_CallMethodIdOneArg(PyObject *self, _Py_Identifier *name, PyObject *arg) +{ + PyObject *args[2] = {self, arg}; + size_t nargsf = 2 | PY_VECTORCALL_ARGUMENTS_OFFSET; + assert(arg != NULL); + return _PyObject_VectorcallMethodId(name, args, nargsf, _Py_NULL); +} + + +/* === Vectorcall protocol (PEP 590) ============================= */ + +// Call callable using tp_call. Arguments are like PyObject_Vectorcall(), +// except that nargs is plainly the number of arguments without flags. +// +// Export for 'math' shared extension, used via _PyObject_VectorcallTstate() +// static inline function. 
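+//
+// _PyObject_VectorcallTstate() falls back to this function whenever the
+// callable's type does not have the Py_TPFLAGS_HAVE_VECTORCALL flag set, in
+// which case _PyVectorcall_FunctionInline() below returns NULL.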
+PyAPI_FUNC(PyObject*) _PyObject_MakeTpCall( + PyThreadState *tstate, + PyObject *callable, + PyObject *const *args, Py_ssize_t nargs, + PyObject *keywords); + +// Static inline variant of public PyVectorcall_Function(). +static inline vectorcallfunc +_PyVectorcall_FunctionInline(PyObject *callable) +{ + assert(callable != NULL); + + PyTypeObject *tp = Py_TYPE(callable); + if (!PyType_HasFeature(tp, Py_TPFLAGS_HAVE_VECTORCALL)) { + return NULL; + } + assert(PyCallable_Check(callable)); + + Py_ssize_t offset = tp->tp_vectorcall_offset; + assert(offset > 0); + + vectorcallfunc ptr; + memcpy(&ptr, (char *) callable + offset, sizeof(ptr)); + return ptr; +} + + +/* Call the callable object 'callable' with the "vectorcall" calling + convention. + + args is a C array for positional arguments. + + nargsf is the number of positional arguments plus optionally the flag + PY_VECTORCALL_ARGUMENTS_OFFSET which means that the caller is allowed to + modify args[-1]. + + kwnames is a tuple of keyword names. The values of the keyword arguments + are stored in "args" after the positional arguments (note that the number + of keyword arguments does not change nargsf). kwnames can also be NULL if + there are no keyword arguments. + + keywords must only contain strings and all keys must be unique. + + Return the result on success. Raise an exception and return NULL on + error. */ +static inline PyObject * +_PyObject_VectorcallTstate(PyThreadState *tstate, PyObject *callable, + PyObject *const *args, size_t nargsf, + PyObject *kwnames) +{ + vectorcallfunc func; + PyObject *res; + + assert(kwnames == NULL || PyTuple_Check(kwnames)); + assert(args != NULL || PyVectorcall_NARGS(nargsf) == 0); + + func = _PyVectorcall_FunctionInline(callable); + if (func == NULL) { + Py_ssize_t nargs = PyVectorcall_NARGS(nargsf); + return _PyObject_MakeTpCall(tstate, callable, args, nargs, kwnames); + } + res = func(callable, args, nargsf, kwnames); + return _Py_CheckFunctionResult(tstate, callable, res, NULL); +} + + +static inline PyObject * +_PyObject_CallNoArgsTstate(PyThreadState *tstate, PyObject *func) { + return _PyObject_VectorcallTstate(tstate, func, NULL, 0, NULL); +} + + +// Private static inline function variant of public PyObject_CallNoArgs() +static inline PyObject * +_PyObject_CallNoArgs(PyObject *func) { + EVAL_CALL_STAT_INC_IF_FUNCTION(EVAL_CALL_API, func); + PyThreadState *tstate = _PyThreadState_GET(); + return _PyObject_VectorcallTstate(tstate, func, NULL, 0, NULL); +} + + +extern PyObject *const * +_PyStack_UnpackDict(PyThreadState *tstate, + PyObject *const *args, Py_ssize_t nargs, + PyObject *kwargs, PyObject **p_kwnames); + +extern void _PyStack_UnpackDict_Free( + PyObject *const *stack, + Py_ssize_t nargs, + PyObject *kwnames); + +extern void _PyStack_UnpackDict_FreeNoDecRef( + PyObject *const *stack, + PyObject *kwnames); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_CALL_H */ diff --git a/Include/internal/pycore_capsule.h b/Include/internal/pycore_capsule.h new file mode 100644 index 0000000000000000000000000000000000000000..aa2c67f3a8f002ccef0e9b32fe4ac0cf8271164a --- /dev/null +++ b/Include/internal/pycore_capsule.h @@ -0,0 +1,17 @@ +#ifndef Py_INTERNAL_PYCAPSULE_H +#define Py_INTERNAL_PYCAPSULE_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +// Export for '_socket' shared extension +PyAPI_FUNC(int) _PyCapsule_SetTraverse(PyObject *op, traverseproc traverse_func, inquiry clear_func); + +#ifdef __cplusplus +} 
+#endif
+#endif /* !Py_INTERNAL_PYCAPSULE_H */
diff --git a/Include/internal/pycore_cell.h b/Include/internal/pycore_cell.h
new file mode 100644
index 0000000000000000000000000000000000000000..27f67d57b2fb794296b4ea86e5497a7d5a6fdc63
--- /dev/null
+++ b/Include/internal/pycore_cell.h
@@ -0,0 +1,48 @@
+#ifndef Py_INTERNAL_CELL_H
+#define Py_INTERNAL_CELL_H
+
+#include "pycore_critical_section.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+// Sets the cell contents to `value` and returns the previous contents.
+// Steals a reference to `value`.
+static inline PyObject *
+PyCell_SwapTakeRef(PyCellObject *cell, PyObject *value)
+{
+    PyObject *old_value;
+    Py_BEGIN_CRITICAL_SECTION(cell);
+    old_value = cell->ob_ref;
+    cell->ob_ref = value;
+    Py_END_CRITICAL_SECTION();
+    return old_value;
+}
+
+static inline void
+PyCell_SetTakeRef(PyCellObject *cell, PyObject *value)
+{
+    PyObject *old_value = PyCell_SwapTakeRef(cell, value);
+    Py_XDECREF(old_value);
+}
+
+// Gets the cell contents. Returns a new reference.
+static inline PyObject *
+PyCell_GetRef(PyCellObject *cell)
+{
+    PyObject *res;
+    Py_BEGIN_CRITICAL_SECTION(cell);
+    res = Py_XNewRef(cell->ob_ref);
+    Py_END_CRITICAL_SECTION();
+    return res;
+}
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_CELL_H */
diff --git a/Include/internal/pycore_ceval.h b/Include/internal/pycore_ceval.h
new file mode 100644
index 0000000000000000000000000000000000000000..043f5957d481e5ea15cf054299d46055662099cd
--- /dev/null
+++ b/Include/internal/pycore_ceval.h
@@ -0,0 +1,303 @@
+#ifndef Py_INTERNAL_CEVAL_H
+#define Py_INTERNAL_CEVAL_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include "dynamic_annotations.h"  // _Py_ANNOTATE_RWLOCK_CREATE
+
+#include "pycore_interp.h"        // PyInterpreterState.eval_frame
+#include "pycore_pystate.h"       // _PyThreadState_GET()
+
+/* Forward declarations */
+struct pyruntimestate;
+struct _ceval_runtime_state;
+
+// Export for '_lsprof' shared extension
+PyAPI_FUNC(int) _PyEval_SetProfile(PyThreadState *tstate, Py_tracefunc func, PyObject *arg);
+
+extern int _PyEval_SetTrace(PyThreadState *tstate, Py_tracefunc func, PyObject *arg);
+
+extern int _PyEval_SetOpcodeTrace(PyFrameObject *f, bool enable);
+
+// Helper to look up a builtin object
+// Export for 'array' shared extension
+PyAPI_FUNC(PyObject*) _PyEval_GetBuiltin(PyObject *);
+
+extern PyObject* _PyEval_GetBuiltinId(_Py_Identifier *);
+
+extern void _PyEval_SetSwitchInterval(unsigned long microseconds);
+extern unsigned long _PyEval_GetSwitchInterval(void);
+
+// Export for '_queue' shared extension
+PyAPI_FUNC(int) _PyEval_MakePendingCalls(PyThreadState *);
+
+#ifndef Py_DEFAULT_RECURSION_LIMIT
+# define Py_DEFAULT_RECURSION_LIMIT 1000
+#endif
+
+extern void _Py_FinishPendingCalls(PyThreadState *tstate);
+extern void _PyEval_InitState(PyInterpreterState *);
+extern void _PyEval_SignalReceived(void);
+
+// bitwise flags:
+#define _Py_PENDING_MAINTHREADONLY 1
+#define _Py_PENDING_RAWFREE 2
+
+typedef int _Py_add_pending_call_result;
+#define _Py_ADD_PENDING_SUCCESS 0
+#define _Py_ADD_PENDING_FULL -1
+
+// Export for '_testinternalcapi' shared extension
+PyAPI_FUNC(_Py_add_pending_call_result) _PyEval_AddPendingCall(
+    PyInterpreterState *interp,
+    _Py_pending_call_func func,
+    void *arg,
+    int flags);
+
+#ifdef HAVE_FORK
+extern PyStatus _PyEval_ReInitThreads(PyThreadState *tstate);
+#endif
+
+// Used
by sys.call_tracing() +extern PyObject* _PyEval_CallTracing(PyObject *func, PyObject *args); + +// Used by sys.get_asyncgen_hooks() +extern PyObject* _PyEval_GetAsyncGenFirstiter(void); +extern PyObject* _PyEval_GetAsyncGenFinalizer(void); + +// Used by sys.set_asyncgen_hooks() +extern int _PyEval_SetAsyncGenFirstiter(PyObject *); +extern int _PyEval_SetAsyncGenFinalizer(PyObject *); + +// Used by sys.get_coroutine_origin_tracking_depth() +// and sys.set_coroutine_origin_tracking_depth() +extern int _PyEval_GetCoroutineOriginTrackingDepth(void); +extern int _PyEval_SetCoroutineOriginTrackingDepth(int depth); + +extern void _PyEval_Fini(void); + + +extern PyObject* _PyEval_GetBuiltins(PyThreadState *tstate); +extern PyObject* _PyEval_BuiltinsFromGlobals( + PyThreadState *tstate, + PyObject *globals); + +// Trampoline API + +typedef struct { + // Callback to initialize the trampoline state + void* (*init_state)(void); + // Callback to register every trampoline being created + void (*write_state)(void* state, const void *code_addr, + unsigned int code_size, PyCodeObject* code); + // Callback to free the trampoline state + int (*free_state)(void* state); +} _PyPerf_Callbacks; + +extern int _PyPerfTrampoline_SetCallbacks(_PyPerf_Callbacks *); +extern void _PyPerfTrampoline_GetCallbacks(_PyPerf_Callbacks *); +extern int _PyPerfTrampoline_Init(int activate); +extern int _PyPerfTrampoline_Fini(void); +extern void _PyPerfTrampoline_FreeArenas(void); +extern int _PyIsPerfTrampolineActive(void); +extern PyStatus _PyPerfTrampoline_AfterFork_Child(void); +#ifdef PY_HAVE_PERF_TRAMPOLINE +extern _PyPerf_Callbacks _Py_perfmap_callbacks; +extern _PyPerf_Callbacks _Py_perfmap_jit_callbacks; +#endif + +static inline PyObject* +_PyEval_EvalFrame(PyThreadState *tstate, struct _PyInterpreterFrame *frame, int throwflag) +{ + EVAL_CALL_STAT_INC(EVAL_CALL_TOTAL); + if (tstate->interp->eval_frame == NULL) { + return _PyEval_EvalFrameDefault(tstate, frame, throwflag); + } + return tstate->interp->eval_frame(tstate, frame, throwflag); +} + +extern PyObject* +_PyEval_Vector(PyThreadState *tstate, + PyFunctionObject *func, PyObject *locals, + PyObject* const* args, size_t argcount, + PyObject *kwnames); + +extern int _PyEval_ThreadsInitialized(void); +extern void _PyEval_InitGIL(PyThreadState *tstate, int own_gil); +extern void _PyEval_FiniGIL(PyInterpreterState *interp); + +extern void _PyEval_AcquireLock(PyThreadState *tstate); + +extern void _PyEval_ReleaseLock(PyInterpreterState *, PyThreadState *, + int final_release); + +#ifdef Py_GIL_DISABLED +// Returns 0 or 1 if the GIL for the given thread's interpreter is disabled or +// enabled, respectively. +// +// The enabled state of the GIL will not change while one or more threads are +// attached. +static inline int +_PyEval_IsGILEnabled(PyThreadState *tstate) +{ + struct _gil_runtime_state *gil = tstate->interp->ceval.gil; + return _Py_atomic_load_int_relaxed(&gil->enabled) != 0; +} + +// Enable or disable the GIL used by the interpreter that owns tstate, which +// must be the current thread. This may affect other interpreters, if the GIL +// is shared. All three functions will be no-ops (and return 0) if the +// interpreter's `enable_gil' config is not _PyConfig_GIL_DEFAULT. +// +// Every call to _PyEval_EnableGILTransient() must be paired with exactly one +// call to either _PyEval_EnableGILPermanent() or +// _PyEval_DisableGIL(). 
_PyEval_EnableGILPermanent() and _PyEval_DisableGIL() +// must only be called while the GIL is enabled from a call to +// _PyEval_EnableGILTransient(). +// +// _PyEval_EnableGILTransient() returns 1 if it enabled the GIL, or 0 if the +// GIL was already enabled, whether transiently or permanently. The caller will +// hold the GIL upon return. +// +// _PyEval_EnableGILPermanent() returns 1 if it permanently enabled the GIL +// (which must already be enabled), or 0 if it was already permanently +// enabled. Once _PyEval_EnableGILPermanent() has been called once, all +// subsequent calls to any of the three functions will be no-ops. +// +// _PyEval_DisableGIL() returns 1 if it disabled the GIL, or 0 if the GIL was +// kept enabled because of another request, whether transient or permanent. +// +// All three functions must be called by an attached thread (this implies that +// if the GIL is enabled, the current thread must hold it). +extern int _PyEval_EnableGILTransient(PyThreadState *tstate); +extern int _PyEval_EnableGILPermanent(PyThreadState *tstate); +extern int _PyEval_DisableGIL(PyThreadState *state); +#endif + +extern void _PyEval_DeactivateOpCache(void); + + +/* --- _Py_EnterRecursiveCall() ----------------------------------------- */ + +#ifdef USE_STACKCHECK +/* With USE_STACKCHECK macro defined, trigger stack checks in + _Py_CheckRecursiveCall() on every 64th call to _Py_EnterRecursiveCall. */ +static inline int _Py_MakeRecCheck(PyThreadState *tstate) { + return (tstate->c_recursion_remaining-- < 0 + || (tstate->c_recursion_remaining & 63) == 0); +} +#else +static inline int _Py_MakeRecCheck(PyThreadState *tstate) { + return tstate->c_recursion_remaining-- < 0; +} +#endif + +// Export for '_json' shared extension, used via _Py_EnterRecursiveCall() +// static inline function. 
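+//
+// A typical call site pairs the enter/leave helpers like this (a sketch;
+// do_recursive_work() is a hypothetical helper):
+//
+//     if (_Py_EnterRecursiveCall(" while doing recursive work")) {
+//         return NULL;  // limit hit: RecursionError is already set
+//     }
+//     res = do_recursive_work(obj);
+//     _Py_LeaveRecursiveCall();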
+PyAPI_FUNC(int) _Py_CheckRecursiveCall( + PyThreadState *tstate, + const char *where); + +int _Py_CheckRecursiveCallPy( + PyThreadState *tstate); + +static inline int _Py_EnterRecursiveCallTstate(PyThreadState *tstate, + const char *where) { + return (_Py_MakeRecCheck(tstate) && _Py_CheckRecursiveCall(tstate, where)); +} + +static inline void _Py_EnterRecursiveCallTstateUnchecked(PyThreadState *tstate) { + assert(tstate->c_recursion_remaining > 0); + tstate->c_recursion_remaining--; +} + +static inline int _Py_EnterRecursiveCall(const char *where) { + PyThreadState *tstate = _PyThreadState_GET(); + return _Py_EnterRecursiveCallTstate(tstate, where); +} + +static inline void _Py_LeaveRecursiveCallTstate(PyThreadState *tstate) { + tstate->c_recursion_remaining++; +} + +static inline void _Py_LeaveRecursiveCall(void) { + PyThreadState *tstate = _PyThreadState_GET(); + _Py_LeaveRecursiveCallTstate(tstate); +} + +extern struct _PyInterpreterFrame* _PyEval_GetFrame(void); + +PyAPI_FUNC(PyObject *)_Py_MakeCoro(PyFunctionObject *func); + +/* Handle signals, pending calls, GIL drop request + and asynchronous exception */ +PyAPI_FUNC(int) _Py_HandlePending(PyThreadState *tstate); + +extern PyObject * _PyEval_GetFrameLocals(void); + +typedef PyObject *(*conversion_func)(PyObject *); + +PyAPI_DATA(const binaryfunc) _PyEval_BinaryOps[]; +PyAPI_DATA(const conversion_func) _PyEval_ConversionFuncs[]; + +PyAPI_FUNC(int) _PyEval_CheckExceptStarTypeValid(PyThreadState *tstate, PyObject* right); +PyAPI_FUNC(int) _PyEval_CheckExceptTypeValid(PyThreadState *tstate, PyObject* right); +PyAPI_FUNC(int) _PyEval_ExceptionGroupMatch(PyObject* exc_value, PyObject *match_type, PyObject **match, PyObject **rest); +PyAPI_FUNC(void) _PyEval_FormatAwaitableError(PyThreadState *tstate, PyTypeObject *type, int oparg); +PyAPI_FUNC(void) _PyEval_FormatExcCheckArg(PyThreadState *tstate, PyObject *exc, const char *format_str, PyObject *obj); +PyAPI_FUNC(void) _PyEval_FormatExcUnbound(PyThreadState *tstate, PyCodeObject *co, int oparg); +PyAPI_FUNC(void) _PyEval_FormatKwargsError(PyThreadState *tstate, PyObject *func, PyObject *kwargs); +PyAPI_FUNC(PyObject *)_PyEval_MatchClass(PyThreadState *tstate, PyObject *subject, PyObject *type, Py_ssize_t nargs, PyObject *kwargs); +PyAPI_FUNC(PyObject *)_PyEval_MatchKeys(PyThreadState *tstate, PyObject *map, PyObject *keys); +PyAPI_FUNC(int) _PyEval_UnpackIterable(PyThreadState *tstate, PyObject *v, int argcnt, int argcntafter, PyObject **sp); +PyAPI_FUNC(void) _PyEval_MonitorRaise(PyThreadState *tstate, _PyInterpreterFrame *frame, _Py_CODEUNIT *instr); +PyAPI_FUNC(void) _PyEval_FrameClearAndPop(PyThreadState *tstate, _PyInterpreterFrame *frame); + + +/* Bits that can be set in PyThreadState.eval_breaker */ +#define _PY_GIL_DROP_REQUEST_BIT (1U << 0) +#define _PY_SIGNALS_PENDING_BIT (1U << 1) +#define _PY_CALLS_TO_DO_BIT (1U << 2) +#define _PY_ASYNC_EXCEPTION_BIT (1U << 3) +#define _PY_GC_SCHEDULED_BIT (1U << 4) +#define _PY_EVAL_PLEASE_STOP_BIT (1U << 5) +#define _PY_EVAL_EXPLICIT_MERGE_BIT (1U << 6) + +/* Reserve a few bits for future use */ +#define _PY_EVAL_EVENTS_BITS 8 +#define _PY_EVAL_EVENTS_MASK ((1 << _PY_EVAL_EVENTS_BITS)-1) + +static inline void +_Py_set_eval_breaker_bit(PyThreadState *tstate, uintptr_t bit) +{ + _Py_atomic_or_uintptr(&tstate->eval_breaker, bit); +} + +static inline void +_Py_unset_eval_breaker_bit(PyThreadState *tstate, uintptr_t bit) +{ + _Py_atomic_and_uintptr(&tstate->eval_breaker, ~bit); +} + +static inline int +_Py_eval_breaker_bit_is_set(PyThreadState 
*tstate, uintptr_t bit) +{ + uintptr_t b = _Py_atomic_load_uintptr_relaxed(&tstate->eval_breaker); + return (b & bit) != 0; +} + +// Free-threaded builds use these functions to set or unset a bit on all +// threads in the given interpreter. +void _Py_set_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit); +void _Py_unset_eval_breaker_bit_all(PyInterpreterState *interp, uintptr_t bit); + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_CEVAL_H */ diff --git a/Include/internal/pycore_ceval_state.h b/Include/internal/pycore_ceval_state.h new file mode 100644 index 0000000000000000000000000000000000000000..009a1ea41eb9857d84fc2440db1376196f88c695 --- /dev/null +++ b/Include/internal/pycore_ceval_state.h @@ -0,0 +1,134 @@ +#ifndef Py_INTERNAL_CEVAL_STATE_H +#define Py_INTERNAL_CEVAL_STATE_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_lock.h" // PyMutex +#include "pycore_gil.h" // struct _gil_runtime_state + + +typedef int (*_Py_pending_call_func)(void *); + +struct _pending_call { + _Py_pending_call_func func; + void *arg; + int flags; +}; + +#define PENDINGCALLSARRAYSIZE 300 + +#define MAXPENDINGCALLS PENDINGCALLSARRAYSIZE +/* For interpreter-level pending calls, we want to avoid spending too + much time on pending calls in any one thread, so we apply a limit. */ +#if MAXPENDINGCALLS > 100 +# define MAXPENDINGCALLSLOOP 100 +#else +# define MAXPENDINGCALLSLOOP MAXPENDINGCALLS +#endif + +/* We keep the number small to preserve as much compatibility + as possible with earlier versions. */ +#define MAXPENDINGCALLS_MAIN 32 +/* For the main thread, we want to make sure all pending calls are + run at once, for the sake of prompt signal handling. This is + unlikely to cause any problems since there should be very few + pending calls for the main thread. */ +#define MAXPENDINGCALLSLOOP_MAIN 0 + +struct _pending_calls { + PyThreadState *handling_thread; + PyMutex mutex; + /* Request for running pending calls. */ + int32_t npending; + /* The maximum allowed number of pending calls. + If the queue fills up to this point then _PyEval_AddPendingCall() + will return _Py_ADD_PENDING_FULL. */ + int32_t max; + /* We don't want a flood of pending calls to interrupt any one thread + for too long, so we keep a limit on the number handled per pass. + A value of 0 means there is no limit (other than the maximum + size of the list of pending calls). */ + int32_t maxloop; + struct _pending_call calls[PENDINGCALLSARRAYSIZE]; + int first; + int next; +}; + + +typedef enum { + PERF_STATUS_FAILED = -1, // Perf trampoline is in an invalid state + PERF_STATUS_NO_INIT = 0, // Perf trampoline is not initialized + PERF_STATUS_OK = 1, // Perf trampoline is ready to be executed +} perf_status_t; + +#ifdef PY_HAVE_PERF_TRAMPOLINE +struct code_arena_st; + +struct trampoline_api_st { + void* (*init_state)(void); + void (*write_state)(void* state, const void *code_addr, + unsigned int code_size, PyCodeObject* code); + int (*free_state)(void* state); + void *state; + Py_ssize_t code_padding; +}; +#endif + + +struct _ceval_runtime_state { + struct { +#ifdef PY_HAVE_PERF_TRAMPOLINE + perf_status_t status; + int perf_trampoline_type; + Py_ssize_t extra_code_index; + struct code_arena_st *code_arena; + struct trampoline_api_st trampoline_api; + FILE *map_file; + Py_ssize_t persist_after_fork; +#else + int _not_used; +#endif + } perf; + /* Pending calls to be made only on the main thread. 
*/ + // The signal machinery falls back on this + // so it must be especially stable and efficient. + // For example, we use a preallocated array + // for the list of pending calls. + struct _pending_calls pending_mainthread; + PyMutex sys_trace_profile_mutex; +}; + + +#ifdef PY_HAVE_PERF_TRAMPOLINE +# define _PyEval_RUNTIME_PERF_INIT \ + { \ + .status = PERF_STATUS_NO_INIT, \ + .extra_code_index = -1, \ + .persist_after_fork = 0, \ + } +#else +# define _PyEval_RUNTIME_PERF_INIT {0} +#endif + + +struct _ceval_state { + /* This variable holds the global instrumentation version. When a thread is + running, this value is overlaid onto PyThreadState.eval_breaker so that + changes in the instrumentation version will trigger the eval breaker. */ + uintptr_t instrumentation_version; + int recursion_limit; + struct _gil_runtime_state *gil; + int own_gil; + struct _pending_calls pending; +}; + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_CEVAL_STATE_H */ diff --git a/Include/internal/pycore_code.h b/Include/internal/pycore_code.h new file mode 100644 index 0000000000000000000000000000000000000000..1fb8cc473c6e98b942d1a083f9fd96cb40f98004 --- /dev/null +++ b/Include/internal/pycore_code.h @@ -0,0 +1,596 @@ +#ifndef Py_INTERNAL_CODE_H +#define Py_INTERNAL_CODE_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_lock.h" // PyMutex +#include "pycore_backoff.h" // _Py_BackoffCounter + + +/* Each instruction in a code object is a fixed-width value, + * currently 2 bytes: 1-byte opcode + 1-byte oparg. The EXTENDED_ARG + * opcode allows for larger values but the current limit is 3 uses + * of EXTENDED_ARG (see Python/compile.c), for a maximum + * 32-bit value. This aligns with the note in Python/compile.c + * (compiler_addop_i_line) indicating that the max oparg value is + * 2**32 - 1, rather than INT_MAX. + */ + +typedef union { + uint16_t cache; + struct { + uint8_t code; + uint8_t arg; + } op; + _Py_BackoffCounter counter; // First cache entry of specializable op +} _Py_CODEUNIT; + +#define _PyCode_CODE(CO) _Py_RVALUE((_Py_CODEUNIT *)(CO)->co_code_adaptive) +#define _PyCode_NBYTES(CO) (Py_SIZE(CO) * (Py_ssize_t)sizeof(_Py_CODEUNIT)) + + +/* These macros only remain defined for compatibility. */ +#define _Py_OPCODE(word) ((word).op.code) +#define _Py_OPARG(word) ((word).op.arg) + +static inline _Py_CODEUNIT +_py_make_codeunit(uint8_t opcode, uint8_t oparg) +{ + // No designated initialisers because of C++ compat + _Py_CODEUNIT word; + word.op.code = opcode; + word.op.arg = oparg; + return word; +} + +static inline void +_py_set_opcode(_Py_CODEUNIT *word, uint8_t opcode) +{ + word->op.code = opcode; +} + +#define _Py_MAKE_CODEUNIT(opcode, oparg) _py_make_codeunit((opcode), (oparg)) +#define _Py_SET_OPCODE(word, opcode) _py_set_opcode(&(word), (opcode)) + + +// We hide some of the newer PyCodeObject fields behind macros. +// This helps with backporting certain changes to 3.12. +#define _PyCode_HAS_EXECUTORS(CODE) \ + (CODE->co_executors != NULL) +#define _PyCode_HAS_INSTRUMENTATION(CODE) \ + (CODE->_co_instrumentation_version > 0) + +struct _py_code_state { + PyMutex mutex; + // Interned constants from code objects. Used by the free-threaded build. 
+ struct _Py_hashtable_t *constants; +}; + +extern PyStatus _PyCode_Init(PyInterpreterState *interp); +extern void _PyCode_Fini(PyInterpreterState *interp); + +#define CODE_MAX_WATCHERS 8 + +/* PEP 659 + * Specialization and quickening structs and helper functions + */ + + +// Inline caches. If you change the number of cache entries for an instruction, +// you must *also* update the number of cache entries in Lib/opcode.py and bump +// the magic number in Lib/importlib/_bootstrap_external.py! + +#define CACHE_ENTRIES(cache) (sizeof(cache)/sizeof(_Py_CODEUNIT)) + +typedef struct { + _Py_BackoffCounter counter; + uint16_t module_keys_version; + uint16_t builtin_keys_version; + uint16_t index; +} _PyLoadGlobalCache; + +#define INLINE_CACHE_ENTRIES_LOAD_GLOBAL CACHE_ENTRIES(_PyLoadGlobalCache) + +typedef struct { + _Py_BackoffCounter counter; +} _PyBinaryOpCache; + +#define INLINE_CACHE_ENTRIES_BINARY_OP CACHE_ENTRIES(_PyBinaryOpCache) + +typedef struct { + _Py_BackoffCounter counter; +} _PyUnpackSequenceCache; + +#define INLINE_CACHE_ENTRIES_UNPACK_SEQUENCE \ + CACHE_ENTRIES(_PyUnpackSequenceCache) + +typedef struct { + _Py_BackoffCounter counter; +} _PyCompareOpCache; + +#define INLINE_CACHE_ENTRIES_COMPARE_OP CACHE_ENTRIES(_PyCompareOpCache) + +typedef struct { + _Py_BackoffCounter counter; +} _PyBinarySubscrCache; + +#define INLINE_CACHE_ENTRIES_BINARY_SUBSCR CACHE_ENTRIES(_PyBinarySubscrCache) + +typedef struct { + _Py_BackoffCounter counter; +} _PySuperAttrCache; + +#define INLINE_CACHE_ENTRIES_LOAD_SUPER_ATTR CACHE_ENTRIES(_PySuperAttrCache) + +typedef struct { + _Py_BackoffCounter counter; + uint16_t version[2]; + uint16_t index; +} _PyAttrCache; + +typedef struct { + _Py_BackoffCounter counter; + uint16_t type_version[2]; + union { + uint16_t keys_version[2]; + uint16_t dict_offset; + }; + uint16_t descr[4]; +} _PyLoadMethodCache; + + +// MUST be the max(_PyAttrCache, _PyLoadMethodCache) +#define INLINE_CACHE_ENTRIES_LOAD_ATTR CACHE_ENTRIES(_PyLoadMethodCache) + +#define INLINE_CACHE_ENTRIES_STORE_ATTR CACHE_ENTRIES(_PyAttrCache) + +typedef struct { + _Py_BackoffCounter counter; + uint16_t func_version[2]; +} _PyCallCache; + +#define INLINE_CACHE_ENTRIES_CALL CACHE_ENTRIES(_PyCallCache) + +typedef struct { + _Py_BackoffCounter counter; +} _PyStoreSubscrCache; + +#define INLINE_CACHE_ENTRIES_STORE_SUBSCR CACHE_ENTRIES(_PyStoreSubscrCache) + +typedef struct { + _Py_BackoffCounter counter; +} _PyForIterCache; + +#define INLINE_CACHE_ENTRIES_FOR_ITER CACHE_ENTRIES(_PyForIterCache) + +typedef struct { + _Py_BackoffCounter counter; +} _PySendCache; + +#define INLINE_CACHE_ENTRIES_SEND CACHE_ENTRIES(_PySendCache) + +typedef struct { + _Py_BackoffCounter counter; + uint16_t version[2]; +} _PyToBoolCache; + +#define INLINE_CACHE_ENTRIES_TO_BOOL CACHE_ENTRIES(_PyToBoolCache) + +typedef struct { + _Py_BackoffCounter counter; +} _PyContainsOpCache; + +#define INLINE_CACHE_ENTRIES_CONTAINS_OP CACHE_ENTRIES(_PyContainsOpCache) + +// Borrowed references to common callables: +struct callable_cache { + PyObject *isinstance; + PyObject *len; + PyObject *list_append; + PyObject *object__getattribute__; +}; + +/* "Locals plus" for a code object is the set of locals + cell vars + + * free vars. This relates to variable names as well as offsets into + * the "fast locals" storage array of execution frames. The compiler + * builds the list of names, their offsets, and the corresponding + * kind of local. 
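+ *
+ * For example (a sketch; the CO_FAST_* kinds are defined below):
+ *
+ *     def f(x):
+ *         y = 1
+ *         def g():
+ *             return x
+ *         return g
+ *
+ * In f's code object, 'y' is a plain local, 'x' is an argument that also
+ * escapes into g's closure (so it is both a local and a cell), and in g's
+ * code object 'x' is a free variable.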
+ * + * Those kinds represent the source of the initial value and the + * variable's scope (as related to closures). A "local" is an + * argument or other variable defined in the current scope. A "free" + * variable is one that is defined in an outer scope and comes from + * the function's closure. A "cell" variable is a local that escapes + * into an inner function as part of a closure, and thus must be + * wrapped in a cell. Any "local" can also be a "cell", but the + * "free" kind is mutually exclusive with both. + */ + +// Note that these all fit within a byte, as do combinations. +// Later, we will use the smaller numbers to differentiate the different +// kinds of locals (e.g. pos-only arg, varkwargs, local-only). +#define CO_FAST_HIDDEN 0x10 +#define CO_FAST_LOCAL 0x20 +#define CO_FAST_CELL 0x40 +#define CO_FAST_FREE 0x80 + +typedef unsigned char _PyLocals_Kind; + +static inline _PyLocals_Kind +_PyLocals_GetKind(PyObject *kinds, int i) +{ + assert(PyBytes_Check(kinds)); + assert(0 <= i && i < PyBytes_GET_SIZE(kinds)); + char *ptr = PyBytes_AS_STRING(kinds); + return (_PyLocals_Kind)(ptr[i]); +} + +static inline void +_PyLocals_SetKind(PyObject *kinds, int i, _PyLocals_Kind kind) +{ + assert(PyBytes_Check(kinds)); + assert(0 <= i && i < PyBytes_GET_SIZE(kinds)); + char *ptr = PyBytes_AS_STRING(kinds); + ptr[i] = (char) kind; +} + + +struct _PyCodeConstructor { + /* metadata */ + PyObject *filename; + PyObject *name; + PyObject *qualname; + int flags; + + /* the code */ + PyObject *code; + int firstlineno; + PyObject *linetable; + + /* used by the code */ + PyObject *consts; + PyObject *names; + + /* mapping frame offsets to information */ + PyObject *localsplusnames; // Tuple of strings + PyObject *localspluskinds; // Bytes object, one byte per variable + + /* args (within varnames) */ + int argcount; + int posonlyargcount; + // XXX Replace argcount with posorkwargcount (argcount - posonlyargcount). + int kwonlyargcount; + + /* needed to create the frame */ + int stacksize; + + /* used by the eval loop */ + PyObject *exceptiontable; +}; + +// Using an "arguments struct" like this is helpful for maintainability +// in a case such as this with many parameters. It does bear a risk: +// if the struct changes and callers are not updated properly then the +// compiler will not catch problems (like a missing argument). This can +// cause hard-to-debug problems. The risk is mitigated by the use of +// check_code() in codeobject.c. However, we may decide to switch +// back to a regular function signature. Regardless, this approach +// wouldn't be appropriate if this weren't a strictly internal API. +// (See the comments in https://github.com/python/cpython/pull/26258.) +extern int _PyCode_Validate(struct _PyCodeConstructor *); +extern PyCodeObject* _PyCode_New(struct _PyCodeConstructor *); + + +/* Private API */ + +/* Getters for internal PyCodeObject data. */ +extern PyObject* _PyCode_GetVarnames(PyCodeObject *); +extern PyObject* _PyCode_GetCellvars(PyCodeObject *); +extern PyObject* _PyCode_GetFreevars(PyCodeObject *); +extern PyObject* _PyCode_GetCode(PyCodeObject *); + +/** API for initializing the line number tables. */ +extern int _PyCode_InitAddressRange(PyCodeObject* co, PyCodeAddressRange *bounds); + +/** Out of process API for initializing the location table. */ +extern void _PyLineTable_InitAddressRange( + const char *linetable, + Py_ssize_t length, + int firstlineno, + PyCodeAddressRange *range); + +/** API for traversing the line number table. 
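+ *
+ * A traversal sketch (assuming the usual ar_start/ar_end/ar_line fields of
+ * PyCodeAddressRange):
+ *
+ *     PyCodeAddressRange range;
+ *     _PyCode_InitAddressRange(co, &range);
+ *     while (_PyLineTable_NextAddressRange(&range)) {
+ *         ... inspect range.ar_start, range.ar_end, range.ar_line ...
+ *     }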
*/ +extern int _PyLineTable_NextAddressRange(PyCodeAddressRange *range); +extern int _PyLineTable_PreviousAddressRange(PyCodeAddressRange *range); + +/** API for executors */ +extern void _PyCode_Clear_Executors(PyCodeObject *code); + +#ifdef Py_GIL_DISABLED +// gh-115999 tracks progress on addressing this. +#define ENABLE_SPECIALIZATION 0 +#else +#define ENABLE_SPECIALIZATION 1 +#endif + +/* Specialization functions */ + +extern void _Py_Specialize_LoadSuperAttr(PyObject *global_super, PyObject *cls, + _Py_CODEUNIT *instr, int load_method); +extern void _Py_Specialize_LoadAttr(PyObject *owner, _Py_CODEUNIT *instr, + PyObject *name); +extern void _Py_Specialize_StoreAttr(PyObject *owner, _Py_CODEUNIT *instr, + PyObject *name); +extern void _Py_Specialize_LoadGlobal(PyObject *globals, PyObject *builtins, + _Py_CODEUNIT *instr, PyObject *name); +extern void _Py_Specialize_BinarySubscr(PyObject *sub, PyObject *container, + _Py_CODEUNIT *instr); +extern void _Py_Specialize_StoreSubscr(PyObject *container, PyObject *sub, + _Py_CODEUNIT *instr); +extern void _Py_Specialize_Call(PyObject *callable, _Py_CODEUNIT *instr, + int nargs); +extern void _Py_Specialize_BinaryOp(PyObject *lhs, PyObject *rhs, _Py_CODEUNIT *instr, + int oparg, PyObject **locals); +extern void _Py_Specialize_CompareOp(PyObject *lhs, PyObject *rhs, + _Py_CODEUNIT *instr, int oparg); +extern void _Py_Specialize_UnpackSequence(PyObject *seq, _Py_CODEUNIT *instr, + int oparg); +extern void _Py_Specialize_ForIter(PyObject *iter, _Py_CODEUNIT *instr, int oparg); +extern void _Py_Specialize_Send(PyObject *receiver, _Py_CODEUNIT *instr); +extern void _Py_Specialize_ToBool(PyObject *value, _Py_CODEUNIT *instr); +extern void _Py_Specialize_ContainsOp(PyObject *value, _Py_CODEUNIT *instr); + +#ifdef Py_STATS + +#include "pycore_bitutils.h" // _Py_bit_length + +#define STAT_INC(opname, name) do { if (_Py_stats) _Py_stats->opcode_stats[opname].specialization.name++; } while (0) +#define STAT_DEC(opname, name) do { if (_Py_stats) _Py_stats->opcode_stats[opname].specialization.name--; } while (0) +#define OPCODE_EXE_INC(opname) do { if (_Py_stats) _Py_stats->opcode_stats[opname].execution_count++; } while (0) +#define CALL_STAT_INC(name) do { if (_Py_stats) _Py_stats->call_stats.name++; } while (0) +#define OBJECT_STAT_INC(name) do { if (_Py_stats) _Py_stats->object_stats.name++; } while (0) +#define OBJECT_STAT_INC_COND(name, cond) \ + do { if (_Py_stats && cond) _Py_stats->object_stats.name++; } while (0) +#define EVAL_CALL_STAT_INC(name) do { if (_Py_stats) _Py_stats->call_stats.eval_calls[name]++; } while (0) +#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) \ + do { if (_Py_stats && PyFunction_Check(callable)) _Py_stats->call_stats.eval_calls[name]++; } while (0) +#define GC_STAT_ADD(gen, name, n) do { if (_Py_stats) _Py_stats->gc_stats[(gen)].name += (n); } while (0) +#define OPT_STAT_INC(name) do { if (_Py_stats) _Py_stats->optimization_stats.name++; } while (0) +#define UOP_STAT_INC(opname, name) do { if (_Py_stats) { assert(opname < 512); _Py_stats->optimization_stats.opcode[opname].name++; } } while (0) +#define UOP_PAIR_INC(uopcode, lastuop) \ + do { \ + if (lastuop && _Py_stats) { \ + _Py_stats->optimization_stats.opcode[lastuop].pair_count[uopcode]++; \ + } \ + lastuop = uopcode; \ + } while (0) +#define OPT_UNSUPPORTED_OPCODE(opname) do { if (_Py_stats) _Py_stats->optimization_stats.unsupported_opcode[opname]++; } while (0) +#define OPT_ERROR_IN_OPCODE(opname) do { if (_Py_stats) 
_Py_stats->optimization_stats.error_in_opcode[opname]++; } while (0) +#define OPT_HIST(length, name) \ + do { \ + if (_Py_stats) { \ + int bucket = _Py_bit_length(length >= 1 ? length - 1 : 0); \ + bucket = (bucket >= _Py_UOP_HIST_SIZE) ? _Py_UOP_HIST_SIZE - 1 : bucket; \ + _Py_stats->optimization_stats.name[bucket]++; \ + } \ + } while (0) +#define RARE_EVENT_STAT_INC(name) do { if (_Py_stats) _Py_stats->rare_event_stats.name++; } while (0) + +// Export for '_opcode' shared extension +PyAPI_FUNC(PyObject*) _Py_GetSpecializationStats(void); + +#else +#define STAT_INC(opname, name) ((void)0) +#define STAT_DEC(opname, name) ((void)0) +#define OPCODE_EXE_INC(opname) ((void)0) +#define CALL_STAT_INC(name) ((void)0) +#define OBJECT_STAT_INC(name) ((void)0) +#define OBJECT_STAT_INC_COND(name, cond) ((void)0) +#define EVAL_CALL_STAT_INC(name) ((void)0) +#define EVAL_CALL_STAT_INC_IF_FUNCTION(name, callable) ((void)0) +#define GC_STAT_ADD(gen, name, n) ((void)0) +#define OPT_STAT_INC(name) ((void)0) +#define UOP_STAT_INC(opname, name) ((void)0) +#define UOP_PAIR_INC(uopcode, lastuop) ((void)0) +#define OPT_UNSUPPORTED_OPCODE(opname) ((void)0) +#define OPT_ERROR_IN_OPCODE(opname) ((void)0) +#define OPT_HIST(length, name) ((void)0) +#define RARE_EVENT_STAT_INC(name) ((void)0) +#endif // !Py_STATS + +// Utility functions for reading/writing 32/64-bit values in the inline caches. +// Great care should be taken to ensure that these functions remain correct and +// performant! They should compile to just "move" instructions on all supported +// compilers and platforms. + +// We use memcpy to let the C compiler handle unaligned accesses and endianness +// issues for us. It also seems to produce better code than manual copying for +// most compilers (see https://blog.regehr.org/archives/959 for more info). + +static inline void +write_u32(uint16_t *p, uint32_t val) +{ + memcpy(p, &val, sizeof(val)); +} + +static inline void +write_u64(uint16_t *p, uint64_t val) +{ + memcpy(p, &val, sizeof(val)); +} + +static inline void +write_obj(uint16_t *p, PyObject *val) +{ + memcpy(p, &val, sizeof(val)); +} + +static inline uint16_t +read_u16(uint16_t *p) +{ + return *p; +} + +static inline uint32_t +read_u32(uint16_t *p) +{ + uint32_t val; + memcpy(&val, p, sizeof(val)); + return val; +} + +static inline uint64_t +read_u64(uint16_t *p) +{ + uint64_t val; + memcpy(&val, p, sizeof(val)); + return val; +} + +static inline PyObject * +read_obj(uint16_t *p) +{ + PyObject *val; + memcpy(&val, p, sizeof(val)); + return val; +} + +/* See Objects/exception_handling_notes.txt for details. 
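+
+   A worked example for write_varint() below (a sketch): encoding 130 first
+   emits 64 | (130 & 63) = 0x42 with the continuation bit set, then
+   130 >> 6 = 2 as the final byte, so the output is {0x42, 0x02}; decoding
+   the least-significant-first groups gives 2 + (2 << 6) = 130. Note that
+   parse_varint() consumes the most-significant 6-bit group first, while
+   write_varint() emits the least-significant group first: the two functions
+   implement different byte orders and are not inverses of each other.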
+ */
+static inline unsigned char *
+parse_varint(unsigned char *p, int *result) {
+    int val = p[0] & 63;
+    while (p[0] & 64) {
+        p++;
+        val = (val << 6) | (p[0] & 63);
+    }
+    *result = val;
+    return p+1;
+}
+
+static inline int
+write_varint(uint8_t *ptr, unsigned int val)
+{
+    int written = 1;
+    while (val >= 64) {
+        *ptr++ = 64 | (val & 63);
+        val >>= 6;
+        written++;
+    }
+    *ptr = (uint8_t)val;
+    return written;
+}
+
+static inline int
+write_signed_varint(uint8_t *ptr, int val)
+{
+    unsigned int uval;
+    if (val < 0) {
+        // (unsigned int)(-val) has undefined behavior for INT_MIN
+        uval = ((0 - (unsigned int)val) << 1) | 1;
+    }
+    else {
+        uval = (unsigned int)val << 1;
+    }
+    return write_varint(ptr, uval);
+}
+
+static inline int
+write_location_entry_start(uint8_t *ptr, int code, int length)
+{
+    assert((code & 15) == code);
+    *ptr = 128 | (uint8_t)(code << 3) | (uint8_t)(length - 1);
+    return 1;
+}
+
+
+/** Counters
+ * The first 16-bit value in each inline cache is a counter.
+ *
+ * When counting executions until the next specialization attempt,
+ * exponential backoff is used to reduce the number of specialization failures.
+ * See pycore_backoff.h for more details.
+ * On a specialization failure, the backoff counter is restarted.
+ */
+
+#include "pycore_backoff.h"
+
+// A value of 1 means that we attempt to specialize the *second* time each
+// instruction is executed. Executing twice is a much better indicator of
+// "hotness" than executing once, but additional warmup delays only prevent
+// specialization. Most types stabilize by the second execution, too:
+#define ADAPTIVE_WARMUP_VALUE 1
+#define ADAPTIVE_WARMUP_BACKOFF 1
+
+// A value of 52 means that we attempt to re-specialize after 53 misses (a prime
+// number, useful for avoiding artifacts if every nth value is a different type
+// or something). Setting the backoff to 0 means that the counter is reset to
+// the same state as a warming-up instruction (value == 1, backoff == 1) after
+// deoptimization. This isn't strictly necessary, but it is a bit easier to
+// reason about when thinking about the opcode transitions as a state machine:
+#define ADAPTIVE_COOLDOWN_VALUE 52
+#define ADAPTIVE_COOLDOWN_BACKOFF 0
+
+// Can't assert this in pycore_backoff.h because of header order dependencies
+#if COLD_EXIT_INITIAL_VALUE <= ADAPTIVE_COOLDOWN_VALUE
+# error "Cold exit value should be larger than adaptive cooldown value"
+#endif
+
+static inline _Py_BackoffCounter
+adaptive_counter_bits(uint16_t value, uint16_t backoff) {
+    return make_backoff_counter(value, backoff);
+}
+
+static inline _Py_BackoffCounter
+adaptive_counter_warmup(void) {
+    return adaptive_counter_bits(ADAPTIVE_WARMUP_VALUE,
+                                 ADAPTIVE_WARMUP_BACKOFF);
+}
+
+static inline _Py_BackoffCounter
+adaptive_counter_cooldown(void) {
+    return adaptive_counter_bits(ADAPTIVE_COOLDOWN_VALUE,
+                                 ADAPTIVE_COOLDOWN_BACKOFF);
+}
+
+static inline _Py_BackoffCounter
+adaptive_counter_backoff(_Py_BackoffCounter counter) {
+    return restart_backoff_counter(counter);
+}
+
+
+/* Comparison bit masks. */
+
+/* Note this evaluates its arguments twice each */
+#define COMPARISON_BIT(x, y) (1 << (2 * ((x) >= (y)) + ((x) <= (y))))
+
+/*
+ * The following bits are chosen so that the value of
+ * COMPARISON_BIT(left, right)
+ * masked by the values below will be non-zero if the
+ * comparison is true, and zero if it is false */
+
+/* This is for values that are unordered, ie. NaN, not types that are unordered, e.g.
sets */ +#define COMPARISON_UNORDERED 1 + +#define COMPARISON_LESS_THAN 2 +#define COMPARISON_GREATER_THAN 4 +#define COMPARISON_EQUALS 8 + +#define COMPARISON_NOT_EQUALS (COMPARISON_UNORDERED | COMPARISON_LESS_THAN | COMPARISON_GREATER_THAN) + +extern int _Py_Instrument(PyCodeObject *co, PyInterpreterState *interp); + +extern int _Py_GetBaseOpcode(PyCodeObject *code, int offset); + +extern int _PyInstruction_GetLength(PyCodeObject *code, int offset); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_CODE_H */ diff --git a/Include/internal/pycore_codecs.h b/Include/internal/pycore_codecs.h new file mode 100644 index 0000000000000000000000000000000000000000..5e2d5c5ce9d868a2b87824e458028e2b65ad922b --- /dev/null +++ b/Include/internal/pycore_codecs.h @@ -0,0 +1,86 @@ +#ifndef Py_INTERNAL_CODECS_H +#define Py_INTERNAL_CODECS_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_lock.h" // PyMutex + +/* Initialize codecs-related state for the given interpreter, including + registering the first codec search function. Must be called before any other + PyCodec-related functions, and while only one thread is active. */ +extern PyStatus _PyCodec_InitRegistry(PyInterpreterState *interp); + +/* Finalize codecs-related state for the given interpreter. No PyCodec-related + functions other than PyCodec_Unregister() may be called after this. */ +extern void _PyCodec_Fini(PyInterpreterState *interp); + +extern PyObject* _PyCodec_Lookup(const char *encoding); + +/* Text codec specific encoding and decoding API. + + Checks the encoding against a list of codecs which do not + implement a str<->bytes encoding before attempting the + operation. + + Please note that these APIs are internal and should not + be used in Python C extensions. + + XXX (ncoghlan): should we make these, or something like them, public + in Python 3.5+? + + */ +extern PyObject* _PyCodec_LookupTextEncoding( + const char *encoding, + const char *alternate_command); + +extern PyObject* _PyCodec_EncodeText( + PyObject *object, + const char *encoding, + const char *errors); + +extern PyObject* _PyCodec_DecodeText( + PyObject *object, + const char *encoding, + const char *errors); + +/* These two aren't actually text encoding specific, but _io.TextIOWrapper + * is the only current API consumer. + */ +extern PyObject* _PyCodecInfo_GetIncrementalDecoder( + PyObject *codec_info, + const char *errors); + +extern PyObject* _PyCodecInfo_GetIncrementalEncoder( + PyObject *codec_info, + const char *errors); + +// Per-interpreter state used by codecs.c. +struct codecs_state { + // A list of callable objects used to search for codecs. + PyObject *search_path; + + // A dict mapping codec names to codecs returned from a callable in + // search_path. + PyObject *search_cache; + + // A dict mapping error handling strategies to functions to implement them. + PyObject *error_registry; + +#ifdef Py_GIL_DISABLED + // Used to safely delete a specific item from search_path. + PyMutex search_path_mutex; +#endif + + // Whether or not the rest of the state is initialized. 
+ int initialized; +}; + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_CODECS_H */ diff --git a/Include/internal/pycore_compile.h b/Include/internal/pycore_compile.h new file mode 100644 index 0000000000000000000000000000000000000000..3c21f83a18b52acb264ca6fa09ae602634654244 --- /dev/null +++ b/Include/internal/pycore_compile.h @@ -0,0 +1,118 @@ +#ifndef Py_INTERNAL_COMPILE_H +#define Py_INTERNAL_COMPILE_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_symtable.h" // _Py_SourceLocation +#include "pycore_instruction_sequence.h" + +struct _arena; // Type defined in pycore_pyarena.h +struct _mod; // Type defined in pycore_ast.h + +// Export for 'test_peg_generator' shared extension +PyAPI_FUNC(PyCodeObject*) _PyAST_Compile( + struct _mod *mod, + PyObject *filename, + PyCompilerFlags *flags, + int optimize, + struct _arena *arena); + +/* AST optimizations */ +extern int _PyCompile_AstOptimize( + struct _mod *mod, + PyObject *filename, + PyCompilerFlags *flags, + int optimize, + struct _arena *arena); + +struct _Py_SourceLocation; + +extern int _PyAST_Optimize( + struct _mod *, + struct _arena *arena, + int optimize, + int ff_features); + + +typedef struct { + PyObject *u_name; + PyObject *u_qualname; /* dot-separated qualified name (lazy) */ + + /* The following fields are dicts that map objects to + the index of them in co_XXX. The index is used as + the argument for opcodes that refer to those collections. + */ + PyObject *u_consts; /* all constants */ + PyObject *u_names; /* all names */ + PyObject *u_varnames; /* local variables */ + PyObject *u_cellvars; /* cell variables */ + PyObject *u_freevars; /* free variables */ + PyObject *u_fasthidden; /* dict; keys are names that are fast-locals only + temporarily within an inlined comprehension. When + value is True, treat as fast-local. 
*/ + + Py_ssize_t u_argcount; /* number of arguments for block */ + Py_ssize_t u_posonlyargcount; /* number of positional only arguments for block */ + Py_ssize_t u_kwonlyargcount; /* number of keyword only arguments for block */ + + int u_firstlineno; /* the first lineno of the block */ +} _PyCompile_CodeUnitMetadata; + + +/* Utility for a number of growing arrays used in the compiler */ +int _PyCompile_EnsureArrayLargeEnough( + int idx, + void **array, + int *alloc, + int default_alloc, + size_t item_size); + +int _PyCompile_ConstCacheMergeOne(PyObject *const_cache, PyObject **obj); + + +// Export for '_opcode' extension module +PyAPI_FUNC(int) _PyCompile_OpcodeIsValid(int opcode); +PyAPI_FUNC(int) _PyCompile_OpcodeHasArg(int opcode); +PyAPI_FUNC(int) _PyCompile_OpcodeHasConst(int opcode); +PyAPI_FUNC(int) _PyCompile_OpcodeHasName(int opcode); +PyAPI_FUNC(int) _PyCompile_OpcodeHasJump(int opcode); +PyAPI_FUNC(int) _PyCompile_OpcodeHasFree(int opcode); +PyAPI_FUNC(int) _PyCompile_OpcodeHasLocal(int opcode); +PyAPI_FUNC(int) _PyCompile_OpcodeHasExc(int opcode); + +PyAPI_FUNC(PyObject*) _PyCompile_GetUnaryIntrinsicName(int index); +PyAPI_FUNC(PyObject*) _PyCompile_GetBinaryIntrinsicName(int index); + +/* Access compiler internals for unit testing */ + +// Export for '_testinternalcapi' shared extension +PyAPI_FUNC(PyObject*) _PyCompile_CleanDoc(PyObject *doc); + +// Export for '_testinternalcapi' shared extension +PyAPI_FUNC(PyObject*) _PyCompile_CodeGen( + PyObject *ast, + PyObject *filename, + PyCompilerFlags *flags, + int optimize, + int compile_mode); + +// Export for '_testinternalcapi' shared extension +PyAPI_FUNC(PyObject*) _PyCompile_OptimizeCfg( + PyObject *instructions, + PyObject *consts, + int nlocals); + +// Export for '_testinternalcapi' shared extension +PyAPI_FUNC(PyCodeObject*) +_PyCompile_Assemble(_PyCompile_CodeUnitMetadata *umd, PyObject *filename, + PyObject *instructions); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_COMPILE_H */ diff --git a/Include/internal/pycore_complexobject.h b/Include/internal/pycore_complexobject.h new file mode 100644 index 0000000000000000000000000000000000000000..54713536eedc462ce82400fd0fa0767dd9c0751d --- /dev/null +++ b/Include/internal/pycore_complexobject.h @@ -0,0 +1,25 @@ +#ifndef Py_INTERNAL_COMPLEXOBJECT_H +#define Py_INTERNAL_COMPLEXOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_unicodeobject.h" // _PyUnicodeWriter + +/* Format the object based on the format_spec, as defined in PEP 3101 + (Advanced String Formatting). 
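+
+   As an illustrative example, format(1+2j, ".2f") is ultimately routed
+   through this writer and produces '1.00+2.00j'.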
+ */
+extern int _PyComplex_FormatAdvancedWriter(
+    _PyUnicodeWriter *writer,
+    PyObject *obj,
+    PyObject *format_spec,
+    Py_ssize_t start,
+    Py_ssize_t end);
+
+#ifdef __cplusplus
+}
+#endif
+#endif  // !Py_INTERNAL_COMPLEXOBJECT_H
diff --git a/Include/internal/pycore_condvar.h b/Include/internal/pycore_condvar.h
new file mode 100644
index 0000000000000000000000000000000000000000..55271f0a4116ba03fe5314751adb2cb4e0067a6c
--- /dev/null
+++ b/Include/internal/pycore_condvar.h
@@ -0,0 +1,93 @@
+#ifndef Py_INTERNAL_CONDVAR_H
+#define Py_INTERNAL_CONDVAR_H
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include "pycore_pythread.h"      // _POSIX_THREADS
+
+
+#ifdef _POSIX_THREADS
+/*
+ * POSIX support
+ */
+#define Py_HAVE_CONDVAR
+
+#ifdef HAVE_PTHREAD_H
+# include <pthread.h>             // pthread_mutex_t
+#endif
+
+#define PyMUTEX_T pthread_mutex_t
+#define PyCOND_T pthread_cond_t
+
+#elif defined(NT_THREADS)
+/*
+ * Windows (XP, 2003 server and later, as well as (hopefully) CE) support
+ *
+ * Emulated condition variables that work on XP and later, plus
+ * native support on Vista and onwards.
+ */
+#define Py_HAVE_CONDVAR
+
+/* include windows if it hasn't been done before */
+#ifndef WIN32_LEAN_AND_MEAN
+# define WIN32_LEAN_AND_MEAN
+#endif
+#include <windows.h>              // CRITICAL_SECTION
+
+/* options */
+/* emulated condition variables are provided for those that want
+ * to target Windows XP or earlier. Modify this macro to enable them.
+ */
+#ifndef _PY_EMULATED_WIN_CV
+#define _PY_EMULATED_WIN_CV 0  /* use non-emulated condition variables */
+#endif
+
+/* fall back to emulation if targeting earlier than Vista */
+#if !defined NTDDI_VISTA || NTDDI_VERSION < NTDDI_VISTA
+#undef _PY_EMULATED_WIN_CV
+#define _PY_EMULATED_WIN_CV 1
+#endif
+
+#if _PY_EMULATED_WIN_CV
+
+typedef CRITICAL_SECTION PyMUTEX_T;
+
+/* The ConditionVariable object. From XP onwards it is easily emulated
+   with a Semaphore.
+   Semaphores are available on Windows XP (2003 server) and later.
+   We use a Semaphore rather than an auto-reset event, because although
+   an auto-reset event might appear to solve the lost-wakeup bug (race
+   condition between releasing the outer lock and waiting) because it
+   maintains state even though a wait hasn't happened, there is still
+   a lost wakeup problem if more than one thread is interrupted in the
+   critical place. A semaphore solves that, because its state is
+   counted, not Boolean.
+   Because it is ok to signal a condition variable with no one
+   waiting, we need to keep track of the number of
+   waiting threads. Otherwise, the semaphore's state could rise
+   without bound. This also helps reduce the number of "spurious wakeups"
+   that would otherwise happen.
+ */
+
+typedef struct _PyCOND_T
+{
+    HANDLE sem;
+    int waiting; /* to allow PyCOND_SIGNAL to be a no-op */
+} PyCOND_T;
+
+#else /* !_PY_EMULATED_WIN_CV */
+
+/* Use native Windows primitives if build target is Vista or higher */
+
+/* SRWLOCK is faster and better than CriticalSection */
+typedef SRWLOCK PyMUTEX_T;
+
+typedef CONDITION_VARIABLE PyCOND_T;
+
+#endif /* _PY_EMULATED_WIN_CV */
+
+#endif /* _POSIX_THREADS, NT_THREADS */
+
+#endif /* Py_INTERNAL_CONDVAR_H */
diff --git a/Include/internal/pycore_context.h b/Include/internal/pycore_context.h
new file mode 100644
index 0000000000000000000000000000000000000000..10c1f1e52be04000b8f49fb782c7e817465b3507
--- /dev/null
+++ b/Include/internal/pycore_context.h
@@ -0,0 +1,61 @@
+#ifndef Py_INTERNAL_CONTEXT_H
+#define Py_INTERNAL_CONTEXT_H
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include "pycore_freelist.h"   // _PyFreeListState
+#include "pycore_hamt.h"       // PyHamtObject
+
+
+extern PyTypeObject _PyContextTokenMissing_Type;
+
+/* runtime lifecycle */
+
+PyStatus _PyContext_Init(PyInterpreterState *);
+
+
+/* other API */
+
+typedef struct {
+    PyObject_HEAD
+} _PyContextTokenMissing;
+
+struct _pycontextobject {
+    PyObject_HEAD
+    PyContext *ctx_prev;
+    PyHamtObject *ctx_vars;
+    PyObject *ctx_weakreflist;
+    int ctx_entered;
+};
+
+
+struct _pycontextvarobject {
+    PyObject_HEAD
+    PyObject *var_name;
+    PyObject *var_default;
+#ifndef Py_GIL_DISABLED
+    PyObject *var_cached;
+    uint64_t var_cached_tsid;
+    uint64_t var_cached_tsver;
+#endif
+    Py_hash_t var_hash;
+};
+
+
+struct _pycontexttokenobject {
+    PyObject_HEAD
+    PyContext *tok_ctx;
+    PyContextVar *tok_var;
+    PyObject *tok_oldval;
+    int tok_used;
+};
+
+
+// _testinternalcapi.hamt() used by tests.
+// Export for '_testcapi' shared extension
+PyAPI_FUNC(PyObject*) _PyContext_NewHamtForTests(void);
+
+
+#endif /* !Py_INTERNAL_CONTEXT_H */
diff --git a/Include/internal/pycore_critical_section.h b/Include/internal/pycore_critical_section.h
new file mode 100644
index 0000000000000000000000000000000000000000..78cd0d549726608fed94f1a3b2022944d263345f
--- /dev/null
+++ b/Include/internal/pycore_critical_section.h
@@ -0,0 +1,233 @@
+#ifndef Py_INTERNAL_CRITICAL_SECTION_H
+#define Py_INTERNAL_CRITICAL_SECTION_H
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include "pycore_lock.h"        // PyMutex
+#include "pycore_pystate.h"     // _PyThreadState_GET()
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Tagged pointers to critical sections use the two least significant bits to
+// mark if the pointed-to critical section is inactive and whether it is a
+// PyCriticalSection2 object.
+#define _Py_CRITICAL_SECTION_INACTIVE 0x1
+#define _Py_CRITICAL_SECTION_TWO_MUTEXES 0x2
+#define _Py_CRITICAL_SECTION_MASK 0x3
+
+#ifdef Py_GIL_DISABLED
+# define Py_BEGIN_CRITICAL_SECTION_MUT(mutex) \
+    { \
+        PyCriticalSection _py_cs; \
+        _PyCriticalSection_BeginMutex(&_py_cs, mutex)
+
+# define Py_BEGIN_CRITICAL_SECTION2_MUT(m1, m2) \
+    { \
+        PyCriticalSection2 _py_cs2; \
+        _PyCriticalSection2_BeginMutex(&_py_cs2, m1, m2)
+
+// Specialized version of critical section locking to safely use
+// PySequence_Fast APIs without the GIL. For performance, the argument *to*
+// PySequence_Fast() is provided to the macro, not the *result* of
+// PySequence_Fast(), which would require an extra test to determine if the
+// lock must be acquired.
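+//
+// A minimal usage sketch (hypothetical caller, not part of this header):
+//
+//     static Py_ssize_t
+//     count_items(PyObject *seq)
+//     {
+//         Py_ssize_t n = -1;
+//         Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(seq);
+//         PyObject *fast = PySequence_Fast(seq, "expected a sequence");
+//         if (fast != NULL) {
+//             n = PySequence_Fast_GET_SIZE(fast);
+//             Py_DECREF(fast);
+//         }
+//         Py_END_CRITICAL_SECTION_SEQUENCE_FAST();
+//         return n;
+//     }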
+# define Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(original) \
+    { \
+        PyObject *_orig_seq = _PyObject_CAST(original); \
+        const bool _should_lock_cs = PyList_CheckExact(_orig_seq); \
+        PyCriticalSection _cs; \
+        if (_should_lock_cs) { \
+            _PyCriticalSection_Begin(&_cs, _orig_seq); \
+        }
+
+# define Py_END_CRITICAL_SECTION_SEQUENCE_FAST() \
+        if (_should_lock_cs) { \
+            PyCriticalSection_End(&_cs); \
+        } \
+    }
+
+// Asserts that the mutex is locked. The mutex must be held by the
+// top-most critical section, otherwise there's the possibility
+// that the mutex would be swapped out in some code paths.
+#define _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(mutex) \
+    _PyCriticalSection_AssertHeld(mutex)
+
+// Asserts that the mutex for the given object is locked. The mutex must
+// be held by the top-most critical section, otherwise there's the
+// possibility that the mutex would be swapped out in some code paths.
+#ifdef Py_DEBUG
+
+# define _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op) \
+    if (Py_REFCNT(op) != 1) { \
+        _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(&_PyObject_CAST(op)->ob_mutex); \
+    }
+
+#else /* Py_DEBUG */
+
+# define _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op)
+
+#endif /* Py_DEBUG */
+
+#else /* !Py_GIL_DISABLED */
+// The critical section APIs are no-ops with the GIL.
+# define Py_BEGIN_CRITICAL_SECTION_MUT(mut) {
+# define Py_BEGIN_CRITICAL_SECTION2_MUT(m1, m2) {
+# define Py_BEGIN_CRITICAL_SECTION_SEQUENCE_FAST(original) {
+# define Py_END_CRITICAL_SECTION_SEQUENCE_FAST() }
+# define _Py_CRITICAL_SECTION_ASSERT_MUTEX_LOCKED(mutex)
+# define _Py_CRITICAL_SECTION_ASSERT_OBJECT_LOCKED(op)
+#endif /* !Py_GIL_DISABLED */
+
+// Resumes the top-most critical section.
+PyAPI_FUNC(void)
+_PyCriticalSection_Resume(PyThreadState *tstate);
+
+// (private) slow path for locking the mutex
+PyAPI_FUNC(void)
+_PyCriticalSection_BeginSlow(PyCriticalSection *c, PyMutex *m);
+
+PyAPI_FUNC(void)
+_PyCriticalSection2_BeginSlow(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2,
+                              int is_m1_locked);
+
+PyAPI_FUNC(void)
+_PyCriticalSection_SuspendAll(PyThreadState *tstate);
+
+#ifdef Py_GIL_DISABLED
+
+static inline int
+_PyCriticalSection_IsActive(uintptr_t tag)
+{
+    return tag != 0 && (tag & _Py_CRITICAL_SECTION_INACTIVE) == 0;
+}
+
+static inline void
+_PyCriticalSection_BeginMutex(PyCriticalSection *c, PyMutex *m)
+{
+    if (PyMutex_LockFast(&m->_bits)) {
+        PyThreadState *tstate = _PyThreadState_GET();
+        c->_cs_mutex = m;
+        c->_cs_prev = tstate->critical_section;
+        tstate->critical_section = (uintptr_t)c;
+    }
+    else {
+        _PyCriticalSection_BeginSlow(c, m);
+    }
+}
+
+static inline void
+_PyCriticalSection_Begin(PyCriticalSection *c, PyObject *op)
+{
+    _PyCriticalSection_BeginMutex(c, &op->ob_mutex);
+}
+#define PyCriticalSection_Begin _PyCriticalSection_Begin
+
+// Removes the top-most critical section from the thread's stack of critical
+// sections. If the new top-most critical section is inactive, then it is
+// resumed.
+static inline void +_PyCriticalSection_Pop(PyCriticalSection *c) +{ + PyThreadState *tstate = _PyThreadState_GET(); + uintptr_t prev = c->_cs_prev; + tstate->critical_section = prev; + + if ((prev & _Py_CRITICAL_SECTION_INACTIVE) != 0) { + _PyCriticalSection_Resume(tstate); + } +} + +static inline void +_PyCriticalSection_End(PyCriticalSection *c) +{ + PyMutex_Unlock(c->_cs_mutex); + _PyCriticalSection_Pop(c); +} +#define PyCriticalSection_End _PyCriticalSection_End + +static inline void +_PyCriticalSection2_BeginMutex(PyCriticalSection2 *c, PyMutex *m1, PyMutex *m2) +{ + if (m1 == m2) { + // If the two mutex arguments are the same, treat this as a critical + // section with a single mutex. + c->_cs_mutex2 = NULL; + _PyCriticalSection_BeginMutex(&c->_cs_base, m1); + return; + } + + if ((uintptr_t)m2 < (uintptr_t)m1) { + // Sort the mutexes so that the lower address is locked first. + // The exact order does not matter, but we need to acquire the mutexes + // in a consistent order to avoid lock ordering deadlocks. + PyMutex *tmp = m1; + m1 = m2; + m2 = tmp; + } + + if (PyMutex_LockFast(&m1->_bits)) { + if (PyMutex_LockFast(&m2->_bits)) { + PyThreadState *tstate = _PyThreadState_GET(); + c->_cs_base._cs_mutex = m1; + c->_cs_mutex2 = m2; + c->_cs_base._cs_prev = tstate->critical_section; + + uintptr_t p = (uintptr_t)c | _Py_CRITICAL_SECTION_TWO_MUTEXES; + tstate->critical_section = p; + } + else { + _PyCriticalSection2_BeginSlow(c, m1, m2, 1); + } + } + else { + _PyCriticalSection2_BeginSlow(c, m1, m2, 0); + } +} + +static inline void +_PyCriticalSection2_Begin(PyCriticalSection2 *c, PyObject *a, PyObject *b) +{ + _PyCriticalSection2_BeginMutex(c, &a->ob_mutex, &b->ob_mutex); +} +#define PyCriticalSection2_Begin _PyCriticalSection2_Begin + +static inline void +_PyCriticalSection2_End(PyCriticalSection2 *c) +{ + if (c->_cs_mutex2) { + PyMutex_Unlock(c->_cs_mutex2); + } + PyMutex_Unlock(c->_cs_base._cs_mutex); + _PyCriticalSection_Pop(&c->_cs_base); +} +#define PyCriticalSection2_End _PyCriticalSection2_End + +static inline void +_PyCriticalSection_AssertHeld(PyMutex *mutex) +{ +#ifdef Py_DEBUG + PyThreadState *tstate = _PyThreadState_GET(); + uintptr_t prev = tstate->critical_section; + if (prev & _Py_CRITICAL_SECTION_TWO_MUTEXES) { + PyCriticalSection2 *cs = (PyCriticalSection2 *)(prev & ~_Py_CRITICAL_SECTION_MASK); + assert(cs != NULL && (cs->_cs_base._cs_mutex == mutex || cs->_cs_mutex2 == mutex)); + } + else { + PyCriticalSection *cs = (PyCriticalSection *)(tstate->critical_section & ~_Py_CRITICAL_SECTION_MASK); + assert(cs != NULL && cs->_cs_mutex == mutex); + } + +#endif +} + +#endif /* Py_GIL_DISABLED */ + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_CRITICAL_SECTION_H */ diff --git a/Include/internal/pycore_crossinterp.h b/Include/internal/pycore_crossinterp.h new file mode 100644 index 0000000000000000000000000000000000000000..2dd165eae74850f9b5d2d3f7796978476c27e51f --- /dev/null +++ b/Include/internal/pycore_crossinterp.h @@ -0,0 +1,340 @@ +#ifndef Py_INTERNAL_CROSSINTERP_H +#define Py_INTERNAL_CROSSINTERP_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_lock.h" // PyMutex +#include "pycore_pyerrors.h" + +/**************/ +/* exceptions */ +/**************/ + +PyAPI_DATA(PyObject *) PyExc_InterpreterError; +PyAPI_DATA(PyObject *) PyExc_InterpreterNotFoundError; + + +/***************************/ +/* cross-interpreter calls */ +/***************************/ + +typedef int 
(*_Py_simple_func)(void *); +extern int _Py_CallInInterpreter( + PyInterpreterState *interp, + _Py_simple_func func, + void *arg); +extern int _Py_CallInInterpreterAndRawFree( + PyInterpreterState *interp, + _Py_simple_func func, + void *arg); + + +/**************************/ +/* cross-interpreter data */ +/**************************/ + +typedef struct _xid _PyCrossInterpreterData; +typedef PyObject *(*xid_newobjectfunc)(_PyCrossInterpreterData *); +typedef void (*xid_freefunc)(void *); + +// _PyCrossInterpreterData is similar to Py_buffer as an effectively +// opaque struct that holds data outside the object machinery. This +// is necessary to pass safely between interpreters in the same process. +struct _xid { + // data is the cross-interpreter-safe derivation of a Python object + // (see _PyObject_GetCrossInterpreterData). It will be NULL if the + // new_object func (below) encodes the data. + void *data; + // obj is the Python object from which the data was derived. This + // is non-NULL only if the data remains bound to the object in some + // way, such that the object must be "released" (via a decref) when + // the data is released. In that case the code that sets the field, + // likely a registered "crossinterpdatafunc", is responsible for + // ensuring it owns the reference (i.e. incref). + PyObject *obj; + // interp is the ID of the owning interpreter of the original + // object. It corresponds to the active interpreter when + // _PyObject_GetCrossInterpreterData() was called. This should only + // be set by the cross-interpreter machinery. + // + // We use the ID rather than the PyInterpreterState to avoid issues + // with deleted interpreters. Note that IDs are never re-used, so + // each one will always correspond to a specific interpreter + // (whether still alive or not). + int64_t interpid; + // new_object is a function that returns a new object in the current + // interpreter given the data. The resulting object (a new + // reference) will be equivalent to the original object. This field + // is required. + xid_newobjectfunc new_object; + // free is called when the data is released. If it is NULL then + // nothing will be done to free the data. For some types this is + // okay (e.g. bytes) and for those types this field should be set + // to NULL. However, for most the data was allocated just for + // cross-interpreter use, so it must be freed when + // _PyCrossInterpreterData_Release is called or the memory will + // leak. In that case, at the very least this field should be set + // to PyMem_RawFree (the default if not explicitly set to NULL). + // The call will happen with the original interpreter activated. + xid_freefunc free; +}; + +PyAPI_FUNC(_PyCrossInterpreterData *) _PyCrossInterpreterData_New(void); +PyAPI_FUNC(void) _PyCrossInterpreterData_Free(_PyCrossInterpreterData *data); + +#define _PyCrossInterpreterData_DATA(DATA) ((DATA)->data) +#define _PyCrossInterpreterData_OBJ(DATA) ((DATA)->obj) +#define _PyCrossInterpreterData_INTERPID(DATA) ((DATA)->interpid) +// Users should not need getters for "new_object" or "free". 
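+
+// A hedged sketch of how the data flow works (hypothetical helper; the
+// functions used here are declared in the sections that follow):
+//
+//     static PyObject *
+//     clone_into_current_interp(PyObject *obj)
+//     {
+//         // In the source interpreter:
+//         _PyCrossInterpreterData data;
+//         if (_PyObject_GetCrossInterpreterData(obj, &data) < 0) {
+//             return NULL;  // not shareable
+//         }
+//         // ...switch to the target interpreter...
+//         PyObject *copy = _PyCrossInterpreterData_NewObject(&data);
+//         (void)_PyCrossInterpreterData_Release(&data);
+//         return copy;
+//     }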
+ + +/* defining cross-interpreter data */ + +PyAPI_FUNC(void) _PyCrossInterpreterData_Init( + _PyCrossInterpreterData *data, + PyInterpreterState *interp, void *shared, PyObject *obj, + xid_newobjectfunc new_object); +PyAPI_FUNC(int) _PyCrossInterpreterData_InitWithSize( + _PyCrossInterpreterData *, + PyInterpreterState *interp, const size_t, PyObject *, + xid_newobjectfunc); +PyAPI_FUNC(void) _PyCrossInterpreterData_Clear( + PyInterpreterState *, _PyCrossInterpreterData *); + +// Normally the Init* functions are sufficient. The only time +// additional initialization might be needed is to set the "free" func, +// though that should be infrequent. +#define _PyCrossInterpreterData_SET_FREE(DATA, FUNC) \ + do { \ + (DATA)->free = (FUNC); \ + } while (0) +// Additionally, some shareable types are essentially light wrappers +// around other shareable types. The crossinterpdatafunc of the wrapper +// can often be implemented by calling the wrapped object's +// crossinterpdatafunc and then changing the "new_object" function. +// We have _PyCrossInterpreterData_SET_NEW_OBJECT() here for that, +// but might be better to have a function like +// _PyCrossInterpreterData_AdaptToWrapper() instead. +#define _PyCrossInterpreterData_SET_NEW_OBJECT(DATA, FUNC) \ + do { \ + (DATA)->new_object = (FUNC); \ + } while (0) + + +/* using cross-interpreter data */ + +PyAPI_FUNC(int) _PyObject_CheckCrossInterpreterData(PyObject *); +PyAPI_FUNC(int) _PyObject_GetCrossInterpreterData(PyObject *, _PyCrossInterpreterData *); +PyAPI_FUNC(PyObject *) _PyCrossInterpreterData_NewObject(_PyCrossInterpreterData *); +PyAPI_FUNC(int) _PyCrossInterpreterData_Release(_PyCrossInterpreterData *); +PyAPI_FUNC(int) _PyCrossInterpreterData_ReleaseAndRawFree(_PyCrossInterpreterData *); + + +/* cross-interpreter data registry */ + +// For now we use a global registry of shareable classes. An +// alternative would be to add a tp_* slot for a class's +// crossinterpdatafunc. It would be simpler and more efficient. + +typedef int (*crossinterpdatafunc)(PyThreadState *tstate, PyObject *, + _PyCrossInterpreterData *); + +struct _xidregitem; + +struct _xidregitem { + struct _xidregitem *prev; + struct _xidregitem *next; + /* This can be a dangling pointer, but only if weakref is set. */ + PyTypeObject *cls; + /* This is NULL for builtin types. */ + PyObject *weakref; + size_t refcount; + crossinterpdatafunc getdata; +}; + +struct _xidregistry { + int global; /* builtin types or heap types */ + int initialized; + PyMutex mutex; + struct _xidregitem *head; +}; + +PyAPI_FUNC(int) _PyCrossInterpreterData_RegisterClass(PyTypeObject *, crossinterpdatafunc); +PyAPI_FUNC(int) _PyCrossInterpreterData_UnregisterClass(PyTypeObject *); +PyAPI_FUNC(crossinterpdatafunc) _PyCrossInterpreterData_Lookup(PyObject *); + + +/*****************************/ +/* runtime state & lifecycle */ +/*****************************/ + +struct _xi_runtime_state { + // builtin types + // XXX Remove this field once we have a tp_* slot. + struct _xidregistry registry; +}; + +struct _xi_state { + // heap types + // XXX Remove this field once we have a tp_* slot. 
+ struct _xidregistry registry; + + // heap types + PyObject *PyExc_NotShareableError; +}; + +extern PyStatus _PyXI_Init(PyInterpreterState *interp); +extern void _PyXI_Fini(PyInterpreterState *interp); + +extern PyStatus _PyXI_InitTypes(PyInterpreterState *interp); +extern void _PyXI_FiniTypes(PyInterpreterState *interp); + +#define _PyInterpreterState_GetXIState(interp) (&(interp)->xi) + + +/***************************/ +/* short-term data sharing */ +/***************************/ + +// Ultimately we'd like to preserve enough information about the +// exception and traceback that we could re-constitute (or at least +// simulate, a la traceback.TracebackException), and even chain, a copy +// of the exception in the calling interpreter. + +typedef struct _excinfo { + struct _excinfo_type { + PyTypeObject *builtin; + const char *name; + const char *qualname; + const char *module; + } type; + const char *msg; + const char *errdisplay; +} _PyXI_excinfo; + +PyAPI_FUNC(int) _PyXI_InitExcInfo(_PyXI_excinfo *info, PyObject *exc); +PyAPI_FUNC(PyObject *) _PyXI_FormatExcInfo(_PyXI_excinfo *info); +PyAPI_FUNC(PyObject *) _PyXI_ExcInfoAsObject(_PyXI_excinfo *info); +PyAPI_FUNC(void) _PyXI_ClearExcInfo(_PyXI_excinfo *info); + + +typedef enum error_code { + _PyXI_ERR_NO_ERROR = 0, + _PyXI_ERR_UNCAUGHT_EXCEPTION = -1, + _PyXI_ERR_OTHER = -2, + _PyXI_ERR_NO_MEMORY = -3, + _PyXI_ERR_ALREADY_RUNNING = -4, + _PyXI_ERR_MAIN_NS_FAILURE = -5, + _PyXI_ERR_APPLY_NS_FAILURE = -6, + _PyXI_ERR_NOT_SHAREABLE = -7, +} _PyXI_errcode; + + +typedef struct _sharedexception { + // The originating interpreter. + PyInterpreterState *interp; + // The kind of error to propagate. + _PyXI_errcode code; + // The exception information to propagate, if applicable. + // This is populated only for some error codes, + // but always for _PyXI_ERR_UNCAUGHT_EXCEPTION. + _PyXI_excinfo uncaught; +} _PyXI_error; + +PyAPI_FUNC(PyObject *) _PyXI_ApplyError(_PyXI_error *err); + + +typedef struct xi_session _PyXI_session; +typedef struct _sharedns _PyXI_namespace; + +PyAPI_FUNC(void) _PyXI_FreeNamespace(_PyXI_namespace *ns); +PyAPI_FUNC(_PyXI_namespace *) _PyXI_NamespaceFromNames(PyObject *names); +PyAPI_FUNC(int) _PyXI_FillNamespaceFromDict( + _PyXI_namespace *ns, + PyObject *nsobj, + _PyXI_session *session); +PyAPI_FUNC(int) _PyXI_ApplyNamespace( + _PyXI_namespace *ns, + PyObject *nsobj, + PyObject *dflt); + + +// A cross-interpreter session involves entering an interpreter +// (_PyXI_Enter()), doing some work with it, and finally exiting +// that interpreter (_PyXI_Exit()). +// +// At the boundaries of the session, both entering and exiting, +// data may be exchanged between the previous interpreter and the +// target one in a thread-safe way that does not violate the +// isolation between interpreters. This includes setting objects +// in the target's __main__ module on the way in, and capturing +// uncaught exceptions on the way out. +struct xi_session { + // Once a session has been entered, this is the tstate that was + // current before the session. If it is different from cur_tstate + // then we must have switched interpreters. Either way, this will + // be the current tstate once we exit the session. + PyThreadState *prev_tstate; + // Once a session has been entered, this is the current tstate. + // It must be current when the session exits. + PyThreadState *init_tstate; + // This is true if init_tstate needs cleanup during exit. 
+    int own_init_tstate;
+
+    // This is true if, while entering the session, init_thread took
+    // "ownership" of the interpreter's __main__ module. This means
+    // it is the only thread that is allowed to run code there.
+    // (Caveat: for now, users may still run exec() against the
+    // __main__ module's dict, though that isn't advisable.)
+    int running;
+    // This is a cached reference to the __dict__ of the entered
+    // interpreter's __main__ module. It is looked up once at the
+    // beginning of the session as a convenience.
+    PyObject *main_ns;
+
+    // This is set if the interpreter is entered and raised an exception
+    // that needs to be handled in some special way during exit.
+    _PyXI_errcode *error_override;
+    // This is set if exit captured an exception to propagate.
+    _PyXI_error *error;
+
+    // -- pre-allocated memory --
+    _PyXI_error _error;
+    _PyXI_errcode _error_override;
+};
+
+PyAPI_FUNC(int) _PyXI_Enter(
+    _PyXI_session *session,
+    PyInterpreterState *interp,
+    PyObject *nsupdates);
+PyAPI_FUNC(void) _PyXI_Exit(_PyXI_session *session);
+
+PyAPI_FUNC(PyObject *) _PyXI_ApplyCapturedException(_PyXI_session *session);
+PyAPI_FUNC(int) _PyXI_HasCapturedException(_PyXI_session *session);
+
+
+/*************/
+/* other API */
+/*************/
+
+// Export for _testinternalcapi shared extension
+PyAPI_FUNC(PyInterpreterState *) _PyXI_NewInterpreter(
+    PyInterpreterConfig *config,
+    long *maybe_whence,
+    PyThreadState **p_tstate,
+    PyThreadState **p_save_tstate);
+PyAPI_FUNC(void) _PyXI_EndInterpreter(
+    PyInterpreterState *interp,
+    PyThreadState *tstate,
+    PyThreadState **p_save_tstate);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_CROSSINTERP_H */
diff --git a/Include/internal/pycore_descrobject.h b/Include/internal/pycore_descrobject.h
new file mode 100644
index 0000000000000000000000000000000000000000..3cec59a68a3d2b2af5320fe1435c13a4aa66886c
--- /dev/null
+++ b/Include/internal/pycore_descrobject.h
@@ -0,0 +1,28 @@
+#ifndef Py_INTERNAL_DESCROBJECT_H
+#define Py_INTERNAL_DESCROBJECT_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+typedef struct {
+    PyObject_HEAD
+    PyObject *prop_get;
+    PyObject *prop_set;
+    PyObject *prop_del;
+    PyObject *prop_doc;
+    PyObject *prop_name;
+    int getter_doc;
+} propertyobject;
+
+typedef propertyobject _PyPropertyObject;
+
+extern PyTypeObject _PyMethodWrapper_Type;
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_DESCROBJECT_H */
diff --git a/Include/internal/pycore_dict.h b/Include/internal/pycore_dict.h
new file mode 100644
index 0000000000000000000000000000000000000000..36da498db2c3e147d0b26fa390f57de2e88f9f16
--- /dev/null
+++ b/Include/internal/pycore_dict.h
@@ -0,0 +1,340 @@
+#ifndef Py_INTERNAL_DICT_H
+#define Py_INTERNAL_DICT_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include "pycore_freelist.h"   // _PyFreeListState
+#include "pycore_identifier.h" // _Py_Identifier
+#include "pycore_object.h"     // PyManagedDictPointer
+#include "pycore_pyatomic_ft_wrappers.h" // FT_ATOMIC_LOAD_SSIZE_ACQUIRE
+
+// Unsafe flavor of PyDict_GetItemWithError(): no error checking
+extern PyObject* _PyDict_GetItemWithError(PyObject *dp, PyObject *key);
+
+// Delete an item from a dict if a predicate is true
+// Returns -1 on error, 1 if the item was deleted, 0 otherwise
+// Export for '_asyncio' shared extension
+PyAPI_FUNC(int) _PyDict_DelItemIf(PyObject *mp, PyObject *key,
+ int (*predicate)(PyObject *value, void *arg), + void *arg); + +// "KnownHash" variants +// Export for '_asyncio' shared extension +PyAPI_FUNC(int) _PyDict_SetItem_KnownHash(PyObject *mp, PyObject *key, + PyObject *item, Py_hash_t hash); +// Export for '_asyncio' shared extension +PyAPI_FUNC(int) _PyDict_DelItem_KnownHash(PyObject *mp, PyObject *key, + Py_hash_t hash); +extern int _PyDict_Contains_KnownHash(PyObject *, PyObject *, Py_hash_t); + +// "Id" variants +extern PyObject* _PyDict_GetItemIdWithError(PyObject *dp, + _Py_Identifier *key); +extern int _PyDict_ContainsId(PyObject *, _Py_Identifier *); +extern int _PyDict_SetItemId(PyObject *dp, _Py_Identifier *key, PyObject *item); +extern int _PyDict_DelItemId(PyObject *mp, _Py_Identifier *key); + +extern int _PyDict_Next( + PyObject *mp, Py_ssize_t *pos, PyObject **key, PyObject **value, Py_hash_t *hash); + +extern int _PyDict_HasOnlyStringKeys(PyObject *mp); + +extern void _PyDict_MaybeUntrack(PyObject *mp); + +// Export for '_ctypes' shared extension +PyAPI_FUNC(Py_ssize_t) _PyDict_SizeOf(PyDictObject *); + +#define _PyDict_HasSplitTable(d) ((d)->ma_values != NULL) + +/* Like PyDict_Merge, but override can be 0, 1 or 2. If override is 0, + the first occurrence of a key wins, if override is 1, the last occurrence + of a key wins, if override is 2, a KeyError with conflicting key as + argument is raised. +*/ +PyAPI_FUNC(int) _PyDict_MergeEx(PyObject *mp, PyObject *other, int override); + +extern void _PyDict_DebugMallocStats(FILE *out); + + +/* _PyDictView */ + +typedef struct { + PyObject_HEAD + PyDictObject *dv_dict; +} _PyDictViewObject; + +extern PyObject* _PyDictView_New(PyObject *, PyTypeObject *); +extern PyObject* _PyDictView_Intersect(PyObject* self, PyObject *other); + +/* other API */ + +typedef struct { + /* Cached hash code of me_key. */ + Py_hash_t me_hash; + PyObject *me_key; + PyObject *me_value; /* This field is only meaningful for combined tables */ +} PyDictKeyEntry; + +typedef struct { + PyObject *me_key; /* The key must be Unicode and have hash. */ + PyObject *me_value; /* This field is only meaningful for combined tables */ +} PyDictUnicodeEntry; + +extern PyDictKeysObject *_PyDict_NewKeysForClass(void); +extern PyObject *_PyDict_FromKeys(PyObject *, PyObject *, PyObject *); + +/* Gets a version number unique to the current state of the keys of dict, if possible. + * Returns the version number, or zero if it was not possible to get a version number. */ +extern uint32_t _PyDictKeys_GetVersionForCurrentState( + PyInterpreterState *interp, PyDictKeysObject *dictkeys); + +extern size_t _PyDict_KeysSize(PyDictKeysObject *keys); + +extern void _PyDictKeys_DecRef(PyDictKeysObject *keys); + +/* _Py_dict_lookup() returns index of entry which can be used like DK_ENTRIES(dk)[index]. + * -1 when no entry found, -3 when compare raises error. 
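+ *
+ * A hedged usage sketch (hypothetical caller; `mp`, `key` and `hash` are
+ * assumed to be in scope):
+ *
+ *     PyObject *value;
+ *     Py_ssize_t ix = _Py_dict_lookup(mp, key, hash, &value);
+ *     if (ix >= 0) {
+ *         ... `value` now holds the found value ...
+ *     }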
+ */ +extern Py_ssize_t _Py_dict_lookup(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **value_addr); +extern Py_ssize_t _Py_dict_lookup_threadsafe(PyDictObject *mp, PyObject *key, Py_hash_t hash, PyObject **value_addr); + +extern Py_ssize_t _PyDict_LookupIndex(PyDictObject *, PyObject *); +extern Py_ssize_t _PyDictKeys_StringLookup(PyDictKeysObject* dictkeys, PyObject *key); +PyAPI_FUNC(PyObject *)_PyDict_LoadGlobal(PyDictObject *, PyDictObject *, PyObject *); + +/* Consumes references to key and value */ +PyAPI_FUNC(int) _PyDict_SetItem_Take2(PyDictObject *op, PyObject *key, PyObject *value); +extern int _PyDict_SetItem_LockHeld(PyDictObject *dict, PyObject *name, PyObject *value); +// Export for '_asyncio' shared extension +PyAPI_FUNC(int) _PyDict_SetItem_KnownHash_LockHeld(PyDictObject *mp, PyObject *key, + PyObject *value, Py_hash_t hash); +// Export for '_asyncio' shared extension +PyAPI_FUNC(int) _PyDict_GetItemRef_KnownHash_LockHeld(PyDictObject *op, PyObject *key, Py_hash_t hash, PyObject **result); +extern int _PyDict_GetItemRef_KnownHash(PyDictObject *op, PyObject *key, Py_hash_t hash, PyObject **result); +extern int _PyDict_GetItemRef_Unicode_LockHeld(PyDictObject *op, PyObject *key, PyObject **result); +extern int _PyObjectDict_SetItem(PyTypeObject *tp, PyObject *obj, PyObject **dictptr, PyObject *name, PyObject *value); + +extern int _PyDict_Pop_KnownHash( + PyDictObject *dict, + PyObject *key, + Py_hash_t hash, + PyObject **result); + +#define DKIX_EMPTY (-1) +#define DKIX_DUMMY (-2) /* Used internally */ +#define DKIX_ERROR (-3) +#define DKIX_KEY_CHANGED (-4) /* Used internally */ + +typedef enum { + DICT_KEYS_GENERAL = 0, + DICT_KEYS_UNICODE = 1, + DICT_KEYS_SPLIT = 2 +} DictKeysKind; + +/* See dictobject.c for actual layout of DictKeysObject */ +struct _dictkeysobject { + Py_ssize_t dk_refcnt; + + /* Size of the hash table (dk_indices). It must be a power of 2. */ + uint8_t dk_log2_size; + + /* Size of the hash table (dk_indices) by bytes. */ + uint8_t dk_log2_index_bytes; + + /* Kind of keys */ + uint8_t dk_kind; + +#ifdef Py_GIL_DISABLED + /* Lock used to protect shared keys */ + PyMutex dk_mutex; +#endif + + /* Version number -- Reset to 0 by any modification to keys */ + uint32_t dk_version; + + /* Number of usable entries in dk_entries. */ + Py_ssize_t dk_usable; + + /* Number of used entries in dk_entries. */ + Py_ssize_t dk_nentries; + + + /* Actual hash table of dk_size entries. It holds indices in dk_entries, + or DKIX_EMPTY(-1) or DKIX_DUMMY(-2). + + Indices must be: 0 <= indice < USABLE_FRACTION(dk_size). + + The size in bytes of an indice depends on dk_size: + + - 1 byte if dk_size <= 0xff (char*) + - 2 bytes if dk_size <= 0xffff (int16_t*) + - 4 bytes if dk_size <= 0xffffffff (int32_t*) + - 8 bytes otherwise (int64_t*) + + Dynamically sized, SIZEOF_VOID_P is minimum. */ + char dk_indices[]; /* char is required to avoid strict aliasing. */ + + /* "PyDictKeyEntry or PyDictUnicodeEntry dk_entries[USABLE_FRACTION(DK_SIZE(dk))];" array follows: + see the DK_ENTRIES() / DK_UNICODE_ENTRIES() functions below */ +}; + +/* This must be no more than 250, for the prefix size to fit in one byte. */ +#define SHARED_KEYS_MAX_SIZE 30 +#define NEXT_LOG2_SHARED_KEYS_MAX_SIZE 6 + +/* Layout of dict values: + * + * The PyObject *values are preceded by an array of bytes holding + * the insertion order and size. + * [-1] = prefix size. [-2] = used size. size[-2-n...] = insertion order. 
+ */
+struct _dictvalues {
+    uint8_t capacity;
+    uint8_t size;
+    uint8_t embedded;
+    uint8_t valid;
+    PyObject *values[1];
+};
+
+#define DK_LOG_SIZE(dk)  _Py_RVALUE((dk)->dk_log2_size)
+#if SIZEOF_VOID_P > 4
+#define DK_SIZE(dk)      (((int64_t)1)<<DK_LOG_SIZE(dk))
+#else
+#define DK_SIZE(dk)      (1<<DK_LOG_SIZE(dk))
+#endif
+
+static inline void* _DK_ENTRIES(PyDictKeysObject *dk) {
+    int8_t *indices = (int8_t*)(dk->dk_indices);
+    size_t index = (size_t)1 << dk->dk_log2_index_bytes;
+    return (&indices[index]);
+}
+
+static inline PyDictKeyEntry* DK_ENTRIES(PyDictKeysObject *dk) {
+    assert(dk->dk_kind == DICT_KEYS_GENERAL);
+    return (PyDictKeyEntry*)_DK_ENTRIES(dk);
+}
+static inline PyDictUnicodeEntry* DK_UNICODE_ENTRIES(PyDictKeysObject *dk) {
+    assert(dk->dk_kind != DICT_KEYS_GENERAL);
+    return (PyDictUnicodeEntry*)_DK_ENTRIES(dk);
+}
+
+#define DK_IS_UNICODE(dk) ((dk)->dk_kind != DICT_KEYS_GENERAL)
+
+#define DICT_VERSION_INCREMENT (1 << (DICT_MAX_WATCHERS + DICT_WATCHED_MUTATION_BITS))
+#define DICT_WATCHER_MASK ((1 << DICT_MAX_WATCHERS) - 1)
+#define DICT_WATCHER_AND_MODIFICATION_MASK ((1 << (DICT_MAX_WATCHERS + DICT_WATCHED_MUTATION_BITS)) - 1)
+
+#ifdef Py_GIL_DISABLED
+
+#define THREAD_LOCAL_DICT_VERSION_COUNT 256
+#define THREAD_LOCAL_DICT_VERSION_BATCH (THREAD_LOCAL_DICT_VERSION_COUNT * DICT_VERSION_INCREMENT)
+
+static inline uint64_t
+dict_next_version(PyInterpreterState *interp)
+{
+    PyThreadState *tstate = PyThreadState_GET();
+    uint64_t cur_progress = (tstate->dict_global_version &
+                             (THREAD_LOCAL_DICT_VERSION_BATCH - 1));
+    if (cur_progress == 0) {
+        uint64_t next = _Py_atomic_add_uint64(&interp->dict_state.global_version,
+                                              THREAD_LOCAL_DICT_VERSION_BATCH);
+        tstate->dict_global_version = next;
+    }
+    return tstate->dict_global_version += DICT_VERSION_INCREMENT;
+}
+
+#define DICT_NEXT_VERSION(INTERP) dict_next_version(INTERP)
+
+#else
+#define DICT_NEXT_VERSION(INTERP) \
+    ((INTERP)->dict_state.global_version += DICT_VERSION_INCREMENT)
+#endif
+
+void
+_PyDict_SendEvent(int watcher_bits,
+                  PyDict_WatchEvent event,
+                  PyDictObject *mp,
+                  PyObject *key,
+                  PyObject *value);
+
+static inline uint64_t
+_PyDict_NotifyEvent(PyInterpreterState *interp,
+                    PyDict_WatchEvent event,
+                    PyDictObject *mp,
+                    PyObject *key,
+                    PyObject *value)
+{
+    assert(Py_REFCNT((PyObject*)mp) > 0);
+    int watcher_bits = mp->ma_version_tag & DICT_WATCHER_MASK;
+    if (watcher_bits) {
+        RARE_EVENT_STAT_INC(watched_dict_modification);
+        _PyDict_SendEvent(watcher_bits, event, mp, key, value);
+    }
+    return DICT_NEXT_VERSION(interp) | (mp->ma_version_tag & DICT_WATCHER_AND_MODIFICATION_MASK);
+}
+
+extern PyDictObject *_PyObject_MaterializeManagedDict(PyObject *obj);
+
+PyAPI_FUNC(PyObject *)_PyDict_FromItems(
+    PyObject *const *keys, Py_ssize_t keys_offset,
+    PyObject *const *values, Py_ssize_t values_offset,
+    Py_ssize_t length);
+
+static inline uint8_t *
+get_insertion_order_array(PyDictValues *values)
+{
+    return (uint8_t *)&values->values[values->capacity];
+}
+
+static inline void
+_PyDictValues_AddToInsertionOrder(PyDictValues *values, Py_ssize_t ix)
+{
+    assert(ix < SHARED_KEYS_MAX_SIZE);
+    int size = values->size;
+    uint8_t *array = get_insertion_order_array(values);
+    assert(size < values->capacity);
+    assert(((uint8_t)ix) == ix);
+    array[size] = (uint8_t)ix;
+    values->size = size+1;
+}
+
+static inline size_t
+shared_keys_usable_size(PyDictKeysObject *keys)
+{
+    // dk_usable will decrease for each instance that is created and each
+    // value that is added. dk_nentries will increase for each value that
+    // is added. We want to always return the right value or larger.
+ // We therefore increase dk_nentries first and we decrease dk_usable + // second, and conversely here we read dk_usable first and dk_entries + // second (to avoid the case where we read entries before the increment + // and read usable after the decrement) + Py_ssize_t dk_usable = FT_ATOMIC_LOAD_SSIZE_ACQUIRE(keys->dk_usable); + Py_ssize_t dk_nentries = FT_ATOMIC_LOAD_SSIZE_ACQUIRE(keys->dk_nentries); + return dk_nentries + dk_usable; +} + +static inline size_t +_PyInlineValuesSize(PyTypeObject *tp) +{ + PyDictKeysObject *keys = ((PyHeapTypeObject*)tp)->ht_cached_keys; + assert(keys != NULL); + size_t size = shared_keys_usable_size(keys); + size_t prefix_size = _Py_SIZE_ROUND_UP(size, sizeof(PyObject *)); + assert(prefix_size < 256); + return prefix_size + (size + 1) * sizeof(PyObject *); +} + +int +_PyDict_DetachFromObject(PyDictObject *dict, PyObject *obj); + +PyDictObject *_PyObject_MaterializeManagedDict_LockHeld(PyObject *); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_DICT_H */ diff --git a/Include/internal/pycore_dict_state.h b/Include/internal/pycore_dict_state.h new file mode 100644 index 0000000000000000000000000000000000000000..1a44755c7a01a3a34f908220ef165173ecf18570 --- /dev/null +++ b/Include/internal/pycore_dict_state.h @@ -0,0 +1,32 @@ +#ifndef Py_INTERNAL_DICT_STATE_H +#define Py_INTERNAL_DICT_STATE_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#define DICT_MAX_WATCHERS 8 +#define DICT_WATCHED_MUTATION_BITS 4 + +struct _Py_dict_state { + /*Global counter used to set ma_version_tag field of dictionary. + * It is incremented each time that a dictionary is created and each + * time that a dictionary is modified. */ + uint64_t global_version; + uint32_t next_keys_version; + PyDict_WatchCallback watchers[DICT_MAX_WATCHERS]; +}; + +#define _dict_state_INIT \ + { \ + .next_keys_version = 2, \ + } + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_DICT_STATE_H */ diff --git a/Include/internal/pycore_dtoa.h b/Include/internal/pycore_dtoa.h new file mode 100644 index 0000000000000000000000000000000000000000..e4222c5267d6bec25a2e0c954f731963f2b9ac1b --- /dev/null +++ b/Include/internal/pycore_dtoa.h @@ -0,0 +1,75 @@ +#ifndef Py_INTERNAL_DTOA_H +#define Py_INTERNAL_DTOA_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_pymath.h" // _PY_SHORT_FLOAT_REPR + + +typedef uint32_t ULong; + +struct +Bigint { + struct Bigint *next; + int k, maxwds, sign, wds; + ULong x[1]; +}; + +#if defined(Py_USING_MEMORY_DEBUGGER) || _PY_SHORT_FLOAT_REPR == 0 + +struct _dtoa_state { + int _not_used; +}; +#define _dtoa_state_INIT(INTERP) \ + {0} + +#else // !Py_USING_MEMORY_DEBUGGER && _PY_SHORT_FLOAT_REPR != 0 + +/* The size of the Bigint freelist */ +#define Bigint_Kmax 7 + +/* The size of the cached powers of 5 array */ +#define Bigint_Pow5size 8 + +#ifndef PRIVATE_MEM +#define PRIVATE_MEM 2304 +#endif +#define Bigint_PREALLOC_SIZE \ + ((PRIVATE_MEM+sizeof(double)-1)/sizeof(double)) + +struct _dtoa_state { + // p5s is an array of powers of 5 of the form: + // 5**(2**(i+2)) for 0 <= i < Bigint_Pow5size + struct Bigint *p5s[Bigint_Pow5size]; + // XXX This should be freed during runtime fini. 
+    struct Bigint *freelist[Bigint_Kmax+1];
+    double preallocated[Bigint_PREALLOC_SIZE];
+    double *preallocated_next;
+};
+#define _dtoa_state_INIT(INTERP) \
+    { \
+        .preallocated_next = (INTERP)->dtoa.preallocated, \
+    }
+
+#endif  // !Py_USING_MEMORY_DEBUGGER
+
+
+extern double _Py_dg_strtod(const char *str, char **ptr);
+extern char* _Py_dg_dtoa(double d, int mode, int ndigits,
+                         int *decpt, int *sign, char **rve);
+extern void _Py_dg_freedtoa(char *s);
+
+
+extern PyStatus _PyDtoa_Init(PyInterpreterState *interp);
+extern void _PyDtoa_Fini(PyInterpreterState *interp);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_DTOA_H */
diff --git a/Include/internal/pycore_emscripten_signal.h b/Include/internal/pycore_emscripten_signal.h
new file mode 100644
index 0000000000000000000000000000000000000000..754193e21dec5a5608200d4302bbfc6c6b1e7eb1
--- /dev/null
+++ b/Include/internal/pycore_emscripten_signal.h
@@ -0,0 +1,30 @@
+#ifndef Py_EMSCRIPTEN_SIGNAL_H
+#define Py_EMSCRIPTEN_SIGNAL_H
+
+#if defined(__EMSCRIPTEN__)
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+void
+_Py_CheckEmscriptenSignals(void);
+
+void
+_Py_CheckEmscriptenSignalsPeriodically(void);
+
+#define _Py_CHECK_EMSCRIPTEN_SIGNALS() _Py_CheckEmscriptenSignals()
+
+#define _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY() _Py_CheckEmscriptenSignalsPeriodically()
+
+extern int Py_EMSCRIPTEN_SIGNAL_HANDLING;
+extern int _Py_emscripten_signal_clock;
+
+#else
+
+#define _Py_CHECK_EMSCRIPTEN_SIGNALS()
+#define _Py_CHECK_EMSCRIPTEN_SIGNALS_PERIODICALLY()
+
+#endif // defined(__EMSCRIPTEN__)
+
+#endif // ndef Py_EMSCRIPTEN_SIGNAL_H
diff --git a/Include/internal/pycore_emscripten_trampoline.h b/Include/internal/pycore_emscripten_trampoline.h
new file mode 100644
index 0000000000000000000000000000000000000000..e519c99ad86ccefcca28aebe3368c079d61fcce1
--- /dev/null
+++ b/Include/internal/pycore_emscripten_trampoline.h
@@ -0,0 +1,81 @@
+#ifndef Py_EMSCRIPTEN_TRAMPOLINE_H
+#define Py_EMSCRIPTEN_TRAMPOLINE_H
+
+#include "pycore_runtime.h"  // _PyRuntimeState
+
+/**
+ * C function call trampolines to mitigate bad function pointer casts.
+ *
+ * Section 6.3.2.3, paragraph 8 of the C standard reads:
+ *
+ *     A pointer to a function of one type may be converted to a pointer to a
+ *     function of another type and back again; the result shall compare equal to
+ *     the original pointer. If a converted pointer is used to call a function
+ *     whose type is not compatible with the pointed-to type, the behavior is
+ *     undefined.
+ *
+ * Typical native ABIs ignore additional arguments or fill in missing values
+ * with 0/NULL in a function pointer cast. Compilers do not warn when a
+ * function pointer is explicitly cast to an incompatible type.
+ *
+ * Bad fpcasts are an issue in WebAssembly. WASM's indirect_call has strict
+ * function signature checks. Argument count, types, and return type must match.
+ *
+ * Third party code unintentionally relies on problematic fpcasts. The call
+ * trampoline mitigates common occurrences of bad fpcasts on Emscripten.
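+ *
+ * For example (illustrative, not from the original header): a METH_NOARGS
+ * method is defined with two parameters -- PyObject *(*)(PyObject *, PyObject *)
+ * -- but the call sites below invoke it through PyCFunctionWithKeywords,
+ * which takes three. Native ABIs quietly ignore the extra argument, while
+ * WASM's indirect_call traps on the signature mismatch, so the call is
+ * routed through one of the JavaScript trampolines declared below.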
+ */
+
+#if defined(__EMSCRIPTEN__) && defined(PY_CALL_TRAMPOLINE)
+
+void _Py_EmscriptenTrampoline_Init(_PyRuntimeState *runtime);
+
+PyObject*
+_PyEM_TrampolineCall_JavaScript(PyCFunctionWithKeywords func,
+                                PyObject* self,
+                                PyObject* args,
+                                PyObject* kw);
+
+PyObject*
+_PyEM_TrampolineCall_Reflection(PyCFunctionWithKeywords func,
+                                PyObject* self,
+                                PyObject* args,
+                                PyObject* kw);
+
+#define _PyEM_TrampolineCall(meth, self, args, kw) \
+    ((_PyRuntime.wasm_type_reflection_available) ? \
+        (_PyEM_TrampolineCall_Reflection((PyCFunctionWithKeywords)(meth), (self), (args), (kw))) : \
+        (_PyEM_TrampolineCall_JavaScript((PyCFunctionWithKeywords)(meth), (self), (args), (kw))))
+
+#define _PyCFunction_TrampolineCall(meth, self, args) \
+    _PyEM_TrampolineCall( \
+        (*(PyCFunctionWithKeywords)(void(*)(void))(meth)), (self), (args), NULL)
+
+#define _PyCFunctionWithKeywords_TrampolineCall(meth, self, args, kw) \
+    _PyEM_TrampolineCall((meth), (self), (args), (kw))
+
+#define descr_set_trampoline_call(set, obj, value, closure) \
+    ((int)_PyEM_TrampolineCall((PyCFunctionWithKeywords)(set), (obj), (value), (PyObject*)(closure)))
+
+#define descr_get_trampoline_call(get, obj, closure) \
+    _PyEM_TrampolineCall((PyCFunctionWithKeywords)(get), (obj), (PyObject*)(closure), NULL)
+
+
+#else // defined(__EMSCRIPTEN__) && defined(PY_CALL_TRAMPOLINE)
+
+#define _Py_EmscriptenTrampoline_Init(runtime)
+
+#define _PyCFunction_TrampolineCall(meth, self, args) \
+    (meth)((self), (args))
+
+#define _PyCFunctionWithKeywords_TrampolineCall(meth, self, args, kw) \
+    (meth)((self), (args), (kw))
+
+#define descr_set_trampoline_call(set, obj, value, closure) \
+    (set)((obj), (value), (closure))
+
+#define descr_get_trampoline_call(get, obj, closure) \
+    (get)((obj), (closure))
+
+#endif // defined(__EMSCRIPTEN__) && defined(PY_CALL_TRAMPOLINE)
+
+#endif // ndef Py_EMSCRIPTEN_TRAMPOLINE_H
diff --git a/Include/internal/pycore_exceptions.h b/Include/internal/pycore_exceptions.h
new file mode 100644
index 0000000000000000000000000000000000000000..26456d1966bbb0eb6a7becedabe8054d49d8dac7
--- /dev/null
+++ b/Include/internal/pycore_exceptions.h
@@ -0,0 +1,40 @@
+#ifndef Py_INTERNAL_EXCEPTIONS_H
+#define Py_INTERNAL_EXCEPTIONS_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+
+/* runtime lifecycle */
+
+extern PyStatus _PyExc_InitState(PyInterpreterState *);
+extern PyStatus _PyExc_InitGlobalObjects(PyInterpreterState *);
+extern int _PyExc_InitTypes(PyInterpreterState *);
+extern void _PyExc_Fini(PyInterpreterState *);
+
+
+/* other API */
+
+struct _Py_exc_state {
+    // The dict mapping from errno codes to OSError subclasses
+    PyObject *errnomap;
+    PyBaseExceptionObject *memerrors_freelist;
+    int memerrors_numfree;
+#ifdef Py_GIL_DISABLED
+    PyMutex memerrors_lock;
+#endif
+    // The ExceptionGroup type
+    PyObject *PyExc_ExceptionGroup;
+};
+
+extern void _PyExc_ClearExceptionGroupType(PyInterpreterState *);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_EXCEPTIONS_H */
diff --git a/Include/internal/pycore_faulthandler.h b/Include/internal/pycore_faulthandler.h
new file mode 100644
index 0000000000000000000000000000000000000000..6dd7d8d7ca9792e200d43c02867fb0bd59f12c53
--- /dev/null
+++ b/Include/internal/pycore_faulthandler.h
@@ -0,0 +1,99 @@
+#ifndef Py_INTERNAL_FAULTHANDLER_H
+#define Py_INTERNAL_FAULTHANDLER_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#ifdef HAVE_SIGACTION
+# include <signal.h>              // sigaction
+#endif
+
+
+#ifndef MS_WINDOWS
+   /* register() is useless on Windows, because only SIGSEGV, SIGABRT and
+      SIGILL can be handled by the process, and these signals can only be used
+      with enable(), not using register() */
+# define FAULTHANDLER_USER
+#endif
+
+
+#ifdef HAVE_SIGACTION
+/* Using an alternative stack requires sigaltstack()
+   and sigaction() SA_ONSTACK */
+# ifdef HAVE_SIGALTSTACK
+#  define FAULTHANDLER_USE_ALT_STACK
+# endif
+typedef struct sigaction _Py_sighandler_t;
+#else
+typedef PyOS_sighandler_t _Py_sighandler_t;
+#endif  // HAVE_SIGACTION
+
+
+#ifdef FAULTHANDLER_USER
+struct faulthandler_user_signal {
+    int enabled;
+    PyObject *file;
+    int fd;
+    int all_threads;
+    int chain;
+    _Py_sighandler_t previous;
+    PyInterpreterState *interp;
+};
+#endif /* FAULTHANDLER_USER */
+
+
+struct _faulthandler_runtime_state {
+    struct {
+        int enabled;
+        PyObject *file;
+        int fd;
+        int all_threads;
+        PyInterpreterState *interp;
+#ifdef MS_WINDOWS
+        void *exc_handler;
+#endif
+    } fatal_error;
+
+    struct {
+        PyObject *file;
+        int fd;
+        PY_TIMEOUT_T timeout_us;   /* timeout in microseconds */
+        int repeat;
+        PyInterpreterState *interp;
+        int exit;
+        char *header;
+        size_t header_len;
+        /* The main thread always holds this lock. It is only released when
+           faulthandler_thread() is interrupted before this thread exits, or at
+           Python exit. */
+        PyThread_type_lock cancel_event;
+        /* released by child thread when joined */
+        PyThread_type_lock running;
+    } thread;
+
+#ifdef FAULTHANDLER_USER
+    struct faulthandler_user_signal *user_signals;
+#endif
+
+#ifdef FAULTHANDLER_USE_ALT_STACK
+    stack_t stack;
+    stack_t old_stack;
+#endif
+};
+
+#define _faulthandler_runtime_state_INIT \
+    { \
+        .fatal_error = { \
+            .fd = -1, \
+        }, \
+    }
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_FAULTHANDLER_H */
diff --git a/Include/internal/pycore_fileutils.h b/Include/internal/pycore_fileutils.h
new file mode 100644
index 0000000000000000000000000000000000000000..13f86b01bbfe8ff80069f16b47ad89d2ffc28f7d
--- /dev/null
+++ b/Include/internal/pycore_fileutils.h
@@ -0,0 +1,335 @@
+#ifndef Py_INTERNAL_FILEUTILS_H
+#define Py_INTERNAL_FILEUTILS_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include <locale.h>               // struct lconv
+
+
+/* A routine to check if a file descriptor can be select()-ed.
+ */
+#ifdef _MSC_VER
+    /* On Windows, any socket fd can be select()-ed, no matter how high */
+#   define _PyIsSelectable_fd(FD) (1)
+#else
+#   define _PyIsSelectable_fd(FD) ((unsigned int)(FD) < (unsigned int)FD_SETSIZE)
+#endif
+
+struct _fileutils_state {
+    int force_ascii;
+};
+
+typedef enum {
+    _Py_ERROR_UNKNOWN=0,
+    _Py_ERROR_STRICT,
+    _Py_ERROR_SURROGATEESCAPE,
+    _Py_ERROR_REPLACE,
+    _Py_ERROR_IGNORE,
+    _Py_ERROR_BACKSLASHREPLACE,
+    _Py_ERROR_SURROGATEPASS,
+    _Py_ERROR_XMLCHARREFREPLACE,
+    _Py_ERROR_OTHER
+} _Py_error_handler;
+
+// Export for '_testinternalcapi' shared extension
+PyAPI_FUNC(_Py_error_handler) _Py_GetErrorHandler(const char *errors);
+
+// Export for '_testinternalcapi' shared extension
+PyAPI_FUNC(int) _Py_DecodeLocaleEx(
+    const char *arg,
+    wchar_t **wstr,
+    size_t *wlen,
+    const char **reason,
+    int current_locale,
+    _Py_error_handler errors);
+
+// Export for '_testinternalcapi' shared extension
+PyAPI_FUNC(int) _Py_EncodeLocaleEx(
+    const wchar_t *text,
+    char **str,
+    size_t *error_pos,
+    const char **reason,
+    int current_locale,
+    _Py_error_handler errors);
+
+extern char* _Py_EncodeLocaleRaw(
+    const wchar_t *text,
+    size_t *error_pos);
+
+extern PyObject* _Py_device_encoding(int);
+
+#if defined(MS_WINDOWS) || defined(__APPLE__)
+    /* On Windows, the count parameter of read() is an int (bpo-9015, bpo-9611).
+       On macOS 10.13, read() and write() with more than INT_MAX bytes
+       fail with EINVAL (bpo-24658). */
+#   define _PY_READ_MAX  INT_MAX
+#   define _PY_WRITE_MAX INT_MAX
+#else
+    /* write() should truncate the input to PY_SSIZE_T_MAX bytes,
+       but it's safer to do it ourselves to have portable behaviour */
+#   define _PY_READ_MAX  PY_SSIZE_T_MAX
+#   define _PY_WRITE_MAX PY_SSIZE_T_MAX
+#endif
+
+#ifdef MS_WINDOWS
+struct _Py_stat_struct {
+    uint64_t st_dev;
+    uint64_t st_ino;
+    unsigned short st_mode;
+    int st_nlink;
+    int st_uid;
+    int st_gid;
+    unsigned long st_rdev;
+    __int64 st_size;
+    time_t st_atime;
+    int st_atime_nsec;
+    time_t st_mtime;
+    int st_mtime_nsec;
+    time_t st_ctime;
+    int st_ctime_nsec;
+    time_t st_birthtime;
+    int st_birthtime_nsec;
+    unsigned long st_file_attributes;
+    unsigned long st_reparse_tag;
+    uint64_t st_ino_high;
+};
+#else
+#  define _Py_stat_struct stat
+#endif
+
+// Export for 'mmap' shared extension
+PyAPI_FUNC(int) _Py_fstat(
+    int fd,
+    struct _Py_stat_struct *status);
+
+// Export for 'mmap' shared extension
+PyAPI_FUNC(int) _Py_fstat_noraise(
+    int fd,
+    struct _Py_stat_struct *status);
+
+// Export for '_tkinter' shared extension
+PyAPI_FUNC(int) _Py_stat(
+    PyObject *path,
+    struct stat *status);
+
+// Export for 'select' shared extension (Solaris newDevPollObject())
+PyAPI_FUNC(int) _Py_open(
+    const char *pathname,
+    int flags);
+
+// Export for '_posixsubprocess' shared extension
+PyAPI_FUNC(int) _Py_open_noraise(
+    const char *pathname,
+    int flags);
+
+extern FILE* _Py_wfopen(
+    const wchar_t *path,
+    const wchar_t *mode);
+
+extern Py_ssize_t _Py_read(
+    int fd,
+    void *buf,
+    size_t count);
+
+// Export for 'select' shared extension (Solaris devpoll_flush())
+PyAPI_FUNC(Py_ssize_t) _Py_write(
+    int fd,
+    const void *buf,
+    size_t count);
+
+// Export for '_posixsubprocess' shared extension
+PyAPI_FUNC(Py_ssize_t) _Py_write_noraise(
+    int fd,
+    const void *buf,
+    size_t count);
+
+#ifdef HAVE_READLINK
+extern int _Py_wreadlink(
+    const wchar_t *path,
+    wchar_t *buf,
+    /* Number of characters of 'buf' buffer
+       including the trailing NUL character */
+    size_t buflen);
+#endif
+
+#ifdef HAVE_REALPATH
+extern wchar_t*
_Py_wrealpath( + const wchar_t *path, + wchar_t *resolved_path, + /* Number of characters of 'resolved_path' buffer + including the trailing NUL character */ + size_t resolved_path_len); +#endif + +extern wchar_t* _Py_wgetcwd( + wchar_t *buf, + /* Number of characters of 'buf' buffer + including the trailing NUL character */ + size_t buflen); + +extern int _Py_get_inheritable(int fd); + +// Export for '_socket' shared extension +PyAPI_FUNC(int) _Py_set_inheritable(int fd, int inheritable, + int *atomic_flag_works); + +// Export for '_posixsubprocess' shared extension +PyAPI_FUNC(int) _Py_set_inheritable_async_safe(int fd, int inheritable, + int *atomic_flag_works); + +// Export for '_socket' shared extension +PyAPI_FUNC(int) _Py_dup(int fd); + +extern int _Py_get_blocking(int fd); + +extern int _Py_set_blocking(int fd, int blocking); + +#ifdef MS_WINDOWS +extern void* _Py_get_osfhandle_noraise(int fd); + +// Export for '_testconsole' shared extension +PyAPI_FUNC(void*) _Py_get_osfhandle(int fd); + +extern int _Py_open_osfhandle_noraise(void *handle, int flags); + +extern int _Py_open_osfhandle(void *handle, int flags); +#endif /* MS_WINDOWS */ + +// This is used after getting NULL back from Py_DecodeLocale(). +#define DECODE_LOCALE_ERR(NAME, LEN) \ + ((LEN) == (size_t)-2) \ + ? _PyStatus_ERR("cannot decode " NAME) \ + : _PyStatus_NO_MEMORY() + +extern int _Py_HasFileSystemDefaultEncodeErrors; + +extern int _Py_DecodeUTF8Ex( + const char *arg, + Py_ssize_t arglen, + wchar_t **wstr, + size_t *wlen, + const char **reason, + _Py_error_handler errors); + +extern int _Py_EncodeUTF8Ex( + const wchar_t *text, + char **str, + size_t *error_pos, + const char **reason, + int raw_malloc, + _Py_error_handler errors); + +extern wchar_t* _Py_DecodeUTF8_surrogateescape( + const char *arg, + Py_ssize_t arglen, + size_t *wlen); + +extern int +_Py_wstat(const wchar_t *, struct stat *); + +extern int _Py_GetForceASCII(void); + +/* Reset "force ASCII" mode (if it was initialized). + + This function should be called when Python changes the LC_CTYPE locale, + so the "force ASCII" mode can be detected again on the new locale + encoding. 
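+
+   A caller that switches the locale would pair the two calls roughly as
+   in this sketch (assuming <locale.h> is included):
+
+       setlocale(LC_CTYPE, "");   // adopt the user's LC_CTYPE locale
+       _Py_ResetForceASCII();     // force re-detection on the new locale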
*/ +extern void _Py_ResetForceASCII(void); + + +extern int _Py_GetLocaleconvNumeric( + struct lconv *lc, + PyObject **decimal_point, + PyObject **thousands_sep); + +// Export for '_posixsubprocess' (on macOS) +PyAPI_FUNC(void) _Py_closerange(int first, int last); + +extern wchar_t* _Py_GetLocaleEncoding(void); +extern PyObject* _Py_GetLocaleEncodingObject(void); + +#ifdef HAVE_NON_UNICODE_WCHAR_T_REPRESENTATION +extern int _Py_LocaleUsesNonUnicodeWchar(void); + +extern wchar_t* _Py_DecodeNonUnicodeWchar( + const wchar_t* native, + Py_ssize_t size); + +extern int _Py_EncodeNonUnicodeWchar_InPlace( + wchar_t* unicode, + Py_ssize_t size); +#endif + +extern int _Py_isabs(const wchar_t *path); +extern int _Py_abspath(const wchar_t *path, wchar_t **abspath_p); +#ifdef MS_WINDOWS +extern int _PyOS_getfullpathname(const wchar_t *path, wchar_t **abspath_p); +#endif +extern wchar_t* _Py_join_relfile(const wchar_t *dirname, + const wchar_t *relfile); +extern int _Py_add_relfile(wchar_t *dirname, + const wchar_t *relfile, + size_t bufsize); +extern size_t _Py_find_basename(const wchar_t *filename); + +// Export for '_testinternalcapi' shared extension +PyAPI_FUNC(wchar_t*) _Py_normpath(wchar_t *path, Py_ssize_t size); + +extern wchar_t *_Py_normpath_and_size(wchar_t *path, Py_ssize_t size, Py_ssize_t *length); + +// The Windows Games API family does not provide these functions +// so provide our own implementations. Remove them in case they get added +// to the Games API family +#if defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) +#include // HRESULT + +extern HRESULT PathCchSkipRoot(const wchar_t *pszPath, const wchar_t **ppszRootEnd); +#endif /* defined(MS_WINDOWS_GAMES) && !defined(MS_WINDOWS_DESKTOP) */ + +extern void _Py_skiproot(const wchar_t *path, Py_ssize_t size, Py_ssize_t *drvsize, Py_ssize_t *rootsize); + +// Macros to protect CRT calls against instant termination when passed an +// invalid parameter (bpo-23524). IPH stands for Invalid Parameter Handler. +// Usage: +// +// _Py_BEGIN_SUPPRESS_IPH +// ... 
+// _Py_END_SUPPRESS_IPH +#if defined _MSC_VER && _MSC_VER >= 1900 + +# include // _set_thread_local_invalid_parameter_handler() + + extern _invalid_parameter_handler _Py_silent_invalid_parameter_handler; +# define _Py_BEGIN_SUPPRESS_IPH \ + { _invalid_parameter_handler _Py_old_handler = \ + _set_thread_local_invalid_parameter_handler(_Py_silent_invalid_parameter_handler); +# define _Py_END_SUPPRESS_IPH \ + _set_thread_local_invalid_parameter_handler(_Py_old_handler); } +#else +# define _Py_BEGIN_SUPPRESS_IPH +# define _Py_END_SUPPRESS_IPH +#endif /* _MSC_VER >= 1900 */ + +// Export for 'select' shared extension (Argument Clinic code) +PyAPI_FUNC(int) _PyLong_FileDescriptor_Converter(PyObject *, void *); + +// Export for test_peg_generator +PyAPI_FUNC(char*) _Py_UniversalNewlineFgetsWithSize(char *, int, FILE*, PyObject *, size_t*); + +extern int _PyFile_Flush(PyObject *); + +#ifndef MS_WINDOWS +extern int _Py_GetTicksPerSecond(long *ticks_per_second); +#endif + +// Export for '_testcapi' shared extension +PyAPI_FUNC(int) _Py_IsValidFD(int fd); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_FILEUTILS_H */ diff --git a/Include/internal/pycore_fileutils_windows.h b/Include/internal/pycore_fileutils_windows.h new file mode 100644 index 0000000000000000000000000000000000000000..b79aa9fb4653765cb7f171ade2b1570b5c451e9d --- /dev/null +++ b/Include/internal/pycore_fileutils_windows.h @@ -0,0 +1,98 @@ +#ifndef Py_INTERNAL_FILEUTILS_WINDOWS_H +#define Py_INTERNAL_FILEUTILS_WINDOWS_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#ifdef MS_WINDOWS + +#if !defined(NTDDI_WIN10_NI) || !(NTDDI_VERSION >= NTDDI_WIN10_NI) +typedef struct _FILE_STAT_BASIC_INFORMATION { + LARGE_INTEGER FileId; + LARGE_INTEGER CreationTime; + LARGE_INTEGER LastAccessTime; + LARGE_INTEGER LastWriteTime; + LARGE_INTEGER ChangeTime; + LARGE_INTEGER AllocationSize; + LARGE_INTEGER EndOfFile; + ULONG FileAttributes; + ULONG ReparseTag; + ULONG NumberOfLinks; + ULONG DeviceType; + ULONG DeviceCharacteristics; + ULONG Reserved; + LARGE_INTEGER VolumeSerialNumber; + FILE_ID_128 FileId128; +} FILE_STAT_BASIC_INFORMATION; + +typedef enum _FILE_INFO_BY_NAME_CLASS { + FileStatByNameInfo, + FileStatLxByNameInfo, + FileCaseSensitiveByNameInfo, + FileStatBasicByNameInfo, + MaximumFileInfoByNameClass +} FILE_INFO_BY_NAME_CLASS; +#endif + +typedef BOOL (WINAPI *PGetFileInformationByName)( + PCWSTR FileName, + FILE_INFO_BY_NAME_CLASS FileInformationClass, + PVOID FileInfoBuffer, + ULONG FileInfoBufferSize +); + +static inline BOOL _Py_GetFileInformationByName( + PCWSTR FileName, + FILE_INFO_BY_NAME_CLASS FileInformationClass, + PVOID FileInfoBuffer, + ULONG FileInfoBufferSize +) { + static PGetFileInformationByName GetFileInformationByName = NULL; + static int GetFileInformationByName_init = -1; + + if (GetFileInformationByName_init < 0) { + HMODULE hMod = LoadLibraryW(L"api-ms-win-core-file-l2-1-4"); + GetFileInformationByName_init = 0; + if (hMod) { + GetFileInformationByName = (PGetFileInformationByName)GetProcAddress( + hMod, "GetFileInformationByName"); + if (GetFileInformationByName) { + GetFileInformationByName_init = 1; + } else { + FreeLibrary(hMod); + } + } + } + + if (GetFileInformationByName_init <= 0) { + SetLastError(ERROR_NOT_SUPPORTED); + return FALSE; + } + return GetFileInformationByName(FileName, FileInformationClass, FileInfoBuffer, FileInfoBufferSize); +} + +static inline BOOL _Py_GetFileInformationByName_ErrorIsTrustworthy(int 
error) +{ + switch(error) { + case ERROR_FILE_NOT_FOUND: + case ERROR_PATH_NOT_FOUND: + case ERROR_NOT_READY: + case ERROR_BAD_NET_NAME: + case ERROR_BAD_NETPATH: + case ERROR_BAD_PATHNAME: + case ERROR_INVALID_NAME: + case ERROR_FILENAME_EXCED_RANGE: + return TRUE; + case ERROR_NOT_SUPPORTED: + return FALSE; + } + return FALSE; +} + +#endif + +#endif diff --git a/Include/internal/pycore_floatobject.h b/Include/internal/pycore_floatobject.h new file mode 100644 index 0000000000000000000000000000000000000000..f984df695696c3e6ffdf75a39fa11376499d3cca --- /dev/null +++ b/Include/internal/pycore_floatobject.h @@ -0,0 +1,62 @@ +#ifndef Py_INTERNAL_FLOATOBJECT_H +#define Py_INTERNAL_FLOATOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_freelist.h" // _PyFreeListState +#include "pycore_unicodeobject.h" // _PyUnicodeWriter + +/* runtime lifecycle */ + +extern void _PyFloat_InitState(PyInterpreterState *); +extern PyStatus _PyFloat_InitTypes(PyInterpreterState *); +extern void _PyFloat_FiniType(PyInterpreterState *); + + +/* other API */ + +enum _py_float_format_type { + _py_float_format_unknown, + _py_float_format_ieee_big_endian, + _py_float_format_ieee_little_endian, +}; + +struct _Py_float_runtime_state { + enum _py_float_format_type float_format; + enum _py_float_format_type double_format; +}; + + + + +PyAPI_FUNC(void) _PyFloat_ExactDealloc(PyObject *op); + + +extern void _PyFloat_DebugMallocStats(FILE* out); + + +/* Format the object based on the format_spec, as defined in PEP 3101 + (Advanced String Formatting). */ +extern int _PyFloat_FormatAdvancedWriter( + _PyUnicodeWriter *writer, + PyObject *obj, + PyObject *format_spec, + Py_ssize_t start, + Py_ssize_t end); + +extern PyObject* _Py_string_to_number_with_underscores( + const char *str, Py_ssize_t len, const char *what, PyObject *obj, void *arg, + PyObject *(*innerfunc)(const char *, Py_ssize_t, void *)); + +extern double _Py_parse_inf_or_nan(const char *p, char **endptr); + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_FLOATOBJECT_H */ diff --git a/Include/internal/pycore_flowgraph.h b/Include/internal/pycore_flowgraph.h new file mode 100644 index 0000000000000000000000000000000000000000..819117b83114bcd5cf7d6d64fb04f5a9e07b6b40 --- /dev/null +++ b/Include/internal/pycore_flowgraph.h @@ -0,0 +1,40 @@ +#ifndef Py_INTERNAL_CFG_H +#define Py_INTERNAL_CFG_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_compile.h" +#include "pycore_instruction_sequence.h" +#include "pycore_opcode_utils.h" + +struct _PyCfgBuilder; + +int _PyCfgBuilder_UseLabel(struct _PyCfgBuilder *g, _PyJumpTargetLabel lbl); +int _PyCfgBuilder_Addop(struct _PyCfgBuilder *g, int opcode, int oparg, _Py_SourceLocation loc); + +struct _PyCfgBuilder* _PyCfgBuilder_New(void); +void _PyCfgBuilder_Free(struct _PyCfgBuilder *g); +int _PyCfgBuilder_CheckSize(struct _PyCfgBuilder* g); + +int _PyCfg_OptimizeCodeUnit(struct _PyCfgBuilder *g, PyObject *consts, PyObject *const_cache, + int nlocals, int nparams, int firstlineno); + +int _PyCfg_ToInstructionSequence(struct _PyCfgBuilder *g, _PyInstructionSequence *seq); +int _PyCfg_OptimizedCfgToInstructionSequence(struct _PyCfgBuilder *g, _PyCompile_CodeUnitMetadata *umd, + int code_flags, int *stackdepth, int *nlocalsplus, + _PyInstructionSequence *seq); + +PyCodeObject * 
+_PyAssemble_MakeCodeObject(_PyCompile_CodeUnitMetadata *u, PyObject *const_cache, + PyObject *consts, int maxdepth, _PyInstructionSequence *instrs, + int nlocalsplus, int code_flags, PyObject *filename); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_CFG_H */ diff --git a/Include/internal/pycore_format.h b/Include/internal/pycore_format.h new file mode 100644 index 0000000000000000000000000000000000000000..1b8d57539ca505fbc56ecb2245785a8b37b85c9f --- /dev/null +++ b/Include/internal/pycore_format.h @@ -0,0 +1,27 @@ +#ifndef Py_INTERNAL_FORMAT_H +#define Py_INTERNAL_FORMAT_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +/* Format codes + * F_LJUST '-' + * F_SIGN '+' + * F_BLANK ' ' + * F_ALT '#' + * F_ZERO '0' + */ +#define F_LJUST (1<<0) +#define F_SIGN (1<<1) +#define F_BLANK (1<<2) +#define F_ALT (1<<3) +#define F_ZERO (1<<4) + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_FORMAT_H */ diff --git a/Include/internal/pycore_frame.h b/Include/internal/pycore_frame.h new file mode 100644 index 0000000000000000000000000000000000000000..af181e3760d231c626df1ec36b966c276483dc7d --- /dev/null +++ b/Include/internal/pycore_frame.h @@ -0,0 +1,330 @@ +#ifndef Py_INTERNAL_FRAME_H +#define Py_INTERNAL_FRAME_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include +#include // offsetof() +#include "pycore_code.h" // STATS + +/* See Objects/frame_layout.md for an explanation of the frame stack + * including explanation of the PyFrameObject and _PyInterpreterFrame + * structs. */ + + +struct _frame { + PyObject_HEAD + PyFrameObject *f_back; /* previous frame, or NULL */ + struct _PyInterpreterFrame *f_frame; /* points to the frame data */ + PyObject *f_trace; /* Trace function */ + int f_lineno; /* Current line number. Only valid if non-zero */ + char f_trace_lines; /* Emit per-line trace events? */ + char f_trace_opcodes; /* Emit per-opcode trace events? */ + PyObject *f_extra_locals; /* Dict for locals set by users using f_locals, could be NULL */ + /* This is purely for backwards compatibility for PyEval_GetLocals. + PyEval_GetLocals requires a borrowed reference so the actual reference + is stored here */ + PyObject *f_locals_cache; + /* The frame data, if this frame object owns the frame */ + PyObject *_f_frame_data[1]; +}; + +extern PyFrameObject* _PyFrame_New_NoTrack(PyCodeObject *code); + + +/* other API */ + +typedef enum _framestate { + FRAME_CREATED = -3, + FRAME_SUSPENDED = -2, + FRAME_SUSPENDED_YIELD_FROM = -1, + FRAME_EXECUTING = 0, + FRAME_COMPLETED = 1, + FRAME_CLEARED = 4 +} PyFrameState; + +#define FRAME_STATE_SUSPENDED(S) ((S) == FRAME_SUSPENDED || (S) == FRAME_SUSPENDED_YIELD_FROM) +#define FRAME_STATE_FINISHED(S) ((S) >= FRAME_COMPLETED) + +enum _frameowner { + FRAME_OWNED_BY_THREAD = 0, + FRAME_OWNED_BY_GENERATOR = 1, + FRAME_OWNED_BY_FRAME_OBJECT = 2, + FRAME_OWNED_BY_CSTACK = 3, +}; + +typedef struct _PyInterpreterFrame { + PyObject *f_executable; /* Strong reference (code object or None) */ + struct _PyInterpreterFrame *previous; + PyObject *f_funcobj; /* Strong reference. Only valid if not on C stack */ + PyObject *f_globals; /* Borrowed reference. Only valid if not on C stack */ + PyObject *f_builtins; /* Borrowed reference. Only valid if not on C stack */ + PyObject *f_locals; /* Strong reference, may be NULL. 
Only valid if not on C stack */ + PyFrameObject *frame_obj; /* Strong reference, may be NULL. Only valid if not on C stack */ + _Py_CODEUNIT *instr_ptr; /* Instruction currently executing (or about to begin) */ + int stacktop; /* Offset of TOS from localsplus */ + uint16_t return_offset; /* Only relevant during a function call */ + char owner; + /* Locals and stack */ + PyObject *localsplus[1]; +} _PyInterpreterFrame; + +#define _PyInterpreterFrame_LASTI(IF) \ + ((int)((IF)->instr_ptr - _PyCode_CODE(_PyFrame_GetCode(IF)))) + +static inline PyCodeObject *_PyFrame_GetCode(_PyInterpreterFrame *f) { + assert(PyCode_Check(f->f_executable)); + return (PyCodeObject *)f->f_executable; +} + +static inline PyObject **_PyFrame_Stackbase(_PyInterpreterFrame *f) { + return f->localsplus + _PyFrame_GetCode(f)->co_nlocalsplus; +} + +static inline PyObject *_PyFrame_StackPeek(_PyInterpreterFrame *f) { + assert(f->stacktop > _PyFrame_GetCode(f)->co_nlocalsplus); + assert(f->localsplus[f->stacktop-1] != NULL); + return f->localsplus[f->stacktop-1]; +} + +static inline PyObject *_PyFrame_StackPop(_PyInterpreterFrame *f) { + assert(f->stacktop > _PyFrame_GetCode(f)->co_nlocalsplus); + f->stacktop--; + return f->localsplus[f->stacktop]; +} + +static inline void _PyFrame_StackPush(_PyInterpreterFrame *f, PyObject *value) { + f->localsplus[f->stacktop] = value; + f->stacktop++; +} + +#define FRAME_SPECIALS_SIZE ((int)((sizeof(_PyInterpreterFrame)-1)/sizeof(PyObject *))) + +static inline int +_PyFrame_NumSlotsForCodeObject(PyCodeObject *code) +{ + /* This function needs to remain in sync with the calculation of + * co_framesize in Tools/build/deepfreeze.py */ + assert(code->co_framesize >= FRAME_SPECIALS_SIZE); + return code->co_framesize - FRAME_SPECIALS_SIZE; +} + +static inline void _PyFrame_Copy(_PyInterpreterFrame *src, _PyInterpreterFrame *dest) +{ + assert(src->stacktop >= _PyFrame_GetCode(src)->co_nlocalsplus); + *dest = *src; + for (int i = 1; i < src->stacktop; i++) { + dest->localsplus[i] = src->localsplus[i]; + } + // Don't leave a dangling pointer to the old frame when creating generators + // and coroutines: + dest->previous = NULL; +} + +/* Consumes reference to func and locals. + Does not initialize frame->previous, which happens + when frame is linked into the frame stack. + */ +static inline void +_PyFrame_Initialize( + _PyInterpreterFrame *frame, PyFunctionObject *func, + PyObject *locals, PyCodeObject *code, int null_locals_from) +{ + frame->f_funcobj = (PyObject *)func; + frame->f_executable = Py_NewRef(code); + frame->f_builtins = func->func_builtins; + frame->f_globals = func->func_globals; + frame->f_locals = locals; + frame->stacktop = code->co_nlocalsplus; + frame->frame_obj = NULL; + frame->instr_ptr = _PyCode_CODE(code); + frame->return_offset = 0; + frame->owner = FRAME_OWNED_BY_THREAD; + + for (int i = null_locals_from; i < code->co_nlocalsplus; i++) { + frame->localsplus[i] = NULL; + } +} + +/* Gets the pointer to the locals array + * that precedes this frame. + */ +static inline PyObject** +_PyFrame_GetLocalsArray(_PyInterpreterFrame *frame) +{ + return frame->localsplus; +} + +/* Fetches the stack pointer, and sets stacktop to -1. + Having stacktop <= 0 ensures that invalid + values are not visible to the cycle GC. + We choose -1 rather than 0 to assist debugging. 
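+
+   A sketch of the intended pairing (not a real call site):
+
+       PyObject **stack_pointer = _PyFrame_GetStackPointer(frame);
+       // ... push and pop values through stack_pointer ...
+       _PyFrame_SetStackPointer(frame, stack_pointer);  // restore stacktop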
*/ +static inline PyObject** +_PyFrame_GetStackPointer(_PyInterpreterFrame *frame) +{ + PyObject **sp = frame->localsplus + frame->stacktop; + frame->stacktop = -1; + return sp; +} + +static inline void +_PyFrame_SetStackPointer(_PyInterpreterFrame *frame, PyObject **stack_pointer) +{ + frame->stacktop = (int)(stack_pointer - frame->localsplus); +} + +/* Determine whether a frame is incomplete. + * A frame is incomplete if it is part way through + * creating cell objects or a generator or coroutine. + * + * Frames on the frame stack are incomplete until the + * first RESUME instruction. + * Frames owned by a generator are always complete. + */ +static inline bool +_PyFrame_IsIncomplete(_PyInterpreterFrame *frame) +{ + if (frame->owner == FRAME_OWNED_BY_CSTACK) { + return true; + } + return frame->owner != FRAME_OWNED_BY_GENERATOR && + frame->instr_ptr < _PyCode_CODE(_PyFrame_GetCode(frame)) + _PyFrame_GetCode(frame)->_co_firsttraceable; +} + +static inline _PyInterpreterFrame * +_PyFrame_GetFirstComplete(_PyInterpreterFrame *frame) +{ + while (frame && _PyFrame_IsIncomplete(frame)) { + frame = frame->previous; + } + return frame; +} + +static inline _PyInterpreterFrame * +_PyThreadState_GetFrame(PyThreadState *tstate) +{ + return _PyFrame_GetFirstComplete(tstate->current_frame); +} + +/* For use by _PyFrame_GetFrameObject + Do not call directly. */ +PyFrameObject * +_PyFrame_MakeAndSetFrameObject(_PyInterpreterFrame *frame); + +/* Gets the PyFrameObject for this frame, lazily + * creating it if necessary. + * Returns a borrowed reference */ +static inline PyFrameObject * +_PyFrame_GetFrameObject(_PyInterpreterFrame *frame) +{ + + assert(!_PyFrame_IsIncomplete(frame)); + PyFrameObject *res = frame->frame_obj; + if (res != NULL) { + return res; + } + return _PyFrame_MakeAndSetFrameObject(frame); +} + +void +_PyFrame_ClearLocals(_PyInterpreterFrame *frame); + +/* Clears all references in the frame. + * If take is non-zero, then the _PyInterpreterFrame frame + * may be transferred to the frame object it references + * instead of being cleared. Either way + * the caller no longer owns the references + * in the frame. + * take should be set to 1 for heap allocated + * frames like the ones in generators and coroutines. + */ +void +_PyFrame_ClearExceptCode(_PyInterpreterFrame * frame); + +int +_PyFrame_Traverse(_PyInterpreterFrame *frame, visitproc visit, void *arg); + +bool +_PyFrame_HasHiddenLocals(_PyInterpreterFrame *frame); + +PyObject * +_PyFrame_GetLocals(_PyInterpreterFrame *frame); + +static inline bool +_PyThreadState_HasStackSpace(PyThreadState *tstate, int size) +{ + assert( + (tstate->datastack_top == NULL && tstate->datastack_limit == NULL) + || + (tstate->datastack_top != NULL && tstate->datastack_limit != NULL) + ); + return tstate->datastack_top != NULL && + size < tstate->datastack_limit - tstate->datastack_top; +} + +extern _PyInterpreterFrame * +_PyThreadState_PushFrame(PyThreadState *tstate, size_t size); + +PyAPI_FUNC(void) _PyThreadState_PopFrame(PyThreadState *tstate, _PyInterpreterFrame *frame); + +/* Pushes a frame without checking for space. + * Must be guarded by _PyThreadState_HasStackSpace() + * Consumes reference to func. 
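+ *
+ * A sketch of the expected call pattern (illustrative):
+ *
+ *     PyCodeObject *code = (PyCodeObject *)func->func_code;
+ *     if (_PyThreadState_HasStackSpace(tstate, code->co_framesize)) {
+ *         frame = _PyFrame_PushUnchecked(tstate, func, 0);
+ *     }
+ *     else {
+ *         // fall back to a slow path that can grow the data stack
+ *     }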
*/ +static inline _PyInterpreterFrame * +_PyFrame_PushUnchecked(PyThreadState *tstate, PyFunctionObject *func, int null_locals_from) +{ + CALL_STAT_INC(frames_pushed); + PyCodeObject *code = (PyCodeObject *)func->func_code; + _PyInterpreterFrame *new_frame = (_PyInterpreterFrame *)tstate->datastack_top; + tstate->datastack_top += code->co_framesize; + assert(tstate->datastack_top < tstate->datastack_limit); + _PyFrame_Initialize(new_frame, func, NULL, code, null_locals_from); + return new_frame; +} + +/* Pushes a trampoline frame without checking for space. + * Must be guarded by _PyThreadState_HasStackSpace() */ +static inline _PyInterpreterFrame * +_PyFrame_PushTrampolineUnchecked(PyThreadState *tstate, PyCodeObject *code, int stackdepth) +{ + CALL_STAT_INC(frames_pushed); + _PyInterpreterFrame *frame = (_PyInterpreterFrame *)tstate->datastack_top; + tstate->datastack_top += code->co_framesize; + assert(tstate->datastack_top < tstate->datastack_limit); + frame->f_funcobj = Py_None; + frame->f_executable = Py_NewRef(code); +#ifdef Py_DEBUG + frame->f_builtins = NULL; + frame->f_globals = NULL; +#endif + frame->f_locals = NULL; + frame->stacktop = code->co_nlocalsplus + stackdepth; + frame->frame_obj = NULL; + frame->instr_ptr = _PyCode_CODE(code); + frame->owner = FRAME_OWNED_BY_THREAD; + frame->return_offset = 0; + return frame; +} + +static inline +PyGenObject *_PyFrame_GetGenerator(_PyInterpreterFrame *frame) +{ + assert(frame->owner == FRAME_OWNED_BY_GENERATOR); + size_t offset_in_gen = offsetof(PyGenObject, gi_iframe); + return (PyGenObject *)(((char *)frame) - offset_in_gen); +} + +PyAPI_FUNC(_PyInterpreterFrame *) +_PyEvalFramePushAndInit(PyThreadState *tstate, PyFunctionObject *func, + PyObject *locals, PyObject* const* args, + size_t argcount, PyObject *kwnames); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_FRAME_H */ diff --git a/Include/internal/pycore_freelist.h b/Include/internal/pycore_freelist.h new file mode 100644 index 0000000000000000000000000000000000000000..e684e084b8bef8e932addd46684083085ab15c68 --- /dev/null +++ b/Include/internal/pycore_freelist.h @@ -0,0 +1,153 @@ +#ifndef Py_INTERNAL_FREELIST_H +#define Py_INTERNAL_FREELIST_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +// PyTuple_MAXSAVESIZE - largest tuple to save on free list +// PyTuple_MAXFREELIST - maximum number of tuples of each size to save + +#ifdef WITH_FREELISTS +// with freelists +# define PyTuple_MAXSAVESIZE 20 +# define PyTuple_NFREELISTS PyTuple_MAXSAVESIZE +# define PyTuple_MAXFREELIST 2000 +# define PyList_MAXFREELIST 80 +# define PyDict_MAXFREELIST 80 +# define PyFloat_MAXFREELIST 100 +# define PyContext_MAXFREELIST 255 +# define _PyAsyncGen_MAXFREELIST 80 +# define _PyObjectStackChunk_MAXFREELIST 4 +#else +# define PyTuple_NFREELISTS 0 +# define PyTuple_MAXFREELIST 0 +# define PyList_MAXFREELIST 0 +# define PyDict_MAXFREELIST 0 +# define PyFloat_MAXFREELIST 0 +# define PyContext_MAXFREELIST 0 +# define _PyAsyncGen_MAXFREELIST 0 +# define _PyObjectStackChunk_MAXFREELIST 0 +#endif + +struct _Py_list_freelist { +#ifdef WITH_FREELISTS + PyListObject *items[PyList_MAXFREELIST]; + int numfree; +#endif +}; + +struct _Py_tuple_freelist { +#if WITH_FREELISTS + /* There is one freelist for each size from 1 to PyTuple_MAXSAVESIZE. + The empty tuple is handled separately. + + Each tuple stored in the array is the head of the linked list + (and the next available tuple) for that size. 
The actual tuple + object is used as the linked list node, with its first item + (ob_item[0]) pointing to the next node (i.e. the previous head). + Each linked list is initially NULL. */ + PyTupleObject *items[PyTuple_NFREELISTS]; + int numfree[PyTuple_NFREELISTS]; +#else + char _unused; // Empty structs are not allowed. +#endif +}; + +struct _Py_float_freelist { +#ifdef WITH_FREELISTS + /* Special free list + free_list is a singly-linked list of available PyFloatObjects, + linked via abuse of their ob_type members. */ + int numfree; + PyFloatObject *items; +#endif +}; + +struct _Py_dict_freelist { +#ifdef WITH_FREELISTS + /* Dictionary reuse scheme to save calls to malloc and free */ + PyDictObject *items[PyDict_MAXFREELIST]; + int numfree; +#endif +}; + +struct _Py_dictkeys_freelist { +#ifdef WITH_FREELISTS + /* Dictionary keys reuse scheme to save calls to malloc and free */ + PyDictKeysObject *items[PyDict_MAXFREELIST]; + int numfree; +#endif +}; + +struct _Py_slice_freelist { +#ifdef WITH_FREELISTS + /* Using a cache is very effective since typically only a single slice is + created and then deleted again. */ + PySliceObject *slice_cache; +#endif +}; + +struct _Py_context_freelist { +#ifdef WITH_FREELISTS + // List of free PyContext objects + PyContext *items; + int numfree; +#endif +}; + +struct _Py_async_gen_freelist { +#ifdef WITH_FREELISTS + /* Freelists boost performance 6-10%; they also reduce memory + fragmentation, as _PyAsyncGenWrappedValue and PyAsyncGenASend + are short-living objects that are instantiated for every + __anext__() call. */ + struct _PyAsyncGenWrappedValue* items[_PyAsyncGen_MAXFREELIST]; + int numfree; +#endif +}; + +struct _Py_async_gen_asend_freelist { +#ifdef WITH_FREELISTS + struct PyAsyncGenASend* items[_PyAsyncGen_MAXFREELIST]; + int numfree; +#endif +}; + +struct _PyObjectStackChunk; + +struct _Py_object_stack_freelist { + struct _PyObjectStackChunk *items; + Py_ssize_t numfree; +}; + +struct _Py_object_freelists { + struct _Py_float_freelist floats; + struct _Py_tuple_freelist tuples; + struct _Py_list_freelist lists; + struct _Py_dict_freelist dicts; + struct _Py_dictkeys_freelist dictkeys; + struct _Py_slice_freelist slices; + struct _Py_context_freelist contexts; + struct _Py_async_gen_freelist async_gens; + struct _Py_async_gen_asend_freelist async_gen_asends; + struct _Py_object_stack_freelist object_stacks; +}; + +extern void _PyObject_ClearFreeLists(struct _Py_object_freelists *freelists, int is_finalization); +extern void _PyTuple_ClearFreeList(struct _Py_object_freelists *freelists, int is_finalization); +extern void _PyFloat_ClearFreeList(struct _Py_object_freelists *freelists, int is_finalization); +extern void _PyList_ClearFreeList(struct _Py_object_freelists *freelists, int is_finalization); +extern void _PySlice_ClearFreeList(struct _Py_object_freelists *freelists, int is_finalization); +extern void _PyDict_ClearFreeList(struct _Py_object_freelists *freelists, int is_finalization); +extern void _PyAsyncGen_ClearFreeLists(struct _Py_object_freelists *freelists, int is_finalization); +extern void _PyContext_ClearFreeList(struct _Py_object_freelists *freelists, int is_finalization); +extern void _PyObjectStackChunk_ClearFreeList(struct _Py_object_freelists *freelists, int is_finalization); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_FREELIST_H */ diff --git a/Include/internal/pycore_function.h b/Include/internal/pycore_function.h new file mode 100644 index 
0000000000000000000000000000000000000000..6d44e933e8a8cb3ad03291f47a891f60b1938277 --- /dev/null +++ b/Include/internal/pycore_function.h @@ -0,0 +1,55 @@ +#ifndef Py_INTERNAL_FUNCTION_H +#define Py_INTERNAL_FUNCTION_H +#ifdef __cplusplus +extern "C" { +#endif + +#include "pycore_lock.h" + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +extern PyObject* _PyFunction_Vectorcall( + PyObject *func, + PyObject *const *stack, + size_t nargsf, + PyObject *kwnames); + +#define FUNC_MAX_WATCHERS 8 + +#define FUNC_VERSION_CACHE_SIZE (1<<12) /* Must be a power of 2 */ + +struct _func_version_cache_item { + PyFunctionObject *func; + PyObject *code; +}; + +struct _py_func_state { +#ifdef Py_GIL_DISABLED + // Protects next_version + PyMutex mutex; +#endif + + uint32_t next_version; + // Borrowed references to function and code objects whose + // func_version % FUNC_VERSION_CACHE_SIZE + // once was equal to the index in the table. + // They are cleared when the function or code object is deallocated. + struct _func_version_cache_item func_version_cache[FUNC_VERSION_CACHE_SIZE]; +}; + +extern PyFunctionObject* _PyFunction_FromConstructor(PyFrameConstructor *constr); + +extern uint32_t _PyFunction_GetVersionForCurrentState(PyFunctionObject *func); +PyAPI_FUNC(void) _PyFunction_SetVersion(PyFunctionObject *func, uint32_t version); +void _PyFunction_ClearCodeByVersion(uint32_t version); +PyFunctionObject *_PyFunction_LookupByVersion(uint32_t version, PyObject **p_code); + +extern PyObject *_Py_set_function_type_params( + PyThreadState* unused, PyObject *func, PyObject *type_params); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_FUNCTION_H */ diff --git a/Include/internal/pycore_gc.h b/Include/internal/pycore_gc.h new file mode 100644 index 0000000000000000000000000000000000000000..357177bcd6fd845852f36e5b11e8dd07249b0e0b --- /dev/null +++ b/Include/internal/pycore_gc.h @@ -0,0 +1,365 @@ +#ifndef Py_INTERNAL_GC_H +#define Py_INTERNAL_GC_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_freelist.h" // _PyFreeListState + +/* GC information is stored BEFORE the object structure. */ +typedef struct { + // Pointer to next object in the list. + // 0 means the object is not tracked + uintptr_t _gc_next; + + // Pointer to previous object in the list. + // Lowest two bits are used for flags documented later. + uintptr_t _gc_prev; +} PyGC_Head; + +#define _PyGC_Head_UNUSED PyGC_Head + + +/* Get an object's GC head */ +static inline PyGC_Head* _Py_AS_GC(PyObject *op) { + char *gc = ((char*)op) - sizeof(PyGC_Head); + return (PyGC_Head*)gc; +} + +/* Get the object given the GC head */ +static inline PyObject* _Py_FROM_GC(PyGC_Head *gc) { + char *op = ((char *)gc) + sizeof(PyGC_Head); + return (PyObject *)op; +} + + +/* Bit flags for ob_gc_bits (in Py_GIL_DISABLED builds) + * + * Setting the bits requires a relaxed store. The per-object lock must also be + * held, except when the object is only visible to a single thread (e.g. during + * object initialization or destruction). + * + * Reading the bits requires using a relaxed load, but does not require holding + * the per-object lock. 
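+ *
+ * Illustrative use of the helpers defined below:
+ *
+ *     _PyObject_SET_GC_BITS(op, _PyGC_BITS_TRACKED);        // writer: lock held
+ *     if (_PyObject_HAS_GC_BITS(op, _PyGC_BITS_TRACKED)) {  // reader: no lock
+ *         ...
+ *     }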
+ */ +#ifdef Py_GIL_DISABLED +# define _PyGC_BITS_TRACKED (1) // Tracked by the GC +# define _PyGC_BITS_FINALIZED (2) // tp_finalize was called +# define _PyGC_BITS_UNREACHABLE (4) +# define _PyGC_BITS_FROZEN (8) +# define _PyGC_BITS_SHARED (16) +# define _PyGC_BITS_SHARED_INLINE (32) +# define _PyGC_BITS_DEFERRED (64) // Use deferred reference counting +#endif + +#ifdef Py_GIL_DISABLED + +static inline void +_PyObject_SET_GC_BITS(PyObject *op, uint8_t new_bits) +{ + uint8_t bits = _Py_atomic_load_uint8_relaxed(&op->ob_gc_bits); + _Py_atomic_store_uint8_relaxed(&op->ob_gc_bits, bits | new_bits); +} + +static inline int +_PyObject_HAS_GC_BITS(PyObject *op, uint8_t bits) +{ + return (_Py_atomic_load_uint8_relaxed(&op->ob_gc_bits) & bits) != 0; +} + +static inline void +_PyObject_CLEAR_GC_BITS(PyObject *op, uint8_t bits_to_clear) +{ + uint8_t bits = _Py_atomic_load_uint8_relaxed(&op->ob_gc_bits); + _Py_atomic_store_uint8_relaxed(&op->ob_gc_bits, bits & ~bits_to_clear); +} + +#endif + +/* True if the object is currently tracked by the GC. */ +static inline int _PyObject_GC_IS_TRACKED(PyObject *op) { +#ifdef Py_GIL_DISABLED + return _PyObject_HAS_GC_BITS(op, _PyGC_BITS_TRACKED); +#else + PyGC_Head *gc = _Py_AS_GC(op); + return (gc->_gc_next != 0); +#endif +} +#define _PyObject_GC_IS_TRACKED(op) _PyObject_GC_IS_TRACKED(_Py_CAST(PyObject*, op)) + +/* True if the object may be tracked by the GC in the future, or already is. + This can be useful to implement some optimizations. */ +static inline int _PyObject_GC_MAY_BE_TRACKED(PyObject *obj) { + if (!PyObject_IS_GC(obj)) { + return 0; + } + if (PyTuple_CheckExact(obj)) { + return _PyObject_GC_IS_TRACKED(obj); + } + return 1; +} + +#ifdef Py_GIL_DISABLED + +/* True if memory the object references is shared between + * multiple threads and needs special purpose when freeing + * those references due to the possibility of in-flight + * lock-free reads occurring. The object is responsible + * for calling _PyMem_FreeDelayed on the referenced + * memory. */ +static inline int _PyObject_GC_IS_SHARED(PyObject *op) { + return _PyObject_HAS_GC_BITS(op, _PyGC_BITS_SHARED); +} +#define _PyObject_GC_IS_SHARED(op) _PyObject_GC_IS_SHARED(_Py_CAST(PyObject*, op)) + +static inline void _PyObject_GC_SET_SHARED(PyObject *op) { + _PyObject_SET_GC_BITS(op, _PyGC_BITS_SHARED); +} +#define _PyObject_GC_SET_SHARED(op) _PyObject_GC_SET_SHARED(_Py_CAST(PyObject*, op)) + +/* True if the memory of the object is shared between multiple + * threads and needs special purpose when freeing due to + * the possibility of in-flight lock-free reads occurring. + * Objects with this bit that are GC objects will automatically + * delay-freed by PyObject_GC_Del. */ +static inline int _PyObject_GC_IS_SHARED_INLINE(PyObject *op) { + return _PyObject_HAS_GC_BITS(op, _PyGC_BITS_SHARED_INLINE); +} +#define _PyObject_GC_IS_SHARED_INLINE(op) \ + _PyObject_GC_IS_SHARED_INLINE(_Py_CAST(PyObject*, op)) + +static inline void _PyObject_GC_SET_SHARED_INLINE(PyObject *op) { + _PyObject_SET_GC_BITS(op, _PyGC_BITS_SHARED_INLINE); +} +#define _PyObject_GC_SET_SHARED_INLINE(op) \ + _PyObject_GC_SET_SHARED_INLINE(_Py_CAST(PyObject*, op)) + +#endif + +/* Bit flags for _gc_prev */ +/* Bit 0 is set when tp_finalize is called */ +#define _PyGC_PREV_MASK_FINALIZED (1) +/* Bit 1 is set when the object is in generation which is GCed currently. */ +#define _PyGC_PREV_MASK_COLLECTING (2) +/* The (N-2) most significant bits contain the real address. 
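+
+   Recovering the pointer therefore masks off the two flag bits, exactly
+   as _PyGCHead_PREV() below does:
+
+       PyGC_Head *prev = (PyGC_Head *)(gc->_gc_prev & _PyGC_PREV_MASK);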
*/ +#define _PyGC_PREV_SHIFT (2) +#define _PyGC_PREV_MASK (((uintptr_t) -1) << _PyGC_PREV_SHIFT) + +/* set for debugging information */ +#define _PyGC_DEBUG_STATS (1<<0) /* print collection statistics */ +#define _PyGC_DEBUG_COLLECTABLE (1<<1) /* print collectable objects */ +#define _PyGC_DEBUG_UNCOLLECTABLE (1<<2) /* print uncollectable objects */ +#define _PyGC_DEBUG_SAVEALL (1<<5) /* save all garbage in gc.garbage */ +#define _PyGC_DEBUG_LEAK _PyGC_DEBUG_COLLECTABLE | \ + _PyGC_DEBUG_UNCOLLECTABLE | \ + _PyGC_DEBUG_SAVEALL + +typedef enum { + // GC was triggered by heap allocation + _Py_GC_REASON_HEAP, + + // GC was called during shutdown + _Py_GC_REASON_SHUTDOWN, + + // GC was called by gc.collect() or PyGC_Collect() + _Py_GC_REASON_MANUAL +} _PyGC_Reason; + +// Lowest bit of _gc_next is used for flags only in GC. +// But it is always 0 for normal code. +static inline PyGC_Head* _PyGCHead_NEXT(PyGC_Head *gc) { + uintptr_t next = gc->_gc_next; + return (PyGC_Head*)next; +} +static inline void _PyGCHead_SET_NEXT(PyGC_Head *gc, PyGC_Head *next) { + gc->_gc_next = (uintptr_t)next; +} + +// Lowest two bits of _gc_prev is used for _PyGC_PREV_MASK_* flags. +static inline PyGC_Head* _PyGCHead_PREV(PyGC_Head *gc) { + uintptr_t prev = (gc->_gc_prev & _PyGC_PREV_MASK); + return (PyGC_Head*)prev; +} +static inline void _PyGCHead_SET_PREV(PyGC_Head *gc, PyGC_Head *prev) { + uintptr_t uprev = (uintptr_t)prev; + assert((uprev & ~_PyGC_PREV_MASK) == 0); + gc->_gc_prev = ((gc->_gc_prev & ~_PyGC_PREV_MASK) | uprev); +} + +static inline int _PyGC_FINALIZED(PyObject *op) { +#ifdef Py_GIL_DISABLED + return _PyObject_HAS_GC_BITS(op, _PyGC_BITS_FINALIZED); +#else + PyGC_Head *gc = _Py_AS_GC(op); + return ((gc->_gc_prev & _PyGC_PREV_MASK_FINALIZED) != 0); +#endif +} +static inline void _PyGC_SET_FINALIZED(PyObject *op) { +#ifdef Py_GIL_DISABLED + _PyObject_SET_GC_BITS(op, _PyGC_BITS_FINALIZED); +#else + PyGC_Head *gc = _Py_AS_GC(op); + gc->_gc_prev |= _PyGC_PREV_MASK_FINALIZED; +#endif +} +static inline void _PyGC_CLEAR_FINALIZED(PyObject *op) { +#ifdef Py_GIL_DISABLED + _PyObject_CLEAR_GC_BITS(op, _PyGC_BITS_FINALIZED); +#else + PyGC_Head *gc = _Py_AS_GC(op); + gc->_gc_prev &= ~_PyGC_PREV_MASK_FINALIZED; +#endif +} + + +/* GC runtime state */ + +/* If we change this, we need to change the default value in the + signature of gc.collect. */ +#define NUM_GENERATIONS 3 +/* + NOTE: about untracking of mutable objects. + + Certain types of container cannot participate in a reference cycle, and + so do not need to be tracked by the garbage collector. Untracking these + objects reduces the cost of garbage collections. However, determining + which objects may be untracked is not free, and the costs must be + weighed against the benefits for garbage collection. + + There are two possible strategies for when to untrack a container: + + i) When the container is created. + ii) When the container is examined by the garbage collector. + + Tuples containing only immutable objects (integers, strings etc, and + recursively, tuples of immutable objects) do not need to be tracked. + The interpreter creates a large number of tuples, many of which will + not survive until garbage collection. It is therefore not worthwhile + to untrack eligible tuples at creation time. + + Instead, all tuples except the empty tuple are tracked when created. + During garbage collection it is determined whether any surviving tuples + can be untracked. A tuple can be untracked if all of its contents are + already not tracked. 
Tuples are examined for untracking in all garbage + collection cycles. It may take more than one cycle to untrack a tuple. + + Dictionaries containing only immutable objects also do not need to be + tracked. Dictionaries are untracked when created. If a tracked item is + inserted into a dictionary (either as a key or value), the dictionary + becomes tracked. During a full garbage collection (all generations), + the collector will untrack any dictionaries whose contents are not + tracked. + + The module provides the python function is_tracked(obj), which returns + the CURRENT tracking status of the object. Subsequent garbage + collections may change the tracking status of the object. + + Untracking of certain containers was introduced in issue #4688, and + the algorithm was refined in response to issue #14775. +*/ + +struct gc_generation { + PyGC_Head head; + int threshold; /* collection threshold */ + int count; /* count of allocations or collections of younger + generations */ +}; + +/* Running stats per generation */ +struct gc_generation_stats { + /* total number of collections */ + Py_ssize_t collections; + /* total number of collected objects */ + Py_ssize_t collected; + /* total number of uncollectable objects (put into gc.garbage) */ + Py_ssize_t uncollectable; +}; + +struct _gc_runtime_state { + /* List of objects that still need to be cleaned up, singly linked + * via their gc headers' gc_prev pointers. */ + PyObject *trash_delete_later; + /* Current call-stack depth of tp_dealloc calls. */ + int trash_delete_nesting; + + /* Is automatic collection enabled? */ + int enabled; + int debug; + /* linked lists of container objects */ + struct gc_generation generations[NUM_GENERATIONS]; + PyGC_Head *generation0; + /* a permanent generation which won't be collected */ + struct gc_generation permanent_generation; + struct gc_generation_stats generation_stats[NUM_GENERATIONS]; + /* true if we are currently running the collector */ + int collecting; + /* list of uncollectable objects */ + PyObject *garbage; + /* a list of callbacks to be invoked when collection is performed */ + PyObject *callbacks; + + /* This is the number of objects that survived the last full + collection. It approximates the number of long lived objects + tracked by the GC. + + (by "full collection", we mean a collection of the oldest + generation). */ + Py_ssize_t long_lived_total; + /* This is the number of objects that survived all "non-full" + collections, and are awaiting to undergo a full collection for + the first time. */ + Py_ssize_t long_lived_pending; + +#ifdef Py_GIL_DISABLED + /* gh-117783: Deferred reference counting is not fully implemented yet, so + as a temporary measure we treat objects using deferred reference + counting as immortal. The value may be zero, one, or a negative number: + 0: immortalize deferred RC objects once the first thread is created + 1: immortalize all deferred RC objects immediately + <0: suppressed; don't immortalize objects */ + int immortalize; +#endif +}; + +#ifdef Py_GIL_DISABLED +struct _gc_thread_state { + /* Thread-local allocation count. */ + Py_ssize_t alloc_count; +}; +#endif + + +extern void _PyGC_InitState(struct _gc_runtime_state *); + +extern Py_ssize_t _PyGC_Collect(PyThreadState *tstate, int generation, + _PyGC_Reason reason); +extern void _PyGC_CollectNoFail(PyThreadState *tstate); + +/* Freeze objects tracked by the GC and ignore them in future collections. 
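+
+   These appear to back the Python-level gc.freeze()/gc.unfreeze()/
+   gc.get_freeze_count() API. Illustrative usage from C:
+
+       PyInterpreterState *interp = PyInterpreterState_Get();
+       _PyGC_Freeze(interp);   // later collections skip the frozen objects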
*/ +extern void _PyGC_Freeze(PyInterpreterState *interp); +/* Unfreezes objects placing them in the oldest generation */ +extern void _PyGC_Unfreeze(PyInterpreterState *interp); +/* Number of frozen objects */ +extern Py_ssize_t _PyGC_GetFreezeCount(PyInterpreterState *interp); + +extern PyObject *_PyGC_GetObjects(PyInterpreterState *interp, int generation); +extern PyObject *_PyGC_GetReferrers(PyInterpreterState *interp, PyObject *objs); + +// Functions to clear types free lists +extern void _PyGC_ClearAllFreeLists(PyInterpreterState *interp); +extern void _Py_ScheduleGC(PyThreadState *tstate); +extern void _Py_RunGC(PyThreadState *tstate); + +#ifdef Py_GIL_DISABLED +// gh-117783: Immortalize objects that use deferred reference counting +extern void _PyGC_ImmortalizeDeferredObjects(PyInterpreterState *interp); +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_GC_H */ diff --git a/Include/internal/pycore_genobject.h b/Include/internal/pycore_genobject.h new file mode 100644 index 0000000000000000000000000000000000000000..9463c822ad86698e64aced58adfc1088cb14ab29 --- /dev/null +++ b/Include/internal/pycore_genobject.h @@ -0,0 +1,32 @@ +#ifndef Py_INTERNAL_GENOBJECT_H +#define Py_INTERNAL_GENOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_freelist.h" + +PyAPI_FUNC(PyObject *)_PyGen_yf(PyGenObject *); +extern void _PyGen_Finalize(PyObject *self); + +// Export for '_asyncio' shared extension +PyAPI_FUNC(int) _PyGen_SetStopIterationValue(PyObject *); + +// Export for '_asyncio' shared extension +PyAPI_FUNC(int) _PyGen_FetchStopIterationValue(PyObject **); + +PyAPI_FUNC(PyObject *)_PyCoro_GetAwaitableIter(PyObject *o); +extern PyObject *_PyAsyncGenValueWrapperNew(PyThreadState *state, PyObject *); + +extern PyTypeObject _PyCoroWrapper_Type; +extern PyTypeObject _PyAsyncGenWrappedValue_Type; +extern PyTypeObject _PyAsyncGenAThrow_Type; + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_GENOBJECT_H */ diff --git a/Include/internal/pycore_getopt.h b/Include/internal/pycore_getopt.h new file mode 100644 index 0000000000000000000000000000000000000000..7f0dd13ae577f78491f19b7e8fa3ac8c5042bb11 --- /dev/null +++ b/Include/internal/pycore_getopt.h @@ -0,0 +1,22 @@ +#ifndef Py_INTERNAL_PYGETOPT_H +#define Py_INTERNAL_PYGETOPT_H + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +extern int _PyOS_opterr; +extern Py_ssize_t _PyOS_optind; +extern const wchar_t *_PyOS_optarg; + +extern void _PyOS_ResetGetOpt(void); + +typedef struct { + const wchar_t *name; + int has_arg; + int val; +} _PyOS_LongOption; + +extern int _PyOS_GetOpt(Py_ssize_t argc, wchar_t * const *argv, int *longindex); + +#endif /* !Py_INTERNAL_PYGETOPT_H */ diff --git a/Include/internal/pycore_gil.h b/Include/internal/pycore_gil.h new file mode 100644 index 0000000000000000000000000000000000000000..a2de5077371ebae37b9b17e870c3195f0e8b7714 --- /dev/null +++ b/Include/internal/pycore_gil.h @@ -0,0 +1,66 @@ +#ifndef Py_INTERNAL_GIL_H +#define Py_INTERNAL_GIL_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_condvar.h" // PyCOND_T + +#ifndef Py_HAVE_CONDVAR +# error You need either a POSIX-compatible or a Windows system! +#endif + +/* Enable if you want to force the switching of threads at least + every `interval`. 
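+
+   Note that the `interval` field below is stored in microseconds while
+   the Python-level API works in seconds, so a 0.005 s switch interval
+   would (illustratively, for a hypothetical `gil` pointer) correspond to:
+
+       gil->interval = 5000;   // 0.005 s expressed in microseconds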
*/ +#undef FORCE_SWITCHING +#define FORCE_SWITCHING + +struct _gil_runtime_state { +#ifdef Py_GIL_DISABLED + /* If this GIL is disabled, enabled == 0. + + If this GIL is enabled transiently (most likely to initialize a module + of unknown safety), enabled indicates the number of active transient + requests. + + If this GIL is enabled permanently, enabled == INT_MAX. + + It must not be modified directly; use _PyEval_EnableGILTransiently(), + _PyEval_EnableGILPermanently(), and _PyEval_DisableGIL() + + It is always read and written atomically, but a thread can assume its + value will be stable as long as that thread is attached or knows that no + other threads are attached (e.g., during a stop-the-world.). */ + int enabled; +#endif + /* microseconds (the Python API uses seconds, though) */ + unsigned long interval; + /* Last PyThreadState holding / having held the GIL. This helps us + know whether anyone else was scheduled after we dropped the GIL. */ + PyThreadState* last_holder; + /* Whether the GIL is already taken (-1 if uninitialized). This is + atomic because it can be read without any lock taken in ceval.c. */ + int locked; + /* Number of GIL switches since the beginning. */ + unsigned long switch_number; + /* This condition variable allows one or several threads to wait + until the GIL is released. In addition, the mutex also protects + the above variables. */ + PyCOND_T cond; + PyMUTEX_T mutex; +#ifdef FORCE_SWITCHING + /* This condition variable helps the GIL-releasing thread wait for + a GIL-awaiting thread to be scheduled and take the GIL. */ + PyCOND_T switch_cond; + PyMUTEX_T switch_mutex; +#endif +}; + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_GIL_H */ diff --git a/Include/internal/pycore_global_objects.h b/Include/internal/pycore_global_objects.h new file mode 100644 index 0000000000000000000000000000000000000000..9d376e7db021efc929ac3f51e7dee0b3176f7b76 --- /dev/null +++ b/Include/internal/pycore_global_objects.h @@ -0,0 +1,105 @@ +#ifndef Py_INTERNAL_GLOBAL_OBJECTS_H +#define Py_INTERNAL_GLOBAL_OBJECTS_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_context.h" // _PyContextTokenMissing +#include "pycore_gc.h" // _PyGC_Head_UNUSED +#include "pycore_global_strings.h" // struct _Py_global_strings +#include "pycore_hamt.h" // PyHamtNode_Bitmap +#include "pycore_hashtable.h" // _Py_hashtable_t +#include "pycore_typeobject.h" // pytype_slotdef + + +// These would be in pycore_long.h if it weren't for an include cycle. +#define _PY_NSMALLPOSINTS 257 +#define _PY_NSMALLNEGINTS 5 + + +// Only immutable objects should be considered runtime-global. +// All others must be per-interpreter. + +#define _Py_GLOBAL_OBJECT(NAME) \ + _PyRuntime.static_objects.NAME +#define _Py_SINGLETON(NAME) \ + _Py_GLOBAL_OBJECT(singletons.NAME) + +struct _Py_cached_objects { + // XXX We could statically allocate the hashtable. + _Py_hashtable_t *interned_strings; +}; + +struct _Py_static_objects { + struct { + /* Small integers are preallocated in this array so that they + * can be shared. + * The integers that are preallocated are those in the range + * -_PY_NSMALLNEGINTS (inclusive) to _PY_NSMALLPOSINTS (exclusive). 
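+     *
+     * For a small integer n in that range, the cached object would be
+     * found at (illustrative):
+     *
+     *     PyLongObject *v = &_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + n];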
+ */ + PyLongObject small_ints[_PY_NSMALLNEGINTS + _PY_NSMALLPOSINTS]; + + PyBytesObject bytes_empty; + struct { + PyBytesObject ob; + char eos; + } bytes_characters[256]; + + struct _Py_global_strings strings; + + _PyGC_Head_UNUSED _tuple_empty_gc_not_used; + PyTupleObject tuple_empty; + + _PyGC_Head_UNUSED _hamt_bitmap_node_empty_gc_not_used; + PyHamtNode_Bitmap hamt_bitmap_node_empty; + _PyContextTokenMissing context_token_missing; + } singletons; +}; + +#define _Py_INTERP_CACHED_OBJECT(interp, NAME) \ + (interp)->cached_objects.NAME + +struct _Py_interp_cached_objects { + PyObject *interned_strings; + + /* AST */ + PyObject *_unused_str_replace_inf; // kept in 3.13 for ABI compatibility + + /* object.__reduce__ */ + PyObject *objreduce; + PyObject *type_slots_pname; + pytype_slotdef *type_slots_ptrs[MAX_EQUIV]; + + /* TypeVar and related types */ + PyTypeObject *generic_type; + PyTypeObject *typevar_type; + PyTypeObject *typevartuple_type; + PyTypeObject *paramspec_type; + PyTypeObject *paramspecargs_type; + PyTypeObject *paramspeckwargs_type; +}; + +#define _Py_INTERP_STATIC_OBJECT(interp, NAME) \ + (interp)->static_objects.NAME +#define _Py_INTERP_SINGLETON(interp, NAME) \ + _Py_INTERP_STATIC_OBJECT(interp, singletons.NAME) + +struct _Py_interp_static_objects { + struct { + int _not_used; + // hamt_empty is here instead of global because of its weakreflist. + _PyGC_Head_UNUSED _hamt_empty_gc_not_used; + PyHamtObject hamt_empty; + PyBaseExceptionObject last_resort_memory_error; + } singletons; +}; + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_GLOBAL_OBJECTS_H */ diff --git a/Include/internal/pycore_global_objects_fini_generated.h b/Include/internal/pycore_global_objects_fini_generated.h new file mode 100644 index 0000000000000000000000000000000000000000..cd56ffde9e555e607ad3c98c81c7e4b6441286a9 --- /dev/null +++ b/Include/internal/pycore_global_objects_fini_generated.h @@ -0,0 +1,1554 @@ +#ifndef Py_INTERNAL_GLOBAL_OBJECTS_FINI_GENERATED_INIT_H +#define Py_INTERNAL_GLOBAL_OBJECTS_FINI_GENERATED_INIT_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#ifdef Py_DEBUG +static inline void +_PyStaticObject_CheckRefcnt(PyObject *obj) { + if (Py_REFCNT(obj) < _Py_IMMORTAL_REFCNT) { + fprintf(stderr, "Immortal Object has less refcnt than expected.\n"); + _PyObject_Dump(obj); + } +} +#endif + +/* The following is auto-generated by Tools/build/generate_global_objects.py. 
*/
+#ifdef Py_DEBUG
+static inline void
+_PyStaticObjects_CheckRefcnt(PyInterpreterState *interp) {
+    /* generated runtime-global */
+    // (see pycore_runtime_init_generated.h)
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + -5]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + -4]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + -3]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + -2]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + -1]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 0]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 1]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 2]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 3]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 4]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 5]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 6]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 7]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 8]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 9]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 10]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 11]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 12]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 13]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 14]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 15]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 16]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 17]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 18]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 19]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 20]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 21]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 22]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 23]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 24]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 25]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 26]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 27]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 28]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 29]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 30]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 31]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 32]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 33]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 34]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 35]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 36]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 37]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 38]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 39]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 40]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 41]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 42]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 43]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 44]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 45]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 46]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 47]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 48]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 49]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 50]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 51]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 52]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 53]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 54]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 55]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 56]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 57]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 58]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 59]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 60]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 61]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 62]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 63]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 64]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 65]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 66]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 67]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 68]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 69]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 70]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 71]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 72]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 73]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 74]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 75]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 76]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 77]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 78]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 79]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 80]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 81]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 82]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 83]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 84]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 85]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 86]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 87]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 88]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 89]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 90]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 91]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 92]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 93]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 94]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 95]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 96]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 97]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 98]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 99]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 100]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 101]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 102]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 103]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 104]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 105]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 106]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 107]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 108]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 109]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 110]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 111]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 112]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 113]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 114]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 115]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 116]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 117]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 118]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 119]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 120]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 121]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 122]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 123]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 124]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 125]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 126]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 127]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 128]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 129]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 130]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 131]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 132]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 133]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 134]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 135]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 136]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 137]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 138]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 139]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 140]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 141]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 142]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 143]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 144]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 145]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 146]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 147]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 148]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 149]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 150]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 151]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 152]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 153]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 154]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 155]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 156]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 157]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 158]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 159]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 160]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 161]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 162]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 163]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 164]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 165]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 166]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 167]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 168]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 169]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 170]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 171]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 172]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 173]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 174]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 175]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 176]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 177]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 178]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 179]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 180]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 181]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 182]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 183]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 184]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 185]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 186]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 187]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 188]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 189]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 190]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 191]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 192]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 193]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 194]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 195]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 196]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 197]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 198]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 199]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 200]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 201]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 202]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 203]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 204]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 205]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 206]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 207]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 208]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 209]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 210]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 211]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 212]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 213]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 214]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 215]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 216]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 217]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 218]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 219]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 220]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 221]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 222]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 223]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 224]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 225]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 226]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 227]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 228]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 229]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 230]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 231]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 232]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 233]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 234]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 235]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 236]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 237]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 238]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 239]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 240]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 241]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 242]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 243]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 244]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 245]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 246]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 247]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 248]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 249]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 250]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 251]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 252]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 253]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 254]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 255]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(small_ints)[_PY_NSMALLNEGINTS + 256]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[0]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[1]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[2]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[3]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[4]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[5]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[6]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[7]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[8]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[9]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[10]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[11]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[12]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[13]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[14]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[15]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[16]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[17]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[18]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[19]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[20]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[21]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[22]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[23]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[24]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[25]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[26]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[27]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[28]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[29]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[30]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[31]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[32]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[33]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[34]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[35]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[36]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[37]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[38]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[39]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[40]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[41]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[42]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[43]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[44]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[45]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[46]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[47]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[48]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[49]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[50]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[51]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[52]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[53]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[54]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[55]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[56]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[57]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[58]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[59]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[60]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[61]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[62]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[63]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[64]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[65]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[66]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[67]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[68]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[69]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[70]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[71]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[72]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[73]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[74]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[75]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[76]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[77]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[78]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[79]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[80]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[81]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[82]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[83]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[84]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[85]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[86]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[87]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[88]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[89]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[90]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[91]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[92]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[93]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[94]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[95]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[96]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[97]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[98]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[99]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[100]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[101]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[102]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[103]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[104]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[105]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[106]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[107]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[108]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[109]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[110]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[111]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[112]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[113]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[114]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[115]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[116]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[117]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[118]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[119]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[120]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[121]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[122]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[123]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[124]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[125]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[126]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[127]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[128]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[129]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[130]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[131]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[132]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[133]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[134]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[135]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[136]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[137]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[138]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[139]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[140]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[141]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[142]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[143]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[144]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[145]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[146]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[147]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[148]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[149]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[150]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[151]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[152]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[153]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[154]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[155]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[156]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[157]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[158]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[159]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[160]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[161]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[162]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[163]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[164]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[165]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[166]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[167]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[168]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[169]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[170]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[171]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[172]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[173]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[174]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[175]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[176]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[177]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[178]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[179]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[180]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[181]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[182]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[183]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[184]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[185]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[186]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[187]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[188]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[189]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[190]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[191]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[192]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[193]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[194]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[195]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[196]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[197]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[198]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[199]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[200]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[201]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[202]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[203]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[204]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[205]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[206]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[207]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[208]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[209]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[210]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[211]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[212]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[213]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[214]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[215]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[216]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[217]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[218]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[219]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[220]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[221]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[222]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[223]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[224]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[225]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[226]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[227]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[228]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[229]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[230]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[231]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[232]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[233]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[234]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[235]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[236]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[237]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[238]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[239]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[240]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[241]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[242]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[243]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[244]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[245]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[246]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[247]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[248]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[249]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[250]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[251]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[252]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[253]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[254]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_characters)[255]);
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(anon_dictcomp));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(anon_genexpr));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(anon_lambda));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(anon_listcomp));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(anon_module));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(anon_null));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(anon_setcomp));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(anon_string));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(anon_unknown));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(dbl_close_br));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(dbl_open_br));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(dbl_percent));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(defaults));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(dot_locals));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(empty));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(generic_base));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(json_decoder));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(kwdefaults));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(list_err));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(str_replace_inf));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(type_params));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_STR(utf_8));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(CANCELLED));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(FINISHED));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(False));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(JSONDecodeError));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(PENDING));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(Py_Repr));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(TextIOWrapper));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(True));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(WarningMessage));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_WindowsConsoleIO));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__IOBase_closed));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__abc_tpflags__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__abs__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__abstractmethods__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__add__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__aenter__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__aexit__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__aiter__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__all__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__and__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__anext__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__annotations__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__args__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__await__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__bases__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__bool__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__buffer__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__build_class__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__builtins__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__bytes__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__call__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__cantrace__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__class__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__class_getitem__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__classcell__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__classdict__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__classdictcell__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__complex__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__contains__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__copy__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__ctypes_from_outparam__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__del__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__delattr__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__delete__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__delitem__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__dict__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__dictoffset__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__dir__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__divmod__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__doc__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__enter__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__eq__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__exit__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__file__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__firstlineno__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__float__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__floordiv__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__format__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__fspath__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__ge__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__get__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__getattr__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__getattribute__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__getinitargs__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__getitem__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__getnewargs__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__getnewargs_ex__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__getstate__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__gt__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__hash__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__iadd__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__iand__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__ifloordiv__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__ilshift__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__imatmul__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__imod__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__import__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__imul__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__index__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__init__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__init_subclass__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__instancecheck__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__int__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__invert__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__ior__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__ipow__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__irshift__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__isabstractmethod__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__isub__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__iter__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__itruediv__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__ixor__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__le__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__len__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__length_hint__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__lltrace__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__loader__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__lshift__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__lt__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__main__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__match_args__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__matmul__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__missing__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__mod__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__module__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__mro_entries__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__mul__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__name__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__ne__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__neg__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__new__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__newobj__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__newobj_ex__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__next__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__notes__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__or__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__orig_class__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__origin__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__package__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__parameters__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__path__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__pos__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__pow__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__prepare__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__qualname__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__radd__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__rand__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__rdivmod__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__reduce__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__reduce_ex__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__release_buffer__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__repr__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__reversed__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__rfloordiv__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__rlshift__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__rmatmul__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__rmod__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__rmul__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__ror__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__round__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__rpow__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__rrshift__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__rshift__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__rsub__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__rtruediv__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__rxor__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__set__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__set_name__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__setattr__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__setitem__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__setstate__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__sizeof__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__slotnames__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__slots__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__spec__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__static_attributes__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__str__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__sub__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__subclasscheck__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__subclasshook__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__truediv__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__trunc__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__type_params__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__typing_is_unpacked_typevartuple__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__typing_prepare_subst__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__typing_subst__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__typing_unpacked_tuple_args__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__warningregistry__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__weaklistoffset__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__weakref__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(__xor__));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_abc_impl));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_abstract_));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_active));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_align_));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_annotation));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_anonymous_));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_argtypes_));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_as_parameter_));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_asyncio_future_blocking));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_blksize));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_bootstrap));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_check_retval_));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_dealloc_warn));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_feature_version));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_field_types));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_fields_));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_finalizing));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_find_and_load));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_fix_up_module));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_flags_));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_get_sourcefile));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_handle_fromlist));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_initializing));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_io));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_is_text_encoding));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_length_));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_limbo));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_lock_unlock_module));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_loop));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_needs_com_addref_));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_only_immortal));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_pack_));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_restype_));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_showwarnmsg));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_shutdown));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_slotnames));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_strptime));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_strptime_datetime));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_swappedbytes_));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_type_));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_uninitialized_submodules));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_warn_unawaited_coroutine));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(_xoptions));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(abs_tol));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(access));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(aclose));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(add));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(add_done_callback));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(after_in_child));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(after_in_parent));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(aggregate_class));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(alias));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(allow_code));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(append));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(arg));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(argdefs));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(args));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(arguments));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(argv));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(as_integer_ratio));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(asend));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ast));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(athrow));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(attribute));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(authorizer_callback));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(autocommit));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(backtick));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(base));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(before));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(big));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(binary_form));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(block));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(bound));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(buffer));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(buffer_callback));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(buffer_size));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(buffering));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(buffers));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(bufsize));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(builtins));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(byteorder));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(bytes));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(bytes_per_sep));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(c_call));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(c_exception));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(c_return));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cached_datetime_module));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cached_statements));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cadata));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cafile));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(call));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(call_exception_handler));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(call_soon));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(callback));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cancel));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(capath));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(category));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cb_type));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(certfile));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(check_same_thread));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(clear));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(close));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(closed));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(closefd));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(closure));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(co_argcount));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(co_cellvars));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(co_code));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(co_consts));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(co_exceptiontable));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(co_filename));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(co_firstlineno));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(co_flags));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(co_freevars));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(co_kwonlyargcount));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(co_linetable));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(co_name));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(co_names));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(co_nlocals));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(co_posonlyargcount));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(co_qualname));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(co_stacksize));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(co_varnames));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(code));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(col_offset));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(command));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(comment_factory));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(compile_mode));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(consts));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(context));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(contravariant));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cookie));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(copy));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(copyreg));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(coro));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(count));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(covariant));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(cwd));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(data));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(database));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(day));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(decode));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(decoder));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(default));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(defaultaction));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(delete));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(depth));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(desired_access));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(detect_types));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(deterministic));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(device));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(dict));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(dictcomp));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(difference_update));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(digest));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(digest_size));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(digestmod));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(dir_fd));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(discard));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(dispatch_table));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(displayhook));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(dklen));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(doc));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(dont_inherit));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(dst));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(dst_dir_fd));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(eager_start));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(effective_ids));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(element_factory));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(encode));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(encoding));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(end));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(end_col_offset));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(end_lineno));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(end_offset));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(endpos));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(entrypoint));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(env));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(errors));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(event));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(eventmask));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(exc_type));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(exc_value));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(excepthook));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(exception));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(existing_file_name));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(exp));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(extend));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(extra_tokens));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(facility));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(factory));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(false));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(family));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fanout));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fd));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fd2));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fdel));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fget));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(file));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(file_actions));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(filename));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fileno));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(filepath));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fillvalue));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(filter));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(filters));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(final));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(find_class));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fix_imports));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(flags));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(flush));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fold));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(follow_symlinks));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(format));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(from_param));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fromlist));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fromtimestamp));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fromutc));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(fset));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(func));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(future));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(generation));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(genexpr));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(get));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(get_debug));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(get_event_loop));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(get_loop));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(get_source));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(getattr));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(getstate));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(gid));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(globals));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(groupindex));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(groups));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(handle));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(handle_seq));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(has_location));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(hash_name));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(header));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(headers));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(hi));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(hook));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(hour));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ident));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(identity_hint));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ignore));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(imag));
+    _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(importlib));
+    _PyStaticObject_CheckRefcnt((PyObject
*)&_Py_ID(in_fd)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(incoming)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(indexgroup)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(inf)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(infer_variance)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(inherit_handle)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(inheritable)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(initial)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(initial_bytes)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(initial_owner)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(initial_state)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(initial_value)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(initval)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(inner_size)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(input)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(insert_comments)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(insert_pis)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(instructions)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(intern)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(intersection)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(interval)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(is_running)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(isatty)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(isinstance)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(isoformat)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(isolation_level)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(istext)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(item)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(items)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(iter)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(iterable)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(iterations)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(join)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(jump)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(keepends)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(key)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(keyfile)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(keys)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(kind)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(kw)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(kw1)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(kw2)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(kwdefaults)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(label)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(lambda)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(last)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(last_exc)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(last_node)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(last_traceback)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(last_type)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(last_value)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(latin1)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(leaf_size)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(len)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(length)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(level)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(limit)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(line)); + 
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(line_buffering)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(lineno)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(listcomp)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(little)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(lo)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(locale)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(locals)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(logoption)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(loop)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(manual_reset)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(mapping)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(match)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(max_length)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(maxdigits)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(maxevents)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(maxlen)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(maxmem)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(maxsplit)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(maxvalue)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(memLevel)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(memlimit)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(message)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(metaclass)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(metadata)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(method)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(microsecond)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(milliseconds)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(minute)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(mod)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(mode)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(module)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(module_globals)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(modules)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(month)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(mro)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(msg)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(mutex)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(mycmp)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(n_arg)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(n_fields)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(n_sequence_fields)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(n_unnamed_fields)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(name)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(name_from)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(namespace_separator)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(namespaces)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(narg)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ndigits)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(nested)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(new_file_name)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(new_limit)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(newline)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(newlines)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(next)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(nlocals)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(node_depth)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(node_offset)); + 
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ns)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(nstype)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(nt)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(null)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(number)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(obj)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(object)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(offset)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(offset_dst)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(offset_src)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(on_type_read)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(onceregistry)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(only_keys)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(oparg)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(opcode)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(open)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(opener)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(operation)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(optimize)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(options)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(order)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(origin)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(out_fd)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(outgoing)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(overlapped)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(owner)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pages)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(parent)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(password)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(path)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pattern)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(peek)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(persistent_id)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(persistent_load)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(person)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pi_factory)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pid)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(policy)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pos)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pos1)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(pos2)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(posix)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(print_file_and_line)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(priority)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(progress)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(progress_handler)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(progress_routine)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(proto)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(protocol)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ps1)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(ps2)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(query)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(quotetabs)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(raw)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(read)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(read1)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(readable)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(readall)); + 
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(readinto)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(readinto1)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(readline)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(readonly)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(real)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(reducer_override)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(registry)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(rel_tol)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(release)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(reload)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(repl)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(replace)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(reserved)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(reset)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(resetids)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(return)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(reverse)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(reversed)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(salt)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sched_priority)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(scheduler)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(second)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(security_attributes)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(seek)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(seekable)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(selectors)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(self)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(send)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sep)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sequence)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(server_hostname)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(server_side)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(session)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(setcomp)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(setpgroup)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(setsid)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(setsigdef)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(setsigmask)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(setstate)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(shape)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(show_cmd)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(signed)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(size)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sizehint)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(skip_file_prefixes)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sleep)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sock)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sort)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(source)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(source_traceback)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(spam)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(src)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(src_dir_fd)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(stacklevel)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(start)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(statement)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(status)); + _PyStaticObject_CheckRefcnt((PyObject 
*)&_Py_ID(stderr)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(stdin)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(stdout)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(step)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(steps)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(store_name)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(strategy)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(strftime)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(strict)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(strict_mode)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(string)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(sub_key)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(symmetric_difference_update)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tabsize)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tag)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(target)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(target_is_directory)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(task)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tb_frame)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tb_lasti)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tb_lineno)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tb_next)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tell)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(template)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(term)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(text)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(threading)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(throw)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(timeout)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(times)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(timetuple)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(top)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(trace_callback)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(traceback)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(trailers)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(translate)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(true)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(truncate)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(twice)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(txt)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(type)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(type_params)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tz)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tzinfo)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(tzname)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(uid)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(unlink)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(unraisablehook)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(uri)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(usedforsecurity)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(value)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(values)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(version)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(volume)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(wait_all)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(warn_on_full_buffer)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(warnings)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(warnoptions)); + 
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(wbits)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(week)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(weekday)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(which)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(who)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(withdata)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(writable)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(write)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(write_through)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(year)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_ID(zdict)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[0]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[1]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[2]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[3]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[4]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[5]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[6]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[7]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[8]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[9]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[10]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[11]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[12]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[13]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[14]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[15]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[16]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[17]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[18]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[19]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[20]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[21]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[22]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[23]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[24]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[25]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[26]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[27]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[28]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[29]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[30]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[31]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[32]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[33]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[34]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[35]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[36]); + 
_PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[37]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[38]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[39]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[40]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[41]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[42]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[43]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[44]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[45]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[46]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[47]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[48]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[49]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[50]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[51]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[52]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[53]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[54]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[55]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[56]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[57]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[58]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[59]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[60]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[61]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[62]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[63]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[64]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[65]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[66]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[67]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[68]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[69]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[70]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[71]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[72]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[73]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[74]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[75]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[76]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[77]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[78]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[79]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[80]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[81]); + _PyStaticObject_CheckRefcnt((PyObject 
*)&_Py_SINGLETON(strings).ascii[82]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[83]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[84]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[85]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[86]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[87]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[88]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[89]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[90]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[91]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[92]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[93]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[94]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[95]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[96]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[97]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[98]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[99]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[100]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[101]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[102]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[103]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[104]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[105]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[106]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[107]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[108]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[109]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[110]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[111]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[112]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[113]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[114]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[115]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[116]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[117]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[118]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[119]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[120]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[121]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[122]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[123]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[124]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[125]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).ascii[126]); + _PyStaticObject_CheckRefcnt((PyObject 
*)&_Py_SINGLETON(strings).ascii[127]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[128 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[129 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[130 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[131 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[132 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[133 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[134 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[135 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[136 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[137 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[138 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[139 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[140 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[141 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[142 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[143 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[144 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[145 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[146 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[147 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[148 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[149 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[150 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[151 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[152 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[153 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[154 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[155 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[156 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[157 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[158 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[159 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[160 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[161 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[162 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[163 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[164 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[165 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[166 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[167 - 128]); + _PyStaticObject_CheckRefcnt((PyObject 
*)&_Py_SINGLETON(strings).latin1[168 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[169 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[170 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[171 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[172 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[173 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[174 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[175 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[176 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[177 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[178 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[179 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[180 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[181 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[182 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[183 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[184 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[185 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[186 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[187 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[188 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[189 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[190 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[191 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[192 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[193 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[194 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[195 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[196 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[197 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[198 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[199 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[200 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[201 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[202 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[203 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[204 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[205 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[206 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[207 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[208 - 128]); + _PyStaticObject_CheckRefcnt((PyObject 
*)&_Py_SINGLETON(strings).latin1[209 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[210 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[211 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[212 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[213 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[214 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[215 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[216 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[217 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[218 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[219 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[220 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[221 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[222 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[223 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[224 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[225 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[226 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[227 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[228 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[229 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[230 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[231 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[232 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[233 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[234 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[235 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[236 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[237 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[238 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[239 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[240 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[241 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[242 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[243 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[244 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[245 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[246 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[247 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[248 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[249 - 128]); + _PyStaticObject_CheckRefcnt((PyObject 
*)&_Py_SINGLETON(strings).latin1[250 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[251 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[252 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[253 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[254 - 128]); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(strings).latin1[255 - 128]); + /* non-generated */ + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(bytes_empty)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(tuple_empty)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(hamt_bitmap_node_empty)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_INTERP_SINGLETON(interp, hamt_empty)); + _PyStaticObject_CheckRefcnt((PyObject *)&_Py_SINGLETON(context_token_missing)); +} +#endif // Py_DEBUG +/* End auto-generated code */ + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_GLOBAL_OBJECTS_FINI_GENERATED_INIT_H */ diff --git a/Include/internal/pycore_global_strings.h b/Include/internal/pycore_global_strings.h new file mode 100644 index 0000000000000000000000000000000000000000..cad2d1a8d22049ffee8b257b6a1db4fef26ac365 --- /dev/null +++ b/Include/internal/pycore_global_strings.h @@ -0,0 +1,814 @@ +#ifndef Py_INTERNAL_GLOBAL_STRINGS_H +#define Py_INTERNAL_GLOBAL_STRINGS_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +// The data structure & init here are inspired by Tools/build/deepfreeze.py. + +// All field names generated by ASCII_STR() have a common prefix, +// to help avoid collisions with keywords, macros, etc. + +#define STRUCT_FOR_ASCII_STR(LITERAL) \ + struct { \ + PyASCIIObject _ascii; \ + uint8_t _data[sizeof(LITERAL)]; \ + } +#define STRUCT_FOR_STR(NAME, LITERAL) \ + STRUCT_FOR_ASCII_STR(LITERAL) _py_ ## NAME; +#define STRUCT_FOR_ID(NAME) \ + STRUCT_FOR_ASCII_STR(#NAME) _py_ ## NAME; + +// XXX Order by frequency of use? + +/* The following is auto-generated by Tools/build/generate_global_objects.py. 
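(Editor's note, illustrative only: given the macros defined above, a declaration such as STRUCT_FOR_ID(add) expands to roughly

    struct {
        PyASCIIObject _ascii;
        uint8_t _data[sizeof("add")];
    } _py_add;

so each interned identifier is a statically allocated str object whose character bytes sit inline immediately after the PyASCIIObject header. That static allocation is what lets the _PyStaticObject_CheckRefcnt() calls in the fini header above take the objects' addresses directly. The _Py_ID(NAME) accessor macro those calls use resolves to the matching _py_ ## NAME member; in CPython's sources its definition appears later in this same header and is not shown here.)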
*/ +struct _Py_global_strings { + struct { + STRUCT_FOR_STR(anon_dictcomp, "<dictcomp>") + STRUCT_FOR_STR(anon_genexpr, "<genexpr>") + STRUCT_FOR_STR(anon_lambda, "<lambda>") + STRUCT_FOR_STR(anon_listcomp, "<listcomp>") + STRUCT_FOR_STR(anon_module, "<module>") + STRUCT_FOR_STR(anon_null, "<NULL>") + STRUCT_FOR_STR(anon_setcomp, "<setcomp>") + STRUCT_FOR_STR(anon_string, "<string>") + STRUCT_FOR_STR(anon_unknown, "<unknown>") + STRUCT_FOR_STR(dbl_close_br, "}}") + STRUCT_FOR_STR(dbl_open_br, "{{") + STRUCT_FOR_STR(dbl_percent, "%%") + STRUCT_FOR_STR(defaults, ".defaults") + STRUCT_FOR_STR(dot_locals, ".<locals>") + STRUCT_FOR_STR(empty, "") + STRUCT_FOR_STR(generic_base, ".generic_base") + STRUCT_FOR_STR(json_decoder, "json.decoder") + STRUCT_FOR_STR(kwdefaults, ".kwdefaults") + STRUCT_FOR_STR(list_err, "list index out of range") + STRUCT_FOR_STR(str_replace_inf, "1e309") + STRUCT_FOR_STR(type_params, ".type_params") + STRUCT_FOR_STR(utf_8, "utf-8") + } literals; + + struct { + STRUCT_FOR_ID(CANCELLED) + STRUCT_FOR_ID(FINISHED) + STRUCT_FOR_ID(False) + STRUCT_FOR_ID(JSONDecodeError) + STRUCT_FOR_ID(PENDING) + STRUCT_FOR_ID(Py_Repr) + STRUCT_FOR_ID(TextIOWrapper) + STRUCT_FOR_ID(True) + STRUCT_FOR_ID(WarningMessage) + STRUCT_FOR_ID(_WindowsConsoleIO) + STRUCT_FOR_ID(__IOBase_closed) + STRUCT_FOR_ID(__abc_tpflags__) + STRUCT_FOR_ID(__abs__) + STRUCT_FOR_ID(__abstractmethods__) + STRUCT_FOR_ID(__add__) + STRUCT_FOR_ID(__aenter__) + STRUCT_FOR_ID(__aexit__) + STRUCT_FOR_ID(__aiter__) + STRUCT_FOR_ID(__all__) + STRUCT_FOR_ID(__and__) + STRUCT_FOR_ID(__anext__) + STRUCT_FOR_ID(__annotations__) + STRUCT_FOR_ID(__args__) + STRUCT_FOR_ID(__await__) + STRUCT_FOR_ID(__bases__) + STRUCT_FOR_ID(__bool__) + STRUCT_FOR_ID(__buffer__) + STRUCT_FOR_ID(__build_class__) + STRUCT_FOR_ID(__builtins__) + STRUCT_FOR_ID(__bytes__) + STRUCT_FOR_ID(__call__) + STRUCT_FOR_ID(__cantrace__) + STRUCT_FOR_ID(__class__) + STRUCT_FOR_ID(__class_getitem__) + STRUCT_FOR_ID(__classcell__) + STRUCT_FOR_ID(__classdict__) + STRUCT_FOR_ID(__classdictcell__) + STRUCT_FOR_ID(__complex__) + STRUCT_FOR_ID(__contains__) + STRUCT_FOR_ID(__copy__) + STRUCT_FOR_ID(__ctypes_from_outparam__) + STRUCT_FOR_ID(__del__) + STRUCT_FOR_ID(__delattr__) + STRUCT_FOR_ID(__delete__) + STRUCT_FOR_ID(__delitem__) + STRUCT_FOR_ID(__dict__) + STRUCT_FOR_ID(__dictoffset__) + STRUCT_FOR_ID(__dir__) + STRUCT_FOR_ID(__divmod__) + STRUCT_FOR_ID(__doc__) + STRUCT_FOR_ID(__enter__) + STRUCT_FOR_ID(__eq__) + STRUCT_FOR_ID(__exit__) + STRUCT_FOR_ID(__file__) + STRUCT_FOR_ID(__firstlineno__) + STRUCT_FOR_ID(__float__) + STRUCT_FOR_ID(__floordiv__) + STRUCT_FOR_ID(__format__) + STRUCT_FOR_ID(__fspath__) + STRUCT_FOR_ID(__ge__) + STRUCT_FOR_ID(__get__) + STRUCT_FOR_ID(__getattr__) + STRUCT_FOR_ID(__getattribute__) + STRUCT_FOR_ID(__getinitargs__) + STRUCT_FOR_ID(__getitem__) + STRUCT_FOR_ID(__getnewargs__) + STRUCT_FOR_ID(__getnewargs_ex__) + STRUCT_FOR_ID(__getstate__) + STRUCT_FOR_ID(__gt__) + STRUCT_FOR_ID(__hash__) + STRUCT_FOR_ID(__iadd__) + STRUCT_FOR_ID(__iand__) + STRUCT_FOR_ID(__ifloordiv__) + STRUCT_FOR_ID(__ilshift__) + STRUCT_FOR_ID(__imatmul__) + STRUCT_FOR_ID(__imod__) + STRUCT_FOR_ID(__import__) + STRUCT_FOR_ID(__imul__) + STRUCT_FOR_ID(__index__) + STRUCT_FOR_ID(__init__) + STRUCT_FOR_ID(__init_subclass__) + STRUCT_FOR_ID(__instancecheck__) + STRUCT_FOR_ID(__int__) + STRUCT_FOR_ID(__invert__) + STRUCT_FOR_ID(__ior__) + STRUCT_FOR_ID(__ipow__) + STRUCT_FOR_ID(__irshift__) + STRUCT_FOR_ID(__isabstractmethod__) + STRUCT_FOR_ID(__isub__) + STRUCT_FOR_ID(__iter__) + STRUCT_FOR_ID(__itruediv__) + STRUCT_FOR_ID(__ixor__) +
STRUCT_FOR_ID(__le__) + STRUCT_FOR_ID(__len__) + STRUCT_FOR_ID(__length_hint__) + STRUCT_FOR_ID(__lltrace__) + STRUCT_FOR_ID(__loader__) + STRUCT_FOR_ID(__lshift__) + STRUCT_FOR_ID(__lt__) + STRUCT_FOR_ID(__main__) + STRUCT_FOR_ID(__match_args__) + STRUCT_FOR_ID(__matmul__) + STRUCT_FOR_ID(__missing__) + STRUCT_FOR_ID(__mod__) + STRUCT_FOR_ID(__module__) + STRUCT_FOR_ID(__mro_entries__) + STRUCT_FOR_ID(__mul__) + STRUCT_FOR_ID(__name__) + STRUCT_FOR_ID(__ne__) + STRUCT_FOR_ID(__neg__) + STRUCT_FOR_ID(__new__) + STRUCT_FOR_ID(__newobj__) + STRUCT_FOR_ID(__newobj_ex__) + STRUCT_FOR_ID(__next__) + STRUCT_FOR_ID(__notes__) + STRUCT_FOR_ID(__or__) + STRUCT_FOR_ID(__orig_class__) + STRUCT_FOR_ID(__origin__) + STRUCT_FOR_ID(__package__) + STRUCT_FOR_ID(__parameters__) + STRUCT_FOR_ID(__path__) + STRUCT_FOR_ID(__pos__) + STRUCT_FOR_ID(__pow__) + STRUCT_FOR_ID(__prepare__) + STRUCT_FOR_ID(__qualname__) + STRUCT_FOR_ID(__radd__) + STRUCT_FOR_ID(__rand__) + STRUCT_FOR_ID(__rdivmod__) + STRUCT_FOR_ID(__reduce__) + STRUCT_FOR_ID(__reduce_ex__) + STRUCT_FOR_ID(__release_buffer__) + STRUCT_FOR_ID(__repr__) + STRUCT_FOR_ID(__reversed__) + STRUCT_FOR_ID(__rfloordiv__) + STRUCT_FOR_ID(__rlshift__) + STRUCT_FOR_ID(__rmatmul__) + STRUCT_FOR_ID(__rmod__) + STRUCT_FOR_ID(__rmul__) + STRUCT_FOR_ID(__ror__) + STRUCT_FOR_ID(__round__) + STRUCT_FOR_ID(__rpow__) + STRUCT_FOR_ID(__rrshift__) + STRUCT_FOR_ID(__rshift__) + STRUCT_FOR_ID(__rsub__) + STRUCT_FOR_ID(__rtruediv__) + STRUCT_FOR_ID(__rxor__) + STRUCT_FOR_ID(__set__) + STRUCT_FOR_ID(__set_name__) + STRUCT_FOR_ID(__setattr__) + STRUCT_FOR_ID(__setitem__) + STRUCT_FOR_ID(__setstate__) + STRUCT_FOR_ID(__sizeof__) + STRUCT_FOR_ID(__slotnames__) + STRUCT_FOR_ID(__slots__) + STRUCT_FOR_ID(__spec__) + STRUCT_FOR_ID(__static_attributes__) + STRUCT_FOR_ID(__str__) + STRUCT_FOR_ID(__sub__) + STRUCT_FOR_ID(__subclasscheck__) + STRUCT_FOR_ID(__subclasshook__) + STRUCT_FOR_ID(__truediv__) + STRUCT_FOR_ID(__trunc__) + STRUCT_FOR_ID(__type_params__) + STRUCT_FOR_ID(__typing_is_unpacked_typevartuple__) + STRUCT_FOR_ID(__typing_prepare_subst__) + STRUCT_FOR_ID(__typing_subst__) + STRUCT_FOR_ID(__typing_unpacked_tuple_args__) + STRUCT_FOR_ID(__warningregistry__) + STRUCT_FOR_ID(__weaklistoffset__) + STRUCT_FOR_ID(__weakref__) + STRUCT_FOR_ID(__xor__) + STRUCT_FOR_ID(_abc_impl) + STRUCT_FOR_ID(_abstract_) + STRUCT_FOR_ID(_active) + STRUCT_FOR_ID(_align_) + STRUCT_FOR_ID(_annotation) + STRUCT_FOR_ID(_anonymous_) + STRUCT_FOR_ID(_argtypes_) + STRUCT_FOR_ID(_as_parameter_) + STRUCT_FOR_ID(_asyncio_future_blocking) + STRUCT_FOR_ID(_blksize) + STRUCT_FOR_ID(_bootstrap) + STRUCT_FOR_ID(_check_retval_) + STRUCT_FOR_ID(_dealloc_warn) + STRUCT_FOR_ID(_feature_version) + STRUCT_FOR_ID(_field_types) + STRUCT_FOR_ID(_fields_) + STRUCT_FOR_ID(_finalizing) + STRUCT_FOR_ID(_find_and_load) + STRUCT_FOR_ID(_fix_up_module) + STRUCT_FOR_ID(_flags_) + STRUCT_FOR_ID(_get_sourcefile) + STRUCT_FOR_ID(_handle_fromlist) + STRUCT_FOR_ID(_initializing) + STRUCT_FOR_ID(_io) + STRUCT_FOR_ID(_is_text_encoding) + STRUCT_FOR_ID(_length_) + STRUCT_FOR_ID(_limbo) + STRUCT_FOR_ID(_lock_unlock_module) + STRUCT_FOR_ID(_loop) + STRUCT_FOR_ID(_needs_com_addref_) + STRUCT_FOR_ID(_only_immortal) + STRUCT_FOR_ID(_pack_) + STRUCT_FOR_ID(_restype_) + STRUCT_FOR_ID(_showwarnmsg) + STRUCT_FOR_ID(_shutdown) + STRUCT_FOR_ID(_slotnames) + STRUCT_FOR_ID(_strptime) + STRUCT_FOR_ID(_strptime_datetime) + STRUCT_FOR_ID(_swappedbytes_) + STRUCT_FOR_ID(_type_) + STRUCT_FOR_ID(_uninitialized_submodules) + 
STRUCT_FOR_ID(_warn_unawaited_coroutine) + STRUCT_FOR_ID(_xoptions) + STRUCT_FOR_ID(abs_tol) + STRUCT_FOR_ID(access) + STRUCT_FOR_ID(aclose) + STRUCT_FOR_ID(add) + STRUCT_FOR_ID(add_done_callback) + STRUCT_FOR_ID(after_in_child) + STRUCT_FOR_ID(after_in_parent) + STRUCT_FOR_ID(aggregate_class) + STRUCT_FOR_ID(alias) + STRUCT_FOR_ID(allow_code) + STRUCT_FOR_ID(append) + STRUCT_FOR_ID(arg) + STRUCT_FOR_ID(argdefs) + STRUCT_FOR_ID(args) + STRUCT_FOR_ID(arguments) + STRUCT_FOR_ID(argv) + STRUCT_FOR_ID(as_integer_ratio) + STRUCT_FOR_ID(asend) + STRUCT_FOR_ID(ast) + STRUCT_FOR_ID(athrow) + STRUCT_FOR_ID(attribute) + STRUCT_FOR_ID(authorizer_callback) + STRUCT_FOR_ID(autocommit) + STRUCT_FOR_ID(backtick) + STRUCT_FOR_ID(base) + STRUCT_FOR_ID(before) + STRUCT_FOR_ID(big) + STRUCT_FOR_ID(binary_form) + STRUCT_FOR_ID(block) + STRUCT_FOR_ID(bound) + STRUCT_FOR_ID(buffer) + STRUCT_FOR_ID(buffer_callback) + STRUCT_FOR_ID(buffer_size) + STRUCT_FOR_ID(buffering) + STRUCT_FOR_ID(buffers) + STRUCT_FOR_ID(bufsize) + STRUCT_FOR_ID(builtins) + STRUCT_FOR_ID(byteorder) + STRUCT_FOR_ID(bytes) + STRUCT_FOR_ID(bytes_per_sep) + STRUCT_FOR_ID(c_call) + STRUCT_FOR_ID(c_exception) + STRUCT_FOR_ID(c_return) + STRUCT_FOR_ID(cached_datetime_module) + STRUCT_FOR_ID(cached_statements) + STRUCT_FOR_ID(cadata) + STRUCT_FOR_ID(cafile) + STRUCT_FOR_ID(call) + STRUCT_FOR_ID(call_exception_handler) + STRUCT_FOR_ID(call_soon) + STRUCT_FOR_ID(callback) + STRUCT_FOR_ID(cancel) + STRUCT_FOR_ID(capath) + STRUCT_FOR_ID(category) + STRUCT_FOR_ID(cb_type) + STRUCT_FOR_ID(certfile) + STRUCT_FOR_ID(check_same_thread) + STRUCT_FOR_ID(clear) + STRUCT_FOR_ID(close) + STRUCT_FOR_ID(closed) + STRUCT_FOR_ID(closefd) + STRUCT_FOR_ID(closure) + STRUCT_FOR_ID(co_argcount) + STRUCT_FOR_ID(co_cellvars) + STRUCT_FOR_ID(co_code) + STRUCT_FOR_ID(co_consts) + STRUCT_FOR_ID(co_exceptiontable) + STRUCT_FOR_ID(co_filename) + STRUCT_FOR_ID(co_firstlineno) + STRUCT_FOR_ID(co_flags) + STRUCT_FOR_ID(co_freevars) + STRUCT_FOR_ID(co_kwonlyargcount) + STRUCT_FOR_ID(co_linetable) + STRUCT_FOR_ID(co_name) + STRUCT_FOR_ID(co_names) + STRUCT_FOR_ID(co_nlocals) + STRUCT_FOR_ID(co_posonlyargcount) + STRUCT_FOR_ID(co_qualname) + STRUCT_FOR_ID(co_stacksize) + STRUCT_FOR_ID(co_varnames) + STRUCT_FOR_ID(code) + STRUCT_FOR_ID(col_offset) + STRUCT_FOR_ID(command) + STRUCT_FOR_ID(comment_factory) + STRUCT_FOR_ID(compile_mode) + STRUCT_FOR_ID(consts) + STRUCT_FOR_ID(context) + STRUCT_FOR_ID(contravariant) + STRUCT_FOR_ID(cookie) + STRUCT_FOR_ID(copy) + STRUCT_FOR_ID(copyreg) + STRUCT_FOR_ID(coro) + STRUCT_FOR_ID(count) + STRUCT_FOR_ID(covariant) + STRUCT_FOR_ID(cwd) + STRUCT_FOR_ID(data) + STRUCT_FOR_ID(database) + STRUCT_FOR_ID(day) + STRUCT_FOR_ID(decode) + STRUCT_FOR_ID(decoder) + STRUCT_FOR_ID(default) + STRUCT_FOR_ID(defaultaction) + STRUCT_FOR_ID(delete) + STRUCT_FOR_ID(depth) + STRUCT_FOR_ID(desired_access) + STRUCT_FOR_ID(detect_types) + STRUCT_FOR_ID(deterministic) + STRUCT_FOR_ID(device) + STRUCT_FOR_ID(dict) + STRUCT_FOR_ID(dictcomp) + STRUCT_FOR_ID(difference_update) + STRUCT_FOR_ID(digest) + STRUCT_FOR_ID(digest_size) + STRUCT_FOR_ID(digestmod) + STRUCT_FOR_ID(dir_fd) + STRUCT_FOR_ID(discard) + STRUCT_FOR_ID(dispatch_table) + STRUCT_FOR_ID(displayhook) + STRUCT_FOR_ID(dklen) + STRUCT_FOR_ID(doc) + STRUCT_FOR_ID(dont_inherit) + STRUCT_FOR_ID(dst) + STRUCT_FOR_ID(dst_dir_fd) + STRUCT_FOR_ID(eager_start) + STRUCT_FOR_ID(effective_ids) + STRUCT_FOR_ID(element_factory) + STRUCT_FOR_ID(encode) + STRUCT_FOR_ID(encoding) + STRUCT_FOR_ID(end) + 
STRUCT_FOR_ID(end_col_offset) + STRUCT_FOR_ID(end_lineno) + STRUCT_FOR_ID(end_offset) + STRUCT_FOR_ID(endpos) + STRUCT_FOR_ID(entrypoint) + STRUCT_FOR_ID(env) + STRUCT_FOR_ID(errors) + STRUCT_FOR_ID(event) + STRUCT_FOR_ID(eventmask) + STRUCT_FOR_ID(exc_type) + STRUCT_FOR_ID(exc_value) + STRUCT_FOR_ID(excepthook) + STRUCT_FOR_ID(exception) + STRUCT_FOR_ID(existing_file_name) + STRUCT_FOR_ID(exp) + STRUCT_FOR_ID(extend) + STRUCT_FOR_ID(extra_tokens) + STRUCT_FOR_ID(facility) + STRUCT_FOR_ID(factory) + STRUCT_FOR_ID(false) + STRUCT_FOR_ID(family) + STRUCT_FOR_ID(fanout) + STRUCT_FOR_ID(fd) + STRUCT_FOR_ID(fd2) + STRUCT_FOR_ID(fdel) + STRUCT_FOR_ID(fget) + STRUCT_FOR_ID(file) + STRUCT_FOR_ID(file_actions) + STRUCT_FOR_ID(filename) + STRUCT_FOR_ID(fileno) + STRUCT_FOR_ID(filepath) + STRUCT_FOR_ID(fillvalue) + STRUCT_FOR_ID(filter) + STRUCT_FOR_ID(filters) + STRUCT_FOR_ID(final) + STRUCT_FOR_ID(find_class) + STRUCT_FOR_ID(fix_imports) + STRUCT_FOR_ID(flags) + STRUCT_FOR_ID(flush) + STRUCT_FOR_ID(fold) + STRUCT_FOR_ID(follow_symlinks) + STRUCT_FOR_ID(format) + STRUCT_FOR_ID(from_param) + STRUCT_FOR_ID(fromlist) + STRUCT_FOR_ID(fromtimestamp) + STRUCT_FOR_ID(fromutc) + STRUCT_FOR_ID(fset) + STRUCT_FOR_ID(func) + STRUCT_FOR_ID(future) + STRUCT_FOR_ID(generation) + STRUCT_FOR_ID(genexpr) + STRUCT_FOR_ID(get) + STRUCT_FOR_ID(get_debug) + STRUCT_FOR_ID(get_event_loop) + STRUCT_FOR_ID(get_loop) + STRUCT_FOR_ID(get_source) + STRUCT_FOR_ID(getattr) + STRUCT_FOR_ID(getstate) + STRUCT_FOR_ID(gid) + STRUCT_FOR_ID(globals) + STRUCT_FOR_ID(groupindex) + STRUCT_FOR_ID(groups) + STRUCT_FOR_ID(handle) + STRUCT_FOR_ID(handle_seq) + STRUCT_FOR_ID(has_location) + STRUCT_FOR_ID(hash_name) + STRUCT_FOR_ID(header) + STRUCT_FOR_ID(headers) + STRUCT_FOR_ID(hi) + STRUCT_FOR_ID(hook) + STRUCT_FOR_ID(hour) + STRUCT_FOR_ID(ident) + STRUCT_FOR_ID(identity_hint) + STRUCT_FOR_ID(ignore) + STRUCT_FOR_ID(imag) + STRUCT_FOR_ID(importlib) + STRUCT_FOR_ID(in_fd) + STRUCT_FOR_ID(incoming) + STRUCT_FOR_ID(indexgroup) + STRUCT_FOR_ID(inf) + STRUCT_FOR_ID(infer_variance) + STRUCT_FOR_ID(inherit_handle) + STRUCT_FOR_ID(inheritable) + STRUCT_FOR_ID(initial) + STRUCT_FOR_ID(initial_bytes) + STRUCT_FOR_ID(initial_owner) + STRUCT_FOR_ID(initial_state) + STRUCT_FOR_ID(initial_value) + STRUCT_FOR_ID(initval) + STRUCT_FOR_ID(inner_size) + STRUCT_FOR_ID(input) + STRUCT_FOR_ID(insert_comments) + STRUCT_FOR_ID(insert_pis) + STRUCT_FOR_ID(instructions) + STRUCT_FOR_ID(intern) + STRUCT_FOR_ID(intersection) + STRUCT_FOR_ID(interval) + STRUCT_FOR_ID(is_running) + STRUCT_FOR_ID(isatty) + STRUCT_FOR_ID(isinstance) + STRUCT_FOR_ID(isoformat) + STRUCT_FOR_ID(isolation_level) + STRUCT_FOR_ID(istext) + STRUCT_FOR_ID(item) + STRUCT_FOR_ID(items) + STRUCT_FOR_ID(iter) + STRUCT_FOR_ID(iterable) + STRUCT_FOR_ID(iterations) + STRUCT_FOR_ID(join) + STRUCT_FOR_ID(jump) + STRUCT_FOR_ID(keepends) + STRUCT_FOR_ID(key) + STRUCT_FOR_ID(keyfile) + STRUCT_FOR_ID(keys) + STRUCT_FOR_ID(kind) + STRUCT_FOR_ID(kw) + STRUCT_FOR_ID(kw1) + STRUCT_FOR_ID(kw2) + STRUCT_FOR_ID(kwdefaults) + STRUCT_FOR_ID(label) + STRUCT_FOR_ID(lambda) + STRUCT_FOR_ID(last) + STRUCT_FOR_ID(last_exc) + STRUCT_FOR_ID(last_node) + STRUCT_FOR_ID(last_traceback) + STRUCT_FOR_ID(last_type) + STRUCT_FOR_ID(last_value) + STRUCT_FOR_ID(latin1) + STRUCT_FOR_ID(leaf_size) + STRUCT_FOR_ID(len) + STRUCT_FOR_ID(length) + STRUCT_FOR_ID(level) + STRUCT_FOR_ID(limit) + STRUCT_FOR_ID(line) + STRUCT_FOR_ID(line_buffering) + STRUCT_FOR_ID(lineno) + STRUCT_FOR_ID(listcomp) + STRUCT_FOR_ID(little) + STRUCT_FOR_ID(lo) + 
STRUCT_FOR_ID(locale) + STRUCT_FOR_ID(locals) + STRUCT_FOR_ID(logoption) + STRUCT_FOR_ID(loop) + STRUCT_FOR_ID(manual_reset) + STRUCT_FOR_ID(mapping) + STRUCT_FOR_ID(match) + STRUCT_FOR_ID(max_length) + STRUCT_FOR_ID(maxdigits) + STRUCT_FOR_ID(maxevents) + STRUCT_FOR_ID(maxlen) + STRUCT_FOR_ID(maxmem) + STRUCT_FOR_ID(maxsplit) + STRUCT_FOR_ID(maxvalue) + STRUCT_FOR_ID(memLevel) + STRUCT_FOR_ID(memlimit) + STRUCT_FOR_ID(message) + STRUCT_FOR_ID(metaclass) + STRUCT_FOR_ID(metadata) + STRUCT_FOR_ID(method) + STRUCT_FOR_ID(microsecond) + STRUCT_FOR_ID(milliseconds) + STRUCT_FOR_ID(minute) + STRUCT_FOR_ID(mod) + STRUCT_FOR_ID(mode) + STRUCT_FOR_ID(module) + STRUCT_FOR_ID(module_globals) + STRUCT_FOR_ID(modules) + STRUCT_FOR_ID(month) + STRUCT_FOR_ID(mro) + STRUCT_FOR_ID(msg) + STRUCT_FOR_ID(mutex) + STRUCT_FOR_ID(mycmp) + STRUCT_FOR_ID(n_arg) + STRUCT_FOR_ID(n_fields) + STRUCT_FOR_ID(n_sequence_fields) + STRUCT_FOR_ID(n_unnamed_fields) + STRUCT_FOR_ID(name) + STRUCT_FOR_ID(name_from) + STRUCT_FOR_ID(namespace_separator) + STRUCT_FOR_ID(namespaces) + STRUCT_FOR_ID(narg) + STRUCT_FOR_ID(ndigits) + STRUCT_FOR_ID(nested) + STRUCT_FOR_ID(new_file_name) + STRUCT_FOR_ID(new_limit) + STRUCT_FOR_ID(newline) + STRUCT_FOR_ID(newlines) + STRUCT_FOR_ID(next) + STRUCT_FOR_ID(nlocals) + STRUCT_FOR_ID(node_depth) + STRUCT_FOR_ID(node_offset) + STRUCT_FOR_ID(ns) + STRUCT_FOR_ID(nstype) + STRUCT_FOR_ID(nt) + STRUCT_FOR_ID(null) + STRUCT_FOR_ID(number) + STRUCT_FOR_ID(obj) + STRUCT_FOR_ID(object) + STRUCT_FOR_ID(offset) + STRUCT_FOR_ID(offset_dst) + STRUCT_FOR_ID(offset_src) + STRUCT_FOR_ID(on_type_read) + STRUCT_FOR_ID(onceregistry) + STRUCT_FOR_ID(only_keys) + STRUCT_FOR_ID(oparg) + STRUCT_FOR_ID(opcode) + STRUCT_FOR_ID(open) + STRUCT_FOR_ID(opener) + STRUCT_FOR_ID(operation) + STRUCT_FOR_ID(optimize) + STRUCT_FOR_ID(options) + STRUCT_FOR_ID(order) + STRUCT_FOR_ID(origin) + STRUCT_FOR_ID(out_fd) + STRUCT_FOR_ID(outgoing) + STRUCT_FOR_ID(overlapped) + STRUCT_FOR_ID(owner) + STRUCT_FOR_ID(pages) + STRUCT_FOR_ID(parent) + STRUCT_FOR_ID(password) + STRUCT_FOR_ID(path) + STRUCT_FOR_ID(pattern) + STRUCT_FOR_ID(peek) + STRUCT_FOR_ID(persistent_id) + STRUCT_FOR_ID(persistent_load) + STRUCT_FOR_ID(person) + STRUCT_FOR_ID(pi_factory) + STRUCT_FOR_ID(pid) + STRUCT_FOR_ID(policy) + STRUCT_FOR_ID(pos) + STRUCT_FOR_ID(pos1) + STRUCT_FOR_ID(pos2) + STRUCT_FOR_ID(posix) + STRUCT_FOR_ID(print_file_and_line) + STRUCT_FOR_ID(priority) + STRUCT_FOR_ID(progress) + STRUCT_FOR_ID(progress_handler) + STRUCT_FOR_ID(progress_routine) + STRUCT_FOR_ID(proto) + STRUCT_FOR_ID(protocol) + STRUCT_FOR_ID(ps1) + STRUCT_FOR_ID(ps2) + STRUCT_FOR_ID(query) + STRUCT_FOR_ID(quotetabs) + STRUCT_FOR_ID(raw) + STRUCT_FOR_ID(read) + STRUCT_FOR_ID(read1) + STRUCT_FOR_ID(readable) + STRUCT_FOR_ID(readall) + STRUCT_FOR_ID(readinto) + STRUCT_FOR_ID(readinto1) + STRUCT_FOR_ID(readline) + STRUCT_FOR_ID(readonly) + STRUCT_FOR_ID(real) + STRUCT_FOR_ID(reducer_override) + STRUCT_FOR_ID(registry) + STRUCT_FOR_ID(rel_tol) + STRUCT_FOR_ID(release) + STRUCT_FOR_ID(reload) + STRUCT_FOR_ID(repl) + STRUCT_FOR_ID(replace) + STRUCT_FOR_ID(reserved) + STRUCT_FOR_ID(reset) + STRUCT_FOR_ID(resetids) + STRUCT_FOR_ID(return) + STRUCT_FOR_ID(reverse) + STRUCT_FOR_ID(reversed) + STRUCT_FOR_ID(salt) + STRUCT_FOR_ID(sched_priority) + STRUCT_FOR_ID(scheduler) + STRUCT_FOR_ID(second) + STRUCT_FOR_ID(security_attributes) + STRUCT_FOR_ID(seek) + STRUCT_FOR_ID(seekable) + STRUCT_FOR_ID(selectors) + STRUCT_FOR_ID(self) + STRUCT_FOR_ID(send) + STRUCT_FOR_ID(sep) + 
STRUCT_FOR_ID(sequence) + STRUCT_FOR_ID(server_hostname) + STRUCT_FOR_ID(server_side) + STRUCT_FOR_ID(session) + STRUCT_FOR_ID(setcomp) + STRUCT_FOR_ID(setpgroup) + STRUCT_FOR_ID(setsid) + STRUCT_FOR_ID(setsigdef) + STRUCT_FOR_ID(setsigmask) + STRUCT_FOR_ID(setstate) + STRUCT_FOR_ID(shape) + STRUCT_FOR_ID(show_cmd) + STRUCT_FOR_ID(signed) + STRUCT_FOR_ID(size) + STRUCT_FOR_ID(sizehint) + STRUCT_FOR_ID(skip_file_prefixes) + STRUCT_FOR_ID(sleep) + STRUCT_FOR_ID(sock) + STRUCT_FOR_ID(sort) + STRUCT_FOR_ID(source) + STRUCT_FOR_ID(source_traceback) + STRUCT_FOR_ID(spam) + STRUCT_FOR_ID(src) + STRUCT_FOR_ID(src_dir_fd) + STRUCT_FOR_ID(stacklevel) + STRUCT_FOR_ID(start) + STRUCT_FOR_ID(statement) + STRUCT_FOR_ID(status) + STRUCT_FOR_ID(stderr) + STRUCT_FOR_ID(stdin) + STRUCT_FOR_ID(stdout) + STRUCT_FOR_ID(step) + STRUCT_FOR_ID(steps) + STRUCT_FOR_ID(store_name) + STRUCT_FOR_ID(strategy) + STRUCT_FOR_ID(strftime) + STRUCT_FOR_ID(strict) + STRUCT_FOR_ID(strict_mode) + STRUCT_FOR_ID(string) + STRUCT_FOR_ID(sub_key) + STRUCT_FOR_ID(symmetric_difference_update) + STRUCT_FOR_ID(tabsize) + STRUCT_FOR_ID(tag) + STRUCT_FOR_ID(target) + STRUCT_FOR_ID(target_is_directory) + STRUCT_FOR_ID(task) + STRUCT_FOR_ID(tb_frame) + STRUCT_FOR_ID(tb_lasti) + STRUCT_FOR_ID(tb_lineno) + STRUCT_FOR_ID(tb_next) + STRUCT_FOR_ID(tell) + STRUCT_FOR_ID(template) + STRUCT_FOR_ID(term) + STRUCT_FOR_ID(text) + STRUCT_FOR_ID(threading) + STRUCT_FOR_ID(throw) + STRUCT_FOR_ID(timeout) + STRUCT_FOR_ID(times) + STRUCT_FOR_ID(timetuple) + STRUCT_FOR_ID(top) + STRUCT_FOR_ID(trace_callback) + STRUCT_FOR_ID(traceback) + STRUCT_FOR_ID(trailers) + STRUCT_FOR_ID(translate) + STRUCT_FOR_ID(true) + STRUCT_FOR_ID(truncate) + STRUCT_FOR_ID(twice) + STRUCT_FOR_ID(txt) + STRUCT_FOR_ID(type) + STRUCT_FOR_ID(type_params) + STRUCT_FOR_ID(tz) + STRUCT_FOR_ID(tzinfo) + STRUCT_FOR_ID(tzname) + STRUCT_FOR_ID(uid) + STRUCT_FOR_ID(unlink) + STRUCT_FOR_ID(unraisablehook) + STRUCT_FOR_ID(uri) + STRUCT_FOR_ID(usedforsecurity) + STRUCT_FOR_ID(value) + STRUCT_FOR_ID(values) + STRUCT_FOR_ID(version) + STRUCT_FOR_ID(volume) + STRUCT_FOR_ID(wait_all) + STRUCT_FOR_ID(warn_on_full_buffer) + STRUCT_FOR_ID(warnings) + STRUCT_FOR_ID(warnoptions) + STRUCT_FOR_ID(wbits) + STRUCT_FOR_ID(week) + STRUCT_FOR_ID(weekday) + STRUCT_FOR_ID(which) + STRUCT_FOR_ID(who) + STRUCT_FOR_ID(withdata) + STRUCT_FOR_ID(writable) + STRUCT_FOR_ID(write) + STRUCT_FOR_ID(write_through) + STRUCT_FOR_ID(year) + STRUCT_FOR_ID(zdict) + } identifiers; + struct { + PyASCIIObject _ascii; + uint8_t _data[2]; + } ascii[128]; + struct { + PyCompactUnicodeObject _latin1; + uint8_t _data[2]; + } latin1[128]; +}; +/* End auto-generated code */ + +#undef ID +#undef STR + + +#define _Py_ID(NAME) \ + (_Py_SINGLETON(strings.identifiers._py_ ## NAME._ascii.ob_base)) +#define _Py_STR(NAME) \ + (_Py_SINGLETON(strings.literals._py_ ## NAME._ascii.ob_base)) +#define _Py_LATIN1_CHR(CH) \ + ((CH) < 128 \ + ? (PyObject*)&_Py_SINGLETON(strings).ascii[(CH)] \ + : (PyObject*)&_Py_SINGLETON(strings).latin1[(CH) - 128]) + +/* _Py_DECLARE_STR() should precede all uses of _Py_STR() in a function. + + This is true even if the same string has already been declared + elsewhere, even in the same file. Mismatched duplicates are detected + by Tools/scripts/generate-global-objects.py. + + Pairing _Py_DECLARE_STR() with every use of _Py_STR() makes sure the + string keeps working even if the declaration is removed somewhere + else. It also makes it clear what the actual string is at every + place it is being used. 
*/ +#define _Py_DECLARE_STR(name, str) + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_GLOBAL_STRINGS_H */ diff --git a/Include/internal/pycore_hamt.h b/Include/internal/pycore_hamt.h new file mode 100644 index 0000000000000000000000000000000000000000..d8742c7cb6357833c7d1b5326646ef6375953e46 --- /dev/null +++ b/Include/internal/pycore_hamt.h @@ -0,0 +1,134 @@ +#ifndef Py_INTERNAL_HAMT_H +#define Py_INTERNAL_HAMT_H + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + + +/* +The HAMT tree is shaped by the hashes of its keys. Every group of 5 bits of a hash denotes +the exact position of the key in one level of the tree. Since we're using +32-bit hashes, we can have at most 7 such levels. If there are +two distinct keys with equal hashes, they will have to occupy the same +cell in the 7th level of the tree -- so we'd put them in a "collision" node, +which brings the total possible tree depth to 8. Read more about the actual +layout of the HAMT tree in `hamt.c`. + +This constant is used to define a data structure for storing iteration state. +*/ +#define _Py_HAMT_MAX_TREE_DEPTH 8 + + +extern PyTypeObject _PyHamt_Type; +extern PyTypeObject _PyHamt_ArrayNode_Type; +extern PyTypeObject _PyHamt_BitmapNode_Type; +extern PyTypeObject _PyHamt_CollisionNode_Type; +extern PyTypeObject _PyHamtKeys_Type; +extern PyTypeObject _PyHamtValues_Type; +extern PyTypeObject _PyHamtItems_Type; + + +/* other API */ + +#define PyHamt_Check(o) Py_IS_TYPE((o), &_PyHamt_Type) + + +/* Abstract tree node. */ +typedef struct { + PyObject_HEAD +} PyHamtNode; + + +/* A HAMT immutable mapping collection. */ +typedef struct { + PyObject_HEAD + PyHamtNode *h_root; + PyObject *h_weakreflist; + Py_ssize_t h_count; +} PyHamtObject; + + +typedef struct { + PyObject_VAR_HEAD + uint32_t b_bitmap; + PyObject *b_array[1]; +} PyHamtNode_Bitmap; + + +/* A struct to hold the state of a depth-first traversal of the tree. + + HAMT is an immutable collection. Iterators will hold a strong reference + to it, and every node in the HAMT has strong references to its children. + + So for iterators, we can implement zero allocations and zero reference + inc/dec depth-first iteration. + + - i_nodes: an array of eight pointers to tree nodes + - i_level: the index of the current node in i_nodes + - i_pos: an array of positions within nodes in i_nodes. +*/ +typedef struct { + PyHamtNode *i_nodes[_Py_HAMT_MAX_TREE_DEPTH]; + Py_ssize_t i_pos[_Py_HAMT_MAX_TREE_DEPTH]; + int8_t i_level; +} PyHamtIteratorState; + + +/* Base iterator object. + + Contains the iteration state, a pointer to the HAMT tree, + and a pointer to the 'yield function'. The latter is a simple + function that returns a key/value tuple for the 'Items' iterator, + just a key for the 'Keys' iterator, and a value for the 'Values' + iterator. +*/ +typedef struct { + PyObject_HEAD + PyHamtObject *hi_obj; + PyHamtIteratorState hi_iter; + binaryfunc hi_yield; +} PyHamtIterator; + + +/* Create a new HAMT immutable mapping. */ +PyHamtObject * _PyHamt_New(void); + +/* Return a new collection based on "o", but with an additional + key/val pair. */ +PyHamtObject * _PyHamt_Assoc(PyHamtObject *o, PyObject *key, PyObject *val); + +/* Return a new collection based on "o", but without "key". */ +PyHamtObject * _PyHamt_Without(PyHamtObject *o, PyObject *key); + +/* Find "key" in the "o" collection. + + Return: + - -1: An error occurred. + - 0: "key" wasn't found in "o". + - 1: "key" is in "o"; "*val" is set to its value (a borrowed ref).
+*/ +int _PyHamt_Find(PyHamtObject *o, PyObject *key, PyObject **val); + +/* Check if "v" is equal to "w". + + Return: + - 0: v != w + - 1: v == w + - -1: An error occurred. +*/ +int _PyHamt_Eq(PyHamtObject *v, PyHamtObject *w); + +/* Return the size of "o"; equivalent of "len(o)". */ +Py_ssize_t _PyHamt_Len(PyHamtObject *o); + +/* Return a Keys iterator over "o". */ +PyObject * _PyHamt_NewIterKeys(PyHamtObject *o); + +/* Return a Values iterator over "o". */ +PyObject * _PyHamt_NewIterValues(PyHamtObject *o); + +/* Return an Items iterator over "o". */ +PyObject * _PyHamt_NewIterItems(PyHamtObject *o); + +#endif /* !Py_INTERNAL_HAMT_H */ diff --git a/Include/internal/pycore_hashtable.h b/Include/internal/pycore_hashtable.h new file mode 100644 index 0000000000000000000000000000000000000000..369d49c42bbfccde2e72c2adf5611a5322fb06c6 --- /dev/null +++ b/Include/internal/pycore_hashtable.h @@ -0,0 +1,150 @@ +#ifndef Py_INTERNAL_HASHTABLE_H +#define Py_INTERNAL_HASHTABLE_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +/* Singly linked list */ + +typedef struct _Py_slist_item_s { + struct _Py_slist_item_s *next; +} _Py_slist_item_t; + +typedef struct { + _Py_slist_item_t *head; +} _Py_slist_t; + +#define _Py_SLIST_ITEM_NEXT(ITEM) _Py_RVALUE(((_Py_slist_item_t *)(ITEM))->next) + +#define _Py_SLIST_HEAD(SLIST) _Py_RVALUE(((_Py_slist_t *)(SLIST))->head) + + +/* _Py_hashtable: table entry */ + +typedef struct { + /* used by _Py_hashtable_t.buckets to link entries */ + _Py_slist_item_t _Py_slist_item; + + Py_uhash_t key_hash; + void *key; + void *value; +} _Py_hashtable_entry_t; + + +/* _Py_hashtable: prototypes */ + +/* Forward declaration */ +struct _Py_hashtable_t; +typedef struct _Py_hashtable_t _Py_hashtable_t; + +typedef Py_uhash_t (*_Py_hashtable_hash_func) (const void *key); +typedef int (*_Py_hashtable_compare_func) (const void *key1, const void *key2); +typedef void (*_Py_hashtable_destroy_func) (void *key); +typedef _Py_hashtable_entry_t* (*_Py_hashtable_get_entry_func)(_Py_hashtable_t *ht, + const void *key); + +typedef struct { + // Allocate a memory block + void* (*malloc) (size_t size); + + // Release a memory block + void (*free) (void *ptr); +} _Py_hashtable_allocator_t; + + +/* _Py_hashtable: table */ +struct _Py_hashtable_t { + size_t nentries; // Total number of entries in the table + size_t nbuckets; + _Py_slist_t *buckets; + + _Py_hashtable_get_entry_func get_entry_func; + _Py_hashtable_hash_func hash_func; + _Py_hashtable_compare_func compare_func; + _Py_hashtable_destroy_func key_destroy_func; + _Py_hashtable_destroy_func value_destroy_func; + _Py_hashtable_allocator_t alloc; +}; + +// Export _Py_hashtable functions for '_testinternalcapi' shared extension +PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new( + _Py_hashtable_hash_func hash_func, + _Py_hashtable_compare_func compare_func); + +/* Hash a pointer (void*) */ +PyAPI_FUNC(Py_uhash_t) _Py_hashtable_hash_ptr(const void *key); + +/* Comparison using memcmp() */ +PyAPI_FUNC(int) _Py_hashtable_compare_direct( + const void *key1, + const void *key2); + +PyAPI_FUNC(_Py_hashtable_t *) _Py_hashtable_new_full( + _Py_hashtable_hash_func hash_func, + _Py_hashtable_compare_func compare_func, + _Py_hashtable_destroy_func key_destroy_func, + _Py_hashtable_destroy_func value_destroy_func, + _Py_hashtable_allocator_t *allocator); + +PyAPI_FUNC(void) _Py_hashtable_destroy(_Py_hashtable_t *ht); + +PyAPI_FUNC(void) _Py_hashtable_clear(_Py_hashtable_t *ht);
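Taken together with the set/get functions declared just below, this is a complete minimal key/value API. Here is a usage sketch (illustrative only; `make_seen_set` and `mark_seen` are hypothetical helpers, and the code assumes a Py_BUILD_CORE compilation context, since this is an internal header) that keys a table by pointer identity using the default helpers above:

```c
/* Minimal sketch, not part of the diff: a "seen set" keyed by pointer
   identity, built on the default hash/compare helpers declared above. */
#include "pycore_hashtable.h"

static _Py_hashtable_t *
make_seen_set(void)
{
    // Returns NULL on memory allocation failure.
    return _Py_hashtable_new(_Py_hashtable_hash_ptr,
                             _Py_hashtable_compare_direct);
}

static int
mark_seen(_Py_hashtable_t *seen, void *obj)
{
    // _Py_hashtable_get() returns the stored value, or NULL if absent;
    // here the value is always the (non-NULL) pointer itself, so NULL
    // unambiguously means "not present".
    if (_Py_hashtable_get(seen, obj) != NULL) {
        return 1;   // already seen
    }
    // _Py_hashtable_set() requires that the key is not already present;
    // it returns 0 on success and -1 on memory error.
    return _Py_hashtable_set(seen, obj, obj) < 0 ? -1 : 0;
}
```

When the table is no longer needed, `_Py_hashtable_destroy()` frees it and runs the key/value destroy callbacks, if any were registered via `_Py_hashtable_new_full()`.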
+ +typedef int (*_Py_hashtable_foreach_func) (_Py_hashtable_t *ht, + const void *key, const void *value, + void *user_data); + +/* Call func() on each entry of the hashtable. + Iteration stops if func() returns non-zero; in that case, + _Py_hashtable_foreach() returns that value. Otherwise, it returns 0. */ +PyAPI_FUNC(int) _Py_hashtable_foreach( + _Py_hashtable_t *ht, + _Py_hashtable_foreach_func func, + void *user_data); + +PyAPI_FUNC(size_t) _Py_hashtable_size(const _Py_hashtable_t *ht); +PyAPI_FUNC(size_t) _Py_hashtable_len(const _Py_hashtable_t *ht); + +/* Add a new entry to the hash. The key must not be present in the hash table. + Return 0 on success, -1 on memory error. */ +PyAPI_FUNC(int) _Py_hashtable_set( + _Py_hashtable_t *ht, + const void *key, + void *value); + + +/* Get an entry. + Return NULL if the key does not exist. */ +static inline _Py_hashtable_entry_t * +_Py_hashtable_get_entry(_Py_hashtable_t *ht, const void *key) +{ + return ht->get_entry_func(ht, key); +} + + +/* Get value from an entry. + Return NULL if the entry is not found. + + Use _Py_hashtable_get_entry() to distinguish an entry whose value is + NULL from a missing entry. */ +PyAPI_FUNC(void*) _Py_hashtable_get(_Py_hashtable_t *ht, const void *key); + + +/* Remove a key and its associated value without calling key and value destroy + functions. + + Return the removed value if the key was found. + Return NULL if the key was not found. */ +PyAPI_FUNC(void*) _Py_hashtable_steal( + _Py_hashtable_t *ht, + const void *key); + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_HASHTABLE_H */ diff --git a/Include/internal/pycore_identifier.h b/Include/internal/pycore_identifier.h new file mode 100644 index 0000000000000000000000000000000000000000..cda28810a48196227bcb54bf5028f18e6bb34e42 --- /dev/null +++ b/Include/internal/pycore_identifier.h @@ -0,0 +1,20 @@ +/* String Literals: _Py_Identifier API */ + +#ifndef Py_INTERNAL_IDENTIFIER_H +#define Py_INTERNAL_IDENTIFIER_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +extern PyObject* _PyType_LookupId(PyTypeObject *, _Py_Identifier *); +extern PyObject* _PyObject_LookupSpecialId(PyObject *, _Py_Identifier *); +extern int _PyObject_SetAttrId(PyObject *, _Py_Identifier *, PyObject *); + +#ifdef __cplusplus +} +#endif +#endif // !Py_INTERNAL_IDENTIFIER_H diff --git a/Include/internal/pycore_import.h b/Include/internal/pycore_import.h new file mode 100644 index 0000000000000000000000000000000000000000..55029abdd31b5a479f16a048d402c44c2d7affca --- /dev/null +++ b/Include/internal/pycore_import.h @@ -0,0 +1,213 @@ +#ifndef Py_LIMITED_API +#ifndef Py_INTERNAL_IMPORT_H +#define Py_INTERNAL_IMPORT_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_lock.h" // PyMutex +#include "pycore_hashtable.h" // _Py_hashtable_t + +extern int _PyImport_IsInitialized(PyInterpreterState *); + +// Export for 'pyexpat' shared extension +PyAPI_FUNC(int) _PyImport_SetModule(PyObject *name, PyObject *module); + +extern int _PyImport_SetModuleString(const char *name, PyObject* module); + +extern void _PyImport_AcquireLock(PyInterpreterState *interp); +extern void _PyImport_ReleaseLock(PyInterpreterState *interp); +extern void _PyImport_ReInitLock(PyInterpreterState *interp); + +// This is used exclusively for the sys and builtins modules: +extern int _PyImport_FixupBuiltin( + PyThreadState *tstate, + PyObject *mod, +
const char *name, /* UTF-8 encoded string */ + PyObject *modules + ); + +// Export for many shared extensions, like '_json' +PyAPI_FUNC(PyObject*) _PyImport_GetModuleAttr(PyObject *, PyObject *); + +// Export for many shared extensions, like '_datetime' +PyAPI_FUNC(PyObject*) _PyImport_GetModuleAttrString(const char *, const char *); + + +struct _import_runtime_state { + /* The builtin modules (defined in config.c). */ + struct _inittab *inittab; + /* The most recent value assigned to a PyModuleDef.m_base.m_index. + This is incremented each time PyModuleDef_Init() is called, + which is just about every time an extension module is imported. + See PyInterpreterState.modules_by_index for more info. */ + Py_ssize_t last_module_index; + struct { + /* A lock to guard the cache. */ + PyMutex mutex; + /* The actual cache of (filename, name, PyModuleDef) for modules. + Only legacy (single-phase init) extension modules are added + and only if they support multiple initialization (m_size >= 0) + or are imported in the main interpreter. + This is initialized lazily in fix_up_extension() in import.c. + Modules are added there and looked up in _imp.find_extension(). */ + _Py_hashtable_t *hashtable; + } extensions; + /* Package context -- the full module name for package imports */ + const char * pkgcontext; +}; + +struct _import_state { + /* cached sys.modules dictionary */ + PyObject *modules; + /* This is the list of module objects for all legacy (single-phase init) + extension modules ever loaded in this process (i.e. imported + in this interpreter or in any other). Py_None stands in for + modules that haven't actually been imported in this interpreter. + + A module's index (PyModuleDef.m_base.m_index) is used to look up + the corresponding module object for this interpreter, if any. + (See PyState_FindModule().) When any extension module + is initialized during import, its moduledef gets initialized by + PyModuleDef_Init(), and the first time that happens for each + PyModuleDef, its index gets set to the current value of + a global counter (see _PyRuntimeState.imports.last_module_index). + The entry for that index in this interpreter remains unset until + the module is actually imported here. (Py_None is used as + a placeholder.) Note that multi-phase init modules always get + an index for which there will never be a module set. + + This is initialized lazily in PyState_AddModule(), which is also + where modules get added. */ + PyObject *modules_by_index; + /* importlib module._bootstrap */ + PyObject *importlib; + /* override for config->use_frozen_modules (for tests) + (-1: "off", 1: "on", 0: no override) */ + int override_frozen_modules; + int override_multi_interp_extensions_check; +#ifdef HAVE_DLOPEN + int dlopenflags; +#endif + PyObject *import_func; + /* The global import lock.
*/ + _PyRecursiveMutex lock; + /* diagnostic info in PyImport_ImportModuleLevelObject() */ + struct { + int import_level; + PyTime_t accumulated; + int header; + } find_and_load; +}; + +#ifdef HAVE_DLOPEN +# include <dlfcn.h> // RTLD_NOW, RTLD_LAZY +# if HAVE_DECL_RTLD_NOW +# define _Py_DLOPEN_FLAGS RTLD_NOW +# else +# define _Py_DLOPEN_FLAGS RTLD_LAZY +# endif +# define DLOPENFLAGS_INIT .dlopenflags = _Py_DLOPEN_FLAGS, +#else +# define _Py_DLOPEN_FLAGS 0 +# define DLOPENFLAGS_INIT +#endif + +#define IMPORTS_INIT \ + { \ + DLOPENFLAGS_INIT \ + .find_and_load = { \ + .header = 1, \ + }, \ + } + +extern void _PyImport_ClearCore(PyInterpreterState *interp); + +extern Py_ssize_t _PyImport_GetNextModuleIndex(void); +extern const char * _PyImport_ResolveNameWithPackageContext(const char *name); +extern const char * _PyImport_SwapPackageContext(const char *newcontext); + +extern int _PyImport_GetDLOpenFlags(PyInterpreterState *interp); +extern void _PyImport_SetDLOpenFlags(PyInterpreterState *interp, int new_val); + +extern PyObject * _PyImport_InitModules(PyInterpreterState *interp); +extern PyObject * _PyImport_GetModules(PyInterpreterState *interp); +extern void _PyImport_ClearModules(PyInterpreterState *interp); + +extern void _PyImport_ClearModulesByIndex(PyInterpreterState *interp); + +extern int _PyImport_InitDefaultImportFunc(PyInterpreterState *interp); +extern int _PyImport_IsDefaultImportFunc( + PyInterpreterState *interp, + PyObject *func); + +extern PyObject * _PyImport_GetImportlibLoader( + PyInterpreterState *interp, + const char *loader_name); +extern PyObject * _PyImport_GetImportlibExternalLoader( + PyInterpreterState *interp, + const char *loader_name); +extern PyObject * _PyImport_BlessMyLoader( + PyInterpreterState *interp, + PyObject *module_globals); +extern PyObject * _PyImport_ImportlibModuleRepr( + PyInterpreterState *interp, + PyObject *module); + + +extern PyStatus _PyImport_Init(void); +extern void _PyImport_Fini(void); +extern void _PyImport_Fini2(void); + +extern PyStatus _PyImport_InitCore( + PyThreadState *tstate, + PyObject *sysmod, + int importlib); +extern PyStatus _PyImport_InitExternal(PyThreadState *tstate); +extern void _PyImport_FiniCore(PyInterpreterState *interp); +extern void _PyImport_FiniExternal(PyInterpreterState *interp); + + +extern PyObject* _PyImport_GetBuiltinModuleNames(void); + +struct _module_alias { + const char *name; /* ASCII encoded string */ + const char *orig; /* ASCII encoded string */ +}; + +// Export these 3 symbols for test_ctypes +PyAPI_DATA(const struct _frozen*) _PyImport_FrozenBootstrap; +PyAPI_DATA(const struct _frozen*) _PyImport_FrozenStdlib; +PyAPI_DATA(const struct _frozen*) _PyImport_FrozenTest; + +extern const struct _module_alias * _PyImport_FrozenAliases; + +extern int _PyImport_CheckSubinterpIncompatibleExtensionAllowed( + const char *name); + + +// Export for '_testinternalcapi' shared extension +PyAPI_FUNC(int) _PyImport_ClearExtension(PyObject *name, PyObject *filename); + +#ifdef Py_GIL_DISABLED +// Assuming that the GIL is enabled from a call to +// _PyEval_EnableGILTransient(), resolve the transient request depending on the +// state of the module argument: +// - If module is NULL or a PyModuleObject with md_gil == Py_MOD_GIL_NOT_USED, +// call _PyEval_DisableGIL(). +// - Otherwise, call _PyEval_EnableGILPermanent(). If the GIL was not already +// enabled permanently, issue a warning referencing the module's name. +// +// This function may raise an exception.
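+//
+// Hypothetical caller sketch (illustration only; `mod` and `name` are
+// placeholder variables, not part of this header):
+//
+//     if (_PyImport_CheckGILForModule(mod, name) < 0) {
+//         goto error;  // an exception was raised
+//     }
+//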
+extern int _PyImport_CheckGILForModule(PyObject *module, PyObject *module_name); +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_IMPORT_H */ +#endif /* !Py_LIMITED_API */ diff --git a/Include/internal/pycore_importdl.h b/Include/internal/pycore_importdl.h new file mode 100644 index 0000000000000000000000000000000000000000..525a16f6b97274668ac9b4f8b4fd90502c8f10dd --- /dev/null +++ b/Include/internal/pycore_importdl.h @@ -0,0 +1,139 @@ +#ifndef Py_INTERNAL_IMPORTDL_H +#define Py_INTERNAL_IMPORTDL_H + +#include "patchlevel.h" // PY_MAJOR_VERSION + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + + +extern const char *_PyImport_DynLoadFiletab[]; + + +typedef enum ext_module_kind { + _Py_ext_module_kind_UNKNOWN = 0, + _Py_ext_module_kind_SINGLEPHASE = 1, + _Py_ext_module_kind_MULTIPHASE = 2, + _Py_ext_module_kind_INVALID = 3, +} _Py_ext_module_kind; + +typedef enum ext_module_origin { + _Py_ext_module_origin_CORE = 1, + _Py_ext_module_origin_BUILTIN = 2, + _Py_ext_module_origin_DYNAMIC = 3, +} _Py_ext_module_origin; + +/* Input for loading an extension module. */ +struct _Py_ext_module_loader_info { + PyObject *filename; +#ifndef MS_WINDOWS + PyObject *filename_encoded; +#endif + PyObject *name; + PyObject *name_encoded; + /* path is always a borrowed ref of name or filename, + * depending on whether it's builtin or not. */ + PyObject *path; + _Py_ext_module_origin origin; + const char *hook_prefix; + const char *newcontext; +}; +extern void _Py_ext_module_loader_info_clear( + struct _Py_ext_module_loader_info *info); +extern int _Py_ext_module_loader_info_init( + struct _Py_ext_module_loader_info *info, + PyObject *name, + PyObject *filename, + _Py_ext_module_origin origin); +extern int _Py_ext_module_loader_info_init_for_core( + struct _Py_ext_module_loader_info *p_info, + PyObject *name); +extern int _Py_ext_module_loader_info_init_for_builtin( + struct _Py_ext_module_loader_info *p_info, + PyObject *name); +#ifdef HAVE_DYNAMIC_LOADING +extern int _Py_ext_module_loader_info_init_from_spec( + struct _Py_ext_module_loader_info *info, + PyObject *spec); +#endif + +/* The result from running an extension module's init function. */ +struct _Py_ext_module_loader_result { + PyModuleDef *def; + PyObject *module; + _Py_ext_module_kind kind; + struct _Py_ext_module_loader_result_error *err; + struct _Py_ext_module_loader_result_error { + enum _Py_ext_module_loader_result_error_kind { + _Py_ext_module_loader_result_EXCEPTION = 0, + _Py_ext_module_loader_result_ERR_MISSING = 1, + _Py_ext_module_loader_result_ERR_UNREPORTED_EXC = 2, + _Py_ext_module_loader_result_ERR_UNINITIALIZED = 3, + _Py_ext_module_loader_result_ERR_NONASCII_NOT_MULTIPHASE = 4, + _Py_ext_module_loader_result_ERR_NOT_MODULE = 5, + _Py_ext_module_loader_result_ERR_MISSING_DEF = 6, + } kind; + PyObject *exc; + } _err; +}; +extern void _Py_ext_module_loader_result_clear( + struct _Py_ext_module_loader_result *res); +extern void _Py_ext_module_loader_result_apply_error( + struct _Py_ext_module_loader_result *res, + const char *name); + +/* The module init function.
*/ +typedef PyObject *(*PyModInitFunction)(void); +#ifdef HAVE_DYNAMIC_LOADING +extern PyModInitFunction _PyImport_GetModInitFunc( + struct _Py_ext_module_loader_info *info, + FILE *fp); +#endif +extern int _PyImport_RunModInitFunc( + PyModInitFunction p0, + struct _Py_ext_module_loader_info *info, + struct _Py_ext_module_loader_result *p_res); + + +/* Max length of module suffix searched for -- accommodates "module.slb" */ +#define MAXSUFFIXSIZE 12 + +#ifdef MS_WINDOWS +#include <windows.h> +typedef FARPROC dl_funcptr; + +#ifdef _DEBUG +# define PYD_DEBUG_SUFFIX "_d" +#else +# define PYD_DEBUG_SUFFIX "" +#endif + +#ifdef Py_GIL_DISABLED +# define PYD_THREADING_TAG "t" +#else +# define PYD_THREADING_TAG "" +#endif + +#ifdef PYD_PLATFORM_TAG +# define PYD_SOABI "cp" Py_STRINGIFY(PY_MAJOR_VERSION) Py_STRINGIFY(PY_MINOR_VERSION) PYD_THREADING_TAG "-" PYD_PLATFORM_TAG +#else +# define PYD_SOABI "cp" Py_STRINGIFY(PY_MAJOR_VERSION) Py_STRINGIFY(PY_MINOR_VERSION) PYD_THREADING_TAG +#endif + +#define PYD_TAGGED_SUFFIX PYD_DEBUG_SUFFIX "." PYD_SOABI ".pyd" +#define PYD_UNTAGGED_SUFFIX PYD_DEBUG_SUFFIX ".pyd" + +#else +typedef void (*dl_funcptr)(void); +#endif + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_IMPORTDL_H */ diff --git a/Include/internal/pycore_initconfig.h b/Include/internal/pycore_initconfig.h new file mode 100644 index 0000000000000000000000000000000000000000..1c68161341860aa4588e7fdf576737fa01cce99b --- /dev/null +++ b/Include/internal/pycore_initconfig.h @@ -0,0 +1,200 @@ +#ifndef Py_INTERNAL_CORECONFIG_H +#define Py_INTERNAL_CORECONFIG_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +/* Forward declaration */ +struct pyruntimestate; + +/* --- PyStatus ----------------------------------------------- */ + +/* Almost all errors causing Python initialization to fail */ +#ifdef _MSC_VER + /* Visual Studio 2015 doesn't implement C99 __func__ in C */ +# define _PyStatus_GET_FUNC() __FUNCTION__ +#else +# define _PyStatus_GET_FUNC() __func__ +#endif + +#define _PyStatus_OK() \ + (PyStatus){._type = _PyStatus_TYPE_OK} + /* other fields are set to 0 */ +#define _PyStatus_ERR(ERR_MSG) \ + (PyStatus){ \ + ._type = _PyStatus_TYPE_ERROR, \ + .func = _PyStatus_GET_FUNC(), \ + .err_msg = (ERR_MSG)} + /* other fields are set to 0 */ +#define _PyStatus_NO_MEMORY_ERRMSG "memory allocation failed" +#define _PyStatus_NO_MEMORY() _PyStatus_ERR(_PyStatus_NO_MEMORY_ERRMSG) +#define _PyStatus_EXIT(EXITCODE) \ + (PyStatus){ \ + ._type = _PyStatus_TYPE_EXIT, \ + .exitcode = (EXITCODE)} +#define _PyStatus_IS_ERROR(err) \ + ((err)._type == _PyStatus_TYPE_ERROR) +#define _PyStatus_IS_EXIT(err) \ + ((err)._type == _PyStatus_TYPE_EXIT) +#define _PyStatus_EXCEPTION(err) \ + ((err)._type != _PyStatus_TYPE_OK) +#define _PyStatus_UPDATE_FUNC(err) \ + do { (err).func = _PyStatus_GET_FUNC(); } while (0) + +// Export for '_testinternalcapi' shared extension +PyAPI_FUNC(void) _PyErr_SetFromPyStatus(PyStatus status); + + +/* --- PyWideStringList ------------------------------------------------ */ + +#define _PyWideStringList_INIT (PyWideStringList){.length = 0, .items = NULL} + +#ifndef NDEBUG +extern int _PyWideStringList_CheckConsistency(const PyWideStringList *list); +#endif +extern void _PyWideStringList_Clear(PyWideStringList *list); +extern int _PyWideStringList_Copy(PyWideStringList *list, + const PyWideStringList *list2); +extern PyStatus _PyWideStringList_Extend(PyWideStringList *list, + const PyWideStringList *list2); +extern
PyObject* _PyWideStringList_AsList(const PyWideStringList *list); + + +/* --- _PyArgv ---------------------------------------------------- */ + +typedef struct _PyArgv { + Py_ssize_t argc; + int use_bytes_argv; + char * const *bytes_argv; + wchar_t * const *wchar_argv; +} _PyArgv; + +extern PyStatus _PyArgv_AsWstrList(const _PyArgv *args, + PyWideStringList *list); + + +/* --- Helper functions ------------------------------------------- */ + +extern int _Py_str_to_int( + const char *str, + int *result); +extern const wchar_t* _Py_get_xoption( + const PyWideStringList *xoptions, + const wchar_t *name); +extern const char* _Py_GetEnv( + int use_environment, + const char *name); +extern void _Py_get_env_flag( + int use_environment, + int *flag, + const char *name); + +/* Py_GetArgcArgv() helper */ +extern void _Py_ClearArgcArgv(void); + + +/* --- _PyPreCmdline ------------------------------------------------- */ + +typedef struct { + PyWideStringList argv; + PyWideStringList xoptions; /* "-X value" option */ + int isolated; /* -I option */ + int use_environment; /* -E option */ + int dev_mode; /* -X dev and PYTHONDEVMODE */ + int warn_default_encoding; /* -X warn_default_encoding and PYTHONWARNDEFAULTENCODING */ +} _PyPreCmdline; + +#define _PyPreCmdline_INIT \ + (_PyPreCmdline){ \ + .use_environment = -1, \ + .isolated = -1, \ + .dev_mode = -1} +/* Note: _PyPreCmdline_INIT sets other fields to 0/NULL */ + +extern void _PyPreCmdline_Clear(_PyPreCmdline *cmdline); +extern PyStatus _PyPreCmdline_SetArgv(_PyPreCmdline *cmdline, + const _PyArgv *args); +extern PyStatus _PyPreCmdline_SetConfig( + const _PyPreCmdline *cmdline, + PyConfig *config); +extern PyStatus _PyPreCmdline_Read(_PyPreCmdline *cmdline, + const PyPreConfig *preconfig); + + +/* --- PyPreConfig ----------------------------------------------- */ + +// Export for '_testembed' program +PyAPI_FUNC(void) _PyPreConfig_InitCompatConfig(PyPreConfig *preconfig); + +extern void _PyPreConfig_InitFromConfig( + PyPreConfig *preconfig, + const PyConfig *config); +extern PyStatus _PyPreConfig_InitFromPreConfig( + PyPreConfig *preconfig, + const PyPreConfig *config2); +extern PyObject* _PyPreConfig_AsDict(const PyPreConfig *preconfig); +extern void _PyPreConfig_GetConfig(PyPreConfig *preconfig, + const PyConfig *config); +extern PyStatus _PyPreConfig_Read(PyPreConfig *preconfig, + const _PyArgv *args); +extern PyStatus _PyPreConfig_Write(const PyPreConfig *preconfig); + + +/* --- PyConfig ---------------------------------------------- */ + +typedef enum { + /* Py_Initialize() API: backward compatibility with Python 3.6 and 3.7 */ + _PyConfig_INIT_COMPAT = 1, + _PyConfig_INIT_PYTHON = 2, + _PyConfig_INIT_ISOLATED = 3 +} _PyConfigInitEnum; + +typedef enum { + /* For now, this means the GIL is enabled. + + gh-116329: This will eventually change to "the GIL is disabled but can + be reenabled by loading an incompatible extension module." */ + _PyConfig_GIL_DEFAULT = -1, + + /* The GIL has been forced off or on, and will not be affected by module loading. 
*/ + _PyConfig_GIL_DISABLE = 0, + _PyConfig_GIL_ENABLE = 1, +} _PyConfigGILEnum; + +// Export for '_testembed' program +PyAPI_FUNC(void) _PyConfig_InitCompatConfig(PyConfig *config); + +extern PyStatus _PyConfig_Copy( + PyConfig *config, + const PyConfig *config2); +extern PyStatus _PyConfig_InitPathConfig( + PyConfig *config, + int compute_path_config); +extern PyStatus _PyConfig_InitImportConfig(PyConfig *config); +extern PyStatus _PyConfig_Read(PyConfig *config, int compute_path_config); +extern PyStatus _PyConfig_Write(const PyConfig *config, + struct pyruntimestate *runtime); +extern PyStatus _PyConfig_SetPyArgv( + PyConfig *config, + const _PyArgv *args); + + +extern void _Py_DumpPathConfig(PyThreadState *tstate); + + +/* --- Function used for testing ---------------------------------- */ + +// Export these functions for '_testinternalcapi' shared extension +PyAPI_FUNC(PyObject*) _PyConfig_AsDict(const PyConfig *config); +PyAPI_FUNC(int) _PyConfig_FromDict(PyConfig *config, PyObject *dict); +PyAPI_FUNC(PyObject*) _Py_Get_Getpath_CodeObject(void); +PyAPI_FUNC(PyObject*) _Py_GetConfigsAsDict(void); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_CORECONFIG_H */ diff --git a/Include/internal/pycore_instruction_sequence.h b/Include/internal/pycore_instruction_sequence.h new file mode 100644 index 0000000000000000000000000000000000000000..d6a79616db71fa0bf65e50be31214155323374c9 --- /dev/null +++ b/Include/internal/pycore_instruction_sequence.h @@ -0,0 +1,73 @@ +#ifndef Py_INTERNAL_INSTRUCTION_SEQUENCE_H +#define Py_INTERNAL_INSTRUCTION_SEQUENCE_H + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_symtable.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +typedef struct { + int h_label; + int h_startdepth; + int h_preserve_lasti; +} _PyExceptHandlerInfo; + +typedef struct { + int i_opcode; + int i_oparg; + _Py_SourceLocation i_loc; + _PyExceptHandlerInfo i_except_handler_info; + + /* Temporary fields, used by the assembler and in instr_sequence_to_cfg */ + int i_target; + int i_offset; +} _PyInstruction; + +typedef struct instruction_sequence { + PyObject_HEAD + _PyInstruction *s_instrs; + int s_allocated; + int s_used; + + int s_next_free_label; /* next free label id */ + + /* Map of a label id to instruction offset (index into s_instrs). + * If s_labelmap is NULL, then each label id is the offset itself. 
+ */ + int *s_labelmap; + int s_labelmap_size; + + /* PyList of instruction sequences of nested functions */ + PyObject *s_nested; +} _PyInstructionSequence; + +typedef struct { + int id; +} _PyJumpTargetLabel; + +PyAPI_FUNC(PyObject*)_PyInstructionSequence_New(void); + +int _PyInstructionSequence_UseLabel(_PyInstructionSequence *seq, int lbl); +int _PyInstructionSequence_Addop(_PyInstructionSequence *seq, + int opcode, int oparg, + _Py_SourceLocation loc); +_PyJumpTargetLabel _PyInstructionSequence_NewLabel(_PyInstructionSequence *seq); +int _PyInstructionSequence_ApplyLabelMap(_PyInstructionSequence *seq); +int _PyInstructionSequence_InsertInstruction(_PyInstructionSequence *seq, int pos, + int opcode, int oparg, _Py_SourceLocation loc); +int _PyInstructionSequence_AddNested(_PyInstructionSequence *seq, _PyInstructionSequence *nested); +void PyInstructionSequence_Fini(_PyInstructionSequence *seq); + +extern PyTypeObject _PyInstructionSequence_Type; +#define _PyInstructionSequence_Check(v) Py_IS_TYPE((v), &_PyInstructionSequence_Type) + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_INSTRUCTION_SEQUENCE_H */ diff --git a/Include/internal/pycore_instruments.h b/Include/internal/pycore_instruments.h new file mode 100644 index 0000000000000000000000000000000000000000..c98e82c8be5546ad82caca437113c98b1372f297 --- /dev/null +++ b/Include/internal/pycore_instruments.h @@ -0,0 +1,75 @@ +#ifndef Py_INTERNAL_INSTRUMENT_H +#define Py_INTERNAL_INSTRUMENT_H + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_frame.h" // _PyInterpreterFrame + +#ifdef __cplusplus +extern "C" { +#endif + +#define PY_MONITORING_TOOL_IDS 8 + +typedef uint32_t _PyMonitoringEventSet; + +/* Tool IDs */ + +/* These are defined in PEP 669 for convenience to avoid clashes */ +#define PY_MONITORING_DEBUGGER_ID 0 +#define PY_MONITORING_COVERAGE_ID 1 +#define PY_MONITORING_PROFILER_ID 2 +#define PY_MONITORING_OPTIMIZER_ID 5 + +/* Internal IDs used to support sys.setprofile() and sys.settrace() */ +#define PY_MONITORING_SYS_PROFILE_ID 6 +#define PY_MONITORING_SYS_TRACE_ID 7 + + +PyObject *_PyMonitoring_RegisterCallback(int tool_id, int event_id, PyObject *obj); + +int _PyMonitoring_SetEvents(int tool_id, _PyMonitoringEventSet events); +int _PyMonitoring_SetLocalEvents(PyCodeObject *code, int tool_id, _PyMonitoringEventSet events); +int _PyMonitoring_GetLocalEvents(PyCodeObject *code, int tool_id, _PyMonitoringEventSet *events); + +extern int +_Py_call_instrumentation(PyThreadState *tstate, int event, + _PyInterpreterFrame *frame, _Py_CODEUNIT *instr); + +extern int +_Py_call_instrumentation_line(PyThreadState *tstate, _PyInterpreterFrame* frame, + _Py_CODEUNIT *instr, _Py_CODEUNIT *prev); + +extern int +_Py_call_instrumentation_instruction( + PyThreadState *tstate, _PyInterpreterFrame* frame, _Py_CODEUNIT *instr); + +_Py_CODEUNIT * +_Py_call_instrumentation_jump( + PyThreadState *tstate, int event, + _PyInterpreterFrame *frame, _Py_CODEUNIT *instr, _Py_CODEUNIT *target); + +extern int +_Py_call_instrumentation_arg(PyThreadState *tstate, int event, + _PyInterpreterFrame *frame, _Py_CODEUNIT *instr, PyObject *arg); + +extern int +_Py_call_instrumentation_2args(PyThreadState *tstate, int event, + _PyInterpreterFrame *frame, _Py_CODEUNIT *instr, PyObject *arg0, PyObject *arg1); + +extern void +_Py_call_instrumentation_exc2(PyThreadState *tstate, int event, + _PyInterpreterFrame *frame, _Py_CODEUNIT *instr, PyObject *arg0, PyObject *arg1); + +extern int
+_Py_Instrumentation_GetLine(PyCodeObject *code, int index); + +extern PyObject _PyInstrumentation_MISSING; +extern PyObject _PyInstrumentation_DISABLE; + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_INSTRUMENT_H */ diff --git a/Include/internal/pycore_interp.h b/Include/internal/pycore_interp.h new file mode 100644 index 0000000000000000000000000000000000000000..075d35a37557777a091c9ee13aaf7f25df527941 --- /dev/null +++ b/Include/internal/pycore_interp.h @@ -0,0 +1,423 @@ +#ifndef Py_INTERNAL_INTERP_H +#define Py_INTERNAL_INTERP_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include <stdbool.h> // bool + +#include "pycore_ast_state.h" // struct ast_state +#include "pycore_atexit.h" // struct atexit_state +#include "pycore_ceval_state.h" // struct _ceval_state +#include "pycore_code.h" // struct callable_cache +#include "pycore_codecs.h" // struct codecs_state +#include "pycore_context.h" // struct _Py_context_state +#include "pycore_crossinterp.h" // struct _xidregistry +#include "pycore_dict_state.h" // struct _Py_dict_state +#include "pycore_dtoa.h" // struct _dtoa_state +#include "pycore_exceptions.h" // struct _Py_exc_state +#include "pycore_floatobject.h" // struct _Py_float_state +#include "pycore_function.h" // FUNC_MAX_WATCHERS +#include "pycore_gc.h" // struct _gc_runtime_state +#include "pycore_genobject.h" // struct _Py_async_gen_state +#include "pycore_global_objects.h" // struct _Py_interp_cached_objects +#include "pycore_import.h" // struct _import_state +#include "pycore_instruments.h" // _PY_MONITORING_EVENTS +#include "pycore_list.h" // struct _Py_list_state +#include "pycore_mimalloc.h" // struct _mimalloc_interp_state +#include "pycore_object_state.h" // struct _py_object_state +#include "pycore_optimizer.h" // _PyOptimizerObject +#include "pycore_obmalloc.h" // struct _obmalloc_state +#include "pycore_qsbr.h" // struct _qsbr_state +#include "pycore_tstate.h" // _PyThreadStateImpl +#include "pycore_tuple.h" // struct _Py_tuple_state +#include "pycore_typeobject.h" // struct types_state +#include "pycore_unicodeobject.h" // struct _Py_unicode_state +#include "pycore_warnings.h" // struct _warnings_runtime_state + + +struct _Py_long_state { + int max_str_digits; +}; + +// Support for stop-the-world events. This exists in both the PyRuntime struct +// for global pauses and in each PyInterpreterState for per-interpreter pauses. +struct _stoptheworld_state { + PyMutex mutex; // Serializes stop-the-world attempts. + + // NOTE: The below fields are protected by HEAD_LOCK(runtime), not by the + // above mutex. + bool requested; // Set when a pause is requested. + bool world_stopped; // Set when the world is stopped. + bool is_global; // Set when contained in PyRuntime struct. + + PyEvent stop_event; // Set when thread_countdown reaches zero. + Py_ssize_t thread_countdown; // Number of threads that must pause. + + PyThreadState *requester; // Thread that requested the pause (may be NULL). +}; + +#ifdef Py_GIL_DISABLED +// This should be prime but otherwise the choice is arbitrary. A larger value +// increases concurrency at the expense of memory. +# define NUM_WEAKREF_LIST_LOCKS 127 +#endif + +/* cross-interpreter data registry */ + +/* Tracks some rare events per-interpreter, used by the optimizer to turn on/off + specific optimizations. */ +typedef struct _rare_events { + /* Setting an object's class, obj.__class__ = ...
*/ + uint8_t set_class; + /* Setting the bases of a class, cls.__bases__ = ... */ + uint8_t set_bases; + /* Setting the PEP 523 frame eval function, _PyInterpreterState_SetFrameEvalFunc() */ + uint8_t set_eval_frame_func; + /* Modifying the builtins, __builtins__.__dict__[var] = ... */ + uint8_t builtin_dict; + /* Modifying a function, e.g. func.__defaults__ = ..., etc. */ + uint8_t func_modification; +} _rare_events; + +/* interpreter state */ + +/* PyInterpreterState holds the global state for one of the runtime's + interpreters. Typically the initial (main) interpreter is the only one. + + The PyInterpreterState typedef is in Include/pytypedefs.h. + */ +struct _is { + + /* This struct contains the eval_breaker, + * which is by far the hottest field in this struct + * and should be placed at the beginning. */ + struct _ceval_state ceval; + + PyInterpreterState *next; + + int64_t id; + int64_t id_refcount; + int requires_idref; + PyThread_type_lock id_mutex; + +#define _PyInterpreterState_WHENCE_NOTSET -1 +#define _PyInterpreterState_WHENCE_UNKNOWN 0 +#define _PyInterpreterState_WHENCE_RUNTIME 1 +#define _PyInterpreterState_WHENCE_LEGACY_CAPI 2 +#define _PyInterpreterState_WHENCE_CAPI 3 +#define _PyInterpreterState_WHENCE_XI 4 +#define _PyInterpreterState_WHENCE_STDLIB 5 +#define _PyInterpreterState_WHENCE_MAX 5 + long _whence; + + /* Has been initialized to a safe state. + + In order to be effective, this must be set to 0 during or right + after allocation. */ + int _initialized; + /* Has been fully initialized via pylifecycle.c. */ + int _ready; + int finalizing; + + uintptr_t last_restart_version; + struct pythreads { + uint64_t next_unique_id; + /* The linked list of threads, newest first. */ + PyThreadState *head; + /* The thread currently executing in the __main__ module, if any. */ + PyThreadState *main; + /* Used in Modules/_threadmodule.c. */ + Py_ssize_t count; + /* Support for runtime thread stack size tuning. + A value of 0 means using the platform's default stack size + or the size specified by the THREAD_STACK_SIZE macro. */ + /* Used in Python/thread.c. */ + size_t stacksize; + } threads; + + /* Reference to the _PyRuntime global variable. This field exists + to not have to pass runtime in addition to tstate to a function. + Get runtime from tstate: tstate->interp->runtime. */ + struct pyruntimestate *runtime; + + /* Set by Py_EndInterpreter(). + + Use _PyInterpreterState_GetFinalizing() + and _PyInterpreterState_SetFinalizing() + to access it, don't access it directly. */ + PyThreadState* _finalizing; + /* The ID of the OS thread in which we are finalizing. */ + unsigned long _finalizing_id; + + struct _gc_runtime_state gc; + + /* The following fields are here to avoid allocation during init. + The data is exposed through PyInterpreterState pointer fields. + These fields should not be accessed directly outside of init. + + All other PyInterpreterState pointer fields are populated when + needed and default to NULL. + + For now there are some exceptions to that rule, which require + allocation during init. These will be addressed on a case-by-case + basis. Also see _PyRuntimeState regarding the various mutex fields. + */ + + // Dictionary of the sys module + PyObject *sysdict; + + // Dictionary of the builtins module + PyObject *builtins; + + struct _import_state imports; + + /* The per-interpreter GIL, which might not be used. 
*/ + struct _gil_runtime_state _gil; + + /* ---------- IMPORTANT --------------------------- + The fields above this line are declared as early as + possible to facilitate out-of-process observability + tools. */ + + struct codecs_state codecs; + + PyConfig config; + unsigned long feature_flags; + + PyObject *dict; /* Stores per-interpreter state */ + + PyObject *sysdict_copy; + PyObject *builtins_copy; + // Initialized to _PyEval_EvalFrameDefault(). + _PyFrameEvalFunction eval_frame; + + PyFunction_WatchCallback func_watchers[FUNC_MAX_WATCHERS]; + // One bit is set for each non-NULL entry in func_watchers + uint8_t active_func_watchers; + + Py_ssize_t co_extra_user_count; + freefunc co_extra_freefuncs[MAX_CO_EXTRA_USERS]; + + /* cross-interpreter data and utils */ + struct _xi_state xi; + +#ifdef HAVE_FORK + PyObject *before_forkers; + PyObject *after_forkers_parent; + PyObject *after_forkers_child; +#endif + + struct _warnings_runtime_state warnings; + struct atexit_state atexit; + struct _stoptheworld_state stoptheworld; + struct _qsbr_shared qsbr; + +#if defined(Py_GIL_DISABLED) + struct _mimalloc_interp_state mimalloc; + struct _brc_state brc; // biased reference counting state + PyMutex weakref_locks[NUM_WEAKREF_LIST_LOCKS]; +#endif + + // Per-interpreter state for the obmalloc allocator. For the main + // interpreter and for all interpreters that don't have their + // own obmalloc state, this points to the static structure in + // obmalloc.c obmalloc_state_main. For other interpreters, it is + // heap allocated by _PyMem_init_obmalloc() and freed when the + // interpreter structure is freed. In the case of a heap allocated + // obmalloc state, it is not safe to hold on to or use memory after + // the interpreter is freed. The obmalloc state corresponding to + // that allocated memory is gone. See free_obmalloc_arenas() for + // more comments. + struct _obmalloc_state *obmalloc; + + PyObject *audit_hooks; + PyType_WatchCallback type_watchers[TYPE_MAX_WATCHERS]; + PyCode_WatchCallback code_watchers[CODE_MAX_WATCHERS]; + // One bit is set for each non-NULL entry in code_watchers + uint8_t active_code_watchers; + + struct _py_object_state object_state; + struct _Py_unicode_state unicode; + struct _Py_long_state long_state; + struct _dtoa_state dtoa; + struct _py_func_state func_state; + struct _py_code_state code_state; + + struct _Py_dict_state dict_state; + struct _Py_exc_state exc_state; + struct _Py_mem_interp_free_queue mem_free_queue; + + struct ast_state ast; + struct types_state types; + struct callable_cache callable_cache; + _PyOptimizerObject *optimizer; + _PyExecutorObject *executor_list_head; + + _rare_events rare_events; + PyDict_WatchCallback builtins_dict_watcher; + + _Py_GlobalMonitors monitors; + bool sys_profile_initialized; + bool sys_trace_initialized; + Py_ssize_t sys_profiling_threads; /* Count of threads with c_profilefunc set */ + Py_ssize_t sys_tracing_threads; /* Count of threads with c_tracefunc set */ + PyObject *monitoring_callables[PY_MONITORING_TOOL_IDS][_PY_MONITORING_EVENTS]; + PyObject *monitoring_tool_names[PY_MONITORING_TOOL_IDS]; + + struct _Py_interp_cached_objects cached_objects; + struct _Py_interp_static_objects static_objects; + + /* the initial PyInterpreterState.threads.head */ + _PyThreadStateImpl _initial_thread; + Py_ssize_t _interactive_src_count; + // In 3.14+ this is interp->threads.preallocated. 
+ _PyThreadStateImpl *threads_preallocated; +}; + + +/* other API */ + +extern void _PyInterpreterState_Clear(PyThreadState *tstate); + + +static inline PyThreadState* +_PyInterpreterState_GetFinalizing(PyInterpreterState *interp) { + return (PyThreadState*)_Py_atomic_load_ptr_relaxed(&interp->_finalizing); +} + +static inline unsigned long +_PyInterpreterState_GetFinalizingID(PyInterpreterState *interp) { + return _Py_atomic_load_ulong_relaxed(&interp->_finalizing_id); +} + +static inline void +_PyInterpreterState_SetFinalizing(PyInterpreterState *interp, PyThreadState *tstate) { + _Py_atomic_store_ptr_relaxed(&interp->_finalizing, tstate); + if (tstate == NULL) { + _Py_atomic_store_ulong_relaxed(&interp->_finalizing_id, 0); + } + else { + // XXX Re-enable this assert once gh-109860 is fixed. + //assert(tstate->thread_id == PyThread_get_thread_ident()); + _Py_atomic_store_ulong_relaxed(&interp->_finalizing_id, + tstate->thread_id); + } +} + + + +// Exports for the _testinternalcapi module. +PyAPI_FUNC(int64_t) _PyInterpreterState_ObjectToID(PyObject *); +PyAPI_FUNC(PyInterpreterState *) _PyInterpreterState_LookUpID(int64_t); +PyAPI_FUNC(PyInterpreterState *) _PyInterpreterState_LookUpIDObject(PyObject *); +PyAPI_FUNC(int) _PyInterpreterState_IDInitref(PyInterpreterState *); +PyAPI_FUNC(int) _PyInterpreterState_IDIncref(PyInterpreterState *); +PyAPI_FUNC(void) _PyInterpreterState_IDDecref(PyInterpreterState *); + +PyAPI_FUNC(int) _PyInterpreterState_IsReady(PyInterpreterState *interp); + +PyAPI_FUNC(long) _PyInterpreterState_GetWhence(PyInterpreterState *interp); +extern void _PyInterpreterState_SetWhence( + PyInterpreterState *interp, + long whence); + +extern const PyConfig* _PyInterpreterState_GetConfig(PyInterpreterState *interp); + +// Get a copy of the current interpreter configuration. +// +// Return 0 on success. Raise an exception and return -1 on error. +// +// The caller must initialize 'config', using PyConfig_InitPythonConfig() +// for example. +// +// Python must be preinitialized to call this method. +// The caller must hold the GIL. +// +// Once done with the configuration, PyConfig_Clear() must be called to clear +// it. +// +// Export for '_testinternalcapi' shared extension. +PyAPI_FUNC(int) _PyInterpreterState_GetConfigCopy( + struct PyConfig *config); + +// Set the configuration of the current interpreter. +// +// This function should be called during or just after the Python +// initialization. +// +// Update the sys module with the new configuration. If the sys module was +// modified directly after the Python initialization, these changes are lost. +// +// Some configuration, like faulthandler or warnoptions, can be updated in the +// configuration, but don't reconfigure Python (don't enable/disable +// faulthandler and don't reconfigure warnings filters). +// +// Return 0 on success. Raise an exception and return -1 on error. +// +// The configuration should come from _PyInterpreterState_GetConfigCopy(). +// +// Export for '_testinternalcapi' shared extension. +PyAPI_FUNC(int) _PyInterpreterState_SetConfig( + const struct PyConfig *config); + + +/* +Runtime Feature Flags + +Each flag indicates whether or not a specific runtime feature +is available in a given context. For example, forking the process +might not be allowed in the current interpreter (i.e. os.fork() would fail). +*/ + +/* Set if the interpreter shares obmalloc runtime state + with the main interpreter.
*/ +#define Py_RTFLAGS_USE_MAIN_OBMALLOC (1UL << 5) + +/* Set if import should check a module for subinterpreter support. */ +#define Py_RTFLAGS_MULTI_INTERP_EXTENSIONS (1UL << 8) + +/* Set if threads are allowed. */ +#define Py_RTFLAGS_THREADS (1UL << 10) + +/* Set if daemon threads are allowed. */ +#define Py_RTFLAGS_DAEMON_THREADS (1UL << 11) + +/* Set if os.fork() is allowed. */ +#define Py_RTFLAGS_FORK (1UL << 15) + +/* Set if os.exec*() is allowed. */ +#define Py_RTFLAGS_EXEC (1UL << 16) + +extern int _PyInterpreterState_HasFeature(PyInterpreterState *interp, + unsigned long feature); + +PyAPI_FUNC(PyStatus) _PyInterpreterState_New( + PyThreadState *tstate, + PyInterpreterState **pinterp); + + +#define RARE_EVENT_INTERP_INC(interp, name) \ + do { \ + /* saturating add */ \ + int val = FT_ATOMIC_LOAD_UINT8_RELAXED(interp->rare_events.name); \ + if (val < UINT8_MAX) { \ + FT_ATOMIC_STORE_UINT8(interp->rare_events.name, val + 1); \ + } \ + RARE_EVENT_STAT_INC(name); \ + } while (0); \ + +#define RARE_EVENT_INC(name) \ + do { \ + PyInterpreterState *interp = PyInterpreterState_Get(); \ + RARE_EVENT_INTERP_INC(interp, name); \ + } while (0); \ + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_INTERP_H */ diff --git a/Include/internal/pycore_intrinsics.h b/Include/internal/pycore_intrinsics.h new file mode 100644 index 0000000000000000000000000000000000000000..39c2a30f6e979de3e4521393c53b198a07a585ab --- /dev/null +++ b/Include/internal/pycore_intrinsics.h @@ -0,0 +1,51 @@ +#ifndef Py_INTERNAL_INTRINSIC_H +#define Py_INTERNAL_INTRINSIC_H + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +/* Unary Functions: */ +#define INTRINSIC_1_INVALID 0 +#define INTRINSIC_PRINT 1 +#define INTRINSIC_IMPORT_STAR 2 +#define INTRINSIC_STOPITERATION_ERROR 3 +#define INTRINSIC_ASYNC_GEN_WRAP 4 +#define INTRINSIC_UNARY_POSITIVE 5 +#define INTRINSIC_LIST_TO_TUPLE 6 +#define INTRINSIC_TYPEVAR 7 +#define INTRINSIC_PARAMSPEC 8 +#define INTRINSIC_TYPEVARTUPLE 9 +#define INTRINSIC_SUBSCRIPT_GENERIC 10 +#define INTRINSIC_TYPEALIAS 11 + +#define MAX_INTRINSIC_1 11 + + +/* Binary Functions: */ +#define INTRINSIC_2_INVALID 0 +#define INTRINSIC_PREP_RERAISE_STAR 1 +#define INTRINSIC_TYPEVAR_WITH_BOUND 2 +#define INTRINSIC_TYPEVAR_WITH_CONSTRAINTS 3 +#define INTRINSIC_SET_FUNCTION_TYPE_PARAMS 4 +#define INTRINSIC_SET_TYPEPARAM_DEFAULT 5 + +#define MAX_INTRINSIC_2 5 + +typedef PyObject *(*intrinsic_func1)(PyThreadState* tstate, PyObject *value); +typedef PyObject *(*intrinsic_func2)(PyThreadState* tstate, PyObject *value1, PyObject *value2); + +typedef struct { + intrinsic_func1 func; + const char *name; +} intrinsic_func1_info; + +typedef struct { + intrinsic_func2 func; + const char *name; +} intrinsic_func2_info; + +PyAPI_DATA(const intrinsic_func1_info) _PyIntrinsics_UnaryFunctions[]; +PyAPI_DATA(const intrinsic_func2_info) _PyIntrinsics_BinaryFunctions[]; + +#endif // !Py_INTERNAL_INTRINSIC_H diff --git a/Include/internal/pycore_jit.h b/Include/internal/pycore_jit.h new file mode 100644 index 0000000000000000000000000000000000000000..17bd23f0752be20f2600a434fd33b36062969839 --- /dev/null +++ b/Include/internal/pycore_jit.h @@ -0,0 +1,25 @@ +#ifndef Py_INTERNAL_JIT_H +#define Py_INTERNAL_JIT_H + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#ifdef _Py_JIT + +typedef _Py_CODEUNIT *(*jit_func)(_PyInterpreterFrame *frame, PyObject **stack_pointer, PyThreadState *tstate); + +int 
_PyJIT_Compile(_PyExecutorObject *executor, const _PyUOpInstruction *trace, size_t length);
+void _PyJIT_Free(_PyExecutorObject *executor);
+
+#endif // _Py_JIT
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // !Py_INTERNAL_JIT_H
diff --git a/Include/internal/pycore_list.h b/Include/internal/pycore_list.h
new file mode 100644
index 0000000000000000000000000000000000000000..73695d10e0c37216b44e5e209e537a4a4dcaa8b3
--- /dev/null
+++ b/Include/internal/pycore_list.h
@@ -0,0 +1,66 @@
+#ifndef Py_INTERNAL_LIST_H
+#define Py_INTERNAL_LIST_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include "pycore_freelist.h"   // _PyFreeListState
+
+PyAPI_FUNC(PyObject*) _PyList_Extend(PyListObject *, PyObject *);
+extern void _PyList_DebugMallocStats(FILE *out);
+
+#define _PyList_ITEMS(op) _Py_RVALUE(_PyList_CAST(op)->ob_item)
+
+PyAPI_FUNC(int)
+_PyList_AppendTakeRefListResize(PyListObject *self, PyObject *newitem);
+
+// In the free-threaded build, the caller must hold a lock on `self` for this
+// operation to be thread-safe.
+static inline int
+_PyList_AppendTakeRef(PyListObject *self, PyObject *newitem)
+{
+    assert(self != NULL && newitem != NULL);
+    assert(PyList_Check(self));
+    Py_ssize_t len = Py_SIZE(self);
+    Py_ssize_t allocated = self->allocated;
+    assert((size_t)len + 1 < PY_SSIZE_T_MAX);
+    if (allocated > len) {
+#ifdef Py_GIL_DISABLED
+        _Py_atomic_store_ptr_release(&self->ob_item[len], newitem);
+#else
+        PyList_SET_ITEM(self, len, newitem);
+#endif
+        Py_SET_SIZE(self, len + 1);
+        return 0;
+    }
+    return _PyList_AppendTakeRefListResize(self, newitem);
+}
+
+// Repeat the bytes of a buffer in place
+static inline void
+_Py_memory_repeat(char* dest, Py_ssize_t len_dest, Py_ssize_t len_src)
+{
+    assert(len_src > 0);
+    Py_ssize_t copied = len_src;
+    while (copied < len_dest) {
+        Py_ssize_t bytes_to_copy = Py_MIN(copied, len_dest - copied);
+        memcpy(dest + copied, dest, bytes_to_copy);
+        copied += bytes_to_copy;
+    }
+}
+
+typedef struct {
+    PyObject_HEAD
+    Py_ssize_t it_index;
+    PyListObject *it_seq;  /* Set to NULL when iterator is exhausted */
+} _PyListIterObject;
+
+PyAPI_FUNC(PyObject *)_PyList_FromArraySteal(PyObject *const *src, Py_ssize_t n);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_LIST_H */
diff --git a/Include/internal/pycore_llist.h b/Include/internal/pycore_llist.h
new file mode 100644
index 0000000000000000000000000000000000000000..f629902fda9ff181f434b7b8e8df39fd71aa50ef
--- /dev/null
+++ b/Include/internal/pycore_llist.h
@@ -0,0 +1,106 @@
+// A doubly-linked list that can be embedded in a struct.
+//
+// Usage:
+//     struct llist_node head = LLIST_INIT(head);
+//     typedef struct {
+//         ...
+//         struct llist_node node;
+//         ...
+//     } MyObj;
+//
+//     llist_insert_tail(&head, &obj->node);
+//     llist_remove(&obj->node);
+//
+//     struct llist_node *node;
+//     llist_for_each(node, &head) {
+//         MyObj *obj = llist_data(node, MyObj, node);
+//         ...
+//     }
+//
+
+#ifndef Py_INTERNAL_LLIST_H
+#define Py_INTERNAL_LLIST_H
+
+#include <stddef.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "Py_BUILD_CORE must be defined to include this header"
+#endif
+
+struct llist_node {
+    struct llist_node *next;
+    struct llist_node *prev;
+};
+
+// Get the struct containing a node.
+#define llist_data(node, type, member) (_Py_CONTAINER_OF(node, type, member))
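+
+// A hedged sketch of removal during iteration (illustrative only; MyObj is
+// the hypothetical struct from the usage example above, and the macros are
+// defined just below):
+//
+//     struct llist_node *node;
+//     llist_for_each_safe(node, &head) {
+//         MyObj *obj = llist_data(node, MyObj, node);
+//         if (obj->done) {
+//             llist_remove(node);   // safe: the next node was saved
+//         }
+//     }
+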
+// Iterate over a list.
+#define llist_for_each(node, head) \
+    for (node = (head)->next; node != (head); node = node->next)
+
+// Iterate over a list, but allow removal of the current node.
+#define llist_for_each_safe(node, head) \
+    for (struct llist_node *_next = (node = (head)->next, node->next); \
+         node != (head); node = _next, _next = node->next)
+
+#define LLIST_INIT(head) { &head, &head }
+
+static inline void
+llist_init(struct llist_node *head)
+{
+    head->next = head;
+    head->prev = head;
+}
+
+// Returns 1 if the list is empty, 0 otherwise.
+static inline int
+llist_empty(struct llist_node *head)
+{
+    return head->next == head;
+}
+
+// Appends to the tail of the list.
+static inline void
+llist_insert_tail(struct llist_node *head, struct llist_node *node)
+{
+    node->prev = head->prev;
+    node->next = head;
+    head->prev->next = node;
+    head->prev = node;
+}
+
+// Remove a node from the list.
+static inline void
+llist_remove(struct llist_node *node)
+{
+    struct llist_node *prev = node->prev;
+    struct llist_node *next = node->next;
+    prev->next = next;
+    next->prev = prev;
+    node->prev = NULL;
+    node->next = NULL;
+}
+
+// Append all nodes from head2 onto head1. head2 is left empty.
+static inline void
+llist_concat(struct llist_node *head1, struct llist_node *head2)
+{
+    if (!llist_empty(head2)) {
+        head1->prev->next = head2->next;
+        head2->next->prev = head1->prev;
+
+        head1->prev = head2->prev;
+        head2->prev->next = head1;
+        llist_init(head2);
+    }
+}
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_LLIST_H */
diff --git a/Include/internal/pycore_lock.h b/Include/internal/pycore_lock.h
new file mode 100644
index 0000000000000000000000000000000000000000..2a18bb7644725fcb3cf8391af11a6e8e7e919b36
--- /dev/null
+++ b/Include/internal/pycore_lock.h
@@ -0,0 +1,241 @@
+// Lightweight locks and other synchronization mechanisms.
+//
+// These implementations are based on WebKit's WTF::Lock. See
+// https://webkit.org/blog/6161/locking-in-webkit/ for a description of the
+// design.
+#ifndef Py_INTERNAL_LOCK_H
+#define Py_INTERNAL_LOCK_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+// _Py_UNLOCKED is defined as 0 and _Py_LOCKED as 1 in Include/cpython/lock.h
+#define _Py_HAS_PARKED 2
+#define _Py_ONCE_INITIALIZED 4
+
+static inline int
+PyMutex_LockFast(uint8_t *lock_bits)
+{
+    uint8_t expected = _Py_UNLOCKED;
+    return _Py_atomic_compare_exchange_uint8(lock_bits, &expected, _Py_LOCKED);
+}
+
+// Checks if the mutex is currently locked.
+static inline int
+PyMutex_IsLocked(PyMutex *m)
+{
+    return (_Py_atomic_load_uint8(&m->_bits) & _Py_LOCKED) != 0;
+}
+
+// Re-initializes the mutex after a fork to the unlocked state.
+static inline void
+_PyMutex_at_fork_reinit(PyMutex *m)
+{
+    memset(m, 0, sizeof(*m));
+}
+
+typedef enum _PyLockFlags {
+    // Do not detach/release the GIL when waiting on the lock.
+    _Py_LOCK_DONT_DETACH = 0,
+
+    // Detach/release the GIL while waiting on the lock.
+    _PY_LOCK_DETACH = 1,
+
+    // Handle signals if interrupted while waiting on the lock.
+    _PY_LOCK_HANDLE_SIGNALS = 2,
+} _PyLockFlags;
+
+// Lock a mutex with an optional timeout and additional options. See
+// _PyLockFlags for details.
+extern PyLockStatus
+_PyMutex_LockTimed(PyMutex *m, PyTime_t timeout_ns, _PyLockFlags flags);
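+
+// A hedged usage sketch (hypothetical call site; assumes a PyMutex `m`
+// reachable by this thread and the flags declared above):
+//
+//     PyLockStatus st = _PyMutex_LockTimed(&m, -1 /* wait forever */,
+//                                          _PY_LOCK_DETACH);
+//     if (st == PY_LOCK_ACQUIRED) {
+//         ...critical section...
+//         PyMutex_Unlock(&m);
+//     }
+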
+// Lock a mutex with additional options. See _PyLockFlags for details.
+static inline void
+PyMutex_LockFlags(PyMutex *m, _PyLockFlags flags)
+{
+    uint8_t expected = _Py_UNLOCKED;
+    if (!_Py_atomic_compare_exchange_uint8(&m->_bits, &expected, _Py_LOCKED)) {
+        _PyMutex_LockTimed(m, -1, flags);
+    }
+}
+
+// Unlock a mutex, returns 0 if the mutex is not locked (used for improved
+// error messages).
+extern int _PyMutex_TryUnlock(PyMutex *m);
+
+
+// PyEvent is a one-time event notification
+typedef struct {
+    uint8_t v;
+} PyEvent;
+
+// Check if the event is set without blocking. Returns 1 if the event is set or
+// 0 otherwise.
+PyAPI_FUNC(int) _PyEvent_IsSet(PyEvent *evt);
+
+// Set the event and notify any waiting threads.
+// Export for '_testinternalcapi' shared extension
+PyAPI_FUNC(void) _PyEvent_Notify(PyEvent *evt);
+
+// Wait for the event to be set. If the event is already set, then this returns
+// immediately.
+PyAPI_FUNC(void) PyEvent_Wait(PyEvent *evt);
+
+// Wait for the event to be set, or until the timeout expires. If the event is
+// already set, then this returns immediately. Returns 1 if the event was set,
+// and 0 if the timeout expired or the thread was interrupted. If `detach` is
+// true, then the thread will detach/release the GIL while waiting.
+PyAPI_FUNC(int)
+PyEvent_WaitTimed(PyEvent *evt, PyTime_t timeout_ns, int detach);
+
+// _PyRawMutex implements a word-sized mutex that does not depend on the
+// parking lot API, and therefore can be used in the parking lot
+// implementation.
+//
+// The mutex uses a packed representation: the least significant bit is used to
+// indicate whether the mutex is locked or not. The remaining bits are either
+// zero or a pointer to a `struct raw_mutex_entry` (see lock.c).
+typedef struct {
+    uintptr_t v;
+} _PyRawMutex;
+
+// Slow paths for lock/unlock
+extern void _PyRawMutex_LockSlow(_PyRawMutex *m);
+extern void _PyRawMutex_UnlockSlow(_PyRawMutex *m);
+
+static inline void
+_PyRawMutex_Lock(_PyRawMutex *m)
+{
+    uintptr_t unlocked = _Py_UNLOCKED;
+    if (_Py_atomic_compare_exchange_uintptr(&m->v, &unlocked, _Py_LOCKED)) {
+        return;
+    }
+    _PyRawMutex_LockSlow(m);
+}
+
+static inline void
+_PyRawMutex_Unlock(_PyRawMutex *m)
+{
+    uintptr_t locked = _Py_LOCKED;
+    if (_Py_atomic_compare_exchange_uintptr(&m->v, &locked, _Py_UNLOCKED)) {
+        return;
+    }
+    _PyRawMutex_UnlockSlow(m);
+}
+
+// Type signature for one-time initialization functions. The function should
+// return 0 on success and -1 on failure.
+typedef int _Py_once_fn_t(void *arg);
+
+// (private) slow path for one time initialization
+PyAPI_FUNC(int)
+_PyOnceFlag_CallOnceSlow(_PyOnceFlag *flag, _Py_once_fn_t *fn, void *arg);
+
+// Calls `fn` once using `flag`. The `arg` is passed to the call to `fn`.
+//
+// Returns 0 on success and -1 on failure.
+//
+// If `fn` returns 0 (success), then subsequent calls immediately return 0.
+// If `fn` returns -1 (failure), then subsequent calls will retry the call.
+static inline int
+_PyOnceFlag_CallOnce(_PyOnceFlag *flag, _Py_once_fn_t *fn, void *arg)
+{
+    if (_Py_atomic_load_uint8(&flag->v) == _Py_ONCE_INITIALIZED) {
+        return 0;
+    }
+    return _PyOnceFlag_CallOnceSlow(flag, fn, arg);
+}
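+
+// A hedged sketch of one-time initialization (hypothetical names; only the
+// _PyOnceFlag API above is real):
+//
+//     static _PyOnceFlag table_once;              // zero-initialized
+//
+//     static int init_tables(void *arg) { ...; return 0; }
+//
+//     if (_PyOnceFlag_CallOnce(&table_once, init_tables, NULL) < 0) {
+//         return -1;   // init failed; a later call will retry
+//     }
+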
+// A recursive mutex. The mutex should be zero-initialized.
+typedef struct {
+    PyMutex mutex;
+    unsigned long long thread;  // i.e., PyThread_get_thread_ident_ex()
+    size_t level;
+} _PyRecursiveMutex;
+
+PyAPI_FUNC(int) _PyRecursiveMutex_IsLockedByCurrentThread(_PyRecursiveMutex *m);
+PyAPI_FUNC(void) _PyRecursiveMutex_Lock(_PyRecursiveMutex *m);
+PyAPI_FUNC(void) _PyRecursiveMutex_Unlock(_PyRecursiveMutex *m);
+
+
+// A readers-writer (RW) lock. The lock supports multiple concurrent readers or
+// a single writer. The lock is write-preferring: if a writer is waiting while
+// the lock is read-locked, then new readers will be blocked. This avoids
+// starvation of writers.
+//
+// In C++, the equivalent synchronization primitive is std::shared_mutex
+// with shared ("read") and exclusive ("write") locking.
+//
+// The two least significant bits are used to indicate if the lock is
+// write-locked and if there are parked threads (either readers or writers)
+// waiting to acquire the lock. The remaining bits are used to indicate the
+// number of readers holding the lock.
+//
+// 0b000..00000: unlocked
+// 0bnnn..nnn00: nnn..nnn readers holding the lock
+// 0bnnn..nnn10: nnn..nnn readers holding the lock and a writer is waiting
+// 0b00000..010: unlocked with awoken writer about to acquire lock
+// 0b00000..001: write-locked
+// 0b00000..011: write-locked and readers or other writers are waiting
+//
+// Note that reader_count must be zero if the lock is held by a writer, and
+// vice versa. The lock can only be held by readers or a writer, but not both.
+//
+// The design is optimized for simplicity of the implementation. The lock is
+// not fair: if fairness is desired, use an additional PyMutex to serialize
+// writers. The lock is also not reentrant.
+typedef struct {
+    uintptr_t bits;
+} _PyRWMutex;
+
+// Read lock (i.e., shared lock)
+PyAPI_FUNC(void) _PyRWMutex_RLock(_PyRWMutex *rwmutex);
+PyAPI_FUNC(void) _PyRWMutex_RUnlock(_PyRWMutex *rwmutex);
+
+// Write lock (i.e., exclusive lock)
+PyAPI_FUNC(void) _PyRWMutex_Lock(_PyRWMutex *rwmutex);
+PyAPI_FUNC(void) _PyRWMutex_Unlock(_PyRWMutex *rwmutex);
+
+// Similar to the Linux seqlock: https://en.wikipedia.org/wiki/Seqlock
+// We use a sequence number to lock the writer: an even sequence means we're
+// unlocked, an odd sequence means we're locked. Readers will read the sequence
+// before attempting to read the underlying data and then read the sequence
+// number again after reading the data. If the sequence has not changed the
+// data is valid.
+//
+// This differs a little in that we use a CAS on the sequence as the lock,
+// instead of a separate spin lock. The writer can also detect that the
+// underlying data has not changed, abandon the write, and restore the
+// previous sequence.
+typedef struct {
+    uint32_t sequence;
+} _PySeqLock;
+
+// Lock the sequence lock for the writer
+PyAPI_FUNC(void) _PySeqLock_LockWrite(_PySeqLock *seqlock);
+
+// Unlock the sequence lock and move to the next sequence number.
+PyAPI_FUNC(void) _PySeqLock_UnlockWrite(_PySeqLock *seqlock);
+
+// Abandon the current update indicating that no mutations have occurred
+// and restore the previous sequence value.
+PyAPI_FUNC(void) _PySeqLock_AbandonWrite(_PySeqLock *seqlock);
+
+// Begin a read operation and return the current sequence number.
+PyAPI_FUNC(uint32_t) _PySeqLock_BeginRead(_PySeqLock *seqlock);
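+
+// A hedged reader-side sketch (illustrative only; `lock` guards a
+// hypothetical `value` that the writer updates between
+// _PySeqLock_LockWrite() and _PySeqLock_UnlockWrite()):
+//
+//     uint32_t seq;
+//     int snapshot;
+//     do {
+//         seq = _PySeqLock_BeginRead(&lock);
+//         snapshot = value;                  // read the protected data
+//     } while (!_PySeqLock_EndRead(&lock, seq));
+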
+// End the read operation and confirm that the sequence number has not changed.
+// Returns 1 if the read was successful or 0 if the read should be retried.
+PyAPI_FUNC(int) _PySeqLock_EndRead(_PySeqLock *seqlock, uint32_t previous);
+
+// Check if the lock was held during a fork and clear the lock. Returns 1
+// if the lock was held and any associated data should be cleared.
+PyAPI_FUNC(int) _PySeqLock_AfterFork(_PySeqLock *seqlock);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_LOCK_H */
diff --git a/Include/internal/pycore_long.h b/Include/internal/pycore_long.h
new file mode 100644
index 0000000000000000000000000000000000000000..ff7d9afc03a4f24ea4602560b49c68fc7b2273cb
--- /dev/null
+++ b/Include/internal/pycore_long.h
@@ -0,0 +1,310 @@
+#ifndef Py_INTERNAL_LONG_H
+#define Py_INTERNAL_LONG_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include "pycore_bytesobject.h"   // _PyBytesWriter
+#include "pycore_global_objects.h"// _PY_NSMALLNEGINTS
+#include "pycore_runtime.h"       // _PyRuntime
+
+/*
+ * Default int base conversion size limitation: Denial of Service prevention.
+ *
+ * Chosen such that this isn't wildly slow on modern hardware and so that
+ * everyone's existing deployed numpy test suite passes before
+ * https://github.com/numpy/numpy/issues/22098 is widely available.
+ *
+ * $ python -m timeit -s 's = "1"*4300' 'int(s)'
+ * 2000 loops, best of 5: 125 usec per loop
+ * $ python -m timeit -s 's = "1"*4300; v = int(s)' 'str(v)'
+ * 1000 loops, best of 5: 311 usec per loop
+ * (zen2 cloud VM)
+ *
+ * 4300 decimal digits fits a ~14284 bit number.
+ */
+#define _PY_LONG_DEFAULT_MAX_STR_DIGITS 4300
+/*
+ * Threshold for max digits check. For performance reasons int() and
+ * int.__str__() don't check values that are smaller than this
+ * threshold. Acts as a guaranteed minimum size limit for bignums that
+ * applications can expect from CPython.
+ *
+ * % python -m timeit -s 's = "1"*640; v = int(s)' 'str(int(s))'
+ * 20000 loops, best of 5: 12 usec per loop
+ *
+ * "640 digits should be enough for anyone." - gps
+ * fits a ~2126 bit decimal number.
+ */
+#define _PY_LONG_MAX_STR_DIGITS_THRESHOLD 640
+
+#if ((_PY_LONG_DEFAULT_MAX_STR_DIGITS != 0) && \
+   (_PY_LONG_DEFAULT_MAX_STR_DIGITS < _PY_LONG_MAX_STR_DIGITS_THRESHOLD))
+# error "_PY_LONG_DEFAULT_MAX_STR_DIGITS smaller than threshold."
+#endif
+
+/* runtime lifecycle */
+
+extern PyStatus _PyLong_InitTypes(PyInterpreterState *);
+extern void _PyLong_FiniTypes(PyInterpreterState *interp);
+
+
+/* other API */
+
+#define _PyLong_SMALL_INTS _Py_SINGLETON(small_ints)
+
+// _PyLong_GetZero() and _PyLong_GetOne() must always be available
+// _PyLong_FromUnsignedChar must always be available
+#if _PY_NSMALLPOSINTS < 257
+# error "_PY_NSMALLPOSINTS must be greater than or equal to 257"
+#endif
+
+// Return a reference to the immortal zero singleton.
+// The function cannot return NULL.
+static inline PyObject* _PyLong_GetZero(void)
+{ return (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS]; }
+
+// Return a reference to the immortal one singleton.
+// The function cannot return NULL.
+static inline PyObject* _PyLong_GetOne(void)
+{ return (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS+1]; }
+
+static inline PyObject* _PyLong_FromUnsignedChar(unsigned char i)
+{
+    return (PyObject *)&_PyLong_SMALL_INTS[_PY_NSMALLNEGINTS+i];
+}
+
+// _PyLong_Frexp returns a double x and an exponent e such that the
+// true value is approximately equal to x * 2**e. e is >= 0. x is
+// 0.0 if and only if the input is 0 (in which case, e and x are both
+// zeroes); otherwise, 0.5 <= abs(x) < 1.0. On overflow, which is
+// possible if the number of bits doesn't fit into a Py_ssize_t, sets
+// OverflowError and returns -1.0 for x, 0 for e.
+//
+// Export for 'math' shared extension
+PyAPI_FUNC(double) _PyLong_Frexp(PyLongObject *a, Py_ssize_t *e);
+
+extern PyObject* _PyLong_FromBytes(const char *, Py_ssize_t, int);
+
+// _PyLong_DivmodNear. Given integers a and b, compute the nearest
+// integer q to the exact quotient a / b, rounding to the nearest even integer
+// in the case of a tie. Return (q, r), where r = a - q*b. The remainder r
+// will satisfy abs(r) <= abs(b)/2, with equality possible only if q is
+// even.
+//
+// Export for '_datetime' shared extension.
+PyAPI_FUNC(PyObject*) _PyLong_DivmodNear(PyObject *, PyObject *);
+
+// _PyLong_Format: Convert the long to a string object with given base,
+// appending a base prefix of 0[box] if base is 2, 8 or 16.
+// Export for '_tkinter' shared extension.
+PyAPI_FUNC(PyObject*) _PyLong_Format(PyObject *obj, int base);
+
+// Export for 'math' shared extension
+PyAPI_FUNC(PyObject*) _PyLong_Rshift(PyObject *, size_t);
+
+// Export for 'math' shared extension
+PyAPI_FUNC(PyObject*) _PyLong_Lshift(PyObject *, size_t);
+
+PyAPI_FUNC(PyObject*) _PyLong_Add(PyLongObject *left, PyLongObject *right);
+PyAPI_FUNC(PyObject*) _PyLong_Multiply(PyLongObject *left, PyLongObject *right);
+PyAPI_FUNC(PyObject*) _PyLong_Subtract(PyLongObject *left, PyLongObject *right);
+
+// Export for 'binascii' shared extension.
+PyAPI_DATA(unsigned char) _PyLong_DigitValue[256];
+
+/* Format the object based on the format_spec, as defined in PEP 3101
+   (Advanced String Formatting). */
+extern int _PyLong_FormatAdvancedWriter(
+    _PyUnicodeWriter *writer,
+    PyObject *obj,
+    PyObject *format_spec,
+    Py_ssize_t start,
+    Py_ssize_t end);
+
+extern int _PyLong_FormatWriter(
+    _PyUnicodeWriter *writer,
+    PyObject *obj,
+    int base,
+    int alternate);
+
+extern char* _PyLong_FormatBytesWriter(
+    _PyBytesWriter *writer,
+    char *str,
+    PyObject *obj,
+    int base,
+    int alternate);
+
+// Argument converters used by Argument Clinic
+
+// Export for 'select' shared extension (Argument Clinic code)
+PyAPI_FUNC(int) _PyLong_UnsignedShort_Converter(PyObject *, void *);
+
+// Export for '_testclinic' shared extension (Argument Clinic code)
+PyAPI_FUNC(int) _PyLong_UnsignedInt_Converter(PyObject *, void *);
+
+// Export for '_blake2' shared extension (Argument Clinic code)
+PyAPI_FUNC(int) _PyLong_UnsignedLong_Converter(PyObject *, void *);
+
+// Export for '_blake2' shared extension (Argument Clinic code)
+PyAPI_FUNC(int) _PyLong_UnsignedLongLong_Converter(PyObject *, void *);
+
+// Export for '_testclinic' shared extension (Argument Clinic code)
+PyAPI_FUNC(int) _PyLong_Size_t_Converter(PyObject *, void *);
+
+/* Long value tag bits:
+ * 0-1: Sign bits value = (1-sign), i.e. negative=2, positive=0, zero=1.
+ * 2: Reserved for immortality bit
+ * 3+ Unsigned digit count
+ */
+#define SIGN_MASK 3
+#define SIGN_ZERO 1
+#define SIGN_NEGATIVE 2
+#define NON_SIZE_BITS 3
+
+/* The functions _PyLong_IsCompact and _PyLong_CompactValue are defined
+ * in Include/cpython/longobject.h, since they need to be inline.
+ *
+ * "Compact" values have at least one bit to spare,
+ * so that addition and subtraction can be performed on the values
+ * without risk of overflow.
+ *
+ * The inline functions need tag bits.
+ * For readability, rather than do `#define SIGN_MASK _PyLong_SIGN_MASK`
+ * we define them to the numbers in both places and then assert that
+ * they're the same.
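+ *
+ * A hedged worked example (illustrative only): a one-digit negative int
+ * such as -7 carries
+ *
+ *     lv_tag == (1 << NON_SIZE_BITS) | SIGN_NEGATIVE
+ *
+ * so `lv_tag >> NON_SIZE_BITS` recovers the digit count (1) and
+ * `1 - (lv_tag & SIGN_MASK)` recovers the sign (-1).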
+ */
+#if SIGN_MASK != _PyLong_SIGN_MASK
+# error "SIGN_MASK does not match _PyLong_SIGN_MASK"
+#endif
+#if NON_SIZE_BITS != _PyLong_NON_SIZE_BITS
+# error "NON_SIZE_BITS does not match _PyLong_NON_SIZE_BITS"
+#endif
+
+/* All *compact* values are guaranteed to fit into
+ * a Py_ssize_t with at least one bit to spare.
+ * In other words, for 64 bit machines, compact
+ * will be signed 63 (or fewer) bit values
+ */
+
+/* Return 1 if the argument is a non-negative compact int */
+static inline int
+_PyLong_IsNonNegativeCompact(const PyLongObject* op) {
+    assert(PyLong_Check(op));
+    return op->long_value.lv_tag <= (1 << NON_SIZE_BITS);
+}
+
+
+static inline int
+_PyLong_BothAreCompact(const PyLongObject* a, const PyLongObject* b) {
+    assert(PyLong_Check(a));
+    assert(PyLong_Check(b));
+    return (a->long_value.lv_tag | b->long_value.lv_tag) < (2 << NON_SIZE_BITS);
+}
+
+static inline bool
+_PyLong_IsZero(const PyLongObject *op)
+{
+    return (op->long_value.lv_tag & SIGN_MASK) == SIGN_ZERO;
+}
+
+static inline bool
+_PyLong_IsNegative(const PyLongObject *op)
+{
+    return (op->long_value.lv_tag & SIGN_MASK) == SIGN_NEGATIVE;
+}
+
+static inline bool
+_PyLong_IsPositive(const PyLongObject *op)
+{
+    return (op->long_value.lv_tag & SIGN_MASK) == 0;
+}
+
+static inline Py_ssize_t
+_PyLong_DigitCount(const PyLongObject *op)
+{
+    assert(PyLong_Check(op));
+    return op->long_value.lv_tag >> NON_SIZE_BITS;
+}
+
+/* Equivalent to _PyLong_DigitCount(op) * _PyLong_NonCompactSign(op) */
+static inline Py_ssize_t
+_PyLong_SignedDigitCount(const PyLongObject *op)
+{
+    assert(PyLong_Check(op));
+    Py_ssize_t sign = 1 - (op->long_value.lv_tag & SIGN_MASK);
+    return sign * (Py_ssize_t)(op->long_value.lv_tag >> NON_SIZE_BITS);
+}
+
+static inline int
+_PyLong_CompactSign(const PyLongObject *op)
+{
+    assert(PyLong_Check(op));
+    assert(_PyLong_IsCompact(op));
+    return 1 - (op->long_value.lv_tag & SIGN_MASK);
+}
+
+static inline int
+_PyLong_NonCompactSign(const PyLongObject *op)
+{
+    assert(PyLong_Check(op));
+    assert(!_PyLong_IsCompact(op));
+    return 1 - (op->long_value.lv_tag & SIGN_MASK);
+}
+
+/* Do a and b have the same sign? */
+static inline int
+_PyLong_SameSign(const PyLongObject *a, const PyLongObject *b)
+{
+    return (a->long_value.lv_tag & SIGN_MASK) == (b->long_value.lv_tag & SIGN_MASK);
+}
+
+#define TAG_FROM_SIGN_AND_SIZE(sign, size) ((1 - (sign)) | ((size) << NON_SIZE_BITS))
+
+static inline void
+_PyLong_SetSignAndDigitCount(PyLongObject *op, int sign, Py_ssize_t size)
+{
+    assert(size >= 0);
+    assert(-1 <= sign && sign <= 1);
+    assert(sign != 0 || size == 0);
+    op->long_value.lv_tag = TAG_FROM_SIGN_AND_SIZE(sign, (size_t)size);
+}
+
+static inline void
+_PyLong_SetDigitCount(PyLongObject *op, Py_ssize_t size)
+{
+    assert(size >= 0);
+    op->long_value.lv_tag = (((size_t)size) << NON_SIZE_BITS) | (op->long_value.lv_tag & SIGN_MASK);
+}
+
+#define NON_SIZE_MASK ~((1 << NON_SIZE_BITS) - 1)
+
+static inline void
+_PyLong_FlipSign(PyLongObject *op) {
+    unsigned int flipped_sign = 2 - (op->long_value.lv_tag & SIGN_MASK);
+    op->long_value.lv_tag &= NON_SIZE_MASK;
+    op->long_value.lv_tag |= flipped_sign;
+}
+
+#define _PyLong_DIGIT_INIT(val) \
+    { \
+        .ob_base = _PyObject_HEAD_INIT(&PyLong_Type), \
+        .long_value = { \
+            .lv_tag = TAG_FROM_SIGN_AND_SIZE( \
+                (val) == 0 ? 0 : ((val) < 0 ? -1 : 1), \
+                (val) == 0 ? 0 : 1), \
+            { ((val) >= 0 ? (val) : -(val)) }, \
+        } \
+    }
+
+#define _PyLong_FALSE_TAG TAG_FROM_SIGN_AND_SIZE(0, 0)
+#define _PyLong_TRUE_TAG TAG_FROM_SIGN_AND_SIZE(1, 1)
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_LONG_H */
diff --git a/Include/internal/pycore_memoryobject.h b/Include/internal/pycore_memoryobject.h
new file mode 100644
index 0000000000000000000000000000000000000000..62e204fcbf65339d252793e0ffd1e54b9f860ce3
--- /dev/null
+++ b/Include/internal/pycore_memoryobject.h
@@ -0,0 +1,20 @@
+#ifndef Py_INTERNAL_MEMORYOBJECT_H
+#define Py_INTERNAL_MEMORYOBJECT_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+extern PyTypeObject _PyManagedBuffer_Type;
+
+PyObject *
+_PyMemoryView_FromBufferProc(PyObject *v, int flags,
+                             getbufferproc bufferproc);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_MEMORYOBJECT_H */
diff --git a/Include/internal/pycore_mimalloc.h b/Include/internal/pycore_mimalloc.h
new file mode 100644
index 0000000000000000000000000000000000000000..d870d01beb702c060b7b81ff325a2900bc4d9012
--- /dev/null
+++ b/Include/internal/pycore_mimalloc.h
@@ -0,0 +1,69 @@
+#ifndef Py_INTERNAL_MIMALLOC_H
+#define Py_INTERNAL_MIMALLOC_H
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#if defined(MIMALLOC_H) || defined(MIMALLOC_TYPES_H)
+# error "pycore_mimalloc.h must be included before mimalloc.h"
+#endif
+
+typedef enum {
+    _Py_MIMALLOC_HEAP_MEM = 0,       // PyMem_Malloc() and friends
+    _Py_MIMALLOC_HEAP_OBJECT = 1,    // non-GC objects
+    _Py_MIMALLOC_HEAP_GC = 2,        // GC objects without pre-header
+    _Py_MIMALLOC_HEAP_GC_PRE = 3,    // GC objects with pre-header
+    _Py_MIMALLOC_HEAP_COUNT
+} _Py_mimalloc_heap_id;
+
+#include "pycore_pymem.h"
+
+#ifdef WITH_MIMALLOC
+# ifdef Py_GIL_DISABLED
+#  define MI_PRIM_THREAD_ID _Py_ThreadId
+# endif
+# define MI_DEBUG_UNINIT PYMEM_CLEANBYTE
+# define MI_DEBUG_FREED PYMEM_DEADBYTE
+# define MI_DEBUG_PADDING PYMEM_FORBIDDENBYTE
+#ifdef Py_DEBUG
+# define MI_DEBUG 2
+#else
+# define MI_DEBUG 0
+#endif
+
+#ifdef _Py_THREAD_SANITIZER
+# define MI_TSAN 1
+#endif
+
+#ifdef __cplusplus
+extern "C++" {
+#endif
+
+#include "mimalloc/mimalloc.h"
+#include "mimalloc/mimalloc/types.h"
+#include "mimalloc/mimalloc/internal.h"
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
+#ifdef Py_GIL_DISABLED
+struct _mimalloc_interp_state {
+    // When exiting, threads place any segments with live blocks in this
+    // shared pool for other threads to claim and reuse.
+ mi_abandoned_pool_t abandoned_pool; +}; + +struct _mimalloc_thread_state { + mi_heap_t *current_object_heap; + mi_heap_t heaps[_Py_MIMALLOC_HEAP_COUNT]; + mi_tld_t tld; + int initialized; + struct llist_node page_list; +}; +#endif + +#endif // Py_INTERNAL_MIMALLOC_H diff --git a/Include/internal/pycore_modsupport.h b/Include/internal/pycore_modsupport.h new file mode 100644 index 0000000000000000000000000000000000000000..11fde814875938f23768d517a5434032502b9298 --- /dev/null +++ b/Include/internal/pycore_modsupport.h @@ -0,0 +1,107 @@ +#ifndef Py_INTERNAL_MODSUPPORT_H +#define Py_INTERNAL_MODSUPPORT_H + +#include "pycore_lock.h" // _PyOnceFlag + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + + +extern int _PyArg_NoKwnames(const char *funcname, PyObject *kwnames); +#define _PyArg_NoKwnames(funcname, kwnames) \ + ((kwnames) == NULL || _PyArg_NoKwnames((funcname), (kwnames))) + +// Export for '_bz2' shared extension +PyAPI_FUNC(int) _PyArg_NoPositional(const char *funcname, PyObject *args); +#define _PyArg_NoPositional(funcname, args) \ + ((args) == NULL || _PyArg_NoPositional((funcname), (args))) + +// Export for '_asyncio' shared extension +PyAPI_FUNC(int) _PyArg_NoKeywords(const char *funcname, PyObject *kwargs); +#define _PyArg_NoKeywords(funcname, kwargs) \ + ((kwargs) == NULL || _PyArg_NoKeywords((funcname), (kwargs))) + +// Export for 'zlib' shared extension +PyAPI_FUNC(int) _PyArg_CheckPositional(const char *, Py_ssize_t, + Py_ssize_t, Py_ssize_t); +#define _Py_ANY_VARARGS(n) ((n) == PY_SSIZE_T_MAX) +#define _PyArg_CheckPositional(funcname, nargs, min, max) \ + ((!_Py_ANY_VARARGS(max) && (min) <= (nargs) && (nargs) <= (max)) \ + || _PyArg_CheckPositional((funcname), (nargs), (min), (max))) + +extern PyObject ** _Py_VaBuildStack( + PyObject **small_stack, + Py_ssize_t small_stack_len, + const char *format, + va_list va, + Py_ssize_t *p_nargs); + +extern PyObject* _PyModule_CreateInitialized(PyModuleDef*, int apiver); + +// Export for '_curses' shared extension +PyAPI_FUNC(int) _PyArg_ParseStack( + PyObject *const *args, + Py_ssize_t nargs, + const char *format, + ...); + +extern int _PyArg_UnpackStack( + PyObject *const *args, + Py_ssize_t nargs, + const char *name, + Py_ssize_t min, + Py_ssize_t max, + ...); + +// Export for '_heapq' shared extension +PyAPI_FUNC(void) _PyArg_BadArgument( + const char *fname, + const char *displayname, + const char *expected, + PyObject *arg); + +// --- _PyArg_Parser API --------------------------------------------------- + +// Export for '_dbm' shared extension +PyAPI_FUNC(int) _PyArg_ParseStackAndKeywords( + PyObject *const *args, + Py_ssize_t nargs, + PyObject *kwnames, + struct _PyArg_Parser *, + ...); + +// Export for 'math' shared extension +PyAPI_FUNC(PyObject * const *) _PyArg_UnpackKeywords( + PyObject *const *args, + Py_ssize_t nargs, + PyObject *kwargs, + PyObject *kwnames, + struct _PyArg_Parser *parser, + int minpos, + int maxpos, + int minkw, + PyObject **buf); +#define _PyArg_UnpackKeywords(args, nargs, kwargs, kwnames, parser, minpos, maxpos, minkw, buf) \ + (((minkw) == 0 && (kwargs) == NULL && (kwnames) == NULL && \ + (minpos) <= (nargs) && (nargs) <= (maxpos) && (args) != NULL) ? 
(args) : \ + _PyArg_UnpackKeywords((args), (nargs), (kwargs), (kwnames), (parser), \ + (minpos), (maxpos), (minkw), (buf))) + +// Export for '_testclinic' shared extension +PyAPI_FUNC(PyObject * const *) _PyArg_UnpackKeywordsWithVararg( + PyObject *const *args, Py_ssize_t nargs, + PyObject *kwargs, PyObject *kwnames, + struct _PyArg_Parser *parser, + int minpos, int maxpos, int minkw, + int vararg, PyObject **buf); + +#ifdef __cplusplus +} +#endif +#endif // !Py_INTERNAL_MODSUPPORT_H + diff --git a/Include/internal/pycore_moduleobject.h b/Include/internal/pycore_moduleobject.h new file mode 100644 index 0000000000000000000000000000000000000000..dacc00dba54495136067bf39955862af54087dab --- /dev/null +++ b/Include/internal/pycore_moduleobject.h @@ -0,0 +1,56 @@ +#ifndef Py_INTERNAL_MODULEOBJECT_H +#define Py_INTERNAL_MODULEOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +extern void _PyModule_Clear(PyObject *); +extern void _PyModule_ClearDict(PyObject *); +extern int _PyModuleSpec_IsInitializing(PyObject *); +extern int _PyModuleSpec_GetFileOrigin(PyObject *, PyObject **); +extern int _PyModule_IsPossiblyShadowing(PyObject *); + +extern int _PyModule_IsExtension(PyObject *obj); + +typedef struct { + PyObject_HEAD + PyObject *md_dict; + PyModuleDef *md_def; + void *md_state; + PyObject *md_weaklist; + // for logging purposes after md_dict is cleared + PyObject *md_name; +#ifdef Py_GIL_DISABLED + void *md_gil; +#endif +} PyModuleObject; + +static inline PyModuleDef* _PyModule_GetDef(PyObject *mod) { + assert(PyModule_Check(mod)); + return ((PyModuleObject *)mod)->md_def; +} + +static inline void* _PyModule_GetState(PyObject* mod) { + assert(PyModule_Check(mod)); + return ((PyModuleObject *)mod)->md_state; +} + +static inline PyObject* _PyModule_GetDict(PyObject *mod) { + assert(PyModule_Check(mod)); + PyObject *dict = ((PyModuleObject *)mod) -> md_dict; + // _PyModule_GetDict(mod) must not be used after calling module_clear(mod) + assert(dict != NULL); + return dict; // borrowed reference +} + +PyObject* _Py_module_getattro_impl(PyModuleObject *m, PyObject *name, int suppress); +PyObject* _Py_module_getattro(PyModuleObject *m, PyObject *name); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_MODULEOBJECT_H */ diff --git a/Include/internal/pycore_namespace.h b/Include/internal/pycore_namespace.h new file mode 100644 index 0000000000000000000000000000000000000000..f165cf15319a599b1aab816a6a7bca128159a6a9 --- /dev/null +++ b/Include/internal/pycore_namespace.h @@ -0,0 +1,21 @@ +// Simple namespace object interface + +#ifndef Py_INTERNAL_NAMESPACE_H +#define Py_INTERNAL_NAMESPACE_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +extern PyTypeObject _PyNamespace_Type; + +// Export for '_testmultiphase' shared extension +PyAPI_FUNC(PyObject*) _PyNamespace_New(PyObject *kwds); + +#ifdef __cplusplus +} +#endif +#endif // !Py_INTERNAL_NAMESPACE_H diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h new file mode 100644 index 0000000000000000000000000000000000000000..5877d43f4fd5a4d9d5b2c989c9ef9f8245805e12 --- /dev/null +++ b/Include/internal/pycore_object.h @@ -0,0 +1,867 @@ +#ifndef Py_INTERNAL_OBJECT_H +#define Py_INTERNAL_OBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include +#include "pycore_gc.h" 
          // _PyObject_GC_IS_TRACKED()
+#include "pycore_emscripten_trampoline.h"  // _PyCFunction_TrampolineCall()
+#include "pycore_interp.h"                 // PyInterpreterState.gc
+#include "pycore_pyatomic_ft_wrappers.h"   // FT_ATOMIC_STORE_PTR_RELAXED
+#include "pycore_pystate.h"                // _PyInterpreterState_GET()
+
+
+#define _Py_IMMORTAL_REFCNT_LOOSE ((_Py_IMMORTAL_REFCNT >> 1) + 1)
+
+// gh-121528, gh-118997: Similar to _Py_IsImmortal() but more lenient when
+// comparing the reference count, to stay compatible with C extensions built
+// with the stable ABI 3.11 or older. Such extensions implement INCREF/DECREF
+// as refcnt++ and refcnt-- without taking into account immortal objects. For
+// example, the reference count of an immortal object can change from
+// _Py_IMMORTAL_REFCNT to _Py_IMMORTAL_REFCNT+1 (INCREF) or
+// _Py_IMMORTAL_REFCNT-1 (DECREF).
+//
+// This function should only be used in assertions. Otherwise, _Py_IsImmortal()
+// must be used instead.
+static inline int _Py_IsImmortalLoose(PyObject *op)
+{
+#if defined(Py_GIL_DISABLED)
+    return _Py_IsImmortal(op);
+#else
+    return (op->ob_refcnt >= _Py_IMMORTAL_REFCNT_LOOSE);
+#endif
+}
+#define _Py_IsImmortalLoose(op) _Py_IsImmortalLoose(_PyObject_CAST(op))
+
+
+/* Check if an object is consistent. For example, ensure that the reference
+   counter is greater than or equal to 1, and ensure that ob_type is not NULL.
+
+   Call _PyObject_AssertFailed() if the object is inconsistent.
+
+   If check_content is zero, only check header fields: reduce the overhead.
+
+   The function always returns 1. The return value is just here to be able to
+   write:
+
+   assert(_PyObject_CheckConsistency(obj, 1)); */
+extern int _PyObject_CheckConsistency(PyObject *op, int check_content);
+
+extern void _PyDebugAllocatorStats(FILE *out, const char *block_name,
+                                   int num_blocks, size_t sizeof_block);
+
+extern void _PyObject_DebugTypeStats(FILE *out);
+
+#ifdef Py_TRACE_REFS
+// Forget a reference registered by _Py_NewReference(). Function called by
+// _Py_Dealloc().
+//
+// On a free list, the function can be used before modifying an object to
+// remove the object from traced objects. Then _Py_NewReference() or
+// _Py_NewReferenceNoTotal() should be called again on the object to trace
+// it again.
+extern void _Py_ForgetReference(PyObject *);
+#endif
+
+// Export for shared _testinternalcapi extension
+PyAPI_FUNC(int) _PyObject_IsFreed(PyObject *);
+
+/* We need to maintain an internal copy of Py{Var}Object_HEAD_INIT to avoid
+   designated initializer conflicts in C++20. If we use the definition in
+   object.h, we will be mixing designated and non-designated initializers in
+   pycore objects, which is forbidden in C++20. However, if we then use
+   designated initializers in object.h, then extensions without designated
+   initializers break. Furthermore, we can't use designated initializers in
+   extensions since these are not supported pre-C++20. Thus, keeping an
+   internal copy here is the most
Thus, keeping an internal copy here is the most + backwards compatible solution */ +#if defined(Py_GIL_DISABLED) +#define _PyObject_HEAD_INIT(type) \ + { \ + .ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL, \ + .ob_type = (type) \ + } +#else +#define _PyObject_HEAD_INIT(type) \ + { \ + .ob_refcnt = _Py_IMMORTAL_REFCNT, \ + .ob_type = (type) \ + } +#endif +#define _PyVarObject_HEAD_INIT(type, size) \ + { \ + .ob_base = _PyObject_HEAD_INIT(type), \ + .ob_size = size \ + } + +PyAPI_FUNC(void) _Py_NO_RETURN _Py_FatalRefcountErrorFunc( + const char *func, + const char *message); + +#define _Py_FatalRefcountError(message) \ + _Py_FatalRefcountErrorFunc(__func__, (message)) + +#define _PyReftracerTrack(obj, operation) \ + do { \ + struct _reftracer_runtime_state *tracer = &_PyRuntime.ref_tracer; \ + if (tracer->tracer_func != NULL) { \ + void *data = tracer->tracer_data; \ + tracer->tracer_func((obj), (operation), data); \ + } \ + } while(0) + +#ifdef Py_REF_DEBUG +/* The symbol is only exposed in the API for the sake of extensions + built against the pre-3.12 stable ABI. */ +PyAPI_DATA(Py_ssize_t) _Py_RefTotal; + +extern void _Py_AddRefTotal(PyThreadState *, Py_ssize_t); +extern void _Py_IncRefTotal(PyThreadState *); +extern void _Py_DecRefTotal(PyThreadState *); + +# define _Py_DEC_REFTOTAL(interp) \ + interp->object_state.reftotal-- +#endif + +// Increment reference count by n +static inline void _Py_RefcntAdd(PyObject* op, Py_ssize_t n) +{ + if (_Py_IsImmortal(op)) { + return; + } +#ifdef Py_REF_DEBUG + _Py_AddRefTotal(_PyThreadState_GET(), n); +#endif +#if !defined(Py_GIL_DISABLED) + op->ob_refcnt += n; +#else + if (_Py_IsOwnedByCurrentThread(op)) { + uint32_t local = op->ob_ref_local; + Py_ssize_t refcnt = (Py_ssize_t)local + n; +# if PY_SSIZE_T_MAX > UINT32_MAX + if (refcnt > (Py_ssize_t)UINT32_MAX) { + // Make the object immortal if the 32-bit local reference count + // would overflow. + refcnt = _Py_IMMORTAL_REFCNT_LOCAL; + } +# endif + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, (uint32_t)refcnt); + } + else { + _Py_atomic_add_ssize(&op->ob_ref_shared, (n << _Py_REF_SHARED_SHIFT)); + } +#endif + // Although the ref count was increased by `n` (which may be greater than 1) + // it is only a single increment (i.e. addition) operation, so only 1 refcnt + // increment operation is counted. + _Py_INCREF_STAT_INC(); +} +#define _Py_RefcntAdd(op, n) _Py_RefcntAdd(_PyObject_CAST(op), n) + +extern void _Py_SetImmortal(PyObject *op); +extern void _Py_SetImmortalUntracked(PyObject *op); + +// Checks if an object has a single, unique reference. If the caller holds a +// unique reference, it may be able to safely modify the object in-place. +static inline int +_PyObject_IsUniquelyReferenced(PyObject *ob) +{ +#if !defined(Py_GIL_DISABLED) + return Py_REFCNT(ob) == 1; +#else + // NOTE: the entire ob_ref_shared field must be zero, including flags, to + // ensure that other threads cannot concurrently create new references to + // this object. + return (_Py_IsOwnedByCurrentThread(ob) && + _Py_atomic_load_uint32_relaxed(&ob->ob_ref_local) == 1 && + _Py_atomic_load_ssize_relaxed(&ob->ob_ref_shared) == 0); +#endif +} + +// Makes an immortal object mortal again with the specified refcnt. Should only +// be used during runtime finalization. 
+static inline void _Py_SetMortal(PyObject *op, Py_ssize_t refcnt) +{ + if (op) { + assert(_Py_IsImmortalLoose(op)); +#ifdef Py_GIL_DISABLED + op->ob_tid = _Py_UNOWNED_TID; + op->ob_ref_local = 0; + op->ob_ref_shared = _Py_REF_SHARED(refcnt, _Py_REF_MERGED); +#else + op->ob_refcnt = refcnt; +#endif + } +} + +/* _Py_ClearImmortal() should only be used during runtime finalization. */ +static inline void _Py_ClearImmortal(PyObject *op) +{ + if (op) { + _Py_SetMortal(op, 1); + Py_DECREF(op); + } +} +#define _Py_ClearImmortal(op) \ + do { \ + _Py_ClearImmortal(_PyObject_CAST(op)); \ + op = NULL; \ + } while (0) + +// Mark an object as supporting deferred reference counting. This is a no-op +// in the default (with GIL) build. Objects that use deferred reference +// counting should be tracked by the GC so that they are eventually collected. +extern void _PyObject_SetDeferredRefcount(PyObject *op); + +static inline int +_PyObject_HasDeferredRefcount(PyObject *op) +{ +#ifdef Py_GIL_DISABLED + return _PyObject_HAS_GC_BITS(op, _PyGC_BITS_DEFERRED); +#else + return 0; +#endif +} + +#if !defined(Py_GIL_DISABLED) +static inline void +_Py_DECREF_SPECIALIZED(PyObject *op, const destructor destruct) +{ + if (_Py_IsImmortal(op)) { + return; + } + _Py_DECREF_STAT_INC(); +#ifdef Py_REF_DEBUG + _Py_DEC_REFTOTAL(PyInterpreterState_Get()); +#endif + if (--op->ob_refcnt != 0) { + assert(op->ob_refcnt > 0); + } + else { +#ifdef Py_TRACE_REFS + _Py_ForgetReference(op); +#endif + _PyReftracerTrack(op, PyRefTracer_DESTROY); + destruct(op); + } +} + +static inline void +_Py_DECREF_NO_DEALLOC(PyObject *op) +{ + if (_Py_IsImmortal(op)) { + return; + } + _Py_DECREF_STAT_INC(); +#ifdef Py_REF_DEBUG + _Py_DEC_REFTOTAL(PyInterpreterState_Get()); +#endif + op->ob_refcnt--; +#ifdef Py_DEBUG + if (op->ob_refcnt <= 0) { + _Py_FatalRefcountError("Expected a positive remaining refcount"); + } +#endif +} + +#else +// TODO: implement Py_DECREF specializations for Py_GIL_DISABLED build +static inline void +_Py_DECREF_SPECIALIZED(PyObject *op, const destructor destruct) +{ + Py_DECREF(op); +} + +static inline void +_Py_DECREF_NO_DEALLOC(PyObject *op) +{ + Py_DECREF(op); +} + +static inline int +_Py_REF_IS_MERGED(Py_ssize_t ob_ref_shared) +{ + return (ob_ref_shared & _Py_REF_SHARED_FLAG_MASK) == _Py_REF_MERGED; +} + +static inline int +_Py_REF_IS_QUEUED(Py_ssize_t ob_ref_shared) +{ + return (ob_ref_shared & _Py_REF_SHARED_FLAG_MASK) == _Py_REF_QUEUED; +} + +// Merge the local and shared reference count fields and add `extra` to the +// refcount when merging. +Py_ssize_t _Py_ExplicitMergeRefcount(PyObject *op, Py_ssize_t extra); +#endif // !defined(Py_GIL_DISABLED) + +#ifdef Py_REF_DEBUG +# undef _Py_DEC_REFTOTAL +#endif + + +extern int _PyType_CheckConsistency(PyTypeObject *type); +extern int _PyDict_CheckConsistency(PyObject *mp, int check_content); + +/* Update the Python traceback of an object. This function must be called + when a memory block is reused from a free list. + + Internal function called by _Py_NewReference(). 
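+
+   A hedged sketch of the free-list reuse this supports (hypothetical
+   PyFooObject free list; not code from this header):
+
+       PyFooObject *op = free_list;
+       if (op != NULL) {
+           free_list = op->next_free;         // hypothetical link field
+           _Py_NewReference((PyObject *)op);  // re-traces the reused block
+       }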
+ */
+extern int _PyTraceMalloc_TraceRef(PyObject *op, PyRefTracerEvent event, void*);
+
+// Fast inlined version of PyType_HasFeature()
+static inline int
+_PyType_HasFeature(PyTypeObject *type, unsigned long feature) {
+    return ((FT_ATOMIC_LOAD_ULONG_RELAXED(type->tp_flags) & feature) != 0);
+}
+
+extern void _PyType_InitCache(PyInterpreterState *interp);
+
+extern PyStatus _PyObject_InitState(PyInterpreterState *interp);
+extern void _PyObject_FiniState(PyInterpreterState *interp);
+extern bool _PyRefchain_IsTraced(PyInterpreterState *interp, PyObject *obj);
+
+/* Inline functions trading binary compatibility for speed:
+   _PyObject_Init() is the fast version of PyObject_Init(), and
+   _PyObject_InitVar() is the fast version of PyObject_InitVar().
+
+   These inline functions must not be called with op=NULL. */
+static inline void
+_PyObject_Init(PyObject *op, PyTypeObject *typeobj)
+{
+    assert(op != NULL);
+    Py_SET_TYPE(op, typeobj);
+    assert(_PyType_HasFeature(typeobj, Py_TPFLAGS_HEAPTYPE) || _Py_IsImmortalLoose(typeobj));
+    Py_INCREF(typeobj);
+    _Py_NewReference(op);
+}
+
+static inline void
+_PyObject_InitVar(PyVarObject *op, PyTypeObject *typeobj, Py_ssize_t size)
+{
+    assert(op != NULL);
+    assert(typeobj != &PyLong_Type);
+    _PyObject_Init((PyObject *)op, typeobj);
+    Py_SET_SIZE(op, size);
+}
+
+
+/* Tell the GC to track this object.
+ *
+ * The object must not be tracked by the GC.
+ *
+ * NB: While the object is tracked by the collector, it must be safe to call the
+ * ob_traverse method.
+ *
+ * Internal note: interp->gc.generation0->_gc_prev doesn't have any bit flags
+ * because it's not an object header. So we don't use _PyGCHead_PREV() and
+ * _PyGCHead_SET_PREV() for it to avoid unnecessary bitwise operations.
+ *
+ * See also the public PyObject_GC_Track() function.
+ */
+static inline void _PyObject_GC_TRACK(
+// The preprocessor removes _PyObject_ASSERT_FROM() calls if NDEBUG is defined
+#ifndef NDEBUG
+    const char *filename, int lineno,
+#endif
+    PyObject *op)
+{
+    _PyObject_ASSERT_FROM(op, !_PyObject_GC_IS_TRACKED(op),
+                          "object already tracked by the garbage collector",
+                          filename, lineno, __func__);
+#ifdef Py_GIL_DISABLED
+    _PyObject_SET_GC_BITS(op, _PyGC_BITS_TRACKED);
+#else
+    PyGC_Head *gc = _Py_AS_GC(op);
+    _PyObject_ASSERT_FROM(op,
+                          (gc->_gc_prev & _PyGC_PREV_MASK_COLLECTING) == 0,
+                          "object is in generation which is garbage collected",
+                          filename, lineno, __func__);
+
+    PyInterpreterState *interp = _PyInterpreterState_GET();
+    PyGC_Head *generation0 = interp->gc.generation0;
+    PyGC_Head *last = (PyGC_Head*)(generation0->_gc_prev);
+    _PyGCHead_SET_NEXT(last, gc);
+    _PyGCHead_SET_PREV(gc, last);
+    _PyGCHead_SET_NEXT(gc, generation0);
+    generation0->_gc_prev = (uintptr_t)gc;
+#endif
+}
+
+/* Tell the GC to stop tracking this object.
+ *
+ * Internal note: This may be called while the GC is running, so
+ * _PyGC_PREV_MASK_COLLECTING must be cleared. But the
+ * _PyGC_PREV_MASK_FINALIZED bit is kept.
+ *
+ * The object must be tracked by the GC.
+ *
+ * See also the public PyObject_GC_UnTrack(), which accepts an object that is
+ * not tracked.
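+ *
+ * A hedged sketch of the common untrack-mutate-retrack pattern
+ * (illustrative call site, not code from this header):
+ *
+ *     _PyObject_GC_UNTRACK(op);
+ *     ...mutate op while a concurrent GC traversal could not handle it...
+ *     _PyObject_GC_TRACK(op);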
+ */
+static inline void _PyObject_GC_UNTRACK(
+// The preprocessor removes _PyObject_ASSERT_FROM() calls if NDEBUG is defined
+#ifndef NDEBUG
+    const char *filename, int lineno,
+#endif
+    PyObject *op)
+{
+    _PyObject_ASSERT_FROM(op, _PyObject_GC_IS_TRACKED(op),
+                          "object not tracked by the garbage collector",
+                          filename, lineno, __func__);
+
+#ifdef Py_GIL_DISABLED
+    _PyObject_CLEAR_GC_BITS(op, _PyGC_BITS_TRACKED);
+#else
+    PyGC_Head *gc = _Py_AS_GC(op);
+    PyGC_Head *prev = _PyGCHead_PREV(gc);
+    PyGC_Head *next = _PyGCHead_NEXT(gc);
+    _PyGCHead_SET_NEXT(prev, next);
+    _PyGCHead_SET_PREV(next, prev);
+    gc->_gc_next = 0;
+    gc->_gc_prev &= _PyGC_PREV_MASK_FINALIZED;
+#endif
+}
+
+// Macros to accept any type for the parameter, and to automatically pass
+// the filename and the line number (if NDEBUG is not defined) where the macro
+// is called.
+#ifdef NDEBUG
+#  define _PyObject_GC_TRACK(op) \
+        _PyObject_GC_TRACK(_PyObject_CAST(op))
+#  define _PyObject_GC_UNTRACK(op) \
+        _PyObject_GC_UNTRACK(_PyObject_CAST(op))
+#else
+#  define _PyObject_GC_TRACK(op) \
+        _PyObject_GC_TRACK(__FILE__, __LINE__, _PyObject_CAST(op))
+#  define _PyObject_GC_UNTRACK(op) \
+        _PyObject_GC_UNTRACK(__FILE__, __LINE__, _PyObject_CAST(op))
+#endif
+
+#ifdef Py_GIL_DISABLED
+
+/* Tries to increment an object's reference count
+ *
+ * This is a specialized version of _Py_TryIncref that only succeeds if the
+ * object is immortal or local to this thread. It does not handle the case
+ * where the reference count modification requires an atomic operation. This
+ * allows call sites to specialize for the immortal/local case.
+ */
+static inline int
+_Py_TryIncrefFast(PyObject *op) {
+    uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local);
+    local += 1;
+    if (local == 0) {
+        // immortal
+        return 1;
+    }
+    if (_Py_IsOwnedByCurrentThread(op)) {
+        _Py_INCREF_STAT_INC();
+        _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, local);
+#ifdef Py_REF_DEBUG
+        _Py_IncRefTotal(_PyThreadState_GET());
+#endif
+        return 1;
+    }
+    return 0;
+}
+
+static inline int
+_Py_TryIncRefShared(PyObject *op)
+{
+    Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&op->ob_ref_shared);
+    for (;;) {
+        // If the shared refcount is zero and the object is either merged
+        // or may not have weak references, then we cannot incref it.
+        if (shared == 0 || shared == _Py_REF_MERGED) {
+            return 0;
+        }
+
+        if (_Py_atomic_compare_exchange_ssize(
+                &op->ob_ref_shared,
+                &shared,
+                shared + (1 << _Py_REF_SHARED_SHIFT))) {
+#ifdef Py_REF_DEBUG
+            _Py_IncRefTotal(_PyThreadState_GET());
+#endif
+            _Py_INCREF_STAT_INC();
+            return 1;
+        }
+    }
+}
+
+/* Tries to incref the object op and ensures that *src still points to it. */
+static inline int
+_Py_TryIncrefCompare(PyObject **src, PyObject *op)
+{
+    if (_Py_TryIncrefFast(op)) {
+        return 1;
+    }
+    if (!_Py_TryIncRefShared(op)) {
+        return 0;
+    }
+    if (op != _Py_atomic_load_ptr(src)) {
+        Py_DECREF(op);
+        return 0;
+    }
+    return 1;
+}
+
+/* Loads and increfs an object from ptr, which may contain a NULL value.
+   Safe with concurrent (atomic) updates to ptr.
+   NOTE: The writer must set maybe-weakref on the stored object! */
+static inline PyObject *
+_Py_XGetRef(PyObject **ptr)
+{
+    for (;;) {
+        PyObject *value = _Py_atomic_load_ptr(ptr);
+        if (value == NULL) {
+            return value;
+        }
+        if (_Py_TryIncrefCompare(ptr, value)) {
+            return value;
+        }
+    }
+}
+
+/* Attempts to load and incref an object from ptr. Returns NULL
+   on failure, which may be due to a NULL value or a concurrent update.
*/ +static inline PyObject * +_Py_TryXGetRef(PyObject **ptr) +{ + PyObject *value = _Py_atomic_load_ptr(ptr); + if (value == NULL) { + return value; + } + if (_Py_TryIncrefCompare(ptr, value)) { + return value; + } + return NULL; +} + +/* Like Py_NewRef but also optimistically sets _Py_REF_MAYBE_WEAKREF + on objects owned by a different thread. */ +static inline PyObject * +_Py_NewRefWithLock(PyObject *op) +{ + if (_Py_TryIncrefFast(op)) { + return op; + } +#ifdef Py_REF_DEBUG + _Py_IncRefTotal(_PyThreadState_GET()); +#endif + _Py_INCREF_STAT_INC(); + for (;;) { + Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&op->ob_ref_shared); + Py_ssize_t new_shared = shared + (1 << _Py_REF_SHARED_SHIFT); + if ((shared & _Py_REF_SHARED_FLAG_MASK) == 0) { + new_shared |= _Py_REF_MAYBE_WEAKREF; + } + if (_Py_atomic_compare_exchange_ssize( + &op->ob_ref_shared, + &shared, + new_shared)) { + return op; + } + } +} + +static inline PyObject * +_Py_XNewRefWithLock(PyObject *obj) +{ + if (obj == NULL) { + return NULL; + } + return _Py_NewRefWithLock(obj); +} + +static inline void +_PyObject_SetMaybeWeakref(PyObject *op) +{ + if (_Py_IsImmortal(op)) { + return; + } + for (;;) { + Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&op->ob_ref_shared); + if ((shared & _Py_REF_SHARED_FLAG_MASK) != 0) { + // Nothing to do if it's in WEAKREFS, QUEUED, or MERGED states. + return; + } + if (_Py_atomic_compare_exchange_ssize( + &op->ob_ref_shared, &shared, shared | _Py_REF_MAYBE_WEAKREF)) { + return; + } + } +} + +extern int _PyObject_ResurrectEndSlow(PyObject *op); +#endif + +// Temporarily resurrects an object during deallocation. The refcount is set +// to one. +static inline void +_PyObject_ResurrectStart(PyObject *op) +{ + assert(Py_REFCNT(op) == 0); +#ifdef Py_REF_DEBUG + _Py_IncRefTotal(_PyThreadState_GET()); +#endif +#ifdef Py_GIL_DISABLED + _Py_atomic_store_uintptr_relaxed(&op->ob_tid, _Py_ThreadId()); + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, 1); + _Py_atomic_store_ssize_relaxed(&op->ob_ref_shared, 0); +#else + Py_SET_REFCNT(op, 1); +#endif +} + +// Undoes an object resurrection by decrementing the refcount without calling +// _Py_Dealloc(). Returns 0 if the object is dead (the normal case), and +// deallocation should continue. Returns 1 if the object is still alive. +static inline int +_PyObject_ResurrectEnd(PyObject *op) +{ +#ifdef Py_REF_DEBUG + _Py_DecRefTotal(_PyThreadState_GET()); +#endif +#ifndef Py_GIL_DISABLED + Py_SET_REFCNT(op, Py_REFCNT(op) - 1); + return Py_REFCNT(op) != 0; +#else + uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); + Py_ssize_t shared = _Py_atomic_load_ssize_acquire(&op->ob_ref_shared); + if (_Py_IsOwnedByCurrentThread(op) && local == 1 && shared == 0) { + // Fast-path: object has a single refcount and is owned by this thread + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, 0); + return 0; + } + // Slow-path: object has a shared refcount or is not owned by this thread + return _PyObject_ResurrectEndSlow(op); +#endif +} + +/* Tries to incref op and returns 1 if successful or 0 otherwise. 
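+
+   A hedged sketch (illustrative call site): promoting a borrowed pointer
+   read from a hypothetical `slots` array into a strong reference:
+
+       PyObject *item = slots[i];                // borrowed
+       if (item != NULL && _Py_TryIncref(item)) {
+           return item;                          // now a strong reference
+       }
+       return NULL;                              // lost a race with dealloc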
*/ +static inline int +_Py_TryIncref(PyObject *op) +{ +#ifdef Py_GIL_DISABLED + return _Py_TryIncrefFast(op) || _Py_TryIncRefShared(op); +#else + if (Py_REFCNT(op) > 0) { + Py_INCREF(op); + return 1; + } + return 0; +#endif +} + +#ifdef Py_REF_DEBUG +extern void _PyInterpreterState_FinalizeRefTotal(PyInterpreterState *); +extern void _Py_FinalizeRefTotal(_PyRuntimeState *); +extern void _PyDebug_PrintTotalRefs(void); +#endif + +#ifdef Py_TRACE_REFS +extern void _Py_AddToAllObjects(PyObject *op); +extern void _Py_PrintReferences(PyInterpreterState *, FILE *); +extern void _Py_PrintReferenceAddresses(PyInterpreterState *, FILE *); +#endif + + +/* Return the *address* of the object's weaklist. The address may be + * dereferenced to get the current head of the weaklist. This is useful + * for iterating over the linked list of weakrefs, especially when the + * list is being modified externally (e.g. refs getting removed). + * + * The returned pointer should not be used to change the head of the list + * nor should it be used to add, remove, or swap any refs in the list. + * That is the sole responsibility of the code in weakrefobject.c. + */ +static inline PyObject ** +_PyObject_GET_WEAKREFS_LISTPTR(PyObject *op) +{ + if (PyType_Check(op) && + ((PyTypeObject *)op)->tp_flags & _Py_TPFLAGS_STATIC_BUILTIN) { + PyInterpreterState *interp = _PyInterpreterState_GET(); + managed_static_type_state *state = _PyStaticType_GetState( + interp, (PyTypeObject *)op); + return _PyStaticType_GET_WEAKREFS_LISTPTR(state); + } + // Essentially _PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(): + Py_ssize_t offset = Py_TYPE(op)->tp_weaklistoffset; + return (PyObject **)((char *)op + offset); +} + +/* This is a special case of _PyObject_GET_WEAKREFS_LISTPTR(). + * Only the most fundamental lookup path is used. + * Consequently, static types should not be used. + * + * For static builtin types the returned pointer will always point + * to a NULL tp_weaklist. This is fine for any deallocation cases, + * since static types are never deallocated and static builtin types + * are only finalized at the end of runtime finalization. + * + * If the weaklist for static types is actually needed then use + * _PyObject_GET_WEAKREFS_LISTPTR(). 
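+ *
+ * A hedged sketch (illustrative only) of the deallocation-time check this
+ * enables:
+ *
+ *     if (_PyType_SUPPORTS_WEAKREFS(Py_TYPE(op)) &&
+ *         *_PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(op) != NULL)
+ *     {
+ *         PyObject_ClearWeakRefs(op);   // public API; clears the whole list
+ *     }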
+ */ +static inline PyWeakReference ** +_PyObject_GET_WEAKREFS_LISTPTR_FROM_OFFSET(PyObject *op) +{ + assert(!PyType_Check(op) || + ((PyTypeObject *)op)->tp_flags & Py_TPFLAGS_HEAPTYPE); + Py_ssize_t offset = Py_TYPE(op)->tp_weaklistoffset; + return (PyWeakReference **)((char *)op + offset); +} + +// Fast inlined version of PyObject_IS_GC() +static inline int +_PyObject_IS_GC(PyObject *obj) +{ + PyTypeObject *type = Py_TYPE(obj); + return (PyType_IS_GC(type) + && (type->tp_is_gc == NULL || type->tp_is_gc(obj))); +} + +// Fast inlined version of PyObject_Hash() +static inline Py_hash_t +_PyObject_HashFast(PyObject *op) +{ + if (PyUnicode_CheckExact(op)) { + Py_hash_t hash = FT_ATOMIC_LOAD_SSIZE_RELAXED( + _PyASCIIObject_CAST(op)->hash); + if (hash != -1) { + return hash; + } + } + return PyObject_Hash(op); +} + +// Fast inlined version of PyType_IS_GC() +#define _PyType_IS_GC(t) _PyType_HasFeature((t), Py_TPFLAGS_HAVE_GC) + +static inline size_t +_PyType_PreHeaderSize(PyTypeObject *tp) +{ + return ( +#ifndef Py_GIL_DISABLED + _PyType_IS_GC(tp) * sizeof(PyGC_Head) + +#endif + _PyType_HasFeature(tp, Py_TPFLAGS_PREHEADER) * 2 * sizeof(PyObject *) + ); +} + +void _PyObject_GC_Link(PyObject *op); + +// Usage: assert(_Py_CheckSlotResult(obj, "__getitem__", result != NULL)); +extern int _Py_CheckSlotResult( + PyObject *obj, + const char *slot_name, + int success); + +// Test if a type supports weak references +static inline int _PyType_SUPPORTS_WEAKREFS(PyTypeObject *type) { + return (type->tp_weaklistoffset != 0); +} + +extern PyObject* _PyType_AllocNoTrack(PyTypeObject *type, Py_ssize_t nitems); +extern PyObject *_PyType_NewManagedObject(PyTypeObject *type); + +extern PyTypeObject* _PyType_CalculateMetaclass(PyTypeObject *, PyObject *); +extern PyObject* _PyType_GetDocFromInternalDoc(const char *, const char *); +extern PyObject* _PyType_GetTextSignatureFromInternalDoc(const char *, const char *, int); +extern int _PyObject_SetAttributeErrorContext(PyObject *v, PyObject* name); + +void _PyObject_InitInlineValues(PyObject *obj, PyTypeObject *tp); +extern int _PyObject_StoreInstanceAttribute(PyObject *obj, + PyObject *name, PyObject *value); +extern bool _PyObject_TryGetInstanceAttribute(PyObject *obj, PyObject *name, + PyObject **attr); + +#ifdef Py_GIL_DISABLED +# define MANAGED_DICT_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-1) +# define MANAGED_WEAKREF_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-2) +#else +# define MANAGED_DICT_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-3) +# define MANAGED_WEAKREF_OFFSET (((Py_ssize_t)sizeof(PyObject *))*-4) +#endif + +typedef union { + PyDictObject *dict; +} PyManagedDictPointer; + +static inline PyManagedDictPointer * +_PyObject_ManagedDictPointer(PyObject *obj) +{ + assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT); + return (PyManagedDictPointer *)((char *)obj + MANAGED_DICT_OFFSET); +} + +static inline PyDictObject * +_PyObject_GetManagedDict(PyObject *obj) +{ + PyManagedDictPointer *dorv = _PyObject_ManagedDictPointer(obj); + return (PyDictObject *)FT_ATOMIC_LOAD_PTR_ACQUIRE(dorv->dict); +} + +static inline PyDictValues * +_PyObject_InlineValues(PyObject *obj) +{ + assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_INLINE_VALUES); + assert(Py_TYPE(obj)->tp_flags & Py_TPFLAGS_MANAGED_DICT); + assert(Py_TYPE(obj)->tp_basicsize == sizeof(PyObject)); + return (PyDictValues *)((char *)obj + sizeof(PyObject)); +} + +extern PyObject ** _PyObject_ComputedDictPointer(PyObject *); +extern int _PyObject_IsInstanceDictEmpty(PyObject *); + +// Export for 'math' shared 
extension
+PyAPI_FUNC(PyObject*) _PyObject_LookupSpecial(PyObject *, PyObject *);
+
+extern int _PyObject_IsAbstract(PyObject *);
+
+PyAPI_FUNC(int) _PyObject_GetMethod(PyObject *obj, PyObject *name, PyObject **method);
+extern PyObject* _PyObject_NextNotImplemented(PyObject *);
+
+// Pickle support.
+// Export for '_datetime' shared extension
+PyAPI_FUNC(PyObject*) _PyObject_GetState(PyObject *);
+
+/* C function call trampolines to mitigate bad function pointer casts.
+ *
+ * Typical native ABIs ignore additional arguments or fill in missing
+ * values with 0/NULL in a function pointer cast. Compilers do not show
+ * warnings when a function pointer is explicitly cast to an
+ * incompatible type.
+ *
+ * Bad fpcasts are an issue in WebAssembly. WASM's indirect_call has strict
+ * function signature checks. Argument count, types, and return type must
+ * match.
+ *
+ * Third-party code unintentionally relies on problematic fpcasts. The call
+ * trampoline mitigates common occurrences of bad fpcasts on Emscripten.
+ */
+#if !(defined(__EMSCRIPTEN__) && defined(PY_CALL_TRAMPOLINE))
+#define _PyCFunction_TrampolineCall(meth, self, args) \
+    (meth)((self), (args))
+#define _PyCFunctionWithKeywords_TrampolineCall(meth, self, args, kw) \
+    (meth)((self), (args), (kw))
+#endif // __EMSCRIPTEN__ && PY_CALL_TRAMPOLINE
+
+// Export these 2 symbols for '_pickle' shared extension
+PyAPI_DATA(PyTypeObject) _PyNone_Type;
+PyAPI_DATA(PyTypeObject) _PyNotImplemented_Type;
+
+// Maps Py_LT to Py_GT, ..., Py_GE to Py_LE.
+// Export for the stable ABI.
+PyAPI_DATA(int) _Py_SwappedOp[];
+
+extern void _Py_GetConstant_Init(void);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_OBJECT_H */
diff --git a/Include/internal/pycore_object_alloc.h b/Include/internal/pycore_object_alloc.h
new file mode 100644
index 0000000000000000000000000000000000000000..8cc7a444bc93e7141702a944c6fbce4fa8bc0961
--- /dev/null
+++ b/Include/internal/pycore_object_alloc.h
@@ -0,0 +1,71 @@
+#ifndef Py_INTERNAL_OBJECT_ALLOC_H
+#define Py_INTERNAL_OBJECT_ALLOC_H
+
+#include "pycore_object.h"    // _PyType_HasFeature()
+#include "pycore_pystate.h"   // _PyThreadState_GET()
+#include "pycore_tstate.h"    // _PyThreadStateImpl
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#ifdef Py_GIL_DISABLED
+static inline mi_heap_t *
+_PyObject_GetAllocationHeap(_PyThreadStateImpl *tstate, PyTypeObject *tp)
+{
+    struct _mimalloc_thread_state *m = &tstate->mimalloc;
+    if (_PyType_HasFeature(tp, Py_TPFLAGS_PREHEADER)) {
+        return &m->heaps[_Py_MIMALLOC_HEAP_GC_PRE];
+    }
+    else if (_PyType_IS_GC(tp)) {
+        return &m->heaps[_Py_MIMALLOC_HEAP_GC];
+    }
+    else {
+        return &m->heaps[_Py_MIMALLOC_HEAP_OBJECT];
+    }
+}
+#endif
+
+// Sets the heap used for PyObject_Malloc(), PyObject_Realloc(), etc. calls in
+// Py_GIL_DISABLED builds. We use different heaps depending on whether the
+// object supports GC and whether it has a pre-header. We smuggle the choice
+// of heap through the _mimalloc_thread_state. In the default build, this
+// simply calls PyObject_Malloc().
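+//
+// [Editor's note] Compiled-out sketch, not part of CPython: how a
+// hypothetical allocation site could use the helper below so that, in
+// Py_GIL_DISABLED builds, the memory comes from the mimalloc heap matching
+// the type. _PyObject_SIZE(tp) is tp->tp_basicsize.
+#if 0
+static PyObject *
+example_alloc_instance(PyTypeObject *tp)
+{
+    // Ignores object initialization, GC tracking, and tp_itemsize for brevity.
+    PyObject *op = (PyObject *)_PyObject_MallocWithType(tp, _PyObject_SIZE(tp));
+    if (op == NULL) {
+        return PyErr_NoMemory();
+    }
+    return op;
+}
+#endif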
+static inline void * +_PyObject_MallocWithType(PyTypeObject *tp, size_t size) +{ +#ifdef Py_GIL_DISABLED + _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET(); + struct _mimalloc_thread_state *m = &tstate->mimalloc; + m->current_object_heap = _PyObject_GetAllocationHeap(tstate, tp); +#endif + void *mem = PyObject_Malloc(size); +#ifdef Py_GIL_DISABLED + m->current_object_heap = &m->heaps[_Py_MIMALLOC_HEAP_OBJECT]; +#endif + return mem; +} + +static inline void * +_PyObject_ReallocWithType(PyTypeObject *tp, void *ptr, size_t size) +{ +#ifdef Py_GIL_DISABLED + _PyThreadStateImpl *tstate = (_PyThreadStateImpl *)_PyThreadState_GET(); + struct _mimalloc_thread_state *m = &tstate->mimalloc; + m->current_object_heap = _PyObject_GetAllocationHeap(tstate, tp); +#endif + void *mem = PyObject_Realloc(ptr, size); +#ifdef Py_GIL_DISABLED + m->current_object_heap = &m->heaps[_Py_MIMALLOC_HEAP_OBJECT]; +#endif + return mem; +} + +#ifdef __cplusplus +} +#endif +#endif // !Py_INTERNAL_OBJECT_ALLOC_H diff --git a/Include/internal/pycore_object_stack.h b/Include/internal/pycore_object_stack.h new file mode 100644 index 0000000000000000000000000000000000000000..639f3c0c0d0b766290f1f4dfed44115fc2aacfcc --- /dev/null +++ b/Include/internal/pycore_object_stack.h @@ -0,0 +1,97 @@ +#ifndef Py_INTERNAL_OBJECT_STACK_H +#define Py_INTERNAL_OBJECT_STACK_H + +#include "pycore_freelist.h" // _PyFreeListState + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +// _PyObjectStack is a stack of Python objects implemented as a linked list of +// fixed size buffers. + +// Chosen so that _PyObjectStackChunk is a power-of-two size. +#define _Py_OBJECT_STACK_CHUNK_SIZE 254 + +typedef struct _PyObjectStackChunk { + struct _PyObjectStackChunk *prev; + Py_ssize_t n; + PyObject *objs[_Py_OBJECT_STACK_CHUNK_SIZE]; +} _PyObjectStackChunk; + +typedef struct _PyObjectStack { + _PyObjectStackChunk *head; +} _PyObjectStack; + + +extern _PyObjectStackChunk * +_PyObjectStackChunk_New(void); + +extern void +_PyObjectStackChunk_Free(_PyObjectStackChunk *); + +// Push an item onto the stack. Return -1 on allocation failure, 0 on success. +static inline int +_PyObjectStack_Push(_PyObjectStack *stack, PyObject *obj) +{ + _PyObjectStackChunk *buf = stack->head; + if (buf == NULL || buf->n == _Py_OBJECT_STACK_CHUNK_SIZE) { + buf = _PyObjectStackChunk_New(); + if (buf == NULL) { + return -1; + } + buf->prev = stack->head; + buf->n = 0; + stack->head = buf; + } + + assert(buf->n >= 0 && buf->n < _Py_OBJECT_STACK_CHUNK_SIZE); + buf->objs[buf->n] = obj; + buf->n++; + return 0; +} + +// Pop the top item from the stack. Return NULL if the stack is empty. 
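+// [Editor's note] Compiled-out sketch, not from CPython, of typical use of
+// the push/pop pair defined here: push may fail on chunk allocation, and pop
+// (defined just below) drains the stack until it is empty.
+#if 0
+static int
+example_roundtrip(_PyObjectStack *stack, PyObject *obj)
+{
+    if (_PyObjectStack_Push(stack, obj) < 0) {
+        return -1;  // allocating a new chunk failed
+    }
+    PyObject *op;
+    while ((op = _PyObjectStack_Pop(stack)) != NULL) {
+        // ... process op ...
+    }
+    return 0;
+}
+#endif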
+static inline PyObject *
+_PyObjectStack_Pop(_PyObjectStack *stack)
+{
+    _PyObjectStackChunk *buf = stack->head;
+    if (buf == NULL) {
+        return NULL;
+    }
+    assert(buf->n > 0 && buf->n <= _Py_OBJECT_STACK_CHUNK_SIZE);
+    buf->n--;
+    PyObject *obj = buf->objs[buf->n];
+    if (buf->n == 0) {
+        stack->head = buf->prev;
+        _PyObjectStackChunk_Free(buf);
+    }
+    return obj;
+}
+
+static inline Py_ssize_t
+_PyObjectStack_Size(_PyObjectStack *stack)
+{
+    Py_ssize_t size = 0;
+    for (_PyObjectStackChunk *buf = stack->head; buf != NULL; buf = buf->prev) {
+        size += buf->n;
+    }
+    return size;
+}
+
+// Merge src into dst, leaving src empty
+extern void
+_PyObjectStack_Merge(_PyObjectStack *dst, _PyObjectStack *src);
+
+// Remove all items from the stack
+extern void
+_PyObjectStack_Clear(_PyObjectStack *stack);
+
+#ifdef __cplusplus
+}
+#endif
+#endif // !Py_INTERNAL_OBJECT_STACK_H
diff --git a/Include/internal/pycore_object_state.h b/Include/internal/pycore_object_state.h
new file mode 100644
index 0000000000000000000000000000000000000000..cd7c9335b3e611c23b786d551d3fb9ab977c015f
--- /dev/null
+++ b/Include/internal/pycore_object_state.h
@@ -0,0 +1,41 @@
+#ifndef Py_INTERNAL_OBJECT_STATE_H
+#define Py_INTERNAL_OBJECT_STATE_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include "pycore_freelist.h"   // _PyObject_freelists
+#include "pycore_hashtable.h"  // _Py_hashtable_t
+
+struct _py_object_runtime_state {
+#ifdef Py_REF_DEBUG
+    Py_ssize_t interpreter_leaks;
+#endif
+    int _not_used;
+};
+
+struct _py_object_state {
+#if !defined(Py_GIL_DISABLED)
+    struct _Py_object_freelists freelists;
+#endif
+#ifdef Py_REF_DEBUG
+    Py_ssize_t reftotal;
+#endif
+#ifdef Py_TRACE_REFS
+    // Hash table storing all objects. The key is the object pointer
+    // (PyObject*) and the value is always the number 1 (as uintptr_t).
+    // See _PyRefchain_IsTraced() and _PyRefchain_Trace() functions.
+    _Py_hashtable_t *refchain;
+#endif
+    int _not_used;
+};
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_OBJECT_STATE_H */
diff --git a/Include/internal/pycore_obmalloc.h b/Include/internal/pycore_obmalloc.h
new file mode 100644
index 0000000000000000000000000000000000000000..9140d8f08f0af1e677a5234187050737ccb71955
--- /dev/null
+++ b/Include/internal/pycore_obmalloc.h
@@ -0,0 +1,702 @@
+#ifndef Py_INTERNAL_OBMALLOC_H
+#define Py_INTERNAL_OBMALLOC_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+
+typedef unsigned int pymem_uint;  /* assuming >= 16 bits */
+
+#undef  uint
+#define uint pymem_uint
+
+
+/* An object allocator for Python.
+
+   Here is an introduction to the layers of the Python memory architecture,
+   showing where the object allocator is actually used (layer +2). It is
+   called for every object allocation and deallocation (PyObject_New/Del),
+   unless the object-specific allocators implement a proprietary allocation
+   scheme (ex.: ints use a simple free list). This is also the place where
+   the cyclic garbage collector operates selectively on container objects.
+
+
+        Object-specific allocators
+    _____   ______   ______       ________
+   [ int ] [ dict ] [ list ] ... [ string ]       Python core         |
++3 | <----- Object-specific memory -----> | <-- Non-object memory --> |
+    _______________________________       |                           |
+   [   Python's object allocator   ]      |                           |
++2 | ####### Object memory ####### | <------ Internal buffers ------> |
+    ______________________________________________________________    |
+   [          Python's raw memory allocator (PyMem_ API)          ]   |
++1 | <----- Python memory (under PyMem manager's control) ------> |   |
+    __________________________________________________________________
+   [    Underlying general-purpose allocator (ex: C library malloc)   ]
+ 0 | <------ Virtual memory allocated for the python process -------> |
+
+   =========================================================================
+    _______________________________________________________________________
+   [                OS-specific Virtual Memory Manager (VMM)               ]
+-1 | <--- Kernel dynamic storage allocation & management (page-based) ---> |
+    __________________________________   __________________________________
+   [                                  ] [                                  ]
+-2 | <-- Physical memory: ROM/RAM --> | | <-- Secondary storage (swap) --> |
+
+*/
+/*==========================================================================*/
+
+/* A fast, special-purpose memory allocator for small blocks, to be used
+   on top of a general-purpose malloc -- heavily based on previous art. */
+
+/* Vladimir Marangozov -- August 2000 */
+
+/*
+ * "Memory management is where the rubber meets the road -- if we do the wrong
+ * thing at any level, the results will not be good. And if we don't make the
+ * levels work well together, we are in serious trouble." (1)
+ *
+ * (1) Paul R. Wilson, Mark S. Johnstone, Michael Neely, and David Boles,
+ * "Dynamic Storage Allocation: A Survey and Critical Review",
+ * in Proc. 1995 Int'l. Workshop on Memory Management, September 1995.
+ */
+
+/* #undef WITH_MEMORY_LIMITS */         /* disable mem limit checks  */
+
+/*==========================================================================*/
+
+/*
+ * Allocation strategy abstract:
+ *
+ * For small requests, the allocator sub-allocates blocks of memory.
+ * Requests greater than SMALL_REQUEST_THRESHOLD bytes are routed to the
+ * system's allocator.
+ *
+ * Small requests are grouped in size classes spaced 8 bytes apart, due
+ * to the required valid alignment of the returned address. Requests of
+ * a particular size are serviced from memory pools of 4K (one VMM page).
+ * Pools are fragmented on demand and contain free lists of blocks of one
+ * particular size class. In other words, there is a fixed-size allocator
+ * for each size class. Free pools are shared by the different allocators,
+ * thus minimizing the space reserved for a particular size class.
+ *
+ * This allocation strategy is a variant of what is known as "simple
+ * segregated storage based on array of free lists". The main drawback of
+ * simple segregated storage is that we might end up with a lot of reserved
+ * memory for the different free lists, which degenerate over time. To avoid
+ * this, we partition each free list in pools and we share dynamically the
+ * reserved space between all free lists. This technique is quite efficient
+ * for memory-intensive programs which allocate mainly small-sized blocks.
+ *
+ * For small requests we have the following table:
+ *
+ * Request in bytes     Size of allocated block      Size class idx
+ * ----------------------------------------------------------------
+ *        1-8                     8                       0
+ *        9-16                   16                       1
+ *       17-24                   24                       2
+ *       25-32                   32                       3
+ *       33-40                   40                       4
+ *       41-48                   48                       5
+ *       49-56                   56                       6
+ *       57-64                   64                       7
+ *       65-72                   72                       8
+ *        ...                   ...                     ...
+ *      497-504                 504                      62
+ *      505-512                 512                      63
+ *
+ *      0, SMALL_REQUEST_THRESHOLD + 1 and up: routed to the underlying
+ *      allocator.
+ *
+ *      (Worked example: with 8-byte classes, ALIGNMENT_SHIFT == 3, a request
+ *      of 20 bytes maps to size class idx = (20 - 1) >> 3 == 2, i.e. a
+ *      24-byte block, since INDEX2SIZE(2) == (2 + 1) << 3 == 24.)
+ */
+
+/*==========================================================================*/
+
+/*
+ * -- Main tunable settings section --
+ */
+
+/*
+ * Alignment of addresses returned to the user. 8-bytes alignment works
+ * on most current architectures (with 32-bit or 64-bit address buses).
+ * The alignment value is also used for grouping small requests in size
+ * classes spaced ALIGNMENT bytes apart.
+ *
+ * You shouldn't change this unless you know what you are doing.
+ */
+
+#if SIZEOF_VOID_P > 4
+#define ALIGNMENT              16               /* must be 2^N */
+#define ALIGNMENT_SHIFT         4
+#else
+#define ALIGNMENT               8               /* must be 2^N */
+#define ALIGNMENT_SHIFT         3
+#endif
+
+/* Return the number of bytes in size class I, as a uint. */
+#define INDEX2SIZE(I) (((pymem_uint)(I) + 1) << ALIGNMENT_SHIFT)
+
+/*
+ * Max size threshold below which malloc requests are considered to be
+ * small enough in order to use preallocated memory pools. You can tune
+ * this value according to your application behaviour and memory needs.
+ *
+ * Note: a size threshold of 512 guarantees that newly created dictionaries
+ * will be allocated from preallocated memory pools on 64-bit.
+ *
+ * The following invariants must hold:
+ *   1) ALIGNMENT <= SMALL_REQUEST_THRESHOLD <= 512
+ *   2) SMALL_REQUEST_THRESHOLD is evenly divisible by ALIGNMENT
+ *
+ * Although not required, for better performance and space efficiency,
+ * it is recommended that SMALL_REQUEST_THRESHOLD is set to a power of 2.
+ */
+#define SMALL_REQUEST_THRESHOLD 512
+#define NB_SMALL_SIZE_CLASSES   (SMALL_REQUEST_THRESHOLD / ALIGNMENT)
+
+/*
+ * The system's VMM page size can be obtained on most unices with a
+ * getpagesize() call or deduced from various header files. To make
+ * things simpler, we assume that it is 4K, which is OK for most systems.
+ * It is probably better if this is the native page size, but it doesn't
+ * have to be. In theory, if SYSTEM_PAGE_SIZE is larger than the native page
+ * size, then `POOL_ADDR(p)->arenaindex' could rarely cause a segmentation
+ * violation fault. 4K is apparently OK for all the platforms that python
+ * currently targets.
+ */
+#define SYSTEM_PAGE_SIZE        (4 * 1024)
+
+/*
+ * Maximum amount of memory managed by the allocator for small requests.
+ */
+#ifdef WITH_MEMORY_LIMITS
+#ifndef SMALL_MEMORY_LIMIT
+#define SMALL_MEMORY_LIMIT      (64 * 1024 * 1024)      /* 64 MB -- more? */
+#endif
+#endif
+
+#if !defined(WITH_PYMALLOC_RADIX_TREE)
+/* Use radix-tree to track arena memory regions, for address_in_range().
+ * Enable by default since it allows larger pool sizes. Can be disabled
+ * using -DWITH_PYMALLOC_RADIX_TREE=0 */
+#define WITH_PYMALLOC_RADIX_TREE 1
+#endif
+
+#if SIZEOF_VOID_P > 4
+/* on 64-bit platforms use larger pools and arenas if we can */
+#define USE_LARGE_ARENAS
+#if WITH_PYMALLOC_RADIX_TREE
+/* large pools only supported if radix-tree is enabled */
+#define USE_LARGE_POOLS
+#endif
+#endif
+
+/*
+ * The allocator sub-allocates blocks of memory (called arenas) aligned
+ * on a page boundary. This is a reserved virtual address space for the
+ * current process (obtained through a malloc()/mmap() call). In no way does
+ * this mean that the memory arenas will be used entirely. A malloc() is
+ * usually an address range reservation for bytes, unless all pages within
+ * this space are referenced subsequently. So malloc'ing big blocks and not
+ * using them does not mean "wasting memory".
It's an addressable range + * wastage... + * + * Arenas are allocated with mmap() on systems supporting anonymous memory + * mappings to reduce heap fragmentation. + */ +#ifdef USE_LARGE_ARENAS +#define ARENA_BITS 20 /* 1 MiB */ +#else +#define ARENA_BITS 18 /* 256 KiB */ +#endif +#define ARENA_SIZE (1 << ARENA_BITS) +#define ARENA_SIZE_MASK (ARENA_SIZE - 1) + +#ifdef WITH_MEMORY_LIMITS +#define MAX_ARENAS (SMALL_MEMORY_LIMIT / ARENA_SIZE) +#endif + +/* + * Size of the pools used for small blocks. Must be a power of 2. + */ +#ifdef USE_LARGE_POOLS +#define POOL_BITS 14 /* 16 KiB */ +#else +#define POOL_BITS 12 /* 4 KiB */ +#endif +#define POOL_SIZE (1 << POOL_BITS) +#define POOL_SIZE_MASK (POOL_SIZE - 1) + +#if !WITH_PYMALLOC_RADIX_TREE +#if POOL_SIZE != SYSTEM_PAGE_SIZE +# error "pool size must be equal to system page size" +#endif +#endif + +#define MAX_POOLS_IN_ARENA (ARENA_SIZE / POOL_SIZE) +#if MAX_POOLS_IN_ARENA * POOL_SIZE != ARENA_SIZE +# error "arena size not an exact multiple of pool size" +#endif + +/* + * -- End of tunable settings section -- + */ + +/*==========================================================================*/ + +/* When you say memory, my mind reasons in terms of (pointers to) blocks */ +typedef uint8_t pymem_block; + +/* Pool for small blocks. */ +struct pool_header { + union { pymem_block *_padding; + uint count; } ref; /* number of allocated blocks */ + pymem_block *freeblock; /* pool's free list head */ + struct pool_header *nextpool; /* next pool of this size class */ + struct pool_header *prevpool; /* previous pool "" */ + uint arenaindex; /* index into arenas of base adr */ + uint szidx; /* block size class index */ + uint nextoffset; /* bytes to virgin block */ + uint maxnextoffset; /* largest valid nextoffset */ +}; + +typedef struct pool_header *poolp; + +/* Record keeping for arenas. */ +struct arena_object { + /* The address of the arena, as returned by malloc. Note that 0 + * will never be returned by a successful malloc, and is used + * here to mark an arena_object that doesn't correspond to an + * allocated arena. + */ + uintptr_t address; + + /* Pool-aligned pointer to the next pool to be carved off. */ + pymem_block* pool_address; + + /* The number of available pools in the arena: free pools + never- + * allocated pools. + */ + uint nfreepools; + + /* The total number of pools in the arena, whether or not available. */ + uint ntotalpools; + + /* Singly-linked list of available pools. */ + struct pool_header* freepools; + + /* Whenever this arena_object is not associated with an allocated + * arena, the nextarena member is used to link all unassociated + * arena_objects in the singly-linked `unused_arena_objects` list. + * The prevarena member is unused in this case. + * + * When this arena_object is associated with an allocated arena + * with at least one available pool, both members are used in the + * doubly-linked `usable_arenas` list, which is maintained in + * increasing order of `nfreepools` values. + * + * Else this arena_object is associated with an allocated arena + * all of whose pools are in use. `nextarena` and `prevarena` + * are both meaningless in this case. 
+     */
+    struct arena_object* nextarena;
+    struct arena_object* prevarena;
+};
+
+#define POOL_OVERHEAD   _Py_SIZE_ROUND_UP(sizeof(struct pool_header), ALIGNMENT)
+
+#define DUMMY_SIZE_IDX      0xffff      /* size class of newly cached pools */
+
+/* Round pointer P down to the closest pool-aligned address <= P, as a poolp */
+#define POOL_ADDR(P) ((poolp)_Py_ALIGN_DOWN((P), POOL_SIZE))
+
+/* Return total number of blocks in pool of size index I, as a uint. */
+#define NUMBLOCKS(I) ((pymem_uint)(POOL_SIZE - POOL_OVERHEAD) / INDEX2SIZE(I))
+
+/*==========================================================================*/
+
+/*
+ * Pool table -- headed, circular, doubly-linked lists of partially used pools.
+
+This is involved. For an index i, usedpools[i+i] is the header for a list of
+all partially used pools holding small blocks with "size class idx" i. So
+usedpools[0] corresponds to blocks of size 8, usedpools[2] to blocks of size
+16, and so on: index 2*i <-> blocks of size (i+1)<<ALIGNMENT_SHIFT.
+
+Blocks within a pool are carved out as needed: pool->freeblock points to
+the start of a singly-linked list of free blocks within the pool. When a
+block is freed, it's inserted at the front of its pool's freeblock list. Note
+that the available blocks in a pool are *not* linked all together when a pool
+is initialized. Instead only "the first two" (lowest addresses) blocks are
+set up, returning the first such block, and setting pool->freeblock to a
+one-block list holding the second such block. This is consistent with the
+fact that pymalloc strives at all levels (arena, pool, and block) never to
+touch a piece of memory until it's actually needed.
+
+So long as a pool is in the used state, we're certain there *is* a block
+available for allocating, and pool->freeblock is not NULL. If pool->freeblock
+points to the end of the free list before we've carved the entire pool into
+blocks, that means we simply haven't yet gotten to one of the higher-address
+blocks. The offset from the pool_header to the start of "the next" virgin
+block is stored in the pool_header nextoffset member, and the largest value
+of nextoffset that makes sense is stored in the maxnextoffset member when a
+pool is initialized. All the blocks in a pool have been passed out at least
+once when and only when nextoffset > maxnextoffset.
+
+
+Major obscurity: While the usedpools vector is declared to have poolp
+entries, it doesn't really. It really contains two pointers per (conceptual)
+poolp entry, the nextpool and prevpool members of a pool_header. The
+excruciating initialization code below fools C so that
+
+    usedpools[i+i]
+
+"acts like" a genuine poolp, but only so long as you only reference its
+nextpool and prevpool members. The "- 2*sizeof(pymem_block *)" gibberish is
+compensating for the fact that a pool_header's nextpool and prevpool members
+immediately follow a pool_header's first two members:
+
+    union { pymem_block *_padding;
+            uint count; } ref;
+    pymem_block *freeblock;
+
+each of which consumes sizeof(pymem_block *) bytes. So what usedpools[i+i]
+really contains is a fudged-up pointer p such that *if* C believes it's a
+poolp pointer, then p->nextpool and p->prevpool are both p (meaning that the
+headed circular list is empty).
+
+It's unclear why the usedpools setup is so convoluted. It could be to
+minimize the amount of cache required to hold this heavily-referenced table
+(which only *needs* the two interpool pointer members of a pool_header).
+OTOH, referencing code has to remember to "double the index" and doing so
+isn't free, usedpools[0] isn't a strictly legal pointer, and we're crucially
+relying on the fact that C doesn't insert any padding anywhere in a
+pool_header at or before the prevpool member.
+**************************************************************************** */
+
+#define OBMALLOC_USED_POOLS_SIZE (2 * ((NB_SMALL_SIZE_CLASSES + 7) / 8) * 8)
+
+struct _obmalloc_pools {
+    poolp used[OBMALLOC_USED_POOLS_SIZE];
+};
+
+
+/*==========================================================================
+Arena management.
+
+`arenas` is a vector of arena_objects. It contains maxarenas entries, some of
+which may not be currently used (== they're arena_objects that aren't
+currently associated with an allocated arena). Note that arenas proper are
+separately malloc'ed.
+
+Prior to Python 2.5, arenas were never free()'ed. Starting with Python 2.5,
+we do try to free() arenas, and use some mild heuristic strategies to increase
+the likelihood that arenas eventually can be freed.
+
+unused_arena_objects
+
+    This is a singly-linked list of the arena_objects that are currently not
+    being used (no arena is associated with them). Objects are taken off the
+    head of the list in new_arena(), and are pushed on the head of the list in
+    PyObject_Free() when the arena is empty. Key invariant: an arena_object
+    is on this list if and only if its .address member is 0.
+
+usable_arenas
+
+    This is a doubly-linked list of the arena_objects associated with arenas
+    that have pools available. These pools are either waiting to be reused,
+    or have not been used before. The list is sorted to have the most-
+    allocated arenas first (ascending order based on the nfreepools member).
+    This means that the next allocation will come from a heavily used arena,
+    which gives the nearly empty arenas a chance to be returned to the system.
+    In my unscientific tests this dramatically improved the number of arenas
+    that could be freed.
+
+Note that an arena_object associated with an arena all of whose pools are
+currently in use isn't on either list.
+
+Changed in Python 3.8: keeping usable_arenas sorted by number of free pools
+used to be done by one-at-a-time linear search when an arena's number of
+free pools changed. That could, overall, consume time quadratic in the
+number of arenas. That didn't really matter when there were only a few
+hundred arenas (typical!), but could be a timing disaster when there were
+hundreds of thousands. See bpo-37029.
+
+Now we have a vector of "search fingers" to eliminate the need to search:
+nfp2lasta[nfp] returns the last ("rightmost") arena in usable_arenas
+with nfp free pools. This is NULL if and only if there is no arena with
+nfp free pools in usable_arenas.
+*/
+
+/* How many arena_objects do we initially allocate?
+ * 16 = can allocate 16 arenas = 16 * ARENA_SIZE (4 MiB with 256 KiB arenas,
+ * 16 MiB with 1 MiB arenas) before growing the `arenas` vector.
+ */
+#define INITIAL_ARENA_OBJECTS 16
+
+struct _obmalloc_mgmt {
+    /* Array of objects used to track chunks of memory (arenas). */
+    struct arena_object* arenas;
+    /* Number of slots currently allocated in the `arenas` vector. */
+    uint maxarenas;
+
+    /* The head of the singly-linked, NULL-terminated list of available
+     * arena_objects.
+     */
+    struct arena_object* unused_arena_objects;
+
+    /* The head of the doubly-linked list (NULL-terminated at each end) of
+     * arena_objects associated with arenas that have pools available.
+ */ + struct arena_object* usable_arenas; + + /* nfp2lasta[nfp] is the last arena in usable_arenas with nfp free pools */ + struct arena_object* nfp2lasta[MAX_POOLS_IN_ARENA + 1]; + + /* Number of arenas allocated that haven't been free()'d. */ + size_t narenas_currently_allocated; + + /* Total number of times malloc() called to allocate an arena. */ + size_t ntimes_arena_allocated; + /* High water mark (max value ever seen) for narenas_currently_allocated. */ + size_t narenas_highwater; + + Py_ssize_t raw_allocated_blocks; +}; + + +#if WITH_PYMALLOC_RADIX_TREE +/*==========================================================================*/ +/* radix tree for tracking arena usage. If enabled, used to implement + address_in_range(). + + memory address bit allocation for keys + + 64-bit pointers, IGNORE_BITS=0 and 2^20 arena size: + 15 -> MAP_TOP_BITS + 15 -> MAP_MID_BITS + 14 -> MAP_BOT_BITS + 20 -> ideal aligned arena + ---- + 64 + + 64-bit pointers, IGNORE_BITS=16, and 2^20 arena size: + 16 -> IGNORE_BITS + 10 -> MAP_TOP_BITS + 10 -> MAP_MID_BITS + 8 -> MAP_BOT_BITS + 20 -> ideal aligned arena + ---- + 64 + + 32-bit pointers and 2^18 arena size: + 14 -> MAP_BOT_BITS + 18 -> ideal aligned arena + ---- + 32 + +*/ + +#if SIZEOF_VOID_P == 8 + +/* number of bits in a pointer */ +#define POINTER_BITS 64 + +/* High bits of memory addresses that will be ignored when indexing into the + * radix tree. Setting this to zero is the safe default. For most 64-bit + * machines, setting this to 16 would be safe. The kernel would not give + * user-space virtual memory addresses that have significant information in + * those high bits. The main advantage to setting IGNORE_BITS > 0 is that less + * virtual memory will be used for the top and middle radix tree arrays. Those + * arrays are allocated in the BSS segment and so will typically consume real + * memory only if actually accessed. + */ +#define IGNORE_BITS 0 + +/* use the top and mid layers of the radix tree */ +#define USE_INTERIOR_NODES + +#elif SIZEOF_VOID_P == 4 + +#define POINTER_BITS 32 +#define IGNORE_BITS 0 + +#else + + /* Currently this code works for 64-bit or 32-bit pointers only. */ +#error "obmalloc radix tree requires 64-bit or 32-bit pointers." 
+
+#endif /* SIZEOF_VOID_P */
+
+/* arena_coverage_t members require this to be true */
+#if ARENA_BITS >= 32
+#   error "arena size must be < 2^32"
+#endif
+
+/* the lower bits of the address that are not ignored */
+#define ADDRESS_BITS (POINTER_BITS - IGNORE_BITS)
+
+#ifdef USE_INTERIOR_NODES
+/* number of bits used for MAP_TOP and MAP_MID nodes
+ * (e.g. 64-bit pointers, IGNORE_BITS == 0, ARENA_BITS == 20:
+ * (64 - 20 + 2) / 3 == 15 bits each for the top and mid levels,
+ * leaving 64 - 20 - 2*15 == 14 bits for MAP_BOT_BITS) */
+#define INTERIOR_BITS ((ADDRESS_BITS - ARENA_BITS + 2) / 3)
+#else
+#define INTERIOR_BITS 0
+#endif
+
+#define MAP_TOP_BITS INTERIOR_BITS
+#define MAP_TOP_LENGTH (1 << MAP_TOP_BITS)
+#define MAP_TOP_MASK (MAP_TOP_LENGTH - 1)
+
+#define MAP_MID_BITS INTERIOR_BITS
+#define MAP_MID_LENGTH (1 << MAP_MID_BITS)
+#define MAP_MID_MASK (MAP_MID_LENGTH - 1)
+
+#define MAP_BOT_BITS (ADDRESS_BITS - ARENA_BITS - 2*INTERIOR_BITS)
+#define MAP_BOT_LENGTH (1 << MAP_BOT_BITS)
+#define MAP_BOT_MASK (MAP_BOT_LENGTH - 1)
+
+#define MAP_BOT_SHIFT ARENA_BITS
+#define MAP_MID_SHIFT (MAP_BOT_BITS + MAP_BOT_SHIFT)
+#define MAP_TOP_SHIFT (MAP_MID_BITS + MAP_MID_SHIFT)
+
+#define AS_UINT(p) ((uintptr_t)(p))
+#define MAP_BOT_INDEX(p) ((AS_UINT(p) >> MAP_BOT_SHIFT) & MAP_BOT_MASK)
+#define MAP_MID_INDEX(p) ((AS_UINT(p) >> MAP_MID_SHIFT) & MAP_MID_MASK)
+#define MAP_TOP_INDEX(p) ((AS_UINT(p) >> MAP_TOP_SHIFT) & MAP_TOP_MASK)
+
+#if IGNORE_BITS > 0
+/* Return the ignored part of the pointer address. Those bits should be the
+ * same for all valid pointers if IGNORE_BITS is set correctly.
+ */
+#define HIGH_BITS(p) (AS_UINT(p) >> ADDRESS_BITS)
+#else
+#define HIGH_BITS(p) 0
+#endif
+
+
+/* This is the leaf of the radix tree. See arena_map_mark_used() for the
+ * meaning of these members. */
+typedef struct {
+    int32_t tail_hi;
+    int32_t tail_lo;
+} arena_coverage_t;
+
+typedef struct arena_map_bot {
+    /* The members tail_hi and tail_lo are accessed together, so it is
+     * better to have them as an array of structs, rather than two
+     * arrays.
+     */
+    arena_coverage_t arenas[MAP_BOT_LENGTH];
+} arena_map_bot_t;
+
+#ifdef USE_INTERIOR_NODES
+typedef struct arena_map_mid {
+    struct arena_map_bot *ptrs[MAP_MID_LENGTH];
+} arena_map_mid_t;
+
+typedef struct arena_map_top {
+    struct arena_map_mid *ptrs[MAP_TOP_LENGTH];
+} arena_map_top_t;
+#endif
+
+struct _obmalloc_usage {
+    /* The root of the radix tree. Note that by initializing like this, the
+     * memory should be in the BSS. The OS will only memory map pages as the
+     * MAP_MID nodes get used (OS pages are demand loaded as needed).
+     */
+#ifdef USE_INTERIOR_NODES
+    arena_map_top_t arena_map_root;
+    /* accounting for number of used interior nodes */
+    int arena_map_mid_count;
+    int arena_map_bot_count;
+#else
+    arena_map_bot_t arena_map_root;
+#endif
+};
+
+#endif /* WITH_PYMALLOC_RADIX_TREE */
+
+
+struct _obmalloc_global_state {
+    int dump_debug_stats;
+    Py_ssize_t interpreter_leaks;
+};
+
+struct _obmalloc_state {
+    struct _obmalloc_pools pools;
+    struct _obmalloc_mgmt mgmt;
+#if WITH_PYMALLOC_RADIX_TREE
+    struct _obmalloc_usage usage;
+#endif
+};
+
+
+#undef  uint
+
+
+/* Allocate memory directly from the O/S virtual memory system,
+ * where supported.
+ * Otherwise, fall back on malloc. */
+void *_PyObject_VirtualAlloc(size_t size);
+void _PyObject_VirtualFree(void *, size_t size);
+
+
+/* This function returns the number of allocated memory blocks, regardless of size */
+extern Py_ssize_t _Py_GetGlobalAllocatedBlocks(void);
+#define _Py_GetAllocatedBlocks() \
+    _Py_GetGlobalAllocatedBlocks()
+extern Py_ssize_t _PyInterpreterState_GetAllocatedBlocks(PyInterpreterState *);
+extern void _PyInterpreterState_FinalizeAllocatedBlocks(PyInterpreterState *);
+extern int _PyMem_init_obmalloc(PyInterpreterState *interp);
+extern bool _PyMem_obmalloc_state_on_heap(PyInterpreterState *interp);
+
+
+#ifdef WITH_PYMALLOC
+// Export the symbol for the 3rd party 'guppy3' project
+PyAPI_FUNC(int) _PyObject_DebugMallocStats(FILE *out);
+#endif
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif  // !Py_INTERNAL_OBMALLOC_H
diff --git a/Include/internal/pycore_obmalloc_init.h b/Include/internal/pycore_obmalloc_init.h
new file mode 100644
index 0000000000000000000000000000000000000000..e6811b7aeca73c196b16519cb12a902af442f998
--- /dev/null
+++ b/Include/internal/pycore_obmalloc_init.h
@@ -0,0 +1,66 @@
+#ifndef Py_INTERNAL_OBMALLOC_INIT_H
+#define Py_INTERNAL_OBMALLOC_INIT_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+
+/****************************************************/
+/* the default object allocator's state initializer */
+
+#define PTA(pools, x) \
+    ((poolp )((uint8_t *)&(pools.used[2*(x)]) - 2*sizeof(pymem_block *)))
+#define PT(p, x) PTA(p, x), PTA(p, x)
+
+#define PT_8(p, start) \
+    PT(p, start), \
+    PT(p, start+1), \
+    PT(p, start+2), \
+    PT(p, start+3), \
+    PT(p, start+4), \
+    PT(p, start+5), \
+    PT(p, start+6), \
+    PT(p, start+7)
+
+#if NB_SMALL_SIZE_CLASSES <= 8
+#  define _obmalloc_pools_INIT(p) \
+    { PT_8(p, 0) }
+#elif NB_SMALL_SIZE_CLASSES <= 16
+#  define _obmalloc_pools_INIT(p) \
+    { PT_8(p, 0), PT_8(p, 8) }
+#elif NB_SMALL_SIZE_CLASSES <= 24
+#  define _obmalloc_pools_INIT(p) \
+    { PT_8(p, 0), PT_8(p, 8), PT_8(p, 16) }
+#elif NB_SMALL_SIZE_CLASSES <= 32
+#  define _obmalloc_pools_INIT(p) \
+    { PT_8(p, 0), PT_8(p, 8), PT_8(p, 16), PT_8(p, 24) }
+#elif NB_SMALL_SIZE_CLASSES <= 40
+#  define _obmalloc_pools_INIT(p) \
+    { PT_8(p, 0), PT_8(p, 8), PT_8(p, 16), PT_8(p, 24), PT_8(p, 32) }
+#elif NB_SMALL_SIZE_CLASSES <= 48
+#  define _obmalloc_pools_INIT(p) \
+    { PT_8(p, 0), PT_8(p, 8), PT_8(p, 16), PT_8(p, 24), PT_8(p, 32), PT_8(p, 40) }
+#elif NB_SMALL_SIZE_CLASSES <= 56
+#  define _obmalloc_pools_INIT(p) \
+    { PT_8(p, 0), PT_8(p, 8), PT_8(p, 16), PT_8(p, 24), PT_8(p, 32), PT_8(p, 40), PT_8(p, 48) }
+#elif NB_SMALL_SIZE_CLASSES <= 64
+#  define _obmalloc_pools_INIT(p) \
+    { PT_8(p, 0), PT_8(p, 8), PT_8(p, 16), PT_8(p, 24), PT_8(p, 32), PT_8(p, 40), PT_8(p, 48), PT_8(p, 56) }
+#else
+#  error "NB_SMALL_SIZE_CLASSES should be <= 64"
+#endif
+
+#define _obmalloc_global_state_INIT \
+    { \
+        .dump_debug_stats = -1, \
+    }
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif  // !Py_INTERNAL_OBMALLOC_INIT_H
diff --git a/Include/internal/pycore_opcode_metadata.h b/Include/internal/pycore_opcode_metadata.h
new file mode 100644
index 0000000000000000000000000000000000000000..bbba0bbbf4b0aa914a8da3a145205d88e270ec1f
--- /dev/null
+++ b/Include/internal/pycore_opcode_metadata.h
@@ -0,0 +1,1922 @@
+// This file is generated by Tools/cases_generator/opcode_metadata_generator.py
+// from:
+//   Python/bytecodes.c
+// Do not edit!
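+//
+// [Editor's note] Compiled-out sketch, not part of the generated file: the
+// lookup functions declared below yield an instruction's net stack effect;
+// both return -1 for an unknown opcode.
+#if 0
+static int
+example_net_stack_effect(int opcode, int oparg)
+{
+    int popped = _PyOpcode_num_popped(opcode, oparg);
+    int pushed = _PyOpcode_num_pushed(opcode, oparg);
+    if (popped < 0 || pushed < 0) {
+        return 0;  // unknown opcode
+    }
+    // Positive result means the instruction grows the stack.
+    return pushed - popped;
+}
+#endif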
+ +#ifndef Py_CORE_OPCODE_METADATA_H +#define Py_CORE_OPCODE_METADATA_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include // bool +#include "opcode_ids.h" + + +#define IS_PSEUDO_INSTR(OP) ( \ + ((OP) == LOAD_CLOSURE) || \ + ((OP) == STORE_FAST_MAYBE_NULL) || \ + ((OP) == LOAD_SUPER_METHOD) || \ + ((OP) == LOAD_ZERO_SUPER_METHOD) || \ + ((OP) == LOAD_ZERO_SUPER_ATTR) || \ + ((OP) == LOAD_METHOD) || \ + ((OP) == JUMP) || \ + ((OP) == JUMP_NO_INTERRUPT) || \ + ((OP) == SETUP_FINALLY) || \ + ((OP) == SETUP_CLEANUP) || \ + ((OP) == SETUP_WITH) || \ + ((OP) == POP_BLOCK) || \ + 0) + +#include "pycore_uop_ids.h" +extern int _PyOpcode_num_popped(int opcode, int oparg); +#ifdef NEED_OPCODE_METADATA +int _PyOpcode_num_popped(int opcode, int oparg) { + switch(opcode) { + case BEFORE_ASYNC_WITH: + return 1; + case BEFORE_WITH: + return 1; + case BINARY_OP: + return 2; + case BINARY_OP_ADD_FLOAT: + return 2; + case BINARY_OP_ADD_INT: + return 2; + case BINARY_OP_ADD_UNICODE: + return 2; + case BINARY_OP_INPLACE_ADD_UNICODE: + return 2; + case BINARY_OP_MULTIPLY_FLOAT: + return 2; + case BINARY_OP_MULTIPLY_INT: + return 2; + case BINARY_OP_SUBTRACT_FLOAT: + return 2; + case BINARY_OP_SUBTRACT_INT: + return 2; + case BINARY_SLICE: + return 3; + case BINARY_SUBSCR: + return 2; + case BINARY_SUBSCR_DICT: + return 2; + case BINARY_SUBSCR_GETITEM: + return 2; + case BINARY_SUBSCR_LIST_INT: + return 2; + case BINARY_SUBSCR_STR_INT: + return 2; + case BINARY_SUBSCR_TUPLE_INT: + return 2; + case BUILD_CONST_KEY_MAP: + return 1 + oparg; + case BUILD_LIST: + return oparg; + case BUILD_MAP: + return oparg*2; + case BUILD_SET: + return oparg; + case BUILD_SLICE: + return 2 + ((oparg == 3) ? 
1 : 0); + case BUILD_STRING: + return oparg; + case BUILD_TUPLE: + return oparg; + case CACHE: + return 0; + case CALL: + return 2 + oparg; + case CALL_ALLOC_AND_ENTER_INIT: + return 2 + oparg; + case CALL_BOUND_METHOD_EXACT_ARGS: + return 2 + oparg; + case CALL_BOUND_METHOD_GENERAL: + return 2 + oparg; + case CALL_BUILTIN_CLASS: + return 2 + oparg; + case CALL_BUILTIN_FAST: + return 2 + oparg; + case CALL_BUILTIN_FAST_WITH_KEYWORDS: + return 2 + oparg; + case CALL_BUILTIN_O: + return 2 + oparg; + case CALL_FUNCTION_EX: + return 3 + (oparg & 1); + case CALL_INTRINSIC_1: + return 1; + case CALL_INTRINSIC_2: + return 2; + case CALL_ISINSTANCE: + return 2 + oparg; + case CALL_KW: + return 3 + oparg; + case CALL_LEN: + return 2 + oparg; + case CALL_LIST_APPEND: + return 3; + case CALL_METHOD_DESCRIPTOR_FAST: + return 2 + oparg; + case CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: + return 2 + oparg; + case CALL_METHOD_DESCRIPTOR_NOARGS: + return 2 + oparg; + case CALL_METHOD_DESCRIPTOR_O: + return 2 + oparg; + case CALL_NON_PY_GENERAL: + return 2 + oparg; + case CALL_PY_EXACT_ARGS: + return 2 + oparg; + case CALL_PY_GENERAL: + return 2 + oparg; + case CALL_STR_1: + return 3; + case CALL_TUPLE_1: + return 3; + case CALL_TYPE_1: + return 3; + case CHECK_EG_MATCH: + return 2; + case CHECK_EXC_MATCH: + return 2; + case CLEANUP_THROW: + return 3; + case COMPARE_OP: + return 2; + case COMPARE_OP_FLOAT: + return 2; + case COMPARE_OP_INT: + return 2; + case COMPARE_OP_STR: + return 2; + case CONTAINS_OP: + return 2; + case CONTAINS_OP_DICT: + return 2; + case CONTAINS_OP_SET: + return 2; + case CONVERT_VALUE: + return 1; + case COPY: + return 1 + (oparg-1); + case COPY_FREE_VARS: + return 0; + case DELETE_ATTR: + return 1; + case DELETE_DEREF: + return 0; + case DELETE_FAST: + return 0; + case DELETE_GLOBAL: + return 0; + case DELETE_NAME: + return 0; + case DELETE_SUBSCR: + return 2; + case DICT_MERGE: + return 5 + (oparg - 1); + case DICT_UPDATE: + return 2 + (oparg - 1); + case END_ASYNC_FOR: + return 2; + case END_FOR: + return 1; + case END_SEND: + return 2; + case ENTER_EXECUTOR: + return 0; + case EXIT_INIT_CHECK: + return 1; + case EXTENDED_ARG: + return 0; + case FORMAT_SIMPLE: + return 1; + case FORMAT_WITH_SPEC: + return 2; + case FOR_ITER: + return 1; + case FOR_ITER_GEN: + return 1; + case FOR_ITER_LIST: + return 1; + case FOR_ITER_RANGE: + return 1; + case FOR_ITER_TUPLE: + return 1; + case GET_AITER: + return 1; + case GET_ANEXT: + return 1; + case GET_AWAITABLE: + return 1; + case GET_ITER: + return 1; + case GET_LEN: + return 1; + case GET_YIELD_FROM_ITER: + return 1; + case IMPORT_FROM: + return 1; + case IMPORT_NAME: + return 2; + case INSTRUMENTED_CALL: + return 0; + case INSTRUMENTED_CALL_FUNCTION_EX: + return 0; + case INSTRUMENTED_CALL_KW: + return 0; + case INSTRUMENTED_END_FOR: + return 2; + case INSTRUMENTED_END_SEND: + return 2; + case INSTRUMENTED_FOR_ITER: + return 0; + case INSTRUMENTED_INSTRUCTION: + return 0; + case INSTRUMENTED_JUMP_BACKWARD: + return 0; + case INSTRUMENTED_JUMP_FORWARD: + return 0; + case INSTRUMENTED_LOAD_SUPER_ATTR: + return 3; + case INSTRUMENTED_POP_JUMP_IF_FALSE: + return 0; + case INSTRUMENTED_POP_JUMP_IF_NONE: + return 0; + case INSTRUMENTED_POP_JUMP_IF_NOT_NONE: + return 0; + case INSTRUMENTED_POP_JUMP_IF_TRUE: + return 0; + case INSTRUMENTED_RESUME: + return 0; + case INSTRUMENTED_RETURN_CONST: + return 0; + case INSTRUMENTED_RETURN_VALUE: + return 1; + case INSTRUMENTED_YIELD_VALUE: + return 1; + case INTERPRETER_EXIT: + return 1; + case IS_OP: 
+ return 2; + case JUMP_BACKWARD: + return 0; + case JUMP_BACKWARD_NO_INTERRUPT: + return 0; + case JUMP_FORWARD: + return 0; + case LIST_APPEND: + return 2 + (oparg-1); + case LIST_EXTEND: + return 2 + (oparg-1); + case LOAD_ASSERTION_ERROR: + return 0; + case LOAD_ATTR: + return 1; + case LOAD_ATTR_CLASS: + return 1; + case LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN: + return 1; + case LOAD_ATTR_INSTANCE_VALUE: + return 1; + case LOAD_ATTR_METHOD_LAZY_DICT: + return 1; + case LOAD_ATTR_METHOD_NO_DICT: + return 1; + case LOAD_ATTR_METHOD_WITH_VALUES: + return 1; + case LOAD_ATTR_MODULE: + return 1; + case LOAD_ATTR_NONDESCRIPTOR_NO_DICT: + return 1; + case LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: + return 1; + case LOAD_ATTR_PROPERTY: + return 1; + case LOAD_ATTR_SLOT: + return 1; + case LOAD_ATTR_WITH_HINT: + return 1; + case LOAD_BUILD_CLASS: + return 0; + case LOAD_CONST: + return 0; + case LOAD_DEREF: + return 0; + case LOAD_FAST: + return 0; + case LOAD_FAST_AND_CLEAR: + return 0; + case LOAD_FAST_CHECK: + return 0; + case LOAD_FAST_LOAD_FAST: + return 0; + case LOAD_FROM_DICT_OR_DEREF: + return 1; + case LOAD_FROM_DICT_OR_GLOBALS: + return 1; + case LOAD_GLOBAL: + return 0; + case LOAD_GLOBAL_BUILTIN: + return 0; + case LOAD_GLOBAL_MODULE: + return 0; + case LOAD_LOCALS: + return 0; + case LOAD_NAME: + return 0; + case LOAD_SUPER_ATTR: + return 3; + case LOAD_SUPER_ATTR_ATTR: + return 3; + case LOAD_SUPER_ATTR_METHOD: + return 3; + case MAKE_CELL: + return 0; + case MAKE_FUNCTION: + return 1; + case MAP_ADD: + return 3 + (oparg - 1); + case MATCH_CLASS: + return 3; + case MATCH_KEYS: + return 2; + case MATCH_MAPPING: + return 1; + case MATCH_SEQUENCE: + return 1; + case NOP: + return 0; + case POP_EXCEPT: + return 1; + case POP_JUMP_IF_FALSE: + return 1; + case POP_JUMP_IF_NONE: + return 1; + case POP_JUMP_IF_NOT_NONE: + return 1; + case POP_JUMP_IF_TRUE: + return 1; + case POP_TOP: + return 1; + case PUSH_EXC_INFO: + return 1; + case PUSH_NULL: + return 0; + case RAISE_VARARGS: + return oparg; + case RERAISE: + return 1 + oparg; + case RESERVED: + return 0; + case RESUME: + return 0; + case RESUME_CHECK: + return 0; + case RETURN_CONST: + return 0; + case RETURN_GENERATOR: + return 0; + case RETURN_VALUE: + return 1; + case SEND: + return 2; + case SEND_GEN: + return 2; + case SETUP_ANNOTATIONS: + return 0; + case SET_ADD: + return 2 + (oparg-1); + case SET_FUNCTION_ATTRIBUTE: + return 2; + case SET_UPDATE: + return 2 + (oparg-1); + case STORE_ATTR: + return 2; + case STORE_ATTR_INSTANCE_VALUE: + return 2; + case STORE_ATTR_SLOT: + return 2; + case STORE_ATTR_WITH_HINT: + return 2; + case STORE_DEREF: + return 1; + case STORE_FAST: + return 1; + case STORE_FAST_LOAD_FAST: + return 1; + case STORE_FAST_STORE_FAST: + return 2; + case STORE_GLOBAL: + return 1; + case STORE_NAME: + return 1; + case STORE_SLICE: + return 4; + case STORE_SUBSCR: + return 3; + case STORE_SUBSCR_DICT: + return 3; + case STORE_SUBSCR_LIST_INT: + return 3; + case SWAP: + return 2 + (oparg-2); + case TO_BOOL: + return 1; + case TO_BOOL_ALWAYS_TRUE: + return 1; + case TO_BOOL_BOOL: + return 1; + case TO_BOOL_INT: + return 1; + case TO_BOOL_LIST: + return 1; + case TO_BOOL_NONE: + return 1; + case TO_BOOL_STR: + return 1; + case UNARY_INVERT: + return 1; + case UNARY_NEGATIVE: + return 1; + case UNARY_NOT: + return 1; + case UNPACK_EX: + return 1; + case UNPACK_SEQUENCE: + return 1; + case UNPACK_SEQUENCE_LIST: + return 1; + case UNPACK_SEQUENCE_TUPLE: + return 1; + case UNPACK_SEQUENCE_TWO_TUPLE: + return 1; + case 
WITH_EXCEPT_START: + return 4; + case YIELD_VALUE: + return 1; + default: + return -1; + } +} + +#endif + +extern int _PyOpcode_num_pushed(int opcode, int oparg); +#ifdef NEED_OPCODE_METADATA +int _PyOpcode_num_pushed(int opcode, int oparg) { + switch(opcode) { + case BEFORE_ASYNC_WITH: + return 2; + case BEFORE_WITH: + return 2; + case BINARY_OP: + return 1; + case BINARY_OP_ADD_FLOAT: + return 1; + case BINARY_OP_ADD_INT: + return 1; + case BINARY_OP_ADD_UNICODE: + return 1; + case BINARY_OP_INPLACE_ADD_UNICODE: + return 0; + case BINARY_OP_MULTIPLY_FLOAT: + return 1; + case BINARY_OP_MULTIPLY_INT: + return 1; + case BINARY_OP_SUBTRACT_FLOAT: + return 1; + case BINARY_OP_SUBTRACT_INT: + return 1; + case BINARY_SLICE: + return 1; + case BINARY_SUBSCR: + return 1; + case BINARY_SUBSCR_DICT: + return 1; + case BINARY_SUBSCR_GETITEM: + return 1; + case BINARY_SUBSCR_LIST_INT: + return 1; + case BINARY_SUBSCR_STR_INT: + return 1; + case BINARY_SUBSCR_TUPLE_INT: + return 1; + case BUILD_CONST_KEY_MAP: + return 1; + case BUILD_LIST: + return 1; + case BUILD_MAP: + return 1; + case BUILD_SET: + return 1; + case BUILD_SLICE: + return 1; + case BUILD_STRING: + return 1; + case BUILD_TUPLE: + return 1; + case CACHE: + return 0; + case CALL: + return 1; + case CALL_ALLOC_AND_ENTER_INIT: + return 1; + case CALL_BOUND_METHOD_EXACT_ARGS: + return 0; + case CALL_BOUND_METHOD_GENERAL: + return 0; + case CALL_BUILTIN_CLASS: + return 1; + case CALL_BUILTIN_FAST: + return 1; + case CALL_BUILTIN_FAST_WITH_KEYWORDS: + return 1; + case CALL_BUILTIN_O: + return 1; + case CALL_FUNCTION_EX: + return 1; + case CALL_INTRINSIC_1: + return 1; + case CALL_INTRINSIC_2: + return 1; + case CALL_ISINSTANCE: + return 1; + case CALL_KW: + return 1; + case CALL_LEN: + return 1; + case CALL_LIST_APPEND: + return 1; + case CALL_METHOD_DESCRIPTOR_FAST: + return 1; + case CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: + return 1; + case CALL_METHOD_DESCRIPTOR_NOARGS: + return 1; + case CALL_METHOD_DESCRIPTOR_O: + return 1; + case CALL_NON_PY_GENERAL: + return 1; + case CALL_PY_EXACT_ARGS: + return 0; + case CALL_PY_GENERAL: + return 0; + case CALL_STR_1: + return 1; + case CALL_TUPLE_1: + return 1; + case CALL_TYPE_1: + return 1; + case CHECK_EG_MATCH: + return 2; + case CHECK_EXC_MATCH: + return 2; + case CLEANUP_THROW: + return 2; + case COMPARE_OP: + return 1; + case COMPARE_OP_FLOAT: + return 1; + case COMPARE_OP_INT: + return 1; + case COMPARE_OP_STR: + return 1; + case CONTAINS_OP: + return 1; + case CONTAINS_OP_DICT: + return 1; + case CONTAINS_OP_SET: + return 1; + case CONVERT_VALUE: + return 1; + case COPY: + return 2 + (oparg-1); + case COPY_FREE_VARS: + return 0; + case DELETE_ATTR: + return 0; + case DELETE_DEREF: + return 0; + case DELETE_FAST: + return 0; + case DELETE_GLOBAL: + return 0; + case DELETE_NAME: + return 0; + case DELETE_SUBSCR: + return 0; + case DICT_MERGE: + return 4 + (oparg - 1); + case DICT_UPDATE: + return 1 + (oparg - 1); + case END_ASYNC_FOR: + return 0; + case END_FOR: + return 0; + case END_SEND: + return 1; + case ENTER_EXECUTOR: + return 0; + case EXIT_INIT_CHECK: + return 0; + case EXTENDED_ARG: + return 0; + case FORMAT_SIMPLE: + return 1; + case FORMAT_WITH_SPEC: + return 1; + case FOR_ITER: + return 2; + case FOR_ITER_GEN: + return 1; + case FOR_ITER_LIST: + return 2; + case FOR_ITER_RANGE: + return 2; + case FOR_ITER_TUPLE: + return 2; + case GET_AITER: + return 1; + case GET_ANEXT: + return 2; + case GET_AWAITABLE: + return 1; + case GET_ITER: + return 1; + case GET_LEN: + return 2; + 
case GET_YIELD_FROM_ITER: + return 1; + case IMPORT_FROM: + return 2; + case IMPORT_NAME: + return 1; + case INSTRUMENTED_CALL: + return 0; + case INSTRUMENTED_CALL_FUNCTION_EX: + return 0; + case INSTRUMENTED_CALL_KW: + return 0; + case INSTRUMENTED_END_FOR: + return 1; + case INSTRUMENTED_END_SEND: + return 1; + case INSTRUMENTED_FOR_ITER: + return 0; + case INSTRUMENTED_INSTRUCTION: + return 0; + case INSTRUMENTED_JUMP_BACKWARD: + return 0; + case INSTRUMENTED_JUMP_FORWARD: + return 0; + case INSTRUMENTED_LOAD_SUPER_ATTR: + return 1 + (oparg & 1); + case INSTRUMENTED_POP_JUMP_IF_FALSE: + return 0; + case INSTRUMENTED_POP_JUMP_IF_NONE: + return 0; + case INSTRUMENTED_POP_JUMP_IF_NOT_NONE: + return 0; + case INSTRUMENTED_POP_JUMP_IF_TRUE: + return 0; + case INSTRUMENTED_RESUME: + return 0; + case INSTRUMENTED_RETURN_CONST: + return 0; + case INSTRUMENTED_RETURN_VALUE: + return 0; + case INSTRUMENTED_YIELD_VALUE: + return 1; + case INTERPRETER_EXIT: + return 0; + case IS_OP: + return 1; + case JUMP_BACKWARD: + return 0; + case JUMP_BACKWARD_NO_INTERRUPT: + return 0; + case JUMP_FORWARD: + return 0; + case LIST_APPEND: + return 1 + (oparg-1); + case LIST_EXTEND: + return 1 + (oparg-1); + case LOAD_ASSERTION_ERROR: + return 1; + case LOAD_ATTR: + return 1 + (oparg & 1); + case LOAD_ATTR_CLASS: + return 1 + (oparg & 1); + case LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN: + return 1; + case LOAD_ATTR_INSTANCE_VALUE: + return 1 + (oparg & 1); + case LOAD_ATTR_METHOD_LAZY_DICT: + return 2; + case LOAD_ATTR_METHOD_NO_DICT: + return 2; + case LOAD_ATTR_METHOD_WITH_VALUES: + return 2; + case LOAD_ATTR_MODULE: + return 1 + (oparg & 1); + case LOAD_ATTR_NONDESCRIPTOR_NO_DICT: + return 1; + case LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: + return 1; + case LOAD_ATTR_PROPERTY: + return 1; + case LOAD_ATTR_SLOT: + return 1 + (oparg & 1); + case LOAD_ATTR_WITH_HINT: + return 1 + (oparg & 1); + case LOAD_BUILD_CLASS: + return 1; + case LOAD_CONST: + return 1; + case LOAD_DEREF: + return 1; + case LOAD_FAST: + return 1; + case LOAD_FAST_AND_CLEAR: + return 1; + case LOAD_FAST_CHECK: + return 1; + case LOAD_FAST_LOAD_FAST: + return 2; + case LOAD_FROM_DICT_OR_DEREF: + return 1; + case LOAD_FROM_DICT_OR_GLOBALS: + return 1; + case LOAD_GLOBAL: + return 1 + (oparg & 1); + case LOAD_GLOBAL_BUILTIN: + return 1 + (oparg & 1); + case LOAD_GLOBAL_MODULE: + return 1 + (oparg & 1); + case LOAD_LOCALS: + return 1; + case LOAD_NAME: + return 1; + case LOAD_SUPER_ATTR: + return 1 + (oparg & 1); + case LOAD_SUPER_ATTR_ATTR: + return 1; + case LOAD_SUPER_ATTR_METHOD: + return 2; + case MAKE_CELL: + return 0; + case MAKE_FUNCTION: + return 1; + case MAP_ADD: + return 1 + (oparg - 1); + case MATCH_CLASS: + return 1; + case MATCH_KEYS: + return 3; + case MATCH_MAPPING: + return 2; + case MATCH_SEQUENCE: + return 2; + case NOP: + return 0; + case POP_EXCEPT: + return 0; + case POP_JUMP_IF_FALSE: + return 0; + case POP_JUMP_IF_NONE: + return 0; + case POP_JUMP_IF_NOT_NONE: + return 0; + case POP_JUMP_IF_TRUE: + return 0; + case POP_TOP: + return 0; + case PUSH_EXC_INFO: + return 2; + case PUSH_NULL: + return 1; + case RAISE_VARARGS: + return 0; + case RERAISE: + return oparg; + case RESERVED: + return 0; + case RESUME: + return 0; + case RESUME_CHECK: + return 0; + case RETURN_CONST: + return 0; + case RETURN_GENERATOR: + return 1; + case RETURN_VALUE: + return 0; + case SEND: + return 2; + case SEND_GEN: + return 2; + case SETUP_ANNOTATIONS: + return 0; + case SET_ADD: + return 1 + (oparg-1); + case SET_FUNCTION_ATTRIBUTE: + return 1; + 
case SET_UPDATE: + return 1 + (oparg-1); + case STORE_ATTR: + return 0; + case STORE_ATTR_INSTANCE_VALUE: + return 0; + case STORE_ATTR_SLOT: + return 0; + case STORE_ATTR_WITH_HINT: + return 0; + case STORE_DEREF: + return 0; + case STORE_FAST: + return 0; + case STORE_FAST_LOAD_FAST: + return 1; + case STORE_FAST_STORE_FAST: + return 0; + case STORE_GLOBAL: + return 0; + case STORE_NAME: + return 0; + case STORE_SLICE: + return 0; + case STORE_SUBSCR: + return 0; + case STORE_SUBSCR_DICT: + return 0; + case STORE_SUBSCR_LIST_INT: + return 0; + case SWAP: + return 2 + (oparg-2); + case TO_BOOL: + return 1; + case TO_BOOL_ALWAYS_TRUE: + return 1; + case TO_BOOL_BOOL: + return 1; + case TO_BOOL_INT: + return 1; + case TO_BOOL_LIST: + return 1; + case TO_BOOL_NONE: + return 1; + case TO_BOOL_STR: + return 1; + case UNARY_INVERT: + return 1; + case UNARY_NEGATIVE: + return 1; + case UNARY_NOT: + return 1; + case UNPACK_EX: + return 1 + (oparg >> 8) + (oparg & 0xFF); + case UNPACK_SEQUENCE: + return oparg; + case UNPACK_SEQUENCE_LIST: + return oparg; + case UNPACK_SEQUENCE_TUPLE: + return oparg; + case UNPACK_SEQUENCE_TWO_TUPLE: + return 2; + case WITH_EXCEPT_START: + return 5; + case YIELD_VALUE: + return 1; + default: + return -1; + } +} + +#endif + +enum InstructionFormat { + INSTR_FMT_IB = 1, + INSTR_FMT_IBC = 2, + INSTR_FMT_IBC00 = 3, + INSTR_FMT_IBC000 = 4, + INSTR_FMT_IBC00000000 = 5, + INSTR_FMT_IX = 6, + INSTR_FMT_IXC = 7, + INSTR_FMT_IXC00 = 8, + INSTR_FMT_IXC000 = 9, +}; + +#define IS_VALID_OPCODE(OP) \ + (((OP) >= 0) && ((OP) < 268) && \ + (_PyOpcode_opcode_metadata[(OP)].valid_entry)) + +#define HAS_ARG_FLAG (1) +#define HAS_CONST_FLAG (2) +#define HAS_NAME_FLAG (4) +#define HAS_JUMP_FLAG (8) +#define HAS_FREE_FLAG (16) +#define HAS_LOCAL_FLAG (32) +#define HAS_EVAL_BREAK_FLAG (64) +#define HAS_DEOPT_FLAG (128) +#define HAS_ERROR_FLAG (256) +#define HAS_ESCAPES_FLAG (512) +#define HAS_EXIT_FLAG (1024) +#define HAS_PURE_FLAG (2048) +#define HAS_PASSTHROUGH_FLAG (4096) +#define HAS_OPARG_AND_1_FLAG (8192) +#define HAS_ERROR_NO_POP_FLAG (16384) +#define OPCODE_HAS_ARG(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ARG_FLAG)) +#define OPCODE_HAS_CONST(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_CONST_FLAG)) +#define OPCODE_HAS_NAME(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_NAME_FLAG)) +#define OPCODE_HAS_JUMP(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_JUMP_FLAG)) +#define OPCODE_HAS_FREE(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_FREE_FLAG)) +#define OPCODE_HAS_LOCAL(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_LOCAL_FLAG)) +#define OPCODE_HAS_EVAL_BREAK(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_EVAL_BREAK_FLAG)) +#define OPCODE_HAS_DEOPT(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_DEOPT_FLAG)) +#define OPCODE_HAS_ERROR(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ERROR_FLAG)) +#define OPCODE_HAS_ESCAPES(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ESCAPES_FLAG)) +#define OPCODE_HAS_EXIT(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_EXIT_FLAG)) +#define OPCODE_HAS_PURE(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_PURE_FLAG)) +#define OPCODE_HAS_PASSTHROUGH(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_PASSTHROUGH_FLAG)) +#define OPCODE_HAS_OPARG_AND_1(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_OPARG_AND_1_FLAG)) +#define OPCODE_HAS_ERROR_NO_POP(OP) (_PyOpcode_opcode_metadata[OP].flags & (HAS_ERROR_NO_POP_FLAG)) + +#define OPARG_FULL 0 +#define OPARG_CACHE_1 1 +#define OPARG_CACHE_2 2 +#define OPARG_CACHE_4 4 +#define OPARG_TOP 5 
+#define OPARG_BOTTOM 6 +#define OPARG_SAVE_RETURN_OFFSET 7 +#define OPARG_REPLACED 9 + +struct opcode_metadata { + uint8_t valid_entry; + int8_t instr_format; + int16_t flags; +}; + +extern const struct opcode_metadata _PyOpcode_opcode_metadata[268]; +#ifdef NEED_OPCODE_METADATA +const struct opcode_metadata _PyOpcode_opcode_metadata[268] = { + [BEFORE_ASYNC_WITH] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [BEFORE_WITH] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [BINARY_OP] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [BINARY_OP_ADD_FLOAT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG }, + [BINARY_OP_ADD_INT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, + [BINARY_OP_ADD_UNICODE] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, + [BINARY_OP_INPLACE_ADD_UNICODE] = { true, INSTR_FMT_IXC, HAS_LOCAL_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [BINARY_OP_MULTIPLY_FLOAT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG }, + [BINARY_OP_MULTIPLY_INT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, + [BINARY_OP_SUBTRACT_FLOAT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG }, + [BINARY_OP_SUBTRACT_INT] = { true, INSTR_FMT_IXC, HAS_EXIT_FLAG | HAS_ERROR_FLAG }, + [BINARY_SLICE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [BINARY_SUBSCR] = { true, INSTR_FMT_IXC, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [BINARY_SUBSCR_DICT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [BINARY_SUBSCR_GETITEM] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, + [BINARY_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, + [BINARY_SUBSCR_STR_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, + [BINARY_SUBSCR_TUPLE_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, + [BUILD_CONST_KEY_MAP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [BUILD_LIST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG }, + [BUILD_MAP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [BUILD_SET] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [BUILD_SLICE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG }, + [BUILD_STRING] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG }, + [BUILD_TUPLE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG }, + [CACHE] = { true, INSTR_FMT_IX, 0 }, + [CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [CALL_ALLOC_AND_ENTER_INIT] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [CALL_BOUND_METHOD_EXACT_ARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, + [CALL_BOUND_METHOD_GENERAL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [CALL_BUILTIN_CLASS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CALL_BUILTIN_FAST] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CALL_BUILTIN_FAST_WITH_KEYWORDS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CALL_BUILTIN_O] = { true, 
INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CALL_FUNCTION_EX] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [CALL_INTRINSIC_1] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CALL_INTRINSIC_2] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CALL_ISINSTANCE] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [CALL_KW] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [CALL_LEN] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [CALL_LIST_APPEND] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, + [CALL_METHOD_DESCRIPTOR_FAST] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CALL_METHOD_DESCRIPTOR_NOARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CALL_METHOD_DESCRIPTOR_O] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CALL_NON_PY_GENERAL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CALL_PY_EXACT_ARGS] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, + [CALL_PY_GENERAL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [CALL_STR_1] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CALL_TUPLE_1] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CALL_TYPE_1] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, + [CHECK_EG_MATCH] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CHECK_EXC_MATCH] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CLEANUP_THROW] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [COMPARE_OP] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [COMPARE_OP_FLOAT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_EXIT_FLAG }, + [COMPARE_OP_INT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, + [COMPARE_OP_STR] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_EXIT_FLAG }, + [CONTAINS_OP] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CONTAINS_OP_DICT] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CONTAINS_OP_SET] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [CONVERT_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG }, + [COPY] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_PURE_FLAG }, + [COPY_FREE_VARS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, + [DELETE_ATTR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | 
HAS_ESCAPES_FLAG }, + [DELETE_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [DELETE_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [DELETE_GLOBAL] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [DELETE_NAME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [DELETE_SUBSCR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [DICT_MERGE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [DICT_UPDATE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [END_ASYNC_FOR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [END_FOR] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, + [END_SEND] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, + [ENTER_EXECUTOR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, + [EXIT_INIT_CHECK] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [EXTENDED_ARG] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, + [FORMAT_SIMPLE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [FORMAT_WITH_SPEC] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [FOR_ITER_GEN] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, + [FOR_ITER_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG }, + [FOR_ITER_RANGE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG | HAS_ERROR_FLAG }, + [FOR_ITER_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EXIT_FLAG }, + [GET_AITER] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [GET_ANEXT] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [GET_AWAITABLE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [GET_ITER] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [GET_LEN] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [GET_YIELD_FROM_ITER] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [IMPORT_FROM] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [IMPORT_NAME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [INSTRUMENTED_CALL] = { true, INSTR_FMT_IBC00, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [INSTRUMENTED_CALL_FUNCTION_EX] = { true, INSTR_FMT_IX, 0 }, + [INSTRUMENTED_CALL_KW] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [INSTRUMENTED_END_FOR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, + [INSTRUMENTED_END_SEND] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, + [INSTRUMENTED_FOR_ITER] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [INSTRUMENTED_INSTRUCTION] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [INSTRUMENTED_JUMP_BACKWARD] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG }, + [INSTRUMENTED_JUMP_FORWARD] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, + [INSTRUMENTED_LOAD_SUPER_ATTR] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG }, + 
[INSTRUMENTED_POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG }, + [INSTRUMENTED_POP_JUMP_IF_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG }, + [INSTRUMENTED_POP_JUMP_IF_NOT_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG }, + [INSTRUMENTED_POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG }, + [INSTRUMENTED_RESUME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [INSTRUMENTED_RETURN_CONST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [INSTRUMENTED_RETURN_VALUE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [INSTRUMENTED_YIELD_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [INTERPRETER_EXIT] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, + [IS_OP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG }, + [JUMP_BACKWARD] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [JUMP_BACKWARD_NO_INTERRUPT] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG }, + [JUMP_FORWARD] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_JUMP_FLAG }, + [LIST_APPEND] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG }, + [LIST_EXTEND] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_ASSERTION_ERROR] = { true, INSTR_FMT_IX, 0 }, + [LOAD_ATTR] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_ATTR_CLASS] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, + [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_ATTR_INSTANCE_VALUE] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, + [LOAD_ATTR_METHOD_LAZY_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, + [LOAD_ATTR_METHOD_NO_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_EXIT_FLAG }, + [LOAD_ATTR_METHOD_WITH_VALUES] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, + [LOAD_ATTR_MODULE] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, + [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_EXIT_FLAG }, + [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, + [LOAD_ATTR_PROPERTY] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_ATTR_SLOT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, + [LOAD_ATTR_WITH_HINT] = { true, INSTR_FMT_IBC00000000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, + [LOAD_BUILD_CLASS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_CONST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_PURE_FLAG }, + [LOAD_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, + [LOAD_FAST_AND_CLEAR] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, + [LOAD_FAST_CHECK] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_FAST_LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, + [LOAD_FROM_DICT_OR_DEREF] = { 
true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_FROM_DICT_OR_GLOBALS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_GLOBAL] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_GLOBAL_BUILTIN] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, + [LOAD_GLOBAL_MODULE] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, + [LOAD_LOCALS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_NAME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_SUPER_ATTR] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_SUPER_ATTR_ATTR] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_SUPER_ATTR_METHOD] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [MAKE_CELL] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG }, + [MAKE_FUNCTION] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [MAP_ADD] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [MATCH_CLASS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [MATCH_KEYS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [MATCH_MAPPING] = { true, INSTR_FMT_IX, 0 }, + [MATCH_SEQUENCE] = { true, INSTR_FMT_IX, 0 }, + [NOP] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, + [POP_EXCEPT] = { true, INSTR_FMT_IX, HAS_ESCAPES_FLAG }, + [POP_JUMP_IF_FALSE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, + [POP_JUMP_IF_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, + [POP_JUMP_IF_NOT_NONE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, + [POP_JUMP_IF_TRUE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG }, + [POP_TOP] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, + [PUSH_EXC_INFO] = { true, INSTR_FMT_IX, 0 }, + [PUSH_NULL] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, + [RAISE_VARARGS] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [RERAISE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [RESERVED] = { true, INSTR_FMT_IX, 0 }, + [RESUME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [RESUME_CHECK] = { true, INSTR_FMT_IX, HAS_DEOPT_FLAG }, + [RETURN_CONST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_CONST_FLAG }, + [RETURN_GENERATOR] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [RETURN_VALUE] = { true, INSTR_FMT_IX, 0 }, + [SEND] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG }, + [SEND_GEN] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, + [SETUP_ANNOTATIONS] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [SET_ADD] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [SET_FUNCTION_ATTRIBUTE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, + [SET_UPDATE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [STORE_ATTR] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | 
HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [STORE_ATTR_INSTANCE_VALUE] = { true, INSTR_FMT_IXC000, HAS_DEOPT_FLAG | HAS_EXIT_FLAG }, + [STORE_ATTR_SLOT] = { true, INSTR_FMT_IXC000, HAS_EXIT_FLAG }, + [STORE_ATTR_WITH_HINT] = { true, INSTR_FMT_IBC000, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ESCAPES_FLAG }, + [STORE_DEREF] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ESCAPES_FLAG }, + [STORE_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, + [STORE_FAST_LOAD_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, + [STORE_FAST_STORE_FAST] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, + [STORE_GLOBAL] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [STORE_NAME] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [STORE_SLICE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [STORE_SUBSCR] = { true, INSTR_FMT_IXC, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [STORE_SUBSCR_DICT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [STORE_SUBSCR_LIST_INT] = { true, INSTR_FMT_IXC, HAS_DEOPT_FLAG }, + [SWAP] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_PURE_FLAG }, + [TO_BOOL] = { true, INSTR_FMT_IXC00, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [TO_BOOL_ALWAYS_TRUE] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG }, + [TO_BOOL_BOOL] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG }, + [TO_BOOL_INT] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, + [TO_BOOL_LIST] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG }, + [TO_BOOL_NONE] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG }, + [TO_BOOL_STR] = { true, INSTR_FMT_IXC00, HAS_EXIT_FLAG | HAS_ESCAPES_FLAG }, + [UNARY_INVERT] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [UNARY_NEGATIVE] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [UNARY_NOT] = { true, INSTR_FMT_IX, HAS_PURE_FLAG }, + [UNPACK_EX] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [UNPACK_SEQUENCE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [UNPACK_SEQUENCE_LIST] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, + [UNPACK_SEQUENCE_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, + [UNPACK_SEQUENCE_TWO_TUPLE] = { true, INSTR_FMT_IBC, HAS_ARG_FLAG | HAS_DEOPT_FLAG }, + [WITH_EXCEPT_START] = { true, INSTR_FMT_IX, HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [YIELD_VALUE] = { true, INSTR_FMT_IB, HAS_ARG_FLAG | HAS_ESCAPES_FLAG }, + [JUMP] = { true, -1, HAS_ARG_FLAG | HAS_JUMP_FLAG | HAS_EVAL_BREAK_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [JUMP_NO_INTERRUPT] = { true, -1, HAS_ARG_FLAG | HAS_JUMP_FLAG }, + [LOAD_CLOSURE] = { true, -1, HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG }, + [LOAD_METHOD] = { true, -1, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_SUPER_METHOD] = { true, -1, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_ZERO_SUPER_ATTR] = { true, -1, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [LOAD_ZERO_SUPER_METHOD] = { true, -1, HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG }, + [POP_BLOCK] = { true, -1, HAS_PURE_FLAG }, + [SETUP_CLEANUP] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG }, + [SETUP_FINALLY] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG }, + [SETUP_WITH] = { true, -1, HAS_PURE_FLAG | HAS_ARG_FLAG }, + [STORE_FAST_MAYBE_NULL] = { true, -1, HAS_ARG_FLAG | HAS_LOCAL_FLAG }, +}; 
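The flag macros above are the intended read API for this table: each one masks _PyOpcode_opcode_metadata[OP].flags with a single HAS_*_FLAG bit. A minimal consumer sketch follows (illustrative only, not part of the patch; describe_opcode is a hypothetical helper, and it assumes a translation unit that includes this header with NEED_OPCODE_METADATA defined so the table and the _PyOpcode_OpName array are instantiated):

    #include <stdio.h>

    // Hypothetical helper: report the properties recorded for an opcode
    // in _PyOpcode_opcode_metadata, via the OPCODE_HAS_* flag macros.
    static void
    describe_opcode(int op)
    {
        if (!IS_VALID_OPCODE(op)) {
            printf("%d: invalid opcode\n", op);
            return;
        }
        printf("%s:%s%s%s\n",
               _PyOpcode_OpName[op],
               OPCODE_HAS_ARG(op)   ? " has-arg"   : "",
               OPCODE_HAS_JUMP(op)  ? " jumps"     : "",
               OPCODE_HAS_DEOPT(op) ? " can-deopt" : "");
    }

Per the table entries above, describe_opcode(LOAD_ATTR) would report "has-arg" only, while a specialized variant such as LOAD_ATTR_CLASS would additionally report "can-deopt".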
+#endif + +#define MAX_UOP_PER_EXPANSION 8 +struct opcode_macro_expansion { + int nuops; + struct { int16_t uop; int8_t size; int8_t offset; } uops[MAX_UOP_PER_EXPANSION]; +}; +extern const struct opcode_macro_expansion _PyOpcode_macro_expansion[256]; + +#ifdef NEED_OPCODE_METADATA +const struct opcode_macro_expansion +_PyOpcode_macro_expansion[256] = { + [BINARY_OP] = { .nuops = 1, .uops = { { _BINARY_OP, 0, 0 } } }, + [BINARY_OP_ADD_FLOAT] = { .nuops = 2, .uops = { { _GUARD_BOTH_FLOAT, 0, 0 }, { _BINARY_OP_ADD_FLOAT, 0, 0 } } }, + [BINARY_OP_ADD_INT] = { .nuops = 2, .uops = { { _GUARD_BOTH_INT, 0, 0 }, { _BINARY_OP_ADD_INT, 0, 0 } } }, + [BINARY_OP_ADD_UNICODE] = { .nuops = 2, .uops = { { _GUARD_BOTH_UNICODE, 0, 0 }, { _BINARY_OP_ADD_UNICODE, 0, 0 } } }, + [BINARY_OP_MULTIPLY_FLOAT] = { .nuops = 2, .uops = { { _GUARD_BOTH_FLOAT, 0, 0 }, { _BINARY_OP_MULTIPLY_FLOAT, 0, 0 } } }, + [BINARY_OP_MULTIPLY_INT] = { .nuops = 2, .uops = { { _GUARD_BOTH_INT, 0, 0 }, { _BINARY_OP_MULTIPLY_INT, 0, 0 } } }, + [BINARY_OP_SUBTRACT_FLOAT] = { .nuops = 2, .uops = { { _GUARD_BOTH_FLOAT, 0, 0 }, { _BINARY_OP_SUBTRACT_FLOAT, 0, 0 } } }, + [BINARY_OP_SUBTRACT_INT] = { .nuops = 2, .uops = { { _GUARD_BOTH_INT, 0, 0 }, { _BINARY_OP_SUBTRACT_INT, 0, 0 } } }, + [BINARY_SLICE] = { .nuops = 1, .uops = { { _BINARY_SLICE, 0, 0 } } }, + [BINARY_SUBSCR] = { .nuops = 1, .uops = { { _BINARY_SUBSCR, 0, 0 } } }, + [BINARY_SUBSCR_DICT] = { .nuops = 1, .uops = { { _BINARY_SUBSCR_DICT, 0, 0 } } }, + [BINARY_SUBSCR_LIST_INT] = { .nuops = 1, .uops = { { _BINARY_SUBSCR_LIST_INT, 0, 0 } } }, + [BINARY_SUBSCR_STR_INT] = { .nuops = 1, .uops = { { _BINARY_SUBSCR_STR_INT, 0, 0 } } }, + [BINARY_SUBSCR_TUPLE_INT] = { .nuops = 1, .uops = { { _BINARY_SUBSCR_TUPLE_INT, 0, 0 } } }, + [BUILD_CONST_KEY_MAP] = { .nuops = 1, .uops = { { _BUILD_CONST_KEY_MAP, 0, 0 } } }, + [BUILD_LIST] = { .nuops = 1, .uops = { { _BUILD_LIST, 0, 0 } } }, + [BUILD_MAP] = { .nuops = 1, .uops = { { _BUILD_MAP, 0, 0 } } }, + [BUILD_SLICE] = { .nuops = 1, .uops = { { _BUILD_SLICE, 0, 0 } } }, + [BUILD_STRING] = { .nuops = 1, .uops = { { _BUILD_STRING, 0, 0 } } }, + [BUILD_TUPLE] = { .nuops = 1, .uops = { { _BUILD_TUPLE, 0, 0 } } }, + [CALL_BOUND_METHOD_EXACT_ARGS] = { .nuops = 8, .uops = { { _CHECK_PEP_523, 0, 0 }, { _CHECK_CALL_BOUND_METHOD_EXACT_ARGS, 0, 0 }, { _INIT_CALL_BOUND_METHOD_EXACT_ARGS, 0, 0 }, { _CHECK_FUNCTION_EXACT_ARGS, 2, 1 }, { _CHECK_STACK_SPACE, 0, 0 }, { _INIT_CALL_PY_EXACT_ARGS, 0, 0 }, { _SAVE_RETURN_OFFSET, 7, 3 }, { _PUSH_FRAME, 0, 0 } } }, + [CALL_BOUND_METHOD_GENERAL] = { .nuops = 6, .uops = { { _CHECK_PEP_523, 0, 0 }, { _CHECK_METHOD_VERSION, 2, 1 }, { _EXPAND_METHOD, 0, 0 }, { _PY_FRAME_GENERAL, 0, 0 }, { _SAVE_RETURN_OFFSET, 7, 3 }, { _PUSH_FRAME, 0, 0 } } }, + [CALL_BUILTIN_CLASS] = { .nuops = 2, .uops = { { _CALL_BUILTIN_CLASS, 0, 0 }, { _CHECK_PERIODIC, 0, 0 } } }, + [CALL_BUILTIN_FAST] = { .nuops = 2, .uops = { { _CALL_BUILTIN_FAST, 0, 0 }, { _CHECK_PERIODIC, 0, 0 } } }, + [CALL_BUILTIN_FAST_WITH_KEYWORDS] = { .nuops = 2, .uops = { { _CALL_BUILTIN_FAST_WITH_KEYWORDS, 0, 0 }, { _CHECK_PERIODIC, 0, 0 } } }, + [CALL_BUILTIN_O] = { .nuops = 2, .uops = { { _CALL_BUILTIN_O, 0, 0 }, { _CHECK_PERIODIC, 0, 0 } } }, + [CALL_INTRINSIC_1] = { .nuops = 1, .uops = { { _CALL_INTRINSIC_1, 0, 0 } } }, + [CALL_INTRINSIC_2] = { .nuops = 1, .uops = { { _CALL_INTRINSIC_2, 0, 0 } } }, + [CALL_ISINSTANCE] = { .nuops = 1, .uops = { { _CALL_ISINSTANCE, 0, 0 } } }, + [CALL_LEN] = { .nuops = 1, .uops = { { _CALL_LEN, 0, 0 } } }, + [CALL_METHOD_DESCRIPTOR_FAST] 
= { .nuops = 2, .uops = { { _CALL_METHOD_DESCRIPTOR_FAST, 0, 0 }, { _CHECK_PERIODIC, 0, 0 } } }, + [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = { .nuops = 2, .uops = { { _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS, 0, 0 }, { _CHECK_PERIODIC, 0, 0 } } }, + [CALL_METHOD_DESCRIPTOR_NOARGS] = { .nuops = 2, .uops = { { _CALL_METHOD_DESCRIPTOR_NOARGS, 0, 0 }, { _CHECK_PERIODIC, 0, 0 } } }, + [CALL_METHOD_DESCRIPTOR_O] = { .nuops = 2, .uops = { { _CALL_METHOD_DESCRIPTOR_O, 0, 0 }, { _CHECK_PERIODIC, 0, 0 } } }, + [CALL_NON_PY_GENERAL] = { .nuops = 3, .uops = { { _CHECK_IS_NOT_PY_CALLABLE, 0, 0 }, { _CALL_NON_PY_GENERAL, 0, 0 }, { _CHECK_PERIODIC, 0, 0 } } }, + [CALL_PY_EXACT_ARGS] = { .nuops = 6, .uops = { { _CHECK_PEP_523, 0, 0 }, { _CHECK_FUNCTION_EXACT_ARGS, 2, 1 }, { _CHECK_STACK_SPACE, 0, 0 }, { _INIT_CALL_PY_EXACT_ARGS, 0, 0 }, { _SAVE_RETURN_OFFSET, 7, 3 }, { _PUSH_FRAME, 0, 0 } } }, + [CALL_PY_GENERAL] = { .nuops = 5, .uops = { { _CHECK_PEP_523, 0, 0 }, { _CHECK_FUNCTION_VERSION, 2, 1 }, { _PY_FRAME_GENERAL, 0, 0 }, { _SAVE_RETURN_OFFSET, 7, 3 }, { _PUSH_FRAME, 0, 0 } } }, + [CALL_STR_1] = { .nuops = 2, .uops = { { _CALL_STR_1, 0, 0 }, { _CHECK_PERIODIC, 0, 0 } } }, + [CALL_TUPLE_1] = { .nuops = 2, .uops = { { _CALL_TUPLE_1, 0, 0 }, { _CHECK_PERIODIC, 0, 0 } } }, + [CALL_TYPE_1] = { .nuops = 1, .uops = { { _CALL_TYPE_1, 0, 0 } } }, + [CHECK_EG_MATCH] = { .nuops = 1, .uops = { { _CHECK_EG_MATCH, 0, 0 } } }, + [CHECK_EXC_MATCH] = { .nuops = 1, .uops = { { _CHECK_EXC_MATCH, 0, 0 } } }, + [COMPARE_OP] = { .nuops = 1, .uops = { { _COMPARE_OP, 0, 0 } } }, + [COMPARE_OP_FLOAT] = { .nuops = 2, .uops = { { _GUARD_BOTH_FLOAT, 0, 0 }, { _COMPARE_OP_FLOAT, 0, 0 } } }, + [COMPARE_OP_INT] = { .nuops = 2, .uops = { { _GUARD_BOTH_INT, 0, 0 }, { _COMPARE_OP_INT, 0, 0 } } }, + [COMPARE_OP_STR] = { .nuops = 2, .uops = { { _GUARD_BOTH_UNICODE, 0, 0 }, { _COMPARE_OP_STR, 0, 0 } } }, + [CONTAINS_OP] = { .nuops = 1, .uops = { { _CONTAINS_OP, 0, 0 } } }, + [CONTAINS_OP_DICT] = { .nuops = 1, .uops = { { _CONTAINS_OP_DICT, 0, 0 } } }, + [CONTAINS_OP_SET] = { .nuops = 1, .uops = { { _CONTAINS_OP_SET, 0, 0 } } }, + [CONVERT_VALUE] = { .nuops = 1, .uops = { { _CONVERT_VALUE, 0, 0 } } }, + [COPY] = { .nuops = 1, .uops = { { _COPY, 0, 0 } } }, + [COPY_FREE_VARS] = { .nuops = 1, .uops = { { _COPY_FREE_VARS, 0, 0 } } }, + [DELETE_ATTR] = { .nuops = 1, .uops = { { _DELETE_ATTR, 0, 0 } } }, + [DELETE_DEREF] = { .nuops = 1, .uops = { { _DELETE_DEREF, 0, 0 } } }, + [DELETE_FAST] = { .nuops = 1, .uops = { { _DELETE_FAST, 0, 0 } } }, + [DELETE_GLOBAL] = { .nuops = 1, .uops = { { _DELETE_GLOBAL, 0, 0 } } }, + [DELETE_NAME] = { .nuops = 1, .uops = { { _DELETE_NAME, 0, 0 } } }, + [DELETE_SUBSCR] = { .nuops = 1, .uops = { { _DELETE_SUBSCR, 0, 0 } } }, + [DICT_MERGE] = { .nuops = 1, .uops = { { _DICT_MERGE, 0, 0 } } }, + [DICT_UPDATE] = { .nuops = 1, .uops = { { _DICT_UPDATE, 0, 0 } } }, + [END_FOR] = { .nuops = 1, .uops = { { _POP_TOP, 0, 0 } } }, + [END_SEND] = { .nuops = 1, .uops = { { _END_SEND, 0, 0 } } }, + [EXIT_INIT_CHECK] = { .nuops = 1, .uops = { { _EXIT_INIT_CHECK, 0, 0 } } }, + [FORMAT_SIMPLE] = { .nuops = 1, .uops = { { _FORMAT_SIMPLE, 0, 0 } } }, + [FORMAT_WITH_SPEC] = { .nuops = 1, .uops = { { _FORMAT_WITH_SPEC, 0, 0 } } }, + [FOR_ITER] = { .nuops = 1, .uops = { { _FOR_ITER, 9, 0 } } }, + [FOR_ITER_GEN] = { .nuops = 3, .uops = { { _CHECK_PEP_523, 0, 0 }, { _FOR_ITER_GEN_FRAME, 0, 0 }, { _PUSH_FRAME, 0, 0 } } }, + [FOR_ITER_LIST] = { .nuops = 3, .uops = { { _ITER_CHECK_LIST, 0, 0 }, { _ITER_JUMP_LIST, 9, 1 }, { 
_ITER_NEXT_LIST, 0, 0 } } }, + [FOR_ITER_RANGE] = { .nuops = 3, .uops = { { _ITER_CHECK_RANGE, 0, 0 }, { _ITER_JUMP_RANGE, 9, 1 }, { _ITER_NEXT_RANGE, 0, 0 } } }, + [FOR_ITER_TUPLE] = { .nuops = 3, .uops = { { _ITER_CHECK_TUPLE, 0, 0 }, { _ITER_JUMP_TUPLE, 9, 1 }, { _ITER_NEXT_TUPLE, 0, 0 } } }, + [GET_AITER] = { .nuops = 1, .uops = { { _GET_AITER, 0, 0 } } }, + [GET_ANEXT] = { .nuops = 1, .uops = { { _GET_ANEXT, 0, 0 } } }, + [GET_AWAITABLE] = { .nuops = 1, .uops = { { _GET_AWAITABLE, 0, 0 } } }, + [GET_ITER] = { .nuops = 1, .uops = { { _GET_ITER, 0, 0 } } }, + [GET_LEN] = { .nuops = 1, .uops = { { _GET_LEN, 0, 0 } } }, + [GET_YIELD_FROM_ITER] = { .nuops = 1, .uops = { { _GET_YIELD_FROM_ITER, 0, 0 } } }, + [IS_OP] = { .nuops = 1, .uops = { { _IS_OP, 0, 0 } } }, + [LIST_APPEND] = { .nuops = 1, .uops = { { _LIST_APPEND, 0, 0 } } }, + [LIST_EXTEND] = { .nuops = 1, .uops = { { _LIST_EXTEND, 0, 0 } } }, + [LOAD_ASSERTION_ERROR] = { .nuops = 1, .uops = { { _LOAD_ASSERTION_ERROR, 0, 0 } } }, + [LOAD_ATTR] = { .nuops = 1, .uops = { { _LOAD_ATTR, 0, 0 } } }, + [LOAD_ATTR_CLASS] = { .nuops = 2, .uops = { { _CHECK_ATTR_CLASS, 2, 1 }, { _LOAD_ATTR_CLASS, 4, 5 } } }, + [LOAD_ATTR_INSTANCE_VALUE] = { .nuops = 3, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _CHECK_MANAGED_OBJECT_HAS_VALUES, 0, 0 }, { _LOAD_ATTR_INSTANCE_VALUE, 1, 3 } } }, + [LOAD_ATTR_METHOD_LAZY_DICT] = { .nuops = 3, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _CHECK_ATTR_METHOD_LAZY_DICT, 1, 3 }, { _LOAD_ATTR_METHOD_LAZY_DICT, 4, 5 } } }, + [LOAD_ATTR_METHOD_NO_DICT] = { .nuops = 2, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _LOAD_ATTR_METHOD_NO_DICT, 4, 5 } } }, + [LOAD_ATTR_METHOD_WITH_VALUES] = { .nuops = 4, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT, 0, 0 }, { _GUARD_KEYS_VERSION, 2, 3 }, { _LOAD_ATTR_METHOD_WITH_VALUES, 4, 5 } } }, + [LOAD_ATTR_MODULE] = { .nuops = 2, .uops = { { _CHECK_ATTR_MODULE, 2, 1 }, { _LOAD_ATTR_MODULE, 1, 3 } } }, + [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = { .nuops = 2, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _LOAD_ATTR_NONDESCRIPTOR_NO_DICT, 4, 5 } } }, + [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = { .nuops = 4, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT, 0, 0 }, { _GUARD_KEYS_VERSION, 2, 3 }, { _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES, 4, 5 } } }, + [LOAD_ATTR_SLOT] = { .nuops = 2, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _LOAD_ATTR_SLOT, 1, 3 } } }, + [LOAD_ATTR_WITH_HINT] = { .nuops = 3, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _CHECK_ATTR_WITH_HINT, 0, 0 }, { _LOAD_ATTR_WITH_HINT, 1, 3 } } }, + [LOAD_BUILD_CLASS] = { .nuops = 1, .uops = { { _LOAD_BUILD_CLASS, 0, 0 } } }, + [LOAD_CONST] = { .nuops = 1, .uops = { { _LOAD_CONST, 0, 0 } } }, + [LOAD_DEREF] = { .nuops = 1, .uops = { { _LOAD_DEREF, 0, 0 } } }, + [LOAD_FAST] = { .nuops = 1, .uops = { { _LOAD_FAST, 0, 0 } } }, + [LOAD_FAST_AND_CLEAR] = { .nuops = 1, .uops = { { _LOAD_FAST_AND_CLEAR, 0, 0 } } }, + [LOAD_FAST_CHECK] = { .nuops = 1, .uops = { { _LOAD_FAST_CHECK, 0, 0 } } }, + [LOAD_FAST_LOAD_FAST] = { .nuops = 2, .uops = { { _LOAD_FAST, 5, 0 }, { _LOAD_FAST, 6, 0 } } }, + [LOAD_FROM_DICT_OR_DEREF] = { .nuops = 1, .uops = { { _LOAD_FROM_DICT_OR_DEREF, 0, 0 } } }, + [LOAD_GLOBAL] = { .nuops = 1, .uops = { { _LOAD_GLOBAL, 0, 0 } } }, + [LOAD_GLOBAL_BUILTIN] = { .nuops = 3, .uops = { { _GUARD_GLOBALS_VERSION, 1, 1 }, { _GUARD_BUILTINS_VERSION, 1, 2 }, { _LOAD_GLOBAL_BUILTINS, 1, 3 } } }, + [LOAD_GLOBAL_MODULE] = { .nuops = 2, .uops = { { _GUARD_GLOBALS_VERSION, 1, 1 }, { 
_LOAD_GLOBAL_MODULE, 1, 3 } } }, + [LOAD_LOCALS] = { .nuops = 1, .uops = { { _LOAD_LOCALS, 0, 0 } } }, + [LOAD_SUPER_ATTR_ATTR] = { .nuops = 1, .uops = { { _LOAD_SUPER_ATTR_ATTR, 0, 0 } } }, + [LOAD_SUPER_ATTR_METHOD] = { .nuops = 1, .uops = { { _LOAD_SUPER_ATTR_METHOD, 0, 0 } } }, + [MAKE_CELL] = { .nuops = 1, .uops = { { _MAKE_CELL, 0, 0 } } }, + [MAKE_FUNCTION] = { .nuops = 1, .uops = { { _MAKE_FUNCTION, 0, 0 } } }, + [MAP_ADD] = { .nuops = 1, .uops = { { _MAP_ADD, 0, 0 } } }, + [MATCH_CLASS] = { .nuops = 1, .uops = { { _MATCH_CLASS, 0, 0 } } }, + [MATCH_KEYS] = { .nuops = 1, .uops = { { _MATCH_KEYS, 0, 0 } } }, + [MATCH_MAPPING] = { .nuops = 1, .uops = { { _MATCH_MAPPING, 0, 0 } } }, + [MATCH_SEQUENCE] = { .nuops = 1, .uops = { { _MATCH_SEQUENCE, 0, 0 } } }, + [NOP] = { .nuops = 1, .uops = { { _NOP, 0, 0 } } }, + [POP_EXCEPT] = { .nuops = 1, .uops = { { _POP_EXCEPT, 0, 0 } } }, + [POP_JUMP_IF_FALSE] = { .nuops = 1, .uops = { { _POP_JUMP_IF_FALSE, 9, 1 } } }, + [POP_JUMP_IF_NONE] = { .nuops = 2, .uops = { { _IS_NONE, 0, 0 }, { _POP_JUMP_IF_TRUE, 9, 1 } } }, + [POP_JUMP_IF_NOT_NONE] = { .nuops = 2, .uops = { { _IS_NONE, 0, 0 }, { _POP_JUMP_IF_FALSE, 9, 1 } } }, + [POP_JUMP_IF_TRUE] = { .nuops = 1, .uops = { { _POP_JUMP_IF_TRUE, 9, 1 } } }, + [POP_TOP] = { .nuops = 1, .uops = { { _POP_TOP, 0, 0 } } }, + [PUSH_EXC_INFO] = { .nuops = 1, .uops = { { _PUSH_EXC_INFO, 0, 0 } } }, + [PUSH_NULL] = { .nuops = 1, .uops = { { _PUSH_NULL, 0, 0 } } }, + [RESUME_CHECK] = { .nuops = 1, .uops = { { _RESUME_CHECK, 0, 0 } } }, + [RETURN_CONST] = { .nuops = 2, .uops = { { _LOAD_CONST, 0, 0 }, { _POP_FRAME, 0, 0 } } }, + [RETURN_GENERATOR] = { .nuops = 1, .uops = { { _RETURN_GENERATOR, 0, 0 } } }, + [RETURN_VALUE] = { .nuops = 1, .uops = { { _POP_FRAME, 0, 0 } } }, + [SETUP_ANNOTATIONS] = { .nuops = 1, .uops = { { _SETUP_ANNOTATIONS, 0, 0 } } }, + [SET_ADD] = { .nuops = 1, .uops = { { _SET_ADD, 0, 0 } } }, + [SET_FUNCTION_ATTRIBUTE] = { .nuops = 1, .uops = { { _SET_FUNCTION_ATTRIBUTE, 0, 0 } } }, + [SET_UPDATE] = { .nuops = 1, .uops = { { _SET_UPDATE, 0, 0 } } }, + [STORE_ATTR] = { .nuops = 1, .uops = { { _STORE_ATTR, 0, 0 } } }, + [STORE_ATTR_INSTANCE_VALUE] = { .nuops = 3, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _GUARD_DORV_NO_DICT, 0, 0 }, { _STORE_ATTR_INSTANCE_VALUE, 1, 3 } } }, + [STORE_ATTR_SLOT] = { .nuops = 2, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _STORE_ATTR_SLOT, 1, 3 } } }, + [STORE_DEREF] = { .nuops = 1, .uops = { { _STORE_DEREF, 0, 0 } } }, + [STORE_FAST] = { .nuops = 1, .uops = { { _STORE_FAST, 0, 0 } } }, + [STORE_FAST_LOAD_FAST] = { .nuops = 2, .uops = { { _STORE_FAST, 5, 0 }, { _LOAD_FAST, 6, 0 } } }, + [STORE_FAST_STORE_FAST] = { .nuops = 2, .uops = { { _STORE_FAST, 5, 0 }, { _STORE_FAST, 6, 0 } } }, + [STORE_GLOBAL] = { .nuops = 1, .uops = { { _STORE_GLOBAL, 0, 0 } } }, + [STORE_NAME] = { .nuops = 1, .uops = { { _STORE_NAME, 0, 0 } } }, + [STORE_SLICE] = { .nuops = 1, .uops = { { _STORE_SLICE, 0, 0 } } }, + [STORE_SUBSCR] = { .nuops = 1, .uops = { { _STORE_SUBSCR, 0, 0 } } }, + [STORE_SUBSCR_DICT] = { .nuops = 1, .uops = { { _STORE_SUBSCR_DICT, 0, 0 } } }, + [STORE_SUBSCR_LIST_INT] = { .nuops = 1, .uops = { { _STORE_SUBSCR_LIST_INT, 0, 0 } } }, + [SWAP] = { .nuops = 1, .uops = { { _SWAP, 0, 0 } } }, + [TO_BOOL] = { .nuops = 1, .uops = { { _TO_BOOL, 0, 0 } } }, + [TO_BOOL_ALWAYS_TRUE] = { .nuops = 2, .uops = { { _GUARD_TYPE_VERSION, 2, 1 }, { _REPLACE_WITH_TRUE, 0, 0 } } }, + [TO_BOOL_BOOL] = { .nuops = 1, .uops = { { _TO_BOOL_BOOL, 0, 0 } } }, + [TO_BOOL_INT] = { .nuops = 1, .uops 
= { { _TO_BOOL_INT, 0, 0 } } }, + [TO_BOOL_LIST] = { .nuops = 1, .uops = { { _TO_BOOL_LIST, 0, 0 } } }, + [TO_BOOL_NONE] = { .nuops = 1, .uops = { { _TO_BOOL_NONE, 0, 0 } } }, + [TO_BOOL_STR] = { .nuops = 1, .uops = { { _TO_BOOL_STR, 0, 0 } } }, + [UNARY_INVERT] = { .nuops = 1, .uops = { { _UNARY_INVERT, 0, 0 } } }, + [UNARY_NEGATIVE] = { .nuops = 1, .uops = { { _UNARY_NEGATIVE, 0, 0 } } }, + [UNARY_NOT] = { .nuops = 1, .uops = { { _UNARY_NOT, 0, 0 } } }, + [UNPACK_EX] = { .nuops = 1, .uops = { { _UNPACK_EX, 0, 0 } } }, + [UNPACK_SEQUENCE] = { .nuops = 1, .uops = { { _UNPACK_SEQUENCE, 0, 0 } } }, + [UNPACK_SEQUENCE_LIST] = { .nuops = 1, .uops = { { _UNPACK_SEQUENCE_LIST, 0, 0 } } }, + [UNPACK_SEQUENCE_TUPLE] = { .nuops = 1, .uops = { { _UNPACK_SEQUENCE_TUPLE, 0, 0 } } }, + [UNPACK_SEQUENCE_TWO_TUPLE] = { .nuops = 1, .uops = { { _UNPACK_SEQUENCE_TWO_TUPLE, 0, 0 } } }, + [WITH_EXCEPT_START] = { .nuops = 1, .uops = { { _WITH_EXCEPT_START, 0, 0 } } }, + [YIELD_VALUE] = { .nuops = 1, .uops = { { _YIELD_VALUE, 0, 0 } } }, +}; +#endif // NEED_OPCODE_METADATA + +extern const char *_PyOpcode_OpName[268]; +#ifdef NEED_OPCODE_METADATA +const char *_PyOpcode_OpName[268] = { + [BEFORE_ASYNC_WITH] = "BEFORE_ASYNC_WITH", + [BEFORE_WITH] = "BEFORE_WITH", + [BINARY_OP] = "BINARY_OP", + [BINARY_OP_ADD_FLOAT] = "BINARY_OP_ADD_FLOAT", + [BINARY_OP_ADD_INT] = "BINARY_OP_ADD_INT", + [BINARY_OP_ADD_UNICODE] = "BINARY_OP_ADD_UNICODE", + [BINARY_OP_INPLACE_ADD_UNICODE] = "BINARY_OP_INPLACE_ADD_UNICODE", + [BINARY_OP_MULTIPLY_FLOAT] = "BINARY_OP_MULTIPLY_FLOAT", + [BINARY_OP_MULTIPLY_INT] = "BINARY_OP_MULTIPLY_INT", + [BINARY_OP_SUBTRACT_FLOAT] = "BINARY_OP_SUBTRACT_FLOAT", + [BINARY_OP_SUBTRACT_INT] = "BINARY_OP_SUBTRACT_INT", + [BINARY_SLICE] = "BINARY_SLICE", + [BINARY_SUBSCR] = "BINARY_SUBSCR", + [BINARY_SUBSCR_DICT] = "BINARY_SUBSCR_DICT", + [BINARY_SUBSCR_GETITEM] = "BINARY_SUBSCR_GETITEM", + [BINARY_SUBSCR_LIST_INT] = "BINARY_SUBSCR_LIST_INT", + [BINARY_SUBSCR_STR_INT] = "BINARY_SUBSCR_STR_INT", + [BINARY_SUBSCR_TUPLE_INT] = "BINARY_SUBSCR_TUPLE_INT", + [BUILD_CONST_KEY_MAP] = "BUILD_CONST_KEY_MAP", + [BUILD_LIST] = "BUILD_LIST", + [BUILD_MAP] = "BUILD_MAP", + [BUILD_SET] = "BUILD_SET", + [BUILD_SLICE] = "BUILD_SLICE", + [BUILD_STRING] = "BUILD_STRING", + [BUILD_TUPLE] = "BUILD_TUPLE", + [CACHE] = "CACHE", + [CALL] = "CALL", + [CALL_ALLOC_AND_ENTER_INIT] = "CALL_ALLOC_AND_ENTER_INIT", + [CALL_BOUND_METHOD_EXACT_ARGS] = "CALL_BOUND_METHOD_EXACT_ARGS", + [CALL_BOUND_METHOD_GENERAL] = "CALL_BOUND_METHOD_GENERAL", + [CALL_BUILTIN_CLASS] = "CALL_BUILTIN_CLASS", + [CALL_BUILTIN_FAST] = "CALL_BUILTIN_FAST", + [CALL_BUILTIN_FAST_WITH_KEYWORDS] = "CALL_BUILTIN_FAST_WITH_KEYWORDS", + [CALL_BUILTIN_O] = "CALL_BUILTIN_O", + [CALL_FUNCTION_EX] = "CALL_FUNCTION_EX", + [CALL_INTRINSIC_1] = "CALL_INTRINSIC_1", + [CALL_INTRINSIC_2] = "CALL_INTRINSIC_2", + [CALL_ISINSTANCE] = "CALL_ISINSTANCE", + [CALL_KW] = "CALL_KW", + [CALL_LEN] = "CALL_LEN", + [CALL_LIST_APPEND] = "CALL_LIST_APPEND", + [CALL_METHOD_DESCRIPTOR_FAST] = "CALL_METHOD_DESCRIPTOR_FAST", + [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = "CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS", + [CALL_METHOD_DESCRIPTOR_NOARGS] = "CALL_METHOD_DESCRIPTOR_NOARGS", + [CALL_METHOD_DESCRIPTOR_O] = "CALL_METHOD_DESCRIPTOR_O", + [CALL_NON_PY_GENERAL] = "CALL_NON_PY_GENERAL", + [CALL_PY_EXACT_ARGS] = "CALL_PY_EXACT_ARGS", + [CALL_PY_GENERAL] = "CALL_PY_GENERAL", + [CALL_STR_1] = "CALL_STR_1", + [CALL_TUPLE_1] = "CALL_TUPLE_1", + [CALL_TYPE_1] = "CALL_TYPE_1", + [CHECK_EG_MATCH] = 
"CHECK_EG_MATCH", + [CHECK_EXC_MATCH] = "CHECK_EXC_MATCH", + [CLEANUP_THROW] = "CLEANUP_THROW", + [COMPARE_OP] = "COMPARE_OP", + [COMPARE_OP_FLOAT] = "COMPARE_OP_FLOAT", + [COMPARE_OP_INT] = "COMPARE_OP_INT", + [COMPARE_OP_STR] = "COMPARE_OP_STR", + [CONTAINS_OP] = "CONTAINS_OP", + [CONTAINS_OP_DICT] = "CONTAINS_OP_DICT", + [CONTAINS_OP_SET] = "CONTAINS_OP_SET", + [CONVERT_VALUE] = "CONVERT_VALUE", + [COPY] = "COPY", + [COPY_FREE_VARS] = "COPY_FREE_VARS", + [DELETE_ATTR] = "DELETE_ATTR", + [DELETE_DEREF] = "DELETE_DEREF", + [DELETE_FAST] = "DELETE_FAST", + [DELETE_GLOBAL] = "DELETE_GLOBAL", + [DELETE_NAME] = "DELETE_NAME", + [DELETE_SUBSCR] = "DELETE_SUBSCR", + [DICT_MERGE] = "DICT_MERGE", + [DICT_UPDATE] = "DICT_UPDATE", + [END_ASYNC_FOR] = "END_ASYNC_FOR", + [END_FOR] = "END_FOR", + [END_SEND] = "END_SEND", + [ENTER_EXECUTOR] = "ENTER_EXECUTOR", + [EXIT_INIT_CHECK] = "EXIT_INIT_CHECK", + [EXTENDED_ARG] = "EXTENDED_ARG", + [FORMAT_SIMPLE] = "FORMAT_SIMPLE", + [FORMAT_WITH_SPEC] = "FORMAT_WITH_SPEC", + [FOR_ITER] = "FOR_ITER", + [FOR_ITER_GEN] = "FOR_ITER_GEN", + [FOR_ITER_LIST] = "FOR_ITER_LIST", + [FOR_ITER_RANGE] = "FOR_ITER_RANGE", + [FOR_ITER_TUPLE] = "FOR_ITER_TUPLE", + [GET_AITER] = "GET_AITER", + [GET_ANEXT] = "GET_ANEXT", + [GET_AWAITABLE] = "GET_AWAITABLE", + [GET_ITER] = "GET_ITER", + [GET_LEN] = "GET_LEN", + [GET_YIELD_FROM_ITER] = "GET_YIELD_FROM_ITER", + [IMPORT_FROM] = "IMPORT_FROM", + [IMPORT_NAME] = "IMPORT_NAME", + [INSTRUMENTED_CALL] = "INSTRUMENTED_CALL", + [INSTRUMENTED_CALL_FUNCTION_EX] = "INSTRUMENTED_CALL_FUNCTION_EX", + [INSTRUMENTED_CALL_KW] = "INSTRUMENTED_CALL_KW", + [INSTRUMENTED_END_FOR] = "INSTRUMENTED_END_FOR", + [INSTRUMENTED_END_SEND] = "INSTRUMENTED_END_SEND", + [INSTRUMENTED_FOR_ITER] = "INSTRUMENTED_FOR_ITER", + [INSTRUMENTED_INSTRUCTION] = "INSTRUMENTED_INSTRUCTION", + [INSTRUMENTED_JUMP_BACKWARD] = "INSTRUMENTED_JUMP_BACKWARD", + [INSTRUMENTED_JUMP_FORWARD] = "INSTRUMENTED_JUMP_FORWARD", + [INSTRUMENTED_LINE] = "INSTRUMENTED_LINE", + [INSTRUMENTED_LOAD_SUPER_ATTR] = "INSTRUMENTED_LOAD_SUPER_ATTR", + [INSTRUMENTED_POP_JUMP_IF_FALSE] = "INSTRUMENTED_POP_JUMP_IF_FALSE", + [INSTRUMENTED_POP_JUMP_IF_NONE] = "INSTRUMENTED_POP_JUMP_IF_NONE", + [INSTRUMENTED_POP_JUMP_IF_NOT_NONE] = "INSTRUMENTED_POP_JUMP_IF_NOT_NONE", + [INSTRUMENTED_POP_JUMP_IF_TRUE] = "INSTRUMENTED_POP_JUMP_IF_TRUE", + [INSTRUMENTED_RESUME] = "INSTRUMENTED_RESUME", + [INSTRUMENTED_RETURN_CONST] = "INSTRUMENTED_RETURN_CONST", + [INSTRUMENTED_RETURN_VALUE] = "INSTRUMENTED_RETURN_VALUE", + [INSTRUMENTED_YIELD_VALUE] = "INSTRUMENTED_YIELD_VALUE", + [INTERPRETER_EXIT] = "INTERPRETER_EXIT", + [IS_OP] = "IS_OP", + [JUMP] = "JUMP", + [JUMP_BACKWARD] = "JUMP_BACKWARD", + [JUMP_BACKWARD_NO_INTERRUPT] = "JUMP_BACKWARD_NO_INTERRUPT", + [JUMP_FORWARD] = "JUMP_FORWARD", + [JUMP_NO_INTERRUPT] = "JUMP_NO_INTERRUPT", + [LIST_APPEND] = "LIST_APPEND", + [LIST_EXTEND] = "LIST_EXTEND", + [LOAD_ASSERTION_ERROR] = "LOAD_ASSERTION_ERROR", + [LOAD_ATTR] = "LOAD_ATTR", + [LOAD_ATTR_CLASS] = "LOAD_ATTR_CLASS", + [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = "LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN", + [LOAD_ATTR_INSTANCE_VALUE] = "LOAD_ATTR_INSTANCE_VALUE", + [LOAD_ATTR_METHOD_LAZY_DICT] = "LOAD_ATTR_METHOD_LAZY_DICT", + [LOAD_ATTR_METHOD_NO_DICT] = "LOAD_ATTR_METHOD_NO_DICT", + [LOAD_ATTR_METHOD_WITH_VALUES] = "LOAD_ATTR_METHOD_WITH_VALUES", + [LOAD_ATTR_MODULE] = "LOAD_ATTR_MODULE", + [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = "LOAD_ATTR_NONDESCRIPTOR_NO_DICT", + [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = 
"LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES", + [LOAD_ATTR_PROPERTY] = "LOAD_ATTR_PROPERTY", + [LOAD_ATTR_SLOT] = "LOAD_ATTR_SLOT", + [LOAD_ATTR_WITH_HINT] = "LOAD_ATTR_WITH_HINT", + [LOAD_BUILD_CLASS] = "LOAD_BUILD_CLASS", + [LOAD_CLOSURE] = "LOAD_CLOSURE", + [LOAD_CONST] = "LOAD_CONST", + [LOAD_DEREF] = "LOAD_DEREF", + [LOAD_FAST] = "LOAD_FAST", + [LOAD_FAST_AND_CLEAR] = "LOAD_FAST_AND_CLEAR", + [LOAD_FAST_CHECK] = "LOAD_FAST_CHECK", + [LOAD_FAST_LOAD_FAST] = "LOAD_FAST_LOAD_FAST", + [LOAD_FROM_DICT_OR_DEREF] = "LOAD_FROM_DICT_OR_DEREF", + [LOAD_FROM_DICT_OR_GLOBALS] = "LOAD_FROM_DICT_OR_GLOBALS", + [LOAD_GLOBAL] = "LOAD_GLOBAL", + [LOAD_GLOBAL_BUILTIN] = "LOAD_GLOBAL_BUILTIN", + [LOAD_GLOBAL_MODULE] = "LOAD_GLOBAL_MODULE", + [LOAD_LOCALS] = "LOAD_LOCALS", + [LOAD_METHOD] = "LOAD_METHOD", + [LOAD_NAME] = "LOAD_NAME", + [LOAD_SUPER_ATTR] = "LOAD_SUPER_ATTR", + [LOAD_SUPER_ATTR_ATTR] = "LOAD_SUPER_ATTR_ATTR", + [LOAD_SUPER_ATTR_METHOD] = "LOAD_SUPER_ATTR_METHOD", + [LOAD_SUPER_METHOD] = "LOAD_SUPER_METHOD", + [LOAD_ZERO_SUPER_ATTR] = "LOAD_ZERO_SUPER_ATTR", + [LOAD_ZERO_SUPER_METHOD] = "LOAD_ZERO_SUPER_METHOD", + [MAKE_CELL] = "MAKE_CELL", + [MAKE_FUNCTION] = "MAKE_FUNCTION", + [MAP_ADD] = "MAP_ADD", + [MATCH_CLASS] = "MATCH_CLASS", + [MATCH_KEYS] = "MATCH_KEYS", + [MATCH_MAPPING] = "MATCH_MAPPING", + [MATCH_SEQUENCE] = "MATCH_SEQUENCE", + [NOP] = "NOP", + [POP_BLOCK] = "POP_BLOCK", + [POP_EXCEPT] = "POP_EXCEPT", + [POP_JUMP_IF_FALSE] = "POP_JUMP_IF_FALSE", + [POP_JUMP_IF_NONE] = "POP_JUMP_IF_NONE", + [POP_JUMP_IF_NOT_NONE] = "POP_JUMP_IF_NOT_NONE", + [POP_JUMP_IF_TRUE] = "POP_JUMP_IF_TRUE", + [POP_TOP] = "POP_TOP", + [PUSH_EXC_INFO] = "PUSH_EXC_INFO", + [PUSH_NULL] = "PUSH_NULL", + [RAISE_VARARGS] = "RAISE_VARARGS", + [RERAISE] = "RERAISE", + [RESERVED] = "RESERVED", + [RESUME] = "RESUME", + [RESUME_CHECK] = "RESUME_CHECK", + [RETURN_CONST] = "RETURN_CONST", + [RETURN_GENERATOR] = "RETURN_GENERATOR", + [RETURN_VALUE] = "RETURN_VALUE", + [SEND] = "SEND", + [SEND_GEN] = "SEND_GEN", + [SETUP_ANNOTATIONS] = "SETUP_ANNOTATIONS", + [SETUP_CLEANUP] = "SETUP_CLEANUP", + [SETUP_FINALLY] = "SETUP_FINALLY", + [SETUP_WITH] = "SETUP_WITH", + [SET_ADD] = "SET_ADD", + [SET_FUNCTION_ATTRIBUTE] = "SET_FUNCTION_ATTRIBUTE", + [SET_UPDATE] = "SET_UPDATE", + [STORE_ATTR] = "STORE_ATTR", + [STORE_ATTR_INSTANCE_VALUE] = "STORE_ATTR_INSTANCE_VALUE", + [STORE_ATTR_SLOT] = "STORE_ATTR_SLOT", + [STORE_ATTR_WITH_HINT] = "STORE_ATTR_WITH_HINT", + [STORE_DEREF] = "STORE_DEREF", + [STORE_FAST] = "STORE_FAST", + [STORE_FAST_LOAD_FAST] = "STORE_FAST_LOAD_FAST", + [STORE_FAST_MAYBE_NULL] = "STORE_FAST_MAYBE_NULL", + [STORE_FAST_STORE_FAST] = "STORE_FAST_STORE_FAST", + [STORE_GLOBAL] = "STORE_GLOBAL", + [STORE_NAME] = "STORE_NAME", + [STORE_SLICE] = "STORE_SLICE", + [STORE_SUBSCR] = "STORE_SUBSCR", + [STORE_SUBSCR_DICT] = "STORE_SUBSCR_DICT", + [STORE_SUBSCR_LIST_INT] = "STORE_SUBSCR_LIST_INT", + [SWAP] = "SWAP", + [TO_BOOL] = "TO_BOOL", + [TO_BOOL_ALWAYS_TRUE] = "TO_BOOL_ALWAYS_TRUE", + [TO_BOOL_BOOL] = "TO_BOOL_BOOL", + [TO_BOOL_INT] = "TO_BOOL_INT", + [TO_BOOL_LIST] = "TO_BOOL_LIST", + [TO_BOOL_NONE] = "TO_BOOL_NONE", + [TO_BOOL_STR] = "TO_BOOL_STR", + [UNARY_INVERT] = "UNARY_INVERT", + [UNARY_NEGATIVE] = "UNARY_NEGATIVE", + [UNARY_NOT] = "UNARY_NOT", + [UNPACK_EX] = "UNPACK_EX", + [UNPACK_SEQUENCE] = "UNPACK_SEQUENCE", + [UNPACK_SEQUENCE_LIST] = "UNPACK_SEQUENCE_LIST", + [UNPACK_SEQUENCE_TUPLE] = "UNPACK_SEQUENCE_TUPLE", + [UNPACK_SEQUENCE_TWO_TUPLE] = "UNPACK_SEQUENCE_TWO_TUPLE", + [WITH_EXCEPT_START] = "WITH_EXCEPT_START", 
+ [YIELD_VALUE] = "YIELD_VALUE", +}; +#endif + +extern const uint8_t _PyOpcode_Caches[256]; +#ifdef NEED_OPCODE_METADATA +const uint8_t _PyOpcode_Caches[256] = { + [JUMP_BACKWARD] = 1, + [TO_BOOL] = 3, + [BINARY_SUBSCR] = 1, + [STORE_SUBSCR] = 1, + [SEND] = 1, + [UNPACK_SEQUENCE] = 1, + [STORE_ATTR] = 4, + [LOAD_GLOBAL] = 4, + [LOAD_SUPER_ATTR] = 1, + [LOAD_ATTR] = 9, + [COMPARE_OP] = 1, + [CONTAINS_OP] = 1, + [POP_JUMP_IF_TRUE] = 1, + [POP_JUMP_IF_FALSE] = 1, + [POP_JUMP_IF_NONE] = 1, + [POP_JUMP_IF_NOT_NONE] = 1, + [FOR_ITER] = 1, + [CALL] = 3, + [BINARY_OP] = 1, +}; +#endif + +extern const uint8_t _PyOpcode_Deopt[256]; +#ifdef NEED_OPCODE_METADATA +const uint8_t _PyOpcode_Deopt[256] = { + [BEFORE_ASYNC_WITH] = BEFORE_ASYNC_WITH, + [BEFORE_WITH] = BEFORE_WITH, + [BINARY_OP] = BINARY_OP, + [BINARY_OP_ADD_FLOAT] = BINARY_OP, + [BINARY_OP_ADD_INT] = BINARY_OP, + [BINARY_OP_ADD_UNICODE] = BINARY_OP, + [BINARY_OP_INPLACE_ADD_UNICODE] = BINARY_OP, + [BINARY_OP_MULTIPLY_FLOAT] = BINARY_OP, + [BINARY_OP_MULTIPLY_INT] = BINARY_OP, + [BINARY_OP_SUBTRACT_FLOAT] = BINARY_OP, + [BINARY_OP_SUBTRACT_INT] = BINARY_OP, + [BINARY_SLICE] = BINARY_SLICE, + [BINARY_SUBSCR] = BINARY_SUBSCR, + [BINARY_SUBSCR_DICT] = BINARY_SUBSCR, + [BINARY_SUBSCR_GETITEM] = BINARY_SUBSCR, + [BINARY_SUBSCR_LIST_INT] = BINARY_SUBSCR, + [BINARY_SUBSCR_STR_INT] = BINARY_SUBSCR, + [BINARY_SUBSCR_TUPLE_INT] = BINARY_SUBSCR, + [BUILD_CONST_KEY_MAP] = BUILD_CONST_KEY_MAP, + [BUILD_LIST] = BUILD_LIST, + [BUILD_MAP] = BUILD_MAP, + [BUILD_SET] = BUILD_SET, + [BUILD_SLICE] = BUILD_SLICE, + [BUILD_STRING] = BUILD_STRING, + [BUILD_TUPLE] = BUILD_TUPLE, + [CACHE] = CACHE, + [CALL] = CALL, + [CALL_ALLOC_AND_ENTER_INIT] = CALL, + [CALL_BOUND_METHOD_EXACT_ARGS] = CALL, + [CALL_BOUND_METHOD_GENERAL] = CALL, + [CALL_BUILTIN_CLASS] = CALL, + [CALL_BUILTIN_FAST] = CALL, + [CALL_BUILTIN_FAST_WITH_KEYWORDS] = CALL, + [CALL_BUILTIN_O] = CALL, + [CALL_FUNCTION_EX] = CALL_FUNCTION_EX, + [CALL_INTRINSIC_1] = CALL_INTRINSIC_1, + [CALL_INTRINSIC_2] = CALL_INTRINSIC_2, + [CALL_ISINSTANCE] = CALL, + [CALL_KW] = CALL_KW, + [CALL_LEN] = CALL, + [CALL_LIST_APPEND] = CALL, + [CALL_METHOD_DESCRIPTOR_FAST] = CALL, + [CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = CALL, + [CALL_METHOD_DESCRIPTOR_NOARGS] = CALL, + [CALL_METHOD_DESCRIPTOR_O] = CALL, + [CALL_NON_PY_GENERAL] = CALL, + [CALL_PY_EXACT_ARGS] = CALL, + [CALL_PY_GENERAL] = CALL, + [CALL_STR_1] = CALL, + [CALL_TUPLE_1] = CALL, + [CALL_TYPE_1] = CALL, + [CHECK_EG_MATCH] = CHECK_EG_MATCH, + [CHECK_EXC_MATCH] = CHECK_EXC_MATCH, + [CLEANUP_THROW] = CLEANUP_THROW, + [COMPARE_OP] = COMPARE_OP, + [COMPARE_OP_FLOAT] = COMPARE_OP, + [COMPARE_OP_INT] = COMPARE_OP, + [COMPARE_OP_STR] = COMPARE_OP, + [CONTAINS_OP] = CONTAINS_OP, + [CONTAINS_OP_DICT] = CONTAINS_OP, + [CONTAINS_OP_SET] = CONTAINS_OP, + [CONVERT_VALUE] = CONVERT_VALUE, + [COPY] = COPY, + [COPY_FREE_VARS] = COPY_FREE_VARS, + [DELETE_ATTR] = DELETE_ATTR, + [DELETE_DEREF] = DELETE_DEREF, + [DELETE_FAST] = DELETE_FAST, + [DELETE_GLOBAL] = DELETE_GLOBAL, + [DELETE_NAME] = DELETE_NAME, + [DELETE_SUBSCR] = DELETE_SUBSCR, + [DICT_MERGE] = DICT_MERGE, + [DICT_UPDATE] = DICT_UPDATE, + [END_ASYNC_FOR] = END_ASYNC_FOR, + [END_FOR] = END_FOR, + [END_SEND] = END_SEND, + [ENTER_EXECUTOR] = ENTER_EXECUTOR, + [EXIT_INIT_CHECK] = EXIT_INIT_CHECK, + [EXTENDED_ARG] = EXTENDED_ARG, + [FORMAT_SIMPLE] = FORMAT_SIMPLE, + [FORMAT_WITH_SPEC] = FORMAT_WITH_SPEC, + [FOR_ITER] = FOR_ITER, + [FOR_ITER_GEN] = FOR_ITER, + [FOR_ITER_LIST] = FOR_ITER, + [FOR_ITER_RANGE] = FOR_ITER, + 
[FOR_ITER_TUPLE] = FOR_ITER, + [GET_AITER] = GET_AITER, + [GET_ANEXT] = GET_ANEXT, + [GET_AWAITABLE] = GET_AWAITABLE, + [GET_ITER] = GET_ITER, + [GET_LEN] = GET_LEN, + [GET_YIELD_FROM_ITER] = GET_YIELD_FROM_ITER, + [IMPORT_FROM] = IMPORT_FROM, + [IMPORT_NAME] = IMPORT_NAME, + [INSTRUMENTED_CALL] = INSTRUMENTED_CALL, + [INSTRUMENTED_CALL_FUNCTION_EX] = INSTRUMENTED_CALL_FUNCTION_EX, + [INSTRUMENTED_CALL_KW] = INSTRUMENTED_CALL_KW, + [INSTRUMENTED_END_FOR] = INSTRUMENTED_END_FOR, + [INSTRUMENTED_END_SEND] = INSTRUMENTED_END_SEND, + [INSTRUMENTED_FOR_ITER] = INSTRUMENTED_FOR_ITER, + [INSTRUMENTED_INSTRUCTION] = INSTRUMENTED_INSTRUCTION, + [INSTRUMENTED_JUMP_BACKWARD] = INSTRUMENTED_JUMP_BACKWARD, + [INSTRUMENTED_JUMP_FORWARD] = INSTRUMENTED_JUMP_FORWARD, + [INSTRUMENTED_LINE] = INSTRUMENTED_LINE, + [INSTRUMENTED_LOAD_SUPER_ATTR] = INSTRUMENTED_LOAD_SUPER_ATTR, + [INSTRUMENTED_POP_JUMP_IF_FALSE] = INSTRUMENTED_POP_JUMP_IF_FALSE, + [INSTRUMENTED_POP_JUMP_IF_NONE] = INSTRUMENTED_POP_JUMP_IF_NONE, + [INSTRUMENTED_POP_JUMP_IF_NOT_NONE] = INSTRUMENTED_POP_JUMP_IF_NOT_NONE, + [INSTRUMENTED_POP_JUMP_IF_TRUE] = INSTRUMENTED_POP_JUMP_IF_TRUE, + [INSTRUMENTED_RESUME] = INSTRUMENTED_RESUME, + [INSTRUMENTED_RETURN_CONST] = INSTRUMENTED_RETURN_CONST, + [INSTRUMENTED_RETURN_VALUE] = INSTRUMENTED_RETURN_VALUE, + [INSTRUMENTED_YIELD_VALUE] = INSTRUMENTED_YIELD_VALUE, + [INTERPRETER_EXIT] = INTERPRETER_EXIT, + [IS_OP] = IS_OP, + [JUMP_BACKWARD] = JUMP_BACKWARD, + [JUMP_BACKWARD_NO_INTERRUPT] = JUMP_BACKWARD_NO_INTERRUPT, + [JUMP_FORWARD] = JUMP_FORWARD, + [LIST_APPEND] = LIST_APPEND, + [LIST_EXTEND] = LIST_EXTEND, + [LOAD_ASSERTION_ERROR] = LOAD_ASSERTION_ERROR, + [LOAD_ATTR] = LOAD_ATTR, + [LOAD_ATTR_CLASS] = LOAD_ATTR, + [LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN] = LOAD_ATTR, + [LOAD_ATTR_INSTANCE_VALUE] = LOAD_ATTR, + [LOAD_ATTR_METHOD_LAZY_DICT] = LOAD_ATTR, + [LOAD_ATTR_METHOD_NO_DICT] = LOAD_ATTR, + [LOAD_ATTR_METHOD_WITH_VALUES] = LOAD_ATTR, + [LOAD_ATTR_MODULE] = LOAD_ATTR, + [LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = LOAD_ATTR, + [LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = LOAD_ATTR, + [LOAD_ATTR_PROPERTY] = LOAD_ATTR, + [LOAD_ATTR_SLOT] = LOAD_ATTR, + [LOAD_ATTR_WITH_HINT] = LOAD_ATTR, + [LOAD_BUILD_CLASS] = LOAD_BUILD_CLASS, + [LOAD_CONST] = LOAD_CONST, + [LOAD_DEREF] = LOAD_DEREF, + [LOAD_FAST] = LOAD_FAST, + [LOAD_FAST_AND_CLEAR] = LOAD_FAST_AND_CLEAR, + [LOAD_FAST_CHECK] = LOAD_FAST_CHECK, + [LOAD_FAST_LOAD_FAST] = LOAD_FAST_LOAD_FAST, + [LOAD_FROM_DICT_OR_DEREF] = LOAD_FROM_DICT_OR_DEREF, + [LOAD_FROM_DICT_OR_GLOBALS] = LOAD_FROM_DICT_OR_GLOBALS, + [LOAD_GLOBAL] = LOAD_GLOBAL, + [LOAD_GLOBAL_BUILTIN] = LOAD_GLOBAL, + [LOAD_GLOBAL_MODULE] = LOAD_GLOBAL, + [LOAD_LOCALS] = LOAD_LOCALS, + [LOAD_NAME] = LOAD_NAME, + [LOAD_SUPER_ATTR] = LOAD_SUPER_ATTR, + [LOAD_SUPER_ATTR_ATTR] = LOAD_SUPER_ATTR, + [LOAD_SUPER_ATTR_METHOD] = LOAD_SUPER_ATTR, + [MAKE_CELL] = MAKE_CELL, + [MAKE_FUNCTION] = MAKE_FUNCTION, + [MAP_ADD] = MAP_ADD, + [MATCH_CLASS] = MATCH_CLASS, + [MATCH_KEYS] = MATCH_KEYS, + [MATCH_MAPPING] = MATCH_MAPPING, + [MATCH_SEQUENCE] = MATCH_SEQUENCE, + [NOP] = NOP, + [POP_EXCEPT] = POP_EXCEPT, + [POP_JUMP_IF_FALSE] = POP_JUMP_IF_FALSE, + [POP_JUMP_IF_NONE] = POP_JUMP_IF_NONE, + [POP_JUMP_IF_NOT_NONE] = POP_JUMP_IF_NOT_NONE, + [POP_JUMP_IF_TRUE] = POP_JUMP_IF_TRUE, + [POP_TOP] = POP_TOP, + [PUSH_EXC_INFO] = PUSH_EXC_INFO, + [PUSH_NULL] = PUSH_NULL, + [RAISE_VARARGS] = RAISE_VARARGS, + [RERAISE] = RERAISE, + [RESERVED] = RESERVED, + [RESUME] = RESUME, + [RESUME_CHECK] = RESUME, + [RETURN_CONST] = RETURN_CONST, + 
[RETURN_GENERATOR] = RETURN_GENERATOR, + [RETURN_VALUE] = RETURN_VALUE, + [SEND] = SEND, + [SEND_GEN] = SEND, + [SETUP_ANNOTATIONS] = SETUP_ANNOTATIONS, + [SET_ADD] = SET_ADD, + [SET_FUNCTION_ATTRIBUTE] = SET_FUNCTION_ATTRIBUTE, + [SET_UPDATE] = SET_UPDATE, + [STORE_ATTR] = STORE_ATTR, + [STORE_ATTR_INSTANCE_VALUE] = STORE_ATTR, + [STORE_ATTR_SLOT] = STORE_ATTR, + [STORE_ATTR_WITH_HINT] = STORE_ATTR, + [STORE_DEREF] = STORE_DEREF, + [STORE_FAST] = STORE_FAST, + [STORE_FAST_LOAD_FAST] = STORE_FAST_LOAD_FAST, + [STORE_FAST_STORE_FAST] = STORE_FAST_STORE_FAST, + [STORE_GLOBAL] = STORE_GLOBAL, + [STORE_NAME] = STORE_NAME, + [STORE_SLICE] = STORE_SLICE, + [STORE_SUBSCR] = STORE_SUBSCR, + [STORE_SUBSCR_DICT] = STORE_SUBSCR, + [STORE_SUBSCR_LIST_INT] = STORE_SUBSCR, + [SWAP] = SWAP, + [TO_BOOL] = TO_BOOL, + [TO_BOOL_ALWAYS_TRUE] = TO_BOOL, + [TO_BOOL_BOOL] = TO_BOOL, + [TO_BOOL_INT] = TO_BOOL, + [TO_BOOL_LIST] = TO_BOOL, + [TO_BOOL_NONE] = TO_BOOL, + [TO_BOOL_STR] = TO_BOOL, + [UNARY_INVERT] = UNARY_INVERT, + [UNARY_NEGATIVE] = UNARY_NEGATIVE, + [UNARY_NOT] = UNARY_NOT, + [UNPACK_EX] = UNPACK_EX, + [UNPACK_SEQUENCE] = UNPACK_SEQUENCE, + [UNPACK_SEQUENCE_LIST] = UNPACK_SEQUENCE, + [UNPACK_SEQUENCE_TUPLE] = UNPACK_SEQUENCE, + [UNPACK_SEQUENCE_TWO_TUPLE] = UNPACK_SEQUENCE, + [WITH_EXCEPT_START] = WITH_EXCEPT_START, + [YIELD_VALUE] = YIELD_VALUE, +}; + +#endif // NEED_OPCODE_METADATA + +#define EXTRA_CASES \ + case 119: \ + case 120: \ + case 121: \ + case 122: \ + case 123: \ + case 124: \ + case 125: \ + case 126: \ + case 127: \ + case 128: \ + case 129: \ + case 130: \ + case 131: \ + case 132: \ + case 133: \ + case 134: \ + case 135: \ + case 136: \ + case 137: \ + case 138: \ + case 139: \ + case 140: \ + case 141: \ + case 142: \ + case 143: \ + case 144: \ + case 145: \ + case 146: \ + case 147: \ + case 148: \ + case 223: \ + case 224: \ + case 225: \ + case 226: \ + case 227: \ + case 228: \ + case 229: \ + case 230: \ + case 231: \ + case 232: \ + case 233: \ + case 234: \ + case 235: \ + case 255: \ + ; +struct pseudo_targets { + uint8_t targets[3]; +}; +extern const struct pseudo_targets _PyOpcode_PseudoTargets[12]; +#ifdef NEED_OPCODE_METADATA +const struct pseudo_targets _PyOpcode_PseudoTargets[12] = { + [LOAD_CLOSURE-256] = { { LOAD_FAST, 0, 0 } }, + [STORE_FAST_MAYBE_NULL-256] = { { STORE_FAST, 0, 0 } }, + [LOAD_SUPER_METHOD-256] = { { LOAD_SUPER_ATTR, 0, 0 } }, + [LOAD_ZERO_SUPER_METHOD-256] = { { LOAD_SUPER_ATTR, 0, 0 } }, + [LOAD_ZERO_SUPER_ATTR-256] = { { LOAD_SUPER_ATTR, 0, 0 } }, + [LOAD_METHOD-256] = { { LOAD_ATTR, 0, 0 } }, + [JUMP-256] = { { JUMP_FORWARD, JUMP_BACKWARD, 0 } }, + [JUMP_NO_INTERRUPT-256] = { { JUMP_FORWARD, JUMP_BACKWARD_NO_INTERRUPT, 0 } }, + [SETUP_FINALLY-256] = { { NOP, 0, 0 } }, + [SETUP_CLEANUP-256] = { { NOP, 0, 0 } }, + [SETUP_WITH-256] = { { NOP, 0, 0 } }, + [POP_BLOCK-256] = { { NOP, 0, 0 } }, +}; + +#endif // NEED_OPCODE_METADATA +static inline bool +is_pseudo_target(int pseudo, int target) { + if (pseudo < 256 || pseudo >= 268) { + return false; + } + for (int i = 0; _PyOpcode_PseudoTargets[pseudo-256].targets[i]; i++) { + if (_PyOpcode_PseudoTargets[pseudo-256].targets[i] == target) return true; + } + return false; +} + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_CORE_OPCODE_METADATA_H */ diff --git a/Include/internal/pycore_opcode_utils.h b/Include/internal/pycore_opcode_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..208bfb2f75308bc80e39d07e783026cf226789ac --- /dev/null +++ 
b/Include/internal/pycore_opcode_utils.h @@ -0,0 +1,73 @@
+#ifndef Py_INTERNAL_OPCODE_UTILS_H
+#define Py_INTERNAL_OPCODE_UTILS_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include "opcode_ids.h"
+
+#define MAX_REAL_OPCODE 254
+
+#define IS_WITHIN_OPCODE_RANGE(opcode) \
+        (((opcode) >= 0 && (opcode) <= MAX_REAL_OPCODE) || \
+         IS_PSEUDO_INSTR(opcode))
+
+#define IS_BLOCK_PUSH_OPCODE(opcode) \
+        ((opcode) == SETUP_FINALLY || \
+         (opcode) == SETUP_WITH || \
+         (opcode) == SETUP_CLEANUP)
+
+#define HAS_TARGET(opcode) \
+        (OPCODE_HAS_JUMP(opcode) || IS_BLOCK_PUSH_OPCODE(opcode))
+
+/* opcodes that must be last in the basicblock */
+#define IS_TERMINATOR_OPCODE(opcode) \
+        (OPCODE_HAS_JUMP(opcode) || IS_SCOPE_EXIT_OPCODE(opcode))
+
+/* opcodes which are not emitted in codegen stage, only by the assembler */
+#define IS_ASSEMBLER_OPCODE(opcode) \
+        ((opcode) == JUMP_FORWARD || \
+         (opcode) == JUMP_BACKWARD || \
+         (opcode) == JUMP_BACKWARD_NO_INTERRUPT)
+
+#define IS_BACKWARDS_JUMP_OPCODE(opcode) \
+        ((opcode) == JUMP_BACKWARD || \
+         (opcode) == JUMP_BACKWARD_NO_INTERRUPT)
+
+#define IS_UNCONDITIONAL_JUMP_OPCODE(opcode) \
+        ((opcode) == JUMP || \
+         (opcode) == JUMP_NO_INTERRUPT || \
+         (opcode) == JUMP_FORWARD || \
+         (opcode) == JUMP_BACKWARD || \
+         (opcode) == JUMP_BACKWARD_NO_INTERRUPT)
+
+#define IS_SCOPE_EXIT_OPCODE(opcode) \
+        ((opcode) == RETURN_VALUE || \
+         (opcode) == RETURN_CONST || \
+         (opcode) == RAISE_VARARGS || \
+         (opcode) == RERAISE)
+
+
+/* Flags used in the oparg for MAKE_FUNCTION */
+#define MAKE_FUNCTION_DEFAULTS 0x01
+#define MAKE_FUNCTION_KWDEFAULTS 0x02
+#define MAKE_FUNCTION_ANNOTATIONS 0x04
+#define MAKE_FUNCTION_CLOSURE 0x08
+
+/* Values used in the oparg for RESUME */
+#define RESUME_AT_FUNC_START 0
+#define RESUME_AFTER_YIELD 1
+#define RESUME_AFTER_YIELD_FROM 2
+#define RESUME_AFTER_AWAIT 3
+
+#define RESUME_OPARG_LOCATION_MASK 0x3
+#define RESUME_OPARG_DEPTH1_MASK 0x4
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_OPCODE_UTILS_H */
diff --git a/Include/internal/pycore_optimizer.h b/Include/internal/pycore_optimizer.h
new file mode 100644
index 0000000000000000000000000000000000000000..49aa67c6f3ccc0d111f721b575268228a930a35d
--- /dev/null
+++ b/Include/internal/pycore_optimizer.h
@@ -0,0 +1,272 @@
+#ifndef Py_INTERNAL_OPTIMIZER_H
+#define Py_INTERNAL_OPTIMIZER_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include "pycore_uop_ids.h"
+#include <stdbool.h>
+
+
+typedef struct _PyExecutorLinkListNode {
+    struct _PyExecutorObject *next;
+    struct _PyExecutorObject *previous;
+} _PyExecutorLinkListNode;
+
+
+/* Bloom filter with m = 256
+ * https://en.wikipedia.org/wiki/Bloom_filter */
+#define BLOOM_FILTER_WORDS 8
+
+typedef struct _bloom_filter {
+    uint32_t bits[BLOOM_FILTER_WORDS];
+} _PyBloomFilter;
+
+typedef struct {
+    uint8_t opcode;
+    uint8_t oparg;
+    uint8_t valid;
+    uint8_t linked;
+    int index;  // Index of ENTER_EXECUTOR (if code isn't NULL, below).
+    _PyBloomFilter bloom;
+    _PyExecutorLinkListNode links;
+    PyCodeObject *code;  // Weak (NULL if no corresponding ENTER_EXECUTOR).
+} _PyVMData; + +#define UOP_FORMAT_TARGET 0 +#define UOP_FORMAT_EXIT 1 +#define UOP_FORMAT_JUMP 2 +#define UOP_FORMAT_UNUSED 3 + +/* Depending on the format, + * the 32 bits between the oparg and operand are: + * UOP_FORMAT_TARGET: + * uint32_t target; + * UOP_FORMAT_EXIT + * uint16_t exit_index; + * uint16_t error_target; + * UOP_FORMAT_JUMP + * uint16_t jump_target; + * uint16_t error_target; + */ +typedef struct { + uint16_t opcode:14; + uint16_t format:2; + uint16_t oparg; + union { + uint32_t target; + struct { + union { + uint16_t exit_index; + uint16_t jump_target; + }; + uint16_t error_target; + }; + }; + uint64_t operand; // A cache entry +} _PyUOpInstruction; + +static inline uint32_t uop_get_target(const _PyUOpInstruction *inst) +{ + assert(inst->format == UOP_FORMAT_TARGET); + return inst->target; +} + +static inline uint16_t uop_get_exit_index(const _PyUOpInstruction *inst) +{ + assert(inst->format == UOP_FORMAT_EXIT); + return inst->exit_index; +} + +static inline uint16_t uop_get_jump_target(const _PyUOpInstruction *inst) +{ + assert(inst->format == UOP_FORMAT_JUMP); + return inst->jump_target; +} + +static inline uint16_t uop_get_error_target(const _PyUOpInstruction *inst) +{ + assert(inst->format != UOP_FORMAT_TARGET); + return inst->error_target; +} + +typedef struct _exit_data { + uint32_t target; + _Py_BackoffCounter temperature; + const struct _PyExecutorObject *executor; +} _PyExitData; + +typedef struct _PyExecutorObject { + PyObject_VAR_HEAD + const _PyUOpInstruction *trace; + _PyVMData vm_data; /* Used by the VM, but opaque to the optimizer */ + uint32_t exit_count; + uint32_t code_size; + size_t jit_size; + void *jit_code; + void *jit_side_entry; + _PyExitData exits[1]; +} _PyExecutorObject; + +typedef struct _PyOptimizerObject _PyOptimizerObject; + +/* Should return > 0 if a new executor is created. O if no executor is produced and < 0 if an error occurred. 
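+ * In other words: on a positive return, a new executor has been stored in
+ * *exec_ptr; on a zero return, no executor was produced; on a negative
+ * return, an error occurred and an exception should be set.
+ *
+ * A minimal "always decline" slot would look like this (illustrative
+ * sketch only; the name is invented):
+ *
+ *   static int
+ *   never_optimize(_PyOptimizerObject *self, struct _PyInterpreterFrame *frame,
+ *                  _Py_CODEUNIT *instr, _PyExecutorObject **exec_ptr,
+ *                  int curr_stackentries)
+ *   {
+ *       return 0;   // no executor produced, not an error
+ *   }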
*/ +typedef int (*optimize_func)( + _PyOptimizerObject* self, struct _PyInterpreterFrame *frame, + _Py_CODEUNIT *instr, _PyExecutorObject **exec_ptr, + int curr_stackentries); + +struct _PyOptimizerObject { + PyObject_HEAD + optimize_func optimize; + /* Data needed by the optimizer goes here, but is opaque to the VM */ +}; + +/** Test support **/ +typedef struct { + _PyOptimizerObject base; + int64_t count; +} _PyCounterOptimizerObject; + +_PyOptimizerObject *_Py_SetOptimizer(PyInterpreterState *interp, _PyOptimizerObject* optimizer); + +PyAPI_FUNC(int) _Py_SetTier2Optimizer(_PyOptimizerObject* optimizer); + +PyAPI_FUNC(_PyOptimizerObject *) _Py_GetOptimizer(void); + +PyAPI_FUNC(_PyExecutorObject *) _Py_GetExecutor(PyCodeObject *code, int offset); + +void _Py_ExecutorInit(_PyExecutorObject *, const _PyBloomFilter *); +void _Py_ExecutorDetach(_PyExecutorObject *); +void _Py_BloomFilter_Init(_PyBloomFilter *); +void _Py_BloomFilter_Add(_PyBloomFilter *bloom, void *obj); +PyAPI_FUNC(void) _Py_Executor_DependsOn(_PyExecutorObject *executor, void *obj); +/* For testing */ +PyAPI_FUNC(PyObject *) _PyOptimizer_NewCounter(void); +PyAPI_FUNC(PyObject *) _PyOptimizer_NewUOpOptimizer(void); + +#define _Py_MAX_ALLOWED_BUILTINS_MODIFICATIONS 3 +#define _Py_MAX_ALLOWED_GLOBALS_MODIFICATIONS 6 + +#ifdef _Py_TIER2 +PyAPI_FUNC(void) _Py_Executors_InvalidateDependency(PyInterpreterState *interp, void *obj, int is_invalidation); +PyAPI_FUNC(void) _Py_Executors_InvalidateAll(PyInterpreterState *interp, int is_invalidation); +#else +# define _Py_Executors_InvalidateDependency(A, B, C) ((void)0) +# define _Py_Executors_InvalidateAll(A, B) ((void)0) +#endif + + +// This is the length of the trace we project initially. +#define UOP_MAX_TRACE_LENGTH 800 + +#define TRACE_STACK_SIZE 5 + +int _Py_uop_analyze_and_optimize(struct _PyInterpreterFrame *frame, + _PyUOpInstruction *trace, int trace_len, int curr_stackentries, + _PyBloomFilter *dependencies); + +extern PyTypeObject _PyCounterExecutor_Type; +extern PyTypeObject _PyCounterOptimizer_Type; +extern PyTypeObject _PyDefaultOptimizer_Type; +extern PyTypeObject _PyUOpExecutor_Type; +extern PyTypeObject _PyUOpOptimizer_Type; + +/* Symbols */ +/* See explanation in optimizer_symbols.c */ + +struct _Py_UopsSymbol { + int flags; // 0 bits: Top; 2 or more bits: Bottom + PyTypeObject *typ; // Borrowed reference + PyObject *const_val; // Owned reference (!) +}; + +// Holds locals, stack, locals, stack ... co_consts (in that order) +#define MAX_ABSTRACT_INTERP_SIZE 4096 + +#define TY_ARENA_SIZE (UOP_MAX_TRACE_LENGTH * 5) + +// Need extras for root frame and for overflow frame (see TRACE_STACK_PUSH()) +#define MAX_ABSTRACT_FRAME_DEPTH (TRACE_STACK_SIZE + 2) + +typedef struct _Py_UopsSymbol _Py_UopsSymbol; + +struct _Py_UOpsAbstractFrame { + // Max stacklen + int stack_len; + int locals_len; + + _Py_UopsSymbol **stack_pointer; + _Py_UopsSymbol **stack; + _Py_UopsSymbol **locals; +}; + +typedef struct _Py_UOpsAbstractFrame _Py_UOpsAbstractFrame; + +typedef struct ty_arena { + int ty_curr_number; + int ty_max_number; + _Py_UopsSymbol arena[TY_ARENA_SIZE]; +} ty_arena; + +struct _Py_UOpsContext { + PyObject_HEAD + // The current "executing" frame. + _Py_UOpsAbstractFrame *frame; + _Py_UOpsAbstractFrame frames[MAX_ABSTRACT_FRAME_DEPTH]; + int curr_frame_depth; + + // Arena for the symbolic types. 
+ ty_arena t_arena; + + _Py_UopsSymbol **n_consumed; + _Py_UopsSymbol **limit; + _Py_UopsSymbol *locals_and_stack[MAX_ABSTRACT_INTERP_SIZE]; +}; + +typedef struct _Py_UOpsContext _Py_UOpsContext; + +extern bool _Py_uop_sym_is_null(_Py_UopsSymbol *sym); +extern bool _Py_uop_sym_is_not_null(_Py_UopsSymbol *sym); +extern bool _Py_uop_sym_is_const(_Py_UopsSymbol *sym); +extern PyObject *_Py_uop_sym_get_const(_Py_UopsSymbol *sym); +extern _Py_UopsSymbol *_Py_uop_sym_new_unknown(_Py_UOpsContext *ctx); +extern _Py_UopsSymbol *_Py_uop_sym_new_not_null(_Py_UOpsContext *ctx); +extern _Py_UopsSymbol *_Py_uop_sym_new_type( + _Py_UOpsContext *ctx, PyTypeObject *typ); +extern _Py_UopsSymbol *_Py_uop_sym_new_const(_Py_UOpsContext *ctx, PyObject *const_val); +extern _Py_UopsSymbol *_Py_uop_sym_new_null(_Py_UOpsContext *ctx); +extern bool _Py_uop_sym_has_type(_Py_UopsSymbol *sym); +extern bool _Py_uop_sym_matches_type(_Py_UopsSymbol *sym, PyTypeObject *typ); +extern bool _Py_uop_sym_set_null(_Py_UopsSymbol *sym); +extern bool _Py_uop_sym_set_non_null(_Py_UopsSymbol *sym); +extern bool _Py_uop_sym_set_type(_Py_UopsSymbol *sym, PyTypeObject *typ); +extern bool _Py_uop_sym_set_const(_Py_UopsSymbol *sym, PyObject *const_val); +extern bool _Py_uop_sym_is_bottom(_Py_UopsSymbol *sym); +extern int _Py_uop_sym_truthiness(_Py_UopsSymbol *sym); +extern PyTypeObject *_Py_uop_sym_get_type(_Py_UopsSymbol *sym); + + +extern int _Py_uop_abstractcontext_init(_Py_UOpsContext *ctx); +extern void _Py_uop_abstractcontext_fini(_Py_UOpsContext *ctx); + +extern _Py_UOpsAbstractFrame *_Py_uop_frame_new( + _Py_UOpsContext *ctx, + PyCodeObject *co, + int curr_stackentries, + _Py_UopsSymbol **args, + int arg_len); +extern int _Py_uop_frame_pop(_Py_UOpsContext *ctx); + +PyAPI_FUNC(PyObject *) _Py_uop_symbols_test(PyObject *self, PyObject *ignored); + +PyAPI_FUNC(int) _PyOptimizer_Optimize(struct _PyInterpreterFrame *frame, _Py_CODEUNIT *start, PyObject **stack_pointer, _PyExecutorObject **exec_ptr); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_OPTIMIZER_H */ diff --git a/Include/internal/pycore_parking_lot.h b/Include/internal/pycore_parking_lot.h new file mode 100644 index 0000000000000000000000000000000000000000..8c9260e2636fbc7ec0396ac443bdd6fdbafb67df --- /dev/null +++ b/Include/internal/pycore_parking_lot.h @@ -0,0 +1,97 @@ +// ParkingLot is an internal API for building efficient synchronization +// primitives like mutexes and events. +// +// The API and name is inspired by WebKit's WTF::ParkingLot, which in turn +// is inspired Linux's futex API. +// See https://webkit.org/blog/6161/locking-in-webkit/. +// +// The core functionality is an atomic "compare-and-sleep" operation along with +// an atomic "wake-up" operation. + +#ifndef Py_INTERNAL_PARKING_LOT_H +#define Py_INTERNAL_PARKING_LOT_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + + +enum { + // The thread was unparked by another thread. + Py_PARK_OK = 0, + + // The value of `address` did not match `expected`. + Py_PARK_AGAIN = -1, + + // The thread was unparked due to a timeout. + Py_PARK_TIMEOUT = -2, + + // The thread was interrupted by a signal. + Py_PARK_INTR = -3, +}; + +// Checks that `*address == *expected` and puts the thread to sleep until an +// unpark operation is called on the same `address`. Otherwise, the function +// returns `Py_PARK_AGAIN`. The comparison behaves like memcmp, but is +// performed atomically with respect to unpark operations. 
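+// (That atomicity is what rules out lost wake-ups: a waiter cannot observe
+// the expected value and then fall asleep after another thread has already
+// issued the corresponding unpark.)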
+//
+// The `address_size` argument is the size of the data pointed to by the
+// `address` and `expected` pointers (i.e., sizeof(*address)). It must be
+// 1, 2, 4, or 8.
+//
+// The `timeout_ns` argument specifies the maximum amount of time to wait, with
+// -1 indicating an infinite wait.
+//
+// `park_arg`, which can be NULL, is passed to the unpark operation.
+//
+// If `detach` is true, then the thread will detach/release the GIL while
+// waiting.
+//
+// Example usage:
+//
+//   if (_Py_atomic_compare_exchange_uint8(address, &expected, new_value)) {
+//       int res = _PyParkingLot_Park(address, &new_value, sizeof(*address),
+//                                    timeout_ns, NULL, 1);
+//       ...
+//   }
+PyAPI_FUNC(int)
+_PyParkingLot_Park(const void *address, const void *expected,
+                   size_t address_size, PyTime_t timeout_ns,
+                   void *park_arg, int detach);
+
+// Callback for _PyParkingLot_Unpark:
+//
+// `arg` is the data of the same name provided to the _PyParkingLot_Unpark()
+// call.
+// `park_arg` is the data provided to the _PyParkingLot_Park() call, or NULL
+// if no waiting thread was found.
+// `has_more_waiters` is true if there are more threads waiting on the same
+// address. May be true in cases where threads are waiting on a different
+// address that maps to the same internal bucket.
+typedef void _Py_unpark_fn_t(void *arg, void *park_arg, int has_more_waiters);
+
+// Unparks a single thread waiting on `address`.
+//
+// Note that fn() is called regardless of whether a thread was unparked. If
+// no threads are waiting on `address` then the `park_arg` argument to fn()
+// will be NULL.
+//
+// Example usage:
+//   void callback(void *arg, void *park_arg, int has_more_waiters);
+//   _PyParkingLot_Unpark(address, &callback, arg);
+PyAPI_FUNC(void)
+_PyParkingLot_Unpark(const void *address, _Py_unpark_fn_t *fn, void *arg);
+
+// Unparks all threads waiting on `address`.
+PyAPI_FUNC(void) _PyParkingLot_UnparkAll(const void *address);
+
+// Resets the parking lot state after a fork. Forgets all parked threads.
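+// (After fork() only the calling thread exists in the child, so any recorded
+// waiters would be stale.)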
+PyAPI_FUNC(void) _PyParkingLot_AfterFork(void); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_PARKING_LOT_H */ diff --git a/Include/internal/pycore_parser.h b/Include/internal/pycore_parser.h new file mode 100644 index 0000000000000000000000000000000000000000..b16084aaa155155d2dc6cd7be581c6163b53b1ad --- /dev/null +++ b/Include/internal/pycore_parser.h @@ -0,0 +1,95 @@ +#ifndef Py_INTERNAL_PARSER_H +#define Py_INTERNAL_PARSER_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + + +#include "pycore_ast.h" // struct _expr +#include "pycore_global_strings.h" // _Py_DECLARE_STR() +#include "pycore_pyarena.h" // PyArena + + +#ifdef Py_DEBUG +#define _PYPEGEN_NSTATISTICS 2000 +#endif + +struct _parser_runtime_state { +#ifdef Py_DEBUG + long memo_statistics[_PYPEGEN_NSTATISTICS]; +#ifdef Py_GIL_DISABLED + PyMutex mutex; +#endif +#else + int _not_used; +#endif + struct _expr dummy_name; +}; + +_Py_DECLARE_STR(empty, "") +#if defined(Py_DEBUG) && defined(Py_GIL_DISABLED) +#define _parser_runtime_state_INIT \ + { \ + .mutex = {0}, \ + .dummy_name = { \ + .kind = Name_kind, \ + .v.Name.id = &_Py_STR(empty), \ + .v.Name.ctx = Load, \ + .lineno = 1, \ + .col_offset = 0, \ + .end_lineno = 1, \ + .end_col_offset = 0, \ + }, \ + } +#else +#define _parser_runtime_state_INIT \ + { \ + .dummy_name = { \ + .kind = Name_kind, \ + .v.Name.id = &_Py_STR(empty), \ + .v.Name.ctx = Load, \ + .lineno = 1, \ + .col_offset = 0, \ + .end_lineno = 1, \ + .end_col_offset = 0, \ + }, \ + } +#endif + +extern struct _mod* _PyParser_ASTFromString( + const char *str, + PyObject* filename, + int mode, + PyCompilerFlags *flags, + PyArena *arena); + +extern struct _mod* _PyParser_ASTFromFile( + FILE *fp, + PyObject *filename_ob, + const char *enc, + int mode, + const char *ps1, + const char *ps2, + PyCompilerFlags *flags, + int *errcode, + PyArena *arena); +extern struct _mod* _PyParser_InteractiveASTFromFile( + FILE *fp, + PyObject *filename_ob, + const char *enc, + int mode, + const char *ps1, + const char *ps2, + PyCompilerFlags *flags, + int *errcode, + PyObject **interactive_src, + PyArena *arena); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_PARSER_H */ diff --git a/Include/internal/pycore_pathconfig.h b/Include/internal/pycore_pathconfig.h new file mode 100644 index 0000000000000000000000000000000000000000..a1ce1b19a00283292f09cdd59cb12ad2b6edaac9 --- /dev/null +++ b/Include/internal/pycore_pathconfig.h @@ -0,0 +1,26 @@ +#ifndef Py_INTERNAL_PATHCONFIG_H +#define Py_INTERNAL_PATHCONFIG_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +// Export for '_testinternalcapi' shared extension +PyAPI_FUNC(void) _PyPathConfig_ClearGlobal(void); + +extern PyStatus _PyPathConfig_ReadGlobal(PyConfig *config); +extern PyStatus _PyPathConfig_UpdateGlobal(const PyConfig *config); +extern const wchar_t * _PyPathConfig_GetGlobalModuleSearchPath(void); + +extern int _PyPathConfig_ComputeSysPath0( + const PyWideStringList *argv, + PyObject **path0); + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_PATHCONFIG_H */ diff --git a/Include/internal/pycore_pyarena.h b/Include/internal/pycore_pyarena.h new file mode 100644 index 0000000000000000000000000000000000000000..1f07479fb2ca27ff2adcf2c6e1f9fa3a75096a61 --- /dev/null +++ b/Include/internal/pycore_pyarena.h @@ -0,0 +1,68 @@ +// An arena-like memory interface for the compiler. 
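+//
+// A minimal lifetime sketch (illustrative only; `node` could be any
+// AST-node type, error handling elided):
+//
+//   PyArena *arena = _PyArena_New();
+//   if (arena == NULL) {
+//       return NULL;
+//   }
+//   stmt_ty node = _PyArena_Malloc(arena, sizeof(*node));
+//   ...
+//   _PyArena_Free(arena);   // releases node and every other allocation at once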
+ +#ifndef Py_INTERNAL_PYARENA_H +#define Py_INTERNAL_PYARENA_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +typedef struct _arena PyArena; + +// _PyArena_New() and _PyArena_Free() create a new arena and free it, +// respectively. Once an arena has been created, it can be used +// to allocate memory via _PyArena_Malloc(). Pointers to PyObject can +// also be registered with the arena via _PyArena_AddPyObject(), and the +// arena will ensure that the PyObjects stay alive at least until +// _PyArena_Free() is called. When an arena is freed, all the memory it +// allocated is freed, the arena releases internal references to registered +// PyObject*, and none of its pointers are valid. +// XXX (tim) What does "none of its pointers are valid" mean? Does it +// XXX mean that pointers previously obtained via _PyArena_Malloc() are +// XXX no longer valid? (That's clearly true, but not sure that's what +// XXX the text is trying to say.) +// +// _PyArena_New() returns an arena pointer. On error, it +// returns a negative number and sets an exception. +// XXX (tim): Not true. On error, _PyArena_New() actually returns NULL, +// XXX and looks like it may or may not set an exception (e.g., if the +// XXX internal PyList_New(0) returns NULL, _PyArena_New() passes that on +// XXX and an exception is set; OTOH, if the internal +// XXX block_new(DEFAULT_BLOCK_SIZE) returns NULL, that's passed on but +// XXX an exception is not set in that case). +// +// Export for test_peg_generator +PyAPI_FUNC(PyArena*) _PyArena_New(void); + +// Export for test_peg_generator +PyAPI_FUNC(void) _PyArena_Free(PyArena *); + +// Mostly like malloc(), return the address of a block of memory spanning +// `size` bytes, or return NULL (without setting an exception) if enough +// new memory can't be obtained. Unlike malloc(0), _PyArena_Malloc() with +// size=0 does not guarantee to return a unique pointer (the pointer +// returned may equal one or more other pointers obtained from +// _PyArena_Malloc()). +// Note that pointers obtained via _PyArena_Malloc() must never be passed to +// the system free() or realloc(), or to any of Python's similar memory- +// management functions. _PyArena_Malloc()-obtained pointers remain valid +// until _PyArena_Free(ar) is called, at which point all pointers obtained +// from the arena `ar` become invalid simultaneously. +// +// Export for test_peg_generator +PyAPI_FUNC(void*) _PyArena_Malloc(PyArena *, size_t size); + +// This routine isn't a proper arena allocation routine. It takes +// a PyObject* and records it so that it can be DECREFed when the +// arena is freed. +// +// Export for test_peg_generator +PyAPI_FUNC(int) _PyArena_AddPyObject(PyArena *, PyObject *); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_PYARENA_H */ diff --git a/Include/internal/pycore_pyatomic_ft_wrappers.h b/Include/internal/pycore_pyatomic_ft_wrappers.h new file mode 100644 index 0000000000000000000000000000000000000000..d755d03a5fa190d3afc83ae6f4964b174a710d4d --- /dev/null +++ b/Include/internal/pycore_pyatomic_ft_wrappers.h @@ -0,0 +1,165 @@ +// This header file provides wrappers around the atomic operations found in +// `pyatomic.h` that are only atomic in free-threaded builds. 
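+// (Concretely: each FT_ATOMIC_* macro below expands to a real atomic
+// operation in Py_GIL_DISABLED builds, and to a plain load or store in the
+// default build.)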
+// +// These are intended to be used in places where atomics are required in +// free-threaded builds, but not in the default build, and we don't want to +// introduce the potential performance overhead of an atomic operation in the +// default build. +// +// All usages of these macros should be replaced with unconditionally atomic or +// non-atomic versions, and this file should be removed, once the dust settles +// on free threading. +#ifndef Py_ATOMIC_FT_WRAPPERS_H +#define Py_ATOMIC_FT_WRAPPERS_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +#error "this header requires Py_BUILD_CORE define" +#endif + +#ifdef Py_GIL_DISABLED +#define FT_ATOMIC_LOAD_PTR(value) _Py_atomic_load_ptr(&value) +#define FT_ATOMIC_STORE_PTR(value, new_value) _Py_atomic_store_ptr(&value, new_value) +#define FT_ATOMIC_LOAD_SSIZE(value) _Py_atomic_load_ssize(&value) +#define FT_ATOMIC_LOAD_SSIZE_ACQUIRE(value) \ + _Py_atomic_load_ssize_acquire(&value) +#define FT_ATOMIC_LOAD_SSIZE_RELAXED(value) \ + _Py_atomic_load_ssize_relaxed(&value) +#define FT_ATOMIC_STORE_PTR(value, new_value) \ + _Py_atomic_store_ptr(&value, new_value) +#define FT_ATOMIC_LOAD_PTR_ACQUIRE(value) \ + _Py_atomic_load_ptr_acquire(&value) +#define FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(value) \ + _Py_atomic_load_uintptr_acquire(&value) +#define FT_ATOMIC_LOAD_PTR_RELAXED(value) \ + _Py_atomic_load_ptr_relaxed(&value) +#define FT_ATOMIC_LOAD_UINT8(value) \ + _Py_atomic_load_uint8(&value) +#define FT_ATOMIC_STORE_UINT8(value, new_value) \ + _Py_atomic_store_uint8(&value, new_value) +#define FT_ATOMIC_LOAD_UINT8_RELAXED(value) \ + _Py_atomic_load_uint8_relaxed(&value) +#define FT_ATOMIC_LOAD_UINT16_RELAXED(value) \ + _Py_atomic_load_uint16_relaxed(&value) +#define FT_ATOMIC_LOAD_UINT32_RELAXED(value) \ + _Py_atomic_load_uint32_relaxed(&value) +#define FT_ATOMIC_LOAD_ULONG_RELAXED(value) \ + _Py_atomic_load_ulong_relaxed(&value) +#define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) \ + _Py_atomic_store_ptr_relaxed(&value, new_value) +#define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) \ + _Py_atomic_store_ptr_release(&value, new_value) +#define FT_ATOMIC_STORE_UINTPTR_RELEASE(value, new_value) \ + _Py_atomic_store_uintptr_release(&value, new_value) +#define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) \ + _Py_atomic_store_ssize_relaxed(&value, new_value) +#define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) \ + _Py_atomic_store_uint8_relaxed(&value, new_value) +#define FT_ATOMIC_STORE_UINT16_RELAXED(value, new_value) \ + _Py_atomic_store_uint16_relaxed(&value, new_value) +#define FT_ATOMIC_STORE_UINT32_RELAXED(value, new_value) \ + _Py_atomic_store_uint32_relaxed(&value, new_value) +#define FT_ATOMIC_STORE_CHAR_RELAXED(value, new_value) \ + _Py_atomic_store_char_relaxed(&value, new_value) +#define FT_ATOMIC_LOAD_CHAR_RELAXED(value) \ + _Py_atomic_load_char_relaxed(&value) +#define FT_ATOMIC_STORE_UCHAR_RELAXED(value, new_value) \ + _Py_atomic_store_uchar_relaxed(&value, new_value) +#define FT_ATOMIC_LOAD_UCHAR_RELAXED(value) \ + _Py_atomic_load_uchar_relaxed(&value) +#define FT_ATOMIC_STORE_SHORT_RELAXED(value, new_value) \ + _Py_atomic_store_short_relaxed(&value, new_value) +#define FT_ATOMIC_LOAD_SHORT_RELAXED(value) \ + _Py_atomic_load_short_relaxed(&value) +#define FT_ATOMIC_STORE_USHORT_RELAXED(value, new_value) \ + _Py_atomic_store_ushort_relaxed(&value, new_value) +#define FT_ATOMIC_LOAD_USHORT_RELAXED(value) \ + _Py_atomic_load_ushort_relaxed(&value) +#define FT_ATOMIC_STORE_INT_RELAXED(value, new_value) \ + 
_Py_atomic_store_int_relaxed(&value, new_value) +#define FT_ATOMIC_LOAD_INT_RELAXED(value) \ + _Py_atomic_load_int_relaxed(&value) +#define FT_ATOMIC_STORE_UINT_RELAXED(value, new_value) \ + _Py_atomic_store_uint_relaxed(&value, new_value) +#define FT_ATOMIC_LOAD_UINT_RELAXED(value) \ + _Py_atomic_load_uint_relaxed(&value) +#define FT_ATOMIC_STORE_LONG_RELAXED(value, new_value) \ + _Py_atomic_store_long_relaxed(&value, new_value) +#define FT_ATOMIC_LOAD_LONG_RELAXED(value) \ + _Py_atomic_load_long_relaxed(&value) +#define FT_ATOMIC_STORE_ULONG_RELAXED(value, new_value) \ + _Py_atomic_store_ulong_relaxed(&value, new_value) +#define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) \ + _Py_atomic_store_ssize_relaxed(&value, new_value) +#define FT_ATOMIC_STORE_FLOAT_RELAXED(value, new_value) \ + _Py_atomic_store_float_relaxed(&value, new_value) +#define FT_ATOMIC_LOAD_FLOAT_RELAXED(value) \ + _Py_atomic_load_float_relaxed(&value) +#define FT_ATOMIC_STORE_DOUBLE_RELAXED(value, new_value) \ + _Py_atomic_store_double_relaxed(&value, new_value) +#define FT_ATOMIC_LOAD_DOUBLE_RELAXED(value) \ + _Py_atomic_load_double_relaxed(&value) +#define FT_ATOMIC_STORE_LLONG_RELAXED(value, new_value) \ + _Py_atomic_store_llong_relaxed(&value, new_value) +#define FT_ATOMIC_LOAD_LLONG_RELAXED(value) \ + _Py_atomic_load_llong_relaxed(&value) +#define FT_ATOMIC_STORE_ULLONG_RELAXED(value, new_value) \ + _Py_atomic_store_ullong_relaxed(&value, new_value) +#define FT_ATOMIC_LOAD_ULLONG_RELAXED(value) \ + _Py_atomic_load_ullong_relaxed(&value) + +#else +#define FT_ATOMIC_LOAD_PTR(value) value +#define FT_ATOMIC_STORE_PTR(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_SSIZE(value) value +#define FT_ATOMIC_LOAD_SSIZE_ACQUIRE(value) value +#define FT_ATOMIC_LOAD_SSIZE_RELAXED(value) value +#define FT_ATOMIC_LOAD_PTR_ACQUIRE(value) value +#define FT_ATOMIC_LOAD_UINTPTR_ACQUIRE(value) value +#define FT_ATOMIC_LOAD_PTR_RELAXED(value) value +#define FT_ATOMIC_LOAD_UINT8(value) value +#define FT_ATOMIC_STORE_UINT8(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_UINT8_RELAXED(value) value +#define FT_ATOMIC_LOAD_UINT16_RELAXED(value) value +#define FT_ATOMIC_LOAD_UINT32_RELAXED(value) value +#define FT_ATOMIC_LOAD_ULONG_RELAXED(value) value +#define FT_ATOMIC_STORE_PTR_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_STORE_PTR_RELEASE(value, new_value) value = new_value +#define FT_ATOMIC_STORE_UINTPTR_RELEASE(value, new_value) value = new_value +#define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_STORE_UINT8_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_STORE_UINT16_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_STORE_UINT32_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_CHAR_RELAXED(value) value +#define FT_ATOMIC_STORE_CHAR_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_UCHAR_RELAXED(value) value +#define FT_ATOMIC_STORE_UCHAR_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_SHORT_RELAXED(value) value +#define FT_ATOMIC_STORE_SHORT_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_USHORT_RELAXED(value) value +#define FT_ATOMIC_STORE_USHORT_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_INT_RELAXED(value) value +#define FT_ATOMIC_STORE_INT_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_UINT_RELAXED(value) value +#define FT_ATOMIC_STORE_UINT_RELAXED(value, new_value) value = new_value +#define 
FT_ATOMIC_LOAD_LONG_RELAXED(value) value +#define FT_ATOMIC_STORE_LONG_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_STORE_ULONG_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_STORE_SSIZE_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_FLOAT_RELAXED(value) value +#define FT_ATOMIC_STORE_FLOAT_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_DOUBLE_RELAXED(value) value +#define FT_ATOMIC_STORE_DOUBLE_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_LLONG_RELAXED(value) value +#define FT_ATOMIC_STORE_LLONG_RELAXED(value, new_value) value = new_value +#define FT_ATOMIC_LOAD_ULLONG_RELAXED(value) value +#define FT_ATOMIC_STORE_ULLONG_RELAXED(value, new_value) value = new_value + +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_ATOMIC_FT_WRAPPERS_H */ diff --git a/Include/internal/pycore_pybuffer.h b/Include/internal/pycore_pybuffer.h new file mode 100644 index 0000000000000000000000000000000000000000..9439d2bd770587d6c22620635254cfa5bf727b40 --- /dev/null +++ b/Include/internal/pycore_pybuffer.h @@ -0,0 +1,21 @@ +#ifndef Py_INTERNAL_PYBUFFER_H +#define Py_INTERNAL_PYBUFFER_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + + +// Exported for the _interpchannels module. +PyAPI_FUNC(int) _PyBuffer_ReleaseInInterpreter( + PyInterpreterState *interp, Py_buffer *view); +PyAPI_FUNC(int) _PyBuffer_ReleaseInInterpreterAndRawFree( + PyInterpreterState *interp, Py_buffer *view); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_PYBUFFER_H */ diff --git a/Include/internal/pycore_pyerrors.h b/Include/internal/pycore_pyerrors.h new file mode 100644 index 0000000000000000000000000000000000000000..615cc23ec935284c9dbf7e2610a54c110727f4d6 --- /dev/null +++ b/Include/internal/pycore_pyerrors.h @@ -0,0 +1,190 @@ +#ifndef Py_INTERNAL_PYERRORS_H +#define Py_INTERNAL_PYERRORS_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + + +/* Error handling definitions */ + +extern _PyErr_StackItem* _PyErr_GetTopmostException(PyThreadState *tstate); +extern PyObject* _PyErr_GetHandledException(PyThreadState *); +extern void _PyErr_SetHandledException(PyThreadState *, PyObject *); +extern void _PyErr_GetExcInfo(PyThreadState *, PyObject **, PyObject **, PyObject **); + +// Export for '_testinternalcapi' shared extension +PyAPI_FUNC(void) _PyErr_SetKeyError(PyObject *); + + +// Like PyErr_Format(), but saves current exception as __context__ and +// __cause__. +// Export for '_sqlite3' shared extension. +PyAPI_FUNC(PyObject*) _PyErr_FormatFromCause( + PyObject *exception, + const char *format, /* ASCII-encoded string */ + ... 
+ ); + +extern int _PyException_AddNote( + PyObject *exc, + PyObject *note); + +extern int _PyErr_CheckSignals(void); + +/* Support for adding program text to SyntaxErrors */ + +// Export for test_peg_generator +PyAPI_FUNC(PyObject*) _PyErr_ProgramDecodedTextObject( + PyObject *filename, + int lineno, + const char* encoding); + +extern PyObject* _PyUnicodeTranslateError_Create( + PyObject *object, + Py_ssize_t start, + Py_ssize_t end, + const char *reason /* UTF-8 encoded string */ + ); + +extern void _Py_NO_RETURN _Py_FatalErrorFormat( + const char *func, + const char *format, + ...); + +extern PyObject* _PyErr_SetImportErrorWithNameFrom( + PyObject *, + PyObject *, + PyObject *, + PyObject *); + + +/* runtime lifecycle */ + +extern PyStatus _PyErr_InitTypes(PyInterpreterState *); +extern void _PyErr_FiniTypes(PyInterpreterState *); + + +/* other API */ + +static inline PyObject* _PyErr_Occurred(PyThreadState *tstate) +{ + assert(tstate != NULL); + if (tstate->current_exception == NULL) { + return NULL; + } + return (PyObject *)Py_TYPE(tstate->current_exception); +} + +static inline void _PyErr_ClearExcState(_PyErr_StackItem *exc_state) +{ + Py_CLEAR(exc_state->exc_value); +} + +extern PyObject* _PyErr_StackItemToExcInfoTuple( + _PyErr_StackItem *err_info); + +extern void _PyErr_Fetch( + PyThreadState *tstate, + PyObject **type, + PyObject **value, + PyObject **traceback); + +extern PyObject* _PyErr_GetRaisedException(PyThreadState *tstate); + +PyAPI_FUNC(int) _PyErr_ExceptionMatches( + PyThreadState *tstate, + PyObject *exc); + +extern void _PyErr_SetRaisedException(PyThreadState *tstate, PyObject *exc); + +extern void _PyErr_Restore( + PyThreadState *tstate, + PyObject *type, + PyObject *value, + PyObject *traceback); + +extern void _PyErr_SetObject( + PyThreadState *tstate, + PyObject *type, + PyObject *value); + +extern void _PyErr_ChainStackItem(void); + +PyAPI_FUNC(void) _PyErr_Clear(PyThreadState *tstate); + +extern void _PyErr_SetNone(PyThreadState *tstate, PyObject *exception); + +extern PyObject* _PyErr_NoMemory(PyThreadState *tstate); + +PyAPI_FUNC(void) _PyErr_SetString( + PyThreadState *tstate, + PyObject *exception, + const char *string); + +/* + * Set an exception with the error message decoded from the current locale + * encoding (LC_CTYPE). + * + * Exceptions occurring in decoding take priority over the desired exception. + * + * Exported for '_ctypes' shared extensions. 
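+ * (Contrast with _PyErr_SetString(), whose message is expected to be UTF-8
+ * rather than locale-encoded.)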
+ */ +PyAPI_FUNC(void) _PyErr_SetLocaleString( + PyObject *exception, + const char *string); + +PyAPI_FUNC(PyObject*) _PyErr_Format( + PyThreadState *tstate, + PyObject *exception, + const char *format, + ...); + +extern void _PyErr_NormalizeException( + PyThreadState *tstate, + PyObject **exc, + PyObject **val, + PyObject **tb); + +extern PyObject* _PyErr_FormatFromCauseTstate( + PyThreadState *tstate, + PyObject *exception, + const char *format, + ...); + +extern PyObject* _PyExc_CreateExceptionGroup( + const char *msg, + PyObject *excs); + +extern PyObject* _PyExc_PrepReraiseStar( + PyObject *orig, + PyObject *excs); + +extern int _PyErr_CheckSignalsTstate(PyThreadState *tstate); + +extern void _Py_DumpExtensionModules(int fd, PyInterpreterState *interp); +extern PyObject* _Py_CalculateSuggestions(PyObject *dir, PyObject *name); +extern PyObject* _Py_Offer_Suggestions(PyObject* exception); + +// Export for '_testinternalcapi' shared extension +PyAPI_FUNC(Py_ssize_t) _Py_UTF8_Edit_Cost(PyObject *str_a, PyObject *str_b, + Py_ssize_t max_cost); + +void _PyErr_FormatNote(const char *format, ...); + +/* Context manipulation (PEP 3134) */ + +Py_DEPRECATED(3.12) extern void _PyErr_ChainExceptions(PyObject *, PyObject *, PyObject *); + +// implementation detail for the codeop module. +// Exported for test.test_peg_generator.test_c_parser +PyAPI_DATA(PyTypeObject) _PyExc_IncompleteInputError; +#define PyExc_IncompleteInputError ((PyObject *)(&_PyExc_IncompleteInputError)) + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_PYERRORS_H */ diff --git a/Include/internal/pycore_pyhash.h b/Include/internal/pycore_pyhash.h new file mode 100644 index 0000000000000000000000000000000000000000..0ce08900e96f0b28b0f2c34a0151ccbe2906798b --- /dev/null +++ b/Include/internal/pycore_pyhash.h @@ -0,0 +1,107 @@ +#ifndef Py_INTERNAL_PYHASH_H +#define Py_INTERNAL_PYHASH_H + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +// Similar to Py_HashPointer(), but don't replace -1 with -2. +static inline Py_hash_t +_Py_HashPointerRaw(const void *ptr) +{ + uintptr_t x = (uintptr_t)ptr; + Py_BUILD_ASSERT(sizeof(x) == sizeof(ptr)); + + // Bottom 3 or 4 bits are likely to be 0; rotate x by 4 to the right + // to avoid excessive hash collisions for dicts and sets. + x = (x >> 4) | (x << (8 * sizeof(uintptr_t) - 4)); + + Py_BUILD_ASSERT(sizeof(x) == sizeof(Py_hash_t)); + return (Py_hash_t)x; +} + +// Export for '_datetime' shared extension +PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t); + +/* Hash secret + * + * memory layout on 64 bit systems + * cccccccc cccccccc cccccccc uc -- unsigned char[24] + * pppppppp ssssssss ........ fnv -- two Py_hash_t + * k0k0k0k0 k1k1k1k1 ........ siphash -- two uint64_t + * ........ ........ ssssssss djbx33a -- 16 bytes padding + one Py_hash_t + * ........ ........ eeeeeeee pyexpat XML hash salt + * + * memory layout on 32 bit systems + * cccccccc cccccccc cccccccc uc + * ppppssss ........ ........ fnv -- two Py_hash_t + * k0k0k0k0 k1k1k1k1 ........ siphash -- two uint64_t (*) + * ........ ........ ssss.... djbx33a -- 16 bytes padding + one Py_hash_t + * ........ ........ eeee.... pyexpat XML hash salt + * + * (*) The siphash member may not be available on 32 bit platforms without + * an unsigned int64 data type. 
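+ *
+ * The secret is filled in once at interpreter startup by the hash
+ * randomization setup (_Py_HashRandomization_Init), seeded from
+ * PYTHONHASHSEED or the OS entropy source.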
+ */ +typedef union { + /* ensure 24 bytes */ + unsigned char uc[24]; + /* two Py_hash_t for FNV */ + struct { + Py_hash_t prefix; + Py_hash_t suffix; + } fnv; + /* two uint64 for SipHash24 */ + struct { + uint64_t k0; + uint64_t k1; + } siphash; + /* a different (!) Py_hash_t for small string optimization */ + struct { + unsigned char padding[16]; + Py_hash_t suffix; + } djbx33a; + struct { + unsigned char padding[16]; + Py_hash_t hashsalt; + } expat; +} _Py_HashSecret_t; + +// Export for '_elementtree' shared extension +PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret; + +#ifdef Py_DEBUG +extern int _Py_HashSecret_Initialized; +#endif + + +struct pyhash_runtime_state { + struct { +#ifndef MS_WINDOWS + int fd; + dev_t st_dev; + ino_t st_ino; +#else + // This is a placeholder so the struct isn't empty on Windows. + int _not_used; +#endif + } urandom_cache; +}; + +#ifndef MS_WINDOWS +# define _py_urandom_cache_INIT \ + { \ + .fd = -1, \ + } +#else +# define _py_urandom_cache_INIT {0} +#endif + +#define pyhash_state_INIT \ + { \ + .urandom_cache = _py_urandom_cache_INIT, \ + } + + +extern uint64_t _Py_KeyedHash(uint64_t key, const void *src, Py_ssize_t src_sz); + +#endif // !Py_INTERNAL_PYHASH_H diff --git a/Include/internal/pycore_pylifecycle.h b/Include/internal/pycore_pylifecycle.h new file mode 100644 index 0000000000000000000000000000000000000000..f426ae0e103b9c991229c1e1cc2c9c55ad12b715 --- /dev/null +++ b/Include/internal/pycore_pylifecycle.h @@ -0,0 +1,136 @@ +#ifndef Py_INTERNAL_LIFECYCLE_H +#define Py_INTERNAL_LIFECYCLE_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_runtime.h" // _PyRuntimeState + +/* Forward declarations */ +struct _PyArgv; +struct pyruntimestate; + +extern int _Py_SetFileSystemEncoding( + const char *encoding, + const char *errors); +extern void _Py_ClearFileSystemEncoding(void); +extern PyStatus _PyUnicode_InitEncodings(PyThreadState *tstate); +#ifdef MS_WINDOWS +extern int _PyUnicode_EnableLegacyWindowsFSEncoding(void); +#endif + +extern int _Py_IsLocaleCoercionTarget(const char *ctype_loc); + +/* Various one-time initializers */ + +extern void _Py_InitVersion(void); +extern PyStatus _PyFaulthandler_Init(int enable); +extern PyObject * _PyBuiltin_Init(PyInterpreterState *interp); +extern PyStatus _PySys_Create( + PyThreadState *tstate, + PyObject **sysmod_p); +extern PyStatus _PySys_ReadPreinitWarnOptions(PyWideStringList *options); +extern PyStatus _PySys_ReadPreinitXOptions(PyConfig *config); +extern int _PySys_UpdateConfig(PyThreadState *tstate); +extern void _PySys_FiniTypes(PyInterpreterState *interp); +extern int _PyBuiltins_AddExceptions(PyObject * bltinmod); +extern PyStatus _Py_HashRandomization_Init(const PyConfig *); + +extern PyStatus _PyGC_Init(PyInterpreterState *interp); +extern PyStatus _PyAtExit_Init(PyInterpreterState *interp); + +/* Various internal finalizers */ + +extern int _PySignal_Init(int install_signal_handlers); +extern void _PySignal_Fini(void); + +extern void _PyGC_Fini(PyInterpreterState *interp); +extern void _Py_HashRandomization_Fini(void); +extern void _PyFaulthandler_Fini(void); +extern void _PyHash_Fini(void); +extern void _PyTraceMalloc_Fini(void); +extern void _PyWarnings_Fini(PyInterpreterState *interp); +extern void _PyAST_Fini(PyInterpreterState *interp); +extern void _PyAtExit_Fini(PyInterpreterState *interp); +extern void _PyThread_FiniType(PyInterpreterState *interp); +extern void _PyArg_Fini(void); +extern void 
_Py_FinalizeAllocatedBlocks(_PyRuntimeState *); + +extern PyStatus _PyGILState_Init(PyInterpreterState *interp); +extern void _PyGILState_SetTstate(PyThreadState *tstate); +extern void _PyGILState_Fini(PyInterpreterState *interp); + +extern void _PyGC_DumpShutdownStats(PyInterpreterState *interp); + +extern PyStatus _Py_PreInitializeFromPyArgv( + const PyPreConfig *src_config, + const struct _PyArgv *args); +extern PyStatus _Py_PreInitializeFromConfig( + const PyConfig *config, + const struct _PyArgv *args); + +extern wchar_t * _Py_GetStdlibDir(void); + +extern int _Py_HandleSystemExit(int *exitcode_p); + +extern PyObject* _PyErr_WriteUnraisableDefaultHook(PyObject *unraisable); + +extern void _PyErr_Print(PyThreadState *tstate); +extern void _PyErr_Display(PyObject *file, PyObject *exception, + PyObject *value, PyObject *tb); +extern void _PyErr_DisplayException(PyObject *file, PyObject *exc); + +extern void _PyThreadState_DeleteCurrent(PyThreadState *tstate); + +extern void _PyAtExit_Call(PyInterpreterState *interp); + +extern int _Py_IsCoreInitialized(void); + +extern int _Py_FdIsInteractive(FILE *fp, PyObject *filename); + +extern const char* _Py_gitidentifier(void); +extern const char* _Py_gitversion(void); + +// Export for '_asyncio' shared extension +PyAPI_FUNC(int) _Py_IsInterpreterFinalizing(PyInterpreterState *interp); + +/* Random */ +extern int _PyOS_URandom(void *buffer, Py_ssize_t size); + +// Export for '_random' shared extension +PyAPI_FUNC(int) _PyOS_URandomNonblock(void *buffer, Py_ssize_t size); + +/* Legacy locale support */ +extern int _Py_CoerceLegacyLocale(int warn); +extern int _Py_LegacyLocaleDetected(int warn); + +// Export for 'readline' shared extension +PyAPI_FUNC(char*) _Py_SetLocaleFromEnv(int category); + +// Export for special main.c string compiling with source tracebacks +int _PyRun_SimpleStringFlagsWithName(const char *command, const char* name, PyCompilerFlags *flags); + + +/* interpreter config */ + +// Export for _testinternalcapi shared extension +PyAPI_FUNC(int) _PyInterpreterConfig_InitFromState( + PyInterpreterConfig *, + PyInterpreterState *); +PyAPI_FUNC(PyObject *) _PyInterpreterConfig_AsDict(PyInterpreterConfig *); +PyAPI_FUNC(int) _PyInterpreterConfig_InitFromDict( + PyInterpreterConfig *, + PyObject *); +PyAPI_FUNC(int) _PyInterpreterConfig_UpdateFromDict( + PyInterpreterConfig *, + PyObject *); + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_LIFECYCLE_H */ diff --git a/Include/internal/pycore_pymath.h b/Include/internal/pycore_pymath.h new file mode 100644 index 0000000000000000000000000000000000000000..7a4e1c1eb714f732a399baaecb81848b8d0f5527 --- /dev/null +++ b/Include/internal/pycore_pymath.h @@ -0,0 +1,205 @@ +#ifndef Py_INTERNAL_PYMATH_H +#define Py_INTERNAL_PYMATH_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + + +/* _Py_ADJUST_ERANGE1(x) + * _Py_ADJUST_ERANGE2(x, y) + * Set errno to 0 before calling a libm function, and invoke one of these + * macros after, passing the function result(s) (_Py_ADJUST_ERANGE2 is useful + * for functions returning complex results). This makes two kinds of + * adjustments to errno: (A) If it looks like the platform libm set + * errno=ERANGE due to underflow, clear errno. (B) If it looks like the + * platform libm overflowed but didn't set errno, force errno to ERANGE. In + * effect, we're trying to force a useful implementation of C89 errno + * behavior. + * Caution: + * This isn't reliable. 
C99 no longer requires libm to set errno under + * any exceptional condition, but does require +- HUGE_VAL return + * values on overflow. A 754 box *probably* maps HUGE_VAL to a + * double infinity, and we're cool if that's so, unless the input + * was an infinity and an infinity is the expected result. A C89 + * system sets errno to ERANGE, so we check for that too. We're + * out of luck if a C99 754 box doesn't map HUGE_VAL to +Inf, or + * if the returned result is a NaN, or if a C89 box returns HUGE_VAL + * in non-overflow cases. + */ +static inline void _Py_ADJUST_ERANGE1(double x) +{ + if (errno == 0) { + if (x == Py_HUGE_VAL || x == -Py_HUGE_VAL) { + errno = ERANGE; + } + } + else if (errno == ERANGE && x == 0.0) { + errno = 0; + } +} + +static inline void _Py_ADJUST_ERANGE2(double x, double y) +{ + if (x == Py_HUGE_VAL || x == -Py_HUGE_VAL || + y == Py_HUGE_VAL || y == -Py_HUGE_VAL) + { + if (errno == 0) { + errno = ERANGE; + } + } + else if (errno == ERANGE) { + errno = 0; + } +} + + +//--- HAVE_PY_SET_53BIT_PRECISION macro ------------------------------------ +// +// The functions _Py_dg_strtod() and _Py_dg_dtoa() in Python/dtoa.c (which are +// required to support the short float repr introduced in Python 3.1) require +// that the floating-point unit that's being used for arithmetic operations on +// C doubles is set to use 53-bit precision. It also requires that the FPU +// rounding mode is round-half-to-even, but that's less often an issue. +// +// If your FPU isn't already set to 53-bit precision/round-half-to-even, and +// you want to make use of _Py_dg_strtod() and _Py_dg_dtoa(), then you should: +// +// #define HAVE_PY_SET_53BIT_PRECISION 1 +// +// and also give appropriate definitions for the following three macros: +// +// * _Py_SET_53BIT_PRECISION_HEADER: any variable declarations needed to +// use the two macros below. +// * _Py_SET_53BIT_PRECISION_START: store original FPU settings, and +// set FPU to 53-bit precision/round-half-to-even +// * _Py_SET_53BIT_PRECISION_END: restore original FPU settings +// +// The macros are designed to be used within a single C function: see +// Python/pystrtod.c for an example of their use. + + +// Get and set x87 control word for gcc/x86 +#ifdef HAVE_GCC_ASM_FOR_X87 +#define HAVE_PY_SET_53BIT_PRECISION 1 + +// Functions defined in Python/pymath.c +extern unsigned short _Py_get_387controlword(void); +extern void _Py_set_387controlword(unsigned short); + +#define _Py_SET_53BIT_PRECISION_HEADER \ + unsigned short old_387controlword, new_387controlword +#define _Py_SET_53BIT_PRECISION_START \ + do { \ + old_387controlword = _Py_get_387controlword(); \ + new_387controlword = (old_387controlword & ~0x0f00) | 0x0200; \ + if (new_387controlword != old_387controlword) { \ + _Py_set_387controlword(new_387controlword); \ + } \ + } while (0) +#define _Py_SET_53BIT_PRECISION_END \ + do { \ + if (new_387controlword != old_387controlword) { \ + _Py_set_387controlword(old_387controlword); \ + } \ + } while (0) +#endif + +// Get and set x87 control word for VisualStudio/x86. +// x87 is not supported in 64-bit or ARM. +#if defined(_MSC_VER) && !defined(_WIN64) && !defined(_M_ARM) +#define HAVE_PY_SET_53BIT_PRECISION 1 + +#include // __control87_2() + +#define _Py_SET_53BIT_PRECISION_HEADER \ + unsigned int old_387controlword, new_387controlword, out_387controlword + // We use the __control87_2 function to set only the x87 control word. + // The SSE control word is unaffected. 
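+//
+// The three macros are used together inside a single function, e.g. (sketch
+// modeled on Python/pystrtod.c; `parse` is a made-up name):
+//
+//   double parse(const char *s, char **end)
+//   {
+//       _Py_SET_53BIT_PRECISION_HEADER;
+//       _Py_SET_53BIT_PRECISION_START;
+//       double d = _Py_dg_strtod(s, end);
+//       _Py_SET_53BIT_PRECISION_END;
+//       return d;
+//   }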
+#define _Py_SET_53BIT_PRECISION_START \ + do { \ + __control87_2(0, 0, &old_387controlword, NULL); \ + new_387controlword = \ + (old_387controlword & ~(_MCW_PC | _MCW_RC)) | (_PC_53 | _RC_NEAR); \ + if (new_387controlword != old_387controlword) { \ + __control87_2(new_387controlword, _MCW_PC | _MCW_RC, \ + &out_387controlword, NULL); \ + } \ + } while (0) +#define _Py_SET_53BIT_PRECISION_END \ + do { \ + if (new_387controlword != old_387controlword) { \ + __control87_2(old_387controlword, _MCW_PC | _MCW_RC, \ + &out_387controlword, NULL); \ + } \ + } while (0) +#endif + + +// MC68881 +#ifdef HAVE_GCC_ASM_FOR_MC68881 +#define HAVE_PY_SET_53BIT_PRECISION 1 +#define _Py_SET_53BIT_PRECISION_HEADER \ + unsigned int old_fpcr, new_fpcr +#define _Py_SET_53BIT_PRECISION_START \ + do { \ + __asm__ ("fmove.l %%fpcr,%0" : "=g" (old_fpcr)); \ + /* Set double precision / round to nearest. */ \ + new_fpcr = (old_fpcr & ~0xf0) | 0x80; \ + if (new_fpcr != old_fpcr) { \ + __asm__ volatile ("fmove.l %0,%%fpcr" : : "g" (new_fpcr));\ + } \ + } while (0) +#define _Py_SET_53BIT_PRECISION_END \ + do { \ + if (new_fpcr != old_fpcr) { \ + __asm__ volatile ("fmove.l %0,%%fpcr" : : "g" (old_fpcr)); \ + } \ + } while (0) +#endif + +// Default definitions are empty +#ifndef _Py_SET_53BIT_PRECISION_HEADER +# define _Py_SET_53BIT_PRECISION_HEADER +# define _Py_SET_53BIT_PRECISION_START +# define _Py_SET_53BIT_PRECISION_END +#endif + + +//--- _PY_SHORT_FLOAT_REPR macro ------------------------------------------- + +// If we can't guarantee 53-bit precision, don't use the code +// in Python/dtoa.c, but fall back to standard code. This +// means that repr of a float will be long (17 significant digits). +// +// Realistically, there are two things that could go wrong: +// +// (1) doubles aren't IEEE 754 doubles, or +// (2) we're on x86 with the rounding precision set to 64-bits +// (extended precision), and we don't know how to change +// the rounding precision. +#if !defined(DOUBLE_IS_LITTLE_ENDIAN_IEEE754) && \ + !defined(DOUBLE_IS_BIG_ENDIAN_IEEE754) && \ + !defined(DOUBLE_IS_ARM_MIXED_ENDIAN_IEEE754) +# define _PY_SHORT_FLOAT_REPR 0 +#endif + +// Double rounding is symptomatic of use of extended precision on x86. +// If we're seeing double rounding, and we don't have any mechanism available +// for changing the FPU rounding precision, then don't use Python/dtoa.c. +#if defined(X87_DOUBLE_ROUNDING) && !defined(HAVE_PY_SET_53BIT_PRECISION) +# define _PY_SHORT_FLOAT_REPR 0 +#endif + +#ifndef _PY_SHORT_FLOAT_REPR +# define _PY_SHORT_FLOAT_REPR 1 +#endif + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_PYMATH_H */ diff --git a/Include/internal/pycore_pymem.h b/Include/internal/pycore_pymem.h new file mode 100644 index 0000000000000000000000000000000000000000..e9593dbff1cc60aeb2ac5ee59a04f41b8a7fee5c --- /dev/null +++ b/Include/internal/pycore_pymem.h @@ -0,0 +1,138 @@ +#ifndef Py_INTERNAL_PYMEM_H +#define Py_INTERNAL_PYMEM_H + +#include "pycore_llist.h" // struct llist_node +#include "pycore_lock.h" // PyMutex + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +// Try to get the allocators name set by _PyMem_SetupAllocators(). +// Return NULL if unknown. +// Export for '_testinternalcapi' shared extension. +PyAPI_FUNC(const char*) _PyMem_GetCurrentAllocatorName(void); + +// strdup() using PyMem_RawMalloc() +extern char* _PyMem_RawStrdup(const char *str); + +// strdup() using PyMem_Malloc(). +// Export for '_pickle ' shared extension. 
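+// The returned copy must be released with PyMem_Free().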
+PyAPI_FUNC(char*) _PyMem_Strdup(const char *str); + +// wcsdup() using PyMem_RawMalloc() +extern wchar_t* _PyMem_RawWcsdup(const wchar_t *str); + +typedef struct { + /* We tag each block with an API ID in order to tag API violations */ + char api_id; + PyMemAllocatorEx alloc; +} debug_alloc_api_t; + +struct _pymem_allocators { + PyMutex mutex; + struct { + PyMemAllocatorEx raw; + PyMemAllocatorEx mem; + PyMemAllocatorEx obj; + } standard; + struct { + debug_alloc_api_t raw; + debug_alloc_api_t mem; + debug_alloc_api_t obj; + } debug; + int is_debug_enabled; + PyObjectArenaAllocator obj_arena; +}; + +struct _Py_mem_interp_free_queue { + int has_work; // true if the queue is not empty + PyMutex mutex; // protects the queue + struct llist_node head; // queue of _mem_work_chunk items +}; + +/* Set the memory allocator of the specified domain to the default. + Save the old allocator into *old_alloc if it's non-NULL. + Return on success, or return -1 if the domain is unknown. */ +extern int _PyMem_SetDefaultAllocator( + PyMemAllocatorDomain domain, + PyMemAllocatorEx *old_alloc); + +/* Special bytes broadcast into debug memory blocks at appropriate times. + Strings of these are unlikely to be valid addresses, floats, ints or + 7-bit ASCII. + + - PYMEM_CLEANBYTE: clean (newly allocated) memory + - PYMEM_DEADBYTE dead (newly freed) memory + - PYMEM_FORBIDDENBYTE: untouchable bytes at each end of a block + + Byte patterns 0xCB, 0xDB and 0xFB have been replaced with 0xCD, 0xDD and + 0xFD to use the same values as Windows CRT debug malloc() and free(). + If modified, _PyMem_IsPtrFreed() should be updated as well. */ +#define PYMEM_CLEANBYTE 0xCD +#define PYMEM_DEADBYTE 0xDD +#define PYMEM_FORBIDDENBYTE 0xFD + +/* Heuristic checking if a pointer value is newly allocated + (uninitialized), newly freed or NULL (is equal to zero). + + The pointer is not dereferenced, only the pointer value is checked. + + The heuristic relies on the debug hooks on Python memory allocators which + fills newly allocated memory with CLEANBYTE (0xCD) and newly freed memory + with DEADBYTE (0xDD). Detect also "untouchable bytes" marked + with FORBIDDENBYTE (0xFD). */ +static inline int _PyMem_IsPtrFreed(const void *ptr) +{ + uintptr_t value = (uintptr_t)ptr; +#if SIZEOF_VOID_P == 8 + return (value == 0 + || value == (uintptr_t)0xCDCDCDCDCDCDCDCD + || value == (uintptr_t)0xDDDDDDDDDDDDDDDD + || value == (uintptr_t)0xFDFDFDFDFDFDFDFD); +#elif SIZEOF_VOID_P == 4 + return (value == 0 + || value == (uintptr_t)0xCDCDCDCD + || value == (uintptr_t)0xDDDDDDDD + || value == (uintptr_t)0xFDFDFDFD); +#else +# error "unknown pointer size" +#endif +} + +extern int _PyMem_GetAllocatorName( + const char *name, + PyMemAllocatorName *allocator); + +/* Configure the Python memory allocators. + Pass PYMEM_ALLOCATOR_DEFAULT to use default allocators. + PYMEM_ALLOCATOR_NOT_SET does nothing. */ +extern int _PyMem_SetupAllocators(PyMemAllocatorName allocator); + +/* Is the debug allocator enabled? */ +extern int _PyMem_DebugEnabled(void); + +// Enqueue a pointer to be freed possibly after some delay. +extern void _PyMem_FreeDelayed(void *ptr, size_t size); + +// Enqueue an object to be freed possibly after some delay +extern void _PyObject_FreeDelayed(void *ptr); + +// Periodically process delayed free requests. +extern void _PyMem_ProcessDelayed(PyThreadState *tstate); + +// Abandon all thread-local delayed free requests and push them to the +// interpreter's queue. 
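+// (This keeps pending frees from being lost once the thread is gone.)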
+extern void _PyMem_AbandonDelayed(PyThreadState *tstate); + +// On interpreter shutdown, frees all delayed free requests. +extern void _PyMem_FiniDelayed(PyInterpreterState *interp); + +#ifdef __cplusplus +} +#endif +#endif // !Py_INTERNAL_PYMEM_H diff --git a/Include/internal/pycore_pymem_init.h b/Include/internal/pycore_pymem_init.h new file mode 100644 index 0000000000000000000000000000000000000000..c593edc86d9952aa9b001006122460ee6f215c44 --- /dev/null +++ b/Include/internal/pycore_pymem_init.h @@ -0,0 +1,103 @@ +#ifndef Py_INTERNAL_PYMEM_INIT_H +#define Py_INTERNAL_PYMEM_INIT_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + + +/********************************/ +/* the allocators' initializers */ + +extern void * _PyMem_RawMalloc(void *, size_t); +extern void * _PyMem_RawCalloc(void *, size_t, size_t); +extern void * _PyMem_RawRealloc(void *, void *, size_t); +extern void _PyMem_RawFree(void *, void *); +#define PYRAW_ALLOC {NULL, _PyMem_RawMalloc, _PyMem_RawCalloc, _PyMem_RawRealloc, _PyMem_RawFree} + +#ifdef Py_GIL_DISABLED +// Py_GIL_DISABLED requires mimalloc +extern void* _PyObject_MiMalloc(void *, size_t); +extern void* _PyObject_MiCalloc(void *, size_t, size_t); +extern void _PyObject_MiFree(void *, void *); +extern void* _PyObject_MiRealloc(void *, void *, size_t); +# define PYOBJ_ALLOC {NULL, _PyObject_MiMalloc, _PyObject_MiCalloc, _PyObject_MiRealloc, _PyObject_MiFree} +extern void* _PyMem_MiMalloc(void *, size_t); +extern void* _PyMem_MiCalloc(void *, size_t, size_t); +extern void _PyMem_MiFree(void *, void *); +extern void* _PyMem_MiRealloc(void *, void *, size_t); +# define PYMEM_ALLOC {NULL, _PyMem_MiMalloc, _PyMem_MiCalloc, _PyMem_MiRealloc, _PyMem_MiFree} +#elif defined(WITH_PYMALLOC) +extern void* _PyObject_Malloc(void *, size_t); +extern void* _PyObject_Calloc(void *, size_t, size_t); +extern void _PyObject_Free(void *, void *); +extern void* _PyObject_Realloc(void *, void *, size_t); +# define PYOBJ_ALLOC {NULL, _PyObject_Malloc, _PyObject_Calloc, _PyObject_Realloc, _PyObject_Free} +# define PYMEM_ALLOC PYOBJ_ALLOC +#else +# define PYOBJ_ALLOC PYRAW_ALLOC +# define PYMEM_ALLOC PYOBJ_ALLOC +#endif // WITH_PYMALLOC + + +extern void* _PyMem_DebugRawMalloc(void *, size_t); +extern void* _PyMem_DebugRawCalloc(void *, size_t, size_t); +extern void* _PyMem_DebugRawRealloc(void *, void *, size_t); +extern void _PyMem_DebugRawFree(void *, void *); + +extern void* _PyMem_DebugMalloc(void *, size_t); +extern void* _PyMem_DebugCalloc(void *, size_t, size_t); +extern void* _PyMem_DebugRealloc(void *, void *, size_t); +extern void _PyMem_DebugFree(void *, void *); + +#define PYDBGRAW_ALLOC(runtime) \ + {&(runtime).allocators.debug.raw, _PyMem_DebugRawMalloc, _PyMem_DebugRawCalloc, _PyMem_DebugRawRealloc, _PyMem_DebugRawFree} +#define PYDBGMEM_ALLOC(runtime) \ + {&(runtime).allocators.debug.mem, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree} +#define PYDBGOBJ_ALLOC(runtime) \ + {&(runtime).allocators.debug.obj, _PyMem_DebugMalloc, _PyMem_DebugCalloc, _PyMem_DebugRealloc, _PyMem_DebugFree} + +extern void * _PyMem_ArenaAlloc(void *, size_t); +extern void _PyMem_ArenaFree(void *, void *, size_t); + +#ifdef Py_DEBUG +# define _pymem_allocators_standard_INIT(runtime) \ + { \ + PYDBGRAW_ALLOC(runtime), \ + PYDBGMEM_ALLOC(runtime), \ + PYDBGOBJ_ALLOC(runtime), \ + } +# define _pymem_is_debug_enabled_INIT 1 +#else +# define _pymem_allocators_standard_INIT(runtime) \ + { \ + 
PYRAW_ALLOC, \ + PYMEM_ALLOC, \ + PYOBJ_ALLOC, \ + } +# define _pymem_is_debug_enabled_INIT 0 +#endif + +#define _pymem_allocators_debug_INIT \ + { \ + {'r', PYRAW_ALLOC}, \ + {'m', PYMEM_ALLOC}, \ + {'o', PYOBJ_ALLOC}, \ + } + +# define _pymem_allocators_obj_arena_INIT \ + { NULL, _PyMem_ArenaAlloc, _PyMem_ArenaFree } + + +#define _Py_mem_free_queue_INIT(queue) \ + { \ + .head = LLIST_INIT(queue.head), \ + } + +#ifdef __cplusplus +} +#endif +#endif // !Py_INTERNAL_PYMEM_INIT_H diff --git a/Include/internal/pycore_pystate.h b/Include/internal/pycore_pystate.h new file mode 100644 index 0000000000000000000000000000000000000000..b0e72523f58ed8ec5aadc6c7844b652aa30d727c --- /dev/null +++ b/Include/internal/pycore_pystate.h @@ -0,0 +1,299 @@ +#ifndef Py_INTERNAL_PYSTATE_H +#define Py_INTERNAL_PYSTATE_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_freelist.h" // _PyFreeListState +#include "pycore_runtime.h" // _PyRuntime +#include "pycore_tstate.h" // _PyThreadStateImpl + + +// Values for PyThreadState.state. A thread must be in the "attached" state +// before calling most Python APIs. If the GIL is enabled, then "attached" +// implies that the thread holds the GIL and "detached" implies that the +// thread does not hold the GIL (or is in the process of releasing it). In +// `--disable-gil` builds, multiple threads may be "attached" to the same +// interpreter at the same time. Only the "bound" thread may perform the +// transitions between "attached" and "detached" on its own PyThreadState. +// +// The "suspended" state is used to implement stop-the-world pauses, such as +// for cyclic garbage collection. It is only used in `--disable-gil` builds. +// The "suspended" state is similar to the "detached" state in that in both +// states the thread is not allowed to call most Python APIs. However, unlike +// the "detached" state, a thread may not transition itself out from the +// "suspended" state. Only the thread performing a stop-the-world pause may +// transition a thread from the "suspended" state back to the "detached" state. +// +// State transition diagram: +// +// (bound thread) (stop-the-world thread) +// [attached] <-> [detached] <-> [suspended] +// | ^ +// +---------------------------->---------------------------+ +// (bound thread) +// +// The (bound thread) and (stop-the-world thread) labels indicate which thread +// is allowed to perform the transition. +#define _Py_THREAD_DETACHED 0 +#define _Py_THREAD_ATTACHED 1 +#define _Py_THREAD_SUSPENDED 2 + + +/* Check if the current thread is the main thread. + Use _Py_IsMainInterpreter() to check if it's the main interpreter. */ +static inline int +_Py_IsMainThread(void) +{ + unsigned long thread = PyThread_get_thread_ident(); + return (thread == _PyRuntime.main_thread); +} + + +static inline PyInterpreterState * +_PyInterpreterState_Main(void) +{ + return _PyRuntime.interpreters.main; +} + +static inline int +_Py_IsMainInterpreter(PyInterpreterState *interp) +{ + return (interp == _PyInterpreterState_Main()); +} + +static inline int +_Py_IsMainInterpreterFinalizing(PyInterpreterState *interp) +{ + /* bpo-39877: Access _PyRuntime directly rather than using + tstate->interp->runtime to support calls from Python daemon threads. + After Py_Finalize() has been called, tstate can be a dangling pointer: + point to PyThreadState freed memory. 
*/
+    return (_PyRuntimeState_GetFinalizing(&_PyRuntime) != NULL &&
+            interp == &_PyRuntime._main_interpreter);
+}
+
+// Export for _interpreters module.
+PyAPI_FUNC(PyObject *) _PyInterpreterState_GetIDObject(PyInterpreterState *);
+
+// Export for _interpreters module.
+PyAPI_FUNC(int) _PyInterpreterState_SetRunningMain(PyInterpreterState *);
+PyAPI_FUNC(void) _PyInterpreterState_SetNotRunningMain(PyInterpreterState *);
+PyAPI_FUNC(int) _PyInterpreterState_IsRunningMain(PyInterpreterState *);
+PyAPI_FUNC(int) _PyInterpreterState_FailIfRunningMain(PyInterpreterState *);
+
+extern int _PyThreadState_IsRunningMain(PyThreadState *);
+extern void _PyInterpreterState_ReinitRunningMain(PyThreadState *);
+
+
+static inline const PyConfig *
+_Py_GetMainConfig(void)
+{
+    PyInterpreterState *interp = _PyInterpreterState_Main();
+    if (interp == NULL) {
+        return NULL;
+    }
+    return _PyInterpreterState_GetConfig(interp);
+}
+
+
+/* Only handle signals on the main thread of the main interpreter. */
+static inline int
+_Py_ThreadCanHandleSignals(PyInterpreterState *interp)
+{
+    return (_Py_IsMainThread() && _Py_IsMainInterpreter(interp));
+}
+
+
+/* Variable and static inline functions for in-line access to current thread
+   and interpreter state */
+
+#if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE)
+extern _Py_thread_local PyThreadState *_Py_tss_tstate;
+#endif
+
+#ifndef NDEBUG
+extern int _PyThreadState_CheckConsistency(PyThreadState *tstate);
+#endif
+
+int _PyThreadState_MustExit(PyThreadState *tstate);
+
+// Export for most shared extensions, used via _PyThreadState_GET() static
+// inline function.
+PyAPI_FUNC(PyThreadState *) _PyThreadState_GetCurrent(void);
+
+/* Get the current Python thread state.
+
+   This function is unsafe: it does not check for error and it can return
+   NULL.
+
+   The caller must hold the GIL.
+
+   See also PyThreadState_Get() and PyThreadState_GetUnchecked(). */
+static inline PyThreadState*
+_PyThreadState_GET(void)
+{
+#if defined(HAVE_THREAD_LOCAL) && !defined(Py_BUILD_CORE_MODULE)
+    return _Py_tss_tstate;
+#else
+    return _PyThreadState_GetCurrent();
+#endif
+}
+
+// Attaches the current thread to the interpreter.
+//
+// This may block while acquiring the GIL (if the GIL is enabled) or while
+// waiting for a stop-the-world pause (if the GIL is disabled).
+//
+// High-level code should generally call PyEval_RestoreThread() instead, which
+// calls this function.
+extern void _PyThreadState_Attach(PyThreadState *tstate);
+
+// Detaches the current thread from the interpreter.
+//
+// High-level code should generally call PyEval_SaveThread() instead, which
+// calls this function.
+extern void _PyThreadState_Detach(PyThreadState *tstate);
+
+// Detaches the current thread into the "suspended" state if a stop-the-world
+// pause is in progress.
+//
+// If there is no stop-the-world pause in progress, then the thread switches
+// to the "detached" state.
+extern void _PyThreadState_Suspend(PyThreadState *tstate);
+
+// Perform a stop-the-world pause for all threads in all interpreters.
+//
+// Threads in the "attached" state are paused and transitioned to the
+// "suspended" state. Threads in the "detached" state switch to the
+// "suspended" state, preventing them from reattaching until the
+// stop-the-world pause is complete.
+//
+// NOTE: This is a no-op outside of Py_GIL_DISABLED builds.
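
   As an illustration of the pause/resume contract (a sketch, not part of
   this diff), a global pause brackets work on state that other threads
   normally touch concurrently, using the functions declared just below:

       static void
       inspect_all_threads_example(_PyRuntimeState *runtime)
       {
           _PyEval_StopTheWorldAll(runtime);   // other threads park in "suspended"
           // ... safely walk or mutate cross-thread state here ...
           _PyEval_StartTheWorldAll(runtime);  // let the world reattach and resume
       }
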
+extern void _PyEval_StopTheWorldAll(_PyRuntimeState *runtime); +extern void _PyEval_StartTheWorldAll(_PyRuntimeState *runtime); + +// Perform a stop-the-world pause for threads in the specified interpreter. +// +// NOTE: This is a no-op outside of Py_GIL_DISABLED builds. +extern void _PyEval_StopTheWorld(PyInterpreterState *interp); +extern void _PyEval_StartTheWorld(PyInterpreterState *interp); + + +static inline void +_Py_EnsureFuncTstateNotNULL(const char *func, PyThreadState *tstate) +{ + if (tstate == NULL) { + _Py_FatalErrorFunc(func, + "the function must be called with the GIL held, " + "after Python initialization and before Python finalization, " + "but the GIL is released (the current Python thread state is NULL)"); + } +} + +// Call Py_FatalError() if tstate is NULL +#define _Py_EnsureTstateNotNULL(tstate) \ + _Py_EnsureFuncTstateNotNULL(__func__, (tstate)) + + +/* Get the current interpreter state. + + The function is unsafe: it does not check for error and it can return NULL. + + The caller must hold the GIL. + + See also PyInterpreterState_Get() + and _PyGILState_GetInterpreterStateUnsafe(). */ +static inline PyInterpreterState* _PyInterpreterState_GET(void) { + PyThreadState *tstate = _PyThreadState_GET(); +#ifdef Py_DEBUG + _Py_EnsureTstateNotNULL(tstate); +#endif + return tstate->interp; +} + + +// PyThreadState functions + +// Export for _testinternalcapi +PyAPI_FUNC(PyThreadState *) _PyThreadState_New( + PyInterpreterState *interp, + int whence); +extern void _PyThreadState_Bind(PyThreadState *tstate); +PyAPI_FUNC(PyThreadState *) _PyThreadState_NewBound( + PyInterpreterState *interp, + int whence); +extern PyThreadState * _PyThreadState_RemoveExcept(PyThreadState *tstate); +extern void _PyThreadState_DeleteList(PyThreadState *list); +extern void _PyThreadState_ClearMimallocHeaps(PyThreadState *tstate); + +// Export for '_testinternalcapi' shared extension +PyAPI_FUNC(PyObject*) _PyThreadState_GetDict(PyThreadState *tstate); + +/* The implementation of sys._current_exceptions() Returns a dict mapping + thread id to that thread's current exception. +*/ +extern PyObject* _PyThread_CurrentExceptions(void); + + +/* Other */ + +extern PyThreadState * _PyThreadState_Swap( + _PyRuntimeState *runtime, + PyThreadState *newts); + +extern PyStatus _PyInterpreterState_Enable(_PyRuntimeState *runtime); + +#ifdef HAVE_FORK +extern PyStatus _PyInterpreterState_DeleteExceptMain(_PyRuntimeState *runtime); +extern void _PySignal_AfterFork(void); +#endif + +// Export for the stable ABI +PyAPI_FUNC(int) _PyState_AddModule( + PyThreadState *tstate, + PyObject* module, + PyModuleDef* def); + + +extern int _PyOS_InterruptOccurred(PyThreadState *tstate); + +#define HEAD_LOCK(runtime) \ + PyMutex_LockFlags(&(runtime)->interpreters.mutex, _Py_LOCK_DONT_DETACH) +#define HEAD_UNLOCK(runtime) \ + PyMutex_Unlock(&(runtime)->interpreters.mutex) + +// Get the configuration of the current interpreter. +// The caller must hold the GIL. +// Export for test_peg_generator. +PyAPI_FUNC(const PyConfig*) _Py_GetConfig(void); + +// Get the single PyInterpreterState used by this process' GILState +// implementation. +// +// This function doesn't check for error. Return NULL before _PyGILState_Init() +// is called and after _PyGILState_Fini() is called. +// +// See also PyInterpreterState_Get() and _PyInterpreterState_GET(). 
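
   Tying the pieces above together (a sketch, not part of this diff): runtime
   code traverses the interpreter list under HEAD_LOCK(), for example to
   count the live interpreters; PyInterpreterState_Next() is the public
   successor accessor:

       static Py_ssize_t
       count_interpreters_example(_PyRuntimeState *runtime)
       {
           Py_ssize_t n = 0;
           HEAD_LOCK(runtime);
           for (PyInterpreterState *interp = runtime->interpreters.head;
                interp != NULL;
                interp = PyInterpreterState_Next(interp))
           {
               n++;
           }
           HEAD_UNLOCK(runtime);
           return n;
       }
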
+extern PyInterpreterState* _PyGILState_GetInterpreterStateUnsafe(void);
+
+static inline struct _Py_object_freelists* _Py_object_freelists_GET(void)
+{
+    PyThreadState *tstate = _PyThreadState_GET();
+#ifdef Py_DEBUG
+    _Py_EnsureTstateNotNULL(tstate);
+#endif
+
+#ifdef Py_GIL_DISABLED
+    return &((_PyThreadStateImpl*)tstate)->freelists;
+#else
+    return &tstate->interp->object_state.freelists;
+#endif
+}
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_PYSTATE_H */
diff --git a/Include/internal/pycore_pystats.h b/Include/internal/pycore_pystats.h
new file mode 100644
index 0000000000000000000000000000000000000000..f8af398a56058619c35842f2a4f5c6f9a9dada97
--- /dev/null
+++ b/Include/internal/pycore_pystats.h
@@ -0,0 +1,21 @@
+#ifndef Py_INTERNAL_PYSTATS_H
+#define Py_INTERNAL_PYSTATS_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#ifdef Py_STATS
+extern void _Py_StatsOn(void);
+extern void _Py_StatsOff(void);
+extern void _Py_StatsClear(void);
+extern int _Py_PrintSpecializationStats(int to_file);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+#endif // !Py_INTERNAL_PYSTATS_H
diff --git a/Include/internal/pycore_pythonrun.h b/Include/internal/pycore_pythonrun.h
new file mode 100644
index 0000000000000000000000000000000000000000..0bfc5704dc4c5948f696a962425c676fc866010e
--- /dev/null
+++ b/Include/internal/pycore_pythonrun.h
@@ -0,0 +1,39 @@
+#ifndef Py_INTERNAL_PYTHONRUN_H
+#define Py_INTERNAL_PYTHONRUN_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+extern int _PyRun_SimpleFileObject(
+    FILE *fp,
+    PyObject *filename,
+    int closeit,
+    PyCompilerFlags *flags);
+
+extern int _PyRun_AnyFileObject(
+    FILE *fp,
+    PyObject *filename,
+    int closeit,
+    PyCompilerFlags *flags);
+
+extern int _PyRun_InteractiveLoopObject(
+    FILE *fp,
+    PyObject *filename,
+    PyCompilerFlags *flags);
+
+extern const char* _Py_SourceAsString(
+    PyObject *cmd,
+    const char *funcname,
+    const char *what,
+    PyCompilerFlags *cf,
+    PyObject **cmd_copy);
+
+#ifdef __cplusplus
+}
+#endif
+#endif // !Py_INTERNAL_PYTHONRUN_H
+
diff --git a/Include/internal/pycore_pythread.h b/Include/internal/pycore_pythread.h
new file mode 100644
index 0000000000000000000000000000000000000000..3610c6254db6af36e8dc1f7e485f7b8ad595e35d
--- /dev/null
+++ b/Include/internal/pycore_pythread.h
@@ -0,0 +1,159 @@
+#ifndef Py_INTERNAL_PYTHREAD_H
+#define Py_INTERNAL_PYTHREAD_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include "dynamic_annotations.h"  // _Py_ANNOTATE_PURE_HAPPENS_BEFORE_MUTEX
+#include "pycore_llist.h"         // struct llist_node
+
+// Get _POSIX_THREADS and _POSIX_SEMAPHORES macros if available
+#if (defined(HAVE_UNISTD_H) && !defined(_POSIX_THREADS) \
+     && !defined(_POSIX_SEMAPHORES))
+#  include <unistd.h>             // _POSIX_THREADS, _POSIX_SEMAPHORES
+#endif
+#if (defined(HAVE_PTHREAD_H) && !defined(_POSIX_THREADS) \
+     && !defined(_POSIX_SEMAPHORES))
+   // This means pthreads are not implemented in libc headers, hence the macro
+   // is not present in <unistd.h>. But they can still be implemented as an
+   // external library (e.g. gnu pth in pthread emulation)
+#  include <pthread.h>            // _POSIX_THREADS, _POSIX_SEMAPHORES
+#endif
+#if !defined(_POSIX_THREADS) && defined(__hpux) && defined(_SC_THREADS)
+   // Check if we're running on HP-UX and _SC_THREADS is defined.
+   // If so, then enough of the POSIX threads package is implemented to
+   // support Python threads.
+   //
+   // This is valid for HP-UX 11.23 running on an ia64 system. If needed, add
+   // a check of __ia64 to verify that we're running on an ia64 system instead
+   // of a pa-risc system.
+#  define _POSIX_THREADS
+#endif
+
+
+#if defined(_POSIX_THREADS) || defined(HAVE_PTHREAD_STUBS)
+#  define _USE_PTHREADS
+#endif
+
+#if defined(_USE_PTHREADS) && defined(HAVE_PTHREAD_CONDATTR_SETCLOCK) && defined(HAVE_CLOCK_GETTIME) && defined(CLOCK_MONOTONIC)
+// monotonic is supported statically. It doesn't mean it works at runtime.
+#  define CONDATTR_MONOTONIC
+#endif
+
+
+#if defined(HAVE_PTHREAD_STUBS)
+#include "cpython/pthread_stubs.h"  // PTHREAD_KEYS_MAX
+#include <stdbool.h>                // bool
+
+// pthread_key
+struct py_stub_tls_entry {
+    bool in_use;
+    void *value;
+};
+#endif
+
+struct _pythread_runtime_state {
+    int initialized;
+
+#ifdef _USE_PTHREADS
+    // This matches when thread_pthread.h is used.
+    struct {
+        /* NULL when pthread_condattr_setclock(CLOCK_MONOTONIC) is not supported. */
+        pthread_condattr_t *ptr;
+# ifdef CONDATTR_MONOTONIC
+        /* The value to which condattr_monotonic is set. */
+        pthread_condattr_t val;
+# endif
+    } _condattr_monotonic;
+
+#endif  // _USE_PTHREADS
+
+#if defined(HAVE_PTHREAD_STUBS)
+    struct {
+        struct py_stub_tls_entry tls_entries[PTHREAD_KEYS_MAX];
+    } stubs;
+#endif
+
+    // Linked list of ThreadHandles
+    struct llist_node handles;
+};
+
+#define _pythread_RUNTIME_INIT(pythread) \
+    { \
+        .handles = LLIST_INIT(pythread.handles), \
+    }
+
+#ifdef HAVE_FORK
+/* Private function to reinitialize a lock at fork in the child process.
+   Reset the lock to the unlocked state.
+   Return 0 on success, return -1 on error. */
+extern int _PyThread_at_fork_reinit(PyThread_type_lock *lock);
+extern void _PyThread_AfterFork(struct _pythread_runtime_state *state);
+#endif  /* HAVE_FORK */
+
+
+// unset: -1 seconds, in nanoseconds
+#define PyThread_UNSET_TIMEOUT ((PyTime_t)(-1 * 1000 * 1000 * 1000))
+
+// Exported for the _interpchannels module.
+PyAPI_FUNC(int) PyThread_ParseTimeoutArg(
+    PyObject *arg,
+    int blocking,
+    PY_TIMEOUT_T *timeout);
+
+/* Helper to acquire an interruptible lock with a timeout.  If the lock acquire
+ * is interrupted, signal handlers are run, and if they raise an exception,
+ * PY_LOCK_INTR is returned.  Otherwise, PY_LOCK_ACQUIRED or PY_LOCK_FAILURE
+ * are returned, depending on whether the lock can be acquired within the
+ * timeout.
+ */
+// Exported for the _interpchannels module.
+PyAPI_FUNC(PyLockStatus) PyThread_acquire_lock_timed_with_retries(
+    PyThread_type_lock,
+    PY_TIMEOUT_T microseconds);
+
+typedef unsigned long long PyThread_ident_t;
+typedef Py_uintptr_t PyThread_handle_t;
+
+#define PY_FORMAT_THREAD_IDENT_T "llu"
+#define Py_PARSE_THREAD_IDENT_T "K"
+
+PyAPI_FUNC(PyThread_ident_t) PyThread_get_thread_ident_ex(void);
+
+/* Thread joining APIs.
+ *
+ * These APIs have a strict contract:
+ * - Either PyThread_join_thread or PyThread_detach_thread must be called
+ *   exactly once with the given handle.
+ * - Calling neither PyThread_join_thread nor PyThread_detach_thread results
+ *   in a resource leak until the end of the process.
+ * - Any other usage, such as calling both PyThread_join_thread and
+ *   PyThread_detach_thread, or calling them more than once (including
+ *   simultaneously), results in undefined behavior.
+ */
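
   A sketch of the intended usage (not part of this diff; thread_main is a
   hypothetical thread entry point), assuming the 0-on-success convention
   documented for the declarations below:

       static void thread_main(void *arg) { /* thread body */ }

       static int
       spawn_and_join_example(void)
       {
           PyThread_ident_t ident;
           PyThread_handle_t handle;
           if (PyThread_start_joinable_thread(thread_main, NULL,
                                              &ident, &handle) != 0) {
               return -1;  // could not start the thread
           }
           // Per the contract above: exactly one of join/detach, exactly once.
           return PyThread_join_thread(handle);
       }
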
+PyAPI_FUNC(int) PyThread_start_joinable_thread(void (*func)(void *),
+                                               void *arg,
+                                               PyThread_ident_t* ident,
+                                               PyThread_handle_t* handle);
+/*
+ * Join a thread started with `PyThread_start_joinable_thread`.
+ * This function cannot be interrupted. It returns 0 on success,
+ * a non-zero value on failure.
+ */
+PyAPI_FUNC(int) PyThread_join_thread(PyThread_handle_t);
+/*
+ * Detach a thread started with `PyThread_start_joinable_thread`, such
+ * that its resources are released as soon as it exits.
+ * This function cannot be interrupted. It returns 0 on success,
+ * a non-zero value on failure.
+ */
+PyAPI_FUNC(int) PyThread_detach_thread(PyThread_handle_t);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_PYTHREAD_H */
diff --git a/Include/internal/pycore_qsbr.h b/Include/internal/pycore_qsbr.h
new file mode 100644
index 0000000000000000000000000000000000000000..84e9d98dd21bda1460b0ecfd68a71c630f865ba8
--- /dev/null
+++ b/Include/internal/pycore_qsbr.h
@@ -0,0 +1,173 @@
+// The QSBR APIs (quiescent state-based reclamation) provide a mechanism for
+// the free-threaded build to safely reclaim memory when there may be
+// concurrent accesses.
+//
+// Many operations in the free-threaded build are protected by locks. However,
+// in some cases, we want to allow reads to happen concurrently with updates.
+// In this case, we need to delay freeing ("reclaiming") any memory that may be
+// concurrently accessed by a reader. The QSBR APIs provide a way to do this.
+#ifndef Py_INTERNAL_QSBR_H
+#define Py_INTERNAL_QSBR_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "pycore_lock.h"          // PyMutex
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+// The shared write sequence is always odd and incremented by two. Detached
+// threads are indicated by a read sequence of zero. This avoids collisions
+// between the offline state and any valid sequence number even if the
+// sequence numbers wrap around.
+#define QSBR_OFFLINE 0
+#define QSBR_INITIAL 1
+#define QSBR_INCR    2
+
+// Wrap-around safe comparison. This is a holdover from the FreeBSD
+// implementation, which uses 32-bit sequence numbers. We currently use 64-bit
+// sequence numbers, so wrap-around is unlikely.
+#define QSBR_LT(a, b) ((int64_t)((a)-(b)) < 0)
+#define QSBR_LEQ(a, b) ((int64_t)((a)-(b)) <= 0)
+
+struct _qsbr_shared;
+struct _PyThreadStateImpl;  // forward declare to avoid circular dependency
+
+// Per-thread state
+struct _qsbr_thread_state {
+    // Last observed write sequence (or 0 if detached)
+    uint64_t seq;
+
+    // Shared (per-interpreter) QSBR state
+    struct _qsbr_shared *shared;
+
+    // Thread state (or NULL)
+    PyThreadState *tstate;
+
+    // Number of held items added by this thread since the last write sequence
+    // advance
+    int deferred_count;
+
+    // Estimate for the amount of memory that is held by this thread since
+    // the last write sequence advance
+    size_t deferred_memory;
+
+    // Amount of memory in mimalloc pages deferred from collection. When
+    // deferred, they are prevented from being used for a different size class
+    // and in a different thread.
+    size_t deferred_page_memory;
+
+    // True if the deferred memory frees should be processed.
+    bool should_process;
+
+    // Is this thread state allocated?
+    bool allocated;
+    struct _qsbr_thread_state *freelist_next;
+};
+
+// Padding to avoid false sharing
+struct _qsbr_pad {
+    struct _qsbr_thread_state qsbr;
+    char __padding[64 - sizeof(struct _qsbr_thread_state)];
+};
+
+// Per-interpreter state
+struct _qsbr_shared {
+    // Write sequence: always odd, incremented by two
+    uint64_t wr_seq;
+
+    // Minimum observed read sequence of all QSBR thread states
+    uint64_t rd_seq;
+
+    // Array of QSBR thread states.
+    struct _qsbr_pad *array;
+    Py_ssize_t size;
+
+    // Freelist of unused _qsbr_thread_states (protected by mutex)
+    PyMutex mutex;
+    struct _qsbr_thread_state *freelist;
+};
+
+static inline uint64_t
+_Py_qsbr_shared_current(struct _qsbr_shared *shared)
+{
+    return _Py_atomic_load_uint64_acquire(&shared->wr_seq);
+}
+
+// Reports a quiescent state: the caller no longer holds any pointer to shared
+// data not protected by locks or reference counts.
+static inline void
+_Py_qsbr_quiescent_state(struct _qsbr_thread_state *qsbr)
+{
+    uint64_t seq = _Py_qsbr_shared_current(qsbr->shared);
+    _Py_atomic_store_uint64_release(&qsbr->seq, seq);
+}
+
+// Have the read sequences advanced to the given goal? Like `_Py_qsbr_poll()`,
+// but does not perform a scan of threads.
+static inline bool
+_Py_qbsr_goal_reached(struct _qsbr_thread_state *qsbr, uint64_t goal)
+{
+    uint64_t rd_seq = _Py_atomic_load_uint64(&qsbr->shared->rd_seq);
+    return QSBR_LEQ(goal, rd_seq);
+}
+
+// Advance the write sequence and return the new goal. This should be called
+// after data is removed. The returned goal is used with `_Py_qsbr_poll()` to
+// determine when it is safe to reclaim (free) the memory.
+extern uint64_t
+_Py_qsbr_advance(struct _qsbr_shared *shared);
+
+// Return the next value for the write sequence (current plus the increment).
+extern uint64_t
+_Py_qsbr_shared_next(struct _qsbr_shared *shared);
+
+// Return true if deferred memory frees held by QSBR should be processed to
+// determine if they can be safely freed.
+static inline bool
+_Py_qsbr_should_process(struct _qsbr_thread_state *qsbr)
+{
+    return qsbr->should_process;
+}
+
+// Have the read sequences advanced to the given goal? If this returns true,
+// it is safe to reclaim any memory tagged with the goal (or an earlier goal).
+extern bool
+_Py_qsbr_poll(struct _qsbr_thread_state *qsbr, uint64_t goal);
+
+// Called when a thread attaches to the interpreter.
+extern void
+_Py_qsbr_attach(struct _qsbr_thread_state *qsbr);
+
+// Called when a thread detaches from the interpreter.
+extern void
+_Py_qsbr_detach(struct _qsbr_thread_state *qsbr);
+
+// Reserves (allocates) a QSBR state and returns its index.
+extern Py_ssize_t
+_Py_qsbr_reserve(PyInterpreterState *interp);
+
+// Associates a PyThreadState with the QSBR state at the given index.
+extern void
+_Py_qsbr_register(struct _PyThreadStateImpl *tstate,
+                  PyInterpreterState *interp, Py_ssize_t index);
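
   Putting the API together (a sketch, not part of this diff; the retired_*
   names are hypothetical): a writer tags retired memory with a goal from
   _Py_qsbr_advance() and frees it only once _Py_qsbr_poll() confirms that
   every reader has passed a quiescent state since then:

       struct retired_example {
           void *ptr;
           uint64_t goal;
       };

       static void
       retire_example(struct _qsbr_thread_state *qsbr,
                      struct retired_example *r, void *ptr)
       {
           r->ptr = ptr;
           // Readers must catch up to this goal before ptr may be freed.
           r->goal = _Py_qsbr_advance(qsbr->shared);
       }

       static void
       maybe_reclaim_example(struct _qsbr_thread_state *qsbr,
                             struct retired_example *r)
       {
           if (r->ptr != NULL && _Py_qsbr_poll(qsbr, r->goal)) {
               PyMem_RawFree(r->ptr);  // no reader can still observe it
               r->ptr = NULL;
           }
       }
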
+// Disassociates a PyThreadState from the QSBR state and frees the QSBR state.
+extern void
+_Py_qsbr_unregister(PyThreadState *tstate);
+
+extern void
+_Py_qsbr_fini(PyInterpreterState *interp);
+
+extern void
+_Py_qsbr_after_fork(struct _PyThreadStateImpl *tstate);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_QSBR_H */
diff --git a/Include/internal/pycore_range.h b/Include/internal/pycore_range.h
new file mode 100644
index 0000000000000000000000000000000000000000..bf045ec4fd8332de7d13c72558099f5db8bee086
--- /dev/null
+++ b/Include/internal/pycore_range.h
@@ -0,0 +1,21 @@
+#ifndef Py_INTERNAL_RANGE_H
+#define Py_INTERNAL_RANGE_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+typedef struct {
+    PyObject_HEAD
+    long start;
+    long step;
+    long len;
+} _PyRangeIterObject;
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_RANGE_H */
diff --git a/Include/internal/pycore_runtime.h b/Include/internal/pycore_runtime.h
new file mode 100644
index 0000000000000000000000000000000000000000..ed028944d18e046317cfa3a62239c9e2e4c32c59
--- /dev/null
+++ b/Include/internal/pycore_runtime.h
@@ -0,0 +1,410 @@
+#ifndef Py_INTERNAL_RUNTIME_H
+#define Py_INTERNAL_RUNTIME_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include "pycore_atexit.h"        // struct _atexit_runtime_state
+#include "pycore_ceval_state.h"   // struct _ceval_runtime_state
+#include "pycore_crossinterp.h"   // struct _xidregistry
+#include "pycore_faulthandler.h"  // struct _faulthandler_runtime_state
+#include "pycore_floatobject.h"   // struct _Py_float_runtime_state
+#include "pycore_import.h"        // struct _import_runtime_state
+#include "pycore_interp.h"        // PyInterpreterState
+#include "pycore_object_state.h"  // struct _py_object_runtime_state
+#include "pycore_parser.h"        // struct _parser_runtime_state
+#include "pycore_pyhash.h"        // struct pyhash_runtime_state
+#include "pycore_pymem.h"         // struct _pymem_allocators
+#include "pycore_pythread.h"      // struct _pythread_runtime_state
+#include "pycore_signal.h"        // struct _signals_runtime_state
+#include "pycore_tracemalloc.h"   // struct _tracemalloc_runtime_state
+#include "pycore_typeobject.h"    // struct _types_runtime_state
+#include "pycore_unicodeobject.h" // struct _Py_unicode_runtime_state
+
+struct _getargs_runtime_state {
+    struct _PyArg_Parser *static_parsers;
+};
+
+/* GIL state */
+
+struct _gilstate_runtime_state {
+    /* bpo-26558: Flag to disable PyGILState_Check().
+       If set to non-zero, PyGILState_Check() always returns 1.
*/ + int check_enabled; + /* The single PyInterpreterState used by this process' + GILState implementation + */ + /* TODO: Given interp_main, it may be possible to kill this ref */ + PyInterpreterState *autoInterpreterState; +}; + +/* Runtime audit hook state */ + +#define _Py_Debug_Cookie "xdebugpy" + +#ifdef Py_GIL_DISABLED +# define _Py_Debug_gilruntimestate_enabled offsetof(struct _gil_runtime_state, enabled) +# define _Py_Debug_Free_Threaded 1 +#else +# define _Py_Debug_gilruntimestate_enabled 0 +# define _Py_Debug_Free_Threaded 0 +#endif +typedef struct _Py_AuditHookEntry { + struct _Py_AuditHookEntry *next; + Py_AuditHookFunction hookCFunction; + void *userData; +} _Py_AuditHookEntry; + +typedef struct _Py_DebugOffsets { + char cookie[8] _Py_NONSTRING; + uint64_t version; + uint64_t free_threaded; + // Runtime state offset; + struct _runtime_state { + uint64_t size; + uint64_t finalizing; + uint64_t interpreters_head; + } runtime_state; + + // Interpreter state offset; + struct _interpreter_state { + uint64_t size; + uint64_t id; + uint64_t next; + uint64_t threads_head; + uint64_t gc; + uint64_t imports_modules; + uint64_t sysdict; + uint64_t builtins; + uint64_t ceval_gil; + uint64_t gil_runtime_state; + uint64_t gil_runtime_state_enabled; + uint64_t gil_runtime_state_locked; + uint64_t gil_runtime_state_holder; + } interpreter_state; + + // Thread state offset; + struct _thread_state{ + uint64_t size; + uint64_t prev; + uint64_t next; + uint64_t interp; + uint64_t current_frame; + uint64_t thread_id; + uint64_t native_thread_id; + uint64_t datastack_chunk; + uint64_t status; + } thread_state; + + // InterpreterFrame offset; + struct _interpreter_frame { + uint64_t size; + uint64_t previous; + uint64_t executable; + uint64_t instr_ptr; + uint64_t localsplus; + uint64_t owner; + } interpreter_frame; + + // Code object offset; + struct _code_object { + uint64_t size; + uint64_t filename; + uint64_t name; + uint64_t qualname; + uint64_t linetable; + uint64_t firstlineno; + uint64_t argcount; + uint64_t localsplusnames; + uint64_t localspluskinds; + uint64_t co_code_adaptive; + } code_object; + + // PyObject offset; + struct _pyobject { + uint64_t size; + uint64_t ob_type; + } pyobject; + + // PyTypeObject object offset; + struct _type_object { + uint64_t size; + uint64_t tp_name; + uint64_t tp_repr; + uint64_t tp_flags; + } type_object; + + // PyTuple object offset; + struct _tuple_object { + uint64_t size; + uint64_t ob_item; + uint64_t ob_size; + } tuple_object; + + // PyList object offset; + struct _list_object { + uint64_t size; + uint64_t ob_item; + uint64_t ob_size; + } list_object; + + // PyDict object offset; + struct _dict_object { + uint64_t size; + uint64_t ma_keys; + uint64_t ma_values; + } dict_object; + + // PyFloat object offset; + struct _float_object { + uint64_t size; + uint64_t ob_fval; + } float_object; + + // PyLong object offset; + struct _long_object { + uint64_t size; + uint64_t lv_tag; + uint64_t ob_digit; + } long_object; + + // PyBytes object offset; + struct _bytes_object { + uint64_t size; + uint64_t ob_size; + uint64_t ob_sval; + } bytes_object; + + // Unicode object offset; + struct _unicode_object { + uint64_t size; + uint64_t state; + uint64_t length; + uint64_t asciiobject_size; + } unicode_object; + + // GC runtime state offset; + struct _gc { + uint64_t size; + uint64_t collecting; + } gc; +} _Py_DebugOffsets; + +/* Reference tracer state */ +struct _reftracer_runtime_state { + PyRefTracer tracer_func; + void* tracer_data; +}; + +/* Full Python 
runtime state */ + +/* _PyRuntimeState holds the global state for the CPython runtime. + That data is exposed in the internal API as a static variable (_PyRuntime). + */ +typedef struct pyruntimestate { + /* This field must be first to facilitate locating it by out of process + * debuggers. Out of process debuggers will use the offsets contained in this + * field to be able to locate other fields in several interpreter structures + * in a way that doesn't require them to know the exact layout of those + * structures. + * + * IMPORTANT: + * This struct is **NOT** backwards compatible between minor version of the + * interpreter and the members, order of members and size can change between + * minor versions. This struct is only guaranteed to be stable between patch + * versions for a given minor version of the interpreter. + */ + _Py_DebugOffsets debug_offsets; + + /* Has been initialized to a safe state. + + In order to be effective, this must be set to 0 during or right + after allocation. */ + int _initialized; + + /* Is running Py_PreInitialize()? */ + int preinitializing; + + /* Is Python preinitialized? Set to 1 by Py_PreInitialize() */ + int preinitialized; + + /* Is Python core initialized? Set to 1 by _Py_InitializeCore() */ + int core_initialized; + + /* Is Python fully initialized? Set to 1 by Py_Initialize() */ + int initialized; + + /* Set by Py_FinalizeEx(). Only reset to NULL if Py_Initialize() + is called again. + + Use _PyRuntimeState_GetFinalizing() and _PyRuntimeState_SetFinalizing() + to access it, don't access it directly. */ + PyThreadState *_finalizing; + /* The ID of the OS thread in which we are finalizing. */ + unsigned long _finalizing_id; + + struct pyinterpreters { + PyMutex mutex; + /* The linked list of interpreters, newest first. */ + PyInterpreterState *head; + /* The runtime's initial interpreter, which has a special role + in the operation of the runtime. It is also often the only + interpreter. */ + PyInterpreterState *main; + /* next_id is an auto-numbered sequence of small + integers. It gets initialized in _PyInterpreterState_Enable(), + which is called in Py_Initialize(), and used in + PyInterpreterState_New(). A negative interpreter ID + indicates an error occurred. The main interpreter will + always have an ID of 0. Overflow results in a RuntimeError. + If that becomes a problem later then we can adjust, e.g. by + using a Python int. */ + int64_t next_id; + } interpreters; + + /* Platform-specific identifier and PyThreadState, respectively, for the + main thread in the main interpreter. */ + unsigned long main_thread; + PyThreadState *main_tstate; + + /* ---------- IMPORTANT --------------------------- + The fields above this line are declared as early as + possible to facilitate out-of-process observability + tools. */ + + /* cross-interpreter data and utils */ + struct _xi_runtime_state xi; + + struct _pymem_allocators allocators; + struct _obmalloc_global_state obmalloc; + struct pyhash_runtime_state pyhash_state; + struct _pythread_runtime_state threads; + struct _signals_runtime_state signals; + + /* Used for the thread state bound to the current thread. */ + Py_tss_t autoTSSkey; + + /* Used instead of PyThreadState.trash when there is not current tstate. 
*/ + Py_tss_t trashTSSkey; + + PyWideStringList orig_argv; + + struct _parser_runtime_state parser; + + struct _atexit_runtime_state atexit; + + struct _import_runtime_state imports; + struct _ceval_runtime_state ceval; + struct _gilstate_runtime_state gilstate; + struct _getargs_runtime_state getargs; + struct _fileutils_state fileutils; + struct _faulthandler_runtime_state faulthandler; + struct _tracemalloc_runtime_state tracemalloc; + struct _reftracer_runtime_state ref_tracer; + + // The rwmutex is used to prevent overlapping global and per-interpreter + // stop-the-world events. Global stop-the-world events lock the mutex + // exclusively (as a "writer"), while per-interpreter stop-the-world events + // lock it non-exclusively (as "readers"). + _PyRWMutex stoptheworld_mutex; + struct _stoptheworld_state stoptheworld; + + PyPreConfig preconfig; + + // Audit values must be preserved when Py_Initialize()/Py_Finalize() + // is called multiple times. + Py_OpenCodeHookFunction open_code_hook; + void *open_code_userdata; + struct { + PyMutex mutex; + _Py_AuditHookEntry *head; + } audit_hooks; + + struct _py_object_runtime_state object_state; + struct _Py_float_runtime_state float_state; + struct _Py_unicode_runtime_state unicode_state; + struct _types_runtime_state types; + + /* All the objects that are shared by the runtime's interpreters. */ + struct _Py_cached_objects cached_objects; + struct _Py_static_objects static_objects; + + /* The following fields are here to avoid allocation during init. + The data is exposed through _PyRuntimeState pointer fields. + These fields should not be accessed directly outside of init. + + All other _PyRuntimeState pointer fields are populated when + needed and default to NULL. + + For now there are some exceptions to that rule, which require + allocation during init. These will be addressed on a case-by-case + basis. Most notably, we don't pre-allocated the several mutex + (PyThread_type_lock) fields, because on Windows we only ever get + a pointer type. + */ + + /* _PyRuntimeState.interpreters.main */ + PyInterpreterState _main_interpreter; + +#if defined(__EMSCRIPTEN__) && defined(PY_CALL_TRAMPOLINE) + // Used in "Python/emscripten_trampoline.c" to choose between type + // reflection trampoline and EM_JS trampoline. + bool wasm_type_reflection_available; +#endif + +} _PyRuntimeState; + + +/* other API */ + +// Export _PyRuntime for shared extensions which use it in static inline +// functions for best performance, like _Py_IsMainThread() or _Py_ID(). +// It's also made accessible for debuggers and profilers. +PyAPI_DATA(_PyRuntimeState) _PyRuntime; + +extern PyStatus _PyRuntimeState_Init(_PyRuntimeState *runtime); +extern void _PyRuntimeState_Fini(_PyRuntimeState *runtime); + +#ifdef HAVE_FORK +extern PyStatus _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime); +#endif + +/* Initialize _PyRuntimeState. + Return NULL on success, or return an error message on failure. 
*/ +extern PyStatus _PyRuntime_Initialize(void); + +extern void _PyRuntime_Finalize(void); + + +static inline PyThreadState* +_PyRuntimeState_GetFinalizing(_PyRuntimeState *runtime) { + return (PyThreadState*)_Py_atomic_load_ptr_relaxed(&runtime->_finalizing); +} + +static inline unsigned long +_PyRuntimeState_GetFinalizingID(_PyRuntimeState *runtime) { + return _Py_atomic_load_ulong_relaxed(&runtime->_finalizing_id); +} + +static inline void +_PyRuntimeState_SetFinalizing(_PyRuntimeState *runtime, PyThreadState *tstate) { + _Py_atomic_store_ptr_relaxed(&runtime->_finalizing, tstate); + if (tstate == NULL) { + _Py_atomic_store_ulong_relaxed(&runtime->_finalizing_id, 0); + } + else { + // XXX Re-enable this assert once gh-109860 is fixed. + //assert(tstate->thread_id == PyThread_get_thread_ident()); + _Py_atomic_store_ulong_relaxed(&runtime->_finalizing_id, + tstate->thread_id); + } +} + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_RUNTIME_H */ diff --git a/Include/internal/pycore_runtime_init.h b/Include/internal/pycore_runtime_init.h new file mode 100644 index 0000000000000000000000000000000000000000..7eef9edc0aa33dd5d9c8eb67cff24907e4e98dc7 --- /dev/null +++ b/Include/internal/pycore_runtime_init.h @@ -0,0 +1,329 @@ +#ifndef Py_INTERNAL_RUNTIME_INIT_H +#define Py_INTERNAL_RUNTIME_INIT_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_ceval_state.h" // _PyEval_RUNTIME_PERF_INIT +#include "pycore_faulthandler.h" // _faulthandler_runtime_state_INIT +#include "pycore_floatobject.h" // _py_float_format_unknown +#include "pycore_object.h" // _PyObject_HEAD_INIT +#include "pycore_obmalloc_init.h" // _obmalloc_global_state_INIT +#include "pycore_parser.h" // _parser_runtime_state_INIT +#include "pycore_pyhash.h" // pyhash_state_INIT +#include "pycore_pymem_init.h" // _pymem_allocators_standard_INIT +#include "pycore_pythread.h" // _pythread_RUNTIME_INIT +#include "pycore_qsbr.h" // QSBR_INITIAL +#include "pycore_runtime_init_generated.h" // _Py_bytes_characters_INIT +#include "pycore_signal.h" // _signals_RUNTIME_INIT +#include "pycore_tracemalloc.h" // _tracemalloc_runtime_state_INIT + + +extern PyTypeObject _PyExc_MemoryError; + + +/* The static initializers defined here should only be used + in the runtime init code (in pystate.c and pylifecycle.c). 
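
   For reference, pystate.c instantiates the global runtime with the macro
   below, roughly as follows (a sketch of the call shape, not a verbatim
   quote):

       _PyRuntimeState _PyRuntime = _PyRuntimeState_INIT(_PyRuntime, _Py_Debug_Cookie);
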
*/ + +#define _PyRuntimeState_INIT(runtime, debug_cookie) \ + { \ + .debug_offsets = { \ + .cookie = debug_cookie, \ + .version = PY_VERSION_HEX, \ + .free_threaded = _Py_Debug_Free_Threaded, \ + .runtime_state = { \ + .size = sizeof(_PyRuntimeState), \ + .finalizing = offsetof(_PyRuntimeState, _finalizing), \ + .interpreters_head = offsetof(_PyRuntimeState, interpreters.head), \ + }, \ + .interpreter_state = { \ + .size = sizeof(PyInterpreterState), \ + .id = offsetof(PyInterpreterState, id), \ + .next = offsetof(PyInterpreterState, next), \ + .threads_head = offsetof(PyInterpreterState, threads.head), \ + .gc = offsetof(PyInterpreterState, gc), \ + .imports_modules = offsetof(PyInterpreterState, imports.modules), \ + .sysdict = offsetof(PyInterpreterState, sysdict), \ + .builtins = offsetof(PyInterpreterState, builtins), \ + .ceval_gil = offsetof(PyInterpreterState, ceval.gil), \ + .gil_runtime_state = offsetof(PyInterpreterState, _gil), \ + .gil_runtime_state_enabled = _Py_Debug_gilruntimestate_enabled, \ + .gil_runtime_state_locked = offsetof(PyInterpreterState, _gil.locked), \ + .gil_runtime_state_holder = offsetof(PyInterpreterState, _gil.last_holder), \ + }, \ + .thread_state = { \ + .size = sizeof(PyThreadState), \ + .prev = offsetof(PyThreadState, prev), \ + .next = offsetof(PyThreadState, next), \ + .interp = offsetof(PyThreadState, interp), \ + .current_frame = offsetof(PyThreadState, current_frame), \ + .thread_id = offsetof(PyThreadState, thread_id), \ + .native_thread_id = offsetof(PyThreadState, native_thread_id), \ + .datastack_chunk = offsetof(PyThreadState, datastack_chunk), \ + .status = offsetof(PyThreadState, _status), \ + }, \ + .interpreter_frame = { \ + .size = sizeof(_PyInterpreterFrame), \ + .previous = offsetof(_PyInterpreterFrame, previous), \ + .executable = offsetof(_PyInterpreterFrame, f_executable), \ + .instr_ptr = offsetof(_PyInterpreterFrame, instr_ptr), \ + .localsplus = offsetof(_PyInterpreterFrame, localsplus), \ + .owner = offsetof(_PyInterpreterFrame, owner), \ + }, \ + .code_object = { \ + .size = sizeof(PyCodeObject), \ + .filename = offsetof(PyCodeObject, co_filename), \ + .name = offsetof(PyCodeObject, co_name), \ + .qualname = offsetof(PyCodeObject, co_qualname), \ + .linetable = offsetof(PyCodeObject, co_linetable), \ + .firstlineno = offsetof(PyCodeObject, co_firstlineno), \ + .argcount = offsetof(PyCodeObject, co_argcount), \ + .localsplusnames = offsetof(PyCodeObject, co_localsplusnames), \ + .localspluskinds = offsetof(PyCodeObject, co_localspluskinds), \ + .co_code_adaptive = offsetof(PyCodeObject, co_code_adaptive), \ + }, \ + .pyobject = { \ + .size = sizeof(PyObject), \ + .ob_type = offsetof(PyObject, ob_type), \ + }, \ + .type_object = { \ + .size = sizeof(PyTypeObject), \ + .tp_name = offsetof(PyTypeObject, tp_name), \ + .tp_repr = offsetof(PyTypeObject, tp_repr), \ + .tp_flags = offsetof(PyTypeObject, tp_flags), \ + }, \ + .tuple_object = { \ + .size = sizeof(PyTupleObject), \ + .ob_item = offsetof(PyTupleObject, ob_item), \ + .ob_size = offsetof(PyTupleObject, ob_base.ob_size), \ + }, \ + .list_object = { \ + .size = sizeof(PyListObject), \ + .ob_item = offsetof(PyListObject, ob_item), \ + .ob_size = offsetof(PyListObject, ob_base.ob_size), \ + }, \ + .dict_object = { \ + .size = sizeof(PyDictObject), \ + .ma_keys = offsetof(PyDictObject, ma_keys), \ + .ma_values = offsetof(PyDictObject, ma_values), \ + }, \ + .float_object = { \ + .size = sizeof(PyFloatObject), \ + .ob_fval = offsetof(PyFloatObject, ob_fval), \ + }, \ + 
.long_object = { \ + .size = sizeof(PyLongObject), \ + .lv_tag = offsetof(PyLongObject, long_value.lv_tag), \ + .ob_digit = offsetof(PyLongObject, long_value.ob_digit), \ + }, \ + .bytes_object = { \ + .size = sizeof(PyBytesObject), \ + .ob_size = offsetof(PyBytesObject, ob_base.ob_size), \ + .ob_sval = offsetof(PyBytesObject, ob_sval), \ + }, \ + .unicode_object = { \ + .size = sizeof(PyUnicodeObject), \ + .state = offsetof(PyUnicodeObject, _base._base.state), \ + .length = offsetof(PyUnicodeObject, _base._base.length), \ + .asciiobject_size = sizeof(PyASCIIObject), \ + }, \ + .gc = { \ + .size = sizeof(struct _gc_runtime_state), \ + .collecting = offsetof(struct _gc_runtime_state, collecting), \ + }, \ + }, \ + .allocators = { \ + .standard = _pymem_allocators_standard_INIT(runtime), \ + .debug = _pymem_allocators_debug_INIT, \ + .obj_arena = _pymem_allocators_obj_arena_INIT, \ + .is_debug_enabled = _pymem_is_debug_enabled_INIT, \ + }, \ + .obmalloc = _obmalloc_global_state_INIT, \ + .pyhash_state = pyhash_state_INIT, \ + .threads = _pythread_RUNTIME_INIT(runtime.threads), \ + .signals = _signals_RUNTIME_INIT, \ + .interpreters = { \ + /* This prevents interpreters from getting created \ + until _PyInterpreterState_Enable() is called. */ \ + .next_id = -1, \ + }, \ + .xi = { \ + .registry = { \ + .global = 1, \ + }, \ + }, \ + /* A TSS key must be initialized with Py_tss_NEEDS_INIT \ + in accordance with the specification. */ \ + .autoTSSkey = Py_tss_NEEDS_INIT, \ + .parser = _parser_runtime_state_INIT, \ + .ceval = { \ + .pending_mainthread = { \ + .max = MAXPENDINGCALLS_MAIN, \ + .maxloop = MAXPENDINGCALLSLOOP_MAIN, \ + }, \ + .perf = _PyEval_RUNTIME_PERF_INIT, \ + }, \ + .gilstate = { \ + .check_enabled = 1, \ + }, \ + .fileutils = { \ + .force_ascii = -1, \ + }, \ + .faulthandler = _faulthandler_runtime_state_INIT, \ + .tracemalloc = _tracemalloc_runtime_state_INIT, \ + .ref_tracer = { \ + .tracer_func = NULL, \ + .tracer_data = NULL, \ + }, \ + .stoptheworld = { \ + .is_global = 1, \ + }, \ + .float_state = { \ + .float_format = _py_float_format_unknown, \ + .double_format = _py_float_format_unknown, \ + }, \ + .types = { \ + .next_version_tag = 1, \ + }, \ + .static_objects = { \ + .singletons = { \ + .small_ints = _Py_small_ints_INIT, \ + .bytes_empty = _PyBytes_SIMPLE_INIT(0, 0), \ + .bytes_characters = _Py_bytes_characters_INIT, \ + .strings = { \ + .literals = _Py_str_literals_INIT, \ + .identifiers = _Py_str_identifiers_INIT, \ + .ascii = _Py_str_ascii_INIT, \ + .latin1 = _Py_str_latin1_INIT, \ + }, \ + .tuple_empty = { \ + .ob_base = _PyVarObject_HEAD_INIT(&PyTuple_Type, 0), \ + }, \ + .hamt_bitmap_node_empty = { \ + .ob_base = _PyVarObject_HEAD_INIT(&_PyHamt_BitmapNode_Type, 0), \ + }, \ + .context_token_missing = { \ + .ob_base = _PyObject_HEAD_INIT(&_PyContextTokenMissing_Type), \ + }, \ + }, \ + }, \ + ._main_interpreter = _PyInterpreterState_INIT(runtime._main_interpreter), \ + } + +#define _PyInterpreterState_INIT(INTERP) \ + { \ + .id_refcount = -1, \ + ._whence = _PyInterpreterState_WHENCE_NOTSET, \ + .imports = IMPORTS_INIT, \ + .ceval = { \ + .recursion_limit = Py_DEFAULT_RECURSION_LIMIT, \ + .pending = { \ + .max = MAXPENDINGCALLS, \ + .maxloop = MAXPENDINGCALLSLOOP, \ + }, \ + }, \ + .gc = { \ + .enabled = 1, \ + .generations = { \ + /* .head is set in _PyGC_InitState(). 
*/ \ + { .threshold = 2000, }, \ + { .threshold = 10, }, \ + { .threshold = 10, }, \ + }, \ + }, \ + .qsbr = { \ + .wr_seq = QSBR_INITIAL, \ + .rd_seq = QSBR_INITIAL, \ + }, \ + .dtoa = _dtoa_state_INIT(&(INTERP)), \ + .dict_state = _dict_state_INIT, \ + .mem_free_queue = _Py_mem_free_queue_INIT(INTERP.mem_free_queue), \ + .func_state = { \ + .next_version = 1, \ + }, \ + .types = { \ + .next_version_tag = _Py_TYPE_BASE_VERSION_TAG, \ + }, \ + .static_objects = { \ + .singletons = { \ + ._not_used = 1, \ + .hamt_empty = { \ + .ob_base = _PyObject_HEAD_INIT(&_PyHamt_Type), \ + .h_root = (PyHamtNode*)&_Py_SINGLETON(hamt_bitmap_node_empty), \ + }, \ + .last_resort_memory_error = { \ + _PyObject_HEAD_INIT(&_PyExc_MemoryError), \ + .args = (PyObject*)&_Py_SINGLETON(tuple_empty) \ + }, \ + }, \ + }, \ + ._initial_thread = _PyThreadStateImpl_INIT, \ + } + +#define _PyThreadStateImpl_INIT \ + { \ + .base = _PyThreadState_INIT, \ + } + +#define _PyThreadState_INIT \ + { \ + ._whence = _PyThreadState_WHENCE_NOTSET, \ + .py_recursion_limit = Py_DEFAULT_RECURSION_LIMIT, \ + .context_ver = 1, \ + } + + +// global objects + +#define _PyBytes_SIMPLE_INIT(CH, LEN) \ + { \ + _PyVarObject_HEAD_INIT(&PyBytes_Type, (LEN)), \ + .ob_shash = -1, \ + .ob_sval = { (CH) }, \ + } +#define _PyBytes_CHAR_INIT(CH) \ + { \ + _PyBytes_SIMPLE_INIT((CH), 1) \ + } + +#define _PyUnicode_ASCII_BASE_INIT(LITERAL, ASCII) \ + { \ + .ob_base = _PyObject_HEAD_INIT(&PyUnicode_Type), \ + .length = sizeof(LITERAL) - 1, \ + .hash = -1, \ + .state = { \ + .kind = 1, \ + .compact = 1, \ + .ascii = (ASCII), \ + .statically_allocated = 1, \ + }, \ + } +#define _PyASCIIObject_INIT(LITERAL) \ + { \ + ._ascii = _PyUnicode_ASCII_BASE_INIT((LITERAL), 1), \ + ._data = (LITERAL) \ + } +#define INIT_STR(NAME, LITERAL) \ + ._py_ ## NAME = _PyASCIIObject_INIT(LITERAL) +#define INIT_ID(NAME) \ + ._py_ ## NAME = _PyASCIIObject_INIT(#NAME) +#define _PyUnicode_LATIN1_INIT(LITERAL, UTF8) \ + { \ + ._latin1 = { \ + ._base = _PyUnicode_ASCII_BASE_INIT((LITERAL), 0), \ + .utf8 = (UTF8), \ + .utf8_length = sizeof(UTF8) - 1, \ + }, \ + ._data = (LITERAL), \ + } + +#include "pycore_runtime_init_generated.h" + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_RUNTIME_INIT_H */ diff --git a/Include/internal/pycore_runtime_init_generated.h b/Include/internal/pycore_runtime_init_generated.h new file mode 100644 index 0000000000000000000000000000000000000000..19a6b9b1537d514d69969254ca19c6037eb461be --- /dev/null +++ b/Include/internal/pycore_runtime_init_generated.h @@ -0,0 +1,1551 @@ +#ifndef Py_INTERNAL_RUNTIME_INIT_GENERATED_H +#define Py_INTERNAL_RUNTIME_INIT_GENERATED_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_long.h" // _PyLong_DIGIT_INIT() + + +/* The following is auto-generated by Tools/build/generate_global_objects.py. 
*/ +#define _Py_small_ints_INIT { \ + _PyLong_DIGIT_INIT(-5), \ + _PyLong_DIGIT_INIT(-4), \ + _PyLong_DIGIT_INIT(-3), \ + _PyLong_DIGIT_INIT(-2), \ + _PyLong_DIGIT_INIT(-1), \ + _PyLong_DIGIT_INIT(0), \ + _PyLong_DIGIT_INIT(1), \ + _PyLong_DIGIT_INIT(2), \ + _PyLong_DIGIT_INIT(3), \ + _PyLong_DIGIT_INIT(4), \ + _PyLong_DIGIT_INIT(5), \ + _PyLong_DIGIT_INIT(6), \ + _PyLong_DIGIT_INIT(7), \ + _PyLong_DIGIT_INIT(8), \ + _PyLong_DIGIT_INIT(9), \ + _PyLong_DIGIT_INIT(10), \ + _PyLong_DIGIT_INIT(11), \ + _PyLong_DIGIT_INIT(12), \ + _PyLong_DIGIT_INIT(13), \ + _PyLong_DIGIT_INIT(14), \ + _PyLong_DIGIT_INIT(15), \ + _PyLong_DIGIT_INIT(16), \ + _PyLong_DIGIT_INIT(17), \ + _PyLong_DIGIT_INIT(18), \ + _PyLong_DIGIT_INIT(19), \ + _PyLong_DIGIT_INIT(20), \ + _PyLong_DIGIT_INIT(21), \ + _PyLong_DIGIT_INIT(22), \ + _PyLong_DIGIT_INIT(23), \ + _PyLong_DIGIT_INIT(24), \ + _PyLong_DIGIT_INIT(25), \ + _PyLong_DIGIT_INIT(26), \ + _PyLong_DIGIT_INIT(27), \ + _PyLong_DIGIT_INIT(28), \ + _PyLong_DIGIT_INIT(29), \ + _PyLong_DIGIT_INIT(30), \ + _PyLong_DIGIT_INIT(31), \ + _PyLong_DIGIT_INIT(32), \ + _PyLong_DIGIT_INIT(33), \ + _PyLong_DIGIT_INIT(34), \ + _PyLong_DIGIT_INIT(35), \ + _PyLong_DIGIT_INIT(36), \ + _PyLong_DIGIT_INIT(37), \ + _PyLong_DIGIT_INIT(38), \ + _PyLong_DIGIT_INIT(39), \ + _PyLong_DIGIT_INIT(40), \ + _PyLong_DIGIT_INIT(41), \ + _PyLong_DIGIT_INIT(42), \ + _PyLong_DIGIT_INIT(43), \ + _PyLong_DIGIT_INIT(44), \ + _PyLong_DIGIT_INIT(45), \ + _PyLong_DIGIT_INIT(46), \ + _PyLong_DIGIT_INIT(47), \ + _PyLong_DIGIT_INIT(48), \ + _PyLong_DIGIT_INIT(49), \ + _PyLong_DIGIT_INIT(50), \ + _PyLong_DIGIT_INIT(51), \ + _PyLong_DIGIT_INIT(52), \ + _PyLong_DIGIT_INIT(53), \ + _PyLong_DIGIT_INIT(54), \ + _PyLong_DIGIT_INIT(55), \ + _PyLong_DIGIT_INIT(56), \ + _PyLong_DIGIT_INIT(57), \ + _PyLong_DIGIT_INIT(58), \ + _PyLong_DIGIT_INIT(59), \ + _PyLong_DIGIT_INIT(60), \ + _PyLong_DIGIT_INIT(61), \ + _PyLong_DIGIT_INIT(62), \ + _PyLong_DIGIT_INIT(63), \ + _PyLong_DIGIT_INIT(64), \ + _PyLong_DIGIT_INIT(65), \ + _PyLong_DIGIT_INIT(66), \ + _PyLong_DIGIT_INIT(67), \ + _PyLong_DIGIT_INIT(68), \ + _PyLong_DIGIT_INIT(69), \ + _PyLong_DIGIT_INIT(70), \ + _PyLong_DIGIT_INIT(71), \ + _PyLong_DIGIT_INIT(72), \ + _PyLong_DIGIT_INIT(73), \ + _PyLong_DIGIT_INIT(74), \ + _PyLong_DIGIT_INIT(75), \ + _PyLong_DIGIT_INIT(76), \ + _PyLong_DIGIT_INIT(77), \ + _PyLong_DIGIT_INIT(78), \ + _PyLong_DIGIT_INIT(79), \ + _PyLong_DIGIT_INIT(80), \ + _PyLong_DIGIT_INIT(81), \ + _PyLong_DIGIT_INIT(82), \ + _PyLong_DIGIT_INIT(83), \ + _PyLong_DIGIT_INIT(84), \ + _PyLong_DIGIT_INIT(85), \ + _PyLong_DIGIT_INIT(86), \ + _PyLong_DIGIT_INIT(87), \ + _PyLong_DIGIT_INIT(88), \ + _PyLong_DIGIT_INIT(89), \ + _PyLong_DIGIT_INIT(90), \ + _PyLong_DIGIT_INIT(91), \ + _PyLong_DIGIT_INIT(92), \ + _PyLong_DIGIT_INIT(93), \ + _PyLong_DIGIT_INIT(94), \ + _PyLong_DIGIT_INIT(95), \ + _PyLong_DIGIT_INIT(96), \ + _PyLong_DIGIT_INIT(97), \ + _PyLong_DIGIT_INIT(98), \ + _PyLong_DIGIT_INIT(99), \ + _PyLong_DIGIT_INIT(100), \ + _PyLong_DIGIT_INIT(101), \ + _PyLong_DIGIT_INIT(102), \ + _PyLong_DIGIT_INIT(103), \ + _PyLong_DIGIT_INIT(104), \ + _PyLong_DIGIT_INIT(105), \ + _PyLong_DIGIT_INIT(106), \ + _PyLong_DIGIT_INIT(107), \ + _PyLong_DIGIT_INIT(108), \ + _PyLong_DIGIT_INIT(109), \ + _PyLong_DIGIT_INIT(110), \ + _PyLong_DIGIT_INIT(111), \ + _PyLong_DIGIT_INIT(112), \ + _PyLong_DIGIT_INIT(113), \ + _PyLong_DIGIT_INIT(114), \ + _PyLong_DIGIT_INIT(115), \ + _PyLong_DIGIT_INIT(116), \ + _PyLong_DIGIT_INIT(117), \ + _PyLong_DIGIT_INIT(118), \ + _PyLong_DIGIT_INIT(119), \ + 
_PyLong_DIGIT_INIT(120), \ + _PyLong_DIGIT_INIT(121), \ + _PyLong_DIGIT_INIT(122), \ + _PyLong_DIGIT_INIT(123), \ + _PyLong_DIGIT_INIT(124), \ + _PyLong_DIGIT_INIT(125), \ + _PyLong_DIGIT_INIT(126), \ + _PyLong_DIGIT_INIT(127), \ + _PyLong_DIGIT_INIT(128), \ + _PyLong_DIGIT_INIT(129), \ + _PyLong_DIGIT_INIT(130), \ + _PyLong_DIGIT_INIT(131), \ + _PyLong_DIGIT_INIT(132), \ + _PyLong_DIGIT_INIT(133), \ + _PyLong_DIGIT_INIT(134), \ + _PyLong_DIGIT_INIT(135), \ + _PyLong_DIGIT_INIT(136), \ + _PyLong_DIGIT_INIT(137), \ + _PyLong_DIGIT_INIT(138), \ + _PyLong_DIGIT_INIT(139), \ + _PyLong_DIGIT_INIT(140), \ + _PyLong_DIGIT_INIT(141), \ + _PyLong_DIGIT_INIT(142), \ + _PyLong_DIGIT_INIT(143), \ + _PyLong_DIGIT_INIT(144), \ + _PyLong_DIGIT_INIT(145), \ + _PyLong_DIGIT_INIT(146), \ + _PyLong_DIGIT_INIT(147), \ + _PyLong_DIGIT_INIT(148), \ + _PyLong_DIGIT_INIT(149), \ + _PyLong_DIGIT_INIT(150), \ + _PyLong_DIGIT_INIT(151), \ + _PyLong_DIGIT_INIT(152), \ + _PyLong_DIGIT_INIT(153), \ + _PyLong_DIGIT_INIT(154), \ + _PyLong_DIGIT_INIT(155), \ + _PyLong_DIGIT_INIT(156), \ + _PyLong_DIGIT_INIT(157), \ + _PyLong_DIGIT_INIT(158), \ + _PyLong_DIGIT_INIT(159), \ + _PyLong_DIGIT_INIT(160), \ + _PyLong_DIGIT_INIT(161), \ + _PyLong_DIGIT_INIT(162), \ + _PyLong_DIGIT_INIT(163), \ + _PyLong_DIGIT_INIT(164), \ + _PyLong_DIGIT_INIT(165), \ + _PyLong_DIGIT_INIT(166), \ + _PyLong_DIGIT_INIT(167), \ + _PyLong_DIGIT_INIT(168), \ + _PyLong_DIGIT_INIT(169), \ + _PyLong_DIGIT_INIT(170), \ + _PyLong_DIGIT_INIT(171), \ + _PyLong_DIGIT_INIT(172), \ + _PyLong_DIGIT_INIT(173), \ + _PyLong_DIGIT_INIT(174), \ + _PyLong_DIGIT_INIT(175), \ + _PyLong_DIGIT_INIT(176), \ + _PyLong_DIGIT_INIT(177), \ + _PyLong_DIGIT_INIT(178), \ + _PyLong_DIGIT_INIT(179), \ + _PyLong_DIGIT_INIT(180), \ + _PyLong_DIGIT_INIT(181), \ + _PyLong_DIGIT_INIT(182), \ + _PyLong_DIGIT_INIT(183), \ + _PyLong_DIGIT_INIT(184), \ + _PyLong_DIGIT_INIT(185), \ + _PyLong_DIGIT_INIT(186), \ + _PyLong_DIGIT_INIT(187), \ + _PyLong_DIGIT_INIT(188), \ + _PyLong_DIGIT_INIT(189), \ + _PyLong_DIGIT_INIT(190), \ + _PyLong_DIGIT_INIT(191), \ + _PyLong_DIGIT_INIT(192), \ + _PyLong_DIGIT_INIT(193), \ + _PyLong_DIGIT_INIT(194), \ + _PyLong_DIGIT_INIT(195), \ + _PyLong_DIGIT_INIT(196), \ + _PyLong_DIGIT_INIT(197), \ + _PyLong_DIGIT_INIT(198), \ + _PyLong_DIGIT_INIT(199), \ + _PyLong_DIGIT_INIT(200), \ + _PyLong_DIGIT_INIT(201), \ + _PyLong_DIGIT_INIT(202), \ + _PyLong_DIGIT_INIT(203), \ + _PyLong_DIGIT_INIT(204), \ + _PyLong_DIGIT_INIT(205), \ + _PyLong_DIGIT_INIT(206), \ + _PyLong_DIGIT_INIT(207), \ + _PyLong_DIGIT_INIT(208), \ + _PyLong_DIGIT_INIT(209), \ + _PyLong_DIGIT_INIT(210), \ + _PyLong_DIGIT_INIT(211), \ + _PyLong_DIGIT_INIT(212), \ + _PyLong_DIGIT_INIT(213), \ + _PyLong_DIGIT_INIT(214), \ + _PyLong_DIGIT_INIT(215), \ + _PyLong_DIGIT_INIT(216), \ + _PyLong_DIGIT_INIT(217), \ + _PyLong_DIGIT_INIT(218), \ + _PyLong_DIGIT_INIT(219), \ + _PyLong_DIGIT_INIT(220), \ + _PyLong_DIGIT_INIT(221), \ + _PyLong_DIGIT_INIT(222), \ + _PyLong_DIGIT_INIT(223), \ + _PyLong_DIGIT_INIT(224), \ + _PyLong_DIGIT_INIT(225), \ + _PyLong_DIGIT_INIT(226), \ + _PyLong_DIGIT_INIT(227), \ + _PyLong_DIGIT_INIT(228), \ + _PyLong_DIGIT_INIT(229), \ + _PyLong_DIGIT_INIT(230), \ + _PyLong_DIGIT_INIT(231), \ + _PyLong_DIGIT_INIT(232), \ + _PyLong_DIGIT_INIT(233), \ + _PyLong_DIGIT_INIT(234), \ + _PyLong_DIGIT_INIT(235), \ + _PyLong_DIGIT_INIT(236), \ + _PyLong_DIGIT_INIT(237), \ + _PyLong_DIGIT_INIT(238), \ + _PyLong_DIGIT_INIT(239), \ + _PyLong_DIGIT_INIT(240), \ + _PyLong_DIGIT_INIT(241), \ + 
_PyLong_DIGIT_INIT(242), \ + _PyLong_DIGIT_INIT(243), \ + _PyLong_DIGIT_INIT(244), \ + _PyLong_DIGIT_INIT(245), \ + _PyLong_DIGIT_INIT(246), \ + _PyLong_DIGIT_INIT(247), \ + _PyLong_DIGIT_INIT(248), \ + _PyLong_DIGIT_INIT(249), \ + _PyLong_DIGIT_INIT(250), \ + _PyLong_DIGIT_INIT(251), \ + _PyLong_DIGIT_INIT(252), \ + _PyLong_DIGIT_INIT(253), \ + _PyLong_DIGIT_INIT(254), \ + _PyLong_DIGIT_INIT(255), \ + _PyLong_DIGIT_INIT(256), \ +} + +#define _Py_bytes_characters_INIT { \ + _PyBytes_CHAR_INIT(0), \ + _PyBytes_CHAR_INIT(1), \ + _PyBytes_CHAR_INIT(2), \ + _PyBytes_CHAR_INIT(3), \ + _PyBytes_CHAR_INIT(4), \ + _PyBytes_CHAR_INIT(5), \ + _PyBytes_CHAR_INIT(6), \ + _PyBytes_CHAR_INIT(7), \ + _PyBytes_CHAR_INIT(8), \ + _PyBytes_CHAR_INIT(9), \ + _PyBytes_CHAR_INIT(10), \ + _PyBytes_CHAR_INIT(11), \ + _PyBytes_CHAR_INIT(12), \ + _PyBytes_CHAR_INIT(13), \ + _PyBytes_CHAR_INIT(14), \ + _PyBytes_CHAR_INIT(15), \ + _PyBytes_CHAR_INIT(16), \ + _PyBytes_CHAR_INIT(17), \ + _PyBytes_CHAR_INIT(18), \ + _PyBytes_CHAR_INIT(19), \ + _PyBytes_CHAR_INIT(20), \ + _PyBytes_CHAR_INIT(21), \ + _PyBytes_CHAR_INIT(22), \ + _PyBytes_CHAR_INIT(23), \ + _PyBytes_CHAR_INIT(24), \ + _PyBytes_CHAR_INIT(25), \ + _PyBytes_CHAR_INIT(26), \ + _PyBytes_CHAR_INIT(27), \ + _PyBytes_CHAR_INIT(28), \ + _PyBytes_CHAR_INIT(29), \ + _PyBytes_CHAR_INIT(30), \ + _PyBytes_CHAR_INIT(31), \ + _PyBytes_CHAR_INIT(32), \ + _PyBytes_CHAR_INIT(33), \ + _PyBytes_CHAR_INIT(34), \ + _PyBytes_CHAR_INIT(35), \ + _PyBytes_CHAR_INIT(36), \ + _PyBytes_CHAR_INIT(37), \ + _PyBytes_CHAR_INIT(38), \ + _PyBytes_CHAR_INIT(39), \ + _PyBytes_CHAR_INIT(40), \ + _PyBytes_CHAR_INIT(41), \ + _PyBytes_CHAR_INIT(42), \ + _PyBytes_CHAR_INIT(43), \ + _PyBytes_CHAR_INIT(44), \ + _PyBytes_CHAR_INIT(45), \ + _PyBytes_CHAR_INIT(46), \ + _PyBytes_CHAR_INIT(47), \ + _PyBytes_CHAR_INIT(48), \ + _PyBytes_CHAR_INIT(49), \ + _PyBytes_CHAR_INIT(50), \ + _PyBytes_CHAR_INIT(51), \ + _PyBytes_CHAR_INIT(52), \ + _PyBytes_CHAR_INIT(53), \ + _PyBytes_CHAR_INIT(54), \ + _PyBytes_CHAR_INIT(55), \ + _PyBytes_CHAR_INIT(56), \ + _PyBytes_CHAR_INIT(57), \ + _PyBytes_CHAR_INIT(58), \ + _PyBytes_CHAR_INIT(59), \ + _PyBytes_CHAR_INIT(60), \ + _PyBytes_CHAR_INIT(61), \ + _PyBytes_CHAR_INIT(62), \ + _PyBytes_CHAR_INIT(63), \ + _PyBytes_CHAR_INIT(64), \ + _PyBytes_CHAR_INIT(65), \ + _PyBytes_CHAR_INIT(66), \ + _PyBytes_CHAR_INIT(67), \ + _PyBytes_CHAR_INIT(68), \ + _PyBytes_CHAR_INIT(69), \ + _PyBytes_CHAR_INIT(70), \ + _PyBytes_CHAR_INIT(71), \ + _PyBytes_CHAR_INIT(72), \ + _PyBytes_CHAR_INIT(73), \ + _PyBytes_CHAR_INIT(74), \ + _PyBytes_CHAR_INIT(75), \ + _PyBytes_CHAR_INIT(76), \ + _PyBytes_CHAR_INIT(77), \ + _PyBytes_CHAR_INIT(78), \ + _PyBytes_CHAR_INIT(79), \ + _PyBytes_CHAR_INIT(80), \ + _PyBytes_CHAR_INIT(81), \ + _PyBytes_CHAR_INIT(82), \ + _PyBytes_CHAR_INIT(83), \ + _PyBytes_CHAR_INIT(84), \ + _PyBytes_CHAR_INIT(85), \ + _PyBytes_CHAR_INIT(86), \ + _PyBytes_CHAR_INIT(87), \ + _PyBytes_CHAR_INIT(88), \ + _PyBytes_CHAR_INIT(89), \ + _PyBytes_CHAR_INIT(90), \ + _PyBytes_CHAR_INIT(91), \ + _PyBytes_CHAR_INIT(92), \ + _PyBytes_CHAR_INIT(93), \ + _PyBytes_CHAR_INIT(94), \ + _PyBytes_CHAR_INIT(95), \ + _PyBytes_CHAR_INIT(96), \ + _PyBytes_CHAR_INIT(97), \ + _PyBytes_CHAR_INIT(98), \ + _PyBytes_CHAR_INIT(99), \ + _PyBytes_CHAR_INIT(100), \ + _PyBytes_CHAR_INIT(101), \ + _PyBytes_CHAR_INIT(102), \ + _PyBytes_CHAR_INIT(103), \ + _PyBytes_CHAR_INIT(104), \ + _PyBytes_CHAR_INIT(105), \ + _PyBytes_CHAR_INIT(106), \ + _PyBytes_CHAR_INIT(107), \ + _PyBytes_CHAR_INIT(108), \ + _PyBytes_CHAR_INIT(109), 
\ + _PyBytes_CHAR_INIT(110), \ + _PyBytes_CHAR_INIT(111), \ + _PyBytes_CHAR_INIT(112), \ + _PyBytes_CHAR_INIT(113), \ + _PyBytes_CHAR_INIT(114), \ + _PyBytes_CHAR_INIT(115), \ + _PyBytes_CHAR_INIT(116), \ + _PyBytes_CHAR_INIT(117), \ + _PyBytes_CHAR_INIT(118), \ + _PyBytes_CHAR_INIT(119), \ + _PyBytes_CHAR_INIT(120), \ + _PyBytes_CHAR_INIT(121), \ + _PyBytes_CHAR_INIT(122), \ + _PyBytes_CHAR_INIT(123), \ + _PyBytes_CHAR_INIT(124), \ + _PyBytes_CHAR_INIT(125), \ + _PyBytes_CHAR_INIT(126), \ + _PyBytes_CHAR_INIT(127), \ + _PyBytes_CHAR_INIT(128), \ + _PyBytes_CHAR_INIT(129), \ + _PyBytes_CHAR_INIT(130), \ + _PyBytes_CHAR_INIT(131), \ + _PyBytes_CHAR_INIT(132), \ + _PyBytes_CHAR_INIT(133), \ + _PyBytes_CHAR_INIT(134), \ + _PyBytes_CHAR_INIT(135), \ + _PyBytes_CHAR_INIT(136), \ + _PyBytes_CHAR_INIT(137), \ + _PyBytes_CHAR_INIT(138), \ + _PyBytes_CHAR_INIT(139), \ + _PyBytes_CHAR_INIT(140), \ + _PyBytes_CHAR_INIT(141), \ + _PyBytes_CHAR_INIT(142), \ + _PyBytes_CHAR_INIT(143), \ + _PyBytes_CHAR_INIT(144), \ + _PyBytes_CHAR_INIT(145), \ + _PyBytes_CHAR_INIT(146), \ + _PyBytes_CHAR_INIT(147), \ + _PyBytes_CHAR_INIT(148), \ + _PyBytes_CHAR_INIT(149), \ + _PyBytes_CHAR_INIT(150), \ + _PyBytes_CHAR_INIT(151), \ + _PyBytes_CHAR_INIT(152), \ + _PyBytes_CHAR_INIT(153), \ + _PyBytes_CHAR_INIT(154), \ + _PyBytes_CHAR_INIT(155), \ + _PyBytes_CHAR_INIT(156), \ + _PyBytes_CHAR_INIT(157), \ + _PyBytes_CHAR_INIT(158), \ + _PyBytes_CHAR_INIT(159), \ + _PyBytes_CHAR_INIT(160), \ + _PyBytes_CHAR_INIT(161), \ + _PyBytes_CHAR_INIT(162), \ + _PyBytes_CHAR_INIT(163), \ + _PyBytes_CHAR_INIT(164), \ + _PyBytes_CHAR_INIT(165), \ + _PyBytes_CHAR_INIT(166), \ + _PyBytes_CHAR_INIT(167), \ + _PyBytes_CHAR_INIT(168), \ + _PyBytes_CHAR_INIT(169), \ + _PyBytes_CHAR_INIT(170), \ + _PyBytes_CHAR_INIT(171), \ + _PyBytes_CHAR_INIT(172), \ + _PyBytes_CHAR_INIT(173), \ + _PyBytes_CHAR_INIT(174), \ + _PyBytes_CHAR_INIT(175), \ + _PyBytes_CHAR_INIT(176), \ + _PyBytes_CHAR_INIT(177), \ + _PyBytes_CHAR_INIT(178), \ + _PyBytes_CHAR_INIT(179), \ + _PyBytes_CHAR_INIT(180), \ + _PyBytes_CHAR_INIT(181), \ + _PyBytes_CHAR_INIT(182), \ + _PyBytes_CHAR_INIT(183), \ + _PyBytes_CHAR_INIT(184), \ + _PyBytes_CHAR_INIT(185), \ + _PyBytes_CHAR_INIT(186), \ + _PyBytes_CHAR_INIT(187), \ + _PyBytes_CHAR_INIT(188), \ + _PyBytes_CHAR_INIT(189), \ + _PyBytes_CHAR_INIT(190), \ + _PyBytes_CHAR_INIT(191), \ + _PyBytes_CHAR_INIT(192), \ + _PyBytes_CHAR_INIT(193), \ + _PyBytes_CHAR_INIT(194), \ + _PyBytes_CHAR_INIT(195), \ + _PyBytes_CHAR_INIT(196), \ + _PyBytes_CHAR_INIT(197), \ + _PyBytes_CHAR_INIT(198), \ + _PyBytes_CHAR_INIT(199), \ + _PyBytes_CHAR_INIT(200), \ + _PyBytes_CHAR_INIT(201), \ + _PyBytes_CHAR_INIT(202), \ + _PyBytes_CHAR_INIT(203), \ + _PyBytes_CHAR_INIT(204), \ + _PyBytes_CHAR_INIT(205), \ + _PyBytes_CHAR_INIT(206), \ + _PyBytes_CHAR_INIT(207), \ + _PyBytes_CHAR_INIT(208), \ + _PyBytes_CHAR_INIT(209), \ + _PyBytes_CHAR_INIT(210), \ + _PyBytes_CHAR_INIT(211), \ + _PyBytes_CHAR_INIT(212), \ + _PyBytes_CHAR_INIT(213), \ + _PyBytes_CHAR_INIT(214), \ + _PyBytes_CHAR_INIT(215), \ + _PyBytes_CHAR_INIT(216), \ + _PyBytes_CHAR_INIT(217), \ + _PyBytes_CHAR_INIT(218), \ + _PyBytes_CHAR_INIT(219), \ + _PyBytes_CHAR_INIT(220), \ + _PyBytes_CHAR_INIT(221), \ + _PyBytes_CHAR_INIT(222), \ + _PyBytes_CHAR_INIT(223), \ + _PyBytes_CHAR_INIT(224), \ + _PyBytes_CHAR_INIT(225), \ + _PyBytes_CHAR_INIT(226), \ + _PyBytes_CHAR_INIT(227), \ + _PyBytes_CHAR_INIT(228), \ + _PyBytes_CHAR_INIT(229), \ + _PyBytes_CHAR_INIT(230), \ + _PyBytes_CHAR_INIT(231), \ + 
_PyBytes_CHAR_INIT(232), \ + _PyBytes_CHAR_INIT(233), \ + _PyBytes_CHAR_INIT(234), \ + _PyBytes_CHAR_INIT(235), \ + _PyBytes_CHAR_INIT(236), \ + _PyBytes_CHAR_INIT(237), \ + _PyBytes_CHAR_INIT(238), \ + _PyBytes_CHAR_INIT(239), \ + _PyBytes_CHAR_INIT(240), \ + _PyBytes_CHAR_INIT(241), \ + _PyBytes_CHAR_INIT(242), \ + _PyBytes_CHAR_INIT(243), \ + _PyBytes_CHAR_INIT(244), \ + _PyBytes_CHAR_INIT(245), \ + _PyBytes_CHAR_INIT(246), \ + _PyBytes_CHAR_INIT(247), \ + _PyBytes_CHAR_INIT(248), \ + _PyBytes_CHAR_INIT(249), \ + _PyBytes_CHAR_INIT(250), \ + _PyBytes_CHAR_INIT(251), \ + _PyBytes_CHAR_INIT(252), \ + _PyBytes_CHAR_INIT(253), \ + _PyBytes_CHAR_INIT(254), \ + _PyBytes_CHAR_INIT(255), \ +} + +#define _Py_str_literals_INIT { \ + INIT_STR(anon_dictcomp, ""), \ + INIT_STR(anon_genexpr, ""), \ + INIT_STR(anon_lambda, ""), \ + INIT_STR(anon_listcomp, ""), \ + INIT_STR(anon_module, ""), \ + INIT_STR(anon_null, ""), \ + INIT_STR(anon_setcomp, ""), \ + INIT_STR(anon_string, ""), \ + INIT_STR(anon_unknown, ""), \ + INIT_STR(dbl_close_br, "}}"), \ + INIT_STR(dbl_open_br, "{{"), \ + INIT_STR(dbl_percent, "%%"), \ + INIT_STR(defaults, ".defaults"), \ + INIT_STR(dot_locals, "."), \ + INIT_STR(empty, ""), \ + INIT_STR(generic_base, ".generic_base"), \ + INIT_STR(json_decoder, "json.decoder"), \ + INIT_STR(kwdefaults, ".kwdefaults"), \ + INIT_STR(list_err, "list index out of range"), \ + INIT_STR(str_replace_inf, "1e309"), \ + INIT_STR(type_params, ".type_params"), \ + INIT_STR(utf_8, "utf-8"), \ +} + +#define _Py_str_identifiers_INIT { \ + INIT_ID(CANCELLED), \ + INIT_ID(FINISHED), \ + INIT_ID(False), \ + INIT_ID(JSONDecodeError), \ + INIT_ID(PENDING), \ + INIT_ID(Py_Repr), \ + INIT_ID(TextIOWrapper), \ + INIT_ID(True), \ + INIT_ID(WarningMessage), \ + INIT_ID(_WindowsConsoleIO), \ + INIT_ID(__IOBase_closed), \ + INIT_ID(__abc_tpflags__), \ + INIT_ID(__abs__), \ + INIT_ID(__abstractmethods__), \ + INIT_ID(__add__), \ + INIT_ID(__aenter__), \ + INIT_ID(__aexit__), \ + INIT_ID(__aiter__), \ + INIT_ID(__all__), \ + INIT_ID(__and__), \ + INIT_ID(__anext__), \ + INIT_ID(__annotations__), \ + INIT_ID(__args__), \ + INIT_ID(__await__), \ + INIT_ID(__bases__), \ + INIT_ID(__bool__), \ + INIT_ID(__buffer__), \ + INIT_ID(__build_class__), \ + INIT_ID(__builtins__), \ + INIT_ID(__bytes__), \ + INIT_ID(__call__), \ + INIT_ID(__cantrace__), \ + INIT_ID(__class__), \ + INIT_ID(__class_getitem__), \ + INIT_ID(__classcell__), \ + INIT_ID(__classdict__), \ + INIT_ID(__classdictcell__), \ + INIT_ID(__complex__), \ + INIT_ID(__contains__), \ + INIT_ID(__copy__), \ + INIT_ID(__ctypes_from_outparam__), \ + INIT_ID(__del__), \ + INIT_ID(__delattr__), \ + INIT_ID(__delete__), \ + INIT_ID(__delitem__), \ + INIT_ID(__dict__), \ + INIT_ID(__dictoffset__), \ + INIT_ID(__dir__), \ + INIT_ID(__divmod__), \ + INIT_ID(__doc__), \ + INIT_ID(__enter__), \ + INIT_ID(__eq__), \ + INIT_ID(__exit__), \ + INIT_ID(__file__), \ + INIT_ID(__firstlineno__), \ + INIT_ID(__float__), \ + INIT_ID(__floordiv__), \ + INIT_ID(__format__), \ + INIT_ID(__fspath__), \ + INIT_ID(__ge__), \ + INIT_ID(__get__), \ + INIT_ID(__getattr__), \ + INIT_ID(__getattribute__), \ + INIT_ID(__getinitargs__), \ + INIT_ID(__getitem__), \ + INIT_ID(__getnewargs__), \ + INIT_ID(__getnewargs_ex__), \ + INIT_ID(__getstate__), \ + INIT_ID(__gt__), \ + INIT_ID(__hash__), \ + INIT_ID(__iadd__), \ + INIT_ID(__iand__), \ + INIT_ID(__ifloordiv__), \ + INIT_ID(__ilshift__), \ + INIT_ID(__imatmul__), \ + INIT_ID(__imod__), \ + INIT_ID(__import__), \ + INIT_ID(__imul__), \ + 
INIT_ID(__index__), \ + INIT_ID(__init__), \ + INIT_ID(__init_subclass__), \ + INIT_ID(__instancecheck__), \ + INIT_ID(__int__), \ + INIT_ID(__invert__), \ + INIT_ID(__ior__), \ + INIT_ID(__ipow__), \ + INIT_ID(__irshift__), \ + INIT_ID(__isabstractmethod__), \ + INIT_ID(__isub__), \ + INIT_ID(__iter__), \ + INIT_ID(__itruediv__), \ + INIT_ID(__ixor__), \ + INIT_ID(__le__), \ + INIT_ID(__len__), \ + INIT_ID(__length_hint__), \ + INIT_ID(__lltrace__), \ + INIT_ID(__loader__), \ + INIT_ID(__lshift__), \ + INIT_ID(__lt__), \ + INIT_ID(__main__), \ + INIT_ID(__match_args__), \ + INIT_ID(__matmul__), \ + INIT_ID(__missing__), \ + INIT_ID(__mod__), \ + INIT_ID(__module__), \ + INIT_ID(__mro_entries__), \ + INIT_ID(__mul__), \ + INIT_ID(__name__), \ + INIT_ID(__ne__), \ + INIT_ID(__neg__), \ + INIT_ID(__new__), \ + INIT_ID(__newobj__), \ + INIT_ID(__newobj_ex__), \ + INIT_ID(__next__), \ + INIT_ID(__notes__), \ + INIT_ID(__or__), \ + INIT_ID(__orig_class__), \ + INIT_ID(__origin__), \ + INIT_ID(__package__), \ + INIT_ID(__parameters__), \ + INIT_ID(__path__), \ + INIT_ID(__pos__), \ + INIT_ID(__pow__), \ + INIT_ID(__prepare__), \ + INIT_ID(__qualname__), \ + INIT_ID(__radd__), \ + INIT_ID(__rand__), \ + INIT_ID(__rdivmod__), \ + INIT_ID(__reduce__), \ + INIT_ID(__reduce_ex__), \ + INIT_ID(__release_buffer__), \ + INIT_ID(__repr__), \ + INIT_ID(__reversed__), \ + INIT_ID(__rfloordiv__), \ + INIT_ID(__rlshift__), \ + INIT_ID(__rmatmul__), \ + INIT_ID(__rmod__), \ + INIT_ID(__rmul__), \ + INIT_ID(__ror__), \ + INIT_ID(__round__), \ + INIT_ID(__rpow__), \ + INIT_ID(__rrshift__), \ + INIT_ID(__rshift__), \ + INIT_ID(__rsub__), \ + INIT_ID(__rtruediv__), \ + INIT_ID(__rxor__), \ + INIT_ID(__set__), \ + INIT_ID(__set_name__), \ + INIT_ID(__setattr__), \ + INIT_ID(__setitem__), \ + INIT_ID(__setstate__), \ + INIT_ID(__sizeof__), \ + INIT_ID(__slotnames__), \ + INIT_ID(__slots__), \ + INIT_ID(__spec__), \ + INIT_ID(__static_attributes__), \ + INIT_ID(__str__), \ + INIT_ID(__sub__), \ + INIT_ID(__subclasscheck__), \ + INIT_ID(__subclasshook__), \ + INIT_ID(__truediv__), \ + INIT_ID(__trunc__), \ + INIT_ID(__type_params__), \ + INIT_ID(__typing_is_unpacked_typevartuple__), \ + INIT_ID(__typing_prepare_subst__), \ + INIT_ID(__typing_subst__), \ + INIT_ID(__typing_unpacked_tuple_args__), \ + INIT_ID(__warningregistry__), \ + INIT_ID(__weaklistoffset__), \ + INIT_ID(__weakref__), \ + INIT_ID(__xor__), \ + INIT_ID(_abc_impl), \ + INIT_ID(_abstract_), \ + INIT_ID(_active), \ + INIT_ID(_align_), \ + INIT_ID(_annotation), \ + INIT_ID(_anonymous_), \ + INIT_ID(_argtypes_), \ + INIT_ID(_as_parameter_), \ + INIT_ID(_asyncio_future_blocking), \ + INIT_ID(_blksize), \ + INIT_ID(_bootstrap), \ + INIT_ID(_check_retval_), \ + INIT_ID(_dealloc_warn), \ + INIT_ID(_feature_version), \ + INIT_ID(_field_types), \ + INIT_ID(_fields_), \ + INIT_ID(_finalizing), \ + INIT_ID(_find_and_load), \ + INIT_ID(_fix_up_module), \ + INIT_ID(_flags_), \ + INIT_ID(_get_sourcefile), \ + INIT_ID(_handle_fromlist), \ + INIT_ID(_initializing), \ + INIT_ID(_io), \ + INIT_ID(_is_text_encoding), \ + INIT_ID(_length_), \ + INIT_ID(_limbo), \ + INIT_ID(_lock_unlock_module), \ + INIT_ID(_loop), \ + INIT_ID(_needs_com_addref_), \ + INIT_ID(_only_immortal), \ + INIT_ID(_pack_), \ + INIT_ID(_restype_), \ + INIT_ID(_showwarnmsg), \ + INIT_ID(_shutdown), \ + INIT_ID(_slotnames), \ + INIT_ID(_strptime), \ + INIT_ID(_strptime_datetime), \ + INIT_ID(_swappedbytes_), \ + INIT_ID(_type_), \ + INIT_ID(_uninitialized_submodules), \ + 
INIT_ID(_warn_unawaited_coroutine), \ + INIT_ID(_xoptions), \ + INIT_ID(abs_tol), \ + INIT_ID(access), \ + INIT_ID(aclose), \ + INIT_ID(add), \ + INIT_ID(add_done_callback), \ + INIT_ID(after_in_child), \ + INIT_ID(after_in_parent), \ + INIT_ID(aggregate_class), \ + INIT_ID(alias), \ + INIT_ID(allow_code), \ + INIT_ID(append), \ + INIT_ID(arg), \ + INIT_ID(argdefs), \ + INIT_ID(args), \ + INIT_ID(arguments), \ + INIT_ID(argv), \ + INIT_ID(as_integer_ratio), \ + INIT_ID(asend), \ + INIT_ID(ast), \ + INIT_ID(athrow), \ + INIT_ID(attribute), \ + INIT_ID(authorizer_callback), \ + INIT_ID(autocommit), \ + INIT_ID(backtick), \ + INIT_ID(base), \ + INIT_ID(before), \ + INIT_ID(big), \ + INIT_ID(binary_form), \ + INIT_ID(block), \ + INIT_ID(bound), \ + INIT_ID(buffer), \ + INIT_ID(buffer_callback), \ + INIT_ID(buffer_size), \ + INIT_ID(buffering), \ + INIT_ID(buffers), \ + INIT_ID(bufsize), \ + INIT_ID(builtins), \ + INIT_ID(byteorder), \ + INIT_ID(bytes), \ + INIT_ID(bytes_per_sep), \ + INIT_ID(c_call), \ + INIT_ID(c_exception), \ + INIT_ID(c_return), \ + INIT_ID(cached_datetime_module), \ + INIT_ID(cached_statements), \ + INIT_ID(cadata), \ + INIT_ID(cafile), \ + INIT_ID(call), \ + INIT_ID(call_exception_handler), \ + INIT_ID(call_soon), \ + INIT_ID(callback), \ + INIT_ID(cancel), \ + INIT_ID(capath), \ + INIT_ID(category), \ + INIT_ID(cb_type), \ + INIT_ID(certfile), \ + INIT_ID(check_same_thread), \ + INIT_ID(clear), \ + INIT_ID(close), \ + INIT_ID(closed), \ + INIT_ID(closefd), \ + INIT_ID(closure), \ + INIT_ID(co_argcount), \ + INIT_ID(co_cellvars), \ + INIT_ID(co_code), \ + INIT_ID(co_consts), \ + INIT_ID(co_exceptiontable), \ + INIT_ID(co_filename), \ + INIT_ID(co_firstlineno), \ + INIT_ID(co_flags), \ + INIT_ID(co_freevars), \ + INIT_ID(co_kwonlyargcount), \ + INIT_ID(co_linetable), \ + INIT_ID(co_name), \ + INIT_ID(co_names), \ + INIT_ID(co_nlocals), \ + INIT_ID(co_posonlyargcount), \ + INIT_ID(co_qualname), \ + INIT_ID(co_stacksize), \ + INIT_ID(co_varnames), \ + INIT_ID(code), \ + INIT_ID(col_offset), \ + INIT_ID(command), \ + INIT_ID(comment_factory), \ + INIT_ID(compile_mode), \ + INIT_ID(consts), \ + INIT_ID(context), \ + INIT_ID(contravariant), \ + INIT_ID(cookie), \ + INIT_ID(copy), \ + INIT_ID(copyreg), \ + INIT_ID(coro), \ + INIT_ID(count), \ + INIT_ID(covariant), \ + INIT_ID(cwd), \ + INIT_ID(data), \ + INIT_ID(database), \ + INIT_ID(day), \ + INIT_ID(decode), \ + INIT_ID(decoder), \ + INIT_ID(default), \ + INIT_ID(defaultaction), \ + INIT_ID(delete), \ + INIT_ID(depth), \ + INIT_ID(desired_access), \ + INIT_ID(detect_types), \ + INIT_ID(deterministic), \ + INIT_ID(device), \ + INIT_ID(dict), \ + INIT_ID(dictcomp), \ + INIT_ID(difference_update), \ + INIT_ID(digest), \ + INIT_ID(digest_size), \ + INIT_ID(digestmod), \ + INIT_ID(dir_fd), \ + INIT_ID(discard), \ + INIT_ID(dispatch_table), \ + INIT_ID(displayhook), \ + INIT_ID(dklen), \ + INIT_ID(doc), \ + INIT_ID(dont_inherit), \ + INIT_ID(dst), \ + INIT_ID(dst_dir_fd), \ + INIT_ID(eager_start), \ + INIT_ID(effective_ids), \ + INIT_ID(element_factory), \ + INIT_ID(encode), \ + INIT_ID(encoding), \ + INIT_ID(end), \ + INIT_ID(end_col_offset), \ + INIT_ID(end_lineno), \ + INIT_ID(end_offset), \ + INIT_ID(endpos), \ + INIT_ID(entrypoint), \ + INIT_ID(env), \ + INIT_ID(errors), \ + INIT_ID(event), \ + INIT_ID(eventmask), \ + INIT_ID(exc_type), \ + INIT_ID(exc_value), \ + INIT_ID(excepthook), \ + INIT_ID(exception), \ + INIT_ID(existing_file_name), \ + INIT_ID(exp), \ + INIT_ID(extend), \ + INIT_ID(extra_tokens), \ + 
INIT_ID(facility), \ + INIT_ID(factory), \ + INIT_ID(false), \ + INIT_ID(family), \ + INIT_ID(fanout), \ + INIT_ID(fd), \ + INIT_ID(fd2), \ + INIT_ID(fdel), \ + INIT_ID(fget), \ + INIT_ID(file), \ + INIT_ID(file_actions), \ + INIT_ID(filename), \ + INIT_ID(fileno), \ + INIT_ID(filepath), \ + INIT_ID(fillvalue), \ + INIT_ID(filter), \ + INIT_ID(filters), \ + INIT_ID(final), \ + INIT_ID(find_class), \ + INIT_ID(fix_imports), \ + INIT_ID(flags), \ + INIT_ID(flush), \ + INIT_ID(fold), \ + INIT_ID(follow_symlinks), \ + INIT_ID(format), \ + INIT_ID(from_param), \ + INIT_ID(fromlist), \ + INIT_ID(fromtimestamp), \ + INIT_ID(fromutc), \ + INIT_ID(fset), \ + INIT_ID(func), \ + INIT_ID(future), \ + INIT_ID(generation), \ + INIT_ID(genexpr), \ + INIT_ID(get), \ + INIT_ID(get_debug), \ + INIT_ID(get_event_loop), \ + INIT_ID(get_loop), \ + INIT_ID(get_source), \ + INIT_ID(getattr), \ + INIT_ID(getstate), \ + INIT_ID(gid), \ + INIT_ID(globals), \ + INIT_ID(groupindex), \ + INIT_ID(groups), \ + INIT_ID(handle), \ + INIT_ID(handle_seq), \ + INIT_ID(has_location), \ + INIT_ID(hash_name), \ + INIT_ID(header), \ + INIT_ID(headers), \ + INIT_ID(hi), \ + INIT_ID(hook), \ + INIT_ID(hour), \ + INIT_ID(ident), \ + INIT_ID(identity_hint), \ + INIT_ID(ignore), \ + INIT_ID(imag), \ + INIT_ID(importlib), \ + INIT_ID(in_fd), \ + INIT_ID(incoming), \ + INIT_ID(indexgroup), \ + INIT_ID(inf), \ + INIT_ID(infer_variance), \ + INIT_ID(inherit_handle), \ + INIT_ID(inheritable), \ + INIT_ID(initial), \ + INIT_ID(initial_bytes), \ + INIT_ID(initial_owner), \ + INIT_ID(initial_state), \ + INIT_ID(initial_value), \ + INIT_ID(initval), \ + INIT_ID(inner_size), \ + INIT_ID(input), \ + INIT_ID(insert_comments), \ + INIT_ID(insert_pis), \ + INIT_ID(instructions), \ + INIT_ID(intern), \ + INIT_ID(intersection), \ + INIT_ID(interval), \ + INIT_ID(is_running), \ + INIT_ID(isatty), \ + INIT_ID(isinstance), \ + INIT_ID(isoformat), \ + INIT_ID(isolation_level), \ + INIT_ID(istext), \ + INIT_ID(item), \ + INIT_ID(items), \ + INIT_ID(iter), \ + INIT_ID(iterable), \ + INIT_ID(iterations), \ + INIT_ID(join), \ + INIT_ID(jump), \ + INIT_ID(keepends), \ + INIT_ID(key), \ + INIT_ID(keyfile), \ + INIT_ID(keys), \ + INIT_ID(kind), \ + INIT_ID(kw), \ + INIT_ID(kw1), \ + INIT_ID(kw2), \ + INIT_ID(kwdefaults), \ + INIT_ID(label), \ + INIT_ID(lambda), \ + INIT_ID(last), \ + INIT_ID(last_exc), \ + INIT_ID(last_node), \ + INIT_ID(last_traceback), \ + INIT_ID(last_type), \ + INIT_ID(last_value), \ + INIT_ID(latin1), \ + INIT_ID(leaf_size), \ + INIT_ID(len), \ + INIT_ID(length), \ + INIT_ID(level), \ + INIT_ID(limit), \ + INIT_ID(line), \ + INIT_ID(line_buffering), \ + INIT_ID(lineno), \ + INIT_ID(listcomp), \ + INIT_ID(little), \ + INIT_ID(lo), \ + INIT_ID(locale), \ + INIT_ID(locals), \ + INIT_ID(logoption), \ + INIT_ID(loop), \ + INIT_ID(manual_reset), \ + INIT_ID(mapping), \ + INIT_ID(match), \ + INIT_ID(max_length), \ + INIT_ID(maxdigits), \ + INIT_ID(maxevents), \ + INIT_ID(maxlen), \ + INIT_ID(maxmem), \ + INIT_ID(maxsplit), \ + INIT_ID(maxvalue), \ + INIT_ID(memLevel), \ + INIT_ID(memlimit), \ + INIT_ID(message), \ + INIT_ID(metaclass), \ + INIT_ID(metadata), \ + INIT_ID(method), \ + INIT_ID(microsecond), \ + INIT_ID(milliseconds), \ + INIT_ID(minute), \ + INIT_ID(mod), \ + INIT_ID(mode), \ + INIT_ID(module), \ + INIT_ID(module_globals), \ + INIT_ID(modules), \ + INIT_ID(month), \ + INIT_ID(mro), \ + INIT_ID(msg), \ + INIT_ID(mutex), \ + INIT_ID(mycmp), \ + INIT_ID(n_arg), \ + INIT_ID(n_fields), \ + INIT_ID(n_sequence_fields), \ + 
INIT_ID(n_unnamed_fields), \ + INIT_ID(name), \ + INIT_ID(name_from), \ + INIT_ID(namespace_separator), \ + INIT_ID(namespaces), \ + INIT_ID(narg), \ + INIT_ID(ndigits), \ + INIT_ID(nested), \ + INIT_ID(new_file_name), \ + INIT_ID(new_limit), \ + INIT_ID(newline), \ + INIT_ID(newlines), \ + INIT_ID(next), \ + INIT_ID(nlocals), \ + INIT_ID(node_depth), \ + INIT_ID(node_offset), \ + INIT_ID(ns), \ + INIT_ID(nstype), \ + INIT_ID(nt), \ + INIT_ID(null), \ + INIT_ID(number), \ + INIT_ID(obj), \ + INIT_ID(object), \ + INIT_ID(offset), \ + INIT_ID(offset_dst), \ + INIT_ID(offset_src), \ + INIT_ID(on_type_read), \ + INIT_ID(onceregistry), \ + INIT_ID(only_keys), \ + INIT_ID(oparg), \ + INIT_ID(opcode), \ + INIT_ID(open), \ + INIT_ID(opener), \ + INIT_ID(operation), \ + INIT_ID(optimize), \ + INIT_ID(options), \ + INIT_ID(order), \ + INIT_ID(origin), \ + INIT_ID(out_fd), \ + INIT_ID(outgoing), \ + INIT_ID(overlapped), \ + INIT_ID(owner), \ + INIT_ID(pages), \ + INIT_ID(parent), \ + INIT_ID(password), \ + INIT_ID(path), \ + INIT_ID(pattern), \ + INIT_ID(peek), \ + INIT_ID(persistent_id), \ + INIT_ID(persistent_load), \ + INIT_ID(person), \ + INIT_ID(pi_factory), \ + INIT_ID(pid), \ + INIT_ID(policy), \ + INIT_ID(pos), \ + INIT_ID(pos1), \ + INIT_ID(pos2), \ + INIT_ID(posix), \ + INIT_ID(print_file_and_line), \ + INIT_ID(priority), \ + INIT_ID(progress), \ + INIT_ID(progress_handler), \ + INIT_ID(progress_routine), \ + INIT_ID(proto), \ + INIT_ID(protocol), \ + INIT_ID(ps1), \ + INIT_ID(ps2), \ + INIT_ID(query), \ + INIT_ID(quotetabs), \ + INIT_ID(raw), \ + INIT_ID(read), \ + INIT_ID(read1), \ + INIT_ID(readable), \ + INIT_ID(readall), \ + INIT_ID(readinto), \ + INIT_ID(readinto1), \ + INIT_ID(readline), \ + INIT_ID(readonly), \ + INIT_ID(real), \ + INIT_ID(reducer_override), \ + INIT_ID(registry), \ + INIT_ID(rel_tol), \ + INIT_ID(release), \ + INIT_ID(reload), \ + INIT_ID(repl), \ + INIT_ID(replace), \ + INIT_ID(reserved), \ + INIT_ID(reset), \ + INIT_ID(resetids), \ + INIT_ID(return), \ + INIT_ID(reverse), \ + INIT_ID(reversed), \ + INIT_ID(salt), \ + INIT_ID(sched_priority), \ + INIT_ID(scheduler), \ + INIT_ID(second), \ + INIT_ID(security_attributes), \ + INIT_ID(seek), \ + INIT_ID(seekable), \ + INIT_ID(selectors), \ + INIT_ID(self), \ + INIT_ID(send), \ + INIT_ID(sep), \ + INIT_ID(sequence), \ + INIT_ID(server_hostname), \ + INIT_ID(server_side), \ + INIT_ID(session), \ + INIT_ID(setcomp), \ + INIT_ID(setpgroup), \ + INIT_ID(setsid), \ + INIT_ID(setsigdef), \ + INIT_ID(setsigmask), \ + INIT_ID(setstate), \ + INIT_ID(shape), \ + INIT_ID(show_cmd), \ + INIT_ID(signed), \ + INIT_ID(size), \ + INIT_ID(sizehint), \ + INIT_ID(skip_file_prefixes), \ + INIT_ID(sleep), \ + INIT_ID(sock), \ + INIT_ID(sort), \ + INIT_ID(source), \ + INIT_ID(source_traceback), \ + INIT_ID(spam), \ + INIT_ID(src), \ + INIT_ID(src_dir_fd), \ + INIT_ID(stacklevel), \ + INIT_ID(start), \ + INIT_ID(statement), \ + INIT_ID(status), \ + INIT_ID(stderr), \ + INIT_ID(stdin), \ + INIT_ID(stdout), \ + INIT_ID(step), \ + INIT_ID(steps), \ + INIT_ID(store_name), \ + INIT_ID(strategy), \ + INIT_ID(strftime), \ + INIT_ID(strict), \ + INIT_ID(strict_mode), \ + INIT_ID(string), \ + INIT_ID(sub_key), \ + INIT_ID(symmetric_difference_update), \ + INIT_ID(tabsize), \ + INIT_ID(tag), \ + INIT_ID(target), \ + INIT_ID(target_is_directory), \ + INIT_ID(task), \ + INIT_ID(tb_frame), \ + INIT_ID(tb_lasti), \ + INIT_ID(tb_lineno), \ + INIT_ID(tb_next), \ + INIT_ID(tell), \ + INIT_ID(template), \ + INIT_ID(term), \ + INIT_ID(text), \ + 
INIT_ID(threading), \ + INIT_ID(throw), \ + INIT_ID(timeout), \ + INIT_ID(times), \ + INIT_ID(timetuple), \ + INIT_ID(top), \ + INIT_ID(trace_callback), \ + INIT_ID(traceback), \ + INIT_ID(trailers), \ + INIT_ID(translate), \ + INIT_ID(true), \ + INIT_ID(truncate), \ + INIT_ID(twice), \ + INIT_ID(txt), \ + INIT_ID(type), \ + INIT_ID(type_params), \ + INIT_ID(tz), \ + INIT_ID(tzinfo), \ + INIT_ID(tzname), \ + INIT_ID(uid), \ + INIT_ID(unlink), \ + INIT_ID(unraisablehook), \ + INIT_ID(uri), \ + INIT_ID(usedforsecurity), \ + INIT_ID(value), \ + INIT_ID(values), \ + INIT_ID(version), \ + INIT_ID(volume), \ + INIT_ID(wait_all), \ + INIT_ID(warn_on_full_buffer), \ + INIT_ID(warnings), \ + INIT_ID(warnoptions), \ + INIT_ID(wbits), \ + INIT_ID(week), \ + INIT_ID(weekday), \ + INIT_ID(which), \ + INIT_ID(who), \ + INIT_ID(withdata), \ + INIT_ID(writable), \ + INIT_ID(write), \ + INIT_ID(write_through), \ + INIT_ID(year), \ + INIT_ID(zdict), \ +} + +#define _Py_str_ascii_INIT { \ + _PyASCIIObject_INIT("\x00"), \ + _PyASCIIObject_INIT("\x01"), \ + _PyASCIIObject_INIT("\x02"), \ + _PyASCIIObject_INIT("\x03"), \ + _PyASCIIObject_INIT("\x04"), \ + _PyASCIIObject_INIT("\x05"), \ + _PyASCIIObject_INIT("\x06"), \ + _PyASCIIObject_INIT("\x07"), \ + _PyASCIIObject_INIT("\x08"), \ + _PyASCIIObject_INIT("\x09"), \ + _PyASCIIObject_INIT("\x0a"), \ + _PyASCIIObject_INIT("\x0b"), \ + _PyASCIIObject_INIT("\x0c"), \ + _PyASCIIObject_INIT("\x0d"), \ + _PyASCIIObject_INIT("\x0e"), \ + _PyASCIIObject_INIT("\x0f"), \ + _PyASCIIObject_INIT("\x10"), \ + _PyASCIIObject_INIT("\x11"), \ + _PyASCIIObject_INIT("\x12"), \ + _PyASCIIObject_INIT("\x13"), \ + _PyASCIIObject_INIT("\x14"), \ + _PyASCIIObject_INIT("\x15"), \ + _PyASCIIObject_INIT("\x16"), \ + _PyASCIIObject_INIT("\x17"), \ + _PyASCIIObject_INIT("\x18"), \ + _PyASCIIObject_INIT("\x19"), \ + _PyASCIIObject_INIT("\x1a"), \ + _PyASCIIObject_INIT("\x1b"), \ + _PyASCIIObject_INIT("\x1c"), \ + _PyASCIIObject_INIT("\x1d"), \ + _PyASCIIObject_INIT("\x1e"), \ + _PyASCIIObject_INIT("\x1f"), \ + _PyASCIIObject_INIT("\x20"), \ + _PyASCIIObject_INIT("\x21"), \ + _PyASCIIObject_INIT("\x22"), \ + _PyASCIIObject_INIT("\x23"), \ + _PyASCIIObject_INIT("\x24"), \ + _PyASCIIObject_INIT("\x25"), \ + _PyASCIIObject_INIT("\x26"), \ + _PyASCIIObject_INIT("\x27"), \ + _PyASCIIObject_INIT("\x28"), \ + _PyASCIIObject_INIT("\x29"), \ + _PyASCIIObject_INIT("\x2a"), \ + _PyASCIIObject_INIT("\x2b"), \ + _PyASCIIObject_INIT("\x2c"), \ + _PyASCIIObject_INIT("\x2d"), \ + _PyASCIIObject_INIT("\x2e"), \ + _PyASCIIObject_INIT("\x2f"), \ + _PyASCIIObject_INIT("\x30"), \ + _PyASCIIObject_INIT("\x31"), \ + _PyASCIIObject_INIT("\x32"), \ + _PyASCIIObject_INIT("\x33"), \ + _PyASCIIObject_INIT("\x34"), \ + _PyASCIIObject_INIT("\x35"), \ + _PyASCIIObject_INIT("\x36"), \ + _PyASCIIObject_INIT("\x37"), \ + _PyASCIIObject_INIT("\x38"), \ + _PyASCIIObject_INIT("\x39"), \ + _PyASCIIObject_INIT("\x3a"), \ + _PyASCIIObject_INIT("\x3b"), \ + _PyASCIIObject_INIT("\x3c"), \ + _PyASCIIObject_INIT("\x3d"), \ + _PyASCIIObject_INIT("\x3e"), \ + _PyASCIIObject_INIT("\x3f"), \ + _PyASCIIObject_INIT("\x40"), \ + _PyASCIIObject_INIT("\x41"), \ + _PyASCIIObject_INIT("\x42"), \ + _PyASCIIObject_INIT("\x43"), \ + _PyASCIIObject_INIT("\x44"), \ + _PyASCIIObject_INIT("\x45"), \ + _PyASCIIObject_INIT("\x46"), \ + _PyASCIIObject_INIT("\x47"), \ + _PyASCIIObject_INIT("\x48"), \ + _PyASCIIObject_INIT("\x49"), \ + _PyASCIIObject_INIT("\x4a"), \ + _PyASCIIObject_INIT("\x4b"), \ + _PyASCIIObject_INIT("\x4c"), \ + 
_PyASCIIObject_INIT("\x4d"), \ + _PyASCIIObject_INIT("\x4e"), \ + _PyASCIIObject_INIT("\x4f"), \ + _PyASCIIObject_INIT("\x50"), \ + _PyASCIIObject_INIT("\x51"), \ + _PyASCIIObject_INIT("\x52"), \ + _PyASCIIObject_INIT("\x53"), \ + _PyASCIIObject_INIT("\x54"), \ + _PyASCIIObject_INIT("\x55"), \ + _PyASCIIObject_INIT("\x56"), \ + _PyASCIIObject_INIT("\x57"), \ + _PyASCIIObject_INIT("\x58"), \ + _PyASCIIObject_INIT("\x59"), \ + _PyASCIIObject_INIT("\x5a"), \ + _PyASCIIObject_INIT("\x5b"), \ + _PyASCIIObject_INIT("\x5c"), \ + _PyASCIIObject_INIT("\x5d"), \ + _PyASCIIObject_INIT("\x5e"), \ + _PyASCIIObject_INIT("\x5f"), \ + _PyASCIIObject_INIT("\x60"), \ + _PyASCIIObject_INIT("\x61"), \ + _PyASCIIObject_INIT("\x62"), \ + _PyASCIIObject_INIT("\x63"), \ + _PyASCIIObject_INIT("\x64"), \ + _PyASCIIObject_INIT("\x65"), \ + _PyASCIIObject_INIT("\x66"), \ + _PyASCIIObject_INIT("\x67"), \ + _PyASCIIObject_INIT("\x68"), \ + _PyASCIIObject_INIT("\x69"), \ + _PyASCIIObject_INIT("\x6a"), \ + _PyASCIIObject_INIT("\x6b"), \ + _PyASCIIObject_INIT("\x6c"), \ + _PyASCIIObject_INIT("\x6d"), \ + _PyASCIIObject_INIT("\x6e"), \ + _PyASCIIObject_INIT("\x6f"), \ + _PyASCIIObject_INIT("\x70"), \ + _PyASCIIObject_INIT("\x71"), \ + _PyASCIIObject_INIT("\x72"), \ + _PyASCIIObject_INIT("\x73"), \ + _PyASCIIObject_INIT("\x74"), \ + _PyASCIIObject_INIT("\x75"), \ + _PyASCIIObject_INIT("\x76"), \ + _PyASCIIObject_INIT("\x77"), \ + _PyASCIIObject_INIT("\x78"), \ + _PyASCIIObject_INIT("\x79"), \ + _PyASCIIObject_INIT("\x7a"), \ + _PyASCIIObject_INIT("\x7b"), \ + _PyASCIIObject_INIT("\x7c"), \ + _PyASCIIObject_INIT("\x7d"), \ + _PyASCIIObject_INIT("\x7e"), \ + _PyASCIIObject_INIT("\x7f"), \ +} + +#define _Py_str_latin1_INIT { \ + _PyUnicode_LATIN1_INIT("\x80", "\xc2\x80"), \ + _PyUnicode_LATIN1_INIT("\x81", "\xc2\x81"), \ + _PyUnicode_LATIN1_INIT("\x82", "\xc2\x82"), \ + _PyUnicode_LATIN1_INIT("\x83", "\xc2\x83"), \ + _PyUnicode_LATIN1_INIT("\x84", "\xc2\x84"), \ + _PyUnicode_LATIN1_INIT("\x85", "\xc2\x85"), \ + _PyUnicode_LATIN1_INIT("\x86", "\xc2\x86"), \ + _PyUnicode_LATIN1_INIT("\x87", "\xc2\x87"), \ + _PyUnicode_LATIN1_INIT("\x88", "\xc2\x88"), \ + _PyUnicode_LATIN1_INIT("\x89", "\xc2\x89"), \ + _PyUnicode_LATIN1_INIT("\x8a", "\xc2\x8a"), \ + _PyUnicode_LATIN1_INIT("\x8b", "\xc2\x8b"), \ + _PyUnicode_LATIN1_INIT("\x8c", "\xc2\x8c"), \ + _PyUnicode_LATIN1_INIT("\x8d", "\xc2\x8d"), \ + _PyUnicode_LATIN1_INIT("\x8e", "\xc2\x8e"), \ + _PyUnicode_LATIN1_INIT("\x8f", "\xc2\x8f"), \ + _PyUnicode_LATIN1_INIT("\x90", "\xc2\x90"), \ + _PyUnicode_LATIN1_INIT("\x91", "\xc2\x91"), \ + _PyUnicode_LATIN1_INIT("\x92", "\xc2\x92"), \ + _PyUnicode_LATIN1_INIT("\x93", "\xc2\x93"), \ + _PyUnicode_LATIN1_INIT("\x94", "\xc2\x94"), \ + _PyUnicode_LATIN1_INIT("\x95", "\xc2\x95"), \ + _PyUnicode_LATIN1_INIT("\x96", "\xc2\x96"), \ + _PyUnicode_LATIN1_INIT("\x97", "\xc2\x97"), \ + _PyUnicode_LATIN1_INIT("\x98", "\xc2\x98"), \ + _PyUnicode_LATIN1_INIT("\x99", "\xc2\x99"), \ + _PyUnicode_LATIN1_INIT("\x9a", "\xc2\x9a"), \ + _PyUnicode_LATIN1_INIT("\x9b", "\xc2\x9b"), \ + _PyUnicode_LATIN1_INIT("\x9c", "\xc2\x9c"), \ + _PyUnicode_LATIN1_INIT("\x9d", "\xc2\x9d"), \ + _PyUnicode_LATIN1_INIT("\x9e", "\xc2\x9e"), \ + _PyUnicode_LATIN1_INIT("\x9f", "\xc2\x9f"), \ + _PyUnicode_LATIN1_INIT("\xa0", "\xc2\xa0"), \ + _PyUnicode_LATIN1_INIT("\xa1", "\xc2\xa1"), \ + _PyUnicode_LATIN1_INIT("\xa2", "\xc2\xa2"), \ + _PyUnicode_LATIN1_INIT("\xa3", "\xc2\xa3"), \ + _PyUnicode_LATIN1_INIT("\xa4", "\xc2\xa4"), \ + _PyUnicode_LATIN1_INIT("\xa5", "\xc2\xa5"), \ + 
_PyUnicode_LATIN1_INIT("\xa6", "\xc2\xa6"), \ + _PyUnicode_LATIN1_INIT("\xa7", "\xc2\xa7"), \ + _PyUnicode_LATIN1_INIT("\xa8", "\xc2\xa8"), \ + _PyUnicode_LATIN1_INIT("\xa9", "\xc2\xa9"), \ + _PyUnicode_LATIN1_INIT("\xaa", "\xc2\xaa"), \ + _PyUnicode_LATIN1_INIT("\xab", "\xc2\xab"), \ + _PyUnicode_LATIN1_INIT("\xac", "\xc2\xac"), \ + _PyUnicode_LATIN1_INIT("\xad", "\xc2\xad"), \ + _PyUnicode_LATIN1_INIT("\xae", "\xc2\xae"), \ + _PyUnicode_LATIN1_INIT("\xaf", "\xc2\xaf"), \ + _PyUnicode_LATIN1_INIT("\xb0", "\xc2\xb0"), \ + _PyUnicode_LATIN1_INIT("\xb1", "\xc2\xb1"), \ + _PyUnicode_LATIN1_INIT("\xb2", "\xc2\xb2"), \ + _PyUnicode_LATIN1_INIT("\xb3", "\xc2\xb3"), \ + _PyUnicode_LATIN1_INIT("\xb4", "\xc2\xb4"), \ + _PyUnicode_LATIN1_INIT("\xb5", "\xc2\xb5"), \ + _PyUnicode_LATIN1_INIT("\xb6", "\xc2\xb6"), \ + _PyUnicode_LATIN1_INIT("\xb7", "\xc2\xb7"), \ + _PyUnicode_LATIN1_INIT("\xb8", "\xc2\xb8"), \ + _PyUnicode_LATIN1_INIT("\xb9", "\xc2\xb9"), \ + _PyUnicode_LATIN1_INIT("\xba", "\xc2\xba"), \ + _PyUnicode_LATIN1_INIT("\xbb", "\xc2\xbb"), \ + _PyUnicode_LATIN1_INIT("\xbc", "\xc2\xbc"), \ + _PyUnicode_LATIN1_INIT("\xbd", "\xc2\xbd"), \ + _PyUnicode_LATIN1_INIT("\xbe", "\xc2\xbe"), \ + _PyUnicode_LATIN1_INIT("\xbf", "\xc2\xbf"), \ + _PyUnicode_LATIN1_INIT("\xc0", "\xc3\x80"), \ + _PyUnicode_LATIN1_INIT("\xc1", "\xc3\x81"), \ + _PyUnicode_LATIN1_INIT("\xc2", "\xc3\x82"), \ + _PyUnicode_LATIN1_INIT("\xc3", "\xc3\x83"), \ + _PyUnicode_LATIN1_INIT("\xc4", "\xc3\x84"), \ + _PyUnicode_LATIN1_INIT("\xc5", "\xc3\x85"), \ + _PyUnicode_LATIN1_INIT("\xc6", "\xc3\x86"), \ + _PyUnicode_LATIN1_INIT("\xc7", "\xc3\x87"), \ + _PyUnicode_LATIN1_INIT("\xc8", "\xc3\x88"), \ + _PyUnicode_LATIN1_INIT("\xc9", "\xc3\x89"), \ + _PyUnicode_LATIN1_INIT("\xca", "\xc3\x8a"), \ + _PyUnicode_LATIN1_INIT("\xcb", "\xc3\x8b"), \ + _PyUnicode_LATIN1_INIT("\xcc", "\xc3\x8c"), \ + _PyUnicode_LATIN1_INIT("\xcd", "\xc3\x8d"), \ + _PyUnicode_LATIN1_INIT("\xce", "\xc3\x8e"), \ + _PyUnicode_LATIN1_INIT("\xcf", "\xc3\x8f"), \ + _PyUnicode_LATIN1_INIT("\xd0", "\xc3\x90"), \ + _PyUnicode_LATIN1_INIT("\xd1", "\xc3\x91"), \ + _PyUnicode_LATIN1_INIT("\xd2", "\xc3\x92"), \ + _PyUnicode_LATIN1_INIT("\xd3", "\xc3\x93"), \ + _PyUnicode_LATIN1_INIT("\xd4", "\xc3\x94"), \ + _PyUnicode_LATIN1_INIT("\xd5", "\xc3\x95"), \ + _PyUnicode_LATIN1_INIT("\xd6", "\xc3\x96"), \ + _PyUnicode_LATIN1_INIT("\xd7", "\xc3\x97"), \ + _PyUnicode_LATIN1_INIT("\xd8", "\xc3\x98"), \ + _PyUnicode_LATIN1_INIT("\xd9", "\xc3\x99"), \ + _PyUnicode_LATIN1_INIT("\xda", "\xc3\x9a"), \ + _PyUnicode_LATIN1_INIT("\xdb", "\xc3\x9b"), \ + _PyUnicode_LATIN1_INIT("\xdc", "\xc3\x9c"), \ + _PyUnicode_LATIN1_INIT("\xdd", "\xc3\x9d"), \ + _PyUnicode_LATIN1_INIT("\xde", "\xc3\x9e"), \ + _PyUnicode_LATIN1_INIT("\xdf", "\xc3\x9f"), \ + _PyUnicode_LATIN1_INIT("\xe0", "\xc3\xa0"), \ + _PyUnicode_LATIN1_INIT("\xe1", "\xc3\xa1"), \ + _PyUnicode_LATIN1_INIT("\xe2", "\xc3\xa2"), \ + _PyUnicode_LATIN1_INIT("\xe3", "\xc3\xa3"), \ + _PyUnicode_LATIN1_INIT("\xe4", "\xc3\xa4"), \ + _PyUnicode_LATIN1_INIT("\xe5", "\xc3\xa5"), \ + _PyUnicode_LATIN1_INIT("\xe6", "\xc3\xa6"), \ + _PyUnicode_LATIN1_INIT("\xe7", "\xc3\xa7"), \ + _PyUnicode_LATIN1_INIT("\xe8", "\xc3\xa8"), \ + _PyUnicode_LATIN1_INIT("\xe9", "\xc3\xa9"), \ + _PyUnicode_LATIN1_INIT("\xea", "\xc3\xaa"), \ + _PyUnicode_LATIN1_INIT("\xeb", "\xc3\xab"), \ + _PyUnicode_LATIN1_INIT("\xec", "\xc3\xac"), \ + _PyUnicode_LATIN1_INIT("\xed", "\xc3\xad"), \ + _PyUnicode_LATIN1_INIT("\xee", "\xc3\xae"), \ + _PyUnicode_LATIN1_INIT("\xef", "\xc3\xaf"), \ + 
_PyUnicode_LATIN1_INIT("\xf0", "\xc3\xb0"), \ + _PyUnicode_LATIN1_INIT("\xf1", "\xc3\xb1"), \ + _PyUnicode_LATIN1_INIT("\xf2", "\xc3\xb2"), \ + _PyUnicode_LATIN1_INIT("\xf3", "\xc3\xb3"), \ + _PyUnicode_LATIN1_INIT("\xf4", "\xc3\xb4"), \ + _PyUnicode_LATIN1_INIT("\xf5", "\xc3\xb5"), \ + _PyUnicode_LATIN1_INIT("\xf6", "\xc3\xb6"), \ + _PyUnicode_LATIN1_INIT("\xf7", "\xc3\xb7"), \ + _PyUnicode_LATIN1_INIT("\xf8", "\xc3\xb8"), \ + _PyUnicode_LATIN1_INIT("\xf9", "\xc3\xb9"), \ + _PyUnicode_LATIN1_INIT("\xfa", "\xc3\xba"), \ + _PyUnicode_LATIN1_INIT("\xfb", "\xc3\xbb"), \ + _PyUnicode_LATIN1_INIT("\xfc", "\xc3\xbc"), \ + _PyUnicode_LATIN1_INIT("\xfd", "\xc3\xbd"), \ + _PyUnicode_LATIN1_INIT("\xfe", "\xc3\xbe"), \ + _PyUnicode_LATIN1_INIT("\xff", "\xc3\xbf"), \ +} +/* End auto-generated code */ + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_RUNTIME_INIT_GENERATED_H */ diff --git a/Include/internal/pycore_semaphore.h b/Include/internal/pycore_semaphore.h new file mode 100644 index 0000000000000000000000000000000000000000..269538384606ce1b031a75f928038e253c5efce4 --- /dev/null +++ b/Include/internal/pycore_semaphore.h @@ -0,0 +1,67 @@ +// The _PySemaphore API a simplified cross-platform semaphore used to implement +// wakeup/sleep. +#ifndef Py_INTERNAL_SEMAPHORE_H +#define Py_INTERNAL_SEMAPHORE_H + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_pythread.h" // _POSIX_SEMAPHORES + +#ifdef MS_WINDOWS +# ifndef WIN32_LEAN_AND_MEAN +# define WIN32_LEAN_AND_MEAN +# endif +# include +#elif defined(HAVE_PTHREAD_H) +# include +#elif defined(HAVE_PTHREAD_STUBS) +# include "cpython/pthread_stubs.h" +#else +# error "Require native threads. See https://bugs.python.org/issue31370" +#endif + +#if (defined(_POSIX_SEMAPHORES) && (_POSIX_SEMAPHORES+0) != -1 && \ + defined(HAVE_SEM_TIMEDWAIT)) +# define _Py_USE_SEMAPHORES +# include +#endif + + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct _PySemaphore { +#if defined(MS_WINDOWS) + HANDLE platform_sem; +#elif defined(_Py_USE_SEMAPHORES) + sem_t platform_sem; +#else + pthread_mutex_t mutex; + pthread_cond_t cond; + int counter; +#endif +} _PySemaphore; + +// Puts the current thread to sleep until _PySemaphore_Wakeup() is called. +// If `detach` is true, then the thread will detach/release the GIL while +// sleeping. +PyAPI_FUNC(int) +_PySemaphore_Wait(_PySemaphore *sema, PyTime_t timeout_ns, int detach); + +// Wakes up a single thread waiting on sema. Note that _PySemaphore_Wakeup() +// can be called before _PySemaphore_Wait(). 
+PyAPI_FUNC(void)
+_PySemaphore_Wakeup(_PySemaphore *sema);
+
+// Initializes/destroys a semaphore
+PyAPI_FUNC(void) _PySemaphore_Init(_PySemaphore *sema);
+PyAPI_FUNC(void) _PySemaphore_Destroy(_PySemaphore *sema);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_SEMAPHORE_H */
diff --git a/Include/internal/pycore_setobject.h b/Include/internal/pycore_setobject.h
new file mode 100644
index 0000000000000000000000000000000000000000..0494c07fe1869d3e821247be921c28af0cf60f8c
--- /dev/null
+++ b/Include/internal/pycore_setobject.h
@@ -0,0 +1,39 @@
+#ifndef Py_INTERNAL_SETOBJECT_H
+#define Py_INTERNAL_SETOBJECT_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+// Export for '_abc' shared extension
+PyAPI_FUNC(int) _PySet_NextEntry(
+    PyObject *set,
+    Py_ssize_t *pos,
+    PyObject **key,
+    Py_hash_t *hash);
+
+// Export for '_pickle' shared extension
+PyAPI_FUNC(int) _PySet_NextEntryRef(
+    PyObject *set,
+    Py_ssize_t *pos,
+    PyObject **key,
+    Py_hash_t *hash);
+
+// Export for '_pickle' shared extension
+PyAPI_FUNC(int) _PySet_Update(PyObject *set, PyObject *iterable);
+
+// Export for the gdb plugin's (python-gdb.py) benefit
+PyAPI_DATA(PyObject *) _PySet_Dummy;
+
+PyAPI_FUNC(int) _PySet_Contains(PySetObject *so, PyObject *key);
+
+// Clears the set without acquiring locks. Used by _PyCode_Fini.
+extern void _PySet_ClearInternal(PySetObject *so);
+
+#ifdef __cplusplus
+}
+#endif
+#endif // !Py_INTERNAL_SETOBJECT_H
diff --git a/Include/internal/pycore_signal.h b/Include/internal/pycore_signal.h
new file mode 100644
index 0000000000000000000000000000000000000000..47213a34ab77b526be8ef0672720811d519c8856
--- /dev/null
+++ b/Include/internal/pycore_signal.h
@@ -0,0 +1,108 @@
+// Define Py_NSIG constant for signal handling.
+
+#ifndef Py_INTERNAL_SIGNAL_H
+#define Py_INTERNAL_SIGNAL_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include <signal.h>               // NSIG
+
+
+// Restore signals that the interpreter has called SIG_IGN on to SIG_DFL.
+// Export for '_posixsubprocess' shared extension.
+PyAPI_FUNC(void) _Py_RestoreSignals(void);
+
+#ifdef _SIG_MAXSIG
+   // gh-91145: On FreeBSD, <signal.h> defines NSIG as 32: it doesn't include
+   // realtime signals: [SIGRTMIN,SIGRTMAX]. Use _SIG_MAXSIG instead. For
+   // example on x86-64 FreeBSD 13, SIGRTMAX is 126 and _SIG_MAXSIG is 128.
+# define Py_NSIG _SIG_MAXSIG
+#elif defined(NSIG)
+# define Py_NSIG NSIG
+#elif defined(_NSIG)
+# define Py_NSIG _NSIG            // BSD/SysV
+#elif defined(_SIGMAX)
+# define Py_NSIG (_SIGMAX + 1)    // QNX
+#elif defined(SIGMAX)
+# define Py_NSIG (SIGMAX + 1)     // djgpp
+#else
+# define Py_NSIG 64               // Use a reasonable default value
+#endif
+
+#define INVALID_FD (-1)
+
+struct _signals_runtime_state {
+    struct {
+        // tripped and func should be accessed using atomic ops.
+        int tripped;
+        PyObject* func;
+    } handlers[Py_NSIG];
+
+    volatile struct {
+#ifdef MS_WINDOWS
+        /* This would be "SOCKET fd" if <winsock2.h> were always included.
+           It isn't so we must cast to SOCKET where appropriate. */
+        volatile int fd;
+#elif defined(__VXWORKS__)
+        int fd;
+#else
+        sig_atomic_t fd;
+#endif
+
+        int warn_on_full_buffer;
+#ifdef MS_WINDOWS
+        int use_send;
+#endif
+    } wakeup;
+
+    /* Speed up sigcheck() when none tripped.
+       is_tripped should be accessed using atomic ops. */
+    int is_tripped;
+
+    /* These objects necessarily belong to the main interpreter. */
+    PyObject *default_handler;
+    PyObject *ignore_handler;
+
+#ifdef MS_WINDOWS
+    /* This would be "HANDLE sigint_event" if <windows.h> were always included.
+       It isn't so we must cast to HANDLE everywhere "sigint_event" is used. */
+    void *sigint_event;
+#endif
+
+    /* True if the main interpreter thread exited due to an unhandled
+     * KeyboardInterrupt exception, suggesting the user pressed ^C. */
+    int unhandled_keyboard_interrupt;
+};
+
+#ifdef MS_WINDOWS
+# define _signals_WAKEUP_INIT \
+    {.fd = INVALID_FD, .warn_on_full_buffer = 1, .use_send = 0}
+#else
+# define _signals_WAKEUP_INIT \
+    {.fd = INVALID_FD, .warn_on_full_buffer = 1}
+#endif
+
+#define _signals_RUNTIME_INIT \
+    { \
+        .wakeup = _signals_WAKEUP_INIT, \
+    }
+
+
+// Export for '_multiprocessing' shared extension
+PyAPI_FUNC(int) _PyOS_IsMainThread(void);
+
+#ifdef MS_WINDOWS
+// <windows.h> is not included by Python.h so use void* instead of HANDLE.
+// Export for '_multiprocessing' shared extension
+PyAPI_FUNC(void*) _PyOS_SigintEvent(void);
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+#endif // !Py_INTERNAL_SIGNAL_H
diff --git a/Include/internal/pycore_sliceobject.h b/Include/internal/pycore_sliceobject.h
new file mode 100644
index 0000000000000000000000000000000000000000..ba8b1f1cb27dee3219436416586b2bb3e3b8d4e3
--- /dev/null
+++ b/Include/internal/pycore_sliceobject.h
@@ -0,0 +1,20 @@
+#ifndef Py_INTERNAL_SLICEOBJECT_H
+#define Py_INTERNAL_SLICEOBJECT_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+
+/* runtime lifecycle */
+
+PyAPI_FUNC(PyObject *)
+_PyBuildSlice_ConsumeRefs(PyObject *start, PyObject *stop);
+
+#ifdef __cplusplus
+}
+#endif
+#endif /* !Py_INTERNAL_SLICEOBJECT_H */
diff --git a/Include/internal/pycore_stackref.h b/Include/internal/pycore_stackref.h
new file mode 100644
index 0000000000000000000000000000000000000000..93898174789f7b3b937e6be6957c5ac3178758ca
--- /dev/null
+++ b/Include/internal/pycore_stackref.h
@@ -0,0 +1,195 @@
+#ifndef Py_INTERNAL_STACKREF_H
+#define Py_INTERNAL_STACKREF_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include <stdint.h>               // uintptr_t
+
+typedef union {
+    uintptr_t bits;
+} _PyStackRef;
+
+static const _PyStackRef Py_STACKREF_NULL = { .bits = 0 };
+
+#define Py_TAG_DEFERRED (1)
+
+// Gets a PyObject * from a _PyStackRef
+#if defined(Py_GIL_DISABLED)
+static inline PyObject *
+PyStackRef_Get(_PyStackRef tagged)
+{
+    PyObject *cleared = ((PyObject *)((tagged).bits & (~Py_TAG_DEFERRED)));
+    return cleared;
+}
+#else
+# define PyStackRef_Get(tagged) ((PyObject *)((tagged).bits))
+#endif
+
+// Converts a PyObject * to a PyStackRef, stealing the reference.
+#if defined(Py_GIL_DISABLED)
+static inline _PyStackRef
+_PyStackRef_StealRef(PyObject *obj)
+{
+    // Make sure we don't take an already tagged value.
+    assert(((uintptr_t)obj & Py_TAG_DEFERRED) == 0);
+    return ((_PyStackRef){.bits = ((uintptr_t)(obj))});
+}
+# define PyStackRef_StealRef(obj) _PyStackRef_StealRef(_PyObject_CAST(obj))
+#else
+# define PyStackRef_StealRef(obj) ((_PyStackRef){.bits = ((uintptr_t)(obj))})
+#endif
+
+// Converts a PyObject * to a PyStackRef, with a new reference
+#if defined(Py_GIL_DISABLED)
+static inline _PyStackRef
+_PyStackRef_NewRefDeferred(PyObject *obj)
+{
+    // Make sure we don't take an already tagged value.
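+    // (Editor's note on the branch below: if the object supports deferred
+    // reference counting, the pointer is returned with Py_TAG_DEFERRED set
+    // and no refcount change; otherwise a real strong reference is taken
+    // via Py_NewRef().)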
+ assert(((uintptr_t)obj & Py_TAG_DEFERRED) == 0); + assert(obj != NULL); + if (_PyObject_HasDeferredRefcount(obj)) { + return (_PyStackRef){ .bits = (uintptr_t)obj | Py_TAG_DEFERRED }; + } + else { + return (_PyStackRef){ .bits = (uintptr_t)Py_NewRef(obj) }; + } +} +# define PyStackRef_NewRefDeferred(obj) _PyStackRef_NewRefDeferred(_PyObject_CAST(obj)) +#else +# define PyStackRef_NewRefDeferred(obj) PyStackRef_NewRef(((_PyStackRef){.bits = ((uintptr_t)(obj))})) +#endif + +#if defined(Py_GIL_DISABLED) +static inline _PyStackRef +_PyStackRef_XNewRefDeferred(PyObject *obj) +{ + // Make sure we don't take an already tagged value. + assert(((uintptr_t)obj & Py_TAG_DEFERRED) == 0); + if (obj == NULL) { + return Py_STACKREF_NULL; + } + return _PyStackRef_NewRefDeferred(obj); +} +# define PyStackRef_XNewRefDeferred(obj) _PyStackRef_XNewRefDeferred(_PyObject_CAST(obj)) +#else +# define PyStackRef_XNewRefDeferred(obj) PyStackRef_XNewRef(((_PyStackRef){.bits = ((uintptr_t)(obj))})) +#endif + +// Converts a PyStackRef back to a PyObject *. +#if defined(Py_GIL_DISABLED) +static inline PyObject * +PyStackRef_StealObject(_PyStackRef tagged) +{ + if ((tagged.bits & Py_TAG_DEFERRED) == Py_TAG_DEFERRED) { + assert(_PyObject_HasDeferredRefcount(PyStackRef_Get(tagged))); + return Py_NewRef(PyStackRef_Get(tagged)); + } + return PyStackRef_Get(tagged); +} +#else +# define PyStackRef_StealObject(tagged) PyStackRef_Get(tagged) +#endif + +static inline void +_Py_untag_stack_borrowed(PyObject **dst, const _PyStackRef *src, size_t length) +{ + for (size_t i = 0; i < length; i++) { + dst[i] = PyStackRef_Get(src[i]); + } +} + +static inline void +_Py_untag_stack_steal(PyObject **dst, const _PyStackRef *src, size_t length) +{ + for (size_t i = 0; i < length; i++) { + dst[i] = PyStackRef_StealObject(src[i]); + } +} + + +#define PyStackRef_XSETREF(dst, src) \ + do { \ + _PyStackRef *_tmp_dst_ptr = &(dst); \ + _PyStackRef _tmp_old_dst = (*_tmp_dst_ptr); \ + *_tmp_dst_ptr = (src); \ + PyStackRef_XDECREF(_tmp_old_dst); \ + } while (0) + +#define PyStackRef_SETREF(dst, src) \ + do { \ + _PyStackRef *_tmp_dst_ptr = &(dst); \ + _PyStackRef _tmp_old_dst = (*_tmp_dst_ptr); \ + *_tmp_dst_ptr = (src); \ + PyStackRef_DECREF(_tmp_old_dst); \ + } while (0) + +#define PyStackRef_CLEAR(op) \ + do { \ + _PyStackRef *_tmp_op_ptr = &(op); \ + _PyStackRef _tmp_old_op = (*_tmp_op_ptr); \ + if (_tmp_old_op.bits != Py_STACKREF_NULL.bits) { \ + *_tmp_op_ptr = Py_STACKREF_NULL; \ + PyStackRef_DECREF(_tmp_old_op); \ + } \ + } while (0) + +#if defined(Py_GIL_DISABLED) +static inline void +PyStackRef_DECREF(_PyStackRef tagged) +{ + if ((tagged.bits & Py_TAG_DEFERRED) == Py_TAG_DEFERRED) { + return; + } + Py_DECREF(PyStackRef_Get(tagged)); +} +#else +# define PyStackRef_DECREF(op) Py_DECREF(PyStackRef_Get(op)) +#endif + +#if defined(Py_GIL_DISABLED) +static inline void +PyStackRef_INCREF(_PyStackRef tagged) +{ + if ((tagged.bits & Py_TAG_DEFERRED) == Py_TAG_DEFERRED) { + assert(_PyObject_HasDeferredRefcount(PyStackRef_Get(tagged))); + return; + } + Py_INCREF(PyStackRef_Get(tagged)); +} +#else +# define PyStackRef_INCREF(op) Py_INCREF(PyStackRef_Get(op)) +#endif + +static inline void +PyStackRef_XDECREF(_PyStackRef op) +{ + if (op.bits != Py_STACKREF_NULL.bits) { + PyStackRef_DECREF(op); + } +} + +static inline _PyStackRef +PyStackRef_NewRef(_PyStackRef obj) +{ + PyStackRef_INCREF(obj); + return obj; +} + +static inline _PyStackRef +PyStackRef_XNewRef(_PyStackRef obj) +{ + if (obj.bits == Py_STACKREF_NULL.bits) { + return obj; + } + return 
PyStackRef_NewRef(obj); +} + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_STACKREF_H */ diff --git a/Include/internal/pycore_strhex.h b/Include/internal/pycore_strhex.h new file mode 100644 index 0000000000000000000000000000000000000000..225f423912f2c27eabe64de90a3f9b916cb03577 --- /dev/null +++ b/Include/internal/pycore_strhex.h @@ -0,0 +1,39 @@ +#ifndef Py_INTERNAL_STRHEX_H +#define Py_INTERNAL_STRHEX_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +// Returns a str() containing the hex representation of argbuf. +// Export for '_hashlib' shared extension. +PyAPI_FUNC(PyObject*) _Py_strhex(const + char* argbuf, + const Py_ssize_t arglen); + +// Returns a bytes() containing the ASCII hex representation of argbuf. +extern PyObject* _Py_strhex_bytes( + const char* argbuf, + const Py_ssize_t arglen); + +// These variants include support for a separator between every N bytes: +extern PyObject* _Py_strhex_with_sep( + const char* argbuf, + const Py_ssize_t arglen, + PyObject* sep, + const int bytes_per_group); + +// Export for 'binascii' shared extension +PyAPI_FUNC(PyObject*) _Py_strhex_bytes_with_sep( + const char* argbuf, + const Py_ssize_t arglen, + PyObject* sep, + const int bytes_per_group); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_STRHEX_H */ diff --git a/Include/internal/pycore_structseq.h b/Include/internal/pycore_structseq.h new file mode 100644 index 0000000000000000000000000000000000000000..5cff165627502bd865abda793c469c4eeb01f588 --- /dev/null +++ b/Include/internal/pycore_structseq.h @@ -0,0 +1,40 @@ +#ifndef Py_INTERNAL_STRUCTSEQ_H +#define Py_INTERNAL_STRUCTSEQ_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + + +/* other API */ + +// Export for '_curses' shared extension +PyAPI_FUNC(PyTypeObject*) _PyStructSequence_NewType( + PyStructSequence_Desc *desc, + unsigned long tp_flags); + +extern int _PyStructSequence_InitBuiltinWithFlags( + PyInterpreterState *interp, + PyTypeObject *type, + PyStructSequence_Desc *desc, + unsigned long tp_flags); + +static inline int +_PyStructSequence_InitBuiltin(PyInterpreterState *interp, + PyTypeObject *type, + PyStructSequence_Desc *desc) +{ + return _PyStructSequence_InitBuiltinWithFlags(interp, type, desc, 0); +} + +extern void _PyStructSequence_FiniBuiltin( + PyInterpreterState *interp, + PyTypeObject *type); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_STRUCTSEQ_H */ diff --git a/Include/internal/pycore_symtable.h b/Include/internal/pycore_symtable.h new file mode 100644 index 0000000000000000000000000000000000000000..90252bf8365443774f7c40cc523662f50c7e2d39 --- /dev/null +++ b/Include/internal/pycore_symtable.h @@ -0,0 +1,204 @@ +#ifndef Py_INTERNAL_SYMTABLE_H +#define Py_INTERNAL_SYMTABLE_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +struct _mod; // Type defined in pycore_ast.h + +typedef enum _block_type { + FunctionBlock, ClassBlock, ModuleBlock, + // Used for annotations if 'from __future__ import annotations' is active. + // Annotation blocks cannot bind names and are not evaluated. + AnnotationBlock, + + // The following blocks are used for generics and type aliases. These work + // mostly like functions (see PEP 695 for details). 
The three different + // blocks function identically; they are different enum entries only so + // that error messages can be more precise. + + // The block to enter when processing a "type" (PEP 695) construction, + // e.g., "type MyGeneric[T] = list[T]". + TypeAliasBlock, + // The block to enter when processing a "generic" (PEP 695) object, + // e.g., "def foo[T](): pass" or "class A[T]: pass". + TypeParametersBlock, + // The block to enter when processing the bound, the constraint tuple + // or the default value of a single "type variable" in the formal sense, + // i.e., a TypeVar, a TypeVarTuple or a ParamSpec object (the latter two + // do not support a bound or a constraint tuple). + TypeVariableBlock, +} _Py_block_ty; + +typedef enum _comprehension_type { + NoComprehension = 0, + ListComprehension = 1, + DictComprehension = 2, + SetComprehension = 3, + GeneratorExpression = 4 } _Py_comprehension_ty; + +/* source location information */ +typedef struct { + int lineno; + int end_lineno; + int col_offset; + int end_col_offset; +} _Py_SourceLocation; + +#define SRC_LOCATION_FROM_AST(n) \ + (_Py_SourceLocation){ \ + .lineno = (n)->lineno, \ + .end_lineno = (n)->end_lineno, \ + .col_offset = (n)->col_offset, \ + .end_col_offset = (n)->end_col_offset } + +static const _Py_SourceLocation NO_LOCATION = {-1, -1, -1, -1}; + +/* __future__ information */ +typedef struct { + int ff_features; /* flags set by future statements */ + _Py_SourceLocation ff_location; /* location of last future statement */ +} _PyFutureFeatures; + +struct _symtable_entry; + +struct symtable { + PyObject *st_filename; /* name of file being compiled, + decoded from the filesystem encoding */ + struct _symtable_entry *st_cur; /* current symbol table entry */ + struct _symtable_entry *st_top; /* symbol table entry for module */ + PyObject *st_blocks; /* dict: map AST node addresses + * to symbol table entries */ + PyObject *st_stack; /* list: stack of namespace info */ + PyObject *st_global; /* borrowed ref to st_top->ste_symbols */ + int st_nblocks; /* number of blocks used. kept for + consistency with the corresponding + compiler structure */ + PyObject *st_private; /* name of current class or NULL */ + _PyFutureFeatures *st_future; /* module's future features that affect + the symbol table */ + int recursion_depth; /* current recursion depth */ + int recursion_limit; /* recursion limit */ +}; + +typedef struct _symtable_entry { + PyObject_HEAD + PyObject *ste_id; /* int: key in ste_table->st_blocks */ + PyObject *ste_symbols; /* dict: variable names to flags */ + PyObject *ste_name; /* string: name of current block */ + PyObject *ste_varnames; /* list of function parameters */ + PyObject *ste_children; /* list of child blocks */ + PyObject *ste_directives;/* locations of global and nonlocal statements */ + PyObject *ste_mangled_names; /* set of names for which mangling should be applied */ + + _Py_block_ty ste_type; + // Optional string set by symtable.c and used when reporting errors. + // The content of that string is a description of the current "context". + // + // For instance, if we are processing the default value of the type + // variable "T" in "def foo[T = int](): pass", `ste_scope_info` is + // set to "a TypeVar default". 
+ const char *ste_scope_info; + + int ste_nested; /* true if block is nested */ + unsigned ste_free : 1; /* true if block has free variables */ + unsigned ste_child_free : 1; /* true if a child block has free vars, + including free refs to globals */ + unsigned ste_generator : 1; /* true if namespace is a generator */ + unsigned ste_coroutine : 1; /* true if namespace is a coroutine */ + _Py_comprehension_ty ste_comprehension; /* Kind of comprehension (if any) */ + unsigned ste_varargs : 1; /* true if block has varargs */ + unsigned ste_varkeywords : 1; /* true if block has varkeywords */ + unsigned ste_returns_value : 1; /* true if namespace uses return with + an argument */ + unsigned ste_needs_class_closure : 1; /* for class scopes, true if a + closure over __class__ + should be created */ + unsigned ste_needs_classdict : 1; /* for class scopes, true if a closure + over the class dict should be created */ + unsigned ste_comp_inlined : 1; /* true if this comprehension is inlined */ + unsigned ste_comp_iter_target : 1; /* true if visiting comprehension target */ + unsigned ste_can_see_class_scope : 1; /* true if this block can see names bound in an + enclosing class scope */ + int ste_comp_iter_expr; /* non-zero if visiting a comprehension range expression */ + int ste_lineno; /* first line of block */ + int ste_col_offset; /* offset of first line of block */ + int ste_end_lineno; /* end line of block */ + int ste_end_col_offset; /* end offset of first line of block */ + int ste_opt_lineno; /* lineno of last exec or import * */ + int ste_opt_col_offset; /* offset of last exec or import * */ + struct symtable *ste_table; +} PySTEntryObject; + +extern PyTypeObject PySTEntry_Type; + +#define PySTEntry_Check(op) Py_IS_TYPE((op), &PySTEntry_Type) + +extern long _PyST_GetSymbol(PySTEntryObject *, PyObject *); +extern int _PyST_GetScope(PySTEntryObject *, PyObject *); +extern int _PyST_IsFunctionLike(PySTEntryObject *); + +extern struct symtable* _PySymtable_Build( + struct _mod *mod, + PyObject *filename, + _PyFutureFeatures *future); +extern PySTEntryObject* _PySymtable_Lookup(struct symtable *, void *); + +extern void _PySymtable_Free(struct symtable *); + +extern PyObject *_Py_MaybeMangle(PyObject *privateobj, PySTEntryObject *ste, PyObject *name); +extern PyObject* _Py_Mangle(PyObject *p, PyObject *name); + +/* Flags for def-use information */ + +#define DEF_GLOBAL 1 /* global stmt */ +#define DEF_LOCAL 2 /* assignment in code block */ +#define DEF_PARAM (2<<1) /* formal parameter */ +#define DEF_NONLOCAL (2<<2) /* nonlocal stmt */ +#define USE (2<<3) /* name is used */ +#define DEF_FREE (2<<4) /* name used but not defined in nested block */ +#define DEF_FREE_CLASS (2<<5) /* free variable from class's method */ +#define DEF_IMPORT (2<<6) /* assignment occurred via import */ +#define DEF_ANNOT (2<<7) /* this name is annotated */ +#define DEF_COMP_ITER (2<<8) /* this name is a comprehension iteration variable */ +#define DEF_TYPE_PARAM (2<<9) /* this name is a type parameter */ +#define DEF_COMP_CELL (2<<10) /* this name is a cell in an inlined comprehension */ + +#define DEF_BOUND (DEF_LOCAL | DEF_PARAM | DEF_IMPORT) + +/* GLOBAL_EXPLICIT and GLOBAL_IMPLICIT are used internally by the symbol + table. GLOBAL is returned from PyST_GetScope() for either of them. + It is stored in ste_symbols at bits 13-16. 
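+
+   For example (editor's sketch), the scope of a name is recovered from its
+   flag word with ((flags >> SCOPE_OFFSET) & SCOPE_MASK), giving one of the
+   LOCAL / GLOBAL_EXPLICIT / GLOBAL_IMPLICIT / FREE / CELL values defined
+   below (compare _PyST_GetScope()).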
+*/ +#define SCOPE_OFFSET 12 +#define SCOPE_MASK (DEF_GLOBAL | DEF_LOCAL | DEF_PARAM | DEF_NONLOCAL) + +#define LOCAL 1 +#define GLOBAL_EXPLICIT 2 +#define GLOBAL_IMPLICIT 3 +#define FREE 4 +#define CELL 5 + +#define GENERATOR 1 +#define GENERATOR_EXPRESSION 2 + +// Used by symtablemodule.c +extern struct symtable* _Py_SymtableStringObjectFlags( + const char *str, + PyObject *filename, + int start, + PyCompilerFlags *flags); + +int _PyFuture_FromAST( + struct _mod * mod, + PyObject *filename, + _PyFutureFeatures* futures); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_SYMTABLE_H */ diff --git a/Include/internal/pycore_sysmodule.h b/Include/internal/pycore_sysmodule.h new file mode 100644 index 0000000000000000000000000000000000000000..6df574487bcd1b28b05913403dd9a05a172354ca --- /dev/null +++ b/Include/internal/pycore_sysmodule.h @@ -0,0 +1,38 @@ +#ifndef Py_INTERNAL_SYSMODULE_H +#define Py_INTERNAL_SYSMODULE_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +PyAPI_FUNC(PyObject *) _PySys_GetAttr(PyThreadState *, PyObject *); /* unused */ +PyAPI_FUNC(int) _PySys_GetOptionalAttr(PyObject *, PyObject **); +PyAPI_FUNC(int) _PySys_GetOptionalAttrString(const char *, PyObject **); +PyAPI_FUNC(PyObject *) _PySys_GetRequiredAttr(PyObject *); +PyAPI_FUNC(PyObject *) _PySys_GetRequiredAttrString(const char *); + +// Export for '_pickle' shared extension +PyAPI_FUNC(size_t) _PySys_GetSizeOf(PyObject *); + +extern int _PySys_Audit( + PyThreadState *tstate, + const char *event, + const char *argFormat, + ...); + +// _PySys_ClearAuditHooks() must not be exported: use extern rather than +// PyAPI_FUNC(). We want minimal exposure of this function. +extern void _PySys_ClearAuditHooks(PyThreadState *tstate); + +extern int _PySys_SetAttr(PyObject *, PyObject *); + +extern int _PySys_ClearAttrString(PyInterpreterState *interp, + const char *name, int verbose); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_SYSMODULE_H */ diff --git a/Include/internal/pycore_time.h b/Include/internal/pycore_time.h new file mode 100644 index 0000000000000000000000000000000000000000..205ac5d3781ddd6c6b70ae4f86c8901a1b064737 --- /dev/null +++ b/Include/internal/pycore_time.h @@ -0,0 +1,337 @@ +// Internal PyTime_t C API: see Doc/c-api/time.rst for the documentation. +// +// The PyTime_t type is an integer to support directly common arithmetic +// operations such as t1 + t2. +// +// Time formats: +// +// * Seconds. +// * Seconds as a floating-point number (C double). +// * Milliseconds (10^-3 seconds). +// * Microseconds (10^-6 seconds). +// * 100 nanoseconds (10^-7 seconds), used on Windows. +// * Nanoseconds (10^-9 seconds). +// * timeval structure, 1 microsecond (10^-6 seconds). +// * timespec structure, 1 nanosecond (10^-9 seconds). +// +// Note that PyTime_t is now specified as int64_t, in nanoseconds. +// (If we need to change this, we'll need new public API with new names.) +// Previously, PyTime_t was configurable (in theory); some comments and code +// might still allude to that. +// +// Integer overflows are detected and raise OverflowError. Conversion to a +// resolution larger than 1 nanosecond is rounded correctly with the requested +// rounding mode. Available rounding modes: +// +// * Round towards minus infinity (-inf). For example, used to read a clock. +// * Round towards infinity (+inf). For example, used for timeout to wait "at +// least" N seconds. 
+// * Round to nearest with ties going to nearest even integer. For example, used +// to round from a Python float. +// * Round away from zero. For example, used for timeout. +// +// Some functions clamp the result in the range [PyTime_MIN; PyTime_MAX]. The +// caller doesn't have to handle errors and so doesn't need to hold the GIL to +// handle exceptions. For example, _PyTime_Add(t1, t2) computes t1+t2 and +// clamps the result on overflow. +// +// Clocks: +// +// * System clock +// * Monotonic clock +// * Performance counter +// +// Internally, operations like (t * k / q) with integers are implemented in a +// way to reduce the risk of integer overflow. Such operation is used to convert a +// clock value expressed in ticks with a frequency to PyTime_t, like +// QueryPerformanceCounter() with QueryPerformanceFrequency() on Windows. + + +#ifndef Py_INTERNAL_TIME_H +#define Py_INTERNAL_TIME_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + + +#ifdef __clang__ +struct timeval; +#endif + +#define _SIZEOF_PYTIME_T 8 + +typedef enum { + // Round towards minus infinity (-inf). + // For example, used to read a clock. + _PyTime_ROUND_FLOOR=0, + + // Round towards infinity (+inf). + // For example, used for timeout to wait "at least" N seconds. + _PyTime_ROUND_CEILING=1, + + // Round to nearest with ties going to nearest even integer. + // For example, used to round from a Python float. + _PyTime_ROUND_HALF_EVEN=2, + + // Round away from zero + // For example, used for timeout. _PyTime_ROUND_CEILING rounds + // -1e-9 to 0 milliseconds which causes bpo-31786 issue. + // _PyTime_ROUND_UP rounds -1e-9 to -1 millisecond which keeps + // the timeout sign as expected. select.poll(timeout) must block + // for negative values. + _PyTime_ROUND_UP=3, + + // _PyTime_ROUND_TIMEOUT (an alias for _PyTime_ROUND_UP) should be + // used for timeouts. + _PyTime_ROUND_TIMEOUT = _PyTime_ROUND_UP +} _PyTime_round_t; + + +// Convert a time_t to a PyLong. +// Export for '_testinternalcapi' shared extension +PyAPI_FUNC(PyObject*) _PyLong_FromTime_t(time_t sec); + +// Convert a PyLong to a time_t. +// Export for '_datetime' shared extension +PyAPI_FUNC(time_t) _PyLong_AsTime_t(PyObject *obj); + +// Convert a number of seconds, int or float, to time_t. +// Export for '_datetime' shared extension. +PyAPI_FUNC(int) _PyTime_ObjectToTime_t( + PyObject *obj, + time_t *sec, + _PyTime_round_t); + +// Convert a number of seconds, int or float, to a timeval structure. +// usec is in the range [0; 999999] and rounded towards zero. +// For example, -1.2 is converted to (-2, 800000). +// Export for '_datetime' shared extension. +PyAPI_FUNC(int) _PyTime_ObjectToTimeval( + PyObject *obj, + time_t *sec, + long *usec, + _PyTime_round_t); + +// Convert a number of seconds, int or float, to a timespec structure. +// nsec is in the range [0; 999999999] and rounded towards zero. +// For example, -1.2 is converted to (-2, 800000000). +// Export for '_testinternalcapi' shared extension. +PyAPI_FUNC(int) _PyTime_ObjectToTimespec( + PyObject *obj, + time_t *sec, + long *nsec, + _PyTime_round_t); + + +// Create a timestamp from a number of seconds. +// Export for '_socket' shared extension. +PyAPI_FUNC(PyTime_t) _PyTime_FromSeconds(int seconds); + +// Create a timestamp from a number of seconds in double. 
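+// (Editor's example: with round=_PyTime_ROUND_HALF_EVEN, 1.5 seconds becomes
+// a PyTime_t of 1500000000, since PyTime_t is specified in nanoseconds as
+// noted at the top of this file.)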
+extern int _PyTime_FromSecondsDouble( + double seconds, + _PyTime_round_t round, + PyTime_t *result); + +// Macro to create a timestamp from a number of seconds, no integer overflow. +// Only use the macro for small values, prefer _PyTime_FromSeconds(). +#define _PYTIME_FROMSECONDS(seconds) \ + ((PyTime_t)(seconds) * (1000 * 1000 * 1000)) + +// Create a timestamp from a number of microseconds. +// Clamp to [PyTime_MIN; PyTime_MAX] on overflow. +extern PyTime_t _PyTime_FromMicrosecondsClamp(PyTime_t us); + +// Create a timestamp from a Python int object (number of nanoseconds). +// Export for '_lsprof' shared extension. +PyAPI_FUNC(int) _PyTime_FromLong(PyTime_t *t, + PyObject *obj); + +// Convert a number of seconds (Python float or int) to a timestamp. +// Raise an exception and return -1 on error, return 0 on success. +// Export for '_socket' shared extension. +PyAPI_FUNC(int) _PyTime_FromSecondsObject(PyTime_t *t, + PyObject *obj, + _PyTime_round_t round); + +// Convert a number of milliseconds (Python float or int, 10^-3) to a timestamp. +// Raise an exception and return -1 on error, return 0 on success. +// Export for 'select' shared extension. +PyAPI_FUNC(int) _PyTime_FromMillisecondsObject(PyTime_t *t, + PyObject *obj, + _PyTime_round_t round); + +// Convert timestamp to a number of milliseconds (10^-3 seconds). +// Export for '_ssl' shared extension. +PyAPI_FUNC(PyTime_t) _PyTime_AsMilliseconds(PyTime_t t, + _PyTime_round_t round); + +// Convert timestamp to a number of microseconds (10^-6 seconds). +// Export for '_queue' shared extension. +PyAPI_FUNC(PyTime_t) _PyTime_AsMicroseconds(PyTime_t t, + _PyTime_round_t round); + +#ifdef MS_WINDOWS +// Convert timestamp to a number of 100 nanoseconds (10^-7 seconds). +extern PyTime_t _PyTime_As100Nanoseconds(PyTime_t t, + _PyTime_round_t round); +#endif + +// Convert a timestamp (number of nanoseconds) as a Python int object. +// Export for '_testinternalcapi' shared extension. +PyAPI_FUNC(PyObject*) _PyTime_AsLong(PyTime_t t); + +#ifndef MS_WINDOWS +// Create a timestamp from a timeval structure. +// Raise an exception and return -1 on overflow, return 0 on success. +extern int _PyTime_FromTimeval(PyTime_t *tp, struct timeval *tv); +#endif + +// Convert a timestamp to a timeval structure (microsecond resolution). +// tv_usec is always positive. +// Raise an exception and return -1 if the conversion overflowed, +// return 0 on success. +// Export for 'select' shared extension. +PyAPI_FUNC(int) _PyTime_AsTimeval(PyTime_t t, + struct timeval *tv, + _PyTime_round_t round); + +// Similar to _PyTime_AsTimeval() but don't raise an exception on overflow. +// On overflow, clamp tv_sec to PyTime_t min/max. +// Export for 'select' shared extension. +PyAPI_FUNC(void) _PyTime_AsTimeval_clamp(PyTime_t t, + struct timeval *tv, + _PyTime_round_t round); + +// Convert a timestamp to a number of seconds (secs) and microseconds (us). +// us is always positive. This function is similar to _PyTime_AsTimeval() +// except that secs is always a time_t type, whereas the timeval structure +// uses a C long for tv_sec on Windows. +// Raise an exception and return -1 if the conversion overflowed, +// return 0 on success. +// Export for '_datetime' shared extension. +PyAPI_FUNC(int) _PyTime_AsTimevalTime_t( + PyTime_t t, + time_t *secs, + int *us, + _PyTime_round_t round); + +#if defined(HAVE_CLOCK_GETTIME) || defined(HAVE_KQUEUE) +// Create a timestamp from a timespec structure. +// Raise an exception and return -1 on overflow, return 0 on success. 
+extern int _PyTime_FromTimespec(PyTime_t *tp, const struct timespec *ts); + +// Convert a timestamp to a timespec structure (nanosecond resolution). +// tv_nsec is always positive. +// Raise an exception and return -1 on error, return 0 on success. +// Export for '_testinternalcapi' shared extension. +PyAPI_FUNC(int) _PyTime_AsTimespec(PyTime_t t, struct timespec *ts); + +// Similar to _PyTime_AsTimespec() but don't raise an exception on overflow. +// On overflow, clamp tv_sec to PyTime_t min/max. +// Export for '_testinternalcapi' shared extension. +PyAPI_FUNC(void) _PyTime_AsTimespec_clamp(PyTime_t t, struct timespec *ts); +#endif + + +// Compute t1 + t2. Clamp to [PyTime_MIN; PyTime_MAX] on overflow. +extern PyTime_t _PyTime_Add(PyTime_t t1, PyTime_t t2); + +// Structure used by time.get_clock_info() +typedef struct { + const char *implementation; + int monotonic; + int adjustable; + double resolution; +} _Py_clock_info_t; + +// Get the current time from the system clock. +// On success, set *t and *info (if not NULL), and return 0. +// On error, raise an exception and return -1. +extern int _PyTime_TimeWithInfo( + PyTime_t *t, + _Py_clock_info_t *info); + +// Get the time of a monotonic clock, i.e. a clock that cannot go backwards. +// The clock is not affected by system clock updates. The reference point of +// the returned value is undefined, so that only the difference between the +// results of consecutive calls is valid. +// +// Fill info (if set) with information of the function used to get the time. +// +// Return 0 on success, raise an exception and return -1 on error. +// Export for '_testsinglephase' shared extension. +PyAPI_FUNC(int) _PyTime_MonotonicWithInfo( + PyTime_t *t, + _Py_clock_info_t *info); + + +// Converts a timestamp to the Gregorian time, using the local time zone. +// Return 0 on success, raise an exception and return -1 on error. +// Export for '_datetime' shared extension. +PyAPI_FUNC(int) _PyTime_localtime(time_t t, struct tm *tm); + +// Converts a timestamp to the Gregorian time, assuming UTC. +// Return 0 on success, raise an exception and return -1 on error. +// Export for '_datetime' shared extension. +PyAPI_FUNC(int) _PyTime_gmtime(time_t t, struct tm *tm); + + +// Get the performance counter: clock with the highest available resolution to +// measure a short duration. +// +// Fill info (if set) with information of the function used to get the time. +// +// Return 0 on success, raise an exception and return -1 on error. +extern int _PyTime_PerfCounterWithInfo( + PyTime_t *t, + _Py_clock_info_t *info); + + +// --- _PyDeadline ----------------------------------------------------------- + +// Create a deadline. +// Pseudo code: return PyTime_MonotonicRaw() + timeout +// Export for '_ssl' shared extension. +PyAPI_FUNC(PyTime_t) _PyDeadline_Init(PyTime_t timeout); + +// Get remaining time from a deadline. +// Pseudo code: return deadline - PyTime_MonotonicRaw() +// Export for '_ssl' shared extension. +PyAPI_FUNC(PyTime_t) _PyDeadline_Get(PyTime_t deadline); + + +// --- _PyTimeFraction ------------------------------------------------------- + +typedef struct { + PyTime_t numer; + PyTime_t denom; +} _PyTimeFraction; + +// Set a fraction. +// Return 0 on success. +// Return -1 if the fraction is invalid. +extern int _PyTimeFraction_Set( + _PyTimeFraction *frac, + PyTime_t numer, + PyTime_t denom); + +// Compute ticks * frac.numer / frac.denom. +// Clamp to [PyTime_MIN; PyTime_MAX] on overflow. 
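+// Editor's sketch (illustrative, not part of this header): converting
+// performance-counter ticks at `freq` ticks per second into nanoseconds:
+//
+//     _PyTimeFraction frac;
+//     if (_PyTimeFraction_Set(&frac, 1000 * 1000 * 1000, freq) == 0) {
+//         PyTime_t ns = _PyTimeFraction_Mul(ticks, &frac);
+//     }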
+extern PyTime_t _PyTimeFraction_Mul( + PyTime_t ticks, + const _PyTimeFraction *frac); + +// Compute a clock resolution: frac.numer / frac.denom / 1e9. +extern double _PyTimeFraction_Resolution( + const _PyTimeFraction *frac); + + +#ifdef __cplusplus +} +#endif +#endif // !Py_INTERNAL_TIME_H diff --git a/Include/internal/pycore_token.h b/Include/internal/pycore_token.h new file mode 100644 index 0000000000000000000000000000000000000000..571cd6249f28126a573667ea41b0d52138aa9cff --- /dev/null +++ b/Include/internal/pycore_token.h @@ -0,0 +1,106 @@ +// Auto-generated by Tools/build/generate_token.py + +/* Token types */ +#ifndef Py_INTERNAL_TOKEN_H +#define Py_INTERNAL_TOKEN_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#undef TILDE /* Prevent clash of our definition with system macro. Ex AIX, ioctl.h */ + +#define ENDMARKER 0 +#define NAME 1 +#define NUMBER 2 +#define STRING 3 +#define NEWLINE 4 +#define INDENT 5 +#define DEDENT 6 +#define LPAR 7 +#define RPAR 8 +#define LSQB 9 +#define RSQB 10 +#define COLON 11 +#define COMMA 12 +#define SEMI 13 +#define PLUS 14 +#define MINUS 15 +#define STAR 16 +#define SLASH 17 +#define VBAR 18 +#define AMPER 19 +#define LESS 20 +#define GREATER 21 +#define EQUAL 22 +#define DOT 23 +#define PERCENT 24 +#define LBRACE 25 +#define RBRACE 26 +#define EQEQUAL 27 +#define NOTEQUAL 28 +#define LESSEQUAL 29 +#define GREATEREQUAL 30 +#define TILDE 31 +#define CIRCUMFLEX 32 +#define LEFTSHIFT 33 +#define RIGHTSHIFT 34 +#define DOUBLESTAR 35 +#define PLUSEQUAL 36 +#define MINEQUAL 37 +#define STAREQUAL 38 +#define SLASHEQUAL 39 +#define PERCENTEQUAL 40 +#define AMPEREQUAL 41 +#define VBAREQUAL 42 +#define CIRCUMFLEXEQUAL 43 +#define LEFTSHIFTEQUAL 44 +#define RIGHTSHIFTEQUAL 45 +#define DOUBLESTAREQUAL 46 +#define DOUBLESLASH 47 +#define DOUBLESLASHEQUAL 48 +#define AT 49 +#define ATEQUAL 50 +#define RARROW 51 +#define ELLIPSIS 52 +#define COLONEQUAL 53 +#define EXCLAMATION 54 +#define OP 55 +#define TYPE_IGNORE 56 +#define TYPE_COMMENT 57 +#define SOFT_KEYWORD 58 +#define FSTRING_START 59 +#define FSTRING_MIDDLE 60 +#define FSTRING_END 61 +#define COMMENT 62 +#define NL 63 +#define ERRORTOKEN 64 +#define N_TOKENS 66 +#define NT_OFFSET 256 + +/* Special definitions for cooperation with parser */ + +#define ISTERMINAL(x) ((x) < NT_OFFSET) +#define ISNONTERMINAL(x) ((x) >= NT_OFFSET) +#define ISEOF(x) ((x) == ENDMARKER) +#define ISWHITESPACE(x) ((x) == ENDMARKER || \ + (x) == NEWLINE || \ + (x) == INDENT || \ + (x) == DEDENT) +#define ISSTRINGLIT(x) ((x) == STRING || \ + (x) == FSTRING_MIDDLE) + + +// Export these 4 symbols for 'test_peg_generator' +PyAPI_DATA(const char * const) _PyParser_TokenNames[]; /* Token names */ +PyAPI_FUNC(int) _PyToken_OneChar(int); +PyAPI_FUNC(int) _PyToken_TwoChars(int, int); +PyAPI_FUNC(int) _PyToken_ThreeChars(int, int, int); + +#ifdef __cplusplus +} +#endif +#endif // !Py_INTERNAL_TOKEN_H diff --git a/Include/internal/pycore_traceback.h b/Include/internal/pycore_traceback.h new file mode 100644 index 0000000000000000000000000000000000000000..10922bff98bd4bc0060189833d05b59ac4dc649f --- /dev/null +++ b/Include/internal/pycore_traceback.h @@ -0,0 +1,106 @@ +#ifndef Py_INTERNAL_TRACEBACK_H +#define Py_INTERNAL_TRACEBACK_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +// Export for '_ctypes' shared extension +PyAPI_FUNC(int) _Py_DisplaySourceLine(PyObject *, PyObject 
*, int, int, int *, PyObject **);
+
+// Export for 'pyexpat' shared extension
+PyAPI_FUNC(void) _PyTraceback_Add(const char *, const char *, int);
+
+/* Write the Python traceback into the file 'fd'. For example:
+
+       Traceback (most recent call first):
+         File "xxx", line xxx in <xxx>
+         File "xxx", line xxx in <xxx>
+         ...
+         File "xxx", line xxx in <xxx>
+
+   This function is written for debugging purposes only, to dump the traceback
+   in the worst case: after a segmentation fault, at fatal error, etc. That's
+   why it is very limited. Strings are truncated to 100 characters and encoded
+   to ASCII with backslashreplace. It doesn't write the source code, only the
+   function name, filename and line number of each frame. Write only the first
+   100 frames: if the traceback is truncated, write the line " ...".
+
+   This function is signal safe. */
+
+extern void _Py_DumpTraceback(
+    int fd,
+    PyThreadState *tstate);
+
+/* Write the traceback of all threads into the file 'fd'. current_tstate can
+   be NULL.
+
+   Return NULL on success, or an error message on error.
+
+   This function is written for debugging purposes only. It calls
+   _Py_DumpTraceback() for each thread, and so has the same limitations. It
+   only writes the traceback of the first 100 threads: write "..." if there
+   are more threads.
+
+   If current_tstate is NULL, the function tries to get the Python thread state
+   of the current thread. It is not an error if the function is unable to get
+   the current Python thread state.
+
+   If interp is NULL, the function tries to get the interpreter state from
+   the current Python thread state, or from
+   _PyGILState_GetInterpreterStateUnsafe() as a last resort.
+
+   It is better to pass NULL for interp and current_tstate: the function tries
+   different options to retrieve this information.
+
+   This function is signal safe. */
+
+extern const char* _Py_DumpTracebackThreads(
+    int fd,
+    PyInterpreterState *interp,
+    PyThreadState *current_tstate);
+
+/* Write a Unicode object into the file descriptor fd. Encode the string to
+   ASCII using the backslashreplace error handler.
+
+   Do nothing if text is not a Unicode object. The function accepts a Unicode
+   string which is not ready (PyUnicode_WCHAR_KIND).
+
+   This function is signal safe. */
+extern void _Py_DumpASCII(int fd, PyObject *text);
+
+/* Format an integer as decimal into the file descriptor fd.
+
+   This function is signal safe. */
+extern void _Py_DumpDecimal(
+    int fd,
+    size_t value);
+
+/* Format an integer as hexadecimal with width digits into the file descriptor
+   fd. The function is signal safe. */
+extern void _Py_DumpHexadecimal(
+    int fd,
+    uintptr_t value,
+    Py_ssize_t width);
+
+extern PyObject* _PyTraceBack_FromFrame(
+    PyObject *tb_next,
+    PyFrameObject *frame);
+
+#define EXCEPTION_TB_HEADER "Traceback (most recent call last):\n"
+#define EXCEPTION_GROUP_TB_HEADER "Exception Group Traceback (most recent call last):\n"
+
+/* Write the traceback tb to file f. Prefix each line with
+   indent spaces followed by the margin (if it is not NULL).
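+
+   A minimal call sketch (editor's illustration, assuming a valid traceback
+   object `tb` and that pycore_sysmodule.h is included for the stderr lookup):
+
+       PyObject *f = _PySys_GetRequiredAttrString("stderr");
+       if (f == NULL) {
+           return -1;
+       }
+       int res = _PyTraceBack_Print(tb, EXCEPTION_TB_HEADER, f);
+       Py_DECREF(f);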
*/ +extern int _PyTraceBack_Print( + PyObject *tb, const char *header, PyObject *f); +extern int _Py_WriteIndentedMargin(int, const char*, PyObject *); +extern int _Py_WriteIndent(int, PyObject *); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_TRACEBACK_H */ diff --git a/Include/internal/pycore_tracemalloc.h b/Include/internal/pycore_tracemalloc.h new file mode 100644 index 0000000000000000000000000000000000000000..f70d47074f813c630c4017788a69299d744fbba4 --- /dev/null +++ b/Include/internal/pycore_tracemalloc.h @@ -0,0 +1,170 @@ +#ifndef Py_INTERNAL_TRACEMALLOC_H +#define Py_INTERNAL_TRACEMALLOC_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_hashtable.h" // _Py_hashtable_t + + +/* Trace memory blocks allocated by PyMem_RawMalloc() */ +#define TRACE_RAW_MALLOC + + +struct _PyTraceMalloc_Config { + /* Module initialized? + Variable protected by the GIL */ + enum { + TRACEMALLOC_NOT_INITIALIZED, + TRACEMALLOC_INITIALIZED, + TRACEMALLOC_FINALIZED + } initialized; + + /* Is tracemalloc tracing memory allocations? + Variable protected by the GIL */ + int tracing; + + /* limit of the number of frames in a traceback, 1 by default. + Variable protected by the GIL. */ + int max_nframe; +}; + + +/* Pack the frame_t structure to reduce the memory footprint on 64-bit + architectures: 12 bytes instead of 16. */ +#if defined(_MSC_VER) +#pragma pack(push, 4) +#endif + +struct +#ifdef __GNUC__ +__attribute__((packed)) +#endif +tracemalloc_frame { + /* filename cannot be NULL: "" is used if the Python frame + filename is NULL */ + PyObject *filename; + unsigned int lineno; +}; +#ifdef _MSC_VER +#pragma pack(pop) +#endif + +struct tracemalloc_traceback { + Py_uhash_t hash; + /* Number of frames stored */ + uint16_t nframe; + /* Total number of frames the traceback had */ + uint16_t total_nframe; + struct tracemalloc_frame frames[1]; +}; + + +struct _tracemalloc_runtime_state { + struct _PyTraceMalloc_Config config; + + /* Protected by the GIL */ + struct { + PyMemAllocatorEx mem; + PyMemAllocatorEx raw; + PyMemAllocatorEx obj; + } allocators; + +#if defined(TRACE_RAW_MALLOC) + PyThread_type_lock tables_lock; +#endif + /* Size in bytes of currently traced memory. + Protected by TABLES_LOCK(). */ + size_t traced_memory; + /* Peak size in bytes of traced memory. + Protected by TABLES_LOCK(). */ + size_t peak_traced_memory; + /* Hash table used as a set to intern filenames: + PyObject* => PyObject*. + Protected by the GIL */ + _Py_hashtable_t *filenames; + /* Buffer to store a new traceback in traceback_new(). + Protected by the GIL. */ + struct tracemalloc_traceback *traceback; + /* Hash table used as a set to intern tracebacks: + traceback_t* => traceback_t* + Protected by the GIL */ + _Py_hashtable_t *tracebacks; + /* pointer (void*) => trace (trace_t*). + Protected by TABLES_LOCK(). */ + _Py_hashtable_t *traces; + /* domain (unsigned int) => traces (_Py_hashtable_t). + Protected by TABLES_LOCK(). */ + _Py_hashtable_t *domains; + + struct tracemalloc_traceback empty_traceback; + + Py_tss_t reentrant_key; +}; + +#define _tracemalloc_runtime_state_INIT \ + { \ + .config = { \ + .initialized = TRACEMALLOC_NOT_INITIALIZED, \ + .tracing = 0, \ + .max_nframe = 1, \ + }, \ + .reentrant_key = Py_tss_NEEDS_INIT, \ + } + + +// Get the traceback where a memory block was allocated. +// +// Return a tuple of (filename: str, lineno: int) tuples. 
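+// For example (editor's illustration, not from the header):
+// (("spam.py", 12), ("eggs.py", 7)) for a two-frame traceback.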
+//
+// Return None if the tracemalloc module is disabled or if the memory block
+// is not tracked by tracemalloc.
+//
+// Raise an exception and return NULL on error.
+//
+// Export for '_testinternalcapi' shared extension.
+PyAPI_FUNC(PyObject*) _PyTraceMalloc_GetTraceback(
+    unsigned int domain,
+    uintptr_t ptr);
+
+/* Return non-zero if tracemalloc is tracing */
+extern int _PyTraceMalloc_IsTracing(void);
+
+/* Clear the tracemalloc traces */
+extern void _PyTraceMalloc_ClearTraces(void);
+
+/* Get the tracemalloc traces */
+extern PyObject* _PyTraceMalloc_GetTraces(void);
+
+/* Get the tracemalloc traceback for an object */
+extern PyObject* _PyTraceMalloc_GetObjectTraceback(PyObject *obj);
+
+/* Initialize tracemalloc */
+extern PyStatus _PyTraceMalloc_Init(void);
+
+/* Start tracemalloc */
+extern int _PyTraceMalloc_Start(int max_nframe);
+
+/* Stop tracemalloc */
+extern void _PyTraceMalloc_Stop(void);
+
+/* Get the tracemalloc traceback limit */
+extern int _PyTraceMalloc_GetTracebackLimit(void);
+
+/* Get the memory usage of tracemalloc in bytes */
+extern size_t _PyTraceMalloc_GetMemory(void);
+
+/* Get the current size and peak size of traced memory blocks as a 2-tuple */
+extern PyObject* _PyTraceMalloc_GetTracedMemory(void);
+
+/* Set the peak size of traced memory blocks to the current size */
+extern void _PyTraceMalloc_ResetPeak(void);
+
+#ifdef __cplusplus
+}
+#endif
+#endif // !Py_INTERNAL_TRACEMALLOC_H
diff --git a/Include/internal/pycore_tstate.h b/Include/internal/pycore_tstate.h
new file mode 100644
index 0000000000000000000000000000000000000000..1ed5b1d826aaa4a4b047017ffc3fc26300f9d12e
--- /dev/null
+++ b/Include/internal/pycore_tstate.h
@@ -0,0 +1,46 @@
+#ifndef Py_INTERNAL_TSTATE_H
+#define Py_INTERNAL_TSTATE_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifndef Py_BUILD_CORE
+# error "this header requires Py_BUILD_CORE define"
+#endif
+
+#include "pycore_brc.h"       // struct _brc_thread_state
+#include "pycore_freelist.h"  // struct _Py_freelist_state
+#include "pycore_mimalloc.h"  // struct _mimalloc_thread_state
+#include "pycore_qsbr.h"      // struct qsbr
+
+
+// Every PyThreadState is actually allocated as a _PyThreadStateImpl. The
+// PyThreadState fields are exposed as part of the C API, although most fields
+// are intended to be private. The _PyThreadStateImpl fields are not exposed.
+typedef struct _PyThreadStateImpl {
+    // semi-public fields are in PyThreadState.
+ PyThreadState base; + + PyObject *asyncio_running_loop; // Strong reference + + struct _qsbr_thread_state *qsbr; // only used by free-threaded build + struct llist_node mem_free_queue; // delayed free queue + +#ifdef Py_GIL_DISABLED + struct _gc_thread_state gc; + struct _mimalloc_thread_state mimalloc; + struct _Py_object_freelists freelists; + struct _brc_thread_state brc; +#endif + +#if defined(Py_REF_DEBUG) && defined(Py_GIL_DISABLED) + Py_ssize_t reftotal; // this thread's total refcount operations +#endif + +} _PyThreadStateImpl; + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_TSTATE_H */ diff --git a/Include/internal/pycore_tuple.h b/Include/internal/pycore_tuple.h new file mode 100644 index 0000000000000000000000000000000000000000..14a9e42c3a324cd73bd0527fe5ba805f7a91d7b0 --- /dev/null +++ b/Include/internal/pycore_tuple.h @@ -0,0 +1,35 @@ +#ifndef Py_INTERNAL_TUPLE_H +#define Py_INTERNAL_TUPLE_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +extern void _PyTuple_MaybeUntrack(PyObject *); +extern void _PyTuple_DebugMallocStats(FILE *out); + +/* runtime lifecycle */ + +extern PyStatus _PyTuple_InitGlobalObjects(PyInterpreterState *); + + +/* other API */ + +#define _PyTuple_ITEMS(op) _Py_RVALUE(_PyTuple_CAST(op)->ob_item) + +extern PyObject *_PyTuple_FromArray(PyObject *const *, Py_ssize_t); +PyAPI_FUNC(PyObject *)_PyTuple_FromArraySteal(PyObject *const *, Py_ssize_t); + +typedef struct { + PyObject_HEAD + Py_ssize_t it_index; + PyTupleObject *it_seq; /* Set to NULL when iterator is exhausted */ +} _PyTupleIterObject; + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_TUPLE_H */ diff --git a/Include/internal/pycore_typeobject.h b/Include/internal/pycore_typeobject.h new file mode 100644 index 0000000000000000000000000000000000000000..164b243dae7806e2678536181f82d3a03f1df533 --- /dev/null +++ b/Include/internal/pycore_typeobject.h @@ -0,0 +1,245 @@ +#ifndef Py_INTERNAL_TYPEOBJECT_H +#define Py_INTERNAL_TYPEOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_moduleobject.h" // PyModuleObject +#include "pycore_lock.h" // PyMutex + + +/* state */ + +#define _Py_TYPE_BASE_VERSION_TAG (2<<16) +#define _Py_MAX_GLOBAL_TYPE_VERSION_TAG (_Py_TYPE_BASE_VERSION_TAG - 1) + +/* For now we hard-code this to a value for which we are confident + all the static builtin types will fit (for all builds). */ +#define _Py_MAX_MANAGED_STATIC_BUILTIN_TYPES 200 +#define _Py_MAX_MANAGED_STATIC_EXT_TYPES 10 +#define _Py_MAX_MANAGED_STATIC_TYPES \ + (_Py_MAX_MANAGED_STATIC_BUILTIN_TYPES + _Py_MAX_MANAGED_STATIC_EXT_TYPES) + +struct _types_runtime_state { + /* Used to set PyTypeObject.tp_version_tag for core static types. */ + // bpo-42745: next_version_tag remains shared by all interpreters + // because of static types. + unsigned int next_version_tag; + + struct { + struct { + PyTypeObject *type; + int64_t interp_count; + } types[_Py_MAX_MANAGED_STATIC_TYPES]; + } managed_static; +}; + + +// Type attribute lookup cache: speed up attribute and method lookups, +// see _PyType_Lookup(). 
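+// A lookup derives one slot from the type's version tag and the name's hash;
+// a minimal sketch of the idea (editor's illustration, not the exact hash
+// CPython uses):
+//
+//     size_t index = (version ^ (size_t)PyObject_Hash(name))
+//                    & ((1 << MCACHE_SIZE_EXP) - 1);
+//     struct type_cache_entry *entry = &cache->hashtable[index];
+//     if (entry->version == version && entry->name == name) {
+//         return entry->value;   // cache hit
+//     }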
+struct type_cache_entry { + unsigned int version; // initialized from type->tp_version_tag +#ifdef Py_GIL_DISABLED + _PySeqLock sequence; +#endif + PyObject *name; // reference to exactly a str or None + PyObject *value; // borrowed reference or NULL +}; + +#define MCACHE_SIZE_EXP 12 + +struct type_cache { + struct type_cache_entry hashtable[1 << MCACHE_SIZE_EXP]; +}; + +typedef struct { + PyTypeObject *type; + int isbuiltin; + int readying; + int ready; + // XXX tp_dict can probably be statically allocated, + // instead of dynamically and stored on the interpreter. + PyObject *tp_dict; + PyObject *tp_subclasses; + /* We never clean up weakrefs for static builtin types since + they will effectively never get triggered. However, there + are also some diagnostic uses for the list of weakrefs, + so we still keep it. */ + PyObject *tp_weaklist; +} managed_static_type_state; + +struct types_state { + /* Used to set PyTypeObject.tp_version_tag. + It starts at _Py_MAX_GLOBAL_TYPE_VERSION_TAG + 1, + where all those lower numbers are used for core static types. */ + unsigned int next_version_tag; + + struct type_cache type_cache; + + /* Every static builtin type is initialized for each interpreter + during its own initialization, including for the main interpreter + during global runtime initialization. This is done by calling + _PyStaticType_InitBuiltin(). + + The first time a static builtin type is initialized, all the + normal PyType_Ready() stuff happens. The only difference from + normal is that there are three PyTypeObject fields holding + objects which are stored here (on PyInterpreterState) rather + than in the corresponding PyTypeObject fields. Those are: + tp_dict (cls.__dict__), tp_subclasses (cls.__subclasses__), + and tp_weaklist. + + When a subinterpreter is initialized, each static builtin type + is still initialized, but only the interpreter-specific portion, + namely those three objects. + + Those objects are stored in the PyInterpreterState.types.builtins + array, at the index corresponding to each specific static builtin + type. That index (a size_t value) is stored in the tp_subclasses + field. For static builtin types, we re-purposed the now-unused + tp_subclasses to avoid adding another field to PyTypeObject. + In all other cases tp_subclasses holds a dict like before. + (The field was previously defined as PyObject*, but is now void* + to reflect its dual use.) + + The index for each static builtin type isn't statically assigned. + Instead it is calculated the first time a type is initialized + (by the main interpreter). The index matches the order in which + the type was initialized relative to the others. The actual + value comes from the current value of num_builtins_initialized, + as each type is initialized for the main interpreter. + + num_builtins_initialized is incremented once for each static + builtin type. Once initialization is over for a subinterpreter, + the value will be the same as for all other interpreters. */ + struct { + size_t num_initialized; + managed_static_type_state initialized[_Py_MAX_MANAGED_STATIC_BUILTIN_TYPES]; + } builtins; + /* We apply a similar strategy for managed extension modules. 
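+
+   (Editor's illustration of the index trick described above, not the actual
+   helper functions: for a static builtin type the per-interpreter state is
+   recovered roughly as
+
+       size_t index = (size_t)type->tp_subclasses;
+       managed_static_type_state *state =
+           &interp->types.builtins.initialized[index];
+
+   where `interp` is the current PyInterpreterState.)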
*/ + struct { + size_t num_initialized; + size_t next_index; + managed_static_type_state initialized[_Py_MAX_MANAGED_STATIC_EXT_TYPES]; + } for_extensions; + PyMutex mutex; +}; + + +/* runtime lifecycle */ + +extern PyStatus _PyTypes_InitTypes(PyInterpreterState *); +extern void _PyTypes_FiniTypes(PyInterpreterState *); +extern void _PyTypes_FiniExtTypes(PyInterpreterState *interp); +extern void _PyTypes_Fini(PyInterpreterState *); +extern void _PyTypes_AfterFork(void); + +/* other API */ + +/* Length of array of slotdef pointers used to store slots with the + same __name__. There should be at most MAX_EQUIV-1 slotdef entries with + the same __name__, for any __name__. Since that's a static property, it is + appropriate to declare fixed-size arrays for this. */ +#define MAX_EQUIV 10 + +typedef struct wrapperbase pytype_slotdef; + + +static inline PyObject ** +_PyStaticType_GET_WEAKREFS_LISTPTR(managed_static_type_state *state) +{ + assert(state != NULL); + return &state->tp_weaklist; +} + +extern int _PyStaticType_InitBuiltin( + PyInterpreterState *interp, + PyTypeObject *type); +extern void _PyStaticType_FiniBuiltin( + PyInterpreterState *interp, + PyTypeObject *type); +extern void _PyStaticType_ClearWeakRefs( + PyInterpreterState *interp, + PyTypeObject *type); +extern managed_static_type_state * _PyStaticType_GetState( + PyInterpreterState *interp, + PyTypeObject *type); + +// Export for '_datetime' shared extension. +PyAPI_FUNC(int) _PyStaticType_InitForExtension( + PyInterpreterState *interp, + PyTypeObject *self); + + +/* Like PyType_GetModuleState, but skips verification + * that type is a heap type with an associated module */ +static inline void * +_PyType_GetModuleState(PyTypeObject *type) +{ + assert(PyType_Check(type)); + assert(type->tp_flags & Py_TPFLAGS_HEAPTYPE); + PyHeapTypeObject *et = (PyHeapTypeObject *)type; + assert(et->ht_module); + PyModuleObject *mod = (PyModuleObject *)(et->ht_module); + assert(mod != NULL); + return mod->md_state; +} + + +// Export for 'math' shared extension, used via _PyType_IsReady() static inline +// function +PyAPI_FUNC(PyObject *) _PyType_GetDict(PyTypeObject *); + +extern PyObject * _PyType_GetBases(PyTypeObject *type); +extern PyObject * _PyType_GetMRO(PyTypeObject *type); +extern PyObject* _PyType_GetSubclasses(PyTypeObject *); +extern int _PyType_HasSubclasses(PyTypeObject *); +PyAPI_FUNC(PyObject *) _PyType_GetModuleByDef2(PyTypeObject *, PyTypeObject *, PyModuleDef *); +PyAPI_FUNC(PyObject *) _PyType_GetModuleByDef3(PyTypeObject *, PyTypeObject *, PyTypeObject *, PyModuleDef *); + +// PyType_Ready() must be called if _PyType_IsReady() is false. +// See also the Py_TPFLAGS_READY flag. +static inline int +_PyType_IsReady(PyTypeObject *type) +{ + return _PyType_GetDict(type) != NULL; +} + +extern PyObject* _Py_type_getattro_impl(PyTypeObject *type, PyObject *name, + int *suppress_missing_attribute); +extern PyObject* _Py_type_getattro(PyObject *type, PyObject *name); + +extern PyObject* _Py_BaseObject_RichCompare(PyObject* self, PyObject* other, int op); + +extern PyObject* _Py_slot_tp_getattro(PyObject *self, PyObject *name); +extern PyObject* _Py_slot_tp_getattr_hook(PyObject *self, PyObject *name); + +extern PyTypeObject _PyBufferWrapper_Type; + +PyAPI_FUNC(PyObject*) _PySuper_Lookup(PyTypeObject *su_type, PyObject *su_obj, + PyObject *name, int *meth_found); + +extern PyObject* _PyType_GetFullyQualifiedName(PyTypeObject *type, char sep); + +// Perform the following operation, in a thread-safe way when required by the +// build mode. 
+// +// self->tp_flags = (self->tp_flags & ~mask) | flags; +extern void _PyType_SetFlags(PyTypeObject *self, unsigned long mask, + unsigned long flags); +extern int _PyType_AddMethod(PyTypeObject *, PyMethodDef *); + +// Like _PyType_SetFlags(), but apply the operation to self and any of its +// subclasses without Py_TPFLAGS_IMMUTABLETYPE set. +extern void _PyType_SetFlagsRecursive(PyTypeObject *self, unsigned long mask, + unsigned long flags); + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_TYPEOBJECT_H */ diff --git a/Include/internal/pycore_typevarobject.h b/Include/internal/pycore_typevarobject.h new file mode 100644 index 0000000000000000000000000000000000000000..a368edebd622a163b3bc9de07bff1e219fd85d29 --- /dev/null +++ b/Include/internal/pycore_typevarobject.h @@ -0,0 +1,27 @@ +#ifndef Py_INTERNAL_TYPEVAROBJECT_H +#define Py_INTERNAL_TYPEVAROBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +extern PyObject *_Py_make_typevar(PyObject *, PyObject *, PyObject *); +extern PyObject *_Py_make_paramspec(PyThreadState *, PyObject *); +extern PyObject *_Py_make_typevartuple(PyThreadState *, PyObject *); +extern PyObject *_Py_make_typealias(PyThreadState *, PyObject *); +extern PyObject *_Py_subscript_generic(PyThreadState *, PyObject *); +extern PyObject *_Py_set_typeparam_default(PyThreadState *, PyObject *, PyObject *); +extern int _Py_initialize_generic(PyInterpreterState *); +extern void _Py_clear_generic_types(PyInterpreterState *); + +extern PyTypeObject _PyTypeAlias_Type; +extern PyTypeObject _PyNoDefault_Type; +extern PyObject _Py_NoDefaultStruct; + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_TYPEVAROBJECT_H */ diff --git a/Include/internal/pycore_ucnhash.h b/Include/internal/pycore_ucnhash.h new file mode 100644 index 0000000000000000000000000000000000000000..1561dfbb3150d3fe3d61898c51701ef2831e8e2f --- /dev/null +++ b/Include/internal/pycore_ucnhash.h @@ -0,0 +1,36 @@ +/* Unicode name database interface */ +#ifndef Py_INTERNAL_UCNHASH_H +#define Py_INTERNAL_UCNHASH_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +/* revised ucnhash CAPI interface (exported through a "wrapper") */ + +#define PyUnicodeData_CAPSULE_NAME "unicodedata._ucnhash_CAPI" + +typedef struct { + + /* Get name for a given character code. + Returns non-zero if success, zero if not. + Does not set Python exceptions. */ + int (*getname)(Py_UCS4 code, char* buffer, int buflen, + int with_alias_and_seq); + + /* Get character code for a given name. + Same error handling as for getname(). 
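+
+       An editor's sketch (illustrative, not part of the header), assuming
+       `capi` was obtained from the "unicodedata._ucnhash_CAPI" capsule:
+
+           Py_UCS4 code;
+           if (capi->getcode("LATIN SMALL LETTER A", 20, &code, 0)) {
+               assert(code == 0x61);
+           }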
*/ + int (*getcode)(const char* name, int namelen, Py_UCS4* code, + int with_named_seq); + +} _PyUnicode_Name_CAPI; + +extern _PyUnicode_Name_CAPI* _PyUnicode_GetNameCAPI(void); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_UCNHASH_H */ diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h new file mode 100644 index 0000000000000000000000000000000000000000..5ebc7c120fc29dea743e0caea32d0ece54fc9898 --- /dev/null +++ b/Include/internal/pycore_unicodeobject.h @@ -0,0 +1,351 @@ +#ifndef Py_INTERNAL_UNICODEOBJECT_H +#define Py_INTERNAL_UNICODEOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_lock.h" // PyMutex +#include "pycore_fileutils.h" // _Py_error_handler +#include "pycore_identifier.h" // _Py_Identifier +#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI +#include "pycore_global_objects.h" // _Py_SINGLETON + +/* --- Characters Type APIs ----------------------------------------------- */ + +extern int _PyUnicode_IsXidStart(Py_UCS4 ch); +extern int _PyUnicode_IsXidContinue(Py_UCS4 ch); +extern int _PyUnicode_ToLowerFull(Py_UCS4 ch, Py_UCS4 *res); +extern int _PyUnicode_ToTitleFull(Py_UCS4 ch, Py_UCS4 *res); +extern int _PyUnicode_ToUpperFull(Py_UCS4 ch, Py_UCS4 *res); +extern int _PyUnicode_ToFoldedFull(Py_UCS4 ch, Py_UCS4 *res); +extern int _PyUnicode_IsCaseIgnorable(Py_UCS4 ch); +extern int _PyUnicode_IsCased(Py_UCS4 ch); + +/* --- Unicode API -------------------------------------------------------- */ + +// Export for '_json' shared extension +PyAPI_FUNC(int) _PyUnicode_CheckConsistency( + PyObject *op, + int check_content); + +PyAPI_FUNC(void) _PyUnicode_ExactDealloc(PyObject *op); +extern Py_ssize_t _PyUnicode_InternedSize(void); +extern Py_ssize_t _PyUnicode_InternedSize_Immortal(void); + +// Get a copy of a Unicode string. +// Export for '_datetime' shared extension. +PyAPI_FUNC(PyObject*) _PyUnicode_Copy( + PyObject *unicode); + +/* Unsafe version of PyUnicode_Fill(): don't check arguments and so may crash + if parameters are invalid (e.g. if length is longer than the string). */ +extern void _PyUnicode_FastFill( + PyObject *unicode, + Py_ssize_t start, + Py_ssize_t length, + Py_UCS4 fill_char + ); + +/* Unsafe version of PyUnicode_CopyCharacters(): don't check arguments and so + may crash if parameters are invalid (e.g. if the output string + is too short). */ +extern void _PyUnicode_FastCopyCharacters( + PyObject *to, + Py_ssize_t to_start, + PyObject *from, + Py_ssize_t from_start, + Py_ssize_t how_many + ); + +/* Create a new string from a buffer of ASCII characters. + WARNING: Don't check if the string contains any non-ASCII character. */ +extern PyObject* _PyUnicode_FromASCII( + const char *buffer, + Py_ssize_t size); + +/* Compute the maximum character of the substring unicode[start:end]. + Return 127 for an empty string. */ +extern Py_UCS4 _PyUnicode_FindMaxChar ( + PyObject *unicode, + Py_ssize_t start, + Py_ssize_t end); + +/* --- _PyUnicodeWriter API ----------------------------------------------- */ + +/* Format the object based on the format_spec, as defined in PEP 3101 + (Advanced String Formatting). 
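+
+   A minimal call sketch (editor's illustration; _PyUnicodeWriter_Init(),
+   _PyUnicodeWriter_Dealloc() and _PyUnicodeWriter_Finish() are CPython
+   internals, and `obj` and `format_spec` are assumed to be valid objects):
+
+       _PyUnicodeWriter writer;
+       _PyUnicodeWriter_Init(&writer);
+       if (_PyUnicode_FormatAdvancedWriter(&writer, obj, format_spec, 0,
+                                           PyUnicode_GET_LENGTH(format_spec)) < 0) {
+           _PyUnicodeWriter_Dealloc(&writer);
+           return NULL;
+       }
+       return _PyUnicodeWriter_Finish(&writer);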
*/
+extern int _PyUnicode_FormatAdvancedWriter(
+    _PyUnicodeWriter *writer,
+    PyObject *obj,
+    PyObject *format_spec,
+    Py_ssize_t start,
+    Py_ssize_t end);
+
+/* --- UTF-7 Codecs ------------------------------------------------------- */
+
+extern PyObject* _PyUnicode_EncodeUTF7(
+    PyObject *unicode,          /* Unicode object */
+    int base64SetO,             /* Encode RFC2152 Set O characters in base64 */
+    int base64WhiteSpace,       /* Encode whitespace (sp, ht, nl, cr) in base64 */
+    const char *errors);        /* error handling */
+
+/* --- UTF-8 Codecs ------------------------------------------------------- */
+
+// Export for '_tkinter' shared extension.
+PyAPI_FUNC(PyObject*) _PyUnicode_AsUTF8String(
+    PyObject *unicode,
+    const char *errors);
+
+/* --- UTF-32 Codecs ------------------------------------------------------ */
+
+// Export for '_tkinter' shared extension
+PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF32(
+    PyObject *object,           /* Unicode object */
+    const char *errors,         /* error handling */
+    int byteorder);             /* byteorder to use 0=BOM+native;-1=LE,1=BE */
+
+/* --- UTF-16 Codecs ------------------------------------------------------ */
+
+// Returns a Python bytes object holding the UTF-16 encoded value of
+// the Unicode data.
+//
+// If byteorder is not 0, output is written according to the following
+// byte order:
+//
+// byteorder == -1: little endian
+// byteorder == 0:  native byte order (writes a BOM mark)
+// byteorder == 1:  big endian
+//
+// If byteorder is 0, the output will always start with the
+// Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
+// prepended.
+//
+// Export for '_tkinter' shared extension
+PyAPI_FUNC(PyObject*) _PyUnicode_EncodeUTF16(
+    PyObject* unicode,          /* Unicode object */
+    const char *errors,         /* error handling */
+    int byteorder);             /* byteorder to use 0=BOM+native;-1=LE,1=BE */
+
+/* --- Unicode-Escape Codecs ---------------------------------------------- */
+
+/* Variant of PyUnicode_DecodeUnicodeEscape that supports partial decoding. */
+extern PyObject* _PyUnicode_DecodeUnicodeEscapeStateful(
+    const char *string,         /* Unicode-Escape encoded string */
+    Py_ssize_t length,          /* size of string */
+    const char *errors,         /* error handling */
+    Py_ssize_t *consumed);      /* bytes consumed */
+
+// Helper for PyUnicode_DecodeUnicodeEscape that detects invalid escape
+// chars.
+// Export for test_peg_generator.
+PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal2(
+    const char *string,         /* Unicode-Escape encoded string */
+    Py_ssize_t length,          /* size of string */
+    const char *errors,         /* error handling */
+    Py_ssize_t *consumed,       /* bytes consumed */
+    int *first_invalid_escape_char, /* on return, if not -1, contains the first
+                                       invalid escaped char (<= 0xff) or invalid
+                                       octal escape (> 0xff) in string. */
+    const char **first_invalid_escape_ptr); /* on return, if not NULL, may
+                                               point to the first invalid escaped
+                                               char in string.
+                                               May be NULL if errors is not NULL. */
+// Export for binary compatibility.
+PyAPI_FUNC(PyObject*) _PyUnicode_DecodeUnicodeEscapeInternal(
+    const char *string,         /* Unicode-Escape encoded string */
+    Py_ssize_t length,          /* size of string */
+    const char *errors,         /* error handling */
+    Py_ssize_t *consumed,       /* bytes consumed */
+    const char **first_invalid_escape); /* on return, points to first
+                                           invalid escaped char in
+                                           string. */
+
+/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */
+
+/* Variant of PyUnicode_DecodeRawUnicodeEscape that supports partial decoding.
*/
+extern PyObject* _PyUnicode_DecodeRawUnicodeEscapeStateful(
+    const char *string,         /* Unicode-Escape encoded string */
+    Py_ssize_t length,          /* size of string */
+    const char *errors,         /* error handling */
+    Py_ssize_t *consumed);      /* bytes consumed */
+
+/* --- Latin-1 Codecs ----------------------------------------------------- */
+
+extern PyObject* _PyUnicode_AsLatin1String(
+    PyObject* unicode,
+    const char* errors);
+
+/* --- ASCII Codecs ------------------------------------------------------- */
+
+extern PyObject* _PyUnicode_AsASCIIString(
+    PyObject* unicode,
+    const char* errors);
+
+/* --- Character Map Codecs ----------------------------------------------- */
+
+/* Translate a Unicode object by applying a character mapping table to
+   it and return the resulting Unicode object.
+
+   The mapping table must map Unicode ordinal integers to Unicode strings,
+   Unicode ordinal integers or None (causing deletion of the character).
+
+   Mapping tables may be dictionaries or sequences. Unmapped character
+   ordinals (ones which cause a LookupError) are left untouched and
+   are copied as-is.
+*/
+extern PyObject* _PyUnicode_EncodeCharmap(
+    PyObject *unicode,          /* Unicode object */
+    PyObject *mapping,          /* encoding mapping */
+    const char *errors);        /* error handling */
+
+/* --- Decimal Encoder ---------------------------------------------------- */
+
+// Converts a Unicode object holding a decimal value to an ASCII string
+// for use in the int, float and complex parsers.
+// Transforms code points that have the decimal digit property to the
+// corresponding ASCII digit code points. Transforms spaces to ASCII.
+// Transforms code points starting from the first non-ASCII code point that
+// is neither a decimal digit nor a space to the end into '?'.
+//
+// Export for '_testinternalcapi' shared extension.
+PyAPI_FUNC(PyObject*) _PyUnicode_TransformDecimalAndSpaceToASCII(
+    PyObject *unicode);         /* Unicode object */
+
+/* --- Methods & Slots ---------------------------------------------------- */
+
+PyAPI_FUNC(PyObject*) _PyUnicode_JoinArray(
+    PyObject *separator,
+    PyObject *const *items,
+    Py_ssize_t seqlen
+    );
+
+/* Test whether a unicode object is equal to an ASCII identifier. Return 1 if
+   true, 0 otherwise. The right argument must be an ASCII identifier.
+   Any error that occurs inside will be cleared before returning. */
+extern int _PyUnicode_EqualToASCIIId(
+    PyObject *left,             /* Left string */
+    _Py_Identifier *right       /* Right identifier */
+    );
+
+// Test whether a unicode object is equal to an ASCII string. Return 1 if true,
+// 0 otherwise. The right argument must be an ASCII-encoded string.
+// Any error that occurs inside will be cleared before returning.
+// Export for '_ctypes' shared extension
+PyAPI_FUNC(int) _PyUnicode_EqualToASCIIString(
+    PyObject *left,
+    const char *right           /* ASCII-encoded string */
+    );
+
+/* Externally visible for str.strip(unicode) */
+extern PyObject* _PyUnicode_XStrip(
+    PyObject *self,
+    int striptype,
+    PyObject *sepobj
+    );
+
+
+/* Using explicit passed-in values, insert the thousands grouping
+   into the string pointed to by buffer.
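+   For example (editor's illustration), grouping "\3" with a thousands_sep
+   of "," turns the digit string "1234567" into "1,234,567".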
For the argument descriptions, + see Objects/stringlib/localeutil.h */ +extern Py_ssize_t _PyUnicode_InsertThousandsGrouping( + _PyUnicodeWriter *writer, + Py_ssize_t n_buffer, + PyObject *digits, + Py_ssize_t d_pos, + Py_ssize_t n_digits, + Py_ssize_t min_width, + const char *grouping, + PyObject *thousands_sep, + Py_UCS4 *maxchar); + +/* --- Misc functions ----------------------------------------------------- */ + +extern PyObject* _PyUnicode_FormatLong(PyObject *, int, int, int); + +/* Fast equality check when the inputs are known to be exact unicode types + and where the hash values are equal (i.e. a very probable match) */ +extern int _PyUnicode_EQ(PyObject *, PyObject *); + +// Equality check. +// Export for '_pickle' shared extension. +PyAPI_FUNC(int) _PyUnicode_Equal(PyObject *, PyObject *); + +extern int _PyUnicode_WideCharString_Converter(PyObject *, void *); +extern int _PyUnicode_WideCharString_Opt_Converter(PyObject *, void *); + +// Export for test_peg_generator +PyAPI_FUNC(Py_ssize_t) _PyUnicode_ScanIdentifier(PyObject *); + +/* --- Runtime lifecycle -------------------------------------------------- */ + +extern void _PyUnicode_InitState(PyInterpreterState *); +extern PyStatus _PyUnicode_InitGlobalObjects(PyInterpreterState *); +extern PyStatus _PyUnicode_InitTypes(PyInterpreterState *); +extern void _PyUnicode_Fini(PyInterpreterState *); +extern void _PyUnicode_FiniTypes(PyInterpreterState *); + +extern PyTypeObject _PyUnicodeASCIIIter_Type; + +/* --- Interning ---------------------------------------------------------- */ + +// All these are "ref-neutral", like the public PyUnicode_InternInPlace. + +// Explicit interning routines: +PyAPI_FUNC(void) _PyUnicode_InternMortal(PyInterpreterState *interp, PyObject **); +PyAPI_FUNC(void) _PyUnicode_InternImmortal(PyInterpreterState *interp, PyObject **); +// Left here to help backporting: +PyAPI_FUNC(void) _PyUnicode_InternInPlace(PyInterpreterState *interp, PyObject **p); +// Only for singletons in the _PyRuntime struct: +extern void _PyUnicode_InternStatic(PyInterpreterState *interp, PyObject **); + +/* --- Other API ---------------------------------------------------------- */ + +struct _Py_unicode_runtime_ids { + PyMutex mutex; + // next_index value must be preserved when Py_Initialize()/Py_Finalize() + // is called multiple times: see _PyUnicode_FromId() implementation. + Py_ssize_t next_index; +}; + +struct _Py_unicode_runtime_state { + struct _Py_unicode_runtime_ids ids; +}; + +/* fs_codec.encoding is initialized to NULL. + Later, it is set to a non-NULL string by _PyUnicode_InitEncodings(). */ +struct _Py_unicode_fs_codec { + char *encoding; // Filesystem encoding (encoded to UTF-8) + int utf8; // encoding=="utf-8"? + char *errors; // Filesystem errors (encoded to UTF-8) + _Py_error_handler error_handler; +}; + +struct _Py_unicode_ids { + Py_ssize_t size; + PyObject **array; +}; + +struct _Py_unicode_state { + struct _Py_unicode_fs_codec fs_codec; + + _PyUnicode_Name_CAPI *ucnhash_capi; + + // Unicode identifiers (_Py_Identifier): see _PyUnicode_FromId() + struct _Py_unicode_ids ids; +}; + +extern void _PyUnicode_ClearInterned(PyInterpreterState *interp); + +// Like PyUnicode_AsUTF8(), but check for embedded null characters. +// Export for '_sqlite3' shared extension. 
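+// Editor's sketch (illustrative): for a string with an embedded NUL such as
+// "a\0b", this returns NULL with an exception set, where PyUnicode_AsUTF8()
+// would return a C string that silently appears truncated:
+//
+//     const char *s = _PyUnicode_AsUTF8NoNUL(obj);
+//     if (s == NULL) {
+//         return NULL;   // error already set
+//     }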
+PyAPI_FUNC(const char *) _PyUnicode_AsUTF8NoNUL(PyObject *); + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_UNICODEOBJECT_H */ diff --git a/Include/internal/pycore_unicodeobject_generated.h b/Include/internal/pycore_unicodeobject_generated.h new file mode 100644 index 0000000000000000000000000000000000000000..7f6b6e07984a9a9741ce9329cac07d7e301d0405 --- /dev/null +++ b/Include/internal/pycore_unicodeobject_generated.h @@ -0,0 +1,2980 @@ +#ifndef Py_INTERNAL_UNICODEOBJECT_GENERATED_H +#define Py_INTERNAL_UNICODEOBJECT_GENERATED_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +/* The following is auto-generated by Tools/build/generate_global_objects.py. */ +static inline void +_PyUnicode_InitStaticStrings(PyInterpreterState *interp) { + PyObject *string; + string = &_Py_ID(CANCELLED); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(FINISHED); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(False); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(JSONDecodeError); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(PENDING); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(Py_Repr); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(TextIOWrapper); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(True); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(WarningMessage); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(_WindowsConsoleIO); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__IOBase_closed); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__abc_tpflags__); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__abs__); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__abstractmethods__); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__add__); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__aenter__); + _PyUnicode_InternStatic(interp, &string); + 
assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__aexit__); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__aiter__); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__all__); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__and__); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__anext__); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__annotations__); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__args__); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__await__); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__bases__); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__bool__); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__buffer__); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__build_class__); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__builtins__); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__bytes__); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__call__); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__cantrace__); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__class__); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__class_getitem__); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__classcell__); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(__classdict__); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = 
&_Py_ID(__classdictcell__);
+    _PyUnicode_InternStatic(interp, &string);
+    assert(_PyUnicode_CheckConsistency(string, 1));
+    assert(PyUnicode_GET_LENGTH(string) != 1);
+    // Intern the remaining statically allocated identifiers, applying the
+    // same intern-and-check pattern to each table entry in turn.
+    static PyObject *const static_ids[] = {
+        &_Py_ID(__complex__), &_Py_ID(__contains__), &_Py_ID(__copy__),
+        &_Py_ID(__ctypes_from_outparam__), &_Py_ID(__del__), &_Py_ID(__delattr__),
+        &_Py_ID(__delete__), &_Py_ID(__delitem__), &_Py_ID(__dict__),
+        &_Py_ID(__dictoffset__), &_Py_ID(__dir__), &_Py_ID(__divmod__),
+        &_Py_ID(__doc__), &_Py_ID(__enter__), &_Py_ID(__eq__), &_Py_ID(__exit__),
+        &_Py_ID(__file__), &_Py_ID(__firstlineno__), &_Py_ID(__float__),
+        &_Py_ID(__floordiv__), &_Py_ID(__format__), &_Py_ID(__fspath__),
+        &_Py_ID(__ge__), &_Py_ID(__get__), &_Py_ID(__getattr__),
+        &_Py_ID(__getattribute__), &_Py_ID(__getinitargs__), &_Py_ID(__getitem__),
+        &_Py_ID(__getnewargs__), &_Py_ID(__getnewargs_ex__), &_Py_ID(__getstate__),
+        &_Py_ID(__gt__), &_Py_ID(__hash__), &_Py_ID(__iadd__), &_Py_ID(__iand__),
+        &_Py_ID(__ifloordiv__), &_Py_ID(__ilshift__), &_Py_ID(__imatmul__),
+        &_Py_ID(__imod__), &_Py_ID(__import__), &_Py_ID(__imul__),
+        &_Py_ID(__index__), &_Py_ID(__init__), &_Py_ID(__init_subclass__),
+        &_Py_ID(__instancecheck__), &_Py_ID(__int__), &_Py_ID(__invert__),
+        &_Py_ID(__ior__), &_Py_ID(__ipow__), &_Py_ID(__irshift__),
+        &_Py_ID(__isabstractmethod__), &_Py_ID(__isub__), &_Py_ID(__iter__),
+        &_Py_ID(__itruediv__), &_Py_ID(__ixor__), &_Py_ID(__le__), &_Py_ID(__len__),
+        &_Py_ID(__length_hint__), &_Py_ID(__lltrace__), &_Py_ID(__loader__),
+        &_Py_ID(__lshift__), &_Py_ID(__lt__), &_Py_ID(__main__),
+        &_Py_ID(__match_args__), &_Py_ID(__matmul__), &_Py_ID(__missing__),
+        &_Py_ID(__mod__), &_Py_ID(__module__), &_Py_ID(__mro_entries__),
+        &_Py_ID(__mul__), &_Py_ID(__name__), &_Py_ID(__ne__), &_Py_ID(__neg__),
+        &_Py_ID(__new__), &_Py_ID(__newobj__), &_Py_ID(__newobj_ex__),
+        &_Py_ID(__next__), &_Py_ID(__notes__), &_Py_ID(__or__),
+        &_Py_ID(__orig_class__), &_Py_ID(__origin__), &_Py_ID(__package__),
+        &_Py_ID(__parameters__), &_Py_ID(__path__), &_Py_ID(__pos__),
+        &_Py_ID(__pow__), &_Py_ID(__prepare__), &_Py_ID(__qualname__),
+        &_Py_ID(__radd__), &_Py_ID(__rand__), &_Py_ID(__rdivmod__),
+        &_Py_ID(__reduce__), &_Py_ID(__reduce_ex__), &_Py_ID(__release_buffer__),
+        &_Py_ID(__repr__), &_Py_ID(__reversed__), &_Py_ID(__rfloordiv__),
+        &_Py_ID(__rlshift__), &_Py_ID(__rmatmul__), &_Py_ID(__rmod__),
+        &_Py_ID(__rmul__), &_Py_ID(__ror__), &_Py_ID(__round__), &_Py_ID(__rpow__),
+        &_Py_ID(__rrshift__), &_Py_ID(__rshift__), &_Py_ID(__rsub__),
+        &_Py_ID(__rtruediv__), &_Py_ID(__rxor__), &_Py_ID(__set__),
+        &_Py_ID(__set_name__), &_Py_ID(__setattr__), &_Py_ID(__setitem__),
+        &_Py_ID(__setstate__), &_Py_ID(__sizeof__), &_Py_ID(__slotnames__),
+        &_Py_ID(__slots__), &_Py_ID(__spec__), &_Py_ID(__static_attributes__),
+        &_Py_ID(__str__), &_Py_ID(__sub__), &_Py_ID(__subclasscheck__),
+        &_Py_ID(__subclasshook__), &_Py_ID(__truediv__), &_Py_ID(__trunc__),
+        &_Py_ID(__type_params__), &_Py_ID(__typing_is_unpacked_typevartuple__),
+        &_Py_ID(__typing_prepare_subst__), &_Py_ID(__typing_subst__),
+        &_Py_ID(__typing_unpacked_tuple_args__), &_Py_ID(__warningregistry__),
+        &_Py_ID(__weaklistoffset__), &_Py_ID(__weakref__), &_Py_ID(__xor__),
+        &_Py_ID(_abc_impl), &_Py_ID(_abstract_), &_Py_ID(_active),
+        &_Py_ID(_align_), &_Py_ID(_annotation), &_Py_ID(_anonymous_),
+        &_Py_ID(_argtypes_), &_Py_ID(_as_parameter_),
+        &_Py_ID(_asyncio_future_blocking), &_Py_ID(_blksize), &_Py_ID(_bootstrap),
+        &_Py_ID(_check_retval_), &_Py_ID(_dealloc_warn), &_Py_ID(_feature_version),
+        &_Py_ID(_field_types), &_Py_ID(_fields_), &_Py_ID(_finalizing),
+        &_Py_ID(_find_and_load), &_Py_ID(_fix_up_module), &_Py_ID(_flags_),
+        &_Py_ID(_get_sourcefile), &_Py_ID(_handle_fromlist), &_Py_ID(_initializing),
+        &_Py_ID(_io), &_Py_ID(_is_text_encoding), &_Py_ID(_length_),
+        &_Py_ID(_limbo), &_Py_ID(_lock_unlock_module), &_Py_ID(_loop),
+        &_Py_ID(_needs_com_addref_), &_Py_ID(_only_immortal), &_Py_ID(_pack_),
+        &_Py_ID(_restype_), &_Py_ID(_showwarnmsg), &_Py_ID(_shutdown),
+        &_Py_ID(_slotnames), &_Py_ID(_strptime), &_Py_ID(_strptime_datetime),
+        &_Py_ID(_swappedbytes_), &_Py_ID(_type_),
+        &_Py_ID(_uninitialized_submodules), &_Py_ID(_warn_unawaited_coroutine),
+        &_Py_ID(_xoptions), &_Py_ID(abs_tol), &_Py_ID(access), &_Py_ID(aclose),
+        &_Py_ID(add), &_Py_ID(add_done_callback), &_Py_ID(after_in_child),
+        &_Py_ID(after_in_parent), &_Py_ID(aggregate_class), &_Py_ID(alias),
+        &_Py_ID(allow_code), &_Py_ID(append), &_Py_ID(arg), &_Py_ID(argdefs),
+        &_Py_ID(args), &_Py_ID(arguments), &_Py_ID(argv),
+        &_Py_ID(as_integer_ratio), &_Py_ID(asend), &_Py_ID(ast), &_Py_ID(athrow),
+        &_Py_ID(attribute), &_Py_ID(authorizer_callback), &_Py_ID(autocommit),
+        &_Py_ID(backtick), &_Py_ID(base), &_Py_ID(before), &_Py_ID(big),
+        &_Py_ID(binary_form), &_Py_ID(block), &_Py_ID(bound), &_Py_ID(buffer),
+        &_Py_ID(buffer_callback), &_Py_ID(buffer_size), &_Py_ID(buffering),
+        &_Py_ID(buffers), &_Py_ID(bufsize), &_Py_ID(builtins), &_Py_ID(byteorder),
+        &_Py_ID(bytes), &_Py_ID(bytes_per_sep), &_Py_ID(c_call),
+        &_Py_ID(c_exception), &_Py_ID(c_return), &_Py_ID(cached_datetime_module),
+        &_Py_ID(cached_statements), &_Py_ID(cadata), &_Py_ID(cafile),
+        &_Py_ID(call), &_Py_ID(call_exception_handler), &_Py_ID(call_soon),
+        &_Py_ID(callback), &_Py_ID(cancel), &_Py_ID(capath), &_Py_ID(category),
+        &_Py_ID(cb_type), &_Py_ID(certfile), &_Py_ID(check_same_thread),
+        &_Py_ID(clear), &_Py_ID(close), &_Py_ID(closed), &_Py_ID(closefd),
+        &_Py_ID(closure), &_Py_ID(co_argcount), &_Py_ID(co_cellvars),
+        &_Py_ID(co_code), &_Py_ID(co_consts), &_Py_ID(co_exceptiontable),
+        &_Py_ID(co_filename), &_Py_ID(co_firstlineno), &_Py_ID(co_flags),
+        &_Py_ID(co_freevars), &_Py_ID(co_kwonlyargcount), &_Py_ID(co_linetable),
+        &_Py_ID(co_name), &_Py_ID(co_names), &_Py_ID(co_nlocals),
+        &_Py_ID(co_posonlyargcount), &_Py_ID(co_qualname), &_Py_ID(co_stacksize),
+        &_Py_ID(co_varnames), &_Py_ID(code), &_Py_ID(col_offset),
+        &_Py_ID(command), &_Py_ID(comment_factory), &_Py_ID(compile_mode),
+        &_Py_ID(consts), &_Py_ID(context), &_Py_ID(contravariant),
+        &_Py_ID(cookie), &_Py_ID(copy), &_Py_ID(copyreg), &_Py_ID(coro),
+        &_Py_ID(count), &_Py_ID(covariant), &_Py_ID(cwd), &_Py_ID(data),
+        &_Py_ID(database), &_Py_ID(day), &_Py_ID(decode), &_Py_ID(decoder),
+        &_Py_ID(default), &_Py_ID(defaultaction), &_Py_ID(delete), &_Py_ID(depth),
+        &_Py_ID(desired_access), &_Py_ID(detect_types), &_Py_ID(deterministic),
+        &_Py_ID(device), &_Py_ID(dict), &_Py_ID(dictcomp),
+        &_Py_ID(difference_update), &_Py_ID(digest), &_Py_ID(digest_size),
+        &_Py_ID(digestmod), &_Py_ID(dir_fd), &_Py_ID(discard),
+        &_Py_ID(dispatch_table), &_Py_ID(displayhook), &_Py_ID(dklen),
+        &_Py_ID(doc), &_Py_ID(dont_inherit), &_Py_ID(dst), &_Py_ID(dst_dir_fd),
+        &_Py_ID(eager_start), &_Py_ID(effective_ids), &_Py_ID(element_factory),
+        &_Py_ID(encode), &_Py_ID(encoding), &_Py_ID(end), &_Py_ID(end_col_offset),
+        &_Py_ID(end_lineno), &_Py_ID(end_offset), &_Py_ID(endpos),
+        &_Py_ID(entrypoint), &_Py_ID(env), &_Py_ID(errors), &_Py_ID(event),
+        &_Py_ID(eventmask), &_Py_ID(exc_type), &_Py_ID(exc_value),
+        &_Py_ID(excepthook), &_Py_ID(exception), &_Py_ID(existing_file_name),
+        &_Py_ID(exp), &_Py_ID(extend), &_Py_ID(extra_tokens), &_Py_ID(facility),
+        &_Py_ID(factory), &_Py_ID(false), &_Py_ID(family), &_Py_ID(fanout),
+        &_Py_ID(fd), &_Py_ID(fd2), &_Py_ID(fdel), &_Py_ID(fget), &_Py_ID(file),
+        &_Py_ID(file_actions), &_Py_ID(filename), &_Py_ID(fileno),
+        &_Py_ID(filepath), &_Py_ID(fillvalue), &_Py_ID(filter), &_Py_ID(filters),
+        &_Py_ID(final), &_Py_ID(find_class), &_Py_ID(fix_imports),
+        &_Py_ID(flags), &_Py_ID(flush), &_Py_ID(fold), &_Py_ID(follow_symlinks),
+        &_Py_ID(format), &_Py_ID(from_param), &_Py_ID(fromlist),
+        &_Py_ID(fromtimestamp), &_Py_ID(fromutc), &_Py_ID(fset), &_Py_ID(func),
+        &_Py_ID(future), &_Py_ID(generation), &_Py_ID(genexpr), &_Py_ID(get),
+        &_Py_ID(get_debug), &_Py_ID(get_event_loop), &_Py_ID(get_loop),
+        &_Py_ID(get_source), &_Py_ID(getattr), &_Py_ID(getstate), &_Py_ID(gid),
+        &_Py_ID(globals), &_Py_ID(groupindex), &_Py_ID(groups), &_Py_ID(handle),
+        &_Py_ID(handle_seq), &_Py_ID(has_location), &_Py_ID(hash_name),
+        &_Py_ID(header), &_Py_ID(headers), &_Py_ID(hi), &_Py_ID(hook),
+        &_Py_ID(hour), &_Py_ID(ident), &_Py_ID(identity_hint), &_Py_ID(ignore),
+        &_Py_ID(imag), &_Py_ID(importlib), &_Py_ID(in_fd), &_Py_ID(incoming),
+        &_Py_ID(indexgroup), &_Py_ID(inf), &_Py_ID(infer_variance),
+        &_Py_ID(inherit_handle), &_Py_ID(inheritable), &_Py_ID(initial),
+        &_Py_ID(initial_bytes), &_Py_ID(initial_owner), &_Py_ID(initial_state),
+        &_Py_ID(initial_value), &_Py_ID(initval), &_Py_ID(inner_size),
+        &_Py_ID(input), &_Py_ID(insert_comments), &_Py_ID(insert_pis),
+        &_Py_ID(instructions), &_Py_ID(intern), &_Py_ID(intersection),
+        &_Py_ID(interval), &_Py_ID(is_running), &_Py_ID(isatty),
+        &_Py_ID(isinstance), &_Py_ID(isoformat), &_Py_ID(isolation_level),
+        &_Py_ID(istext), &_Py_ID(item), &_Py_ID(items), &_Py_ID(iter),
+        &_Py_ID(iterable), &_Py_ID(iterations), &_Py_ID(join), &_Py_ID(jump),
+        &_Py_ID(keepends), &_Py_ID(key), &_Py_ID(keyfile), &_Py_ID(keys),
+        &_Py_ID(kind), &_Py_ID(kw), &_Py_ID(kw1), &_Py_ID(kw2),
+        &_Py_ID(kwdefaults), &_Py_ID(label), &_Py_ID(lambda), &_Py_ID(last),
+        &_Py_ID(last_exc), &_Py_ID(last_node), &_Py_ID(last_traceback),
+        &_Py_ID(last_type), &_Py_ID(last_value), &_Py_ID(latin1),
+        &_Py_ID(leaf_size),
+    };
+    for (size_t i = 0; i < Py_ARRAY_LENGTH(static_ids); i++) {
+        string = static_ids[i];
+        _PyUnicode_InternStatic(interp, &string);
+        assert(_PyUnicode_CheckConsistency(string, 1));
+        assert(PyUnicode_GET_LENGTH(string) != 1);
+    }
+    string = &_Py_ID(len);
+    _PyUnicode_InternStatic(interp, &string);
+ assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(length); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(level); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(limit); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(line); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(line_buffering); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(lineno); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(listcomp); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(little); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(lo); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(locale); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(locals); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(logoption); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(loop); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(manual_reset); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(mapping); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(match); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(max_length); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(maxdigits); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(maxevents); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(maxlen); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(maxmem); + _PyUnicode_InternStatic(interp, &string); + 
assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(maxsplit); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(maxvalue); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(memLevel); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(memlimit); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(message); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(metaclass); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(metadata); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(method); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(microsecond); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(milliseconds); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(minute); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(mod); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(mode); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(module); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(module_globals); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(modules); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(month); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(mro); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(msg); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(mutex); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(mycmp); + _PyUnicode_InternStatic(interp, &string); + 
assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(n_arg); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(n_fields); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(n_sequence_fields); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(n_unnamed_fields); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(name); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(name_from); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(namespace_separator); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(namespaces); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(narg); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(ndigits); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(nested); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(new_file_name); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(new_limit); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(newline); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(newlines); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(next); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(nlocals); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(node_depth); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(node_offset); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(ns); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(nstype); + _PyUnicode_InternStatic(interp, 
&string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(nt); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(null); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(number); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(obj); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(object); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(offset); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(offset_dst); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(offset_src); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(on_type_read); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(onceregistry); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(only_keys); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(oparg); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(opcode); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(open); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(opener); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(operation); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(optimize); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(options); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(order); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(origin); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(out_fd); + _PyUnicode_InternStatic(interp, &string); + 
assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(outgoing); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(overlapped); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(owner); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(pages); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(parent); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(password); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(path); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(pattern); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(peek); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(persistent_id); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(persistent_load); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(person); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(pi_factory); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(pid); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(policy); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(pos); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(pos1); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(pos2); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(posix); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(print_file_and_line); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(priority); + _PyUnicode_InternStatic(interp, &string); + 
assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(progress); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(progress_handler); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(progress_routine); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(proto); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(protocol); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(ps1); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(ps2); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(query); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(quotetabs); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(raw); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(read); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(read1); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(readable); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(readall); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(readinto); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(readinto1); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(readline); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(readonly); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(real); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(reducer_override); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(registry); + _PyUnicode_InternStatic(interp, &string); + 
assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(rel_tol); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(release); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(reload); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(repl); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(replace); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(reserved); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(reset); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(resetids); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(return); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(reverse); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(reversed); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(salt); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(sched_priority); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(scheduler); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(second); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(security_attributes); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(seek); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(seekable); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(selectors); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(self); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(send); + _PyUnicode_InternStatic(interp, &string); + 
assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(sep); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(sequence); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(server_hostname); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(server_side); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(session); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(setcomp); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(setpgroup); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(setsid); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(setsigdef); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(setsigmask); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(setstate); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(shape); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(show_cmd); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(signed); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(size); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(sizehint); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(skip_file_prefixes); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(sleep); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(sock); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(sort); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(source); + _PyUnicode_InternStatic(interp, &string); + 
assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(source_traceback); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(spam); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(src); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(src_dir_fd); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(stacklevel); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(start); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(statement); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(status); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(stderr); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(stdin); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(stdout); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(step); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(steps); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(store_name); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(strategy); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(strftime); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(strict); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(strict_mode); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(string); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(sub_key); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(symmetric_difference_update); + _PyUnicode_InternStatic(interp, &string); + 
assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(tabsize); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(tag); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(target); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(target_is_directory); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(task); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(tb_frame); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(tb_lasti); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(tb_lineno); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(tb_next); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(tell); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(template); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(term); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(text); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(threading); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(throw); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(timeout); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(times); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(timetuple); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(top); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(trace_callback); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(traceback); + _PyUnicode_InternStatic(interp, &string); + 
assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(trailers); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(translate); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(true); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(truncate); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(twice); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(txt); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(type); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(type_params); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(tz); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(tzinfo); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(tzname); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(uid); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(unlink); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(unraisablehook); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(uri); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(usedforsecurity); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(value); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(values); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(version); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(volume); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(wait_all); + _PyUnicode_InternStatic(interp, &string); + 
assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(warn_on_full_buffer); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(warnings); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(warnoptions); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(wbits); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(week); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(weekday); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(which); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(who); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(withdata); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(writable); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(write); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(write_through); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(year); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_ID(zdict); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(empty); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(dbl_percent); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(dot_locals); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(defaults); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(generic_base); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(kwdefaults); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(type_params); + _PyUnicode_InternStatic(interp, 
&string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(str_replace_inf); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(anon_null); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(anon_dictcomp); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(anon_genexpr); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(anon_lambda); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(anon_listcomp); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(anon_module); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(anon_setcomp); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(anon_string); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(anon_unknown); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(json_decoder); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(list_err); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(utf_8); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(dbl_open_br); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); + string = &_Py_STR(dbl_close_br); + _PyUnicode_InternStatic(interp, &string); + assert(_PyUnicode_CheckConsistency(string, 1)); + assert(PyUnicode_GET_LENGTH(string) != 1); +} +/* End auto-generated code */ +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_UNICODEOBJECT_GENERATED_H */ diff --git a/Include/internal/pycore_unionobject.h b/Include/internal/pycore_unionobject.h new file mode 100644 index 0000000000000000000000000000000000000000..6ece7134cdeca030ff4f177d5d62b7a6e0adcabb --- /dev/null +++ b/Include/internal/pycore_unionobject.h @@ -0,0 +1,25 @@ +#ifndef Py_INTERNAL_UNIONOBJECT_H +#define Py_INTERNAL_UNIONOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +// For extensions created by test_peg_generator +PyAPI_DATA(PyTypeObject) _PyUnion_Type; +PyAPI_FUNC(PyObject *) _Py_union_type_or(PyObject *, PyObject *); + +#define _PyUnion_Check(op) Py_IS_TYPE((op), &_PyUnion_Type) + +#define _PyGenericAlias_Check(op) 
PyObject_TypeCheck((op), &Py_GenericAliasType) +extern PyObject *_Py_subs_parameters(PyObject *, PyObject *, PyObject *, PyObject *); +extern PyObject *_Py_make_parameters(PyObject *); +extern PyObject *_Py_union_args(PyObject *self); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_UNIONOBJECT_H */ diff --git a/Include/internal/pycore_uop_ids.h b/Include/internal/pycore_uop_ids.h new file mode 100644 index 0000000000000000000000000000000000000000..1e6ef8e54a221ac7a535fc83048e0806367e9836 --- /dev/null +++ b/Include/internal/pycore_uop_ids.h @@ -0,0 +1,294 @@ +// This file is generated by Tools/cases_generator/uop_id_generator.py +// from: +// Python/bytecodes.c +// Do not edit! + +#ifndef Py_CORE_UOP_IDS_H +#define Py_CORE_UOP_IDS_H +#ifdef __cplusplus +extern "C" { +#endif + +#define _EXIT_TRACE 300 +#define _SET_IP 301 +#define _BEFORE_ASYNC_WITH BEFORE_ASYNC_WITH +#define _BEFORE_WITH BEFORE_WITH +#define _BINARY_OP 302 +#define _BINARY_OP_ADD_FLOAT 303 +#define _BINARY_OP_ADD_INT 304 +#define _BINARY_OP_ADD_UNICODE 305 +#define _BINARY_OP_MULTIPLY_FLOAT 306 +#define _BINARY_OP_MULTIPLY_INT 307 +#define _BINARY_OP_SUBTRACT_FLOAT 308 +#define _BINARY_OP_SUBTRACT_INT 309 +#define _BINARY_SLICE BINARY_SLICE +#define _BINARY_SUBSCR 310 +#define _BINARY_SUBSCR_DICT BINARY_SUBSCR_DICT +#define _BINARY_SUBSCR_GETITEM BINARY_SUBSCR_GETITEM +#define _BINARY_SUBSCR_LIST_INT BINARY_SUBSCR_LIST_INT +#define _BINARY_SUBSCR_STR_INT BINARY_SUBSCR_STR_INT +#define _BINARY_SUBSCR_TUPLE_INT BINARY_SUBSCR_TUPLE_INT +#define _BUILD_CONST_KEY_MAP BUILD_CONST_KEY_MAP +#define _BUILD_LIST BUILD_LIST +#define _BUILD_MAP BUILD_MAP +#define _BUILD_SET BUILD_SET +#define _BUILD_SLICE BUILD_SLICE +#define _BUILD_STRING BUILD_STRING +#define _BUILD_TUPLE BUILD_TUPLE +#define _CALL 311 +#define _CALL_ALLOC_AND_ENTER_INIT CALL_ALLOC_AND_ENTER_INIT +#define _CALL_BUILTIN_CLASS 312 +#define _CALL_BUILTIN_FAST 313 +#define _CALL_BUILTIN_FAST_WITH_KEYWORDS 314 +#define _CALL_BUILTIN_O 315 +#define _CALL_FUNCTION_EX CALL_FUNCTION_EX +#define _CALL_INTRINSIC_1 CALL_INTRINSIC_1 +#define _CALL_INTRINSIC_2 CALL_INTRINSIC_2 +#define _CALL_ISINSTANCE CALL_ISINSTANCE +#define _CALL_KW CALL_KW +#define _CALL_LEN CALL_LEN +#define _CALL_METHOD_DESCRIPTOR_FAST 316 +#define _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 317 +#define _CALL_METHOD_DESCRIPTOR_NOARGS 318 +#define _CALL_METHOD_DESCRIPTOR_O 319 +#define _CALL_NON_PY_GENERAL 320 +#define _CALL_STR_1 321 +#define _CALL_TUPLE_1 322 +#define _CALL_TYPE_1 CALL_TYPE_1 +#define _CHECK_ATTR_CLASS 323 +#define _CHECK_ATTR_METHOD_LAZY_DICT 324 +#define _CHECK_ATTR_MODULE 325 +#define _CHECK_ATTR_WITH_HINT 326 +#define _CHECK_CALL_BOUND_METHOD_EXACT_ARGS 327 +#define _CHECK_EG_MATCH CHECK_EG_MATCH +#define _CHECK_EXC_MATCH CHECK_EXC_MATCH +#define _CHECK_FUNCTION 328 +#define _CHECK_FUNCTION_EXACT_ARGS 329 +#define _CHECK_FUNCTION_VERSION 330 +#define _CHECK_IS_NOT_PY_CALLABLE 331 +#define _CHECK_MANAGED_OBJECT_HAS_VALUES 332 +#define _CHECK_METHOD_VERSION 333 +#define _CHECK_PEP_523 334 +#define _CHECK_PERIODIC 335 +#define _CHECK_STACK_SPACE 336 +#define _CHECK_STACK_SPACE_OPERAND 337 +#define _CHECK_VALIDITY 338 +#define _CHECK_VALIDITY_AND_SET_IP 339 +#define _COLD_EXIT 340 +#define _COMPARE_OP 341 +#define _COMPARE_OP_FLOAT 342 +#define _COMPARE_OP_INT 343 +#define _COMPARE_OP_STR 344 +#define _CONTAINS_OP 345 +#define _CONTAINS_OP_DICT CONTAINS_OP_DICT +#define _CONTAINS_OP_SET CONTAINS_OP_SET +#define _CONVERT_VALUE CONVERT_VALUE +#define _COPY COPY +#define 
_COPY_FREE_VARS COPY_FREE_VARS +#define _DELETE_ATTR DELETE_ATTR +#define _DELETE_DEREF DELETE_DEREF +#define _DELETE_FAST DELETE_FAST +#define _DELETE_GLOBAL DELETE_GLOBAL +#define _DELETE_NAME DELETE_NAME +#define _DELETE_SUBSCR DELETE_SUBSCR +#define _DEOPT 346 +#define _DICT_MERGE DICT_MERGE +#define _DICT_UPDATE DICT_UPDATE +#define _DYNAMIC_EXIT 347 +#define _END_SEND END_SEND +#define _ERROR_POP_N 348 +#define _EXIT_INIT_CHECK EXIT_INIT_CHECK +#define _EXPAND_METHOD 349 +#define _FATAL_ERROR 350 +#define _FORMAT_SIMPLE FORMAT_SIMPLE +#define _FORMAT_WITH_SPEC FORMAT_WITH_SPEC +#define _FOR_ITER 351 +#define _FOR_ITER_GEN_FRAME 352 +#define _FOR_ITER_TIER_TWO 353 +#define _GET_AITER GET_AITER +#define _GET_ANEXT GET_ANEXT +#define _GET_AWAITABLE GET_AWAITABLE +#define _GET_ITER GET_ITER +#define _GET_LEN GET_LEN +#define _GET_YIELD_FROM_ITER GET_YIELD_FROM_ITER +#define _GUARD_BOTH_FLOAT 354 +#define _GUARD_BOTH_INT 355 +#define _GUARD_BOTH_UNICODE 356 +#define _GUARD_BUILTINS_VERSION 357 +#define _GUARD_DORV_NO_DICT 358 +#define _GUARD_DORV_VALUES_INST_ATTR_FROM_DICT 359 +#define _GUARD_GLOBALS_VERSION 360 +#define _GUARD_IS_FALSE_POP 361 +#define _GUARD_IS_NONE_POP 362 +#define _GUARD_IS_NOT_NONE_POP 363 +#define _GUARD_IS_TRUE_POP 364 +#define _GUARD_KEYS_VERSION 365 +#define _GUARD_NOS_FLOAT 366 +#define _GUARD_NOS_INT 367 +#define _GUARD_NOT_EXHAUSTED_LIST 368 +#define _GUARD_NOT_EXHAUSTED_RANGE 369 +#define _GUARD_NOT_EXHAUSTED_TUPLE 370 +#define _GUARD_TOS_FLOAT 371 +#define _GUARD_TOS_INT 372 +#define _GUARD_TYPE_VERSION 373 +#define _INIT_CALL_BOUND_METHOD_EXACT_ARGS 374 +#define _INIT_CALL_PY_EXACT_ARGS 375 +#define _INIT_CALL_PY_EXACT_ARGS_0 376 +#define _INIT_CALL_PY_EXACT_ARGS_1 377 +#define _INIT_CALL_PY_EXACT_ARGS_2 378 +#define _INIT_CALL_PY_EXACT_ARGS_3 379 +#define _INIT_CALL_PY_EXACT_ARGS_4 380 +#define _INSTRUMENTED_CALL INSTRUMENTED_CALL +#define _INSTRUMENTED_CALL_FUNCTION_EX INSTRUMENTED_CALL_FUNCTION_EX +#define _INSTRUMENTED_CALL_KW INSTRUMENTED_CALL_KW +#define _INSTRUMENTED_FOR_ITER INSTRUMENTED_FOR_ITER +#define _INSTRUMENTED_INSTRUCTION INSTRUMENTED_INSTRUCTION +#define _INSTRUMENTED_JUMP_BACKWARD INSTRUMENTED_JUMP_BACKWARD +#define _INSTRUMENTED_JUMP_FORWARD INSTRUMENTED_JUMP_FORWARD +#define _INSTRUMENTED_LOAD_SUPER_ATTR INSTRUMENTED_LOAD_SUPER_ATTR +#define _INSTRUMENTED_POP_JUMP_IF_FALSE INSTRUMENTED_POP_JUMP_IF_FALSE +#define _INSTRUMENTED_POP_JUMP_IF_NONE INSTRUMENTED_POP_JUMP_IF_NONE +#define _INSTRUMENTED_POP_JUMP_IF_NOT_NONE INSTRUMENTED_POP_JUMP_IF_NOT_NONE +#define _INSTRUMENTED_POP_JUMP_IF_TRUE INSTRUMENTED_POP_JUMP_IF_TRUE +#define _INSTRUMENTED_RESUME INSTRUMENTED_RESUME +#define _INSTRUMENTED_RETURN_CONST INSTRUMENTED_RETURN_CONST +#define _INSTRUMENTED_RETURN_VALUE INSTRUMENTED_RETURN_VALUE +#define _INSTRUMENTED_YIELD_VALUE INSTRUMENTED_YIELD_VALUE +#define _INTERNAL_INCREMENT_OPT_COUNTER 381 +#define _IS_NONE 382 +#define _IS_OP IS_OP +#define _ITER_CHECK_LIST 383 +#define _ITER_CHECK_RANGE 384 +#define _ITER_CHECK_TUPLE 385 +#define _ITER_JUMP_LIST 386 +#define _ITER_JUMP_RANGE 387 +#define _ITER_JUMP_TUPLE 388 +#define _ITER_NEXT_LIST 389 +#define _ITER_NEXT_RANGE 390 +#define _ITER_NEXT_TUPLE 391 +#define _JUMP_TO_TOP 392 +#define _LIST_APPEND LIST_APPEND +#define _LIST_EXTEND LIST_EXTEND +#define _LOAD_ASSERTION_ERROR LOAD_ASSERTION_ERROR +#define _LOAD_ATTR 393 +#define _LOAD_ATTR_CLASS 394 +#define _LOAD_ATTR_CLASS_0 395 +#define _LOAD_ATTR_CLASS_1 396 +#define _LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN 
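+// Note on the numbering scheme visible above: a uop that corresponds
+// one-to-one with a bytecode instruction aliases that instruction's opcode
+// directly, while uops that exist only in the tier-two interpreter are
+// assigned fresh IDs counting up from 300 (_EXIT_TRACE and _SET_IP at the
+// top of this file).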
+#define _LOAD_ATTR_INSTANCE_VALUE 397 +#define _LOAD_ATTR_INSTANCE_VALUE_0 398 +#define _LOAD_ATTR_INSTANCE_VALUE_1 399 +#define _LOAD_ATTR_METHOD_LAZY_DICT 400 +#define _LOAD_ATTR_METHOD_NO_DICT 401 +#define _LOAD_ATTR_METHOD_WITH_VALUES 402 +#define _LOAD_ATTR_MODULE 403 +#define _LOAD_ATTR_NONDESCRIPTOR_NO_DICT 404 +#define _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 405 +#define _LOAD_ATTR_PROPERTY LOAD_ATTR_PROPERTY +#define _LOAD_ATTR_SLOT 406 +#define _LOAD_ATTR_SLOT_0 407 +#define _LOAD_ATTR_SLOT_1 408 +#define _LOAD_ATTR_WITH_HINT 409 +#define _LOAD_BUILD_CLASS LOAD_BUILD_CLASS +#define _LOAD_CONST LOAD_CONST +#define _LOAD_CONST_INLINE 410 +#define _LOAD_CONST_INLINE_BORROW 411 +#define _LOAD_CONST_INLINE_BORROW_WITH_NULL 412 +#define _LOAD_CONST_INLINE_WITH_NULL 413 +#define _LOAD_DEREF LOAD_DEREF +#define _LOAD_FAST 414 +#define _LOAD_FAST_0 415 +#define _LOAD_FAST_1 416 +#define _LOAD_FAST_2 417 +#define _LOAD_FAST_3 418 +#define _LOAD_FAST_4 419 +#define _LOAD_FAST_5 420 +#define _LOAD_FAST_6 421 +#define _LOAD_FAST_7 422 +#define _LOAD_FAST_AND_CLEAR LOAD_FAST_AND_CLEAR +#define _LOAD_FAST_CHECK LOAD_FAST_CHECK +#define _LOAD_FAST_LOAD_FAST LOAD_FAST_LOAD_FAST +#define _LOAD_FROM_DICT_OR_DEREF LOAD_FROM_DICT_OR_DEREF +#define _LOAD_FROM_DICT_OR_GLOBALS LOAD_FROM_DICT_OR_GLOBALS +#define _LOAD_GLOBAL 423 +#define _LOAD_GLOBAL_BUILTINS 424 +#define _LOAD_GLOBAL_MODULE 425 +#define _LOAD_LOCALS LOAD_LOCALS +#define _LOAD_NAME LOAD_NAME +#define _LOAD_SUPER_ATTR_ATTR LOAD_SUPER_ATTR_ATTR +#define _LOAD_SUPER_ATTR_METHOD LOAD_SUPER_ATTR_METHOD +#define _MAKE_CELL MAKE_CELL +#define _MAKE_FUNCTION MAKE_FUNCTION +#define _MAP_ADD MAP_ADD +#define _MATCH_CLASS MATCH_CLASS +#define _MATCH_KEYS MATCH_KEYS +#define _MATCH_MAPPING MATCH_MAPPING +#define _MATCH_SEQUENCE MATCH_SEQUENCE +#define _NOP NOP +#define _POP_EXCEPT POP_EXCEPT +#define _POP_FRAME 426 +#define _POP_JUMP_IF_FALSE 427 +#define _POP_JUMP_IF_TRUE 428 +#define _POP_TOP POP_TOP +#define _POP_TOP_LOAD_CONST_INLINE_BORROW 429 +#define _PUSH_EXC_INFO PUSH_EXC_INFO +#define _PUSH_FRAME 430 +#define _PUSH_NULL PUSH_NULL +#define _PY_FRAME_GENERAL 431 +#define _REPLACE_WITH_TRUE 432 +#define _RESUME_CHECK RESUME_CHECK +#define _RETURN_GENERATOR RETURN_GENERATOR +#define _SAVE_RETURN_OFFSET 433 +#define _SEND 434 +#define _SEND_GEN SEND_GEN +#define _SETUP_ANNOTATIONS SETUP_ANNOTATIONS +#define _SET_ADD SET_ADD +#define _SET_FUNCTION_ATTRIBUTE SET_FUNCTION_ATTRIBUTE +#define _SET_UPDATE SET_UPDATE +#define _START_EXECUTOR 435 +#define _STORE_ATTR 436 +#define _STORE_ATTR_INSTANCE_VALUE 437 +#define _STORE_ATTR_SLOT 438 +#define _STORE_ATTR_WITH_HINT STORE_ATTR_WITH_HINT +#define _STORE_DEREF STORE_DEREF +#define _STORE_FAST 439 +#define _STORE_FAST_0 440 +#define _STORE_FAST_1 441 +#define _STORE_FAST_2 442 +#define _STORE_FAST_3 443 +#define _STORE_FAST_4 444 +#define _STORE_FAST_5 445 +#define _STORE_FAST_6 446 +#define _STORE_FAST_7 447 +#define _STORE_FAST_LOAD_FAST STORE_FAST_LOAD_FAST +#define _STORE_FAST_STORE_FAST STORE_FAST_STORE_FAST +#define _STORE_GLOBAL STORE_GLOBAL +#define _STORE_NAME STORE_NAME +#define _STORE_SLICE STORE_SLICE +#define _STORE_SUBSCR 448 +#define _STORE_SUBSCR_DICT STORE_SUBSCR_DICT +#define _STORE_SUBSCR_LIST_INT STORE_SUBSCR_LIST_INT +#define _SWAP SWAP +#define _TIER2_RESUME_CHECK 449 +#define _TO_BOOL 450 +#define _TO_BOOL_BOOL TO_BOOL_BOOL +#define _TO_BOOL_INT TO_BOOL_INT +#define _TO_BOOL_LIST TO_BOOL_LIST +#define _TO_BOOL_NONE TO_BOOL_NONE +#define _TO_BOOL_STR TO_BOOL_STR +#define 
_UNARY_INVERT UNARY_INVERT +#define _UNARY_NEGATIVE UNARY_NEGATIVE +#define _UNARY_NOT UNARY_NOT +#define _UNPACK_EX UNPACK_EX +#define _UNPACK_SEQUENCE 451 +#define _UNPACK_SEQUENCE_LIST UNPACK_SEQUENCE_LIST +#define _UNPACK_SEQUENCE_TUPLE UNPACK_SEQUENCE_TUPLE +#define _UNPACK_SEQUENCE_TWO_TUPLE UNPACK_SEQUENCE_TWO_TUPLE +#define _WITH_EXCEPT_START WITH_EXCEPT_START +#define _YIELD_VALUE YIELD_VALUE +#define MAX_UOP_ID 451 + +#ifdef __cplusplus +} +#endif +#endif /* !Py_CORE_UOP_IDS_H */ diff --git a/Include/internal/pycore_uop_metadata.h b/Include/internal/pycore_uop_metadata.h new file mode 100644 index 0000000000000000000000000000000000000000..02ffc769c180eaa09e28b9f6c0a1b199be5b0f37 --- /dev/null +++ b/Include/internal/pycore_uop_metadata.h @@ -0,0 +1,1006 @@ +// This file is generated by Tools/cases_generator/uop_metadata_generator.py +// from: +// Python/bytecodes.c +// Do not edit! + +#ifndef Py_CORE_UOP_METADATA_H +#define Py_CORE_UOP_METADATA_H +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include "pycore_uop_ids.h" +extern const uint16_t _PyUop_Flags[MAX_UOP_ID+1]; +extern const uint8_t _PyUop_Replication[MAX_UOP_ID+1]; +extern const char * const _PyOpcode_uop_name[MAX_UOP_ID+1]; + +extern int _PyUop_num_popped(int opcode, int oparg); + +#ifdef NEED_OPCODE_METADATA +const uint16_t _PyUop_Flags[MAX_UOP_ID+1] = { + [_NOP] = HAS_PURE_FLAG, + [_RESUME_CHECK] = HAS_DEOPT_FLAG, + [_LOAD_FAST_CHECK] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_LOAD_FAST_0] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_1] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_2] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_3] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_4] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_5] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_6] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_7] = HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_PURE_FLAG, + [_LOAD_FAST_AND_CLEAR] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, + [_LOAD_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, + [_LOAD_CONST] = HAS_ARG_FLAG | HAS_CONST_FLAG | HAS_PURE_FLAG, + [_STORE_FAST_0] = HAS_LOCAL_FLAG, + [_STORE_FAST_1] = HAS_LOCAL_FLAG, + [_STORE_FAST_2] = HAS_LOCAL_FLAG, + [_STORE_FAST_3] = HAS_LOCAL_FLAG, + [_STORE_FAST_4] = HAS_LOCAL_FLAG, + [_STORE_FAST_5] = HAS_LOCAL_FLAG, + [_STORE_FAST_6] = HAS_LOCAL_FLAG, + [_STORE_FAST_7] = HAS_LOCAL_FLAG, + [_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, + [_STORE_FAST_LOAD_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, + [_STORE_FAST_STORE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG, + [_POP_TOP] = HAS_PURE_FLAG, + [_PUSH_NULL] = HAS_PURE_FLAG, + [_END_SEND] = HAS_PURE_FLAG, + [_UNARY_NEGATIVE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_UNARY_NOT] = HAS_PURE_FLAG, + [_TO_BOOL] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_TO_BOOL_BOOL] = HAS_EXIT_FLAG, + [_TO_BOOL_INT] = HAS_EXIT_FLAG | HAS_ESCAPES_FLAG, + [_TO_BOOL_LIST] = HAS_EXIT_FLAG, + [_TO_BOOL_NONE] = HAS_EXIT_FLAG, + [_TO_BOOL_STR] = HAS_EXIT_FLAG | HAS_ESCAPES_FLAG, + [_REPLACE_WITH_TRUE] = 0, + [_UNARY_INVERT] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_GUARD_BOTH_INT] = HAS_EXIT_FLAG, + [_GUARD_NOS_INT] = HAS_EXIT_FLAG, + [_GUARD_TOS_INT] = HAS_EXIT_FLAG, + [_BINARY_OP_MULTIPLY_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_ADD_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_BINARY_OP_SUBTRACT_INT] = HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_GUARD_BOTH_FLOAT] = HAS_EXIT_FLAG, + [_GUARD_NOS_FLOAT] = HAS_EXIT_FLAG, + [_GUARD_TOS_FLOAT] = HAS_EXIT_FLAG, + 
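Two things in the generated ID table above are easy to miss: a micro-op that corresponds 1:1 to an existing bytecode instruction is simply #defined as an alias of that instruction's opcode, while uops that exist only in the tier-2 optimizer get fresh numbers starting at 300, so the two ranges can never collide; `MAX_UOP_ID` then gives one upper bound for sizing every table keyed by uop. A minimal illustration of the scheme (the include paths are assumptions; the internal header is only usable from a core build):

```c
#include <assert.h>
#include "opcode_ids.h"                 // bytecode opcode numbers
#include "internal/pycore_uop_ids.h"    // the table above (assumed path)

int main(void)
{
    assert(_EXIT_TRACE == 300);   // tier-2-only uop: fresh ID past the bytecode range
    assert(_POP_TOP == POP_TOP);  // 1:1 with a bytecode: same number, two names
    assert(MAX_UOP_ID == 451);    // single bound used to size the metadata tables
    return 0;
}
```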
[_BINARY_OP_MULTIPLY_FLOAT] = HAS_PURE_FLAG, + [_BINARY_OP_ADD_FLOAT] = HAS_PURE_FLAG, + [_BINARY_OP_SUBTRACT_FLOAT] = HAS_PURE_FLAG, + [_GUARD_BOTH_UNICODE] = HAS_EXIT_FLAG, + [_BINARY_OP_ADD_UNICODE] = HAS_ERROR_FLAG | HAS_PURE_FLAG, + [_BINARY_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_BINARY_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_STORE_SLICE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_BINARY_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG, + [_BINARY_SUBSCR_STR_INT] = HAS_DEOPT_FLAG, + [_BINARY_SUBSCR_TUPLE_INT] = HAS_DEOPT_FLAG, + [_BINARY_SUBSCR_DICT] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_LIST_APPEND] = HAS_ARG_FLAG | HAS_ERROR_FLAG, + [_SET_ADD] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_STORE_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_STORE_SUBSCR_LIST_INT] = HAS_DEOPT_FLAG, + [_STORE_SUBSCR_DICT] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_DELETE_SUBSCR] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_CALL_INTRINSIC_1] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_CALL_INTRINSIC_2] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_POP_FRAME] = 0, + [_GET_AITER] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_GET_ANEXT] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, + [_GET_AWAITABLE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_YIELD_VALUE] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, + [_POP_EXCEPT] = HAS_ESCAPES_FLAG, + [_LOAD_ASSERTION_ERROR] = 0, + [_LOAD_BUILD_CLASS] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_STORE_NAME] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_DELETE_NAME] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, + [_UNPACK_SEQUENCE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_UNPACK_SEQUENCE_TWO_TUPLE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_UNPACK_SEQUENCE_TUPLE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_UNPACK_SEQUENCE_LIST] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_UNPACK_EX] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_STORE_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_DELETE_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_STORE_GLOBAL] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_DELETE_GLOBAL] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, + [_LOAD_LOCALS] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_LOAD_GLOBAL] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_GUARD_GLOBALS_VERSION] = HAS_DEOPT_FLAG, + [_GUARD_BUILTINS_VERSION] = HAS_DEOPT_FLAG, + [_LOAD_GLOBAL_MODULE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_LOAD_GLOBAL_BUILTINS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_DELETE_FAST] = HAS_ARG_FLAG | HAS_LOCAL_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_MAKE_CELL] = HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG, + [_DELETE_DEREF] = HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, + [_LOAD_FROM_DICT_OR_DEREF] = HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, + [_LOAD_DEREF] = HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_STORE_DEREF] = HAS_ARG_FLAG | HAS_FREE_FLAG | HAS_ESCAPES_FLAG, + [_COPY_FREE_VARS] = HAS_ARG_FLAG, + [_BUILD_STRING] = HAS_ARG_FLAG | HAS_ERROR_FLAG, + [_BUILD_TUPLE] = HAS_ARG_FLAG | HAS_ERROR_FLAG, + [_BUILD_LIST] = HAS_ARG_FLAG | HAS_ERROR_FLAG, + [_LIST_EXTEND] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + 
[_SET_UPDATE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_BUILD_MAP] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_SETUP_ANNOTATIONS] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_BUILD_CONST_KEY_MAP] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_DICT_UPDATE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_DICT_MERGE] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_MAP_ADD] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_LOAD_SUPER_ATTR_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_LOAD_SUPER_ATTR_METHOD] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_LOAD_ATTR] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_GUARD_TYPE_VERSION] = HAS_EXIT_FLAG, + [_CHECK_MANAGED_OBJECT_HAS_VALUES] = HAS_DEOPT_FLAG, + [_LOAD_ATTR_INSTANCE_VALUE_0] = HAS_DEOPT_FLAG, + [_LOAD_ATTR_INSTANCE_VALUE_1] = HAS_DEOPT_FLAG, + [_LOAD_ATTR_INSTANCE_VALUE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_OPARG_AND_1_FLAG, + [_CHECK_ATTR_MODULE] = HAS_DEOPT_FLAG, + [_LOAD_ATTR_MODULE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_CHECK_ATTR_WITH_HINT] = HAS_DEOPT_FLAG, + [_LOAD_ATTR_WITH_HINT] = HAS_ARG_FLAG | HAS_NAME_FLAG | HAS_DEOPT_FLAG, + [_LOAD_ATTR_SLOT_0] = HAS_DEOPT_FLAG, + [_LOAD_ATTR_SLOT_1] = HAS_DEOPT_FLAG, + [_LOAD_ATTR_SLOT] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_OPARG_AND_1_FLAG, + [_CHECK_ATTR_CLASS] = HAS_DEOPT_FLAG, + [_LOAD_ATTR_CLASS_0] = 0, + [_LOAD_ATTR_CLASS_1] = 0, + [_LOAD_ATTR_CLASS] = HAS_ARG_FLAG | HAS_OPARG_AND_1_FLAG, + [_GUARD_DORV_NO_DICT] = HAS_DEOPT_FLAG, + [_STORE_ATTR_INSTANCE_VALUE] = 0, + [_STORE_ATTR_SLOT] = 0, + [_COMPARE_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_COMPARE_OP_FLOAT] = HAS_ARG_FLAG, + [_COMPARE_OP_INT] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_COMPARE_OP_STR] = HAS_ARG_FLAG, + [_IS_OP] = HAS_ARG_FLAG, + [_CONTAINS_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_CONTAINS_OP_SET] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_CONTAINS_OP_DICT] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_CHECK_EG_MATCH] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_CHECK_EXC_MATCH] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_IS_NONE] = 0, + [_GET_LEN] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_MATCH_CLASS] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_MATCH_MAPPING] = 0, + [_MATCH_SEQUENCE] = 0, + [_MATCH_KEYS] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_GET_ITER] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_GET_YIELD_FROM_ITER] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, + [_FOR_ITER_TIER_TWO] = HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, + [_ITER_CHECK_LIST] = HAS_EXIT_FLAG, + [_GUARD_NOT_EXHAUSTED_LIST] = HAS_EXIT_FLAG, + [_ITER_NEXT_LIST] = 0, + [_ITER_CHECK_TUPLE] = HAS_EXIT_FLAG, + [_GUARD_NOT_EXHAUSTED_TUPLE] = HAS_EXIT_FLAG, + [_ITER_NEXT_TUPLE] = 0, + [_ITER_CHECK_RANGE] = HAS_EXIT_FLAG, + [_GUARD_NOT_EXHAUSTED_RANGE] = HAS_EXIT_FLAG, + [_ITER_NEXT_RANGE] = HAS_ERROR_FLAG, + [_FOR_ITER_GEN_FRAME] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_WITH_EXCEPT_START] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_PUSH_EXC_INFO] = 0, + [_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT] = HAS_DEOPT_FLAG, + [_GUARD_KEYS_VERSION] = HAS_DEOPT_FLAG, + [_LOAD_ATTR_METHOD_WITH_VALUES] = HAS_ARG_FLAG, + [_LOAD_ATTR_METHOD_NO_DICT] = HAS_ARG_FLAG, + [_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = HAS_ARG_FLAG, + [_LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = HAS_ARG_FLAG, 
+ [_CHECK_ATTR_METHOD_LAZY_DICT] = HAS_DEOPT_FLAG, + [_LOAD_ATTR_METHOD_LAZY_DICT] = HAS_ARG_FLAG, + [_CHECK_PERIODIC] = HAS_EVAL_BREAK_FLAG, + [_PY_FRAME_GENERAL] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, + [_CHECK_FUNCTION_VERSION] = HAS_ARG_FLAG | HAS_EXIT_FLAG, + [_CHECK_METHOD_VERSION] = HAS_ARG_FLAG | HAS_EXIT_FLAG, + [_EXPAND_METHOD] = HAS_ARG_FLAG, + [_CHECK_IS_NOT_PY_CALLABLE] = HAS_ARG_FLAG | HAS_EXIT_FLAG, + [_CALL_NON_PY_GENERAL] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_CHECK_CALL_BOUND_METHOD_EXACT_ARGS] = HAS_ARG_FLAG | HAS_EXIT_FLAG, + [_INIT_CALL_BOUND_METHOD_EXACT_ARGS] = HAS_ARG_FLAG, + [_CHECK_PEP_523] = HAS_DEOPT_FLAG, + [_CHECK_FUNCTION_EXACT_ARGS] = HAS_ARG_FLAG | HAS_EXIT_FLAG, + [_CHECK_STACK_SPACE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_INIT_CALL_PY_EXACT_ARGS_0] = HAS_PURE_FLAG, + [_INIT_CALL_PY_EXACT_ARGS_1] = HAS_PURE_FLAG, + [_INIT_CALL_PY_EXACT_ARGS_2] = HAS_PURE_FLAG, + [_INIT_CALL_PY_EXACT_ARGS_3] = HAS_PURE_FLAG, + [_INIT_CALL_PY_EXACT_ARGS_4] = HAS_PURE_FLAG, + [_INIT_CALL_PY_EXACT_ARGS] = HAS_ARG_FLAG | HAS_PURE_FLAG, + [_PUSH_FRAME] = 0, + [_CALL_TYPE_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG, + [_CALL_STR_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_CALL_TUPLE_1] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_EXIT_INIT_CHECK] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, + [_CALL_BUILTIN_CLASS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_CALL_BUILTIN_O] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_CALL_BUILTIN_FAST] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_CALL_BUILTIN_FAST_WITH_KEYWORDS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_CALL_LEN] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, + [_CALL_ISINSTANCE] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, + [_CALL_METHOD_DESCRIPTOR_O] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_CALL_METHOD_DESCRIPTOR_NOARGS] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_CALL_METHOD_DESCRIPTOR_FAST] = HAS_ARG_FLAG | HAS_DEOPT_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_MAKE_FUNCTION] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, + [_SET_FUNCTION_ATTRIBUTE] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, + [_RETURN_GENERATOR] = HAS_ERROR_FLAG | HAS_ERROR_NO_POP_FLAG | HAS_ESCAPES_FLAG, + [_BUILD_SLICE] = HAS_ARG_FLAG | HAS_ERROR_FLAG, + [_CONVERT_VALUE] = HAS_ARG_FLAG | HAS_ERROR_FLAG, + [_FORMAT_SIMPLE] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_FORMAT_WITH_SPEC] = HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_COPY] = HAS_ARG_FLAG | HAS_PURE_FLAG, + [_BINARY_OP] = HAS_ARG_FLAG | HAS_ERROR_FLAG | HAS_ESCAPES_FLAG, + [_SWAP] = HAS_ARG_FLAG | HAS_PURE_FLAG, + [_GUARD_IS_TRUE_POP] = HAS_EXIT_FLAG, + [_GUARD_IS_FALSE_POP] = HAS_EXIT_FLAG, + [_GUARD_IS_NONE_POP] = HAS_EXIT_FLAG, + [_GUARD_IS_NOT_NONE_POP] = HAS_EXIT_FLAG, + [_JUMP_TO_TOP] = 0, + [_SET_IP] = 0, + [_CHECK_STACK_SPACE_OPERAND] = HAS_DEOPT_FLAG, + [_SAVE_RETURN_OFFSET] = HAS_ARG_FLAG, + [_EXIT_TRACE] = 0, + [_CHECK_VALIDITY] = HAS_DEOPT_FLAG, + [_LOAD_CONST_INLINE] = HAS_PURE_FLAG, + [_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG, + [_POP_TOP_LOAD_CONST_INLINE_BORROW] = HAS_PURE_FLAG, + 
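A note on how this header is meant to be consumed: the tables are declared `extern` near the top but defined here behind `NEED_OPCODE_METADATA`, so exactly one translation unit materializes the storage and every other includer sees only the declarations. The flag bits describe, roughly, how a uop can leave a trace: `HAS_DEOPT_FLAG` means it can fall back to the tier-1 interpreter, `HAS_EXIT_FLAG` means it can take a side exit, and `HAS_ERROR_FLAG` marks exception paths. A hedged sketch of a consumer (the helper name is hypothetical, and the `HAS_*` macros are assumed to come from the companion `pycore_opcode_metadata.h`):

```c
#include <assert.h>
#include "internal/pycore_uop_metadata.h"   // extern declarations only here

// Hypothetical helper: a uop can sit in a straight-line trace tail only
// if it has no way to leave the trace early.
static int
uop_stays_on_trace(int uop)
{
    assert(0 <= uop && uop <= MAX_UOP_ID && _PyOpcode_uop_name[uop] != NULL);
    return (_PyUop_Flags[uop] & (HAS_DEOPT_FLAG | HAS_EXIT_FLAG)) == 0;
}
```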
[_LOAD_CONST_INLINE_WITH_NULL] = HAS_PURE_FLAG, + [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = HAS_PURE_FLAG, + [_CHECK_FUNCTION] = HAS_DEOPT_FLAG, + [_INTERNAL_INCREMENT_OPT_COUNTER] = 0, + [_COLD_EXIT] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, + [_DYNAMIC_EXIT] = HAS_ARG_FLAG | HAS_ESCAPES_FLAG, + [_START_EXECUTOR] = HAS_DEOPT_FLAG, + [_FATAL_ERROR] = 0, + [_CHECK_VALIDITY_AND_SET_IP] = HAS_DEOPT_FLAG, + [_DEOPT] = 0, + [_ERROR_POP_N] = HAS_ARG_FLAG, + [_TIER2_RESUME_CHECK] = HAS_DEOPT_FLAG, +}; + +const uint8_t _PyUop_Replication[MAX_UOP_ID+1] = { + [_LOAD_FAST] = 8, + [_STORE_FAST] = 8, + [_INIT_CALL_PY_EXACT_ARGS] = 5, +}; + +const char *const _PyOpcode_uop_name[MAX_UOP_ID+1] = { + [_BINARY_OP] = "_BINARY_OP", + [_BINARY_OP_ADD_FLOAT] = "_BINARY_OP_ADD_FLOAT", + [_BINARY_OP_ADD_INT] = "_BINARY_OP_ADD_INT", + [_BINARY_OP_ADD_UNICODE] = "_BINARY_OP_ADD_UNICODE", + [_BINARY_OP_MULTIPLY_FLOAT] = "_BINARY_OP_MULTIPLY_FLOAT", + [_BINARY_OP_MULTIPLY_INT] = "_BINARY_OP_MULTIPLY_INT", + [_BINARY_OP_SUBTRACT_FLOAT] = "_BINARY_OP_SUBTRACT_FLOAT", + [_BINARY_OP_SUBTRACT_INT] = "_BINARY_OP_SUBTRACT_INT", + [_BINARY_SLICE] = "_BINARY_SLICE", + [_BINARY_SUBSCR] = "_BINARY_SUBSCR", + [_BINARY_SUBSCR_DICT] = "_BINARY_SUBSCR_DICT", + [_BINARY_SUBSCR_LIST_INT] = "_BINARY_SUBSCR_LIST_INT", + [_BINARY_SUBSCR_STR_INT] = "_BINARY_SUBSCR_STR_INT", + [_BINARY_SUBSCR_TUPLE_INT] = "_BINARY_SUBSCR_TUPLE_INT", + [_BUILD_CONST_KEY_MAP] = "_BUILD_CONST_KEY_MAP", + [_BUILD_LIST] = "_BUILD_LIST", + [_BUILD_MAP] = "_BUILD_MAP", + [_BUILD_SLICE] = "_BUILD_SLICE", + [_BUILD_STRING] = "_BUILD_STRING", + [_BUILD_TUPLE] = "_BUILD_TUPLE", + [_CALL_BUILTIN_CLASS] = "_CALL_BUILTIN_CLASS", + [_CALL_BUILTIN_FAST] = "_CALL_BUILTIN_FAST", + [_CALL_BUILTIN_FAST_WITH_KEYWORDS] = "_CALL_BUILTIN_FAST_WITH_KEYWORDS", + [_CALL_BUILTIN_O] = "_CALL_BUILTIN_O", + [_CALL_INTRINSIC_1] = "_CALL_INTRINSIC_1", + [_CALL_INTRINSIC_2] = "_CALL_INTRINSIC_2", + [_CALL_ISINSTANCE] = "_CALL_ISINSTANCE", + [_CALL_LEN] = "_CALL_LEN", + [_CALL_METHOD_DESCRIPTOR_FAST] = "_CALL_METHOD_DESCRIPTOR_FAST", + [_CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS] = "_CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS", + [_CALL_METHOD_DESCRIPTOR_NOARGS] = "_CALL_METHOD_DESCRIPTOR_NOARGS", + [_CALL_METHOD_DESCRIPTOR_O] = "_CALL_METHOD_DESCRIPTOR_O", + [_CALL_NON_PY_GENERAL] = "_CALL_NON_PY_GENERAL", + [_CALL_STR_1] = "_CALL_STR_1", + [_CALL_TUPLE_1] = "_CALL_TUPLE_1", + [_CALL_TYPE_1] = "_CALL_TYPE_1", + [_CHECK_ATTR_CLASS] = "_CHECK_ATTR_CLASS", + [_CHECK_ATTR_METHOD_LAZY_DICT] = "_CHECK_ATTR_METHOD_LAZY_DICT", + [_CHECK_ATTR_MODULE] = "_CHECK_ATTR_MODULE", + [_CHECK_ATTR_WITH_HINT] = "_CHECK_ATTR_WITH_HINT", + [_CHECK_CALL_BOUND_METHOD_EXACT_ARGS] = "_CHECK_CALL_BOUND_METHOD_EXACT_ARGS", + [_CHECK_EG_MATCH] = "_CHECK_EG_MATCH", + [_CHECK_EXC_MATCH] = "_CHECK_EXC_MATCH", + [_CHECK_FUNCTION] = "_CHECK_FUNCTION", + [_CHECK_FUNCTION_EXACT_ARGS] = "_CHECK_FUNCTION_EXACT_ARGS", + [_CHECK_FUNCTION_VERSION] = "_CHECK_FUNCTION_VERSION", + [_CHECK_IS_NOT_PY_CALLABLE] = "_CHECK_IS_NOT_PY_CALLABLE", + [_CHECK_MANAGED_OBJECT_HAS_VALUES] = "_CHECK_MANAGED_OBJECT_HAS_VALUES", + [_CHECK_METHOD_VERSION] = "_CHECK_METHOD_VERSION", + [_CHECK_PEP_523] = "_CHECK_PEP_523", + [_CHECK_PERIODIC] = "_CHECK_PERIODIC", + [_CHECK_STACK_SPACE] = "_CHECK_STACK_SPACE", + [_CHECK_STACK_SPACE_OPERAND] = "_CHECK_STACK_SPACE_OPERAND", + [_CHECK_VALIDITY] = "_CHECK_VALIDITY", + [_CHECK_VALIDITY_AND_SET_IP] = "_CHECK_VALIDITY_AND_SET_IP", + [_COLD_EXIT] = "_COLD_EXIT", + [_COMPARE_OP] = "_COMPARE_OP", + [_COMPARE_OP_FLOAT] = 
"_COMPARE_OP_FLOAT", + [_COMPARE_OP_INT] = "_COMPARE_OP_INT", + [_COMPARE_OP_STR] = "_COMPARE_OP_STR", + [_CONTAINS_OP] = "_CONTAINS_OP", + [_CONTAINS_OP_DICT] = "_CONTAINS_OP_DICT", + [_CONTAINS_OP_SET] = "_CONTAINS_OP_SET", + [_CONVERT_VALUE] = "_CONVERT_VALUE", + [_COPY] = "_COPY", + [_COPY_FREE_VARS] = "_COPY_FREE_VARS", + [_DELETE_ATTR] = "_DELETE_ATTR", + [_DELETE_DEREF] = "_DELETE_DEREF", + [_DELETE_FAST] = "_DELETE_FAST", + [_DELETE_GLOBAL] = "_DELETE_GLOBAL", + [_DELETE_NAME] = "_DELETE_NAME", + [_DELETE_SUBSCR] = "_DELETE_SUBSCR", + [_DEOPT] = "_DEOPT", + [_DICT_MERGE] = "_DICT_MERGE", + [_DICT_UPDATE] = "_DICT_UPDATE", + [_DYNAMIC_EXIT] = "_DYNAMIC_EXIT", + [_END_SEND] = "_END_SEND", + [_ERROR_POP_N] = "_ERROR_POP_N", + [_EXIT_INIT_CHECK] = "_EXIT_INIT_CHECK", + [_EXIT_TRACE] = "_EXIT_TRACE", + [_EXPAND_METHOD] = "_EXPAND_METHOD", + [_FATAL_ERROR] = "_FATAL_ERROR", + [_FORMAT_SIMPLE] = "_FORMAT_SIMPLE", + [_FORMAT_WITH_SPEC] = "_FORMAT_WITH_SPEC", + [_FOR_ITER_GEN_FRAME] = "_FOR_ITER_GEN_FRAME", + [_FOR_ITER_TIER_TWO] = "_FOR_ITER_TIER_TWO", + [_GET_AITER] = "_GET_AITER", + [_GET_ANEXT] = "_GET_ANEXT", + [_GET_AWAITABLE] = "_GET_AWAITABLE", + [_GET_ITER] = "_GET_ITER", + [_GET_LEN] = "_GET_LEN", + [_GET_YIELD_FROM_ITER] = "_GET_YIELD_FROM_ITER", + [_GUARD_BOTH_FLOAT] = "_GUARD_BOTH_FLOAT", + [_GUARD_BOTH_INT] = "_GUARD_BOTH_INT", + [_GUARD_BOTH_UNICODE] = "_GUARD_BOTH_UNICODE", + [_GUARD_BUILTINS_VERSION] = "_GUARD_BUILTINS_VERSION", + [_GUARD_DORV_NO_DICT] = "_GUARD_DORV_NO_DICT", + [_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT] = "_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT", + [_GUARD_GLOBALS_VERSION] = "_GUARD_GLOBALS_VERSION", + [_GUARD_IS_FALSE_POP] = "_GUARD_IS_FALSE_POP", + [_GUARD_IS_NONE_POP] = "_GUARD_IS_NONE_POP", + [_GUARD_IS_NOT_NONE_POP] = "_GUARD_IS_NOT_NONE_POP", + [_GUARD_IS_TRUE_POP] = "_GUARD_IS_TRUE_POP", + [_GUARD_KEYS_VERSION] = "_GUARD_KEYS_VERSION", + [_GUARD_NOS_FLOAT] = "_GUARD_NOS_FLOAT", + [_GUARD_NOS_INT] = "_GUARD_NOS_INT", + [_GUARD_NOT_EXHAUSTED_LIST] = "_GUARD_NOT_EXHAUSTED_LIST", + [_GUARD_NOT_EXHAUSTED_RANGE] = "_GUARD_NOT_EXHAUSTED_RANGE", + [_GUARD_NOT_EXHAUSTED_TUPLE] = "_GUARD_NOT_EXHAUSTED_TUPLE", + [_GUARD_TOS_FLOAT] = "_GUARD_TOS_FLOAT", + [_GUARD_TOS_INT] = "_GUARD_TOS_INT", + [_GUARD_TYPE_VERSION] = "_GUARD_TYPE_VERSION", + [_INIT_CALL_BOUND_METHOD_EXACT_ARGS] = "_INIT_CALL_BOUND_METHOD_EXACT_ARGS", + [_INIT_CALL_PY_EXACT_ARGS] = "_INIT_CALL_PY_EXACT_ARGS", + [_INIT_CALL_PY_EXACT_ARGS_0] = "_INIT_CALL_PY_EXACT_ARGS_0", + [_INIT_CALL_PY_EXACT_ARGS_1] = "_INIT_CALL_PY_EXACT_ARGS_1", + [_INIT_CALL_PY_EXACT_ARGS_2] = "_INIT_CALL_PY_EXACT_ARGS_2", + [_INIT_CALL_PY_EXACT_ARGS_3] = "_INIT_CALL_PY_EXACT_ARGS_3", + [_INIT_CALL_PY_EXACT_ARGS_4] = "_INIT_CALL_PY_EXACT_ARGS_4", + [_INTERNAL_INCREMENT_OPT_COUNTER] = "_INTERNAL_INCREMENT_OPT_COUNTER", + [_IS_NONE] = "_IS_NONE", + [_IS_OP] = "_IS_OP", + [_ITER_CHECK_LIST] = "_ITER_CHECK_LIST", + [_ITER_CHECK_RANGE] = "_ITER_CHECK_RANGE", + [_ITER_CHECK_TUPLE] = "_ITER_CHECK_TUPLE", + [_ITER_NEXT_LIST] = "_ITER_NEXT_LIST", + [_ITER_NEXT_RANGE] = "_ITER_NEXT_RANGE", + [_ITER_NEXT_TUPLE] = "_ITER_NEXT_TUPLE", + [_JUMP_TO_TOP] = "_JUMP_TO_TOP", + [_LIST_APPEND] = "_LIST_APPEND", + [_LIST_EXTEND] = "_LIST_EXTEND", + [_LOAD_ASSERTION_ERROR] = "_LOAD_ASSERTION_ERROR", + [_LOAD_ATTR] = "_LOAD_ATTR", + [_LOAD_ATTR_CLASS] = "_LOAD_ATTR_CLASS", + [_LOAD_ATTR_CLASS_0] = "_LOAD_ATTR_CLASS_0", + [_LOAD_ATTR_CLASS_1] = "_LOAD_ATTR_CLASS_1", + [_LOAD_ATTR_INSTANCE_VALUE] = "_LOAD_ATTR_INSTANCE_VALUE", + 
[_LOAD_ATTR_INSTANCE_VALUE_0] = "_LOAD_ATTR_INSTANCE_VALUE_0", + [_LOAD_ATTR_INSTANCE_VALUE_1] = "_LOAD_ATTR_INSTANCE_VALUE_1", + [_LOAD_ATTR_METHOD_LAZY_DICT] = "_LOAD_ATTR_METHOD_LAZY_DICT", + [_LOAD_ATTR_METHOD_NO_DICT] = "_LOAD_ATTR_METHOD_NO_DICT", + [_LOAD_ATTR_METHOD_WITH_VALUES] = "_LOAD_ATTR_METHOD_WITH_VALUES", + [_LOAD_ATTR_MODULE] = "_LOAD_ATTR_MODULE", + [_LOAD_ATTR_NONDESCRIPTOR_NO_DICT] = "_LOAD_ATTR_NONDESCRIPTOR_NO_DICT", + [_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES] = "_LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES", + [_LOAD_ATTR_SLOT] = "_LOAD_ATTR_SLOT", + [_LOAD_ATTR_SLOT_0] = "_LOAD_ATTR_SLOT_0", + [_LOAD_ATTR_SLOT_1] = "_LOAD_ATTR_SLOT_1", + [_LOAD_ATTR_WITH_HINT] = "_LOAD_ATTR_WITH_HINT", + [_LOAD_BUILD_CLASS] = "_LOAD_BUILD_CLASS", + [_LOAD_CONST] = "_LOAD_CONST", + [_LOAD_CONST_INLINE] = "_LOAD_CONST_INLINE", + [_LOAD_CONST_INLINE_BORROW] = "_LOAD_CONST_INLINE_BORROW", + [_LOAD_CONST_INLINE_BORROW_WITH_NULL] = "_LOAD_CONST_INLINE_BORROW_WITH_NULL", + [_LOAD_CONST_INLINE_WITH_NULL] = "_LOAD_CONST_INLINE_WITH_NULL", + [_LOAD_DEREF] = "_LOAD_DEREF", + [_LOAD_FAST] = "_LOAD_FAST", + [_LOAD_FAST_0] = "_LOAD_FAST_0", + [_LOAD_FAST_1] = "_LOAD_FAST_1", + [_LOAD_FAST_2] = "_LOAD_FAST_2", + [_LOAD_FAST_3] = "_LOAD_FAST_3", + [_LOAD_FAST_4] = "_LOAD_FAST_4", + [_LOAD_FAST_5] = "_LOAD_FAST_5", + [_LOAD_FAST_6] = "_LOAD_FAST_6", + [_LOAD_FAST_7] = "_LOAD_FAST_7", + [_LOAD_FAST_AND_CLEAR] = "_LOAD_FAST_AND_CLEAR", + [_LOAD_FAST_CHECK] = "_LOAD_FAST_CHECK", + [_LOAD_FAST_LOAD_FAST] = "_LOAD_FAST_LOAD_FAST", + [_LOAD_FROM_DICT_OR_DEREF] = "_LOAD_FROM_DICT_OR_DEREF", + [_LOAD_GLOBAL] = "_LOAD_GLOBAL", + [_LOAD_GLOBAL_BUILTINS] = "_LOAD_GLOBAL_BUILTINS", + [_LOAD_GLOBAL_MODULE] = "_LOAD_GLOBAL_MODULE", + [_LOAD_LOCALS] = "_LOAD_LOCALS", + [_LOAD_SUPER_ATTR_ATTR] = "_LOAD_SUPER_ATTR_ATTR", + [_LOAD_SUPER_ATTR_METHOD] = "_LOAD_SUPER_ATTR_METHOD", + [_MAKE_CELL] = "_MAKE_CELL", + [_MAKE_FUNCTION] = "_MAKE_FUNCTION", + [_MAP_ADD] = "_MAP_ADD", + [_MATCH_CLASS] = "_MATCH_CLASS", + [_MATCH_KEYS] = "_MATCH_KEYS", + [_MATCH_MAPPING] = "_MATCH_MAPPING", + [_MATCH_SEQUENCE] = "_MATCH_SEQUENCE", + [_NOP] = "_NOP", + [_POP_EXCEPT] = "_POP_EXCEPT", + [_POP_FRAME] = "_POP_FRAME", + [_POP_TOP] = "_POP_TOP", + [_POP_TOP_LOAD_CONST_INLINE_BORROW] = "_POP_TOP_LOAD_CONST_INLINE_BORROW", + [_PUSH_EXC_INFO] = "_PUSH_EXC_INFO", + [_PUSH_FRAME] = "_PUSH_FRAME", + [_PUSH_NULL] = "_PUSH_NULL", + [_PY_FRAME_GENERAL] = "_PY_FRAME_GENERAL", + [_REPLACE_WITH_TRUE] = "_REPLACE_WITH_TRUE", + [_RESUME_CHECK] = "_RESUME_CHECK", + [_RETURN_GENERATOR] = "_RETURN_GENERATOR", + [_SAVE_RETURN_OFFSET] = "_SAVE_RETURN_OFFSET", + [_SETUP_ANNOTATIONS] = "_SETUP_ANNOTATIONS", + [_SET_ADD] = "_SET_ADD", + [_SET_FUNCTION_ATTRIBUTE] = "_SET_FUNCTION_ATTRIBUTE", + [_SET_IP] = "_SET_IP", + [_SET_UPDATE] = "_SET_UPDATE", + [_START_EXECUTOR] = "_START_EXECUTOR", + [_STORE_ATTR] = "_STORE_ATTR", + [_STORE_ATTR_INSTANCE_VALUE] = "_STORE_ATTR_INSTANCE_VALUE", + [_STORE_ATTR_SLOT] = "_STORE_ATTR_SLOT", + [_STORE_DEREF] = "_STORE_DEREF", + [_STORE_FAST] = "_STORE_FAST", + [_STORE_FAST_0] = "_STORE_FAST_0", + [_STORE_FAST_1] = "_STORE_FAST_1", + [_STORE_FAST_2] = "_STORE_FAST_2", + [_STORE_FAST_3] = "_STORE_FAST_3", + [_STORE_FAST_4] = "_STORE_FAST_4", + [_STORE_FAST_5] = "_STORE_FAST_5", + [_STORE_FAST_6] = "_STORE_FAST_6", + [_STORE_FAST_7] = "_STORE_FAST_7", + [_STORE_FAST_LOAD_FAST] = "_STORE_FAST_LOAD_FAST", + [_STORE_FAST_STORE_FAST] = "_STORE_FAST_STORE_FAST", + [_STORE_GLOBAL] = "_STORE_GLOBAL", + [_STORE_NAME] = "_STORE_NAME", + 
[_STORE_SLICE] = "_STORE_SLICE", + [_STORE_SUBSCR] = "_STORE_SUBSCR", + [_STORE_SUBSCR_DICT] = "_STORE_SUBSCR_DICT", + [_STORE_SUBSCR_LIST_INT] = "_STORE_SUBSCR_LIST_INT", + [_SWAP] = "_SWAP", + [_TIER2_RESUME_CHECK] = "_TIER2_RESUME_CHECK", + [_TO_BOOL] = "_TO_BOOL", + [_TO_BOOL_BOOL] = "_TO_BOOL_BOOL", + [_TO_BOOL_INT] = "_TO_BOOL_INT", + [_TO_BOOL_LIST] = "_TO_BOOL_LIST", + [_TO_BOOL_NONE] = "_TO_BOOL_NONE", + [_TO_BOOL_STR] = "_TO_BOOL_STR", + [_UNARY_INVERT] = "_UNARY_INVERT", + [_UNARY_NEGATIVE] = "_UNARY_NEGATIVE", + [_UNARY_NOT] = "_UNARY_NOT", + [_UNPACK_EX] = "_UNPACK_EX", + [_UNPACK_SEQUENCE] = "_UNPACK_SEQUENCE", + [_UNPACK_SEQUENCE_LIST] = "_UNPACK_SEQUENCE_LIST", + [_UNPACK_SEQUENCE_TUPLE] = "_UNPACK_SEQUENCE_TUPLE", + [_UNPACK_SEQUENCE_TWO_TUPLE] = "_UNPACK_SEQUENCE_TWO_TUPLE", + [_WITH_EXCEPT_START] = "_WITH_EXCEPT_START", + [_YIELD_VALUE] = "_YIELD_VALUE", +}; +int _PyUop_num_popped(int opcode, int oparg) +{ + switch(opcode) { + case _NOP: + return 0; + case _RESUME_CHECK: + return 0; + case _LOAD_FAST_CHECK: + return 0; + case _LOAD_FAST_0: + return 0; + case _LOAD_FAST_1: + return 0; + case _LOAD_FAST_2: + return 0; + case _LOAD_FAST_3: + return 0; + case _LOAD_FAST_4: + return 0; + case _LOAD_FAST_5: + return 0; + case _LOAD_FAST_6: + return 0; + case _LOAD_FAST_7: + return 0; + case _LOAD_FAST: + return 0; + case _LOAD_FAST_AND_CLEAR: + return 0; + case _LOAD_FAST_LOAD_FAST: + return 0; + case _LOAD_CONST: + return 0; + case _STORE_FAST_0: + return 1; + case _STORE_FAST_1: + return 1; + case _STORE_FAST_2: + return 1; + case _STORE_FAST_3: + return 1; + case _STORE_FAST_4: + return 1; + case _STORE_FAST_5: + return 1; + case _STORE_FAST_6: + return 1; + case _STORE_FAST_7: + return 1; + case _STORE_FAST: + return 1; + case _STORE_FAST_LOAD_FAST: + return 1; + case _STORE_FAST_STORE_FAST: + return 2; + case _POP_TOP: + return 1; + case _PUSH_NULL: + return 0; + case _END_SEND: + return 2; + case _UNARY_NEGATIVE: + return 1; + case _UNARY_NOT: + return 1; + case _TO_BOOL: + return 1; + case _TO_BOOL_BOOL: + return 1; + case _TO_BOOL_INT: + return 1; + case _TO_BOOL_LIST: + return 1; + case _TO_BOOL_NONE: + return 1; + case _TO_BOOL_STR: + return 1; + case _REPLACE_WITH_TRUE: + return 1; + case _UNARY_INVERT: + return 1; + case _GUARD_BOTH_INT: + return 2; + case _GUARD_NOS_INT: + return 2; + case _GUARD_TOS_INT: + return 1; + case _BINARY_OP_MULTIPLY_INT: + return 2; + case _BINARY_OP_ADD_INT: + return 2; + case _BINARY_OP_SUBTRACT_INT: + return 2; + case _GUARD_BOTH_FLOAT: + return 2; + case _GUARD_NOS_FLOAT: + return 2; + case _GUARD_TOS_FLOAT: + return 1; + case _BINARY_OP_MULTIPLY_FLOAT: + return 2; + case _BINARY_OP_ADD_FLOAT: + return 2; + case _BINARY_OP_SUBTRACT_FLOAT: + return 2; + case _GUARD_BOTH_UNICODE: + return 2; + case _BINARY_OP_ADD_UNICODE: + return 2; + case _BINARY_SUBSCR: + return 2; + case _BINARY_SLICE: + return 3; + case _STORE_SLICE: + return 4; + case _BINARY_SUBSCR_LIST_INT: + return 2; + case _BINARY_SUBSCR_STR_INT: + return 2; + case _BINARY_SUBSCR_TUPLE_INT: + return 2; + case _BINARY_SUBSCR_DICT: + return 2; + case _LIST_APPEND: + return 2 + (oparg-1); + case _SET_ADD: + return 2 + (oparg-1); + case _STORE_SUBSCR: + return 3; + case _STORE_SUBSCR_LIST_INT: + return 3; + case _STORE_SUBSCR_DICT: + return 3; + case _DELETE_SUBSCR: + return 2; + case _CALL_INTRINSIC_1: + return 1; + case _CALL_INTRINSIC_2: + return 2; + case _POP_FRAME: + return 1; + case _GET_AITER: + return 1; + case _GET_ANEXT: + return 1; + case _GET_AWAITABLE: + return 1; 
+ case _YIELD_VALUE: + return 1; + case _POP_EXCEPT: + return 1; + case _LOAD_ASSERTION_ERROR: + return 0; + case _LOAD_BUILD_CLASS: + return 0; + case _STORE_NAME: + return 1; + case _DELETE_NAME: + return 0; + case _UNPACK_SEQUENCE: + return 1; + case _UNPACK_SEQUENCE_TWO_TUPLE: + return 1; + case _UNPACK_SEQUENCE_TUPLE: + return 1; + case _UNPACK_SEQUENCE_LIST: + return 1; + case _UNPACK_EX: + return 1; + case _STORE_ATTR: + return 2; + case _DELETE_ATTR: + return 1; + case _STORE_GLOBAL: + return 1; + case _DELETE_GLOBAL: + return 0; + case _LOAD_LOCALS: + return 0; + case _LOAD_GLOBAL: + return 0; + case _GUARD_GLOBALS_VERSION: + return 0; + case _GUARD_BUILTINS_VERSION: + return 0; + case _LOAD_GLOBAL_MODULE: + return 0; + case _LOAD_GLOBAL_BUILTINS: + return 0; + case _DELETE_FAST: + return 0; + case _MAKE_CELL: + return 0; + case _DELETE_DEREF: + return 0; + case _LOAD_FROM_DICT_OR_DEREF: + return 1; + case _LOAD_DEREF: + return 0; + case _STORE_DEREF: + return 1; + case _COPY_FREE_VARS: + return 0; + case _BUILD_STRING: + return oparg; + case _BUILD_TUPLE: + return oparg; + case _BUILD_LIST: + return oparg; + case _LIST_EXTEND: + return 2 + (oparg-1); + case _SET_UPDATE: + return 2 + (oparg-1); + case _BUILD_MAP: + return oparg*2; + case _SETUP_ANNOTATIONS: + return 0; + case _BUILD_CONST_KEY_MAP: + return 1 + oparg; + case _DICT_UPDATE: + return 2 + (oparg - 1); + case _DICT_MERGE: + return 5 + (oparg - 1); + case _MAP_ADD: + return 3 + (oparg - 1); + case _LOAD_SUPER_ATTR_ATTR: + return 3; + case _LOAD_SUPER_ATTR_METHOD: + return 3; + case _LOAD_ATTR: + return 1; + case _GUARD_TYPE_VERSION: + return 1; + case _CHECK_MANAGED_OBJECT_HAS_VALUES: + return 1; + case _LOAD_ATTR_INSTANCE_VALUE_0: + return 1; + case _LOAD_ATTR_INSTANCE_VALUE_1: + return 1; + case _LOAD_ATTR_INSTANCE_VALUE: + return 1; + case _CHECK_ATTR_MODULE: + return 1; + case _LOAD_ATTR_MODULE: + return 1; + case _CHECK_ATTR_WITH_HINT: + return 1; + case _LOAD_ATTR_WITH_HINT: + return 1; + case _LOAD_ATTR_SLOT_0: + return 1; + case _LOAD_ATTR_SLOT_1: + return 1; + case _LOAD_ATTR_SLOT: + return 1; + case _CHECK_ATTR_CLASS: + return 1; + case _LOAD_ATTR_CLASS_0: + return 1; + case _LOAD_ATTR_CLASS_1: + return 1; + case _LOAD_ATTR_CLASS: + return 1; + case _GUARD_DORV_NO_DICT: + return 1; + case _STORE_ATTR_INSTANCE_VALUE: + return 2; + case _STORE_ATTR_SLOT: + return 2; + case _COMPARE_OP: + return 2; + case _COMPARE_OP_FLOAT: + return 2; + case _COMPARE_OP_INT: + return 2; + case _COMPARE_OP_STR: + return 2; + case _IS_OP: + return 2; + case _CONTAINS_OP: + return 2; + case _CONTAINS_OP_SET: + return 2; + case _CONTAINS_OP_DICT: + return 2; + case _CHECK_EG_MATCH: + return 2; + case _CHECK_EXC_MATCH: + return 2; + case _IS_NONE: + return 1; + case _GET_LEN: + return 1; + case _MATCH_CLASS: + return 3; + case _MATCH_MAPPING: + return 1; + case _MATCH_SEQUENCE: + return 1; + case _MATCH_KEYS: + return 2; + case _GET_ITER: + return 1; + case _GET_YIELD_FROM_ITER: + return 1; + case _FOR_ITER_TIER_TWO: + return 1; + case _ITER_CHECK_LIST: + return 1; + case _GUARD_NOT_EXHAUSTED_LIST: + return 1; + case _ITER_NEXT_LIST: + return 1; + case _ITER_CHECK_TUPLE: + return 1; + case _GUARD_NOT_EXHAUSTED_TUPLE: + return 1; + case _ITER_NEXT_TUPLE: + return 1; + case _ITER_CHECK_RANGE: + return 1; + case _GUARD_NOT_EXHAUSTED_RANGE: + return 1; + case _ITER_NEXT_RANGE: + return 1; + case _FOR_ITER_GEN_FRAME: + return 1; + case _WITH_EXCEPT_START: + return 4; + case _PUSH_EXC_INFO: + return 1; + case 
_GUARD_DORV_VALUES_INST_ATTR_FROM_DICT: + return 1; + case _GUARD_KEYS_VERSION: + return 1; + case _LOAD_ATTR_METHOD_WITH_VALUES: + return 1; + case _LOAD_ATTR_METHOD_NO_DICT: + return 1; + case _LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES: + return 1; + case _LOAD_ATTR_NONDESCRIPTOR_NO_DICT: + return 1; + case _CHECK_ATTR_METHOD_LAZY_DICT: + return 1; + case _LOAD_ATTR_METHOD_LAZY_DICT: + return 1; + case _CHECK_PERIODIC: + return 0; + case _PY_FRAME_GENERAL: + return 2 + oparg; + case _CHECK_FUNCTION_VERSION: + return 2 + oparg; + case _CHECK_METHOD_VERSION: + return 2 + oparg; + case _EXPAND_METHOD: + return 2 + oparg; + case _CHECK_IS_NOT_PY_CALLABLE: + return 2 + oparg; + case _CALL_NON_PY_GENERAL: + return 2 + oparg; + case _CHECK_CALL_BOUND_METHOD_EXACT_ARGS: + return 2 + oparg; + case _INIT_CALL_BOUND_METHOD_EXACT_ARGS: + return 2 + oparg; + case _CHECK_PEP_523: + return 0; + case _CHECK_FUNCTION_EXACT_ARGS: + return 2 + oparg; + case _CHECK_STACK_SPACE: + return 2 + oparg; + case _INIT_CALL_PY_EXACT_ARGS_0: + return 2 + oparg; + case _INIT_CALL_PY_EXACT_ARGS_1: + return 2 + oparg; + case _INIT_CALL_PY_EXACT_ARGS_2: + return 2 + oparg; + case _INIT_CALL_PY_EXACT_ARGS_3: + return 2 + oparg; + case _INIT_CALL_PY_EXACT_ARGS_4: + return 2 + oparg; + case _INIT_CALL_PY_EXACT_ARGS: + return 2 + oparg; + case _PUSH_FRAME: + return 1; + case _CALL_TYPE_1: + return 3; + case _CALL_STR_1: + return 3; + case _CALL_TUPLE_1: + return 3; + case _EXIT_INIT_CHECK: + return 1; + case _CALL_BUILTIN_CLASS: + return 2 + oparg; + case _CALL_BUILTIN_O: + return 2 + oparg; + case _CALL_BUILTIN_FAST: + return 2 + oparg; + case _CALL_BUILTIN_FAST_WITH_KEYWORDS: + return 2 + oparg; + case _CALL_LEN: + return 2 + oparg; + case _CALL_ISINSTANCE: + return 2 + oparg; + case _CALL_METHOD_DESCRIPTOR_O: + return 2 + oparg; + case _CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS: + return 2 + oparg; + case _CALL_METHOD_DESCRIPTOR_NOARGS: + return 2 + oparg; + case _CALL_METHOD_DESCRIPTOR_FAST: + return 2 + oparg; + case _MAKE_FUNCTION: + return 1; + case _SET_FUNCTION_ATTRIBUTE: + return 2; + case _RETURN_GENERATOR: + return 0; + case _BUILD_SLICE: + return 2 + ((oparg == 3) ? 
1 : 0); + case _CONVERT_VALUE: + return 1; + case _FORMAT_SIMPLE: + return 1; + case _FORMAT_WITH_SPEC: + return 2; + case _COPY: + return 1 + (oparg-1); + case _BINARY_OP: + return 2; + case _SWAP: + return 2 + (oparg-2); + case _GUARD_IS_TRUE_POP: + return 1; + case _GUARD_IS_FALSE_POP: + return 1; + case _GUARD_IS_NONE_POP: + return 1; + case _GUARD_IS_NOT_NONE_POP: + return 1; + case _JUMP_TO_TOP: + return 0; + case _SET_IP: + return 0; + case _CHECK_STACK_SPACE_OPERAND: + return 0; + case _SAVE_RETURN_OFFSET: + return 0; + case _EXIT_TRACE: + return 0; + case _CHECK_VALIDITY: + return 0; + case _LOAD_CONST_INLINE: + return 0; + case _LOAD_CONST_INLINE_BORROW: + return 0; + case _POP_TOP_LOAD_CONST_INLINE_BORROW: + return 1; + case _LOAD_CONST_INLINE_WITH_NULL: + return 0; + case _LOAD_CONST_INLINE_BORROW_WITH_NULL: + return 0; + case _CHECK_FUNCTION: + return 0; + case _INTERNAL_INCREMENT_OPT_COUNTER: + return 1; + case _COLD_EXIT: + return 0; + case _DYNAMIC_EXIT: + return 0; + case _START_EXECUTOR: + return 0; + case _FATAL_ERROR: + return 0; + case _CHECK_VALIDITY_AND_SET_IP: + return 0; + case _DEOPT: + return 0; + case _ERROR_POP_N: + return oparg; + case _TIER2_RESUME_CHECK: + return 0; + default: + return -1; + } +} + +#endif // NEED_OPCODE_METADATA + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_CORE_UOP_METADATA_H */ diff --git a/Include/internal/pycore_warnings.h b/Include/internal/pycore_warnings.h new file mode 100644 index 0000000000000000000000000000000000000000..f9f6559312f4ef670c16a16092c788a0ef428d79 --- /dev/null +++ b/Include/internal/pycore_warnings.h @@ -0,0 +1,31 @@ +#ifndef Py_INTERNAL_WARNINGS_H +#define Py_INTERNAL_WARNINGS_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +struct _warnings_runtime_state { + /* Both 'filters' and 'onceregistry' can be set in warnings.py; + get_warnings_attr() will reset these variables accordingly. 
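The stack-effect function just defined is table data in code form: most uops pop a fixed count, but the variadic ones (builds, calls, `_COPY`/`_SWAP`) scale with `oparg`, which is why the signature takes both, and an unknown ID returns -1 rather than asserting. A few illustrative expectations matching the cases above (a core build with `NEED_OPCODE_METADATA` defined in this one file is assumed):

```c
#include <assert.h>
#define NEED_OPCODE_METADATA                 // materialize the definitions here
#include "internal/pycore_uop_metadata.h"

int main(void)
{
    assert(_PyUop_num_popped(_POP_TOP, 0) == 1);        // fixed effect
    assert(_PyUop_num_popped(_BUILD_TUPLE, 3) == 3);    // pops oparg items
    assert(_PyUop_num_popped(_CALL_BUILTIN_O, 1) == 3); // callable + self + oparg args
    assert(_PyUop_num_popped(9999, 0) == -1);           // out-of-range uop
    return 0;
}
```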
*/ + PyObject *filters; /* List */ + PyObject *once_registry; /* Dict */ + PyObject *default_action; /* String */ + PyMutex mutex; + long filters_version; +}; + +extern int _PyWarnings_InitState(PyInterpreterState *interp); + +extern PyObject* _PyWarnings_Init(void); + +extern void _PyErr_WarnUnawaitedCoroutine(PyObject *coro); +extern void _PyErr_WarnUnawaitedAgenMethod(PyAsyncGenObject *agen, PyObject *method); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_WARNINGS_H */ diff --git a/Include/internal/pycore_weakref.h b/Include/internal/pycore_weakref.h new file mode 100644 index 0000000000000000000000000000000000000000..ff1395ea837dcb832761afb5ef3c1d4fb430bfb3 --- /dev/null +++ b/Include/internal/pycore_weakref.h @@ -0,0 +1,133 @@ +#ifndef Py_INTERNAL_WEAKREF_H +#define Py_INTERNAL_WEAKREF_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_BUILD_CORE +# error "this header requires Py_BUILD_CORE define" +#endif + +#include "pycore_critical_section.h" // Py_BEGIN_CRITICAL_SECTION() +#include "pycore_lock.h" +#include "pycore_object.h" // _Py_REF_IS_MERGED() +#include "pycore_pyatomic_ft_wrappers.h" + +#ifdef Py_GIL_DISABLED + +#define WEAKREF_LIST_LOCK(obj) \ + _PyInterpreterState_GET() \ + ->weakref_locks[((uintptr_t)obj) % NUM_WEAKREF_LIST_LOCKS] + +// Lock using the referenced object +#define LOCK_WEAKREFS(obj) \ + PyMutex_LockFlags(&WEAKREF_LIST_LOCK(obj), _Py_LOCK_DONT_DETACH) +#define UNLOCK_WEAKREFS(obj) PyMutex_Unlock(&WEAKREF_LIST_LOCK(obj)) + +// Lock using a weakref +#define LOCK_WEAKREFS_FOR_WR(wr) \ + PyMutex_LockFlags(wr->weakrefs_lock, _Py_LOCK_DONT_DETACH) +#define UNLOCK_WEAKREFS_FOR_WR(wr) PyMutex_Unlock(wr->weakrefs_lock) + +#define FT_CLEAR_WEAKREFS(obj, weakref_list) \ + do { \ + assert(Py_REFCNT(obj) == 0); \ + PyObject_ClearWeakRefs(obj); \ + } while (0) + +#else + +#define LOCK_WEAKREFS(obj) +#define UNLOCK_WEAKREFS(obj) + +#define LOCK_WEAKREFS_FOR_WR(wr) +#define UNLOCK_WEAKREFS_FOR_WR(wr) + +#define FT_CLEAR_WEAKREFS(obj, weakref_list) \ + do { \ + assert(Py_REFCNT(obj) == 0); \ + if (weakref_list != NULL) { \ + PyObject_ClearWeakRefs(obj); \ + } \ + } while (0) + +#endif + +static inline int _is_dead(PyObject *obj) +{ + // Explanation for the Py_REFCNT() check: when a weakref's target is part + // of a long chain of deallocations which triggers the trashcan mechanism, + // clearing the weakrefs can be delayed long after the target's refcount + // has dropped to zero. In the meantime, code accessing the weakref will + // be able to "see" the target object even though it is supposed to be + // unreachable. See issue gh-60806. 
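In the free-threaded build the dead check cannot use `Py_REFCNT()` directly either: an object is only definitively dead once its shared refcount field has merged to zero, which is what the atomic comparison in the `#if defined(Py_GIL_DISABLED)` branch just below tests. The locking macros above follow a matching design: instead of a per-object lock word, each object hashes to one mutex out of a small fixed pool in the interpreter state, so unrelated objects may share a lock, but at most one list lock is ever held at a time. A hedged sketch of that selection, mirroring `WEAKREF_LIST_LOCK` (the `weakref_locks` array and `NUM_WEAKREF_LIST_LOCKS` are assumed to live in the internal interpreter-state definition):

```c
// Illustrative only: how an object maps to its weakref-list lock under
// Py_GIL_DISABLED. Sharding by address trades a little lock sharing for
// zero per-object storage.
static inline PyMutex *
weakref_lock_for(PyInterpreterState *interp, PyObject *obj)
{
    return &interp->weakref_locks[(uintptr_t)obj % NUM_WEAKREF_LIST_LOCKS];
}
```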
+#if defined(Py_GIL_DISABLED) + Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&obj->ob_ref_shared); + return shared == _Py_REF_SHARED(0, _Py_REF_MERGED); +#else + return (Py_REFCNT(obj) == 0); +#endif +} + +static inline PyObject* _PyWeakref_GET_REF(PyObject *ref_obj) +{ + assert(PyWeakref_Check(ref_obj)); + PyWeakReference *ref = _Py_CAST(PyWeakReference*, ref_obj); + + PyObject *obj = FT_ATOMIC_LOAD_PTR(ref->wr_object); + if (obj == Py_None) { + // clear_weakref() was called + return NULL; + } + + LOCK_WEAKREFS(obj); +#ifdef Py_GIL_DISABLED + if (ref->wr_object == Py_None) { + // clear_weakref() was called + UNLOCK_WEAKREFS(obj); + return NULL; + } +#endif + if (_Py_TryIncref(obj)) { + UNLOCK_WEAKREFS(obj); + return obj; + } + UNLOCK_WEAKREFS(obj); + return NULL; +} + +static inline int _PyWeakref_IS_DEAD(PyObject *ref_obj) +{ + assert(PyWeakref_Check(ref_obj)); + int ret = 0; + PyWeakReference *ref = _Py_CAST(PyWeakReference*, ref_obj); + PyObject *obj = FT_ATOMIC_LOAD_PTR(ref->wr_object); + if (obj == Py_None) { + // clear_weakref() was called + ret = 1; + } + else { + LOCK_WEAKREFS(obj); + // See _PyWeakref_GET_REF() for the rationale of this test +#ifdef Py_GIL_DISABLED + ret = (ref->wr_object == Py_None) || _is_dead(obj); +#else + ret = _is_dead(obj); +#endif + UNLOCK_WEAKREFS(obj); + } + return ret; +} + +extern Py_ssize_t _PyWeakref_GetWeakrefCount(PyObject *obj); + +// Clear all the weak references to obj but leave their callbacks uncalled and +// intact. +extern void _PyWeakref_ClearWeakRefsNoCallbacks(PyObject *obj); + +PyAPI_FUNC(int) _PyWeakref_IsDead(PyObject *weakref); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTERNAL_WEAKREF_H */ diff --git a/Include/intrcheck.h b/Include/intrcheck.h new file mode 100644 index 0000000000000000000000000000000000000000..1d1feee83de483033212e158aa4a131b2cc17cd8 --- /dev/null +++ b/Include/intrcheck.h @@ -0,0 +1,23 @@ +#ifndef Py_INTRCHECK_H +#define Py_INTRCHECK_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_FUNC(int) PyOS_InterruptOccurred(void); + +#ifdef HAVE_FORK +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03070000 +PyAPI_FUNC(void) PyOS_BeforeFork(void); +PyAPI_FUNC(void) PyOS_AfterFork_Parent(void); +PyAPI_FUNC(void) PyOS_AfterFork_Child(void); +#endif +#endif + +/* Deprecated, please use PyOS_AfterFork_Child() instead */ +Py_DEPRECATED(3.7) PyAPI_FUNC(void) PyOS_AfterFork(void); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_INTRCHECK_H */ diff --git a/Include/iterobject.h b/Include/iterobject.h new file mode 100644 index 0000000000000000000000000000000000000000..e69d09719bb4d12be2e22142b35ddaf3b0afb707 --- /dev/null +++ b/Include/iterobject.h @@ -0,0 +1,24 @@ +#ifndef Py_ITEROBJECT_H +#define Py_ITEROBJECT_H +/* Iterators (the basic kind, over a sequence) */ +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_DATA(PyTypeObject) PySeqIter_Type; +PyAPI_DATA(PyTypeObject) PyCallIter_Type; + +#define PySeqIter_Check(op) Py_IS_TYPE((op), &PySeqIter_Type) + +PyAPI_FUNC(PyObject *) PySeqIter_New(PyObject *); + + +#define PyCallIter_Check(op) Py_IS_TYPE((op), &PyCallIter_Type) + +PyAPI_FUNC(PyObject *) PyCallIter_New(PyObject *, PyObject *); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_ITEROBJECT_H */ + diff --git a/Include/listobject.h b/Include/listobject.h new file mode 100644 index 0000000000000000000000000000000000000000..e1e059b0ba7466c4a6d6fd54e5fe70bd99f4e8c5 --- /dev/null +++ b/Include/listobject.h @@ -0,0 +1,55 @@ +/* List object interface + + Another generally useful object type is a list of object 
pointers. + This is a mutable type: the list items can be changed, and items can be + added or removed. Out-of-range indices or non-list objects are ignored. + + WARNING: PyList_SetItem does not increment the new item's reference count, + but does decrement the reference count of the item it replaces, if not nil. + It does *decrement* the reference count if it is *not* inserted in the list. + Similarly, PyList_GetItem does not increment the returned item's reference + count. +*/ + +#ifndef Py_LISTOBJECT_H +#define Py_LISTOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_DATA(PyTypeObject) PyList_Type; +PyAPI_DATA(PyTypeObject) PyListIter_Type; +PyAPI_DATA(PyTypeObject) PyListRevIter_Type; + +#define PyList_Check(op) \ + PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_LIST_SUBCLASS) +#define PyList_CheckExact(op) Py_IS_TYPE((op), &PyList_Type) + +PyAPI_FUNC(PyObject *) PyList_New(Py_ssize_t size); +PyAPI_FUNC(Py_ssize_t) PyList_Size(PyObject *); + +PyAPI_FUNC(PyObject *) PyList_GetItem(PyObject *, Py_ssize_t); +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030d0000 +PyAPI_FUNC(PyObject *) PyList_GetItemRef(PyObject *, Py_ssize_t); +#endif +PyAPI_FUNC(int) PyList_SetItem(PyObject *, Py_ssize_t, PyObject *); +PyAPI_FUNC(int) PyList_Insert(PyObject *, Py_ssize_t, PyObject *); +PyAPI_FUNC(int) PyList_Append(PyObject *, PyObject *); + +PyAPI_FUNC(PyObject *) PyList_GetSlice(PyObject *, Py_ssize_t, Py_ssize_t); +PyAPI_FUNC(int) PyList_SetSlice(PyObject *, Py_ssize_t, Py_ssize_t, PyObject *); + +PyAPI_FUNC(int) PyList_Sort(PyObject *); +PyAPI_FUNC(int) PyList_Reverse(PyObject *); +PyAPI_FUNC(PyObject *) PyList_AsTuple(PyObject *); + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_LISTOBJECT_H +# include "cpython/listobject.h" +# undef Py_CPYTHON_LISTOBJECT_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_LISTOBJECT_H */ diff --git a/Include/lock.h b/Include/lock.h new file mode 100644 index 0000000000000000000000000000000000000000..782b9dbc70d056994c6bbd90d6a5606ef57369d2 --- /dev/null +++ b/Include/lock.h @@ -0,0 +1,16 @@ +#ifndef Py_LOCK_H +#define Py_LOCK_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_LOCK_H +# include "cpython/lock.h" +# undef Py_CPYTHON_LOCK_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_LOCK_H */ diff --git a/Include/longobject.h b/Include/longobject.h new file mode 100644 index 0000000000000000000000000000000000000000..19104cd9d1bef9708e7d5e17bfc11dc5bab254bf --- /dev/null +++ b/Include/longobject.h @@ -0,0 +1,114 @@ +#ifndef Py_LONGOBJECT_H +#define Py_LONGOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + + +/* Long (arbitrary precision) integer object interface */ + +// PyLong_Type is declared by object.h + +#define PyLong_Check(op) \ + PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_LONG_SUBCLASS) +#define PyLong_CheckExact(op) Py_IS_TYPE((op), &PyLong_Type) + +PyAPI_FUNC(PyObject *) PyLong_FromLong(long); +PyAPI_FUNC(PyObject *) PyLong_FromUnsignedLong(unsigned long); +PyAPI_FUNC(PyObject *) PyLong_FromSize_t(size_t); +PyAPI_FUNC(PyObject *) PyLong_FromSsize_t(Py_ssize_t); +PyAPI_FUNC(PyObject *) PyLong_FromDouble(double); + +PyAPI_FUNC(long) PyLong_AsLong(PyObject *); +PyAPI_FUNC(long) PyLong_AsLongAndOverflow(PyObject *, int *); +PyAPI_FUNC(Py_ssize_t) PyLong_AsSsize_t(PyObject *); +PyAPI_FUNC(size_t) PyLong_AsSize_t(PyObject *); +PyAPI_FUNC(unsigned long) PyLong_AsUnsignedLong(PyObject *); +PyAPI_FUNC(unsigned long) PyLong_AsUnsignedLongMask(PyObject *); + +#if !defined(Py_LIMITED_API) || 
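The WARNING in the list interface above is one of the oldest sharp edges in the C API, so a minimal sketch of the ownership rules may help (an initialized interpreter is assumed): `PyList_SetItem()` steals the reference to the new item, even on failure, while `PyList_GetItem()` hands back a borrowed reference the caller must not release.

```c
#include <Python.h>

static PyObject *
make_singleton_list(long value)
{
    PyObject *list = PyList_New(1);           // new reference; the slot starts NULL
    if (list == NULL) {
        return NULL;
    }
    PyObject *item = PyLong_FromLong(value);  // new reference, ours for now
    if (item == NULL || PyList_SetItem(list, 0, item) < 0) {
        // On failure PyList_SetItem has already consumed `item`,
        // so only the list itself is left to clean up.
        Py_DECREF(list);
        return NULL;
    }
    assert(PyList_GetItem(list, 0) == item);  // borrowed: no Py_DECREF here
    return list;                              // caller owns the list
}
```

The `PyList_GetItemRef()` declared just below (3.13 and later) sidesteps the borrowed-reference pitfall entirely by returning a new reference.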
Py_LIMITED_API+0 >= 0x030d0000 +PyAPI_FUNC(int) PyLong_AsInt(PyObject *); +#endif + +PyAPI_FUNC(PyObject *) PyLong_GetInfo(void); + +/* It may be useful in the future. I've added it in the PyInt -> PyLong + cleanup to keep the extra information. [CH] */ +#define PyLong_AS_LONG(op) PyLong_AsLong(op) + +/* Issue #1983: pid_t can be longer than a C long on some systems */ +#if !defined(SIZEOF_PID_T) || SIZEOF_PID_T == SIZEOF_INT +#define _Py_PARSE_PID "i" +#define PyLong_FromPid PyLong_FromLong +# if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030d0000 +# define PyLong_AsPid PyLong_AsInt +# elif SIZEOF_INT == SIZEOF_LONG +# define PyLong_AsPid PyLong_AsLong +# else +static inline int +PyLong_AsPid(PyObject *obj) +{ + int overflow; + long result = PyLong_AsLongAndOverflow(obj, &overflow); + if (overflow || result > INT_MAX || result < INT_MIN) { + PyErr_SetString(PyExc_OverflowError, + "Python int too large to convert to C int"); + return -1; + } + return (int)result; +} +# endif +#elif SIZEOF_PID_T == SIZEOF_LONG +#define _Py_PARSE_PID "l" +#define PyLong_FromPid PyLong_FromLong +#define PyLong_AsPid PyLong_AsLong +#elif defined(SIZEOF_LONG_LONG) && SIZEOF_PID_T == SIZEOF_LONG_LONG +#define _Py_PARSE_PID "L" +#define PyLong_FromPid PyLong_FromLongLong +#define PyLong_AsPid PyLong_AsLongLong +#else +#error "sizeof(pid_t) is neither sizeof(int), sizeof(long) or sizeof(long long)" +#endif /* SIZEOF_PID_T */ + +#if SIZEOF_VOID_P == SIZEOF_INT +# define _Py_PARSE_INTPTR "i" +# define _Py_PARSE_UINTPTR "I" +#elif SIZEOF_VOID_P == SIZEOF_LONG +# define _Py_PARSE_INTPTR "l" +# define _Py_PARSE_UINTPTR "k" +#elif defined(SIZEOF_LONG_LONG) && SIZEOF_VOID_P == SIZEOF_LONG_LONG +# define _Py_PARSE_INTPTR "L" +# define _Py_PARSE_UINTPTR "K" +#else +# error "void* different in size from int, long and long long" +#endif /* SIZEOF_VOID_P */ + +PyAPI_FUNC(double) PyLong_AsDouble(PyObject *); +PyAPI_FUNC(PyObject *) PyLong_FromVoidPtr(void *); +PyAPI_FUNC(void *) PyLong_AsVoidPtr(PyObject *); + +PyAPI_FUNC(PyObject *) PyLong_FromLongLong(long long); +PyAPI_FUNC(PyObject *) PyLong_FromUnsignedLongLong(unsigned long long); +PyAPI_FUNC(long long) PyLong_AsLongLong(PyObject *); +PyAPI_FUNC(unsigned long long) PyLong_AsUnsignedLongLong(PyObject *); +PyAPI_FUNC(unsigned long long) PyLong_AsUnsignedLongLongMask(PyObject *); +PyAPI_FUNC(long long) PyLong_AsLongLongAndOverflow(PyObject *, int *); + +PyAPI_FUNC(PyObject *) PyLong_FromString(const char *, char **, int); + +/* These aren't really part of the int object, but they're handy. The + functions are in Python/mystrtoul.c. 
+ */ +PyAPI_FUNC(unsigned long) PyOS_strtoul(const char *, char **, int); +PyAPI_FUNC(long) PyOS_strtol(const char *, char **, int); + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_LONGOBJECT_H +# include "cpython/longobject.h" +# undef Py_CPYTHON_LONGOBJECT_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_LONGOBJECT_H */ diff --git a/Include/marshal.h b/Include/marshal.h new file mode 100644 index 0000000000000000000000000000000000000000..f8b0de80cfc38df628992918fc1e6cd19cdf4a6b --- /dev/null +++ b/Include/marshal.h @@ -0,0 +1,31 @@ + +/* Interface for marshal.c */ + +#ifndef Py_MARSHAL_H +#define Py_MARSHAL_H +#ifndef Py_LIMITED_API + +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_FUNC(PyObject *) PyMarshal_ReadObjectFromString(const char *, + Py_ssize_t); +PyAPI_FUNC(PyObject *) PyMarshal_WriteObjectToString(PyObject *, int); + +#define Py_MARSHAL_VERSION 4 + +PyAPI_FUNC(long) PyMarshal_ReadLongFromFile(FILE *); +PyAPI_FUNC(int) PyMarshal_ReadShortFromFile(FILE *); +PyAPI_FUNC(PyObject *) PyMarshal_ReadObjectFromFile(FILE *); +PyAPI_FUNC(PyObject *) PyMarshal_ReadLastObjectFromFile(FILE *); + +PyAPI_FUNC(void) PyMarshal_WriteLongToFile(long, FILE *, int); +PyAPI_FUNC(void) PyMarshal_WriteObjectToFile(PyObject *, FILE *, int); + +#ifdef __cplusplus +} +#endif + +#endif /* Py_LIMITED_API */ +#endif /* !Py_MARSHAL_H */ diff --git a/Include/memoryobject.h b/Include/memoryobject.h new file mode 100644 index 0000000000000000000000000000000000000000..2c9146aa2b5b06ee2e5c54ec03f180a970e33a72 --- /dev/null +++ b/Include/memoryobject.h @@ -0,0 +1,34 @@ +/* Memory view object. In Python this is available as "memoryview". */ + +#ifndef Py_MEMORYOBJECT_H +#define Py_MEMORYOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_DATA(PyTypeObject) PyMemoryView_Type; + +#define PyMemoryView_Check(op) Py_IS_TYPE((op), &PyMemoryView_Type) + +PyAPI_FUNC(PyObject *) PyMemoryView_FromObject(PyObject *base); +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 +PyAPI_FUNC(PyObject *) PyMemoryView_FromMemory(char *mem, Py_ssize_t size, + int flags); +#endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030b0000 +PyAPI_FUNC(PyObject *) PyMemoryView_FromBuffer(const Py_buffer *info); +#endif +PyAPI_FUNC(PyObject *) PyMemoryView_GetContiguous(PyObject *base, + int buffertype, + char order); + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_MEMORYOBJECT_H +# include "cpython/memoryobject.h" +# undef Py_CPYTHON_MEMORYOBJECT_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_MEMORYOBJECT_H */ diff --git a/Include/methodobject.h b/Include/methodobject.h new file mode 100644 index 0000000000000000000000000000000000000000..39272815b127f4e8c394cc95ce6aa17464f5b962 --- /dev/null +++ b/Include/methodobject.h @@ -0,0 +1,137 @@ + +/* Method object interface */ + +#ifndef Py_METHODOBJECT_H +#define Py_METHODOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +/* This is about the type 'builtin_function_or_method', + not Python methods in user-defined classes. See classobject.h + for the latter. 
*/ + +PyAPI_DATA(PyTypeObject) PyCFunction_Type; + +#define PyCFunction_CheckExact(op) Py_IS_TYPE((op), &PyCFunction_Type) +#define PyCFunction_Check(op) PyObject_TypeCheck((op), &PyCFunction_Type) + +typedef PyObject *(*PyCFunction)(PyObject *, PyObject *); +typedef PyObject *(*PyCFunctionFast) (PyObject *, PyObject *const *, Py_ssize_t); +typedef PyObject *(*PyCFunctionWithKeywords)(PyObject *, PyObject *, + PyObject *); +typedef PyObject *(*PyCFunctionFastWithKeywords) (PyObject *, + PyObject *const *, Py_ssize_t, + PyObject *); +typedef PyObject *(*PyCMethod)(PyObject *, PyTypeObject *, PyObject *const *, + size_t, PyObject *); + +// For backwards compatibility. `METH_FASTCALL` was added to the stable API in +// 3.10 alongside `_PyCFunctionFastWithKeywords` and `_PyCFunctionFast`. +// Note that the underscore-prefixed names were documented in public docs; +// people may be using them. +typedef PyCFunctionFast _PyCFunctionFast; +typedef PyCFunctionFastWithKeywords _PyCFunctionFastWithKeywords; + +// Cast an function to the PyCFunction type to use it with PyMethodDef. +// +// This macro can be used to prevent compiler warnings if the first parameter +// uses a different pointer type than PyObject* (ex: METH_VARARGS and METH_O +// calling conventions). +// +// The macro can also be used for METH_FASTCALL and METH_VARARGS|METH_KEYWORDS +// calling conventions to avoid compiler warnings because the function has more +// than 2 parameters. The macro first casts the function to the +// "void func(void)" type to prevent compiler warnings. +// +// If a function is declared with the METH_NOARGS calling convention, it must +// have 2 parameters. Since the second parameter is unused, Py_UNUSED() can be +// used to prevent a compiler warning. If the function has a single parameter, +// it triggers an undefined behavior when Python calls it with 2 parameters +// (bpo-33012). +#define _PyCFunction_CAST(func) \ + _Py_CAST(PyCFunction, _Py_CAST(void(*)(void), (func))) + +PyAPI_FUNC(PyCFunction) PyCFunction_GetFunction(PyObject *); +PyAPI_FUNC(PyObject *) PyCFunction_GetSelf(PyObject *); +PyAPI_FUNC(int) PyCFunction_GetFlags(PyObject *); + +struct PyMethodDef { + const char *ml_name; /* The name of the built-in function/method */ + PyCFunction ml_meth; /* The C function that implements it */ + int ml_flags; /* Combination of METH_xxx flags, which mostly + describe the args expected by the C func */ + const char *ml_doc; /* The __doc__ attribute, or NULL */ +}; + +/* PyCFunction_New is declared as a function for stable ABI (declaration is + * needed for e.g. GCC with -fvisibility=hidden), but redefined as a macro + * that calls PyCFunction_NewEx. */ +PyAPI_FUNC(PyObject *) PyCFunction_New(PyMethodDef *, PyObject *); +#define PyCFunction_New(ML, SELF) PyCFunction_NewEx((ML), (SELF), NULL) + +/* PyCFunction_NewEx is similar: on 3.9+, this calls PyCMethod_New. */ +PyAPI_FUNC(PyObject *) PyCFunction_NewEx(PyMethodDef *, PyObject *, + PyObject *); + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03090000 +#define PyCFunction_NewEx(ML, SELF, MOD) PyCMethod_New((ML), (SELF), (MOD), NULL) +PyAPI_FUNC(PyObject *) PyCMethod_New(PyMethodDef *, PyObject *, + PyObject *, PyTypeObject *); +#endif + + +/* Flag passed to newmethodobject */ +/* #define METH_OLDARGS 0x0000 -- unsupported now */ +#define METH_VARARGS 0x0001 +#define METH_KEYWORDS 0x0002 +/* METH_NOARGS and METH_O must not be combined with the flags above. 
*/ +#define METH_NOARGS 0x0004 +#define METH_O 0x0008 + +/* METH_CLASS and METH_STATIC are a little different; these control + the construction of methods for a class. These cannot be used for + functions in modules. */ +#define METH_CLASS 0x0010 +#define METH_STATIC 0x0020 + +/* METH_COEXIST allows a method to be entered even though a slot has + already filled the entry. When defined, the flag allows a separate + method, "__contains__" for example, to coexist with a defined + slot like sq_contains. */ + +#define METH_COEXIST 0x0040 + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030a0000 +# define METH_FASTCALL 0x0080 +#endif + +/* This bit is preserved for Stackless Python */ +#ifdef STACKLESS +# define METH_STACKLESS 0x0100 +#else +# define METH_STACKLESS 0x0000 +#endif + +/* METH_METHOD means the function stores an + * additional reference to the class that defines it; + * both self and class are passed to it. + * It uses PyCMethodObject instead of PyCFunctionObject. + * May not be combined with METH_NOARGS, METH_O, METH_CLASS or METH_STATIC. + */ + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03090000 +#define METH_METHOD 0x0200 +#endif + + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_METHODOBJECT_H +# include "cpython/methodobject.h" +# undef Py_CPYTHON_METHODOBJECT_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_METHODOBJECT_H */ diff --git a/Include/modsupport.h b/Include/modsupport.h new file mode 100644 index 0000000000000000000000000000000000000000..af995f567b004c9a288f57118e16dab74be47878 --- /dev/null +++ b/Include/modsupport.h @@ -0,0 +1,146 @@ +// Module support interface + +#ifndef Py_MODSUPPORT_H +#define Py_MODSUPPORT_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_FUNC(int) PyArg_Parse(PyObject *, const char *, ...); +PyAPI_FUNC(int) PyArg_ParseTuple(PyObject *, const char *, ...); +PyAPI_FUNC(int) PyArg_ParseTupleAndKeywords(PyObject *, PyObject *, + const char *, PY_CXX_CONST char * const *, ...); +PyAPI_FUNC(int) PyArg_VaParse(PyObject *, const char *, va_list); +PyAPI_FUNC(int) PyArg_VaParseTupleAndKeywords(PyObject *, PyObject *, + const char *, PY_CXX_CONST char * const *, va_list); + +PyAPI_FUNC(int) PyArg_ValidateKeywordArguments(PyObject *); +PyAPI_FUNC(int) PyArg_UnpackTuple(PyObject *, const char *, Py_ssize_t, Py_ssize_t, ...); +PyAPI_FUNC(PyObject *) Py_BuildValue(const char *, ...); +PyAPI_FUNC(PyObject *) Py_VaBuildValue(const char *, va_list); + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030a0000 +// Add an attribute with name 'name' and value 'obj' to the module 'mod. +// On success, return 0. +// On error, raise an exception and return -1. +PyAPI_FUNC(int) PyModule_AddObjectRef(PyObject *mod, const char *name, PyObject *value); +#endif /* Py_LIMITED_API */ + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030d0000 +// Similar to PyModule_AddObjectRef() but steal a reference to 'value'. +PyAPI_FUNC(int) PyModule_Add(PyObject *mod, const char *name, PyObject *value); +#endif /* Py_LIMITED_API */ + +// Similar to PyModule_AddObjectRef() and PyModule_Add() but steal +// a reference to 'value' on success and only on success. +// Errorprone. Should not be used in new code. 
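+//
+// A hedged sketch of the preferred pattern in a module exec function; the
+// names "mod" and "answer" are illustrative only:
+//
+//     PyObject *v = PyLong_FromLong(42);
+//     if (v == NULL) {
+//         return -1;
+//     }
+//     int rc = PyModule_AddObjectRef(mod, "answer", v);  // no reference stolen
+//     Py_DECREF(v);  // drop our reference whether or not the add succeeded
+//     if (rc < 0) {
+//         return -1;
+//     }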
+PyAPI_FUNC(int) PyModule_AddObject(PyObject *mod, const char *, PyObject *value); + +PyAPI_FUNC(int) PyModule_AddIntConstant(PyObject *, const char *, long); +PyAPI_FUNC(int) PyModule_AddStringConstant(PyObject *, const char *, const char *); + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03090000 +/* New in 3.9 */ +PyAPI_FUNC(int) PyModule_AddType(PyObject *module, PyTypeObject *type); +#endif /* Py_LIMITED_API */ + +#define PyModule_AddIntMacro(m, c) PyModule_AddIntConstant((m), #c, (c)) +#define PyModule_AddStringMacro(m, c) PyModule_AddStringConstant((m), #c, (c)) + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000 +/* New in 3.5 */ +PyAPI_FUNC(int) PyModule_SetDocString(PyObject *, const char *); +PyAPI_FUNC(int) PyModule_AddFunctions(PyObject *, PyMethodDef *); +PyAPI_FUNC(int) PyModule_ExecDef(PyObject *module, PyModuleDef *def); +#endif + +#define Py_CLEANUP_SUPPORTED 0x20000 + +#define PYTHON_API_VERSION 1013 +#define PYTHON_API_STRING "1013" +/* The API version is maintained (independently from the Python version) + so we can detect mismatches between the interpreter and dynamically + loaded modules. These are diagnosed by an error message but + the module is still loaded (because the mismatch can only be tested + after loading the module). The error message is intended to + explain the core dump a few seconds later. + + The symbol PYTHON_API_STRING defines the same value as a string + literal. *** PLEASE MAKE SURE THE DEFINITIONS MATCH. *** + + Please add a line or two to the top of this log for each API + version change: + + 22-Feb-2006 MvL 1013 PEP 353 - long indices for sequence lengths + + 19-Aug-2002 GvR 1012 Changes to string object struct for + interning changes, saving 3 bytes. + + 17-Jul-2001 GvR 1011 Descr-branch, just to be on the safe side + + 25-Jan-2001 FLD 1010 Parameters added to PyCode_New() and + PyFrame_New(); Python 2.1a2 + + 14-Mar-2000 GvR 1009 Unicode API added + + 3-Jan-1999 GvR 1007 Decided to change back! (Don't reuse 1008!) + + 3-Dec-1998 GvR 1008 Python 1.5.2b1 + + 18-Jan-1997 GvR 1007 string interning and other speedups + + 11-Oct-1996 GvR renamed Py_Ellipses to Py_Ellipsis :-( + + 30-Jul-1996 GvR Slice and ellipses syntax added + + 23-Jul-1996 GvR For 1.4 -- better safe than sorry this time :-) + + 7-Nov-1995 GvR Keyword arguments (should've been done at 1.3 :-( ) + + 10-Jan-1995 GvR Renamed globals to new naming scheme + + 9-Jan-1995 GvR Initial version (incompatible with older API) +*/ + +/* The PYTHON_ABI_VERSION is introduced in PEP 384. For the lifetime of + Python 3, it will stay at the value of 3; changes to the limited API + must be performed in a strictly backwards-compatible manner. 
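+
+   A hedged, minimal sketch of how the embedded version reaches this
+   check in practice; the module name "spam" is hypothetical:
+
+       static struct PyModuleDef spam_module = {
+           PyModuleDef_HEAD_INIT,
+           "spam",    // m_name
+           NULL,      // m_doc
+           0,         // m_size (no per-module state)
+           NULL,      // m_methods
+       };
+
+       PyMODINIT_FUNC
+       PyInit_spam(void)
+       {
+           // Expands to PyModule_Create2(&spam_module, PYTHON_API_VERSION),
+           // or PYTHON_ABI_VERSION under Py_LIMITED_API, so the version
+           // recorded at compile time travels with the extension.
+           return PyModule_Create(&spam_module);
+       }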
*/ +#define PYTHON_ABI_VERSION 3 +#define PYTHON_ABI_STRING "3" + +PyAPI_FUNC(PyObject *) PyModule_Create2(PyModuleDef*, int apiver); + +#ifdef Py_LIMITED_API +#define PyModule_Create(module) \ + PyModule_Create2((module), PYTHON_ABI_VERSION) +#else +#define PyModule_Create(module) \ + PyModule_Create2((module), PYTHON_API_VERSION) +#endif + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000 +/* New in 3.5 */ +PyAPI_FUNC(PyObject *) PyModule_FromDefAndSpec2(PyModuleDef *def, + PyObject *spec, + int module_api_version); + +#ifdef Py_LIMITED_API +#define PyModule_FromDefAndSpec(module, spec) \ + PyModule_FromDefAndSpec2((module), (spec), PYTHON_ABI_VERSION) +#else +#define PyModule_FromDefAndSpec(module, spec) \ + PyModule_FromDefAndSpec2((module), (spec), PYTHON_API_VERSION) +#endif /* Py_LIMITED_API */ + +#endif /* New in 3.5 */ + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_MODSUPPORT_H +# include "cpython/modsupport.h" +# undef Py_CPYTHON_MODSUPPORT_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_MODSUPPORT_H */ diff --git a/Include/moduleobject.h b/Include/moduleobject.h new file mode 100644 index 0000000000000000000000000000000000000000..2a17c891dda811d4c407c7f2abe44ce89db7c5d8 --- /dev/null +++ b/Include/moduleobject.h @@ -0,0 +1,122 @@ + +/* Module object interface */ + +#ifndef Py_MODULEOBJECT_H +#define Py_MODULEOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_DATA(PyTypeObject) PyModule_Type; + +#define PyModule_Check(op) PyObject_TypeCheck((op), &PyModule_Type) +#define PyModule_CheckExact(op) Py_IS_TYPE((op), &PyModule_Type) + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 +PyAPI_FUNC(PyObject *) PyModule_NewObject( + PyObject *name + ); +#endif +PyAPI_FUNC(PyObject *) PyModule_New( + const char *name /* UTF-8 encoded string */ + ); +PyAPI_FUNC(PyObject *) PyModule_GetDict(PyObject *); +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 +PyAPI_FUNC(PyObject *) PyModule_GetNameObject(PyObject *); +#endif +PyAPI_FUNC(const char *) PyModule_GetName(PyObject *); +Py_DEPRECATED(3.2) PyAPI_FUNC(const char *) PyModule_GetFilename(PyObject *); +PyAPI_FUNC(PyObject *) PyModule_GetFilenameObject(PyObject *); +PyAPI_FUNC(PyModuleDef*) PyModule_GetDef(PyObject*); +PyAPI_FUNC(void*) PyModule_GetState(PyObject*); + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000 +/* New in 3.5 */ +PyAPI_FUNC(PyObject *) PyModuleDef_Init(PyModuleDef*); +PyAPI_DATA(PyTypeObject) PyModuleDef_Type; +#endif + +typedef struct PyModuleDef_Base { + PyObject_HEAD + /* The function used to re-initialize the module. + This is only set for legacy (single-phase init) extension modules + and only used for those that support multiple initializations + (m_size >= 0). + It is set by _PyImport_LoadDynamicModuleWithSpec() + and _imp.create_builtin(). */ + PyObject* (*m_init)(void); + /* The module's index into its interpreter's modules_by_index cache. + This is set for all extension modules but only used for legacy ones. + (See PyInterpreterState.modules_by_index for more info.) + It is set by PyModuleDef_Init(). */ + Py_ssize_t m_index; + /* A copy of the module's __dict__ after the first time it was loaded. + This is only set/used for legacy modules that do not support + multiple initializations. + It is set by fix_up_extension() in import.c. 
*/ + PyObject* m_copy; +} PyModuleDef_Base; + +#define PyModuleDef_HEAD_INIT { \ + PyObject_HEAD_INIT(_Py_NULL) \ + _Py_NULL, /* m_init */ \ + 0, /* m_index */ \ + _Py_NULL, /* m_copy */ \ + } + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000 +/* New in 3.5 */ +struct PyModuleDef_Slot { + int slot; + void *value; +}; + +#define Py_mod_create 1 +#define Py_mod_exec 2 +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030c0000 +# define Py_mod_multiple_interpreters 3 +#endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030d0000 +# define Py_mod_gil 4 +#endif + + +#ifndef Py_LIMITED_API +#define _Py_mod_LAST_SLOT 4 +#endif + +#endif /* New in 3.5 */ + +/* for Py_mod_multiple_interpreters: */ +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030c0000 +# define Py_MOD_MULTIPLE_INTERPRETERS_NOT_SUPPORTED ((void *)0) +# define Py_MOD_MULTIPLE_INTERPRETERS_SUPPORTED ((void *)1) +# define Py_MOD_PER_INTERPRETER_GIL_SUPPORTED ((void *)2) +#endif + +/* for Py_mod_gil: */ +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030d0000 +# define Py_MOD_GIL_USED ((void *)0) +# define Py_MOD_GIL_NOT_USED ((void *)1) +#endif + +#if !defined(Py_LIMITED_API) && defined(Py_GIL_DISABLED) +PyAPI_FUNC(int) PyUnstable_Module_SetGIL(PyObject *module, void *gil); +#endif + +struct PyModuleDef { + PyModuleDef_Base m_base; + const char* m_name; + const char* m_doc; + Py_ssize_t m_size; + PyMethodDef *m_methods; + PyModuleDef_Slot *m_slots; + traverseproc m_traverse; + inquiry m_clear; + freefunc m_free; +}; + +#ifdef __cplusplus +} +#endif +#endif /* !Py_MODULEOBJECT_H */ diff --git a/Include/monitoring.h b/Include/monitoring.h new file mode 100644 index 0000000000000000000000000000000000000000..985f7f230e44e3df89ff53efbef604ca298f09f0 --- /dev/null +++ b/Include/monitoring.h @@ -0,0 +1,18 @@ +#ifndef Py_MONITORING_H +#define Py_MONITORING_H +#ifdef __cplusplus +extern "C" { +#endif + +// There is currently no limited API for monitoring + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_MONITORING_H +# include "cpython/monitoring.h" +# undef Py_CPYTHON_MONITORING_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_MONITORING_H */ diff --git a/Include/object.h b/Include/object.h new file mode 100644 index 0000000000000000000000000000000000000000..0266d25230a2b423c5f5034386188a8676e2a510 --- /dev/null +++ b/Include/object.h @@ -0,0 +1,1275 @@ +#ifndef Py_OBJECT_H +#define Py_OBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + + +/* Object and type object interface */ + +/* +Objects are structures allocated on the heap. Special rules apply to +the use of objects to ensure they are properly garbage-collected. +Objects are never allocated statically or on the stack; they must be +accessed through special macros and functions only. (Type objects are +exceptions to the first rule; the standard types are represented by +statically initialized type objects, although work on type/class unification +for Python 2.2 made it possible to have heap-allocated type objects too). + +An object has a 'reference count' that is increased or decreased when a +pointer to the object is copied or deleted; when the reference count +reaches zero there are no references to the object left and it can be +removed from the heap. + +An object has a 'type' that determines what it represents and what kind +of data it contains. An object's type is fixed when it is created. +Types themselves are represented as objects; an object contains a +pointer to the corresponding type object. 
The type itself has a type +pointer pointing to the object representing the type 'type', which +contains a pointer to itself!. + +Objects do not float around in memory; once allocated an object keeps +the same size and address. Objects that must hold variable-size data +can contain pointers to variable-size parts of the object. Not all +objects of the same type have the same size; but the size cannot change +after allocation. (These restrictions are made so a reference to an +object can be simply a pointer -- moving an object would require +updating all the pointers, and changing an object's size would require +moving it if there was another object right next to it.) + +Objects are always accessed through pointers of the type 'PyObject *'. +The type 'PyObject' is a structure that only contains the reference count +and the type pointer. The actual memory allocated for an object +contains other data that can only be accessed after casting the pointer +to a pointer to a longer structure type. This longer type must start +with the reference count and type fields; the macro PyObject_HEAD should be +used for this (to accommodate for future changes). The implementation +of a particular object type can cast the object pointer to the proper +type and back. + +A standard interface exists for objects that contain an array of items +whose size is determined when the object is allocated. +*/ + +/* Py_DEBUG implies Py_REF_DEBUG. */ +#if defined(Py_DEBUG) && !defined(Py_REF_DEBUG) +# define Py_REF_DEBUG +#endif + +/* PyObject_HEAD defines the initial segment of every PyObject. */ +#define PyObject_HEAD PyObject ob_base; + +/* +Immortalization: + +The following indicates the immortalization strategy depending on the amount +of available bits in the reference count field. All strategies are backwards +compatible but the specific reference count value or immortalization check +might change depending on the specializations for the underlying system. + +Proper deallocation of immortal instances requires distinguishing between +statically allocated immortal instances vs those promoted by the runtime to be +immortal. The latter should be the only instances that require +cleanup during runtime finalization. +*/ + +#if SIZEOF_VOID_P > 4 +/* +In 64+ bit systems, an object will be marked as immortal by setting all of the +lower 32 bits of the reference count field, which is equal to: 0xFFFFFFFF + +Using the lower 32 bits makes the value backwards compatible by allowing +C-Extensions without the updated checks in Py_INCREF and Py_DECREF to safely +increase and decrease the objects reference count. The object would lose its +immortality, but the execution would still be correct. + +Reference count increases will use saturated arithmetic, taking advantage of +having all the lower 32 bits set, which will avoid the reference count to go +beyond the refcount limit. Immortality checks for reference count decreases will +be done by checking the bit sign flag in the lower 32 bits. +*/ +#define _Py_IMMORTAL_REFCNT _Py_CAST(Py_ssize_t, UINT_MAX) + +#else +/* +In 32 bit systems, an object will be marked as immortal by setting all of the +lower 30 bits of the reference count field, which is equal to: 0x3FFFFFFF + +Using the lower 30 bits makes the value backwards compatible by allowing +C-Extensions without the updated checks in Py_INCREF and Py_DECREF to safely +increase and decrease the objects reference count. The object would lose its +immortality, but the execution would still be correct. 
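+
+Worked example (illustrative): with the immortal value 0x3FFFFFFF used
+here, an extension built without the updated checks that blindly executes
+op->ob_refcnt++ leaves 0x40000000 in the field. The object merely stops
+being immortal and is reference-counted normally from then on, so
+execution stays correct, exactly as described above.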
+ +Reference count increases and decreases will first go through an immortality +check by comparing the reference count field to the immortality reference count. +*/ +#define _Py_IMMORTAL_REFCNT _Py_CAST(Py_ssize_t, UINT_MAX >> 2) +#endif + +// Py_GIL_DISABLED builds indicate immortal objects using `ob_ref_local`, which is +// always 32-bits. +#ifdef Py_GIL_DISABLED +#define _Py_IMMORTAL_REFCNT_LOCAL UINT32_MAX +#endif + +// Kept for backward compatibility. It was needed by Py_TRACE_REFS build. +#define _PyObject_EXTRA_INIT + +/* Make all uses of PyObject_HEAD_INIT immortal. + * + * Statically allocated objects might be shared between + * interpreters, so must be marked as immortal. + */ +#if defined(Py_GIL_DISABLED) +#define PyObject_HEAD_INIT(type) \ + { \ + 0, \ + 0, \ + { 0 }, \ + 0, \ + _Py_IMMORTAL_REFCNT_LOCAL, \ + 0, \ + (type), \ + }, +#else +#define PyObject_HEAD_INIT(type) \ + { \ + { _Py_IMMORTAL_REFCNT }, \ + (type) \ + }, +#endif + +#define PyVarObject_HEAD_INIT(type, size) \ + { \ + PyObject_HEAD_INIT(type) \ + (size) \ + }, + +/* PyObject_VAR_HEAD defines the initial segment of all variable-size + * container objects. These end with a declaration of an array with 1 + * element, but enough space is malloc'ed so that the array actually + * has room for ob_size elements. Note that ob_size is an element count, + * not necessarily a byte count. + */ +#define PyObject_VAR_HEAD PyVarObject ob_base; +#define Py_INVALID_SIZE (Py_ssize_t)-1 + +/* Nothing is actually declared to be a PyObject, but every pointer to + * a Python object can be cast to a PyObject*. This is inheritance built + * by hand. Similarly every pointer to a variable-size Python object can, + * in addition, be cast to PyVarObject*. + */ +#ifndef Py_GIL_DISABLED +struct _object { +#if (defined(__GNUC__) || defined(__clang__)) \ + && !(defined __STDC_VERSION__ && __STDC_VERSION__ >= 201112L) + // On C99 and older, anonymous union is a GCC and clang extension + __extension__ +#endif +#ifdef _MSC_VER + // Ignore MSC warning C4201: "nonstandard extension used: + // nameless struct/union" + __pragma(warning(push)) + __pragma(warning(disable: 4201)) +#endif + union { + Py_ssize_t ob_refcnt; +#if SIZEOF_VOID_P > 4 + PY_UINT32_T ob_refcnt_split[2]; +#endif + }; +#ifdef _MSC_VER + __pragma(warning(pop)) +#endif + + PyTypeObject *ob_type; +}; +#else +// Objects that are not owned by any thread use a thread id (tid) of zero. +// This includes both immortal objects and objects whose reference count +// fields have been merged. +#define _Py_UNOWNED_TID 0 + +// The shared reference count uses the two least-significant bits to store +// flags. The remaining bits are used to store the reference count. +#define _Py_REF_SHARED_SHIFT 2 +#define _Py_REF_SHARED_FLAG_MASK 0x3 + +// The shared flags are initialized to zero. +#define _Py_REF_SHARED_INIT 0x0 +#define _Py_REF_MAYBE_WEAKREF 0x1 +#define _Py_REF_QUEUED 0x2 +#define _Py_REF_MERGED 0x3 + +// Create a shared field from a refcnt and desired flags +#define _Py_REF_SHARED(refcnt, flags) (((refcnt) << _Py_REF_SHARED_SHIFT) + (flags)) + +struct _object { + // ob_tid stores the thread id (or zero). It is also used by the GC and the + // trashcan mechanism as a linked list pointer and by the GC to store the + // computed "gc_refs" refcount. 
+ uintptr_t ob_tid; + uint16_t _padding; + PyMutex ob_mutex; // per-object lock + uint8_t ob_gc_bits; // gc-related state + uint32_t ob_ref_local; // local reference count + Py_ssize_t ob_ref_shared; // shared (atomic) reference count + PyTypeObject *ob_type; +}; +#endif + +/* Cast argument to PyObject* type. */ +#define _PyObject_CAST(op) _Py_CAST(PyObject*, (op)) + +typedef struct { + PyObject ob_base; + Py_ssize_t ob_size; /* Number of items in variable part */ +} PyVarObject; + +/* Cast argument to PyVarObject* type. */ +#define _PyVarObject_CAST(op) _Py_CAST(PyVarObject*, (op)) + + +// Test if the 'x' object is the 'y' object, the same as "x is y" in Python. +PyAPI_FUNC(int) Py_Is(PyObject *x, PyObject *y); +#define Py_Is(x, y) ((x) == (y)) + +#if defined(Py_GIL_DISABLED) && !defined(Py_LIMITED_API) +PyAPI_FUNC(uintptr_t) _Py_GetThreadLocal_Addr(void); + +static inline uintptr_t +_Py_ThreadId(void) +{ + uintptr_t tid; +#if defined(_MSC_VER) && defined(_M_X64) + tid = __readgsqword(48); +#elif defined(_MSC_VER) && defined(_M_IX86) + tid = __readfsdword(24); +#elif defined(_MSC_VER) && defined(_M_ARM64) + tid = __getReg(18); +#elif defined(__MINGW32__) && defined(_M_X64) + tid = __readgsqword(48); +#elif defined(__MINGW32__) && defined(_M_IX86) + tid = __readfsdword(24); +#elif defined(__MINGW32__) && defined(_M_ARM64) + tid = __getReg(18); +#elif defined(__i386__) + __asm__("movl %%gs:0, %0" : "=r" (tid)); // 32-bit always uses GS +#elif defined(__MACH__) && defined(__x86_64__) + __asm__("movq %%gs:0, %0" : "=r" (tid)); // x86_64 macOSX uses GS +#elif defined(__x86_64__) + __asm__("movq %%fs:0, %0" : "=r" (tid)); // x86_64 Linux, BSD uses FS +#elif defined(__arm__) && __ARM_ARCH >= 7 + __asm__ ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tid)); +#elif defined(__aarch64__) && defined(__APPLE__) + __asm__ ("mrs %0, tpidrro_el0" : "=r" (tid)); +#elif defined(__aarch64__) + __asm__ ("mrs %0, tpidr_el0" : "=r" (tid)); +#elif defined(__powerpc64__) + #if defined(__clang__) && _Py__has_builtin(__builtin_thread_pointer) + tid = (uintptr_t)__builtin_thread_pointer(); + #else + // r13 is reserved for use as system thread ID by the Power 64-bit ABI. + register uintptr_t tp __asm__ ("r13"); + __asm__("" : "=r" (tp)); + tid = tp; + #endif +#elif defined(__powerpc__) + #if defined(__clang__) && _Py__has_builtin(__builtin_thread_pointer) + tid = (uintptr_t)__builtin_thread_pointer(); + #else + // r2 is reserved for use as system thread ID by the Power 32-bit ABI. + register uintptr_t tp __asm__ ("r2"); + __asm__ ("" : "=r" (tp)); + tid = tp; + #endif +#elif defined(__s390__) && defined(__GNUC__) + // Both GCC and Clang have supported __builtin_thread_pointer + // for s390 from long time ago. + tid = (uintptr_t)__builtin_thread_pointer(); +#elif defined(__riscv) + #if defined(__clang__) && _Py__has_builtin(__builtin_thread_pointer) + tid = (uintptr_t)__builtin_thread_pointer(); + #else + // tp is Thread Pointer provided by the RISC-V ABI. + __asm__ ("mv %0, tp" : "=r" (tid)); + #endif +#else + // Fallback to a portable implementation if we do not have a faster + // platform-specific implementation. 
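+    // (Descriptive note: the fallback relies on the address of a
+    // thread-local variable, which is necessarily unique to the running
+    // thread; uniqueness is all this identifier needs to provide.)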
+ tid = _Py_GetThreadLocal_Addr(); +#endif + return tid; +} + +static inline Py_ALWAYS_INLINE int +_Py_IsOwnedByCurrentThread(PyObject *ob) +{ +#ifdef _Py_THREAD_SANITIZER + return _Py_atomic_load_uintptr_relaxed(&ob->ob_tid) == _Py_ThreadId(); +#else + return ob->ob_tid == _Py_ThreadId(); +#endif +} +#endif + +static inline Py_ssize_t Py_REFCNT(PyObject *ob) { +#if !defined(Py_GIL_DISABLED) + return ob->ob_refcnt; +#else + uint32_t local = _Py_atomic_load_uint32_relaxed(&ob->ob_ref_local); + if (local == _Py_IMMORTAL_REFCNT_LOCAL) { + return _Py_IMMORTAL_REFCNT; + } + Py_ssize_t shared = _Py_atomic_load_ssize_relaxed(&ob->ob_ref_shared); + return _Py_STATIC_CAST(Py_ssize_t, local) + + Py_ARITHMETIC_RIGHT_SHIFT(Py_ssize_t, shared, _Py_REF_SHARED_SHIFT); +#endif +} +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 +# define Py_REFCNT(ob) Py_REFCNT(_PyObject_CAST(ob)) +#endif + + +// bpo-39573: The Py_SET_TYPE() function must be used to set an object type. +static inline PyTypeObject* Py_TYPE(PyObject *ob) { + return ob->ob_type; +} +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 +# define Py_TYPE(ob) Py_TYPE(_PyObject_CAST(ob)) +#endif + +PyAPI_DATA(PyTypeObject) PyLong_Type; +PyAPI_DATA(PyTypeObject) PyBool_Type; + +// bpo-39573: The Py_SET_SIZE() function must be used to set an object size. +static inline Py_ssize_t Py_SIZE(PyObject *ob) { + assert(ob->ob_type != &PyLong_Type); + assert(ob->ob_type != &PyBool_Type); + return _PyVarObject_CAST(ob)->ob_size; +} +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 +# define Py_SIZE(ob) Py_SIZE(_PyObject_CAST(ob)) +#endif + +static inline Py_ALWAYS_INLINE int _Py_IsImmortal(PyObject *op) +{ +#if defined(Py_GIL_DISABLED) + return (_Py_atomic_load_uint32_relaxed(&op->ob_ref_local) == + _Py_IMMORTAL_REFCNT_LOCAL); +#elif SIZEOF_VOID_P > 4 + return (_Py_CAST(PY_INT32_T, op->ob_refcnt) < 0); +#else + return (op->ob_refcnt == _Py_IMMORTAL_REFCNT); +#endif +} +#define _Py_IsImmortal(op) _Py_IsImmortal(_PyObject_CAST(op)) + +static inline int Py_IS_TYPE(PyObject *ob, PyTypeObject *type) { + return Py_TYPE(ob) == type; +} +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 +# define Py_IS_TYPE(ob, type) Py_IS_TYPE(_PyObject_CAST(ob), (type)) +#endif + + +// Py_SET_REFCNT() implementation for stable ABI +PyAPI_FUNC(void) _Py_SetRefcnt(PyObject *ob, Py_ssize_t refcnt); + +static inline void Py_SET_REFCNT(PyObject *ob, Py_ssize_t refcnt) { +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030d0000 + // Stable ABI implements Py_SET_REFCNT() as a function call + // on limited C API version 3.13 and newer. + _Py_SetRefcnt(ob, refcnt); +#else + // This immortal check is for code that is unaware of immortal objects. + // The runtime tracks these objects and we should avoid as much + // as possible having extensions inadvertently change the refcnt + // of an immortalized object. + if (_Py_IsImmortal(ob)) { + return; + } + +#ifndef Py_GIL_DISABLED + ob->ob_refcnt = refcnt; +#else + if (_Py_IsOwnedByCurrentThread(ob)) { + if ((size_t)refcnt > (size_t)UINT32_MAX) { + // On overflow, make the object immortal + ob->ob_tid = _Py_UNOWNED_TID; + ob->ob_ref_local = _Py_IMMORTAL_REFCNT_LOCAL; + ob->ob_ref_shared = 0; + } + else { + // Set local refcount to desired refcount and shared refcount + // to zero, but preserve the shared refcount flags. 
+ ob->ob_ref_local = _Py_STATIC_CAST(uint32_t, refcnt); + ob->ob_ref_shared &= _Py_REF_SHARED_FLAG_MASK; + } + } + else { + // Set local refcount to zero and shared refcount to desired refcount. + // Mark the object as merged. + ob->ob_tid = _Py_UNOWNED_TID; + ob->ob_ref_local = 0; + ob->ob_ref_shared = _Py_REF_SHARED(refcnt, _Py_REF_MERGED); + } +#endif // Py_GIL_DISABLED +#endif // Py_LIMITED_API+0 < 0x030d0000 +} +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 +# define Py_SET_REFCNT(ob, refcnt) Py_SET_REFCNT(_PyObject_CAST(ob), (refcnt)) +#endif + + +static inline void Py_SET_TYPE(PyObject *ob, PyTypeObject *type) { + ob->ob_type = type; +} +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 +# define Py_SET_TYPE(ob, type) Py_SET_TYPE(_PyObject_CAST(ob), type) +#endif + +static inline void Py_SET_SIZE(PyVarObject *ob, Py_ssize_t size) { + assert(ob->ob_base.ob_type != &PyLong_Type); + assert(ob->ob_base.ob_type != &PyBool_Type); +#ifdef Py_GIL_DISABLED + _Py_atomic_store_ssize_relaxed(&ob->ob_size, size); +#else + ob->ob_size = size; +#endif +} +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 +# define Py_SET_SIZE(ob, size) Py_SET_SIZE(_PyVarObject_CAST(ob), (size)) +#endif + + +/* +Type objects contain a string containing the type name (to help somewhat +in debugging), the allocation parameters (see PyObject_New() and +PyObject_NewVar()), +and methods for accessing objects of the type. Methods are optional, a +nil pointer meaning that particular kind of access is not available for +this type. The Py_DECREF() macro uses the tp_dealloc method without +checking for a nil pointer; it should always be implemented except if +the implementation can guarantee that the reference count will never +reach zero (e.g., for statically allocated type objects). + +NB: the methods for certain type groups are now contained in separate +method blocks. 
+*/ + +typedef PyObject * (*unaryfunc)(PyObject *); +typedef PyObject * (*binaryfunc)(PyObject *, PyObject *); +typedef PyObject * (*ternaryfunc)(PyObject *, PyObject *, PyObject *); +typedef int (*inquiry)(PyObject *); +typedef Py_ssize_t (*lenfunc)(PyObject *); +typedef PyObject *(*ssizeargfunc)(PyObject *, Py_ssize_t); +typedef PyObject *(*ssizessizeargfunc)(PyObject *, Py_ssize_t, Py_ssize_t); +typedef int(*ssizeobjargproc)(PyObject *, Py_ssize_t, PyObject *); +typedef int(*ssizessizeobjargproc)(PyObject *, Py_ssize_t, Py_ssize_t, PyObject *); +typedef int(*objobjargproc)(PyObject *, PyObject *, PyObject *); + +typedef int (*objobjproc)(PyObject *, PyObject *); +typedef int (*visitproc)(PyObject *, void *); +typedef int (*traverseproc)(PyObject *, visitproc, void *); + + +typedef void (*freefunc)(void *); +typedef void (*destructor)(PyObject *); +typedef PyObject *(*getattrfunc)(PyObject *, char *); +typedef PyObject *(*getattrofunc)(PyObject *, PyObject *); +typedef int (*setattrfunc)(PyObject *, char *, PyObject *); +typedef int (*setattrofunc)(PyObject *, PyObject *, PyObject *); +typedef PyObject *(*reprfunc)(PyObject *); +typedef Py_hash_t (*hashfunc)(PyObject *); +typedef PyObject *(*richcmpfunc) (PyObject *, PyObject *, int); +typedef PyObject *(*getiterfunc) (PyObject *); +typedef PyObject *(*iternextfunc) (PyObject *); +typedef PyObject *(*descrgetfunc) (PyObject *, PyObject *, PyObject *); +typedef int (*descrsetfunc) (PyObject *, PyObject *, PyObject *); +typedef int (*initproc)(PyObject *, PyObject *, PyObject *); +typedef PyObject *(*newfunc)(PyTypeObject *, PyObject *, PyObject *); +typedef PyObject *(*allocfunc)(PyTypeObject *, Py_ssize_t); + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030c0000 // 3.12 +typedef PyObject *(*vectorcallfunc)(PyObject *callable, PyObject *const *args, + size_t nargsf, PyObject *kwnames); +#endif + +typedef struct{ + int slot; /* slot id, see below */ + void *pfunc; /* function pointer */ +} PyType_Slot; + +typedef struct{ + const char* name; + int basicsize; + int itemsize; + unsigned int flags; + PyType_Slot *slots; /* terminated by slot==0. 
*/ +} PyType_Spec; + +PyAPI_FUNC(PyObject*) PyType_FromSpec(PyType_Spec*); +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 +PyAPI_FUNC(PyObject*) PyType_FromSpecWithBases(PyType_Spec*, PyObject*); +#endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03040000 +PyAPI_FUNC(void*) PyType_GetSlot(PyTypeObject*, int); +#endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03090000 +PyAPI_FUNC(PyObject*) PyType_FromModuleAndSpec(PyObject *, PyType_Spec *, PyObject *); +PyAPI_FUNC(PyObject *) PyType_GetModule(PyTypeObject *); +PyAPI_FUNC(void *) PyType_GetModuleState(PyTypeObject *); +#endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030B0000 +PyAPI_FUNC(PyObject *) PyType_GetName(PyTypeObject *); +PyAPI_FUNC(PyObject *) PyType_GetQualName(PyTypeObject *); +#endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030D0000 +PyAPI_FUNC(PyObject *) PyType_GetFullyQualifiedName(PyTypeObject *type); +PyAPI_FUNC(PyObject *) PyType_GetModuleName(PyTypeObject *type); +#endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030C0000 +PyAPI_FUNC(PyObject *) PyType_FromMetaclass(PyTypeObject*, PyObject*, PyType_Spec*, PyObject*); +PyAPI_FUNC(void *) PyObject_GetTypeData(PyObject *obj, PyTypeObject *cls); +PyAPI_FUNC(Py_ssize_t) PyType_GetTypeDataSize(PyTypeObject *cls); +#endif + +/* Generic type check */ +PyAPI_FUNC(int) PyType_IsSubtype(PyTypeObject *, PyTypeObject *); + +static inline int PyObject_TypeCheck(PyObject *ob, PyTypeObject *type) { + return Py_IS_TYPE(ob, type) || PyType_IsSubtype(Py_TYPE(ob), type); +} +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 +# define PyObject_TypeCheck(ob, type) PyObject_TypeCheck(_PyObject_CAST(ob), (type)) +#endif + +PyAPI_DATA(PyTypeObject) PyType_Type; /* built-in 'type' */ +PyAPI_DATA(PyTypeObject) PyBaseObject_Type; /* built-in 'object' */ +PyAPI_DATA(PyTypeObject) PySuper_Type; /* built-in 'super' */ + +PyAPI_FUNC(unsigned long) PyType_GetFlags(PyTypeObject*); + +PyAPI_FUNC(int) PyType_Ready(PyTypeObject *); +PyAPI_FUNC(PyObject *) PyType_GenericAlloc(PyTypeObject *, Py_ssize_t); +PyAPI_FUNC(PyObject *) PyType_GenericNew(PyTypeObject *, + PyObject *, PyObject *); +PyAPI_FUNC(unsigned int) PyType_ClearCache(void); +PyAPI_FUNC(void) PyType_Modified(PyTypeObject *); + +/* Generic operations on objects */ +PyAPI_FUNC(PyObject *) PyObject_Repr(PyObject *); +PyAPI_FUNC(PyObject *) PyObject_Str(PyObject *); +PyAPI_FUNC(PyObject *) PyObject_ASCII(PyObject *); +PyAPI_FUNC(PyObject *) PyObject_Bytes(PyObject *); +PyAPI_FUNC(PyObject *) PyObject_RichCompare(PyObject *, PyObject *, int); +PyAPI_FUNC(int) PyObject_RichCompareBool(PyObject *, PyObject *, int); +PyAPI_FUNC(PyObject *) PyObject_GetAttrString(PyObject *, const char *); +PyAPI_FUNC(int) PyObject_SetAttrString(PyObject *, const char *, PyObject *); +PyAPI_FUNC(int) PyObject_DelAttrString(PyObject *v, const char *name); +PyAPI_FUNC(int) PyObject_HasAttrString(PyObject *, const char *); +PyAPI_FUNC(PyObject *) PyObject_GetAttr(PyObject *, PyObject *); +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030d0000 +PyAPI_FUNC(int) PyObject_GetOptionalAttr(PyObject *, PyObject *, PyObject **); +PyAPI_FUNC(int) PyObject_GetOptionalAttrString(PyObject *, const char *, PyObject **); +#endif +PyAPI_FUNC(int) PyObject_SetAttr(PyObject *, PyObject *, PyObject *); +PyAPI_FUNC(int) PyObject_DelAttr(PyObject *v, PyObject *name); +PyAPI_FUNC(int) PyObject_HasAttr(PyObject *, PyObject *); +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030d0000 
+PyAPI_FUNC(int) PyObject_HasAttrWithError(PyObject *, PyObject *); +PyAPI_FUNC(int) PyObject_HasAttrStringWithError(PyObject *, const char *); +#endif +PyAPI_FUNC(PyObject *) PyObject_SelfIter(PyObject *); +PyAPI_FUNC(PyObject *) PyObject_GenericGetAttr(PyObject *, PyObject *); +PyAPI_FUNC(int) PyObject_GenericSetAttr(PyObject *, PyObject *, PyObject *); +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 +PyAPI_FUNC(int) PyObject_GenericSetDict(PyObject *, PyObject *, void *); +#endif +PyAPI_FUNC(Py_hash_t) PyObject_Hash(PyObject *); +PyAPI_FUNC(Py_hash_t) PyObject_HashNotImplemented(PyObject *); +PyAPI_FUNC(int) PyObject_IsTrue(PyObject *); +PyAPI_FUNC(int) PyObject_Not(PyObject *); +PyAPI_FUNC(int) PyCallable_Check(PyObject *); +PyAPI_FUNC(void) PyObject_ClearWeakRefs(PyObject *); + +/* PyObject_Dir(obj) acts like Python builtins.dir(obj), returning a + list of strings. PyObject_Dir(NULL) is like builtins.dir(), + returning the names of the current locals. In this case, if there are + no current locals, NULL is returned, and PyErr_Occurred() is false. +*/ +PyAPI_FUNC(PyObject *) PyObject_Dir(PyObject *); + +/* Helpers for printing recursive container types */ +PyAPI_FUNC(int) Py_ReprEnter(PyObject *); +PyAPI_FUNC(void) Py_ReprLeave(PyObject *); + +/* Flag bits for printing: */ +#define Py_PRINT_RAW 1 /* No string quotes etc. */ + +/* +Type flags (tp_flags) + +These flags are used to change expected features and behavior for a +particular type. + +Arbitration of the flag bit positions will need to be coordinated among +all extension writers who publicly release their extensions (this will +be fewer than you might expect!). + +Most flags were removed as of Python 3.0 to make room for new flags. (Some +flags are not for backwards compatibility but to indicate the presence of an +optional feature; these flags remain of course.) + +Type definitions should use Py_TPFLAGS_DEFAULT for their tp_flags value. + +Code can use PyType_HasFeature(type_ob, flag_value) to test whether the +given type object has a specified feature. +*/ + +#ifndef Py_LIMITED_API + +/* Track types initialized using _PyStaticType_InitBuiltin(). */ +#define _Py_TPFLAGS_STATIC_BUILTIN (1 << 1) + +/* The values array is placed inline directly after the rest of + * the object. Implies Py_TPFLAGS_HAVE_GC. + */ +#define Py_TPFLAGS_INLINE_VALUES (1 << 2) + +/* Placement of weakref pointers are managed by the VM, not by the type. + * The VM will automatically set tp_weaklistoffset. + */ +#define Py_TPFLAGS_MANAGED_WEAKREF (1 << 3) + +/* Placement of dict (and values) pointers are managed by the VM, not by the type. + * The VM will automatically set tp_dictoffset. Implies Py_TPFLAGS_HAVE_GC. + */ +#define Py_TPFLAGS_MANAGED_DICT (1 << 4) + +#define Py_TPFLAGS_PREHEADER (Py_TPFLAGS_MANAGED_WEAKREF | Py_TPFLAGS_MANAGED_DICT) + +/* Set if instances of the type object are treated as sequences for pattern matching */ +#define Py_TPFLAGS_SEQUENCE (1 << 5) +/* Set if instances of the type object are treated as mappings for pattern matching */ +#define Py_TPFLAGS_MAPPING (1 << 6) +#endif + +/* Disallow creating instances of the type: set tp_new to NULL and don't create + * the "__new__" key in the type dictionary. 
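+ *
+ * A hedged sketch of a heap type created with this flag through
+ * PyType_FromSpec(); the names token_slots, token_spec and "spam.Token"
+ * are hypothetical:
+ *
+ *     static PyType_Slot token_slots[] = {
+ *         {0, NULL}
+ *     };
+ *     static PyType_Spec token_spec = {
+ *         "spam.Token",       // name
+ *         sizeof(PyObject),   // basicsize
+ *         0,                  // itemsize
+ *         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_DISALLOW_INSTANTIATION,
+ *         token_slots,
+ *     };
+ *     PyObject *tp = PyType_FromSpec(&token_spec);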
*/ +#define Py_TPFLAGS_DISALLOW_INSTANTIATION (1UL << 7) + +/* Set if the type object is immutable: type attributes cannot be set nor deleted */ +#define Py_TPFLAGS_IMMUTABLETYPE (1UL << 8) + +/* Set if the type object is dynamically allocated */ +#define Py_TPFLAGS_HEAPTYPE (1UL << 9) + +/* Set if the type allows subclassing */ +#define Py_TPFLAGS_BASETYPE (1UL << 10) + +/* Set if the type implements the vectorcall protocol (PEP 590) */ +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030C0000 +#define Py_TPFLAGS_HAVE_VECTORCALL (1UL << 11) +#ifndef Py_LIMITED_API +// Backwards compatibility alias for API that was provisional in Python 3.8 +#define _Py_TPFLAGS_HAVE_VECTORCALL Py_TPFLAGS_HAVE_VECTORCALL +#endif +#endif + +/* Set if the type is 'ready' -- fully initialized */ +#define Py_TPFLAGS_READY (1UL << 12) + +/* Set while the type is being 'readied', to prevent recursive ready calls */ +#define Py_TPFLAGS_READYING (1UL << 13) + +/* Objects support garbage collection (see objimpl.h) */ +#define Py_TPFLAGS_HAVE_GC (1UL << 14) + +/* These two bits are preserved for Stackless Python, next after this is 17 */ +#ifdef STACKLESS +#define Py_TPFLAGS_HAVE_STACKLESS_EXTENSION (3UL << 15) +#else +#define Py_TPFLAGS_HAVE_STACKLESS_EXTENSION 0 +#endif + +/* Objects behave like an unbound method */ +#define Py_TPFLAGS_METHOD_DESCRIPTOR (1UL << 17) + +/* Unused. Legacy flag */ +#define Py_TPFLAGS_VALID_VERSION_TAG (1UL << 19) + +/* Type is abstract and cannot be instantiated */ +#define Py_TPFLAGS_IS_ABSTRACT (1UL << 20) + +// This undocumented flag gives certain built-ins their unique pattern-matching +// behavior, which allows a single positional subpattern to match against the +// subject itself (rather than a mapped attribute on it): +#define _Py_TPFLAGS_MATCH_SELF (1UL << 22) + +/* Items (ob_size*tp_itemsize) are found at the end of an instance's memory */ +#define Py_TPFLAGS_ITEMS_AT_END (1UL << 23) + +/* These flags are used to determine if a type is a subclass. */ +#define Py_TPFLAGS_LONG_SUBCLASS (1UL << 24) +#define Py_TPFLAGS_LIST_SUBCLASS (1UL << 25) +#define Py_TPFLAGS_TUPLE_SUBCLASS (1UL << 26) +#define Py_TPFLAGS_BYTES_SUBCLASS (1UL << 27) +#define Py_TPFLAGS_UNICODE_SUBCLASS (1UL << 28) +#define Py_TPFLAGS_DICT_SUBCLASS (1UL << 29) +#define Py_TPFLAGS_BASE_EXC_SUBCLASS (1UL << 30) +#define Py_TPFLAGS_TYPE_SUBCLASS (1UL << 31) + +#define Py_TPFLAGS_DEFAULT ( \ + Py_TPFLAGS_HAVE_STACKLESS_EXTENSION | \ + 0) + +/* NOTE: Some of the following flags reuse lower bits (removed as part of the + * Python 3.0 transition). */ + +/* The following flags are kept for compatibility; in previous + * versions they indicated presence of newer tp_* fields on the + * type struct. + * Starting with 3.8, binary compatibility of C extensions across + * feature releases of Python is not supported anymore (except when + * using the stable ABI, in which all classes are created dynamically, + * using the interpreter's memory layout.) + * Note that older extensions using the stable ABI set these flags, + * so the bits must not be repurposed. + */ +#define Py_TPFLAGS_HAVE_FINALIZE (1UL << 0) +#define Py_TPFLAGS_HAVE_VERSION_TAG (1UL << 18) + + +/* +The macros Py_INCREF(op) and Py_DECREF(op) are used to increment or decrement +reference counts. Py_DECREF calls the object's deallocator function when +the refcount falls to 0; for +objects that don't contain references to other objects or heap memory +this can be the standard function free(). Both macros can be used +wherever a void expression is allowed. 
The argument must not be a
+NULL pointer. If it may be NULL, use Py_XINCREF/Py_XDECREF instead.
+The macro _Py_NewReference(op) initializes reference counts to 1, and
+in special builds (Py_REF_DEBUG, Py_TRACE_REFS) performs additional
+bookkeeping appropriate to the special build.
+
+We assume that the reference count field can never overflow; this can
+be proven when the size of the field is the same as the pointer size, so
+we ignore the possibility. Provided a C int is at least 32 bits (which
+is implicitly assumed in many parts of this code), that's enough for
+about 2**31 references to an object.
+
+XXX The following became out of date in Python 2.2, but I'm not sure
+XXX what the full truth is now. Certainly, heap-allocated type objects
+XXX can and should be deallocated.
+Type objects should never be deallocated; the type pointer in an object
+is not considered to be a reference to the type object, to save
+complications in the deallocation function. (This is actually a
+decision that's up to the implementer of each new type so if you want,
+you can count such references to the type object.)
+*/
+
+#if defined(Py_REF_DEBUG) && !defined(Py_LIMITED_API)
+PyAPI_FUNC(void) _Py_NegativeRefcount(const char *filename, int lineno,
+                                      PyObject *op);
+PyAPI_FUNC(void) _Py_INCREF_IncRefTotal(void);
+PyAPI_FUNC(void) _Py_DECREF_DecRefTotal(void);
+#endif // Py_REF_DEBUG && !Py_LIMITED_API
+
+PyAPI_FUNC(void) _Py_Dealloc(PyObject *);
+
+/*
+These are provided as conveniences to Python runtime embedders, so that
+they can have object code that is not dependent on Python compilation flags.
+*/
+PyAPI_FUNC(void) Py_IncRef(PyObject *);
+PyAPI_FUNC(void) Py_DecRef(PyObject *);
+
+// Similar to Py_IncRef() and Py_DecRef() but the argument must be non-NULL.
+// Private functions used by Py_INCREF() and Py_DECREF().
+PyAPI_FUNC(void) _Py_IncRef(PyObject *);
+PyAPI_FUNC(void) _Py_DecRef(PyObject *);
+
+static inline Py_ALWAYS_INLINE void Py_INCREF(PyObject *op)
+{
+#if defined(Py_LIMITED_API) && (Py_LIMITED_API+0 >= 0x030c0000 || defined(Py_REF_DEBUG))
+    // Stable ABI implements Py_INCREF() as a function call on limited C API
+    // version 3.12 and newer, and on Python built in debug mode. _Py_IncRef()
+    // was added to Python 3.10.0a7, use Py_IncRef() on older Python versions.
+    // Py_IncRef() accepts NULL whereas _Py_IncRef() doesn't.
+# if Py_LIMITED_API+0 >= 0x030a00A7
+    _Py_IncRef(op);
+# else
+    Py_IncRef(op);
+# endif
+#else
+    // Non-limited C API and limited C API for Python 3.9 and older access
+    // directly PyObject.ob_refcnt.
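+    // (Descriptive note: the branches below select, in order, the
+    // free-threaded path, a 64-bit saturating add that leaves immortal
+    // objects untouched, and a plain increment guarded by an explicit
+    // immortality check.)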
+#if defined(Py_GIL_DISABLED)
+    uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local);
+    uint32_t new_local = local + 1;
+    if (new_local == 0) {
+        // local is equal to _Py_IMMORTAL_REFCNT: do nothing
+        return;
+    }
+    if (_Py_IsOwnedByCurrentThread(op)) {
+        _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, new_local);
+    }
+    else {
+        _Py_atomic_add_ssize(&op->ob_ref_shared, (1 << _Py_REF_SHARED_SHIFT));
+    }
+#elif SIZEOF_VOID_P > 4
+    // Portable saturated add, branching on the carry flag and set low bits
+    PY_UINT32_T cur_refcnt = op->ob_refcnt_split[PY_BIG_ENDIAN];
+    PY_UINT32_T new_refcnt = cur_refcnt + 1;
+    if (new_refcnt == 0) {
+        // cur_refcnt is equal to _Py_IMMORTAL_REFCNT: the object is immortal,
+        // do nothing
+        return;
+    }
+    op->ob_refcnt_split[PY_BIG_ENDIAN] = new_refcnt;
+#else
+    // Explicitly check immortality against the immortal value
+    if (_Py_IsImmortal(op)) {
+        return;
+    }
+    op->ob_refcnt++;
+#endif
+    _Py_INCREF_STAT_INC();
+#ifdef Py_REF_DEBUG
+    _Py_INCREF_IncRefTotal();
+#endif
+#endif
+}
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000
+# define Py_INCREF(op) Py_INCREF(_PyObject_CAST(op))
+#endif
+
+
+#if !defined(Py_LIMITED_API) && defined(Py_GIL_DISABLED)
+// Implements Py_DECREF on objects not owned by the current thread.
+PyAPI_FUNC(void) _Py_DecRefShared(PyObject *);
+PyAPI_FUNC(void) _Py_DecRefSharedDebug(PyObject *, const char *, int);
+
+// Called from Py_DECREF by the owning thread when the local refcount reaches
+// zero. The call will deallocate the object if the shared refcount is also
+// zero. Otherwise, the thread gives up ownership and merges the reference
+// count fields.
+PyAPI_FUNC(void) _Py_MergeZeroLocalRefcount(PyObject *);
+#endif
+
+#if defined(Py_LIMITED_API) && (Py_LIMITED_API+0 >= 0x030c0000 || defined(Py_REF_DEBUG))
+// Stable ABI implements Py_DECREF() as a function call on limited C API
+// version 3.12 and newer, and on Python built in debug mode. _Py_DecRef() was
+// added to Python 3.10.0a7, use Py_DecRef() on older Python versions.
+// Py_DecRef() accepts NULL whereas _Py_DecRef() doesn't.
+static inline void Py_DECREF(PyObject *op) { +# if Py_LIMITED_API+0 >= 0x030a00A7 + _Py_DecRef(op); +# else + Py_DecRef(op); +# endif +} +#define Py_DECREF(op) Py_DECREF(_PyObject_CAST(op)) + +#elif defined(Py_GIL_DISABLED) && defined(Py_REF_DEBUG) +static inline void Py_DECREF(const char *filename, int lineno, PyObject *op) +{ + uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); + if (local == _Py_IMMORTAL_REFCNT_LOCAL) { + return; + } + _Py_DECREF_STAT_INC(); + _Py_DECREF_DecRefTotal(); + if (_Py_IsOwnedByCurrentThread(op)) { + if (local == 0) { + _Py_NegativeRefcount(filename, lineno, op); + } + local--; + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, local); + if (local == 0) { + _Py_MergeZeroLocalRefcount(op); + } + } + else { + _Py_DecRefSharedDebug(op, filename, lineno); + } +} +#define Py_DECREF(op) Py_DECREF(__FILE__, __LINE__, _PyObject_CAST(op)) + +#elif defined(Py_GIL_DISABLED) +static inline void Py_DECREF(PyObject *op) +{ + uint32_t local = _Py_atomic_load_uint32_relaxed(&op->ob_ref_local); + if (local == _Py_IMMORTAL_REFCNT_LOCAL) { + return; + } + _Py_DECREF_STAT_INC(); + if (_Py_IsOwnedByCurrentThread(op)) { + local--; + _Py_atomic_store_uint32_relaxed(&op->ob_ref_local, local); + if (local == 0) { + _Py_MergeZeroLocalRefcount(op); + } + } + else { + _Py_DecRefShared(op); + } +} +#define Py_DECREF(op) Py_DECREF(_PyObject_CAST(op)) + +#elif defined(Py_REF_DEBUG) +static inline void Py_DECREF(const char *filename, int lineno, PyObject *op) +{ + if (op->ob_refcnt <= 0) { + _Py_NegativeRefcount(filename, lineno, op); + } + if (_Py_IsImmortal(op)) { + return; + } + _Py_DECREF_STAT_INC(); + _Py_DECREF_DecRefTotal(); + if (--op->ob_refcnt == 0) { + _Py_Dealloc(op); + } +} +#define Py_DECREF(op) Py_DECREF(__FILE__, __LINE__, _PyObject_CAST(op)) + +#else +static inline Py_ALWAYS_INLINE void Py_DECREF(PyObject *op) +{ + // Non-limited C API and limited C API for Python 3.9 and older access + // directly PyObject.ob_refcnt. + if (_Py_IsImmortal(op)) { + return; + } + _Py_DECREF_STAT_INC(); + if (--op->ob_refcnt == 0) { + _Py_Dealloc(op); + } +} +#define Py_DECREF(op) Py_DECREF(_PyObject_CAST(op)) +#endif + + +/* Safely decref `op` and set `op` to NULL, especially useful in tp_clear + * and tp_dealloc implementations. + * + * Note that "the obvious" code can be deadly: + * + * Py_XDECREF(op); + * op = NULL; + * + * Typically, `op` is something like self->containee, and `self` is done + * using its `containee` member. In the code sequence above, suppose + * `containee` is non-NULL with a refcount of 1. Its refcount falls to + * 0 on the first line, which can trigger an arbitrary amount of code, + * possibly including finalizers (like __del__ methods or weakref callbacks) + * coded in Python, which in turn can release the GIL and allow other threads + * to run, etc. Such code may even invoke methods of `self` again, or cause + * cyclic gc to trigger, but-- oops! --self->containee still points to the + * object being torn down, and it may be in an insane state while being torn + * down. This has in fact been a rich historic source of miserable (rare & + * hard-to-diagnose) segfaulting (and other) bugs. + * + * The safe way is: + * + * Py_CLEAR(op); + * + * That arranges to set `op` to NULL _before_ decref'ing, so that any code + * triggered as a side-effect of `op` getting torn down no longer believes + * `op` points to a valid object. + * + * There are cases where it's safe to use the naive code, but they're brittle. 
+ * For example, if `op` points to a Python integer, you know that destroying + * one of those can't cause problems -- but in part that relies on that + * Python integers aren't currently weakly referencable. Best practice is + * to use Py_CLEAR() even if you can't think of a reason for why you need to. + * + * gh-98724: Use a temporary variable to only evaluate the macro argument once, + * to avoid the duplication of side effects if the argument has side effects. + * + * gh-99701: If the PyObject* type is used with casting arguments to PyObject*, + * the code can be miscompiled with strict aliasing because of type punning. + * With strict aliasing, a compiler considers that two pointers of different + * types cannot read or write the same memory which enables optimization + * opportunities. + * + * If available, use _Py_TYPEOF() to use the 'op' type for temporary variables, + * and so avoid type punning. Otherwise, use memcpy() which causes type erasure + * and so prevents the compiler to reuse an old cached 'op' value after + * Py_CLEAR(). + */ +#ifdef _Py_TYPEOF +#define Py_CLEAR(op) \ + do { \ + _Py_TYPEOF(op)* _tmp_op_ptr = &(op); \ + _Py_TYPEOF(op) _tmp_old_op = (*_tmp_op_ptr); \ + if (_tmp_old_op != NULL) { \ + *_tmp_op_ptr = _Py_NULL; \ + Py_DECREF(_tmp_old_op); \ + } \ + } while (0) +#else +#define Py_CLEAR(op) \ + do { \ + PyObject **_tmp_op_ptr = _Py_CAST(PyObject**, &(op)); \ + PyObject *_tmp_old_op = (*_tmp_op_ptr); \ + if (_tmp_old_op != NULL) { \ + PyObject *_null_ptr = _Py_NULL; \ + memcpy(_tmp_op_ptr, &_null_ptr, sizeof(PyObject*)); \ + Py_DECREF(_tmp_old_op); \ + } \ + } while (0) +#endif + + +/* Function to use in case the object pointer can be NULL: */ +static inline void Py_XINCREF(PyObject *op) +{ + if (op != _Py_NULL) { + Py_INCREF(op); + } +} +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 +# define Py_XINCREF(op) Py_XINCREF(_PyObject_CAST(op)) +#endif + +static inline void Py_XDECREF(PyObject *op) +{ + if (op != _Py_NULL) { + Py_DECREF(op); + } +} +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 +# define Py_XDECREF(op) Py_XDECREF(_PyObject_CAST(op)) +#endif + +// Create a new strong reference to an object: +// increment the reference count of the object and return the object. +PyAPI_FUNC(PyObject*) Py_NewRef(PyObject *obj); + +// Similar to Py_NewRef(), but the object can be NULL. +PyAPI_FUNC(PyObject*) Py_XNewRef(PyObject *obj); + +static inline PyObject* _Py_NewRef(PyObject *obj) +{ + Py_INCREF(obj); + return obj; +} + +static inline PyObject* _Py_XNewRef(PyObject *obj) +{ + Py_XINCREF(obj); + return obj; +} + +// Py_NewRef() and Py_XNewRef() are exported as functions for the stable ABI. +// Names overridden with macros by static inline functions for best +// performances. 
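+//
+// A hedged usage sketch; the field name "cached" is illustrative only:
+//
+//     self->cached = Py_XNewRef(maybe_obj);  // take a strong reference,
+//                                            // tolerating NULL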
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 +# define Py_NewRef(obj) _Py_NewRef(_PyObject_CAST(obj)) +# define Py_XNewRef(obj) _Py_XNewRef(_PyObject_CAST(obj)) +#else +# define Py_NewRef(obj) _Py_NewRef(obj) +# define Py_XNewRef(obj) _Py_XNewRef(obj) +#endif + + +#define Py_CONSTANT_NONE 0 +#define Py_CONSTANT_FALSE 1 +#define Py_CONSTANT_TRUE 2 +#define Py_CONSTANT_ELLIPSIS 3 +#define Py_CONSTANT_NOT_IMPLEMENTED 4 +#define Py_CONSTANT_ZERO 5 +#define Py_CONSTANT_ONE 6 +#define Py_CONSTANT_EMPTY_STR 7 +#define Py_CONSTANT_EMPTY_BYTES 8 +#define Py_CONSTANT_EMPTY_TUPLE 9 + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030d0000 +PyAPI_FUNC(PyObject*) Py_GetConstant(unsigned int constant_id); +PyAPI_FUNC(PyObject*) Py_GetConstantBorrowed(unsigned int constant_id); +#endif + + +/* +_Py_NoneStruct is an object of undefined type which can be used in contexts +where NULL (nil) is not suitable (since NULL often means 'error'). +*/ +PyAPI_DATA(PyObject) _Py_NoneStruct; /* Don't use this directly */ + +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030D0000 +# define Py_None Py_GetConstantBorrowed(Py_CONSTANT_NONE) +#else +# define Py_None (&_Py_NoneStruct) +#endif + +// Test if an object is the None singleton, the same as "x is None" in Python. +PyAPI_FUNC(int) Py_IsNone(PyObject *x); +#define Py_IsNone(x) Py_Is((x), Py_None) + +/* Macro for returning Py_None from a function. + * Only treat Py_None as immortal in the limited C API 3.12 and newer. */ +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 < 0x030c0000 +# define Py_RETURN_NONE return Py_NewRef(Py_None) +#else +# define Py_RETURN_NONE return Py_None +#endif + +/* +Py_NotImplemented is a singleton used to signal that an operation is +not implemented for a given type combination. +*/ +PyAPI_DATA(PyObject) _Py_NotImplementedStruct; /* Don't use this directly */ + +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030D0000 +# define Py_NotImplemented Py_GetConstantBorrowed(Py_CONSTANT_NOT_IMPLEMENTED) +#else +# define Py_NotImplemented (&_Py_NotImplementedStruct) +#endif + +/* Macro for returning Py_NotImplemented from a function */ +#define Py_RETURN_NOTIMPLEMENTED return Py_NotImplemented + +/* Rich comparison opcodes */ +#define Py_LT 0 +#define Py_LE 1 +#define Py_EQ 2 +#define Py_NE 3 +#define Py_GT 4 +#define Py_GE 5 + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030A0000 +/* Result of calling PyIter_Send */ +typedef enum { + PYGEN_RETURN = 0, + PYGEN_ERROR = -1, + PYGEN_NEXT = 1, +} PySendResult; +#endif + +/* + * Macro for implementing rich comparisons + * + * Needs to be a macro because any C-comparable type can be used. + */ +#define Py_RETURN_RICHCOMPARE(val1, val2, op) \ + do { \ + switch (op) { \ + case Py_EQ: if ((val1) == (val2)) Py_RETURN_TRUE; Py_RETURN_FALSE; \ + case Py_NE: if ((val1) != (val2)) Py_RETURN_TRUE; Py_RETURN_FALSE; \ + case Py_LT: if ((val1) < (val2)) Py_RETURN_TRUE; Py_RETURN_FALSE; \ + case Py_GT: if ((val1) > (val2)) Py_RETURN_TRUE; Py_RETURN_FALSE; \ + case Py_LE: if ((val1) <= (val2)) Py_RETURN_TRUE; Py_RETURN_FALSE; \ + case Py_GE: if ((val1) >= (val2)) Py_RETURN_TRUE; Py_RETURN_FALSE; \ + default: \ + Py_UNREACHABLE(); \ + } \ + } while (0) + + +/* +More conventions +================ + +Argument Checking +----------------- + +Functions that take objects as arguments normally don't check for nil +arguments, but they do check the type of the argument, and return an +error if the function doesn't apply to the type. 
+ +Failure Modes +------------- + +Functions may fail for a variety of reasons, including running out of +memory. This is communicated to the caller in two ways: an error string +is set (see errors.h), and the function result differs: functions that +normally return a pointer return NULL for failure, functions returning +an integer return -1 (which could be a legal return value too!), and +other functions return 0 for success and -1 for failure. +Callers should always check for errors before using the result. If +an error was set, the caller must either explicitly clear it, or pass +the error on to its caller. + +Reference Counts +---------------- + +It takes a while to get used to the proper usage of reference counts. + +Functions that create an object set the reference count to 1; such new +objects must be stored somewhere or destroyed again with Py_DECREF(). +Some functions that 'store' objects, such as PyTuple_SetItem() and +PyList_SetItem(), +don't increment the reference count of the object, since the most +frequent use is to store a fresh object. Functions that 'retrieve' +objects, such as PyTuple_GetItem() and PyDict_GetItemString(), also +don't increment +the reference count, since most frequently the object is only looked at +quickly. Thus, to retrieve an object and store it again, the caller +must call Py_INCREF() explicitly. + +NOTE: functions that 'consume' a reference count, like +PyList_SetItem(), consume the reference even if the object wasn't +successfully stored, to simplify error handling. + +It seems attractive to make other functions that take an object as +argument consume a reference count; however, this may quickly get +confusing (even the current practice is already confusing). Consider +it carefully, it may save lots of calls to Py_INCREF() and Py_DECREF() at +times. +*/ + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_OBJECT_H +# include "cpython/object.h" +# undef Py_CPYTHON_OBJECT_H +#endif + + +static inline int +PyType_HasFeature(PyTypeObject *type, unsigned long feature) +{ + unsigned long flags; +#ifdef Py_LIMITED_API + // PyTypeObject is opaque in the limited C API + flags = PyType_GetFlags(type); +#else +# ifdef Py_GIL_DISABLED + flags = _Py_atomic_load_ulong_relaxed(&type->tp_flags); +# else + flags = type->tp_flags; +# endif +#endif + return ((flags & feature) != 0); +} + +#define PyType_FastSubclass(type, flag) PyType_HasFeature((type), (flag)) + +static inline int PyType_Check(PyObject *op) { + return PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_TYPE_SUBCLASS); +} +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 +# define PyType_Check(op) PyType_Check(_PyObject_CAST(op)) +#endif + +#define _PyType_CAST(op) \ + (assert(PyType_Check(op)), _Py_CAST(PyTypeObject*, (op))) + +static inline int PyType_CheckExact(PyObject *op) { + return Py_IS_TYPE(op, &PyType_Type); +} +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 +# define PyType_CheckExact(op) PyType_CheckExact(_PyObject_CAST(op)) +#endif + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030d0000 +PyAPI_FUNC(PyObject *) PyType_GetModuleByDef(PyTypeObject *, PyModuleDef *); +#endif + +#ifdef __cplusplus +} +#endif +#endif // !Py_OBJECT_H diff --git a/Include/objimpl.h b/Include/objimpl.h new file mode 100644 index 0000000000000000000000000000000000000000..56472a72e42d341a56931646081d0221cf8113b2 --- /dev/null +++ b/Include/objimpl.h @@ -0,0 +1,211 @@ +// The PyObject_ memory family: high-level object memory interfaces. +// See pymem.h for the low-level PyMem_ family. 
+ +#ifndef Py_OBJIMPL_H +#define Py_OBJIMPL_H +#ifdef __cplusplus +extern "C" { +#endif + +/* BEWARE: + + Each interface exports both functions and macros. Extension modules should + use the functions, to ensure binary compatibility across Python versions. + Because the Python implementation is free to change internal details, and + the macros may (or may not) expose details for speed, if you do use the + macros you must recompile your extensions with each Python release. + + Never mix calls to PyObject_ memory functions with calls to the platform + malloc/realloc/ calloc/free, or with calls to PyMem_. +*/ + +/* +Functions and macros for modules that implement new object types. + + - PyObject_New(type, typeobj) allocates memory for a new object of the given + type, and initializes part of it. 'type' must be the C structure type used + to represent the object, and 'typeobj' the address of the corresponding + type object. Reference count and type pointer are filled in; the rest of + the bytes of the object are *undefined*! The resulting expression type is + 'type *'. The size of the object is determined by the tp_basicsize field + of the type object. + + - PyObject_NewVar(type, typeobj, n) is similar but allocates a variable-size + object with room for n items. In addition to the refcount and type pointer + fields, this also fills in the ob_size field. + + - PyObject_Free(op) releases the memory allocated for an object. It does not + run a destructor -- it only frees the memory. + + - PyObject_Init(op, typeobj) and PyObject_InitVar(op, typeobj, n) don't + allocate memory. Instead of a 'type' parameter, they take a pointer to a + new object (allocated by an arbitrary allocator), and initialize its object + header fields. + +Note that objects created with PyObject_{New, NewVar} are allocated using the +specialized Python allocator (implemented in obmalloc.c), if WITH_PYMALLOC is +enabled. In addition, a special debugging allocator is used if Py_DEBUG +macro is also defined. + +In case a specific form of memory management is needed (for example, if you +must use the platform malloc heap(s), or shared memory, or C++ local storage or +operator new), you must first allocate the object with your custom allocator, +then pass its pointer to PyObject_{Init, InitVar} for filling in its Python- +specific fields: reference count, type pointer, possibly others. You should +be aware that Python has no control over these objects because they don't +cooperate with the Python memory manager. Such objects may not be eligible +for automatic garbage collection and you have to make sure that they are +released accordingly whenever their destructor gets called (cf. the specific +form of memory management you're using). + +Unless you have specific memory management requirements, use +PyObject_{New, NewVar, Del}. +*/ + +/* + * Raw object memory interface + * =========================== + */ + +/* Functions to call the same malloc/realloc/free as used by Python's + object allocator. If WITH_PYMALLOC is enabled, these may differ from + the platform malloc/realloc/free. The Python object allocator is + designed for fast, cache-conscious allocation of many "small" objects, + and with low hidden memory overhead. + + PyObject_Malloc(0) returns a unique non-NULL pointer if possible. + + PyObject_Realloc(NULL, n) acts like PyObject_Malloc(n). + PyObject_Realloc(p != NULL, 0) does not return NULL, or free the memory + at p. 
+ + Returned pointers must be checked for NULL explicitly; no action is + performed on failure other than to return NULL (no warning it printed, no + exception is set, etc). + + For allocating objects, use PyObject_{New, NewVar} instead whenever + possible. The PyObject_{Malloc, Realloc, Free} family is exposed + so that you can exploit Python's small-block allocator for non-object + uses. If you must use these routines to allocate object memory, make sure + the object gets initialized via PyObject_{Init, InitVar} after obtaining + the raw memory. +*/ +PyAPI_FUNC(void *) PyObject_Malloc(size_t size); +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000 +PyAPI_FUNC(void *) PyObject_Calloc(size_t nelem, size_t elsize); +#endif +PyAPI_FUNC(void *) PyObject_Realloc(void *ptr, size_t new_size); +PyAPI_FUNC(void) PyObject_Free(void *ptr); + + +// Deprecated aliases only kept for backward compatibility. +// PyObject_Del and PyObject_DEL are defined with no parameter to be able to +// use them as function pointers (ex: tp_free = PyObject_Del). +#define PyObject_MALLOC PyObject_Malloc +#define PyObject_REALLOC PyObject_Realloc +#define PyObject_FREE PyObject_Free +#define PyObject_Del PyObject_Free +#define PyObject_DEL PyObject_Free + + +/* + * Generic object allocator interface + * ================================== + */ + +/* Functions */ +PyAPI_FUNC(PyObject *) PyObject_Init(PyObject *, PyTypeObject *); +PyAPI_FUNC(PyVarObject *) PyObject_InitVar(PyVarObject *, + PyTypeObject *, Py_ssize_t); + +#define PyObject_INIT(op, typeobj) \ + PyObject_Init(_PyObject_CAST(op), (typeobj)) +#define PyObject_INIT_VAR(op, typeobj, size) \ + PyObject_InitVar(_PyVarObject_CAST(op), (typeobj), (size)) + + +PyAPI_FUNC(PyObject *) _PyObject_New(PyTypeObject *); +PyAPI_FUNC(PyVarObject *) _PyObject_NewVar(PyTypeObject *, Py_ssize_t); + +#define PyObject_New(type, typeobj) ((type *)_PyObject_New(typeobj)) + +// Alias to PyObject_New(). In Python 3.8, PyObject_NEW() called directly +// PyObject_MALLOC() with _PyObject_SIZE(). +#define PyObject_NEW(type, typeobj) PyObject_New(type, (typeobj)) + +#define PyObject_NewVar(type, typeobj, n) \ + ( (type *) _PyObject_NewVar((typeobj), (n)) ) + +// Alias to PyObject_NewVar(). In Python 3.8, PyObject_NEW_VAR() called +// directly PyObject_MALLOC() with _PyObject_VAR_SIZE(). +#define PyObject_NEW_VAR(type, typeobj, n) PyObject_NewVar(type, (typeobj), (n)) + + +/* + * Garbage Collection Support + * ========================== + */ + +/* C equivalent of gc.collect(). */ +PyAPI_FUNC(Py_ssize_t) PyGC_Collect(void); +/* C API for controlling the state of the garbage collector */ +PyAPI_FUNC(int) PyGC_Enable(void); +PyAPI_FUNC(int) PyGC_Disable(void); +PyAPI_FUNC(int) PyGC_IsEnabled(void); + +/* Test if a type has a GC head */ +#define PyType_IS_GC(t) PyType_HasFeature((t), Py_TPFLAGS_HAVE_GC) + +PyAPI_FUNC(PyVarObject *) _PyObject_GC_Resize(PyVarObject *, Py_ssize_t); +#define PyObject_GC_Resize(type, op, n) \ + ( (type *) _PyObject_GC_Resize(_PyVarObject_CAST(op), (n)) ) + + + +PyAPI_FUNC(PyObject *) _PyObject_GC_New(PyTypeObject *); +PyAPI_FUNC(PyVarObject *) _PyObject_GC_NewVar(PyTypeObject *, Py_ssize_t); + +/* Tell the GC to track this object. + * + * See also private _PyObject_GC_TRACK() macro. */ +PyAPI_FUNC(void) PyObject_GC_Track(void *); + +/* Tell the GC to stop tracking this object. + * + * See also private _PyObject_GC_UNTRACK() macro. 
*/ +PyAPI_FUNC(void) PyObject_GC_UnTrack(void *); + +PyAPI_FUNC(void) PyObject_GC_Del(void *); + +#define PyObject_GC_New(type, typeobj) \ + _Py_CAST(type*, _PyObject_GC_New(typeobj)) +#define PyObject_GC_NewVar(type, typeobj, n) \ + _Py_CAST(type*, _PyObject_GC_NewVar((typeobj), (n))) + +PyAPI_FUNC(int) PyObject_GC_IsTracked(PyObject *); +PyAPI_FUNC(int) PyObject_GC_IsFinalized(PyObject *); + +/* Utility macro to help write tp_traverse functions. + * To use this macro, the tp_traverse function must name its arguments + * "visit" and "arg". This is intended to keep tp_traverse functions + * looking as much alike as possible. + */ +#define Py_VISIT(op) \ + do { \ + if (op) { \ + int vret = visit(_PyObject_CAST(op), arg); \ + if (vret) \ + return vret; \ + } \ + } while (0) + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_OBJIMPL_H +# include "cpython/objimpl.h" +# undef Py_CPYTHON_OBJIMPL_H +#endif + +#ifdef __cplusplus +} +#endif +#endif // !Py_OBJIMPL_H diff --git a/Include/opcode.h b/Include/opcode.h new file mode 100644 index 0000000000000000000000000000000000000000..2619b690019acc370d3d73c7544f3fd1cff99b42 --- /dev/null +++ b/Include/opcode.h @@ -0,0 +1,42 @@ +#ifndef Py_OPCODE_H +#define Py_OPCODE_H +#ifdef __cplusplus +extern "C" { +#endif + +#include "opcode_ids.h" + + +#define NB_ADD 0 +#define NB_AND 1 +#define NB_FLOOR_DIVIDE 2 +#define NB_LSHIFT 3 +#define NB_MATRIX_MULTIPLY 4 +#define NB_MULTIPLY 5 +#define NB_REMAINDER 6 +#define NB_OR 7 +#define NB_POWER 8 +#define NB_RSHIFT 9 +#define NB_SUBTRACT 10 +#define NB_TRUE_DIVIDE 11 +#define NB_XOR 12 +#define NB_INPLACE_ADD 13 +#define NB_INPLACE_AND 14 +#define NB_INPLACE_FLOOR_DIVIDE 15 +#define NB_INPLACE_LSHIFT 16 +#define NB_INPLACE_MATRIX_MULTIPLY 17 +#define NB_INPLACE_MULTIPLY 18 +#define NB_INPLACE_REMAINDER 19 +#define NB_INPLACE_OR 20 +#define NB_INPLACE_POWER 21 +#define NB_INPLACE_RSHIFT 22 +#define NB_INPLACE_SUBTRACT 23 +#define NB_INPLACE_TRUE_DIVIDE 24 +#define NB_INPLACE_XOR 25 + +#define NB_OPARG_LAST 25 + +#ifdef __cplusplus +} +#endif +#endif /* !Py_OPCODE_H */ diff --git a/Include/opcode_ids.h b/Include/opcode_ids.h new file mode 100644 index 0000000000000000000000000000000000000000..647f7c0ecb1ec83a84c4db190338ac3e60f818d1 --- /dev/null +++ b/Include/opcode_ids.h @@ -0,0 +1,244 @@ +// This file is generated by Tools/cases_generator/opcode_id_generator.py +// from: +// Python/bytecodes.c +// Do not edit! 
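The Py_VISIT helper above is easiest to read next to the tp_traverse slot it
is written for. A hedged sketch, where "PairObject" is a hypothetical
GC-tracked container (the parameters must be named "visit" and "arg", as the
comment above requires):

    typedef struct {
        PyObject_HEAD
        PyObject *first;    /* may be NULL */
        PyObject *second;   /* may be NULL */
    } PairObject;

    static int
    pair_traverse(PairObject *self, visitproc visit, void *arg)
    {
        /* Py_VISIT skips NULL fields and returns early if visit() fails. */
        Py_VISIT(self->first);
        Py_VISIT(self->second);
        return 0;
    }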
+ +#ifndef Py_OPCODE_IDS_H +#define Py_OPCODE_IDS_H +#ifdef __cplusplus +extern "C" { +#endif + +/* Instruction opcodes for compiled code */ +#define CACHE 0 +#define BEFORE_ASYNC_WITH 1 +#define BEFORE_WITH 2 +#define BINARY_OP_INPLACE_ADD_UNICODE 3 +#define BINARY_SLICE 4 +#define BINARY_SUBSCR 5 +#define CHECK_EG_MATCH 6 +#define CHECK_EXC_MATCH 7 +#define CLEANUP_THROW 8 +#define DELETE_SUBSCR 9 +#define END_ASYNC_FOR 10 +#define END_FOR 11 +#define END_SEND 12 +#define EXIT_INIT_CHECK 13 +#define FORMAT_SIMPLE 14 +#define FORMAT_WITH_SPEC 15 +#define GET_AITER 16 +#define RESERVED 17 +#define GET_ANEXT 18 +#define GET_ITER 19 +#define GET_LEN 20 +#define GET_YIELD_FROM_ITER 21 +#define INTERPRETER_EXIT 22 +#define LOAD_ASSERTION_ERROR 23 +#define LOAD_BUILD_CLASS 24 +#define LOAD_LOCALS 25 +#define MAKE_FUNCTION 26 +#define MATCH_KEYS 27 +#define MATCH_MAPPING 28 +#define MATCH_SEQUENCE 29 +#define NOP 30 +#define POP_EXCEPT 31 +#define POP_TOP 32 +#define PUSH_EXC_INFO 33 +#define PUSH_NULL 34 +#define RETURN_GENERATOR 35 +#define RETURN_VALUE 36 +#define SETUP_ANNOTATIONS 37 +#define STORE_SLICE 38 +#define STORE_SUBSCR 39 +#define TO_BOOL 40 +#define UNARY_INVERT 41 +#define UNARY_NEGATIVE 42 +#define UNARY_NOT 43 +#define WITH_EXCEPT_START 44 +#define BINARY_OP 45 +#define BUILD_CONST_KEY_MAP 46 +#define BUILD_LIST 47 +#define BUILD_MAP 48 +#define BUILD_SET 49 +#define BUILD_SLICE 50 +#define BUILD_STRING 51 +#define BUILD_TUPLE 52 +#define CALL 53 +#define CALL_FUNCTION_EX 54 +#define CALL_INTRINSIC_1 55 +#define CALL_INTRINSIC_2 56 +#define CALL_KW 57 +#define COMPARE_OP 58 +#define CONTAINS_OP 59 +#define CONVERT_VALUE 60 +#define COPY 61 +#define COPY_FREE_VARS 62 +#define DELETE_ATTR 63 +#define DELETE_DEREF 64 +#define DELETE_FAST 65 +#define DELETE_GLOBAL 66 +#define DELETE_NAME 67 +#define DICT_MERGE 68 +#define DICT_UPDATE 69 +#define ENTER_EXECUTOR 70 +#define EXTENDED_ARG 71 +#define FOR_ITER 72 +#define GET_AWAITABLE 73 +#define IMPORT_FROM 74 +#define IMPORT_NAME 75 +#define IS_OP 76 +#define JUMP_BACKWARD 77 +#define JUMP_BACKWARD_NO_INTERRUPT 78 +#define JUMP_FORWARD 79 +#define LIST_APPEND 80 +#define LIST_EXTEND 81 +#define LOAD_ATTR 82 +#define LOAD_CONST 83 +#define LOAD_DEREF 84 +#define LOAD_FAST 85 +#define LOAD_FAST_AND_CLEAR 86 +#define LOAD_FAST_CHECK 87 +#define LOAD_FAST_LOAD_FAST 88 +#define LOAD_FROM_DICT_OR_DEREF 89 +#define LOAD_FROM_DICT_OR_GLOBALS 90 +#define LOAD_GLOBAL 91 +#define LOAD_NAME 92 +#define LOAD_SUPER_ATTR 93 +#define MAKE_CELL 94 +#define MAP_ADD 95 +#define MATCH_CLASS 96 +#define POP_JUMP_IF_FALSE 97 +#define POP_JUMP_IF_NONE 98 +#define POP_JUMP_IF_NOT_NONE 99 +#define POP_JUMP_IF_TRUE 100 +#define RAISE_VARARGS 101 +#define RERAISE 102 +#define RETURN_CONST 103 +#define SEND 104 +#define SET_ADD 105 +#define SET_FUNCTION_ATTRIBUTE 106 +#define SET_UPDATE 107 +#define STORE_ATTR 108 +#define STORE_DEREF 109 +#define STORE_FAST 110 +#define STORE_FAST_LOAD_FAST 111 +#define STORE_FAST_STORE_FAST 112 +#define STORE_GLOBAL 113 +#define STORE_NAME 114 +#define SWAP 115 +#define UNPACK_EX 116 +#define UNPACK_SEQUENCE 117 +#define YIELD_VALUE 118 +#define RESUME 149 +#define BINARY_OP_ADD_FLOAT 150 +#define BINARY_OP_ADD_INT 151 +#define BINARY_OP_ADD_UNICODE 152 +#define BINARY_OP_MULTIPLY_FLOAT 153 +#define BINARY_OP_MULTIPLY_INT 154 +#define BINARY_OP_SUBTRACT_FLOAT 155 +#define BINARY_OP_SUBTRACT_INT 156 +#define BINARY_SUBSCR_DICT 157 +#define BINARY_SUBSCR_GETITEM 158 +#define BINARY_SUBSCR_LIST_INT 159 +#define 
BINARY_SUBSCR_STR_INT 160 +#define BINARY_SUBSCR_TUPLE_INT 161 +#define CALL_ALLOC_AND_ENTER_INIT 162 +#define CALL_BOUND_METHOD_EXACT_ARGS 163 +#define CALL_BOUND_METHOD_GENERAL 164 +#define CALL_BUILTIN_CLASS 165 +#define CALL_BUILTIN_FAST 166 +#define CALL_BUILTIN_FAST_WITH_KEYWORDS 167 +#define CALL_BUILTIN_O 168 +#define CALL_ISINSTANCE 169 +#define CALL_LEN 170 +#define CALL_LIST_APPEND 171 +#define CALL_METHOD_DESCRIPTOR_FAST 172 +#define CALL_METHOD_DESCRIPTOR_FAST_WITH_KEYWORDS 173 +#define CALL_METHOD_DESCRIPTOR_NOARGS 174 +#define CALL_METHOD_DESCRIPTOR_O 175 +#define CALL_NON_PY_GENERAL 176 +#define CALL_PY_EXACT_ARGS 177 +#define CALL_PY_GENERAL 178 +#define CALL_STR_1 179 +#define CALL_TUPLE_1 180 +#define CALL_TYPE_1 181 +#define COMPARE_OP_FLOAT 182 +#define COMPARE_OP_INT 183 +#define COMPARE_OP_STR 184 +#define CONTAINS_OP_DICT 185 +#define CONTAINS_OP_SET 186 +#define FOR_ITER_GEN 187 +#define FOR_ITER_LIST 188 +#define FOR_ITER_RANGE 189 +#define FOR_ITER_TUPLE 190 +#define LOAD_ATTR_CLASS 191 +#define LOAD_ATTR_GETATTRIBUTE_OVERRIDDEN 192 +#define LOAD_ATTR_INSTANCE_VALUE 193 +#define LOAD_ATTR_METHOD_LAZY_DICT 194 +#define LOAD_ATTR_METHOD_NO_DICT 195 +#define LOAD_ATTR_METHOD_WITH_VALUES 196 +#define LOAD_ATTR_MODULE 197 +#define LOAD_ATTR_NONDESCRIPTOR_NO_DICT 198 +#define LOAD_ATTR_NONDESCRIPTOR_WITH_VALUES 199 +#define LOAD_ATTR_PROPERTY 200 +#define LOAD_ATTR_SLOT 201 +#define LOAD_ATTR_WITH_HINT 202 +#define LOAD_GLOBAL_BUILTIN 203 +#define LOAD_GLOBAL_MODULE 204 +#define LOAD_SUPER_ATTR_ATTR 205 +#define LOAD_SUPER_ATTR_METHOD 206 +#define RESUME_CHECK 207 +#define SEND_GEN 208 +#define STORE_ATTR_INSTANCE_VALUE 209 +#define STORE_ATTR_SLOT 210 +#define STORE_ATTR_WITH_HINT 211 +#define STORE_SUBSCR_DICT 212 +#define STORE_SUBSCR_LIST_INT 213 +#define TO_BOOL_ALWAYS_TRUE 214 +#define TO_BOOL_BOOL 215 +#define TO_BOOL_INT 216 +#define TO_BOOL_LIST 217 +#define TO_BOOL_NONE 218 +#define TO_BOOL_STR 219 +#define UNPACK_SEQUENCE_LIST 220 +#define UNPACK_SEQUENCE_TUPLE 221 +#define UNPACK_SEQUENCE_TWO_TUPLE 222 +#define INSTRUMENTED_RESUME 236 +#define INSTRUMENTED_END_FOR 237 +#define INSTRUMENTED_END_SEND 238 +#define INSTRUMENTED_RETURN_VALUE 239 +#define INSTRUMENTED_RETURN_CONST 240 +#define INSTRUMENTED_YIELD_VALUE 241 +#define INSTRUMENTED_LOAD_SUPER_ATTR 242 +#define INSTRUMENTED_FOR_ITER 243 +#define INSTRUMENTED_CALL 244 +#define INSTRUMENTED_CALL_KW 245 +#define INSTRUMENTED_CALL_FUNCTION_EX 246 +#define INSTRUMENTED_INSTRUCTION 247 +#define INSTRUMENTED_JUMP_FORWARD 248 +#define INSTRUMENTED_JUMP_BACKWARD 249 +#define INSTRUMENTED_POP_JUMP_IF_TRUE 250 +#define INSTRUMENTED_POP_JUMP_IF_FALSE 251 +#define INSTRUMENTED_POP_JUMP_IF_NONE 252 +#define INSTRUMENTED_POP_JUMP_IF_NOT_NONE 253 +#define INSTRUMENTED_LINE 254 +#define JUMP 256 +#define JUMP_NO_INTERRUPT 257 +#define LOAD_CLOSURE 258 +#define LOAD_METHOD 259 +#define LOAD_SUPER_METHOD 260 +#define LOAD_ZERO_SUPER_ATTR 261 +#define LOAD_ZERO_SUPER_METHOD 262 +#define POP_BLOCK 263 +#define SETUP_CLEANUP 264 +#define SETUP_FINALLY 265 +#define SETUP_WITH 266 +#define STORE_FAST_MAYBE_NULL 267 + +#define HAVE_ARGUMENT 44 +#define MIN_INSTRUMENTED_OPCODE 236 + +#ifdef __cplusplus +} +#endif +#endif /* !Py_OPCODE_IDS_H */ diff --git a/Include/osdefs.h b/Include/osdefs.h new file mode 100644 index 0000000000000000000000000000000000000000..2599e87a9d7c4b8e599a88d46b264e2be3a1da7d --- /dev/null +++ b/Include/osdefs.h @@ -0,0 +1,57 @@ +// Operating system dependencies. 
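Two thresholds close the opcode table above: HAVE_ARGUMENT and
MIN_INSTRUMENTED_OPCODE. A hedged sketch of how such thresholds are
conventionally tested when scanning bytecode (the helper names are
illustrative, not part of the header):

    #include "opcode_ids.h"

    /* Opcodes at or above HAVE_ARGUMENT use their oparg; those at or above
       MIN_INSTRUMENTED_OPCODE are the instrumented variants used by
       sys.monitoring. */
    static int opcode_uses_arg(int opcode)     { return opcode >= HAVE_ARGUMENT; }
    static int opcode_instrumented(int opcode) { return opcode >= MIN_INSTRUMENTED_OPCODE; }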
+// +// Define constants: +// +// - ALTSEP +// - DELIM +// - MAXPATHLEN +// - SEP + +#ifndef Py_OSDEFS_H +#define Py_OSDEFS_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef MS_WINDOWS +# define SEP L'\\' +# define ALTSEP L'/' +# define MAXPATHLEN 256 +# define DELIM L';' +#endif + +#ifdef __VXWORKS__ +# define DELIM L';' +#endif + +/* Filename separator */ +#ifndef SEP +# define SEP L'/' +#endif + +/* Max pathname length */ +#ifdef __hpux +# include +# include +# ifndef PATH_MAX +# define PATH_MAX MAXPATHLEN +# endif +#endif + +#ifndef MAXPATHLEN +# if defined(PATH_MAX) && PATH_MAX > 1024 +# define MAXPATHLEN PATH_MAX +# else +# define MAXPATHLEN 1024 +# endif +#endif + +/* Search path entry delimiter */ +#ifndef DELIM +# define DELIM L':' +#endif + +#ifdef __cplusplus +} +#endif +#endif // !Py_OSDEFS_H diff --git a/Include/osmodule.h b/Include/osmodule.h new file mode 100644 index 0000000000000000000000000000000000000000..9095c2fdd3d638140db129937d29830286941819 --- /dev/null +++ b/Include/osmodule.h @@ -0,0 +1,17 @@ + +/* os module interface */ + +#ifndef Py_OSMODULE_H +#define Py_OSMODULE_H +#ifdef __cplusplus +extern "C" { +#endif + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03060000 +PyAPI_FUNC(PyObject *) PyOS_FSPath(PyObject *path); +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_OSMODULE_H */ diff --git a/Include/patchlevel.h b/Include/patchlevel.h new file mode 100644 index 0000000000000000000000000000000000000000..36b5ac25c63d38eea942737010d4ee7ef5dc9826 --- /dev/null +++ b/Include/patchlevel.h @@ -0,0 +1,35 @@ + +/* Python version identification scheme. + + When the major or minor version changes, the VERSION variable in + configure.ac must also be changed. + + There is also (independent) API version information in modsupport.h. +*/ + +/* Values for PY_RELEASE_LEVEL */ +#define PY_RELEASE_LEVEL_ALPHA 0xA +#define PY_RELEASE_LEVEL_BETA 0xB +#define PY_RELEASE_LEVEL_GAMMA 0xC /* For release candidates */ +#define PY_RELEASE_LEVEL_FINAL 0xF /* Serial should be 0 here */ + /* Higher for patch releases */ + +/* Version parsed out into numeric values */ +/*--start constants--*/ +#define PY_MAJOR_VERSION 3 +#define PY_MINOR_VERSION 13 +#define PY_MICRO_VERSION 7 +#define PY_RELEASE_LEVEL PY_RELEASE_LEVEL_FINAL +#define PY_RELEASE_SERIAL 0 + +/* Version as a string */ +#define PY_VERSION "3.13.7" +/*--end constants--*/ + +/* Version as a single 4-byte hex number, e.g. 0x010502B2 == 1.5.2b2. + Use this for numeric comparisons, e.g. #if PY_VERSION_HEX >= ... */ +#define PY_VERSION_HEX ((PY_MAJOR_VERSION << 24) | \ + (PY_MINOR_VERSION << 16) | \ + (PY_MICRO_VERSION << 8) | \ + (PY_RELEASE_LEVEL << 4) | \ + (PY_RELEASE_SERIAL << 0)) diff --git a/Include/py_curses.h b/Include/py_curses.h new file mode 100644 index 0000000000000000000000000000000000000000..3e8b16c201f810207c72a259e646e57374038a2f --- /dev/null +++ b/Include/py_curses.h @@ -0,0 +1,117 @@ + +#ifndef Py_CURSES_H +#define Py_CURSES_H + +#ifdef __APPLE__ +/* +** On Mac OS X 10.2 [n]curses.h and stdlib.h use different guards +** against multiple definition of wchar_t. +*/ +#ifdef _BSD_WCHAR_T_DEFINED_ +#define _WCHAR_T +#endif +#endif /* __APPLE__ */ + +/* On FreeBSD, [n]curses.h and stdlib.h/wchar.h use different guards + against multiple definition of wchar_t and wint_t. 
*/ +#if defined(__FreeBSD__) && defined(_XOPEN_SOURCE_EXTENDED) +# ifndef __wchar_t +# define __wchar_t +# endif +# ifndef __wint_t +# define __wint_t +# endif +#endif + +#if defined(WINDOW_HAS_FLAGS) && defined(__APPLE__) +/* gh-109617, gh-115383: we can rely on the default value for NCURSES_OPAQUE on + most platforms, but not on macOS. This is because, starting with Xcode 15, + Apple-provided ncurses.h comes from ncurses 6 (which defaults to opaque + structs) but can still be linked to older versions of ncurses dynamic + libraries which don't provide functions such as is_pad() to deal with opaque + structs. Setting NCURSES_OPAQUE to 0 is harmless in all ncurses releases to + this date (provided that a thread-safe implementation is not required), but + this might change in the future. This fix might become irrelevant once + support for macOS 13 or earlier is dropped. */ +#define NCURSES_OPAQUE 0 +#endif + +#if defined(HAVE_NCURSESW_NCURSES_H) +# include +#elif defined(HAVE_NCURSESW_CURSES_H) +# include +#elif defined(HAVE_NCURSES_NCURSES_H) +# include +#elif defined(HAVE_NCURSES_CURSES_H) +# include +#elif defined(HAVE_NCURSES_H) +# include +#elif defined(HAVE_CURSES_H) +# include +#endif + +#ifdef NCURSES_VERSION +/* configure was checking , but we will + use , which has some or all these features. */ +#if !defined(WINDOW_HAS_FLAGS) && \ + (NCURSES_VERSION_PATCH+0 < 20070303 || !(NCURSES_OPAQUE+0)) +/* the WINDOW flags field was always accessible in ncurses prior to 20070303; + after that, it depends on the value of NCURSES_OPAQUE. */ +#define WINDOW_HAS_FLAGS 1 +#endif +#if !defined(HAVE_CURSES_IS_PAD) && NCURSES_VERSION_PATCH+0 >= 20090906 +#define HAVE_CURSES_IS_PAD 1 +#endif +#ifndef MVWDELCH_IS_EXPRESSION +#define MVWDELCH_IS_EXPRESSION 1 +#endif +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define PyCurses_API_pointers 4 + +/* Type declarations */ + +typedef struct PyCursesWindowObject { + PyObject_HEAD + WINDOW *win; + char *encoding; + struct PyCursesWindowObject *orig; +} PyCursesWindowObject; + +#define PyCursesWindow_Check(v) Py_IS_TYPE((v), &PyCursesWindow_Type) + +#define PyCurses_CAPSULE_NAME "_curses._C_API" + + +#ifdef CURSES_MODULE +/* This section is used when compiling _cursesmodule.c */ + +#else +/* This section is used in modules that use the _cursesmodule API */ + +static void **PyCurses_API; + +#define PyCursesWindow_Type (*_PyType_CAST(PyCurses_API[0])) +#define PyCursesSetupTermCalled {if (! ((int (*)(void))PyCurses_API[1]) () ) return NULL;} +#define PyCursesInitialised {if (! ((int (*)(void))PyCurses_API[2]) () ) return NULL;} +#define PyCursesInitialisedColor {if (! 
((int (*)(void))PyCurses_API[3]) () ) return NULL;} + +#define import_curses() \ + PyCurses_API = (void **)PyCapsule_Import(PyCurses_CAPSULE_NAME, 1); + +#endif + +/* general error messages */ +static const char catchall_ERR[] = "curses function returned ERR"; +static const char catchall_NULL[] = "curses function returned NULL"; + +#ifdef __cplusplus +} +#endif + +#endif /* !defined(Py_CURSES_H) */ + diff --git a/Include/pyatomic.h b/Include/pyatomic.h new file mode 100644 index 0000000000000000000000000000000000000000..2ce2c81cf5251a96b8fcaf60f0feba5b49018a55 --- /dev/null +++ b/Include/pyatomic.h @@ -0,0 +1,16 @@ +#ifndef Py_ATOMIC_H +#define Py_ATOMIC_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_ATOMIC_H +# include "cpython/pyatomic.h" +# undef Py_CPYTHON_ATOMIC_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_ATOMIC_H */ diff --git a/Include/pybuffer.h b/Include/pybuffer.h new file mode 100644 index 0000000000000000000000000000000000000000..ca1c6058d9052c53588dafddc229a1ef13445ca4 --- /dev/null +++ b/Include/pybuffer.h @@ -0,0 +1,145 @@ +/* Public Py_buffer API */ + +#ifndef Py_BUFFER_H +#define Py_BUFFER_H +#ifdef __cplusplus +extern "C" { +#endif + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030b0000 + +/* === New Buffer API ============================================ + * Limited API and stable ABI since Python 3.11 + * + * Py_buffer struct layout and size is now part of the stable abi3. The + * struct layout and size must not be changed in any way, as it would + * break the ABI. + * + */ + +typedef struct { + void *buf; + PyObject *obj; /* owned reference */ + Py_ssize_t len; + Py_ssize_t itemsize; /* This is Py_ssize_t so it can be + pointed to by strides in simple case.*/ + int readonly; + int ndim; + char *format; + Py_ssize_t *shape; + Py_ssize_t *strides; + Py_ssize_t *suboffsets; + void *internal; +} Py_buffer; + +typedef int (*getbufferproc)(PyObject *, Py_buffer *, int); +typedef void (*releasebufferproc)(PyObject *, Py_buffer *); + +/* Return 1 if the getbuffer function is available, otherwise return 0. */ +PyAPI_FUNC(int) PyObject_CheckBuffer(PyObject *obj); + +/* This is a C-API version of the getbuffer function call. It checks + to make sure object has the required function pointer and issues the + call. + + Returns -1 and raises an error on failure and returns 0 on success. */ +PyAPI_FUNC(int) PyObject_GetBuffer(PyObject *obj, Py_buffer *view, + int flags); + +/* Get the memory area pointed to by the indices for the buffer given. + Note that view->ndim is the assumed size of indices. */ +PyAPI_FUNC(void *) PyBuffer_GetPointer(const Py_buffer *view, const Py_ssize_t *indices); + +/* Return the implied itemsize of the data-format area from a + struct-style description. */ +PyAPI_FUNC(Py_ssize_t) PyBuffer_SizeFromFormat(const char *format); + +/* Implementation in memoryobject.c */ +PyAPI_FUNC(int) PyBuffer_ToContiguous(void *buf, const Py_buffer *view, + Py_ssize_t len, char order); + +PyAPI_FUNC(int) PyBuffer_FromContiguous(const Py_buffer *view, const void *buf, + Py_ssize_t len, char order); + +/* Copy len bytes of data from the contiguous chunk of memory + pointed to by buf into the buffer exported by obj. Return + 0 on success and return -1 and raise a PyBuffer_Error on + error (i.e. the object does not have a buffer interface or + it is not working). 
+ + If fort is 'F', then if the object is multi-dimensional, + then the data will be copied into the array in + Fortran-style (first dimension varies the fastest). If + fort is 'C', then the data will be copied into the array + in C-style (last dimension varies the fastest). If fort + is 'A', then it does not matter and the copy will be made + in whatever way is more efficient. */ +PyAPI_FUNC(int) PyObject_CopyData(PyObject *dest, PyObject *src); + +/* Copy the data from the src buffer to the buffer of destination. */ +PyAPI_FUNC(int) PyBuffer_IsContiguous(const Py_buffer *view, char fort); + +/*Fill the strides array with byte-strides of a contiguous + (Fortran-style if fort is 'F' or C-style otherwise) + array of the given shape with the given number of bytes + per element. */ +PyAPI_FUNC(void) PyBuffer_FillContiguousStrides(int ndims, + Py_ssize_t *shape, + Py_ssize_t *strides, + int itemsize, + char fort); + +/* Fills in a buffer-info structure correctly for an exporter + that can only share a contiguous chunk of memory of + "unsigned bytes" of the given length. + + Returns 0 on success and -1 (with raising an error) on error. */ +PyAPI_FUNC(int) PyBuffer_FillInfo(Py_buffer *view, PyObject *o, void *buf, + Py_ssize_t len, int readonly, + int flags); + +/* Releases a Py_buffer obtained from getbuffer ParseTuple's "s*". */ +PyAPI_FUNC(void) PyBuffer_Release(Py_buffer *view); + +/* Maximum number of dimensions */ +#define PyBUF_MAX_NDIM 64 + +/* Flags for getting buffers. Keep these in sync with inspect.BufferFlags. */ +#define PyBUF_SIMPLE 0 +#define PyBUF_WRITABLE 0x0001 + +#ifndef Py_LIMITED_API +/* we used to include an E, backwards compatible alias */ +#define PyBUF_WRITEABLE PyBUF_WRITABLE +#endif + +#define PyBUF_FORMAT 0x0004 +#define PyBUF_ND 0x0008 +#define PyBUF_STRIDES (0x0010 | PyBUF_ND) +#define PyBUF_C_CONTIGUOUS (0x0020 | PyBUF_STRIDES) +#define PyBUF_F_CONTIGUOUS (0x0040 | PyBUF_STRIDES) +#define PyBUF_ANY_CONTIGUOUS (0x0080 | PyBUF_STRIDES) +#define PyBUF_INDIRECT (0x0100 | PyBUF_STRIDES) + +#define PyBUF_CONTIG (PyBUF_ND | PyBUF_WRITABLE) +#define PyBUF_CONTIG_RO (PyBUF_ND) + +#define PyBUF_STRIDED (PyBUF_STRIDES | PyBUF_WRITABLE) +#define PyBUF_STRIDED_RO (PyBUF_STRIDES) + +#define PyBUF_RECORDS (PyBUF_STRIDES | PyBUF_WRITABLE | PyBUF_FORMAT) +#define PyBUF_RECORDS_RO (PyBUF_STRIDES | PyBUF_FORMAT) + +#define PyBUF_FULL (PyBUF_INDIRECT | PyBUF_WRITABLE | PyBUF_FORMAT) +#define PyBUF_FULL_RO (PyBUF_INDIRECT | PyBUF_FORMAT) + + +#define PyBUF_READ 0x100 +#define PyBUF_WRITE 0x200 + +#endif /* !Py_LIMITED_API || Py_LIMITED_API >= 3.11 */ + +#ifdef __cplusplus +} +#endif +#endif /* Py_BUFFER_H */ diff --git a/Include/pycapsule.h b/Include/pycapsule.h new file mode 100644 index 0000000000000000000000000000000000000000..666b9f8673967051856b1b74f5aca5ce95f620df --- /dev/null +++ b/Include/pycapsule.h @@ -0,0 +1,58 @@ + +/* Capsule objects let you wrap a C "void *" pointer in a Python + object. They're a way of passing data through the Python interpreter + without creating your own custom type. + + Capsules are used for communication between extension modules. + They provide a way for an extension module to export a C interface + to other extension modules, so that extension modules can use the + Python import mechanism to link to one another. + + For more information, please see "c-api/capsule.html" in the + documentation. 
+*/ + +#ifndef Py_CAPSULE_H +#define Py_CAPSULE_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_DATA(PyTypeObject) PyCapsule_Type; + +typedef void (*PyCapsule_Destructor)(PyObject *); + +#define PyCapsule_CheckExact(op) Py_IS_TYPE((op), &PyCapsule_Type) + + +PyAPI_FUNC(PyObject *) PyCapsule_New( + void *pointer, + const char *name, + PyCapsule_Destructor destructor); + +PyAPI_FUNC(void *) PyCapsule_GetPointer(PyObject *capsule, const char *name); + +PyAPI_FUNC(PyCapsule_Destructor) PyCapsule_GetDestructor(PyObject *capsule); + +PyAPI_FUNC(const char *) PyCapsule_GetName(PyObject *capsule); + +PyAPI_FUNC(void *) PyCapsule_GetContext(PyObject *capsule); + +PyAPI_FUNC(int) PyCapsule_IsValid(PyObject *capsule, const char *name); + +PyAPI_FUNC(int) PyCapsule_SetPointer(PyObject *capsule, void *pointer); + +PyAPI_FUNC(int) PyCapsule_SetDestructor(PyObject *capsule, PyCapsule_Destructor destructor); + +PyAPI_FUNC(int) PyCapsule_SetName(PyObject *capsule, const char *name); + +PyAPI_FUNC(int) PyCapsule_SetContext(PyObject *capsule, void *context); + +PyAPI_FUNC(void *) PyCapsule_Import( + const char *name, /* UTF-8 encoded string */ + int no_block); + +#ifdef __cplusplus +} +#endif +#endif /* !Py_CAPSULE_H */ diff --git a/Include/pyconfig.h b/Include/pyconfig.h new file mode 100644 index 0000000000000000000000000000000000000000..424421f6ff16476ebc96e0ec164123ed9913a533 --- /dev/null +++ b/Include/pyconfig.h @@ -0,0 +1,759 @@ +#ifndef Py_CONFIG_H +#define Py_CONFIG_H + +/* pyconfig.h. NOT Generated automatically by configure. + +This is a manually maintained version used for the Watcom, +Borland and Microsoft Visual C++ compilers. It is a +standard part of the Python distribution. + +WINDOWS DEFINES: +The code specific to Windows should be wrapped around one of +the following #defines + +MS_WIN64 - Code specific to the MS Win64 API +MS_WIN32 - Code specific to the MS Win32 (and Win64) API (obsolete, this covers all supported APIs) +MS_WINDOWS - Code specific to Windows, but all versions. +Py_ENABLE_SHARED - Code if the Python core is built as a DLL. + +Also note that neither "_M_IX86" or "_MSC_VER" should be used for +any purpose other than "Windows Intel x86 specific" and "Microsoft +compiler specific". Therefore, these should be very rare. + + +NOTE: The following symbols are deprecated: +NT, USE_DL_EXPORT, USE_DL_IMPORT, DL_EXPORT, DL_IMPORT +MS_CORE_DLL. + +WIN32 is still required for the locale module. + +*/ + +/* Deprecated USE_DL_EXPORT macro - please use Py_BUILD_CORE */ +#ifdef USE_DL_EXPORT +# define Py_BUILD_CORE +#endif /* USE_DL_EXPORT */ + +/* Visual Studio 2005 introduces deprecation warnings for + "insecure" and POSIX functions. The insecure functions should + be replaced by *_s versions (according to Microsoft); the + POSIX functions by _* versions (which, according to Microsoft, + would be ISO C conforming). Neither renaming is feasible, so + we just silence the warnings. */ + +#ifndef _CRT_SECURE_NO_DEPRECATE +#define _CRT_SECURE_NO_DEPRECATE 1 +#endif +#ifndef _CRT_NONSTDC_NO_DEPRECATE +#define _CRT_NONSTDC_NO_DEPRECATE 1 +#endif + +#define HAVE_IO_H +#define HAVE_SYS_UTIME_H +#define HAVE_TEMPNAM +#define HAVE_TMPFILE +#define HAVE_TMPNAM +#define HAVE_CLOCK +#define HAVE_STRERROR + +#include + +#define HAVE_STRFTIME +#define DONT_HAVE_SIG_ALARM +#define DONT_HAVE_SIG_PAUSE +#define LONG_BIT 32 +#define WORD_BIT 32 + +#define MS_WIN32 /* only support win32 and greater. 
*/ +#define MS_WINDOWS +#define NT_THREADS +#define WITH_THREAD +#ifndef NETSCAPE_PI +#define USE_SOCKET +#endif + +#if defined(Py_BUILD_CORE) || defined(Py_BUILD_CORE_BUILTIN) || defined(Py_BUILD_CORE_MODULE) +#include + +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) +#define MS_WINDOWS_DESKTOP +#endif +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP) +#define MS_WINDOWS_APP +#endif +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_SYSTEM) +#define MS_WINDOWS_SYSTEM +#endif +#if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_GAMES) +#define MS_WINDOWS_GAMES +#endif + +/* Define to 1 if you support windows console io */ +#if defined(MS_WINDOWS_DESKTOP) || defined(MS_WINDOWS_APP) || defined(MS_WINDOWS_SYSTEM) +#define HAVE_WINDOWS_CONSOLE_IO 1 +#endif +#endif /* Py_BUILD_CORE || Py_BUILD_CORE_BUILTIN || Py_BUILD_CORE_MODULE */ + +/* Define to 1 if you want to disable the GIL */ +/* Uncomment the definition for free-threaded builds, or define it manually + * when compiling extension modules. Note that we test with #ifdef, so + * defining as 0 will still disable the GIL. */ +#ifndef Py_GIL_DISABLED +/* #define Py_GIL_DISABLED 1 */ +#endif + +/* Compiler specific defines */ + +/* ------------------------------------------------------------------------*/ +/* Microsoft C defines _MSC_VER, as does clang-cl.exe */ +#ifdef _MSC_VER + +/* We want COMPILER to expand to a string containing _MSC_VER's *value*. + * This is horridly tricky, because the stringization operator only works + * on macro arguments, and doesn't evaluate macros passed *as* arguments. + */ +#define _Py_PASTE_VERSION(SUFFIX) \ + ("[MSC v." _Py_STRINGIZE(_MSC_VER) " " SUFFIX "]") +/* e.g., this produces, after compile-time string catenation, + * ("[MSC v.1900 64 bit (Intel)]") + * + * _Py_STRINGIZE(_MSC_VER) expands to + * _Py_STRINGIZE1(_MSC_VER) and this second macro call is scanned + * again for macros and so further expands to + * _Py_STRINGIZE1(1900) which then expands to + * "1900" + */ +#define _Py_STRINGIZE(X) _Py_STRINGIZE1(X) +#define _Py_STRINGIZE1(X) #X + +/* MSVC defines _WINxx to differentiate the windows platform types + + Note that for compatibility reasons _WIN32 is defined on Win32 + *and* on Win64. For the same reasons, in Python, MS_WIN32 is + defined on Win32 *and* Win64. Win32 only code must therefore be + guarded as follows: + #if defined(MS_WIN32) && !defined(MS_WIN64) +*/ +#ifdef _WIN64 +#define MS_WIN64 +#endif + +/* set the COMPILER and support tier + * + * win_amd64 MSVC (x86_64-pc-windows-msvc): 1 + * win32 MSVC (i686-pc-windows-msvc): 1 + * win_arm64 MSVC (aarch64-pc-windows-msvc): 3 + * other archs and ICC: 0 + */ +#ifdef MS_WIN64 +#if defined(_M_X64) || defined(_M_AMD64) +#if defined(__clang__) +#define COMPILER ("[Clang " __clang_version__ "] 64 bit (AMD64) with MSC v." _Py_STRINGIZE(_MSC_VER) " CRT]") +#define PY_SUPPORT_TIER 0 +#elif defined(__INTEL_COMPILER) +#define COMPILER ("[ICC v." _Py_STRINGIZE(__INTEL_COMPILER) " 64 bit (amd64) with MSC v." 
_Py_STRINGIZE(_MSC_VER) " CRT]") +#define PY_SUPPORT_TIER 0 +#else +#define COMPILER _Py_PASTE_VERSION("64 bit (AMD64)") +#define PY_SUPPORT_TIER 1 +#endif /* __clang__ */ +#define PYD_PLATFORM_TAG "win_amd64" +#elif defined(_M_ARM64) +#define COMPILER _Py_PASTE_VERSION("64 bit (ARM64)") +#define PY_SUPPORT_TIER 3 +#define PYD_PLATFORM_TAG "win_arm64" +#else +#define COMPILER _Py_PASTE_VERSION("64 bit (Unknown)") +#define PY_SUPPORT_TIER 0 +#endif +#endif /* MS_WIN64 */ + +/* set the version macros for the windows headers */ +/* Python 3.12+ requires Windows 8.1 or greater */ +#define Py_WINVER 0x0603 /* _WIN32_WINNT_WINBLUE (8.1) */ +#define Py_NTDDI NTDDI_WINBLUE + +/* We only set these values when building Python - we don't want to force + these values on extensions, as that will affect the prototypes and + structures exposed in the Windows headers. Even when building Python, we + allow a single source file to override this - they may need access to + structures etc so it can optionally use new Windows features if it + determines at runtime they are available. +*/ +#if defined(Py_BUILD_CORE) || defined(Py_BUILD_CORE_BUILTIN) || defined(Py_BUILD_CORE_MODULE) +#ifndef NTDDI_VERSION +#define NTDDI_VERSION Py_NTDDI +#endif +#ifndef WINVER +#define WINVER Py_WINVER +#endif +#ifndef _WIN32_WINNT +#define _WIN32_WINNT Py_WINVER +#endif +#endif + +/* _W64 is not defined for VC6 or eVC4 */ +#ifndef _W64 +#define _W64 +#endif + +/* Define like size_t, omitting the "unsigned" */ +#ifdef MS_WIN64 +typedef __int64 Py_ssize_t; +# define PY_SSIZE_T_MAX LLONG_MAX +#else +typedef _W64 int Py_ssize_t; +# define PY_SSIZE_T_MAX INT_MAX +#endif +#define HAVE_PY_SSIZE_T 1 + +#if defined(MS_WIN32) && !defined(MS_WIN64) +#if defined(_M_IX86) +#if defined(__clang__) +#define COMPILER ("[Clang " __clang_version__ "] 32 bit (Intel) with MSC v." _Py_STRINGIZE(_MSC_VER) " CRT]") +#define PY_SUPPORT_TIER 0 +#elif defined(__INTEL_COMPILER) +#define COMPILER ("[ICC v." _Py_STRINGIZE(__INTEL_COMPILER) " 32 bit (Intel) with MSC v." _Py_STRINGIZE(_MSC_VER) " CRT]") +#define PY_SUPPORT_TIER 0 +#else +#define COMPILER _Py_PASTE_VERSION("32 bit (Intel)") +#define PY_SUPPORT_TIER 1 +#endif /* __clang__ */ +#define PYD_PLATFORM_TAG "win32" +#elif defined(_M_ARM) +#define COMPILER _Py_PASTE_VERSION("32 bit (ARM)") +#define PYD_PLATFORM_TAG "win_arm32" +#define PY_SUPPORT_TIER 0 +#else +#define COMPILER _Py_PASTE_VERSION("32 bit (Unknown)") +#define PY_SUPPORT_TIER 0 +#endif +#endif /* MS_WIN32 && !MS_WIN64 */ + +typedef int pid_t; + +/* define some ANSI types that are not defined in earlier Win headers */ +#if _MSC_VER >= 1200 +/* This file only exists in VC 6.0 or higher */ +#include +#endif + +#endif /* _MSC_VER */ + +/* ------------------------------------------------------------------------*/ +/* mingw and mingw-w64 define __MINGW32__ */ +#ifdef __MINGW32__ + +#ifdef _WIN64 +#define MS_WIN64 +#endif + +#endif /* __MINGW32__*/ + +/* ------------------------------------------------------------------------*/ +/* egcs/gnu-win32 defines __GNUC__ and _WIN32 */ +#if defined(__GNUC__) && defined(_WIN32) +/* XXX These defines are likely incomplete, but should be easy to fix. + They should be complete enough to build extension modules. */ +/* Suggested by Rene Liebscher to avoid a GCC 2.91.* + bug that requires structure imports. More recent versions of the + compiler don't exhibit this bug. +*/ +#if (__GNUC__==2) && (__GNUC_MINOR__<=91) +#warning "Please use an up-to-date version of gcc! 
(>2.91 recommended)" +#endif + +#define COMPILER "[gcc]" +#define PY_LONG_LONG long long +#define PY_LLONG_MIN LLONG_MIN +#define PY_LLONG_MAX LLONG_MAX +#define PY_ULLONG_MAX ULLONG_MAX +#endif /* GNUC */ + +/* ------------------------------------------------------------------------*/ +/* lcc-win32 defines __LCC__ */ +#if defined(__LCC__) +/* XXX These defines are likely incomplete, but should be easy to fix. + They should be complete enough to build extension modules. */ + +#define COMPILER "[lcc-win32]" +typedef int pid_t; +/* __declspec() is supported here too - do nothing to get the defaults */ + +#endif /* LCC */ + +/* ------------------------------------------------------------------------*/ +/* End of compilers - finish up */ + +#ifndef NO_STDIO_H +# include +#endif + +/* 64 bit ints are usually spelt __int64 unless compiler has overridden */ +#ifndef PY_LONG_LONG +# define PY_LONG_LONG __int64 +# define PY_LLONG_MAX _I64_MAX +# define PY_LLONG_MIN _I64_MIN +# define PY_ULLONG_MAX _UI64_MAX +#endif + +/* For Windows the Python core is in a DLL by default. Test +Py_NO_ENABLE_SHARED to find out. Also support MS_NO_COREDLL for b/w compat */ +#if !defined(MS_NO_COREDLL) && !defined(Py_NO_ENABLE_SHARED) +# define Py_ENABLE_SHARED 1 /* standard symbol for shared library */ +# define MS_COREDLL /* deprecated old symbol */ +#endif /* !MS_NO_COREDLL && ... */ + +/* All windows compilers that use this header support __declspec */ +#define HAVE_DECLSPEC_DLL + +/* For an MSVC DLL, we can nominate the .lib files used by extensions */ +#ifdef MS_COREDLL +# if !defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_BUILTIN) + /* not building the core - must be an ext */ +# if defined(_MSC_VER) + /* So MSVC users need not specify the .lib + file in their Makefile */ +# if defined(Py_GIL_DISABLED) +# if defined(_DEBUG) +# pragma comment(lib,"python313t_d.lib") +# elif defined(Py_LIMITED_API) +# pragma comment(lib,"python3t.lib") +# else +# pragma comment(lib,"python313t.lib") +# endif /* _DEBUG */ +# else /* Py_GIL_DISABLED */ +# if defined(_DEBUG) +# pragma comment(lib,"python313_d.lib") +# elif defined(Py_LIMITED_API) +# pragma comment(lib,"python3.lib") +# else +# pragma comment(lib,"python313.lib") +# endif /* _DEBUG */ +# endif /* Py_GIL_DISABLED */ +# endif /* _MSC_VER */ +# endif /* Py_BUILD_CORE */ +#endif /* MS_COREDLL */ + +#ifdef MS_WIN64 +/* maintain "win32" sys.platform for backward compatibility of Python code, + the Win64 API should be close enough to the Win32 API to make this + preferable */ +# define PLATFORM "win32" +# define SIZEOF_VOID_P 8 +# define SIZEOF_TIME_T 8 +# define SIZEOF_OFF_T 4 +# define SIZEOF_FPOS_T 8 +# define SIZEOF_HKEY 8 +# define SIZEOF_SIZE_T 8 +# define ALIGNOF_SIZE_T 8 +# define ALIGNOF_MAX_ALIGN_T 8 +/* configure.ac defines HAVE_LARGEFILE_SUPPORT iff + sizeof(off_t) > sizeof(long), and sizeof(long long) >= sizeof(off_t). + On Win64 the second condition is not true, but if fpos_t replaces off_t + then this is true. The uses of HAVE_LARGEFILE_SUPPORT imply that Win64 + should define this. 
*/ +# define HAVE_LARGEFILE_SUPPORT +#elif defined(MS_WIN32) +# define PLATFORM "win32" +# define HAVE_LARGEFILE_SUPPORT +# define SIZEOF_VOID_P 4 +# define SIZEOF_OFF_T 4 +# define SIZEOF_FPOS_T 8 +# define SIZEOF_HKEY 4 +# define SIZEOF_SIZE_T 4 +# define ALIGNOF_SIZE_T 4 + /* MS VS2005 changes time_t to a 64-bit type on all platforms */ +# if defined(_MSC_VER) && _MSC_VER >= 1400 +# define SIZEOF_TIME_T 8 +# else +# define SIZEOF_TIME_T 4 +# endif +# define ALIGNOF_MAX_ALIGN_T 8 +#endif + +#ifdef _DEBUG +# define Py_DEBUG +#endif + + +#ifdef MS_WIN32 + +#define SIZEOF_SHORT 2 +#define SIZEOF_INT 4 +#define SIZEOF_LONG 4 +#define ALIGNOF_LONG 4 +#define SIZEOF_LONG_LONG 8 +#define SIZEOF_DOUBLE 8 +#define SIZEOF_FLOAT 4 + +/* VC 7.1 has them and VC 6.0 does not. VC 6.0 has a version number of 1200. + Microsoft eMbedded Visual C++ 4.0 has a version number of 1201 and doesn't + define these. + If some compiler does not provide them, modify the #if appropriately. */ +#if defined(_MSC_VER) +#if _MSC_VER > 1300 +#define HAVE_UINTPTR_T 1 +#define HAVE_INTPTR_T 1 +#else +/* VC6, VS 2002 and eVC4 don't support the C99 LL suffix for 64-bit integer literals */ +#define Py_LL(x) x##I64 +#endif /* _MSC_VER > 1300 */ +#endif /* _MSC_VER */ + +#endif + +/* define signed and unsigned exact-width 32-bit and 64-bit types, used in the + implementation of Python integers. */ +#define PY_UINT32_T uint32_t +#define PY_UINT64_T uint64_t +#define PY_INT32_T int32_t +#define PY_INT64_T int64_t + +/* Fairly standard from here! */ + +/* Define if on AIX 3. + System headers sometimes define this. + We just want to avoid a redefinition error message. */ +#ifndef _ALL_SOURCE +/* #undef _ALL_SOURCE */ +#endif + +/* Define to empty if the keyword does not work. */ +/* #define const */ + +/* Define to 1 if you have the header file. */ +#define HAVE_CONIO_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_DIRECT_H 1 + +/* Define to 1 if you have the declaration of `tzname', and to 0 if you don't. + */ +#define HAVE_DECL_TZNAME 1 + +/* Define if you have dirent.h. */ +/* #define DIRENT 1 */ + +/* Define to the type of elements in the array set by `getgroups'. + Usually this is either `int' or `gid_t'. */ +/* #undef GETGROUPS_T */ + +/* Define to `int' if doesn't define. */ +/* #undef gid_t */ + +/* Define if your struct tm has tm_zone. */ +/* #undef HAVE_TM_ZONE */ + +/* Define if you don't have tm_zone but do have the external array + tzname. */ +#define HAVE_TZNAME + +/* Define to `int' if doesn't define. */ +/* #undef mode_t */ + +/* Define if you don't have dirent.h, but have ndir.h. */ +/* #undef NDIR */ + +/* Define to `long' if doesn't define. */ +/* #undef off_t */ + +/* Define to `int' if doesn't define. */ +/* #undef pid_t */ + +/* Define if the system does not provide POSIX.1 features except + with this defined. */ +/* #undef _POSIX_1_SOURCE */ + +/* Define if you need to in order for stat and other things to work. */ +/* #undef _POSIX_SOURCE */ + +/* Define as the return type of signal handlers (int or void). */ +#define RETSIGTYPE void + +/* Define to `unsigned' if doesn't define. */ +/* #undef size_t */ + +/* Define if you have the ANSI C header files. */ +#define STDC_HEADERS 1 + +/* Define if you don't have dirent.h, but have sys/dir.h. */ +/* #undef SYSDIR */ + +/* Define if you don't have dirent.h, but have sys/ndir.h. */ +/* #undef SYSNDIR */ + +/* Define if you can safely include both and . */ +/* #undef TIME_WITH_SYS_TIME */ + +/* Define if your declares struct tm. 
*/ +/* #define TM_IN_SYS_TIME 1 */ + +/* Define to `int' if doesn't define. */ +/* #undef uid_t */ + +/* Define if the closedir function returns void instead of int. */ +/* #undef VOID_CLOSEDIR */ + +/* Define if getpgrp() must be called as getpgrp(0) + and (consequently) setpgrp() as setpgrp(0, 0). */ +/* #undef GETPGRP_HAVE_ARGS */ + +/* Define this if your time.h defines altzone */ +/* #define HAVE_ALTZONE */ + +/* Define if you have the putenv function. */ +#define HAVE_PUTENV + +/* Define if your compiler supports function prototypes */ +#define HAVE_PROTOTYPES + +/* Define if you can safely include both and + (which you can't on SCO ODT 3.0). */ +/* #undef SYS_SELECT_WITH_SYS_TIME */ + +/* Define if you want build the _decimal module using a coroutine-local rather + than a thread-local context */ +#define WITH_DECIMAL_CONTEXTVAR 1 + +/* Define if you want documentation strings in extension modules */ +#define WITH_DOC_STRINGS 1 + +/* Define if you want to compile in rudimentary thread support */ +/* #undef WITH_THREAD */ + +/* Define if you want to use the GNU readline library */ +/* #define WITH_READLINE 1 */ + +/* Use Python's own small-block memory-allocator. */ +#define WITH_PYMALLOC 1 + +/* Define if you want to compile in mimalloc memory allocator. */ +#define WITH_MIMALLOC 1 + +/* Define if you want to compile in object freelists optimization */ +#define WITH_FREELISTS 1 + +/* Define if you have clock. */ +/* #define HAVE_CLOCK */ + +/* Define when any dynamic module loading is enabled */ +#define HAVE_DYNAMIC_LOADING + +/* Define if you have ftime. */ +#define HAVE_FTIME + +/* Define if you have getpeername. */ +#define HAVE_GETPEERNAME + +/* Define if you have getpgrp. */ +/* #undef HAVE_GETPGRP */ + +/* Define if you have getpid. */ +#define HAVE_GETPID + +/* Define if you have gettimeofday. */ +/* #undef HAVE_GETTIMEOFDAY */ + +/* Define if you have getwd. */ +/* #undef HAVE_GETWD */ + +/* Define if you have lstat. */ +/* #undef HAVE_LSTAT */ + +/* Define if you have the mktime function. */ +#define HAVE_MKTIME + +/* Define if you have nice. */ +/* #undef HAVE_NICE */ + +/* Define if you have readlink. */ +/* #undef HAVE_READLINK */ + +/* Define if you have setpgid. */ +/* #undef HAVE_SETPGID */ + +/* Define if you have setpgrp. */ +/* #undef HAVE_SETPGRP */ + +/* Define if you have setsid. */ +/* #undef HAVE_SETSID */ + +/* Define if you have setvbuf. */ +#define HAVE_SETVBUF + +/* Define if you have siginterrupt. */ +/* #undef HAVE_SIGINTERRUPT */ + +/* Define to 1 if you have the `shutdown' function. */ +#define HAVE_SHUTDOWN 1 + +/* Define if you have symlink. */ +/* #undef HAVE_SYMLINK */ + +/* Define if you have tcgetpgrp. */ +/* #undef HAVE_TCGETPGRP */ + +/* Define if you have tcsetpgrp. */ +/* #undef HAVE_TCSETPGRP */ + +/* Define if you have times. */ +/* #undef HAVE_TIMES */ + +/* Define to 1 if you have the `umask' function. */ +#define HAVE_UMASK 1 + +/* Define if you have uname. */ +/* #undef HAVE_UNAME */ + +/* Define if you have waitpid. */ +/* #undef HAVE_WAITPID */ + +/* Define to 1 if you have the `wcsftime' function. */ +#if defined(_MSC_VER) && _MSC_VER >= 1310 +#define HAVE_WCSFTIME 1 +#endif + +/* Define to 1 if you have the `wcscoll' function. */ +#define HAVE_WCSCOLL 1 + +/* Define to 1 if you have the `wcsxfrm' function. */ +#define HAVE_WCSXFRM 1 + +/* Define if the zlib library has inflateCopy */ +#define HAVE_ZLIB_COPY 1 + +/* Define if you have the header file. */ +/* #undef HAVE_DLFCN_H */ + +/* Define to 1 if you have the header file. 
*/ +#define HAVE_ERRNO_H 1 + +/* Define if you have the header file. */ +#define HAVE_FCNTL_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_PROCESS_H 1 + +/* Define to 1 if you have the header file. */ +#define HAVE_SIGNAL_H 1 + +/* Define if you have the header file. */ +#define HAVE_STDDEF_H 1 + +/* Define if you have the header file. */ +/* #undef HAVE_SYS_AUDIOIO_H */ + +/* Define if you have the header file. */ +/* #define HAVE_SYS_PARAM_H 1 */ + +/* Define if you have the header file. */ +/* #define HAVE_SYS_SELECT_H 1 */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_STAT_H 1 + +/* Define if you have the header file. */ +/* #define HAVE_SYS_TIME_H 1 */ + +/* Define if you have the header file. */ +/* #define HAVE_SYS_TIMES_H 1 */ + +/* Define to 1 if you have the header file. */ +#define HAVE_SYS_TYPES_H 1 + +/* Define if you have the header file. */ +/* #define HAVE_SYS_UN_H 1 */ + +/* Define if you have the header file. */ +/* #define HAVE_SYS_UTIME_H 1 */ + +/* Define if you have the header file. */ +/* #define HAVE_SYS_UTSNAME_H 1 */ + +/* Define if you have the header file. */ +/* #define HAVE_UNISTD_H 1 */ + +/* Define if you have the header file. */ +/* #define HAVE_UTIME_H 1 */ + +/* Define if the compiler provides a wchar.h header file. */ +#define HAVE_WCHAR_H 1 + +/* The size of `wchar_t', as computed by sizeof. */ +#define SIZEOF_WCHAR_T 2 + +/* The size of `_Bool', as computed by sizeof. */ +#define SIZEOF__BOOL 1 + +/* The size of `pid_t', as computed by sizeof. */ +#define SIZEOF_PID_T SIZEOF_INT + +/* Define if you have the dl library (-ldl). */ +/* #undef HAVE_LIBDL */ + +/* Define if you have the mpc library (-lmpc). */ +/* #undef HAVE_LIBMPC */ + +/* Define if you have the seq library (-lseq). */ +/* #undef HAVE_LIBSEQ */ + +/* Define if you have the socket library (-lsocket). */ +#define HAVE_LIBSOCKET 1 + +/* Define if you have the sun library (-lsun). */ +/* #undef HAVE_LIBSUN */ + +/* Define if you have the termcap library (-ltermcap). */ +/* #undef HAVE_LIBTERMCAP */ + +/* Define if you have the termlib library (-ltermlib). */ +/* #undef HAVE_LIBTERMLIB */ + +/* Define if you have the thread library (-lthread). */ +/* #undef HAVE_LIBTHREAD */ + +/* WinSock does not use a bitmask in select, and uses + socket handles greater than FD_SETSIZE */ +#define Py_SOCKET_FD_CAN_BE_GE_FD_SETSIZE + +/* Define if C doubles are 64-bit IEEE 754 binary format, stored with the + least significant byte first */ +#define DOUBLE_IS_LITTLE_ENDIAN_IEEE754 1 + +/* Define to 1 if you have the `erf' function. */ +#define HAVE_ERF 1 + +/* Define to 1 if you have the `erfc' function. */ +#define HAVE_ERFC 1 + +// netdb.h functions (provided by winsock.h) +#define HAVE_GETHOSTNAME 1 +#define HAVE_GETHOSTBYADDR 1 +#define HAVE_GETHOSTBYNAME 1 +#define HAVE_GETPROTOBYNAME 1 +#define HAVE_GETSERVBYNAME 1 +#define HAVE_GETSERVBYPORT 1 +// sys/socket.h functions (provided by winsock.h) +#define HAVE_INET_PTON 1 +#define HAVE_INET_NTOA 1 +#define HAVE_ACCEPT 1 +#define HAVE_BIND 1 +#define HAVE_CONNECT 1 +#define HAVE_GETSOCKNAME 1 +#define HAVE_LISTEN 1 +#define HAVE_RECVFROM 1 +#define HAVE_SENDTO 1 +#define HAVE_SETSOCKOPT 1 +#define HAVE_SOCKET 1 + +/* Define to 1 if you have the `dup' function. 
*/ +#define HAVE_DUP 1 + +/* framework name */ +#define _PYTHONFRAMEWORK "" + +/* Define if libssl has X509_VERIFY_PARAM_set1_host and related function */ +#define HAVE_X509_VERIFY_PARAM_SET1_HOST 1 + +#endif /* !Py_CONFIG_H */ diff --git a/Include/pydtrace.h b/Include/pydtrace.h new file mode 100644 index 0000000000000000000000000000000000000000..e197d36694537b188e8aaf8d8e89f770a3de96a1 --- /dev/null +++ b/Include/pydtrace.h @@ -0,0 +1,59 @@ +/* Static DTrace probes interface */ + +#ifndef Py_DTRACE_H +#define Py_DTRACE_H +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef WITH_DTRACE + +#include "pydtrace_probes.h" + +/* pydtrace_probes.h, on systems with DTrace, is auto-generated to include + `PyDTrace_{PROBE}` and `PyDTrace_{PROBE}_ENABLED()` macros for every probe + defined in pydtrace.d. + + Calling these functions must be guarded by a `PyDTrace_{PROBE}_ENABLED()` + check to minimize performance impact when probing is off. For example: + + if (PyDTrace_FUNCTION_ENTRY_ENABLED()) + PyDTrace_FUNCTION_ENTRY(f); +*/ + +#else + +/* Without DTrace, compile to nothing. */ + +static inline void PyDTrace_LINE(const char *arg0, const char *arg1, int arg2) {} +static inline void PyDTrace_FUNCTION_ENTRY(const char *arg0, const char *arg1, int arg2) {} +static inline void PyDTrace_FUNCTION_RETURN(const char *arg0, const char *arg1, int arg2) {} +static inline void PyDTrace_GC_START(int arg0) {} +static inline void PyDTrace_GC_DONE(Py_ssize_t arg0) {} +static inline void PyDTrace_INSTANCE_NEW_START(int arg0) {} +static inline void PyDTrace_INSTANCE_NEW_DONE(int arg0) {} +static inline void PyDTrace_INSTANCE_DELETE_START(int arg0) {} +static inline void PyDTrace_INSTANCE_DELETE_DONE(int arg0) {} +static inline void PyDTrace_IMPORT_FIND_LOAD_START(const char *arg0) {} +static inline void PyDTrace_IMPORT_FIND_LOAD_DONE(const char *arg0, int arg1) {} +static inline void PyDTrace_AUDIT(const char *arg0, void *arg1) {} + +static inline int PyDTrace_LINE_ENABLED(void) { return 0; } +static inline int PyDTrace_FUNCTION_ENTRY_ENABLED(void) { return 0; } +static inline int PyDTrace_FUNCTION_RETURN_ENABLED(void) { return 0; } +static inline int PyDTrace_GC_START_ENABLED(void) { return 0; } +static inline int PyDTrace_GC_DONE_ENABLED(void) { return 0; } +static inline int PyDTrace_INSTANCE_NEW_START_ENABLED(void) { return 0; } +static inline int PyDTrace_INSTANCE_NEW_DONE_ENABLED(void) { return 0; } +static inline int PyDTrace_INSTANCE_DELETE_START_ENABLED(void) { return 0; } +static inline int PyDTrace_INSTANCE_DELETE_DONE_ENABLED(void) { return 0; } +static inline int PyDTrace_IMPORT_FIND_LOAD_START_ENABLED(void) { return 0; } +static inline int PyDTrace_IMPORT_FIND_LOAD_DONE_ENABLED(void) { return 0; } +static inline int PyDTrace_AUDIT_ENABLED(void) { return 0; } + +#endif /* !WITH_DTRACE */ + +#ifdef __cplusplus +} +#endif +#endif /* !Py_DTRACE_H */ diff --git a/Include/pyerrors.h b/Include/pyerrors.h new file mode 100644 index 0000000000000000000000000000000000000000..5d0028c116e2d862948042746734e252e4aed0f6 --- /dev/null +++ b/Include/pyerrors.h @@ -0,0 +1,335 @@ +// Error handling definitions + +#ifndef Py_ERRORS_H +#define Py_ERRORS_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_FUNC(void) PyErr_SetNone(PyObject *); +PyAPI_FUNC(void) PyErr_SetObject(PyObject *, PyObject *); +PyAPI_FUNC(void) PyErr_SetString( + PyObject *exception, + const char *string /* decoded from utf-8 */ + ); +PyAPI_FUNC(PyObject *) PyErr_Occurred(void); +PyAPI_FUNC(void) PyErr_Clear(void); +PyAPI_FUNC(void) 
PyErr_Fetch(PyObject **, PyObject **, PyObject **); +PyAPI_FUNC(void) PyErr_Restore(PyObject *, PyObject *, PyObject *); +PyAPI_FUNC(PyObject *) PyErr_GetRaisedException(void); +PyAPI_FUNC(void) PyErr_SetRaisedException(PyObject *); +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030b0000 +PyAPI_FUNC(PyObject*) PyErr_GetHandledException(void); +PyAPI_FUNC(void) PyErr_SetHandledException(PyObject *); +#endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 +PyAPI_FUNC(void) PyErr_GetExcInfo(PyObject **, PyObject **, PyObject **); +PyAPI_FUNC(void) PyErr_SetExcInfo(PyObject *, PyObject *, PyObject *); +#endif + +/* Defined in Python/pylifecycle.c + + The Py_FatalError() function is replaced with a macro which logs + automatically the name of the current function, unless the Py_LIMITED_API + macro is defined. */ +PyAPI_FUNC(void) _Py_NO_RETURN Py_FatalError(const char *message); + +/* Error testing and normalization */ +PyAPI_FUNC(int) PyErr_GivenExceptionMatches(PyObject *, PyObject *); +PyAPI_FUNC(int) PyErr_ExceptionMatches(PyObject *); +PyAPI_FUNC(void) PyErr_NormalizeException(PyObject**, PyObject**, PyObject**); + +/* Traceback manipulation (PEP 3134) */ +PyAPI_FUNC(int) PyException_SetTraceback(PyObject *, PyObject *); +PyAPI_FUNC(PyObject *) PyException_GetTraceback(PyObject *); + +/* Cause manipulation (PEP 3134) */ +PyAPI_FUNC(PyObject *) PyException_GetCause(PyObject *); +PyAPI_FUNC(void) PyException_SetCause(PyObject *, PyObject *); + +/* Context manipulation (PEP 3134) */ +PyAPI_FUNC(PyObject *) PyException_GetContext(PyObject *); +PyAPI_FUNC(void) PyException_SetContext(PyObject *, PyObject *); + + +PyAPI_FUNC(PyObject *) PyException_GetArgs(PyObject *); +PyAPI_FUNC(void) PyException_SetArgs(PyObject *, PyObject *); + +/* */ + +#define PyExceptionClass_Check(x) \ + (PyType_Check((x)) && \ + PyType_FastSubclass((PyTypeObject*)(x), Py_TPFLAGS_BASE_EXC_SUBCLASS)) + +#define PyExceptionInstance_Check(x) \ + PyType_FastSubclass(Py_TYPE(x), Py_TPFLAGS_BASE_EXC_SUBCLASS) + +PyAPI_FUNC(const char *) PyExceptionClass_Name(PyObject *); + +#define PyExceptionInstance_Class(x) _PyObject_CAST(Py_TYPE(x)) + +#define _PyBaseExceptionGroup_Check(x) \ + PyObject_TypeCheck((x), (PyTypeObject *)PyExc_BaseExceptionGroup) + +/* Predefined exceptions */ + +PyAPI_DATA(PyObject *) PyExc_BaseException; +PyAPI_DATA(PyObject *) PyExc_Exception; +PyAPI_DATA(PyObject *) PyExc_BaseExceptionGroup; +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000 +PyAPI_DATA(PyObject *) PyExc_StopAsyncIteration; +#endif +PyAPI_DATA(PyObject *) PyExc_StopIteration; +PyAPI_DATA(PyObject *) PyExc_GeneratorExit; +PyAPI_DATA(PyObject *) PyExc_ArithmeticError; +PyAPI_DATA(PyObject *) PyExc_LookupError; + +PyAPI_DATA(PyObject *) PyExc_AssertionError; +PyAPI_DATA(PyObject *) PyExc_AttributeError; +PyAPI_DATA(PyObject *) PyExc_BufferError; +PyAPI_DATA(PyObject *) PyExc_EOFError; +PyAPI_DATA(PyObject *) PyExc_FloatingPointError; +PyAPI_DATA(PyObject *) PyExc_OSError; +PyAPI_DATA(PyObject *) PyExc_ImportError; +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03060000 +PyAPI_DATA(PyObject *) PyExc_ModuleNotFoundError; +#endif +PyAPI_DATA(PyObject *) PyExc_IndexError; +PyAPI_DATA(PyObject *) PyExc_KeyError; +PyAPI_DATA(PyObject *) PyExc_KeyboardInterrupt; +PyAPI_DATA(PyObject *) PyExc_MemoryError; +PyAPI_DATA(PyObject *) PyExc_NameError; +PyAPI_DATA(PyObject *) PyExc_OverflowError; +PyAPI_DATA(PyObject *) PyExc_RuntimeError; +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000 
+PyAPI_DATA(PyObject *) PyExc_RecursionError; +#endif +PyAPI_DATA(PyObject *) PyExc_NotImplementedError; +PyAPI_DATA(PyObject *) PyExc_SyntaxError; +PyAPI_DATA(PyObject *) PyExc_IndentationError; +PyAPI_DATA(PyObject *) PyExc_TabError; +PyAPI_DATA(PyObject *) PyExc_ReferenceError; +PyAPI_DATA(PyObject *) PyExc_SystemError; +PyAPI_DATA(PyObject *) PyExc_SystemExit; +PyAPI_DATA(PyObject *) PyExc_TypeError; +PyAPI_DATA(PyObject *) PyExc_UnboundLocalError; +PyAPI_DATA(PyObject *) PyExc_UnicodeError; +PyAPI_DATA(PyObject *) PyExc_UnicodeEncodeError; +PyAPI_DATA(PyObject *) PyExc_UnicodeDecodeError; +PyAPI_DATA(PyObject *) PyExc_UnicodeTranslateError; +PyAPI_DATA(PyObject *) PyExc_ValueError; +PyAPI_DATA(PyObject *) PyExc_ZeroDivisionError; + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 +PyAPI_DATA(PyObject *) PyExc_BlockingIOError; +PyAPI_DATA(PyObject *) PyExc_BrokenPipeError; +PyAPI_DATA(PyObject *) PyExc_ChildProcessError; +PyAPI_DATA(PyObject *) PyExc_ConnectionError; +PyAPI_DATA(PyObject *) PyExc_ConnectionAbortedError; +PyAPI_DATA(PyObject *) PyExc_ConnectionRefusedError; +PyAPI_DATA(PyObject *) PyExc_ConnectionResetError; +PyAPI_DATA(PyObject *) PyExc_FileExistsError; +PyAPI_DATA(PyObject *) PyExc_FileNotFoundError; +PyAPI_DATA(PyObject *) PyExc_InterruptedError; +PyAPI_DATA(PyObject *) PyExc_IsADirectoryError; +PyAPI_DATA(PyObject *) PyExc_NotADirectoryError; +PyAPI_DATA(PyObject *) PyExc_PermissionError; +PyAPI_DATA(PyObject *) PyExc_ProcessLookupError; +PyAPI_DATA(PyObject *) PyExc_TimeoutError; +#endif + + +/* Compatibility aliases */ +PyAPI_DATA(PyObject *) PyExc_EnvironmentError; +PyAPI_DATA(PyObject *) PyExc_IOError; +#ifdef MS_WINDOWS +PyAPI_DATA(PyObject *) PyExc_WindowsError; +#endif + +/* Predefined warning categories */ +PyAPI_DATA(PyObject *) PyExc_Warning; +PyAPI_DATA(PyObject *) PyExc_UserWarning; +PyAPI_DATA(PyObject *) PyExc_DeprecationWarning; +PyAPI_DATA(PyObject *) PyExc_PendingDeprecationWarning; +PyAPI_DATA(PyObject *) PyExc_SyntaxWarning; +PyAPI_DATA(PyObject *) PyExc_RuntimeWarning; +PyAPI_DATA(PyObject *) PyExc_FutureWarning; +PyAPI_DATA(PyObject *) PyExc_ImportWarning; +PyAPI_DATA(PyObject *) PyExc_UnicodeWarning; +PyAPI_DATA(PyObject *) PyExc_BytesWarning; +PyAPI_DATA(PyObject *) PyExc_EncodingWarning; +PyAPI_DATA(PyObject *) PyExc_ResourceWarning; + + +/* Convenience functions */ + +PyAPI_FUNC(int) PyErr_BadArgument(void); +PyAPI_FUNC(PyObject *) PyErr_NoMemory(void); +PyAPI_FUNC(PyObject *) PyErr_SetFromErrno(PyObject *); +PyAPI_FUNC(PyObject *) PyErr_SetFromErrnoWithFilenameObject( + PyObject *, PyObject *); +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03040000 +PyAPI_FUNC(PyObject *) PyErr_SetFromErrnoWithFilenameObjects( + PyObject *, PyObject *, PyObject *); +#endif +PyAPI_FUNC(PyObject *) PyErr_SetFromErrnoWithFilename( + PyObject *exc, + const char *filename /* decoded from the filesystem encoding */ + ); + +PyAPI_FUNC(PyObject *) PyErr_Format( + PyObject *exception, + const char *format, /* ASCII-encoded string */ + ... 
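+    /* printf-style; PyErr_Format() always returns NULL, so a caller can
+       write, e.g.:
+           return PyErr_Format(PyExc_TypeError, "expected str, got %.200s",
+                               Py_TYPE(obj)->tp_name);
+       (obj here being whatever object failed the check) */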
+ ); +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000 +PyAPI_FUNC(PyObject *) PyErr_FormatV( + PyObject *exception, + const char *format, + va_list vargs); +#endif + +#ifdef MS_WINDOWS +PyAPI_FUNC(PyObject *) PyErr_SetFromWindowsErrWithFilename( + int ierr, + const char *filename /* decoded from the filesystem encoding */ + ); +PyAPI_FUNC(PyObject *) PyErr_SetFromWindowsErr(int); +PyAPI_FUNC(PyObject *) PyErr_SetExcFromWindowsErrWithFilenameObject( + PyObject *,int, PyObject *); +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03040000 +PyAPI_FUNC(PyObject *) PyErr_SetExcFromWindowsErrWithFilenameObjects( + PyObject *,int, PyObject *, PyObject *); +#endif +PyAPI_FUNC(PyObject *) PyErr_SetExcFromWindowsErrWithFilename( + PyObject *exc, + int ierr, + const char *filename /* decoded from the filesystem encoding */ + ); +PyAPI_FUNC(PyObject *) PyErr_SetExcFromWindowsErr(PyObject *, int); +#endif /* MS_WINDOWS */ + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03060000 +PyAPI_FUNC(PyObject *) PyErr_SetImportErrorSubclass(PyObject *, PyObject *, + PyObject *, PyObject *); +#endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 +PyAPI_FUNC(PyObject *) PyErr_SetImportError(PyObject *, PyObject *, + PyObject *); +#endif + +/* Export the old function so that the existing API remains available: */ +PyAPI_FUNC(void) PyErr_BadInternalCall(void); +PyAPI_FUNC(void) _PyErr_BadInternalCall(const char *filename, int lineno); +/* Mask the old API with a call to the new API for code compiled under + Python 2.0: */ +#define PyErr_BadInternalCall() _PyErr_BadInternalCall(__FILE__, __LINE__) + +/* Function to create a new exception */ +PyAPI_FUNC(PyObject *) PyErr_NewException( + const char *name, PyObject *base, PyObject *dict); +PyAPI_FUNC(PyObject *) PyErr_NewExceptionWithDoc( + const char *name, const char *doc, PyObject *base, PyObject *dict); +PyAPI_FUNC(void) PyErr_WriteUnraisable(PyObject *); + + +/* In signalmodule.c */ +PyAPI_FUNC(int) PyErr_CheckSignals(void); +PyAPI_FUNC(void) PyErr_SetInterrupt(void); +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030A0000 +PyAPI_FUNC(int) PyErr_SetInterruptEx(int signum); +#endif + +/* Support for adding program text to SyntaxErrors */ +PyAPI_FUNC(void) PyErr_SyntaxLocation( + const char *filename, /* decoded from the filesystem encoding */ + int lineno); +PyAPI_FUNC(void) PyErr_SyntaxLocationEx( + const char *filename, /* decoded from the filesystem encoding */ + int lineno, + int col_offset); +PyAPI_FUNC(PyObject *) PyErr_ProgramText( + const char *filename, /* decoded from the filesystem encoding */ + int lineno); + +/* The following functions are used to create and modify unicode + exceptions from C */ + +/* create a UnicodeDecodeError object */ +PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_Create( + const char *encoding, /* UTF-8 encoded string */ + const char *object, + Py_ssize_t length, + Py_ssize_t start, + Py_ssize_t end, + const char *reason /* UTF-8 encoded string */ + ); + +/* get the encoding attribute */ +PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_GetEncoding(PyObject *); +PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_GetEncoding(PyObject *); + +/* get the object attribute */ +PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_GetObject(PyObject *); +PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_GetObject(PyObject *); +PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_GetObject(PyObject *); + +/* get the value of the start attribute (the int * may not be NULL) + return 0 on success, -1 on failure */ 
+PyAPI_FUNC(int) PyUnicodeEncodeError_GetStart(PyObject *, Py_ssize_t *); +PyAPI_FUNC(int) PyUnicodeDecodeError_GetStart(PyObject *, Py_ssize_t *); +PyAPI_FUNC(int) PyUnicodeTranslateError_GetStart(PyObject *, Py_ssize_t *); + +/* assign a new value to the start attribute + return 0 on success, -1 on failure */ +PyAPI_FUNC(int) PyUnicodeEncodeError_SetStart(PyObject *, Py_ssize_t); +PyAPI_FUNC(int) PyUnicodeDecodeError_SetStart(PyObject *, Py_ssize_t); +PyAPI_FUNC(int) PyUnicodeTranslateError_SetStart(PyObject *, Py_ssize_t); + +/* get the value of the end attribute (the int *may not be NULL) + return 0 on success, -1 on failure */ +PyAPI_FUNC(int) PyUnicodeEncodeError_GetEnd(PyObject *, Py_ssize_t *); +PyAPI_FUNC(int) PyUnicodeDecodeError_GetEnd(PyObject *, Py_ssize_t *); +PyAPI_FUNC(int) PyUnicodeTranslateError_GetEnd(PyObject *, Py_ssize_t *); + +/* assign a new value to the end attribute + return 0 on success, -1 on failure */ +PyAPI_FUNC(int) PyUnicodeEncodeError_SetEnd(PyObject *, Py_ssize_t); +PyAPI_FUNC(int) PyUnicodeDecodeError_SetEnd(PyObject *, Py_ssize_t); +PyAPI_FUNC(int) PyUnicodeTranslateError_SetEnd(PyObject *, Py_ssize_t); + +/* get the value of the reason attribute */ +PyAPI_FUNC(PyObject *) PyUnicodeEncodeError_GetReason(PyObject *); +PyAPI_FUNC(PyObject *) PyUnicodeDecodeError_GetReason(PyObject *); +PyAPI_FUNC(PyObject *) PyUnicodeTranslateError_GetReason(PyObject *); + +/* assign a new value to the reason attribute + return 0 on success, -1 on failure */ +PyAPI_FUNC(int) PyUnicodeEncodeError_SetReason( + PyObject *exc, + const char *reason /* UTF-8 encoded string */ + ); +PyAPI_FUNC(int) PyUnicodeDecodeError_SetReason( + PyObject *exc, + const char *reason /* UTF-8 encoded string */ + ); +PyAPI_FUNC(int) PyUnicodeTranslateError_SetReason( + PyObject *exc, + const char *reason /* UTF-8 encoded string */ + ); + +PyAPI_FUNC(int) PyOS_snprintf(char *str, size_t size, const char *format, ...) + Py_GCC_ATTRIBUTE((format(printf, 3, 4))); +PyAPI_FUNC(int) PyOS_vsnprintf(char *str, size_t size, const char *format, va_list va) + Py_GCC_ATTRIBUTE((format(printf, 3, 0))); + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_ERRORS_H +# include "cpython/pyerrors.h" +# undef Py_CPYTHON_ERRORS_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_ERRORS_H */ diff --git a/Include/pyexpat.h b/Include/pyexpat.h new file mode 100644 index 0000000000000000000000000000000000000000..9824d099c3df7d08083ee6f9232dedcbe9d846a0 --- /dev/null +++ b/Include/pyexpat.h @@ -0,0 +1,57 @@ +/* Stuff to export relevant 'expat' entry points from pyexpat to other + * parser modules, such as cElementTree. */ + +/* note: you must import expat.h before importing this module! */ + +#define PyExpat_CAPI_MAGIC "pyexpat.expat_CAPI 1.1" +#define PyExpat_CAPSULE_NAME "pyexpat.expat_CAPI" + +struct PyExpat_CAPI +{ + char* magic; /* set to PyExpat_CAPI_MAGIC */ + int size; /* set to sizeof(struct PyExpat_CAPI) */ + int MAJOR_VERSION; + int MINOR_VERSION; + int MICRO_VERSION; + /* pointers to selected expat functions. 
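+       Consumers locate this table through the capsule mechanism, e.g.
+       (a sketch):
+           struct PyExpat_CAPI *api =
+               (struct PyExpat_CAPI *)PyCapsule_Import(PyExpat_CAPSULE_NAME, 0);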
add new functions at + the end, if needed */ + const XML_LChar * (*ErrorString)(enum XML_Error code); + enum XML_Error (*GetErrorCode)(XML_Parser parser); + XML_Size (*GetErrorColumnNumber)(XML_Parser parser); + XML_Size (*GetErrorLineNumber)(XML_Parser parser); + enum XML_Status (*Parse)( + XML_Parser parser, const char *s, int len, int isFinal); + XML_Parser (*ParserCreate_MM)( + const XML_Char *encoding, const XML_Memory_Handling_Suite *memsuite, + const XML_Char *namespaceSeparator); + void (*ParserFree)(XML_Parser parser); + void (*SetCharacterDataHandler)( + XML_Parser parser, XML_CharacterDataHandler handler); + void (*SetCommentHandler)( + XML_Parser parser, XML_CommentHandler handler); + void (*SetDefaultHandlerExpand)( + XML_Parser parser, XML_DefaultHandler handler); + void (*SetElementHandler)( + XML_Parser parser, XML_StartElementHandler start, + XML_EndElementHandler end); + void (*SetNamespaceDeclHandler)( + XML_Parser parser, XML_StartNamespaceDeclHandler start, + XML_EndNamespaceDeclHandler end); + void (*SetProcessingInstructionHandler)( + XML_Parser parser, XML_ProcessingInstructionHandler handler); + void (*SetUnknownEncodingHandler)( + XML_Parser parser, XML_UnknownEncodingHandler handler, + void *encodingHandlerData); + void (*SetUserData)(XML_Parser parser, void *userData); + void (*SetStartDoctypeDeclHandler)(XML_Parser parser, + XML_StartDoctypeDeclHandler start); + enum XML_Status (*SetEncoding)(XML_Parser parser, const XML_Char *encoding); + int (*DefaultUnknownEncodingHandler)( + void *encodingHandlerData, const XML_Char *name, XML_Encoding *info); + /* might be NULL for expat < 2.1.0 */ + int (*SetHashSalt)(XML_Parser parser, unsigned long hash_salt); + /* might be NULL for expat < 2.6.0 */ + XML_Bool (*SetReparseDeferralEnabled)(XML_Parser parser, XML_Bool enabled); + /* always add new stuff to the end! */ +}; + diff --git a/Include/pyframe.h b/Include/pyframe.h new file mode 100644 index 0000000000000000000000000000000000000000..13d52312ea966e43322950405d06440e0c988fd9 --- /dev/null +++ b/Include/pyframe.h @@ -0,0 +1,26 @@ +/* Limited C API of PyFrame API + * + * Include "frameobject.h" to get the PyFrameObject structure. + */ + +#ifndef Py_PYFRAME_H +#define Py_PYFRAME_H +#ifdef __cplusplus +extern "C" { +#endif + +/* Return the line of code the frame is currently executing. */ +PyAPI_FUNC(int) PyFrame_GetLineNumber(PyFrameObject *); + +PyAPI_FUNC(PyCodeObject *) PyFrame_GetCode(PyFrameObject *frame); + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_PYFRAME_H +# include "cpython/pyframe.h" +# undef Py_CPYTHON_PYFRAME_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_PYFRAME_H */ diff --git a/Include/pyhash.h b/Include/pyhash.h new file mode 100644 index 0000000000000000000000000000000000000000..3e23e2758808d793ae8ffe716e898a0bceb6f3d3 --- /dev/null +++ b/Include/pyhash.h @@ -0,0 +1,59 @@ +#ifndef Py_HASH_H +#define Py_HASH_H +#ifdef __cplusplus +extern "C" { +#endif + +/* Cutoff for small string DJBX33A optimization in range [1, cutoff). + * + * About 50% of the strings in a typical Python application are smaller than + * 6 to 7 chars. However DJBX33A is vulnerable to hash collision attacks. + * NEVER use DJBX33A for long strings! + * + * A Py_HASH_CUTOFF of 0 disables small string optimization. 32 bit platforms + * should use a smaller cutoff because it is easier to create colliding + * strings. A cutoff of 7 on 64bit platforms and 5 on 32bit platforms should + * provide a decent safety margin. 
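+ *
+ * For example (illustrative only): compiling with Py_HASH_CUTOFF defined as 5
+ * routes strings of 1 to 4 bytes through DJBX33A and everything longer
+ * through the hardened default algorithm.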
+ */
+#ifndef Py_HASH_CUTOFF
+#  define Py_HASH_CUTOFF 0
+#elif (Py_HASH_CUTOFF > 7 || Py_HASH_CUTOFF < 0)
+#  error Py_HASH_CUTOFF must be in range 0...7.
+#endif /* Py_HASH_CUTOFF */
+
+
+/* Hash algorithm selection
+ *
+ * The values for Py_HASH_* are hard-coded in the
+ * configure script.
+ *
+ * - FNV and SIPHASH* are available on all platforms and architectures.
+ * - With EXTERNAL, embedders can provide an alternative implementation with::
+ *
+ *     PyHash_FuncDef PyHash_Func = {...};
+ *
+ * XXX: Figure out __declspec() for extern PyHash_FuncDef.
+ */
+#define Py_HASH_EXTERNAL 0
+#define Py_HASH_SIPHASH24 1
+#define Py_HASH_FNV 2
+#define Py_HASH_SIPHASH13 3
+
+#ifndef Py_HASH_ALGORITHM
+#  ifndef HAVE_ALIGNED_REQUIRED
+#    define Py_HASH_ALGORITHM Py_HASH_SIPHASH13
+#  else
+#    define Py_HASH_ALGORITHM Py_HASH_FNV
+#  endif /* uint64_t && uint32_t && aligned */
+#endif /* Py_HASH_ALGORITHM */
+
+#ifndef Py_LIMITED_API
+#  define Py_CPYTHON_HASH_H
+#  include "cpython/pyhash.h"
+#  undef Py_CPYTHON_HASH_H
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+#endif // !Py_HASH_H
diff --git a/Include/pylifecycle.h b/Include/pylifecycle.h
new file mode 100644
index 0000000000000000000000000000000000000000..de1bcb1d2cb632b026220bc02808352915b61c34
--- /dev/null
+++ b/Include/pylifecycle.h
@@ -0,0 +1,80 @@
+
+/* Interfaces to configure, query, create & destroy the Python runtime */
+
+#ifndef Py_PYLIFECYCLE_H
+#define Py_PYLIFECYCLE_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* Initialization and finalization */
+PyAPI_FUNC(void) Py_Initialize(void);
+PyAPI_FUNC(void) Py_InitializeEx(int);
+PyAPI_FUNC(void) Py_Finalize(void);
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03060000
+PyAPI_FUNC(int) Py_FinalizeEx(void);
+#endif
+PyAPI_FUNC(int) Py_IsInitialized(void);
+
+/* Subinterpreter support */
+PyAPI_FUNC(PyThreadState *) Py_NewInterpreter(void);
+PyAPI_FUNC(void) Py_EndInterpreter(PyThreadState *);
+
+
+/* Py_PyAtExit is for the atexit module, Py_AtExit is for low-level
+ * exit functions.
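+ *
+ * For example (a sketch; flush_caches stands in for any low-level cleanup
+ * hook; at most 32 functions can be registered):
+ *
+ *     if (Py_AtExit(flush_caches) < 0) {
+ *         ...registration table is full, fall back to manual cleanup...
+ *     }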
+ */ +PyAPI_FUNC(int) Py_AtExit(void (*func)(void)); + +PyAPI_FUNC(void) _Py_NO_RETURN Py_Exit(int); + +/* Bootstrap __main__ (defined in Modules/main.c) */ +PyAPI_FUNC(int) Py_Main(int argc, wchar_t **argv); +PyAPI_FUNC(int) Py_BytesMain(int argc, char **argv); + +/* In pathconfig.c */ +Py_DEPRECATED(3.11) PyAPI_FUNC(void) Py_SetProgramName(const wchar_t *); +Py_DEPRECATED(3.13) PyAPI_FUNC(wchar_t *) Py_GetProgramName(void); + +Py_DEPRECATED(3.11) PyAPI_FUNC(void) Py_SetPythonHome(const wchar_t *); +Py_DEPRECATED(3.13) PyAPI_FUNC(wchar_t *) Py_GetPythonHome(void); + +Py_DEPRECATED(3.13) PyAPI_FUNC(wchar_t *) Py_GetProgramFullPath(void); +Py_DEPRECATED(3.13) PyAPI_FUNC(wchar_t *) Py_GetPrefix(void); +Py_DEPRECATED(3.13) PyAPI_FUNC(wchar_t *) Py_GetExecPrefix(void); +Py_DEPRECATED(3.13) PyAPI_FUNC(wchar_t *) Py_GetPath(void); +#ifdef MS_WINDOWS +int _Py_CheckPython3(void); +#endif + +/* In their own files */ +PyAPI_FUNC(const char *) Py_GetVersion(void); +PyAPI_FUNC(const char *) Py_GetPlatform(void); +PyAPI_FUNC(const char *) Py_GetCopyright(void); +PyAPI_FUNC(const char *) Py_GetCompiler(void); +PyAPI_FUNC(const char *) Py_GetBuildInfo(void); + +/* Signals */ +typedef void (*PyOS_sighandler_t)(int); +PyAPI_FUNC(PyOS_sighandler_t) PyOS_getsig(int); +PyAPI_FUNC(PyOS_sighandler_t) PyOS_setsig(int, PyOS_sighandler_t); + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030B0000 +PyAPI_DATA(const unsigned long) Py_Version; +#endif + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030D0000 +PyAPI_FUNC(int) Py_IsFinalizing(void); +#endif + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_PYLIFECYCLE_H +# include "cpython/pylifecycle.h" +# undef Py_CPYTHON_PYLIFECYCLE_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_PYLIFECYCLE_H */ diff --git a/Include/pymacconfig.h b/Include/pymacconfig.h new file mode 100644 index 0000000000000000000000000000000000000000..615abe103ca0388c7ce67f142676d64bb8ef1697 --- /dev/null +++ b/Include/pymacconfig.h @@ -0,0 +1,91 @@ +// This file moves some of the autoconf magic to compile-time when building on +// macOS. This is needed for building 4-way universal binaries and for 64-bit +// universal binaries because the values redefined below aren't configure-time +// constant but only compile-time constant in these scenarios. 
+
+#ifndef PY_MACCONFIG_H
+#define PY_MACCONFIG_H
+#ifdef __APPLE__
+
+#undef ALIGNOF_MAX_ALIGN_T
+#undef SIZEOF_LONG
+#undef SIZEOF_LONG_DOUBLE
+#undef SIZEOF_PTHREAD_T
+#undef SIZEOF_SIZE_T
+#undef SIZEOF_TIME_T
+#undef SIZEOF_VOID_P
+#undef SIZEOF__BOOL
+#undef SIZEOF_UINTPTR_T
+#undef WORDS_BIGENDIAN
+#undef DOUBLE_IS_ARM_MIXED_ENDIAN_IEEE754
+#undef DOUBLE_IS_BIG_ENDIAN_IEEE754
+#undef DOUBLE_IS_LITTLE_ENDIAN_IEEE754
+#undef HAVE_GCC_ASM_FOR_X87
+#undef HAVE_GCC_ASM_FOR_X64
+
+#undef VA_LIST_IS_ARRAY
+#if defined(__LP64__) && defined(__x86_64__)
+#  define VA_LIST_IS_ARRAY 1
+#endif
+
+#undef HAVE_LARGEFILE_SUPPORT
+#ifndef __LP64__
+#  define HAVE_LARGEFILE_SUPPORT 1
+#endif
+
+#undef SIZEOF_LONG
+#ifdef __LP64__
+#  define SIZEOF__BOOL 1
+#  define SIZEOF_LONG 8
+#  define SIZEOF_PTHREAD_T 8
+#  define SIZEOF_SIZE_T 8
+#  define SIZEOF_TIME_T 8
+#  define SIZEOF_VOID_P 8
+#  define SIZEOF_UINTPTR_T 8
+#else
+#  ifdef __ppc__
+#    define SIZEOF__BOOL 4
+#  else
+#    define SIZEOF__BOOL 1
+#  endif
+#  define SIZEOF_LONG 4
+#  define SIZEOF_PTHREAD_T 4
+#  define SIZEOF_SIZE_T 4
+#  define SIZEOF_TIME_T 4
+#  define SIZEOF_VOID_P 4
+#  define SIZEOF_UINTPTR_T 4
+#endif
+
+// macOS 10.4 (the first release to support 64-bit code
+// at all) only supports 64-bit in the UNIX layer.
+// Therefore suppress the toolbox-glue in 64-bit mode.
+//
+// In 64-bit mode setpgrp always has no arguments; in 32-bit
+// mode that depends on the compilation environment.
+#if defined(__LP64__)
+#  undef SETPGRP_HAVE_ARG
+#endif
+
+#ifdef __BIG_ENDIAN__
+#  define WORDS_BIGENDIAN 1
+#  define DOUBLE_IS_BIG_ENDIAN_IEEE754
+#else
+#  define DOUBLE_IS_LITTLE_ENDIAN_IEEE754
+#endif
+
+#if defined(__i386__) || defined(__x86_64__)
+#  define HAVE_GCC_ASM_FOR_X87
+#  define ALIGNOF_MAX_ALIGN_T 16
+#  define HAVE_GCC_ASM_FOR_X64 1
+#  define SIZEOF_LONG_DOUBLE 16
+#else
+#  define ALIGNOF_MAX_ALIGN_T 8
+#  define SIZEOF_LONG_DOUBLE 8
+#endif
+
+#endif // __APPLE__
+#endif // !PY_MACCONFIG_H
diff --git a/Include/pymacro.h b/Include/pymacro.h
new file mode 100644
index 0000000000000000000000000000000000000000..e0378f9d27a048c26b87fe4b3ae3869aaae5cc9b
--- /dev/null
+++ b/Include/pymacro.h
@@ -0,0 +1,193 @@
+#ifndef Py_PYMACRO_H
+#define Py_PYMACRO_H
+
+// gh-91782: On FreeBSD 12, if the _POSIX_C_SOURCE and _XOPEN_SOURCE macros are
+// defined, <sys/cdefs.h> disables C11 support and does not define
+// the static_assert() macro.
+// https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=255290
+//
+// macOS <= 10.10 doesn't define static_assert in assert.h at all despite
+// having C11 compiler support.
+//
+// static_assert is defined in glibc from version 2.16. Compiler support for
+// the C11 _Static_assert keyword is in gcc >= 4.6.
+//
+// MSVC makes static_assert a keyword in C11-17, contrary to the standards.
+//
+// In C++11 and C2x, static_assert is a keyword, and redefining it is undefined
+// behaviour. So only define it when building as C, not C++ (if __cplusplus is
+// not defined), and only for C11-17.
+#if !defined(static_assert) && (defined(__GNUC__) || defined(__clang__)) \
+        && !defined(__cplusplus) && defined(__STDC_VERSION__) \
+        && __STDC_VERSION__ >= 201112L && __STDC_VERSION__ <= 201710L
+#  define static_assert _Static_assert
+#endif
+
+/* Minimum value between x and y */
+#define Py_MIN(x, y) (((x) > (y)) ? (y) : (x))
+
+/* Maximum value between x and y */
+#define Py_MAX(x, y) (((x) > (y)) ? (x) : (y))
+
+/* Absolute value of the number x */
+#define Py_ABS(x) ((x) < 0 ? -(x) : (x))
+
+#define _Py_XSTRINGIFY(x) #x
+
+/* Convert the argument to a string. For example, Py_STRINGIFY(123) is replaced
+   with "123" by the preprocessor. Defines are also replaced by their value.
+   For example Py_STRINGIFY(__LINE__) is replaced by the line number, not
+   by "__LINE__". */
+#define Py_STRINGIFY(x) _Py_XSTRINGIFY(x)
+
+/* Get the size of a structure member in bytes */
+#define Py_MEMBER_SIZE(type, member) sizeof(((type *)0)->member)
+
+/* Argument must be a char or an int in [-128, 127] or [0, 255]. */
+#define Py_CHARMASK(c) ((unsigned char)((c) & 0xff))
+
+#if (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L \
+     && !defined(__cplusplus) && !defined(_MSC_VER))
+#  define Py_BUILD_ASSERT_EXPR(cond) \
+    ((void)sizeof(struct { int dummy; _Static_assert(cond, #cond); }), \
+     0)
+#else
+   /* Assert a build-time dependency, as an expression.
+    *
+    * Your compile will fail if the condition isn't true, or can't be evaluated
+    * by the compiler. This can be used in an expression: its value is 0.
+    *
+    * Example:
+    *
+    *   #define foo_to_char(foo)  \
+    *            ((char *)(foo)   \
+    *             + Py_BUILD_ASSERT_EXPR(offsetof(struct foo, string) == 0))
+    *
+    * Written by Rusty Russell, public domain, http://ccodearchive.net/
+    */
+#  define Py_BUILD_ASSERT_EXPR(cond) \
+    (sizeof(char [1 - 2*!(cond)]) - 1)
+#endif
+
+#if ((defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L) \
+     || (defined(__cplusplus) && __cplusplus >= 201103L))
+   // Use static_assert() on C11 and newer
+#  define Py_BUILD_ASSERT(cond) \
+        do { \
+            static_assert((cond), #cond); \
+        } while (0)
+#else
+#  define Py_BUILD_ASSERT(cond) \
+        do { \
+            (void)Py_BUILD_ASSERT_EXPR(cond); \
+        } while(0)
+#endif
+
+/* Get the number of elements in a visible array
+
+   This does not work on pointers, or arrays declared as [], or function
+   parameters. With correct compiler support, such usage will cause a build
+   error (see Py_BUILD_ASSERT_EXPR).
+
+   Written by Rusty Russell, public domain, http://ccodearchive.net/
+
+   Requires GCC 3.1+ */
+#if (defined(__GNUC__) && !defined(__STRICT_ANSI__) && \
+    (((__GNUC__ == 3) && (__GNUC_MINOR__ >= 1)) || (__GNUC__ >= 4)))
+/* Two gcc extensions.
+   &a[0] degrades to a pointer: a different type from an array */
+#define Py_ARRAY_LENGTH(array) \
+    (sizeof(array) / sizeof((array)[0]) \
+     + Py_BUILD_ASSERT_EXPR(!__builtin_types_compatible_p(typeof(array), \
+                                                          typeof(&(array)[0]))))
+#else
+#define Py_ARRAY_LENGTH(array) \
+    (sizeof(array) / sizeof((array)[0]))
+#endif
+
+
+/* Define macros for inline documentation. */
+#define PyDoc_VAR(name) static const char name[]
+#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
+#ifdef WITH_DOC_STRINGS
+#define PyDoc_STR(str) str
+#else
+#define PyDoc_STR(str) ""
+#endif
+
+/* Below "a" is a power of 2. */
+/* Round down size "n" to be a multiple of "a". */
+#define _Py_SIZE_ROUND_DOWN(n, a) ((size_t)(n) & ~(size_t)((a) - 1))
+/* Round up size "n" to be a multiple of "a". */
+#define _Py_SIZE_ROUND_UP(n, a) (((size_t)(n) + \
+        (size_t)((a) - 1)) & ~(size_t)((a) - 1))
+/* Round pointer "p" down to the closest "a"-aligned address <= "p". */
+#define _Py_ALIGN_DOWN(p, a) ((void *)((uintptr_t)(p) & ~(uintptr_t)((a) - 1)))
+/* Round pointer "p" up to the closest "a"-aligned address >= "p". */
+#define _Py_ALIGN_UP(p, a) ((void *)(((uintptr_t)(p) + \
+        (uintptr_t)((a) - 1)) & ~(uintptr_t)((a) - 1)))
+/* Check if pointer "p" is aligned to "a"-bytes boundary.
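+   For example, _Py_IS_ALIGNED(p, 16) is non-zero exactly when the low four
+   bits of the address are zero, and _Py_SIZE_ROUND_UP(n, 8) rounds n up to
+   a multiple of 8.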
*/ +#define _Py_IS_ALIGNED(p, a) (!((uintptr_t)(p) & (uintptr_t)((a) - 1))) + +/* Use this for unused arguments in a function definition to silence compiler + * warnings. Example: + * + * int func(int a, int Py_UNUSED(b)) { return a; } + */ +#if defined(__GNUC__) || defined(__clang__) +# define Py_UNUSED(name) _unused_ ## name __attribute__((unused)) +#elif defined(_MSC_VER) + // Disable warning C4100: unreferenced formal parameter, + // declare the parameter, + // restore old compiler warnings. +# define Py_UNUSED(name) \ + __pragma(warning(push)) \ + __pragma(warning(suppress: 4100)) \ + _unused_ ## name \ + __pragma(warning(pop)) +#else +# define Py_UNUSED(name) _unused_ ## name +#endif + +#if defined(RANDALL_WAS_HERE) +# define Py_UNREACHABLE() \ + Py_FatalError( \ + "If you're seeing this, the code is in what I thought was\n" \ + "an unreachable state.\n\n" \ + "I could give you advice for what to do, but honestly, why\n" \ + "should you trust me? I clearly screwed this up. I'm writing\n" \ + "a message that should never appear, yet I know it will\n" \ + "probably appear someday.\n\n" \ + "On a deep level, I know I'm not up to this task.\n" \ + "I'm so sorry.\n" \ + "https://xkcd.com/2200") +#elif defined(Py_DEBUG) +# define Py_UNREACHABLE() \ + Py_FatalError( \ + "We've reached an unreachable state. Anything is possible.\n" \ + "The limits were in our heads all along. Follow your dreams.\n" \ + "https://xkcd.com/2200") +#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 5)) +# define Py_UNREACHABLE() __builtin_unreachable() +#elif defined(__clang__) || defined(__INTEL_COMPILER) +# define Py_UNREACHABLE() __builtin_unreachable() +#elif defined(_MSC_VER) +# define Py_UNREACHABLE() __assume(0) +#else +# define Py_UNREACHABLE() \ + Py_FatalError("Unreachable C code path reached") +#endif + +#define _Py_CONTAINER_OF(ptr, type, member) \ + (type*)((char*)ptr - offsetof(type, member)) + +// Prevent using an expression as a l-value. +// For example, "int x; _Py_RVALUE(x) = 1;" fails with a compiler error. +#define _Py_RVALUE(EXPR) ((void)0, (EXPR)) + +// Return non-zero if the type is signed, return zero if it's unsigned. +// Use "<= 0" rather than "< 0" to prevent the compiler warning: +// "comparison of unsigned expression in '< 0' is always false". +#define _Py_IS_TYPE_SIGNED(type) ((type)(-1) <= 0) + +#endif /* Py_PYMACRO_H */ diff --git a/Include/pymath.h b/Include/pymath.h new file mode 100644 index 0000000000000000000000000000000000000000..4c1e3d9984894b0cb27dfb387e3e3b8781e14091 --- /dev/null +++ b/Include/pymath.h @@ -0,0 +1,62 @@ +// Symbols and macros to supply platform-independent interfaces to mathematical +// functions and constants. + +#ifndef Py_PYMATH_H +#define Py_PYMATH_H + +/* High precision definition of pi and e (Euler) + * The values are taken from libc6's math.h. + */ +#ifndef Py_MATH_PIl +#define Py_MATH_PIl 3.1415926535897932384626433832795029L +#endif +#ifndef Py_MATH_PI +#define Py_MATH_PI 3.14159265358979323846 +#endif + +#ifndef Py_MATH_El +#define Py_MATH_El 2.7182818284590452353602874713526625L +#endif + +#ifndef Py_MATH_E +#define Py_MATH_E 2.7182818284590452354 +#endif + +/* Tau (2pi) to 40 digits, taken from tauday.com/tau-digits. */ +#ifndef Py_MATH_TAU +#define Py_MATH_TAU 6.2831853071795864769252867665590057683943L +#endif + +// Py_IS_NAN(X) +// Return 1 if float or double arg is a NaN, else 0. +#define Py_IS_NAN(X) isnan(X) + +// Py_IS_INFINITY(X) +// Return 1 if float or double arg is an infinity, else 0. 
+#define Py_IS_INFINITY(X) isinf(X) + +// Py_IS_FINITE(X) +// Return 1 if float or double arg is neither infinite nor NAN, else 0. +#define Py_IS_FINITE(X) isfinite(X) + +// Py_INFINITY: Value that evaluates to a positive double infinity. +#ifndef Py_INFINITY +# define Py_INFINITY ((double)INFINITY) +#endif + +/* Py_HUGE_VAL should always be the same as Py_INFINITY. But historically + * this was not reliable and Python did not require IEEE floats and C99 + * conformity. Prefer Py_INFINITY for new code. + */ +#ifndef Py_HUGE_VAL +# define Py_HUGE_VAL HUGE_VAL +#endif + +/* Py_NAN: Value that evaluates to a quiet Not-a-Number (NaN). The sign is + * undefined and normally not relevant, but e.g. fixed for float("nan"). + */ +#if !defined(Py_NAN) +# define Py_NAN ((double)NAN) +#endif + +#endif /* Py_PYMATH_H */ diff --git a/Include/pymem.h b/Include/pymem.h new file mode 100644 index 0000000000000000000000000000000000000000..a80da99e1dd7fc5710e22f780e75111e0c12f21b --- /dev/null +++ b/Include/pymem.h @@ -0,0 +1,110 @@ +// The PyMem_ family: low-level memory allocation interfaces. +// See objimpl.h for the PyObject_ memory family. + +#ifndef Py_PYMEM_H +#define Py_PYMEM_H +#ifdef __cplusplus +extern "C" { +#endif + +/* BEWARE: + + Each interface exports both functions and macros. Extension modules should + use the functions, to ensure binary compatibility across Python versions. + Because the Python implementation is free to change internal details, and + the macros may (or may not) expose details for speed, if you do use the + macros you must recompile your extensions with each Python release. + + Never mix calls to PyMem_ with calls to the platform malloc/realloc/ + calloc/free. For example, on Windows different DLLs may end up using + different heaps, and if you use PyMem_Malloc you'll get the memory from the + heap used by the Python DLL; it could be a disaster if you free()'ed that + directly in your own extension. Using PyMem_Free instead ensures Python + can return the memory to the proper heap. As another example, in + a debug build (Py_DEBUG macro), Python wraps all calls to all PyMem_ and + PyObject_ memory functions in special debugging wrappers that add additional + debugging info to dynamic memory blocks. The system routines have no idea + what to do with that stuff, and the Python wrappers have no idea what to do + with raw blocks obtained directly by the system routines then. + + The GIL must be held when using these APIs. +*/ + +/* + * Raw memory interface + * ==================== + */ + +/* Functions + + Functions supplying platform-independent semantics for malloc/realloc/ + free. These functions make sure that allocating 0 bytes returns a distinct + non-NULL pointer (whenever possible -- if we're flat out of memory, NULL + may be returned), even if the platform malloc and realloc don't. + Returned pointers must be checked for NULL explicitly. No action is + performed on failure (no exception is set, no warning is printed, etc). +*/ + +PyAPI_FUNC(void *) PyMem_Malloc(size_t size); +PyAPI_FUNC(void *) PyMem_Calloc(size_t nelem, size_t elsize); +PyAPI_FUNC(void *) PyMem_Realloc(void *ptr, size_t new_size); +PyAPI_FUNC(void) PyMem_Free(void *ptr); + +/* + * Type-oriented memory interface + * ============================== + * + * Allocate memory for n objects of the given type. Returns a new pointer + * or NULL if the request was too large or memory allocation failed. 
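+ * For example (illustrative), "wchar_t *buf = PyMem_New(wchar_t, n);"
+ * requests room for n wide characters and evaluates to NULL, rather than
+ * to a too-small buffer, when n * sizeof(wchar_t) would overflow.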
Use
+ * these macros rather than doing the multiplication yourself so that proper
+ * overflow checking is always done.
+ */
+
+#define PyMem_New(type, n) \
+  ( ((size_t)(n) > PY_SSIZE_T_MAX / sizeof(type)) ? NULL : \
+        ( (type *) PyMem_Malloc((n) * sizeof(type)) ) )
+
+/*
+ * The value of (p) is always clobbered by this macro regardless of success.
+ * The caller MUST check if (p) is NULL afterwards and deal with the memory
+ * error if so. This means the original value of (p) MUST be saved for the
+ * caller's memory error handler to not lose track of it.
+ */
+#define PyMem_Resize(p, type, n) \
+  ( (p) = ((size_t)(n) > PY_SSIZE_T_MAX / sizeof(type)) ? NULL : \
+        (type *) PyMem_Realloc((p), (n) * sizeof(type)) )
+
+
+// Deprecated aliases only kept for backward compatibility.
+// PyMem_Del and PyMem_DEL are defined with no parameter to be able to use
+// them as function pointers (ex: dealloc = PyMem_Del).
+#define PyMem_MALLOC(n)           PyMem_Malloc((n))
+#define PyMem_NEW(type, n)        PyMem_New(type, (n))
+#define PyMem_REALLOC(p, n)       PyMem_Realloc((p), (n))
+#define PyMem_RESIZE(p, type, n)  PyMem_Resize((p), type, (n))
+#define PyMem_FREE(p)             PyMem_Free((p))
+#define PyMem_Del(p)              PyMem_Free((p))
+#define PyMem_DEL(p)              PyMem_Free((p))
+
+
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030d0000
+// Memory allocator which doesn't require the GIL to be held.
+// Usually, it's just a thin wrapper to functions of the standard C library:
+// malloc(), calloc(), realloc() and free(). The difference is that
+// tracemalloc can track these memory allocations.
+PyAPI_FUNC(void *) PyMem_RawMalloc(size_t size);
+PyAPI_FUNC(void *) PyMem_RawCalloc(size_t nelem, size_t elsize);
+PyAPI_FUNC(void *) PyMem_RawRealloc(void *ptr, size_t new_size);
+PyAPI_FUNC(void) PyMem_RawFree(void *ptr);
+#endif
+
+#ifndef Py_LIMITED_API
+#  define Py_CPYTHON_PYMEM_H
+#  include "cpython/pymem.h"
+#  undef Py_CPYTHON_PYMEM_H
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+#endif // !Py_PYMEM_H
diff --git a/Include/pyport.h b/Include/pyport.h
new file mode 100644
index 0000000000000000000000000000000000000000..72a157e679d92ff5c70db7b98fe0456cf59ec4ce
--- /dev/null
+++ b/Include/pyport.h
@@ -0,0 +1,633 @@
+#ifndef Py_PYPORT_H
+#define Py_PYPORT_H
+
+#ifndef UCHAR_MAX
+#  error "<limits.h> header must define UCHAR_MAX"
+#endif
+#if UCHAR_MAX != 255
+#  error "Python's source code assumes C's unsigned char is an 8-bit type"
+#endif
+
+
+// Macro to use C++ static_cast<> in the Python C API.
+#ifdef __cplusplus
+#  define _Py_STATIC_CAST(type, expr) static_cast<type>(expr)
+#else
+#  define _Py_STATIC_CAST(type, expr) ((type)(expr))
+#endif
+// Macro to use the more powerful/dangerous C-style cast even in C++.
+#define _Py_CAST(type, expr) ((type)(expr))
+
+// Static inline functions should use _Py_NULL rather than directly using NULL
+// to prevent C++ compiler warnings. On C23 and newer and on C++11 and newer,
+// _Py_NULL is defined as nullptr.
+#if (defined (__STDC_VERSION__) && __STDC_VERSION__ > 201710L) \
+        || (defined(__cplusplus) && __cplusplus >= 201103)
+#  define _Py_NULL nullptr
+#else
+#  define _Py_NULL NULL
+#endif
+
+
+/* Defines to build Python and its standard library:
+ *
+ * - Py_BUILD_CORE: Build Python core. Give access to Python internals, but
+ *   should not be used by third-party modules.
+ * - Py_BUILD_CORE_BUILTIN: Build a Python stdlib module as a built-in module.
+ * - Py_BUILD_CORE_MODULE: Build a Python stdlib module as a dynamic library.
+ *
+ * Py_BUILD_CORE_BUILTIN and Py_BUILD_CORE_MODULE imply Py_BUILD_CORE.
+ * + * On Windows, Py_BUILD_CORE_MODULE exports "PyInit_xxx" symbol, whereas + * Py_BUILD_CORE_BUILTIN does not. + */ +#if defined(Py_BUILD_CORE_BUILTIN) && !defined(Py_BUILD_CORE) +# define Py_BUILD_CORE +#endif +#if defined(Py_BUILD_CORE_MODULE) && !defined(Py_BUILD_CORE) +# define Py_BUILD_CORE +#endif + + +/************************************************************************** +Symbols and macros to supply platform-independent interfaces to basic +C language & library operations whose spellings vary across platforms. + +Please try to make documentation here as clear as possible: by definition, +the stuff here is trying to illuminate C's darkest corners. + +Config #defines referenced here: + +SIGNED_RIGHT_SHIFT_ZERO_FILLS +Meaning: To be defined iff i>>j does not extend the sign bit when i is a + signed integral type and i < 0. +Used in: Py_ARITHMETIC_RIGHT_SHIFT + +Py_DEBUG +Meaning: Extra checks compiled in for debug mode. +Used in: Py_SAFE_DOWNCAST + +**************************************************************************/ + +/* typedefs for some C9X-defined synonyms for integral types. + * + * The names in Python are exactly the same as the C9X names, except with a + * Py_ prefix. Until C9X is universally implemented, this is the only way + * to ensure that Python gets reliable names that don't conflict with names + * in non-Python code that are playing their own tricks to define the C9X + * names. + * + * NOTE: don't go nuts here! Python has no use for *most* of the C9X + * integral synonyms. Only define the ones we actually need. + */ + +/* long long is required. Ensure HAVE_LONG_LONG is defined for compatibility. */ +#ifndef HAVE_LONG_LONG +#define HAVE_LONG_LONG 1 +#endif +#ifndef PY_LONG_LONG +#define PY_LONG_LONG long long +/* If LLONG_MAX is defined in limits.h, use that. */ +#define PY_LLONG_MIN LLONG_MIN +#define PY_LLONG_MAX LLONG_MAX +#define PY_ULLONG_MAX ULLONG_MAX +#endif + +#define PY_UINT32_T uint32_t +#define PY_UINT64_T uint64_t + +/* Signed variants of the above */ +#define PY_INT32_T int32_t +#define PY_INT64_T int64_t + +/* PYLONG_BITS_IN_DIGIT describes the number of bits per "digit" (limb) in the + * PyLongObject implementation (longintrepr.h). It's currently either 30 or 15, + * defaulting to 30. The 15-bit digit option may be removed in the future. + */ +#ifndef PYLONG_BITS_IN_DIGIT +#define PYLONG_BITS_IN_DIGIT 30 +#endif + +/* uintptr_t is the C9X name for an unsigned integral type such that a + * legitimate void* can be cast to uintptr_t and then back to void* again + * without loss of information. Similarly for intptr_t, wrt a signed + * integral type. + */ +typedef uintptr_t Py_uintptr_t; +typedef intptr_t Py_intptr_t; + +/* Py_ssize_t is a signed integral type such that sizeof(Py_ssize_t) == + * sizeof(size_t). C99 doesn't define such a thing directly (size_t is an + * unsigned integral type). See PEP 353 for details. + * PY_SSIZE_T_MAX is the largest positive value of type Py_ssize_t. + */ +#ifdef HAVE_PY_SSIZE_T + +#elif HAVE_SSIZE_T +typedef ssize_t Py_ssize_t; +# define PY_SSIZE_T_MAX SSIZE_MAX +#elif SIZEOF_VOID_P == SIZEOF_SIZE_T +typedef Py_intptr_t Py_ssize_t; +# define PY_SSIZE_T_MAX INTPTR_MAX +#else +# error "Python needs a typedef for Py_ssize_t in pyport.h." +#endif + +/* Smallest negative value of type Py_ssize_t. */ +#define PY_SSIZE_T_MIN (-PY_SSIZE_T_MAX-1) + +/* Py_hash_t is the same size as a pointer. 
*/ +#define SIZEOF_PY_HASH_T SIZEOF_SIZE_T +typedef Py_ssize_t Py_hash_t; +/* Py_uhash_t is the unsigned equivalent needed to calculate numeric hash. */ +#define SIZEOF_PY_UHASH_T SIZEOF_SIZE_T +typedef size_t Py_uhash_t; + +/* Now PY_SSIZE_T_CLEAN is mandatory. This is just for backward compatibility. */ +typedef Py_ssize_t Py_ssize_clean_t; + +/* Largest possible value of size_t. */ +#define PY_SIZE_MAX SIZE_MAX + +/* Macro kept for backward compatibility: use directly "z" in new code. + * + * PY_FORMAT_SIZE_T is a modifier for use in a printf format to convert an + * argument with the width of a size_t or Py_ssize_t: "z" (C99). + */ +#ifndef PY_FORMAT_SIZE_T +# define PY_FORMAT_SIZE_T "z" +#endif + +/* Py_LOCAL can be used instead of static to get the fastest possible calling + * convention for functions that are local to a given module. + * + * Py_LOCAL_INLINE does the same thing, and also explicitly requests inlining, + * for platforms that support that. + * + * NOTE: You can only use this for functions that are entirely local to a + * module; functions that are exported via method tables, callbacks, etc, + * should keep using static. + */ + +#if defined(_MSC_VER) + /* ignore warnings if the compiler decides not to inline a function */ +# pragma warning(disable: 4710) + /* fastest possible local call under MSVC */ +# define Py_LOCAL(type) static type __fastcall +# define Py_LOCAL_INLINE(type) static __inline type __fastcall +#else +# define Py_LOCAL(type) static type +# define Py_LOCAL_INLINE(type) static inline type +#endif + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 < 0x030b0000 +# define Py_MEMCPY memcpy +#endif + +#ifdef __cplusplus +/* Move this down here since some C++ #include's don't like to be included + inside an extern "C" */ +extern "C" { +#endif + + +/* Py_ARITHMETIC_RIGHT_SHIFT + * C doesn't define whether a right-shift of a signed integer sign-extends + * or zero-fills. Here a macro to force sign extension: + * Py_ARITHMETIC_RIGHT_SHIFT(TYPE, I, J) + * Return I >> J, forcing sign extension. Arithmetically, return the + * floor of I/2**J. + * Requirements: + * I should have signed integer type. In the terminology of C99, this can + * be either one of the five standard signed integer types (signed char, + * short, int, long, long long) or an extended signed integer type. + * J is an integer >= 0 and strictly less than the number of bits in the + * type of I (because C doesn't define what happens for J outside that + * range either). + * TYPE used to specify the type of I, but is now ignored. It's been left + * in for backwards compatibility with versions <= 2.6 or 3.0. + * Caution: + * I may be evaluated more than once. + */ +#ifdef SIGNED_RIGHT_SHIFT_ZERO_FILLS +#define Py_ARITHMETIC_RIGHT_SHIFT(TYPE, I, J) \ + ((I) < 0 ? -1-((-1-(I)) >> (J)) : (I) >> (J)) +#else +#define Py_ARITHMETIC_RIGHT_SHIFT(TYPE, I, J) ((I) >> (J)) +#endif + +/* Py_FORCE_EXPANSION(X) + * "Simply" returns its argument. However, macro expansions within the + * argument are evaluated. This unfortunate trickery is needed to get + * token-pasting to work as desired in some cases. + */ +#define Py_FORCE_EXPANSION(X) X + +/* Py_SAFE_DOWNCAST(VALUE, WIDE, NARROW) + * Cast VALUE to type NARROW from type WIDE. In Py_DEBUG mode, this + * assert-fails if any information is lost. + * Caution: + * VALUE may be evaluated more than once. 
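+ *
+ * Example:
+ *     Py_ssize_t size = ...;
+ *     int truncated = Py_SAFE_DOWNCAST(size, Py_ssize_t, int);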
+ */
+#ifdef Py_DEBUG
+#  define Py_SAFE_DOWNCAST(VALUE, WIDE, NARROW) \
+       (assert(_Py_STATIC_CAST(WIDE, _Py_STATIC_CAST(NARROW, (VALUE))) == (VALUE)), \
+        _Py_STATIC_CAST(NARROW, (VALUE)))
+#else
+#  define Py_SAFE_DOWNCAST(VALUE, WIDE, NARROW) _Py_STATIC_CAST(NARROW, (VALUE))
+#endif
+
+
+/* Py_DEPRECATED(version)
+ * Declare a variable, type, or function deprecated.
+ * The macro must be placed before the declaration.
+ * Usage:
+ *    Py_DEPRECATED(3.3) extern int old_var;
+ *    Py_DEPRECATED(3.4) typedef int T1;
+ *    Py_DEPRECATED(3.8) PyAPI_FUNC(int) Py_OldFunction(void);
+ */
+#if defined(__GNUC__) \
+    && ((__GNUC__ >= 4) || (__GNUC__ == 3) && (__GNUC_MINOR__ >= 1))
+#define Py_DEPRECATED(VERSION_UNUSED) __attribute__((__deprecated__))
+#elif defined(_MSC_VER)
+#define Py_DEPRECATED(VERSION) __declspec(deprecated( \
+                                          "deprecated in " #VERSION))
+#else
+#define Py_DEPRECATED(VERSION_UNUSED)
+#endif
+
+// _Py_DEPRECATED_EXTERNALLY(version)
+// Deprecated outside CPython core.
+#ifdef Py_BUILD_CORE
+#define _Py_DEPRECATED_EXTERNALLY(VERSION_UNUSED)
+#else
+#define _Py_DEPRECATED_EXTERNALLY(version) Py_DEPRECATED(version)
+#endif
+
+
+#if defined(__clang__)
+#define _Py_COMP_DIAG_PUSH _Pragma("clang diagnostic push")
+#define _Py_COMP_DIAG_IGNORE_DEPR_DECLS \
+    _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"")
+#define _Py_COMP_DIAG_POP _Pragma("clang diagnostic pop")
+#elif defined(__GNUC__) \
+    && ((__GNUC__ >= 5) || (__GNUC__ == 4) && (__GNUC_MINOR__ >= 6))
+#define _Py_COMP_DIAG_PUSH _Pragma("GCC diagnostic push")
+#define _Py_COMP_DIAG_IGNORE_DEPR_DECLS \
+    _Pragma("GCC diagnostic ignored \"-Wdeprecated-declarations\"")
+#define _Py_COMP_DIAG_POP _Pragma("GCC diagnostic pop")
+#elif defined(_MSC_VER)
+#define _Py_COMP_DIAG_PUSH __pragma(warning(push))
+#define _Py_COMP_DIAG_IGNORE_DEPR_DECLS __pragma(warning(disable: 4996))
+#define _Py_COMP_DIAG_POP __pragma(warning(pop))
+#else
+#define _Py_COMP_DIAG_PUSH
+#define _Py_COMP_DIAG_IGNORE_DEPR_DECLS
+#define _Py_COMP_DIAG_POP
+#endif
+
+/* _Py_HOT_FUNCTION
+ * The hot attribute on a function is used to inform the compiler that the
+ * function is a hot spot of the compiled program. The function is optimized
+ * more aggressively and on many targets it is placed into a special subsection
+ * of the text section so all hot functions appear close together, improving
+ * locality.
+ *
+ * Usage:
+ *    int _Py_HOT_FUNCTION x(void) { return 3; }
+ *
+ * Issue #28618: This attribute must not be abused, otherwise it can have a
+ * negative effect on performance. Only the functions where Python spends most
+ * of its time should use it. Use a profiler when running the performance
+ * benchmark suite to find these functions.
+ */
+#if defined(__GNUC__) \
+    && ((__GNUC__ >= 5) || (__GNUC__ == 4) && (__GNUC_MINOR__ >= 3))
+#define _Py_HOT_FUNCTION __attribute__((hot))
+#else
+#define _Py_HOT_FUNCTION
+#endif
+
+// Ask the compiler to always inline a static inline function. The compiler can
+// ignore it and decide not to inline the function.
+//
+// It can be used to inline performance-critical static inline functions when
+// building Python in debug mode with function inlining disabled. For example,
+// MSC disables function inlining when building in debug mode.
+//
+// Blindly marking a static inline function with Py_ALWAYS_INLINE can result in
+// worse performance (due to increased code size, for example). The compiler is
+// usually smarter than the developer for the cost/benefit analysis.
+// +// If Python is built in debug mode (if the Py_DEBUG macro is defined), the +// Py_ALWAYS_INLINE macro does nothing. +// +// It must be specified before the function return type. Usage: +// +// static inline Py_ALWAYS_INLINE int random(void) { return 4; } +#if defined(Py_DEBUG) + // If Python is built in debug mode, usually compiler optimizations are + // disabled. In this case, Py_ALWAYS_INLINE can increase a lot the stack + // memory usage. For example, forcing inlining using gcc -O0 increases the + // stack usage from 6 KB to 15 KB per Python function call. +# define Py_ALWAYS_INLINE +#elif defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER) +# define Py_ALWAYS_INLINE __attribute__((always_inline)) +#elif defined(_MSC_VER) +# define Py_ALWAYS_INLINE __forceinline +#else +# define Py_ALWAYS_INLINE +#endif + +// Py_NO_INLINE +// Disable inlining on a function. For example, it reduces the C stack +// consumption: useful on LTO+PGO builds which heavily inline code (see +// bpo-33720). +// +// Usage: +// +// Py_NO_INLINE static int random(void) { return 4; } +#if defined(__GNUC__) || defined(__clang__) || defined(__INTEL_COMPILER) +# define Py_NO_INLINE __attribute__ ((noinline)) +#elif defined(_MSC_VER) +# define Py_NO_INLINE __declspec(noinline) +#else +# define Py_NO_INLINE +#endif + +#include "exports.h" + +#ifdef Py_LIMITED_API + // The internal C API must not be used with the limited C API: make sure + // that Py_BUILD_CORE macro is not defined in this case. These 3 macros are + // used by exports.h, so only undefine them afterwards. +# undef Py_BUILD_CORE +# undef Py_BUILD_CORE_BUILTIN +# undef Py_BUILD_CORE_MODULE +#endif + +/* limits.h constants that may be missing */ + +#ifndef INT_MAX +#define INT_MAX 2147483647 +#endif + +#ifndef LONG_MAX +#if SIZEOF_LONG == 4 +#define LONG_MAX 0X7FFFFFFFL +#elif SIZEOF_LONG == 8 +#define LONG_MAX 0X7FFFFFFFFFFFFFFFL +#else +#error "could not set LONG_MAX in pyport.h" +#endif +#endif + +#ifndef LONG_MIN +#define LONG_MIN (-LONG_MAX-1) +#endif + +#ifndef LONG_BIT +#define LONG_BIT (8 * SIZEOF_LONG) +#endif + +#if LONG_BIT != 8 * SIZEOF_LONG +/* 04-Oct-2000 LONG_BIT is apparently (mis)defined as 64 on some recent + * 32-bit platforms using gcc. We try to catch that here at compile-time + * rather than waiting for integer multiplication to trigger bogus + * overflows. + */ +#error "LONG_BIT definition appears wrong for platform (bad gcc/glibc config?)." +#endif + +#ifdef __cplusplus +} +#endif + +/* + * Hide GCC attributes from compilers that don't support them. + */ +#if (!defined(__GNUC__) || __GNUC__ < 2 || \ + (__GNUC__ == 2 && __GNUC_MINOR__ < 7) ) +#define Py_GCC_ATTRIBUTE(x) +#else +#define Py_GCC_ATTRIBUTE(x) __attribute__(x) +#endif + +/* + * Specify alignment on compilers that support it. + */ +#if defined(__GNUC__) && __GNUC__ >= 3 +#define Py_ALIGNED(x) __attribute__((aligned(x))) +#else +#define Py_ALIGNED(x) +#endif + +/* Eliminate end-of-loop code not reached warnings from SunPro C + * when using do{...}while(0) macros + */ +#ifdef __SUNPRO_C +#pragma error_messages (off,E_END_OF_LOOP_CODE_NOT_REACHED) +#endif + +#ifndef Py_LL +#define Py_LL(x) x##LL +#endif + +#ifndef Py_ULL +#define Py_ULL(x) Py_LL(x##U) +#endif + +#define Py_VA_COPY va_copy + +/* + * Convenient macros to deal with endianness of the platform. WORDS_BIGENDIAN is + * detected by configure and defined in pyconfig.h. The code in pyconfig.h + * also takes care of Apple's universal builds. 
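+ *
+ * For example, byte-order-specific code can be selected with:
+ *
+ *     #if PY_LITTLE_ENDIAN
+ *     ...little-endian fast path...
+ *     #else
+ *     ...big-endian path...
+ *     #endif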
+ */ + +#ifdef WORDS_BIGENDIAN +# define PY_BIG_ENDIAN 1 +# define PY_LITTLE_ENDIAN 0 +#else +# define PY_BIG_ENDIAN 0 +# define PY_LITTLE_ENDIAN 1 +#endif + +#ifdef __ANDROID__ + /* The Android langinfo.h header is not used. */ +# undef HAVE_LANGINFO_H +# undef CODESET +#endif + +/* Maximum value of the Windows DWORD type */ +#define PY_DWORD_MAX 4294967295U + +/* This macro used to tell whether Python was built with multithreading + * enabled. Now multithreading is always enabled, but keep the macro + * for compatibility. + */ +#ifndef WITH_THREAD +# define WITH_THREAD +#endif + +/* Some WebAssembly platforms do not provide a working pthread implementation. + * Thread support is stubbed and any attempt to create a new thread fails. + */ +#if (!defined(HAVE_PTHREAD_STUBS) && \ + (!defined(__EMSCRIPTEN__) || defined(__EMSCRIPTEN_PTHREADS__))) +# define Py_CAN_START_THREADS 1 +#endif + +#ifdef WITH_THREAD +# ifdef Py_BUILD_CORE +# ifdef HAVE_THREAD_LOCAL +# error "HAVE_THREAD_LOCAL is already defined" +# endif +# define HAVE_THREAD_LOCAL 1 +# ifdef thread_local +# define _Py_thread_local thread_local +# elif __STDC_VERSION__ >= 201112L && !defined(__STDC_NO_THREADS__) +# define _Py_thread_local _Thread_local +# elif defined(_MSC_VER) /* AKA NT_THREADS */ +# define _Py_thread_local __declspec(thread) +# elif defined(__GNUC__) /* includes clang */ +# define _Py_thread_local __thread +# else + // fall back to the PyThread_tss_*() API, or ignore. +# undef HAVE_THREAD_LOCAL +# endif +# endif +#endif + +#if defined(__ANDROID__) || defined(__VXWORKS__) + // Use UTF-8 as the locale encoding, ignore the LC_CTYPE locale. + // See _Py_GetLocaleEncoding(), PyUnicode_DecodeLocale() + // and PyUnicode_EncodeLocale(). +# define _Py_FORCE_UTF8_LOCALE +#endif + +#if defined(_Py_FORCE_UTF8_LOCALE) || defined(__APPLE__) + // Use UTF-8 as the filesystem encoding. + // See PyUnicode_DecodeFSDefaultAndSize(), PyUnicode_EncodeFSDefault(), + // Py_DecodeLocale() and Py_EncodeLocale(). +# define _Py_FORCE_UTF8_FS_ENCODING +#endif + +/* Mark a function which cannot return. Example: + PyAPI_FUNC(void) _Py_NO_RETURN PyThread_exit_thread(void); + + XLC support is intentionally omitted due to bpo-40244 */ +#ifndef _Py_NO_RETURN +#if defined(__clang__) || \ + (defined(__GNUC__) && \ + ((__GNUC__ >= 3) || \ + (__GNUC__ == 2) && (__GNUC_MINOR__ >= 5))) +# define _Py_NO_RETURN __attribute__((__noreturn__)) +#elif defined(_MSC_VER) +# define _Py_NO_RETURN __declspec(noreturn) +#else +# define _Py_NO_RETURN +#endif +#endif + + +// Preprocessor check for a builtin preprocessor function. Always return 0 +// if __has_builtin() macro is not defined. +// +// __has_builtin() is available on clang and GCC 10. +#ifdef __has_builtin +# define _Py__has_builtin(x) __has_builtin(x) +#else +# define _Py__has_builtin(x) 0 +#endif + +// Preprocessor check for a compiler __attribute__. Always return 0 +// if __has_attribute() macro is not defined. +#ifdef __has_attribute +# define _Py__has_attribute(x) __has_attribute(x) +#else +# define _Py__has_attribute(x) 0 +#endif + +// _Py_TYPEOF(expr) gets the type of an expression. +// +// Example: _Py_TYPEOF(x) x_copy = (x); +// +// The macro is only defined if GCC or clang compiler is used. +#if defined(__GNUC__) || defined(__clang__) +# define _Py_TYPEOF(expr) __typeof__(expr) +#endif + + +/* A convenient way for code to know if sanitizers are enabled. 
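+   For example, a routine that deliberately reads memory a sanitizer would
+   flag can be compiled out under #ifdef _Py_MEMORY_SANITIZER.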
*/
+#if defined(__has_feature)
+#  if __has_feature(memory_sanitizer)
+#    if !defined(_Py_MEMORY_SANITIZER)
+#      define _Py_MEMORY_SANITIZER
+#    endif
+#  endif
+#  if __has_feature(address_sanitizer)
+#    if !defined(_Py_ADDRESS_SANITIZER)
+#      define _Py_ADDRESS_SANITIZER
+#    endif
+#  endif
+#  if __has_feature(thread_sanitizer)
+#    if !defined(_Py_THREAD_SANITIZER)
+#      define _Py_THREAD_SANITIZER
+#    endif
+#  endif
+#elif defined(__GNUC__)
+#  if defined(__SANITIZE_ADDRESS__)
+#    define _Py_ADDRESS_SANITIZER
+#  endif
+#  if defined(__SANITIZE_THREAD__)
+#    define _Py_THREAD_SANITIZER
+#  endif
+#endif
+
+
+/* AIX has __bool__ redefined in its system header file. */
+#if defined(_AIX) && defined(__bool__)
+#undef __bool__
+#endif
+
+// Make sure we have maximum alignment, even if the current compiler
+// does not support max_align_t. Note that:
+// - Autoconf reports the alignment of unknown types as 0.
+// - 'long double' has maximum alignment on *most* platforms, which
+//   looks like the best we can do for pre-C11 compilers.
+// - The value is tested, see test_alignof_max_align_t
+#if !defined(ALIGNOF_MAX_ALIGN_T) || ALIGNOF_MAX_ALIGN_T == 0
+#  undef ALIGNOF_MAX_ALIGN_T
+#  define ALIGNOF_MAX_ALIGN_T _Alignof(long double)
+#endif
+
+#ifndef PY_CXX_CONST
+#  ifdef __cplusplus
+#    define PY_CXX_CONST const
+#  else
+#    define PY_CXX_CONST
+#  endif
+#endif
+
+#if defined(__sgi) && !defined(_SGI_MP_SOURCE)
+#  define _SGI_MP_SOURCE
+#endif
+
+
+// _Py_NONSTRING: The nonstring variable attribute specifies that an object or
+// member declaration with type array of char, signed char, or unsigned char,
+// or pointer to such a type is intended to store character arrays that do not
+// necessarily contain a terminating NUL.
+//
+// Usage:
+//
+//    char name [8] _Py_NONSTRING;
+#if _Py__has_attribute(nonstring)
+#  define _Py_NONSTRING __attribute__((nonstring))
+#else
+#  define _Py_NONSTRING
+#endif
+
+
+#endif /* Py_PYPORT_H */
diff --git a/Include/pystate.h b/Include/pystate.h
new file mode 100644
index 0000000000000000000000000000000000000000..727b8fbfffe0e674c48e3b0594dc5c54f2221f97
--- /dev/null
+++ b/Include/pystate.h
@@ -0,0 +1,132 @@
+/* Thread and interpreter state structures and their interfaces */
+
+
+#ifndef Py_PYSTATE_H
+#define Py_PYSTATE_H
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* This limitation is for performance and simplicity. If needed it can be
+removed (with effort). */
+#define MAX_CO_EXTRA_USERS 255
+
+PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_New(void);
+PyAPI_FUNC(void) PyInterpreterState_Clear(PyInterpreterState *);
+PyAPI_FUNC(void) PyInterpreterState_Delete(PyInterpreterState *);
+
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03090000
+/* New in 3.9 */
+/* Get the current interpreter state.
+
+   Issue a fatal error if there is no current Python thread state or no
+   current interpreter. It cannot return NULL.
+
+   The caller must hold the GIL.
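+
+   For example (an illustrative sketch), per-interpreter data can be kept in
+   the dict returned by PyInterpreterState_GetDict(PyInterpreterState_Get());
+   PyInterpreterState_GetDict() is declared just below.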
*/
+PyAPI_FUNC(PyInterpreterState *) PyInterpreterState_Get(void);
+#endif
+
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03080000
+/* New in 3.8 */
+PyAPI_FUNC(PyObject *) PyInterpreterState_GetDict(PyInterpreterState *);
+#endif
+
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03070000
+/* New in 3.7 */
+PyAPI_FUNC(int64_t) PyInterpreterState_GetID(PyInterpreterState *);
+#endif
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000
+
+/* State unique per thread */
+
+/* New in 3.3 */
+PyAPI_FUNC(int) PyState_AddModule(PyObject*, PyModuleDef*);
+PyAPI_FUNC(int) PyState_RemoveModule(PyModuleDef*);
+#endif
+PyAPI_FUNC(PyObject*) PyState_FindModule(PyModuleDef*);
+
+PyAPI_FUNC(PyThreadState *) PyThreadState_New(PyInterpreterState *);
+PyAPI_FUNC(void) PyThreadState_Clear(PyThreadState *);
+PyAPI_FUNC(void) PyThreadState_Delete(PyThreadState *);
+
+/* Get the current thread state.
+
+   When the current thread state is NULL, this issues a fatal error (so that
+   the caller needn't check for NULL).
+
+   The caller must hold the GIL.
+
+   See also PyThreadState_GetUnchecked() and _PyThreadState_GET(). */
+PyAPI_FUNC(PyThreadState *) PyThreadState_Get(void);
+
+// Alias to PyThreadState_Get()
+#define PyThreadState_GET() PyThreadState_Get()
+
+PyAPI_FUNC(PyThreadState *) PyThreadState_Swap(PyThreadState *);
+PyAPI_FUNC(PyObject *) PyThreadState_GetDict(void);
+PyAPI_FUNC(int) PyThreadState_SetAsyncExc(unsigned long, PyObject *);
+
+#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03090000
+/* New in 3.9 */
+PyAPI_FUNC(PyInterpreterState*) PyThreadState_GetInterpreter(PyThreadState *tstate);
+PyAPI_FUNC(PyFrameObject*) PyThreadState_GetFrame(PyThreadState *tstate);
+PyAPI_FUNC(uint64_t) PyThreadState_GetID(PyThreadState *tstate);
+#endif
+
+typedef
+    enum {PyGILState_LOCKED, PyGILState_UNLOCKED}
+        PyGILState_STATE;
+
+
+/* Ensure that the current thread is ready to call the Python
+   C API, regardless of the current state of Python, or of its
+   thread lock. This may be called as many times as desired
+   by a thread so long as each call is matched with a call to
+   PyGILState_Release(). In general, other thread-state APIs may
+   be used between _Ensure() and _Release() calls, so long as the
+   thread-state is restored to its previous state before the Release().
+   For example, normal use of the Py_BEGIN_ALLOW_THREADS/
+   Py_END_ALLOW_THREADS macros is acceptable.
+
+   The return value is an opaque "handle" to the thread state when
+   PyGILState_Ensure() was called, and must be passed to
+   PyGILState_Release() to ensure Python is left in the same state. Even
+   though recursive calls are allowed, these handles can *not* be shared -
+   each unique call to PyGILState_Ensure must save the handle for its
+   call to PyGILState_Release.
+
+   When the function returns, the current thread will hold the GIL.
+
+   Failure is a fatal error.
+*/
+PyAPI_FUNC(PyGILState_STATE) PyGILState_Ensure(void);
+
+/* Release any resources previously acquired. After this call, Python's
+   state will be the same as it was prior to the corresponding
+   PyGILState_Ensure() call (but generally this state will be unknown to
+   the caller, hence the use of the GILState API.)
+
+   Every call to PyGILState_Ensure must be matched by a call to
+   PyGILState_Release on the same thread.
+*/
+PyAPI_FUNC(void) PyGILState_Release(PyGILState_STATE);
+
+/* Helper/diagnostic function - get the current thread state for
+   this thread. May return NULL if no GILState API has been used
+   on the current thread.
Note that the main thread always has such a + thread-state, even if no auto-thread-state call has been made + on the main thread. +*/ +PyAPI_FUNC(PyThreadState *) PyGILState_GetThisThreadState(void); + + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_PYSTATE_H +# include "cpython/pystate.h" +# undef Py_CPYTHON_PYSTATE_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_PYSTATE_H */ diff --git a/Include/pystats.h b/Include/pystats.h new file mode 100644 index 0000000000000000000000000000000000000000..acfa32201711e07b26188e62f78e1e58217dad09 --- /dev/null +++ b/Include/pystats.h @@ -0,0 +1,26 @@ +// Statistics on Python performance (public API). +// +// Define _Py_INCREF_STAT_INC() and _Py_DECREF_STAT_INC() used by Py_INCREF() +// and Py_DECREF(). +// +// See Include/cpython/pystats.h for the full API. + +#ifndef Py_PYSTATS_H +#define Py_PYSTATS_H +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(Py_STATS) && !defined(Py_LIMITED_API) +# define Py_CPYTHON_PYSTATS_H +# include "cpython/pystats.h" +# undef Py_CPYTHON_PYSTATS_H +#else +# define _Py_INCREF_STAT_INC() ((void)0) +# define _Py_DECREF_STAT_INC() ((void)0) +#endif // !Py_STATS + +#ifdef __cplusplus +} +#endif +#endif // !Py_PYSTATS_H diff --git a/Include/pystrcmp.h b/Include/pystrcmp.h new file mode 100644 index 0000000000000000000000000000000000000000..edb12397e3cbcc761a5ba28221215a62193a48ba --- /dev/null +++ b/Include/pystrcmp.h @@ -0,0 +1,23 @@ +#ifndef Py_STRCMP_H +#define Py_STRCMP_H + +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_FUNC(int) PyOS_mystrnicmp(const char *, const char *, Py_ssize_t); +PyAPI_FUNC(int) PyOS_mystricmp(const char *, const char *); + +#ifdef MS_WINDOWS +#define PyOS_strnicmp strnicmp +#define PyOS_stricmp stricmp +#else +#define PyOS_strnicmp PyOS_mystrnicmp +#define PyOS_stricmp PyOS_mystricmp +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* !Py_STRCMP_H */ diff --git a/Include/pystrtod.h b/Include/pystrtod.h new file mode 100644 index 0000000000000000000000000000000000000000..e83d245eb623afabe3a5843e18bc0f6e2e5ba1b1 --- /dev/null +++ b/Include/pystrtod.h @@ -0,0 +1,37 @@ +#ifndef Py_STRTOD_H +#define Py_STRTOD_H + +#ifdef __cplusplus +extern "C" { +#endif + + +PyAPI_FUNC(double) PyOS_string_to_double(const char *str, + char **endptr, + PyObject *overflow_exception); + +/* The caller is responsible for calling PyMem_Free to free the buffer + that is returned. */ +PyAPI_FUNC(char *) PyOS_double_to_string(double val, + char format_code, + int precision, + int flags, + int *type); + +/* PyOS_double_to_string's "flags" parameter can be set to 0 or more of: */ +#define Py_DTSF_SIGN 0x01 /* always add the sign */ +#define Py_DTSF_ADD_DOT_0 0x02 /* if the result is an integer add ".0" */ +#define Py_DTSF_ALT 0x04 /* "alternate" formatting.
it's format_code + specific */ +#define Py_DTSF_NO_NEG_0 0x08 /* negative zero result is coerced to 0 */ + +/* PyOS_double_to_string's "type", if non-NULL, will be set to one of: */ +#define Py_DTST_FINITE 0 +#define Py_DTST_INFINITE 1 +#define Py_DTST_NAN 2 + +#ifdef __cplusplus +} +#endif + +#endif /* !Py_STRTOD_H */ diff --git a/Include/pythonrun.h b/Include/pythonrun.h new file mode 100644 index 0000000000000000000000000000000000000000..154c7450cb934f9492b1f2957f8a55545d27fdf4 --- /dev/null +++ b/Include/pythonrun.h @@ -0,0 +1,49 @@ + +/* Interfaces to parse and execute pieces of python code */ + +#ifndef Py_PYTHONRUN_H +#define Py_PYTHONRUN_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_FUNC(PyObject *) Py_CompileString(const char *, const char *, int); + +PyAPI_FUNC(void) PyErr_Print(void); +PyAPI_FUNC(void) PyErr_PrintEx(int); +PyAPI_FUNC(void) PyErr_Display(PyObject *, PyObject *, PyObject *); + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030C0000 +PyAPI_FUNC(void) PyErr_DisplayException(PyObject *); +#endif + + +/* Stuff with no proper home (yet) */ +PyAPI_DATA(int) (*PyOS_InputHook)(void); + +/* Stack size, in "pointers" (so we get extra safety margins + on 64-bit platforms). On a 32-bit platform, this translates + to an 8k margin. */ +#define PYOS_STACK_MARGIN 2048 + +#if defined(WIN32) && !defined(MS_WIN64) && !defined(_M_ARM) && defined(_MSC_VER) && _MSC_VER >= 1300 +/* Enable stack checking under Microsoft C */ +// When changing the platforms, ensure PyOS_CheckStack() docs are still correct +#define USE_STACKCHECK +#endif + +#ifdef USE_STACKCHECK +/* Check that we aren't overflowing our stack */ +PyAPI_FUNC(int) PyOS_CheckStack(void); +#endif + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_PYTHONRUN_H +# include "cpython/pythonrun.h" +# undef Py_CPYTHON_PYTHONRUN_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_PYTHONRUN_H */ diff --git a/Include/pythread.h b/Include/pythread.h new file mode 100644 index 0000000000000000000000000000000000000000..a3216c51d66165902b000a0fba0497f701a543d4 --- /dev/null +++ b/Include/pythread.h @@ -0,0 +1,112 @@ +#ifndef Py_PYTHREAD_H +#define Py_PYTHREAD_H + +typedef void *PyThread_type_lock; + +#ifdef __cplusplus +extern "C" { +#endif + +/* Return status codes for Python lock acquisition. Chosen for maximum + * backwards compatibility, ie failure -> 0, success -> 1. */ +typedef enum PyLockStatus { + PY_LOCK_FAILURE = 0, + PY_LOCK_ACQUIRED = 1, + PY_LOCK_INTR +} PyLockStatus; + +PyAPI_FUNC(void) PyThread_init_thread(void); +PyAPI_FUNC(unsigned long) PyThread_start_new_thread(void (*)(void *), void *); +PyAPI_FUNC(void) _Py_NO_RETURN PyThread_exit_thread(void); +PyAPI_FUNC(unsigned long) PyThread_get_thread_ident(void); + +#if (defined(__APPLE__) || defined(__linux__) || defined(_WIN32) \ + || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) \ + || defined(__OpenBSD__) || defined(__NetBSD__) \ + || defined(__DragonFly__) || defined(_AIX)) +#define PY_HAVE_THREAD_NATIVE_ID +PyAPI_FUNC(unsigned long) PyThread_get_thread_native_id(void); +#endif + +PyAPI_FUNC(PyThread_type_lock) PyThread_allocate_lock(void); +PyAPI_FUNC(void) PyThread_free_lock(PyThread_type_lock); +PyAPI_FUNC(int) PyThread_acquire_lock(PyThread_type_lock, int); +#define WAIT_LOCK 1 +#define NOWAIT_LOCK 0 + +// PY_TIMEOUT_T is the integral type used to specify timeouts when waiting +// on a lock (see PyThread_acquire_lock_timed() below). 
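 +/* Illustrative sketch (not part of the original header): a timed acquire + with a 500 ms timeout, error checking omitted and assuming the GIL is + released around the wait: + + PyThread_type_lock lock = PyThread_allocate_lock(); + PyLockStatus st = PyThread_acquire_lock_timed(lock, 500000, 0); + if (st == PY_LOCK_ACQUIRED) { + PyThread_release_lock(lock); + } + PyThread_free_lock(lock); + + The final 0 is intr_flag, so the wait cannot be interrupted by signals; + see the full contract below. */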
+#define PY_TIMEOUT_T long long + + +/* If microseconds == 0, the call is non-blocking: it returns immediately + even when the lock can't be acquired. + If microseconds > 0, the call waits up to the specified duration. + If microseconds < 0, the call waits until success (or abnormal failure) + + If *microseconds* is greater than PY_TIMEOUT_MAX, clamp the timeout to + PY_TIMEOUT_MAX microseconds. + + If intr_flag is true and the acquire is interrupted by a signal, then the + call will return PY_LOCK_INTR. The caller may reattempt to acquire the + lock. +*/ +PyAPI_FUNC(PyLockStatus) PyThread_acquire_lock_timed(PyThread_type_lock, + PY_TIMEOUT_T microseconds, + int intr_flag); + +PyAPI_FUNC(void) PyThread_release_lock(PyThread_type_lock); + +PyAPI_FUNC(size_t) PyThread_get_stacksize(void); +PyAPI_FUNC(int) PyThread_set_stacksize(size_t); + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 +PyAPI_FUNC(PyObject*) PyThread_GetInfo(void); +#endif + + +/* Thread Local Storage (TLS) API + TLS API is DEPRECATED. Use Thread Specific Storage (TSS) API. + + The existing TLS API has used int to represent TLS keys across all + platforms, but it is not POSIX-compliant. Therefore, the new TSS API uses + opaque data type to represent TSS keys to be compatible (see PEP 539). +*/ +Py_DEPRECATED(3.7) PyAPI_FUNC(int) PyThread_create_key(void); +Py_DEPRECATED(3.7) PyAPI_FUNC(void) PyThread_delete_key(int key); +Py_DEPRECATED(3.7) PyAPI_FUNC(int) PyThread_set_key_value(int key, + void *value); +Py_DEPRECATED(3.7) PyAPI_FUNC(void *) PyThread_get_key_value(int key); +Py_DEPRECATED(3.7) PyAPI_FUNC(void) PyThread_delete_key_value(int key); + +/* Cleanup after a fork */ +Py_DEPRECATED(3.7) PyAPI_FUNC(void) PyThread_ReInitTLS(void); + + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03070000 +/* New in 3.7 */ +/* Thread Specific Storage (TSS) API */ + +typedef struct _Py_tss_t Py_tss_t; /* opaque */ + +PyAPI_FUNC(Py_tss_t *) PyThread_tss_alloc(void); +PyAPI_FUNC(void) PyThread_tss_free(Py_tss_t *key); + +/* The parameter key must not be NULL. */ +PyAPI_FUNC(int) PyThread_tss_is_created(Py_tss_t *key); +PyAPI_FUNC(int) PyThread_tss_create(Py_tss_t *key); +PyAPI_FUNC(void) PyThread_tss_delete(Py_tss_t *key); +PyAPI_FUNC(int) PyThread_tss_set(Py_tss_t *key, void *value); +PyAPI_FUNC(void *) PyThread_tss_get(Py_tss_t *key); +#endif /* New in 3.7 */ + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_PYTHREAD_H +# include "cpython/pythread.h" +# undef Py_CPYTHON_PYTHREAD_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_PYTHREAD_H */ diff --git a/Include/pytypedefs.h b/Include/pytypedefs.h new file mode 100644 index 0000000000000000000000000000000000000000..e78ed56a3b67cd19a6fbce550cb9b145c719ed31 --- /dev/null +++ b/Include/pytypedefs.h @@ -0,0 +1,30 @@ +// Forward declarations of types of the Python C API. +// Declare them at the same place since redefining typedef is a C11 feature. +// Only use a forward declaration if there is an interdependency between two +// header files. 
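 +// Illustrative (not part of the original header; lookup_name is a +// hypothetical function): a forward declaration is enough for any +// pointer-based API, e.g. +// +// PyObject *lookup_name(PyThreadState *tstate, PyObject *name); +// +// compiles against these typedefs without the full struct definitions.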
+ +#ifndef Py_PYTYPEDEFS_H +#define Py_PYTYPEDEFS_H +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct PyModuleDef PyModuleDef; +typedef struct PyModuleDef_Slot PyModuleDef_Slot; +typedef struct PyMethodDef PyMethodDef; +typedef struct PyGetSetDef PyGetSetDef; +typedef struct PyMemberDef PyMemberDef; + +typedef struct _object PyObject; +typedef struct _longobject PyLongObject; +typedef struct _typeobject PyTypeObject; +typedef struct PyCodeObject PyCodeObject; +typedef struct _frame PyFrameObject; + +typedef struct _ts PyThreadState; +typedef struct _is PyInterpreterState; + +#ifdef __cplusplus +} +#endif +#endif // !Py_PYTYPEDEFS_H diff --git a/Include/rangeobject.h b/Include/rangeobject.h new file mode 100644 index 0000000000000000000000000000000000000000..d46ce7cd41b7417f877a5277c32bb3ef033c715a --- /dev/null +++ b/Include/rangeobject.h @@ -0,0 +1,27 @@ + +/* Range object interface */ + +#ifndef Py_RANGEOBJECT_H +#define Py_RANGEOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +/* +A range object represents an integer range. This is an immutable object; +a range cannot change its value after creation. + +Range objects behave like the corresponding tuple objects except that +they are represented by start, stop, and step data members. +*/ + +PyAPI_DATA(PyTypeObject) PyRange_Type; +PyAPI_DATA(PyTypeObject) PyRangeIter_Type; +PyAPI_DATA(PyTypeObject) PyLongRangeIter_Type; + +#define PyRange_Check(op) Py_IS_TYPE((op), &PyRange_Type) + +#ifdef __cplusplus +} +#endif +#endif /* !Py_RANGEOBJECT_H */ diff --git a/Include/setobject.h b/Include/setobject.h new file mode 100644 index 0000000000000000000000000000000000000000..62c9e6b13f89015c02c09463612c3e608e485d40 --- /dev/null +++ b/Include/setobject.h @@ -0,0 +1,49 @@ +/* Set object interface */ + +#ifndef Py_SETOBJECT_H +#define Py_SETOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_DATA(PyTypeObject) PySet_Type; +PyAPI_DATA(PyTypeObject) PyFrozenSet_Type; +PyAPI_DATA(PyTypeObject) PySetIter_Type; + +PyAPI_FUNC(PyObject *) PySet_New(PyObject *); +PyAPI_FUNC(PyObject *) PyFrozenSet_New(PyObject *); + +PyAPI_FUNC(int) PySet_Add(PyObject *set, PyObject *key); +PyAPI_FUNC(int) PySet_Clear(PyObject *set); +PyAPI_FUNC(int) PySet_Contains(PyObject *anyset, PyObject *key); +PyAPI_FUNC(int) PySet_Discard(PyObject *set, PyObject *key); +PyAPI_FUNC(PyObject *) PySet_Pop(PyObject *set); +PyAPI_FUNC(Py_ssize_t) PySet_Size(PyObject *anyset); + +#define PyFrozenSet_CheckExact(ob) Py_IS_TYPE((ob), &PyFrozenSet_Type) +#define PyFrozenSet_Check(ob) \ (Py_IS_TYPE((ob), &PyFrozenSet_Type) || \ PyType_IsSubtype(Py_TYPE(ob), &PyFrozenSet_Type)) + +#define PyAnySet_CheckExact(ob) \ (Py_IS_TYPE((ob), &PySet_Type) || Py_IS_TYPE((ob), &PyFrozenSet_Type)) +#define PyAnySet_Check(ob) \ (Py_IS_TYPE((ob), &PySet_Type) || Py_IS_TYPE((ob), &PyFrozenSet_Type) || \ PyType_IsSubtype(Py_TYPE(ob), &PySet_Type) || \ PyType_IsSubtype(Py_TYPE(ob), &PyFrozenSet_Type)) + +#define PySet_CheckExact(op) Py_IS_TYPE(op, &PySet_Type) +#define PySet_Check(ob) \ (Py_IS_TYPE((ob), &PySet_Type) || \ PyType_IsSubtype(Py_TYPE(ob), &PySet_Type)) + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_SETOBJECT_H +# include "cpython/setobject.h" +# undef Py_CPYTHON_SETOBJECT_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_SETOBJECT_H */ diff --git a/Include/sliceobject.h b/Include/sliceobject.h new file mode 100644 index 0000000000000000000000000000000000000000..35e2ea254ca80a4747c873ec00e1ae3b7dd01008 --- /dev/null +++ b/Include/sliceobject.h @@ -0,0 +1,69
@@ +#ifndef Py_SLICEOBJECT_H +#define Py_SLICEOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +/* The unique ellipsis object "..." */ + +PyAPI_DATA(PyObject) _Py_EllipsisObject; /* Don't use this directly */ + +#if defined(Py_LIMITED_API) && Py_LIMITED_API+0 >= 0x030D0000 +# define Py_Ellipsis Py_GetConstantBorrowed(Py_CONSTANT_ELLIPSIS) +#else +# define Py_Ellipsis (&_Py_EllipsisObject) +#endif + +/* Slice object interface */ + +/* + +A slice object containing start, stop, and step data members (the +names are from range). After much talk with Guido, it was decided to +let these be any arbitrary Python type. Py_None stands for omitted values. +*/ +#ifndef Py_LIMITED_API +typedef struct { + PyObject_HEAD + PyObject *start, *stop, *step; /* not NULL */ +} PySliceObject; +#endif + +PyAPI_DATA(PyTypeObject) PySlice_Type; +PyAPI_DATA(PyTypeObject) PyEllipsis_Type; + +#define PySlice_Check(op) Py_IS_TYPE((op), &PySlice_Type) + +PyAPI_FUNC(PyObject *) PySlice_New(PyObject* start, PyObject* stop, + PyObject* step); +#ifndef Py_LIMITED_API +PyAPI_FUNC(PyObject *) _PySlice_FromIndices(Py_ssize_t start, Py_ssize_t stop); +PyAPI_FUNC(int) _PySlice_GetLongIndices(PySliceObject *self, PyObject *length, + PyObject **start_ptr, PyObject **stop_ptr, + PyObject **step_ptr); +#endif +PyAPI_FUNC(int) PySlice_GetIndices(PyObject *r, Py_ssize_t length, + Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step); +Py_DEPRECATED(3.7) +PyAPI_FUNC(int) PySlice_GetIndicesEx(PyObject *r, Py_ssize_t length, + Py_ssize_t *start, Py_ssize_t *stop, + Py_ssize_t *step, + Py_ssize_t *slicelength); + +#if !defined(Py_LIMITED_API) || (Py_LIMITED_API+0 >= 0x03050400 && Py_LIMITED_API+0 < 0x03060000) || Py_LIMITED_API+0 >= 0x03060100 +#define PySlice_GetIndicesEx(slice, length, start, stop, step, slicelen) ( \ PySlice_Unpack((slice), (start), (stop), (step)) < 0 ? \ ((*(slicelen) = 0), -1) : \ ((*(slicelen) = PySlice_AdjustIndices((length), (start), (stop), *(step))), \ 0)) +PyAPI_FUNC(int) PySlice_Unpack(PyObject *slice, + Py_ssize_t *start, Py_ssize_t *stop, Py_ssize_t *step); +PyAPI_FUNC(Py_ssize_t) PySlice_AdjustIndices(Py_ssize_t length, + Py_ssize_t *start, Py_ssize_t *stop, + Py_ssize_t step); +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_SLICEOBJECT_H */ diff --git a/Include/structmember.h b/Include/structmember.h new file mode 100644 index 0000000000000000000000000000000000000000..f6e8fd829892f41cc7789123b84a5bb3c15e0485 --- /dev/null +++ b/Include/structmember.h @@ -0,0 +1,56 @@ +#ifndef Py_STRUCTMEMBER_H +#define Py_STRUCTMEMBER_H +#ifdef __cplusplus +extern "C" { +#endif + + +/* Interface to map C struct members to Python object attributes + * + * This header is deprecated: new code should not use stuff from here. + * New definitions are in descrobject.h. + * + * However, there's nothing wrong with old code continuing to use it, + * and there's not much maintenance overhead in keeping a few aliases. + * So, don't be too eager to convert old code. + * + * It uses names not prefixed with Py_. + * It is also *not* included from Python.h and must be included individually.
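 + * + * Illustrative sketch (MyObject is hypothetical, not from the original + * header): with the aliases below, a legacy member table keeps compiling + * unchanged: + * + * static PyMemberDef my_members[] = { + * {"value", T_INT, offsetof(MyObject, value), READONLY, "doc"}, + * {NULL} + * }; + * + * where T_INT and READONLY expand to Py_T_INT and Py_READONLY from + * descrobject.h.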
+ */ + +#include <stddef.h> /* For offsetof (not always provided by Python.h) */ + +/* Types */ +#define T_SHORT Py_T_SHORT +#define T_INT Py_T_INT +#define T_LONG Py_T_LONG +#define T_FLOAT Py_T_FLOAT +#define T_DOUBLE Py_T_DOUBLE +#define T_STRING Py_T_STRING +#define T_OBJECT _Py_T_OBJECT +#define T_CHAR Py_T_CHAR +#define T_BYTE Py_T_BYTE +#define T_UBYTE Py_T_UBYTE +#define T_USHORT Py_T_USHORT +#define T_UINT Py_T_UINT +#define T_ULONG Py_T_ULONG +#define T_STRING_INPLACE Py_T_STRING_INPLACE +#define T_BOOL Py_T_BOOL +#define T_OBJECT_EX Py_T_OBJECT_EX +#define T_LONGLONG Py_T_LONGLONG +#define T_ULONGLONG Py_T_ULONGLONG +#define T_PYSSIZET Py_T_PYSSIZET +#define T_NONE _Py_T_NONE + +/* Flags */ +#define READONLY Py_READONLY +#define PY_AUDIT_READ Py_AUDIT_READ +#define READ_RESTRICTED Py_AUDIT_READ +#define PY_WRITE_RESTRICTED _Py_WRITE_RESTRICTED +#define RESTRICTED (READ_RESTRICTED | PY_WRITE_RESTRICTED) + + +#ifdef __cplusplus +} +#endif +#endif /* !Py_STRUCTMEMBER_H */ diff --git a/Include/structseq.h b/Include/structseq.h new file mode 100644 index 0000000000000000000000000000000000000000..29e24fee54e6135ee93be1f3faf4d4b7d914c8f3 --- /dev/null +++ b/Include/structseq.h @@ -0,0 +1,46 @@ + +/* Named tuple object interface */ + +#ifndef Py_STRUCTSEQ_H +#define Py_STRUCTSEQ_H +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct PyStructSequence_Field { + const char *name; + const char *doc; +} PyStructSequence_Field; + +typedef struct PyStructSequence_Desc { + const char *name; + const char *doc; + PyStructSequence_Field *fields; + int n_in_sequence; +} PyStructSequence_Desc; + +PyAPI_DATA(const char * const) PyStructSequence_UnnamedField; + +#ifndef Py_LIMITED_API +PyAPI_FUNC(void) PyStructSequence_InitType(PyTypeObject *type, + PyStructSequence_Desc *desc); +PyAPI_FUNC(int) PyStructSequence_InitType2(PyTypeObject *type, + PyStructSequence_Desc *desc); +#endif +PyAPI_FUNC(PyTypeObject*) PyStructSequence_NewType(PyStructSequence_Desc *desc); + +PyAPI_FUNC(PyObject *) PyStructSequence_New(PyTypeObject* type); + +PyAPI_FUNC(void) PyStructSequence_SetItem(PyObject*, Py_ssize_t, PyObject*); +PyAPI_FUNC(PyObject*) PyStructSequence_GetItem(PyObject*, Py_ssize_t); + +#ifndef Py_LIMITED_API +typedef PyTupleObject PyStructSequence; +#define PyStructSequence_SET_ITEM PyStructSequence_SetItem +#define PyStructSequence_GET_ITEM PyStructSequence_GetItem +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_STRUCTSEQ_H */ diff --git a/Include/sysmodule.h b/Include/sysmodule.h new file mode 100644 index 0000000000000000000000000000000000000000..5a0af2e1578eb70225d56f4e22d7d5ffa7eab1d4 --- /dev/null +++ b/Include/sysmodule.h @@ -0,0 +1,44 @@ +#ifndef Py_SYSMODULE_H +#define Py_SYSMODULE_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_FUNC(PyObject *) PySys_GetObject(const char *); +PyAPI_FUNC(int) PySys_SetObject(const char *, PyObject *); + +Py_DEPRECATED(3.11) PyAPI_FUNC(void) PySys_SetArgv(int, wchar_t **); +Py_DEPRECATED(3.11) PyAPI_FUNC(void) PySys_SetArgvEx(int, wchar_t **, int); + +PyAPI_FUNC(void) PySys_WriteStdout(const char *format, ...) + Py_GCC_ATTRIBUTE((format(printf, 1, 2))); +PyAPI_FUNC(void) PySys_WriteStderr(const char *format, ...)
+ Py_GCC_ATTRIBUTE((format(printf, 1, 2))); +PyAPI_FUNC(void) PySys_FormatStdout(const char *format, ...); +PyAPI_FUNC(void) PySys_FormatStderr(const char *format, ...); + +Py_DEPRECATED(3.13) PyAPI_FUNC(void) PySys_ResetWarnOptions(void); + +PyAPI_FUNC(PyObject *) PySys_GetXOptions(void); + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030d0000 +PyAPI_FUNC(int) PySys_Audit( + const char *event, + const char *argFormat, + ...); + +PyAPI_FUNC(int) PySys_AuditTuple( + const char *event, + PyObject *args); +#endif + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_SYSMODULE_H +# include "cpython/sysmodule.h" +# undef Py_CPYTHON_SYSMODULE_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_SYSMODULE_H */ diff --git a/Include/traceback.h b/Include/traceback.h new file mode 100644 index 0000000000000000000000000000000000000000..2b40cc9fc3261708db590f523dc767acf3742531 --- /dev/null +++ b/Include/traceback.h @@ -0,0 +1,26 @@ +#ifndef Py_TRACEBACK_H +#define Py_TRACEBACK_H +#ifdef __cplusplus +extern "C" { +#endif + +/* Traceback interface */ + +PyAPI_FUNC(int) PyTraceBack_Here(PyFrameObject *); +PyAPI_FUNC(int) PyTraceBack_Print(PyObject *, PyObject *); + +/* Reveal traceback type so we can typecheck traceback objects */ +PyAPI_DATA(PyTypeObject) PyTraceBack_Type; +#define PyTraceBack_Check(v) Py_IS_TYPE((v), &PyTraceBack_Type) + + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_TRACEBACK_H +# include "cpython/traceback.h" +# undef Py_CPYTHON_TRACEBACK_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_TRACEBACK_H */ diff --git a/Include/tupleobject.h b/Include/tupleobject.h new file mode 100644 index 0000000000000000000000000000000000000000..1f9ab54be65f87e5e78b1ea192a27886b2e2cbdb --- /dev/null +++ b/Include/tupleobject.h @@ -0,0 +1,46 @@ +/* Tuple object interface */ + +#ifndef Py_TUPLEOBJECT_H +#define Py_TUPLEOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +/* +Another generally useful object type is a tuple of object pointers. +For Python, this is an immutable type. C code can change the tuple items +(but not their number), and even use tuples as general-purpose arrays of +object references, but in general only brand new tuples should be mutated, +not ones that might already have been exposed to Python code. + +*** WARNING *** PyTuple_SetItem does not increment the new item's reference +count, but does decrement the reference count of the item it replaces, +if not nil. It does *decrement* the reference count if it is *not* +inserted in the tuple. Similarly, PyTuple_GetItem does not increment the +returned item's reference count. 
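 + + For example (an illustrative sketch, not from the original header), + filling a fresh tuple transfers ownership of each new reference, so no + Py_DECREF of the items is needed on success: + + PyObject *t = PyTuple_New(2); + PyTuple_SetItem(t, 0, PyLong_FromLong(1)); + PyTuple_SetItem(t, 1, PyUnicode_FromString("x")); + + (Error checking omitted for brevity.)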
+*/ + +PyAPI_DATA(PyTypeObject) PyTuple_Type; +PyAPI_DATA(PyTypeObject) PyTupleIter_Type; + +#define PyTuple_Check(op) \ + PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_TUPLE_SUBCLASS) +#define PyTuple_CheckExact(op) Py_IS_TYPE((op), &PyTuple_Type) + +PyAPI_FUNC(PyObject *) PyTuple_New(Py_ssize_t size); +PyAPI_FUNC(Py_ssize_t) PyTuple_Size(PyObject *); +PyAPI_FUNC(PyObject *) PyTuple_GetItem(PyObject *, Py_ssize_t); +PyAPI_FUNC(int) PyTuple_SetItem(PyObject *, Py_ssize_t, PyObject *); +PyAPI_FUNC(PyObject *) PyTuple_GetSlice(PyObject *, Py_ssize_t, Py_ssize_t); +PyAPI_FUNC(PyObject *) PyTuple_Pack(Py_ssize_t, ...); + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_TUPLEOBJECT_H +# include "cpython/tupleobject.h" +# undef Py_CPYTHON_TUPLEOBJECT_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_TUPLEOBJECT_H */ diff --git a/Include/typeslots.h b/Include/typeslots.h new file mode 100644 index 0000000000000000000000000000000000000000..506b05580de146bbcae3e25e1e9ec18c773e9014 --- /dev/null +++ b/Include/typeslots.h @@ -0,0 +1,88 @@ +/* Do not renumber the file; these numbers are part of the stable ABI. */ +#define Py_bf_getbuffer 1 +#define Py_bf_releasebuffer 2 +#define Py_mp_ass_subscript 3 +#define Py_mp_length 4 +#define Py_mp_subscript 5 +#define Py_nb_absolute 6 +#define Py_nb_add 7 +#define Py_nb_and 8 +#define Py_nb_bool 9 +#define Py_nb_divmod 10 +#define Py_nb_float 11 +#define Py_nb_floor_divide 12 +#define Py_nb_index 13 +#define Py_nb_inplace_add 14 +#define Py_nb_inplace_and 15 +#define Py_nb_inplace_floor_divide 16 +#define Py_nb_inplace_lshift 17 +#define Py_nb_inplace_multiply 18 +#define Py_nb_inplace_or 19 +#define Py_nb_inplace_power 20 +#define Py_nb_inplace_remainder 21 +#define Py_nb_inplace_rshift 22 +#define Py_nb_inplace_subtract 23 +#define Py_nb_inplace_true_divide 24 +#define Py_nb_inplace_xor 25 +#define Py_nb_int 26 +#define Py_nb_invert 27 +#define Py_nb_lshift 28 +#define Py_nb_multiply 29 +#define Py_nb_negative 30 +#define Py_nb_or 31 +#define Py_nb_positive 32 +#define Py_nb_power 33 +#define Py_nb_remainder 34 +#define Py_nb_rshift 35 +#define Py_nb_subtract 36 +#define Py_nb_true_divide 37 +#define Py_nb_xor 38 +#define Py_sq_ass_item 39 +#define Py_sq_concat 40 +#define Py_sq_contains 41 +#define Py_sq_inplace_concat 42 +#define Py_sq_inplace_repeat 43 +#define Py_sq_item 44 +#define Py_sq_length 45 +#define Py_sq_repeat 46 +#define Py_tp_alloc 47 +#define Py_tp_base 48 +#define Py_tp_bases 49 +#define Py_tp_call 50 +#define Py_tp_clear 51 +#define Py_tp_dealloc 52 +#define Py_tp_del 53 +#define Py_tp_descr_get 54 +#define Py_tp_descr_set 55 +#define Py_tp_doc 56 +#define Py_tp_getattr 57 +#define Py_tp_getattro 58 +#define Py_tp_hash 59 +#define Py_tp_init 60 +#define Py_tp_is_gc 61 +#define Py_tp_iter 62 +#define Py_tp_iternext 63 +#define Py_tp_methods 64 +#define Py_tp_new 65 +#define Py_tp_repr 66 +#define Py_tp_richcompare 67 +#define Py_tp_setattr 68 +#define Py_tp_setattro 69 +#define Py_tp_str 70 +#define Py_tp_traverse 71 +#define Py_tp_members 72 +#define Py_tp_getset 73 +#define Py_tp_free 74 +#define Py_nb_matrix_multiply 75 +#define Py_nb_inplace_matrix_multiply 76 +#define Py_am_await 77 +#define Py_am_aiter 78 +#define Py_am_anext 79 +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03050000 +/* New in 3.5 */ +#define Py_tp_finalize 80 +#endif +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030A0000 +/* New in 3.10 */ +#define Py_am_send 81 +#endif diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h new file mode 
100644 index 0000000000000000000000000000000000000000..dee00715b3c51d576da7bfbcd84ae362f5f14a0b --- /dev/null +++ b/Include/unicodeobject.h @@ -0,0 +1,1021 @@ +#ifndef Py_UNICODEOBJECT_H +#define Py_UNICODEOBJECT_H + +/* + +Unicode implementation based on original code by Fredrik Lundh, +modified by Marc-Andre Lemburg (mal@lemburg.com) according to the +Unicode Integration Proposal. (See +http://www.egenix.com/files/python/unicode-proposal.txt). + +Copyright (c) Corporation for National Research Initiatives. + + + Original header: + -------------------------------------------------------------------- + + * Yet another Unicode string type for Python. This type supports the + * 16-bit Basic Multilingual Plane (BMP) only. + * + * Written by Fredrik Lundh, January 1999. + * + * Copyright (c) 1999 by Secret Labs AB. + * Copyright (c) 1999 by Fredrik Lundh. + * + * fredrik@pythonware.com + * http://www.pythonware.com + * + * -------------------------------------------------------------------- + * This Unicode String Type is + * + * Copyright (c) 1999 by Secret Labs AB + * Copyright (c) 1999 by Fredrik Lundh + * + * By obtaining, using, and/or copying this software and/or its + * associated documentation, you agree that you have read, understood, + * and will comply with the following terms and conditions: + * + * Permission to use, copy, modify, and distribute this software and its + * associated documentation for any purpose and without fee is hereby + * granted, provided that the above copyright notice appears in all + * copies, and that both that copyright notice and this permission notice + * appear in supporting documentation, and that the name of Secret Labs + * AB or the author not be used in advertising or publicity pertaining to + * distribution of the software without specific, written prior + * permission. + * + * SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO + * THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT + * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * -------------------------------------------------------------------- */ + +/* === Internal API ======================================================= */ + +/* --- Internal Unicode Format -------------------------------------------- */ + +/* Python 3.x requires unicode */ +#define Py_USING_UNICODE + +#ifndef SIZEOF_WCHAR_T +#error Must define SIZEOF_WCHAR_T +#endif + +#define Py_UNICODE_SIZE SIZEOF_WCHAR_T + +/* If wchar_t can be used for UCS-4 storage, set Py_UNICODE_WIDE. + Otherwise, Unicode strings are stored as UCS-2 (with limited support + for UTF-16) */ + +#if Py_UNICODE_SIZE >= 4 +#define Py_UNICODE_WIDE +#endif + +/* Set these flags if the platform has "wchar.h" and the + wchar_t type is a 16-bit unsigned type */ +/* #define HAVE_WCHAR_H */ +/* #define HAVE_USABLE_WCHAR_T */ + +/* If the compiler provides a wchar_t type we try to support it + through the interface functions PyUnicode_FromWideChar(), + PyUnicode_AsWideChar() and PyUnicode_AsWideCharString(). */ + +#ifdef HAVE_USABLE_WCHAR_T +# ifndef HAVE_WCHAR_H +# define HAVE_WCHAR_H +# endif +#endif + +/* Py_UCS4 and Py_UCS2 are typedefs for the respective + unicode representations. 
*/ +typedef uint32_t Py_UCS4; +typedef uint16_t Py_UCS2; +typedef uint8_t Py_UCS1; + +#ifdef __cplusplus +extern "C" { +#endif + + +PyAPI_DATA(PyTypeObject) PyUnicode_Type; +PyAPI_DATA(PyTypeObject) PyUnicodeIter_Type; + +#define PyUnicode_Check(op) \ + PyType_FastSubclass(Py_TYPE(op), Py_TPFLAGS_UNICODE_SUBCLASS) +#define PyUnicode_CheckExact(op) Py_IS_TYPE((op), &PyUnicode_Type) + +/* --- Constants ---------------------------------------------------------- */ + +/* This Unicode character will be used as replacement character during + decoding if the errors argument is set to "replace". Note: the + Unicode character U+FFFD is the official REPLACEMENT CHARACTER in + Unicode 3.0. */ + +#define Py_UNICODE_REPLACEMENT_CHARACTER ((Py_UCS4) 0xFFFD) + +/* === Public API ========================================================= */ + +/* Similar to PyUnicode_FromUnicode(), but u points to UTF-8 encoded bytes */ +PyAPI_FUNC(PyObject*) PyUnicode_FromStringAndSize( + const char *u, /* UTF-8 encoded string */ + Py_ssize_t size /* size of buffer */ + ); + +/* Similar to PyUnicode_FromUnicode(), but u points to null-terminated + UTF-8 encoded bytes. The size is determined with strlen(). */ +PyAPI_FUNC(PyObject*) PyUnicode_FromString( + const char *u /* UTF-8 encoded string */ + ); + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 +PyAPI_FUNC(PyObject*) PyUnicode_Substring( + PyObject *str, + Py_ssize_t start, + Py_ssize_t end); +#endif + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 +/* Copy the string into a UCS4 buffer including the null character if copy_null + is set. Return NULL and raise an exception on error. Raise a SystemError if + the buffer is smaller than the string. Return buffer on success. + + buflen is the length of the buffer in (Py_UCS4) characters. */ +PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4( + PyObject *unicode, + Py_UCS4* buffer, + Py_ssize_t buflen, + int copy_null); + +/* Copy the string into a UCS4 buffer. A new buffer is allocated using + * PyMem_Malloc; if this fails, NULL is returned with a memory error + exception set. */ +PyAPI_FUNC(Py_UCS4*) PyUnicode_AsUCS4Copy(PyObject *unicode); +#endif + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 +/* Get the length of the Unicode object. */ + +PyAPI_FUNC(Py_ssize_t) PyUnicode_GetLength( + PyObject *unicode +); +#endif + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 +/* Read a character from the string. */ + +PyAPI_FUNC(Py_UCS4) PyUnicode_ReadChar( + PyObject *unicode, + Py_ssize_t index + ); + +/* Write a character to the string. The string must have been created through + PyUnicode_New, must not be shared, and must not have been hashed yet. + + Return 0 on success, -1 on error. */ + +PyAPI_FUNC(int) PyUnicode_WriteChar( + PyObject *unicode, + Py_ssize_t index, + Py_UCS4 character + ); +#endif + +/* Resize a Unicode object. The length is the number of codepoints. + + *unicode is modified to point to the new (resized) object and 0 + returned on success. + + Try to resize the string in place (which is usually faster than allocating + a new string and copy characters), or create a new string. + + Error handling is implemented as follows: an exception is set, -1 + is returned and *unicode left untouched. + + WARNING: The function doesn't check string content, the result may not be a + string in canonical representation. 
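 + + A minimal call pattern (an illustrative sketch, not from the original + header): + + if (PyUnicode_Resize(&unicode, new_length) < 0) { + ... exception set, unicode left unchanged ... + }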
*/ + +PyAPI_FUNC(int) PyUnicode_Resize( + PyObject **unicode, /* Pointer to the Unicode object */ + Py_ssize_t length /* New length */ + ); + +/* Decode obj to a Unicode object. + + bytes, bytearray and other bytes-like objects are decoded according to the + given encoding and error handler. The encoding and error handler can be + NULL to have the interface use UTF-8 and "strict". + + All other objects (including Unicode objects) raise an exception. + + The API returns NULL in case of an error. The caller is responsible + for decref'ing the returned objects. + +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_FromEncodedObject( + PyObject *obj, /* Object */ + const char *encoding, /* encoding */ + const char *errors /* error handling */ + ); + +/* Copy an instance of a Unicode subtype to a new true Unicode object if + necessary. If obj is already a true Unicode object (not a subtype), return + the reference with *incremented* refcount. + + The API returns NULL in case of an error. The caller is responsible + for decref'ing the returned objects. + +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_FromObject( + PyObject *obj /* Object */ + ); + +PyAPI_FUNC(PyObject *) PyUnicode_FromFormatV( + const char *format, /* ASCII-encoded string */ + va_list vargs + ); +PyAPI_FUNC(PyObject *) PyUnicode_FromFormat( + const char *format, /* ASCII-encoded string */ + ... + ); + +PyAPI_FUNC(void) PyUnicode_InternInPlace(PyObject **); +PyAPI_FUNC(PyObject *) PyUnicode_InternFromString( + const char *u /* UTF-8 encoded string */ + ); + +/* --- wchar_t support for platforms which support it --------------------- */ + +#ifdef HAVE_WCHAR_H + +/* Create a Unicode Object from the wchar_t buffer w of the given + size. + + The buffer is copied into the new object. */ + +PyAPI_FUNC(PyObject*) PyUnicode_FromWideChar( + const wchar_t *w, /* wchar_t buffer */ + Py_ssize_t size /* size of buffer */ + ); + +/* Copies the Unicode Object contents into the wchar_t buffer w. At + most size wchar_t characters are copied. + + Note that the resulting wchar_t string may or may not be + 0-terminated. It is the responsibility of the caller to make sure + that the wchar_t string is 0-terminated in case this is required by + the application. + + Returns the number of wchar_t characters copied (excluding a + possibly trailing 0-termination character) or -1 in case of an + error. */ + +PyAPI_FUNC(Py_ssize_t) PyUnicode_AsWideChar( + PyObject *unicode, /* Unicode object */ + wchar_t *w, /* wchar_t buffer */ + Py_ssize_t size /* size of buffer */ + ); + +/* Convert the Unicode object to a wide character string. The output string + always ends with a nul character. If size is not NULL, write the number of + wide characters (excluding the null character) into *size. + + Returns a buffer allocated by PyMem_Malloc() (use PyMem_Free() to free it) + on success. On error, returns NULL, *size is undefined and raises a + MemoryError. */ + +PyAPI_FUNC(wchar_t*) PyUnicode_AsWideCharString( + PyObject *unicode, /* Unicode object */ + Py_ssize_t *size /* number of characters of the result */ + ); + +#endif + +/* --- Unicode ordinals --------------------------------------------------- */ + +/* Create a Unicode Object from the given Unicode code point ordinal. + + The ordinal must be in range(0x110000). A ValueError is + raised in case it is not. + +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_FromOrdinal(int ordinal); + +/* === Builtin Codecs ===================================================== + + Many of these APIs take two arguments encoding and errors. 
These + parameters encoding and errors have the same semantics as the ones + of the builtin str() API. + + Setting encoding to NULL causes the default encoding (UTF-8) to be used. + + Error handling is set by errors which may also be set to NULL + meaning to use the default handling defined for the codec. Default + error handling for all builtin codecs is "strict" (ValueErrors are + raised). + + The codecs all use a similar interface. Only deviations from the + generic ones are documented. + +*/ + +/* --- Manage the default encoding ---------------------------------------- */ + +/* Returns "utf-8". */ +PyAPI_FUNC(const char*) PyUnicode_GetDefaultEncoding(void); + +/* --- Generic Codecs ----------------------------------------------------- */ + +/* Create a Unicode object by decoding the encoded string s of the + given size. */ + +PyAPI_FUNC(PyObject*) PyUnicode_Decode( + const char *s, /* encoded string */ + Py_ssize_t size, /* size of buffer */ + const char *encoding, /* encoding */ + const char *errors /* error handling */ + ); + +/* Decode a Unicode object unicode and return the result as Python + object. + + This API is DEPRECATED. The only supported standard encoding is rot13. + Use PyCodec_Decode() to decode with rot13 and non-standard codecs + that decode from str. */ + +Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedObject( + PyObject *unicode, /* Unicode object */ + const char *encoding, /* encoding */ + const char *errors /* error handling */ + ); + +/* Decode a Unicode object unicode and return the result as Unicode + object. + + This API is DEPRECATED. The only supported standard encoding is rot13. + Use PyCodec_Decode() to decode with rot13 and non-standard codecs + that decode from str to str. */ + +Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsDecodedUnicode( + PyObject *unicode, /* Unicode object */ + const char *encoding, /* encoding */ + const char *errors /* error handling */ + ); + +/* Encodes a Unicode object and returns the result as Python + object. + + This API is DEPRECATED. It is superseded by PyUnicode_AsEncodedString() + since all standard encodings (except rot13) encode str to bytes. + Use PyCodec_Encode() for encoding with rot13 and non-standard codecs + that encode from str to non-bytes. */ + +Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedObject( + PyObject *unicode, /* Unicode object */ + const char *encoding, /* encoding */ + const char *errors /* error handling */ + ); + +/* Encodes a Unicode object and returns the result as Python string + object. */ + +PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString( + PyObject *unicode, /* Unicode object */ + const char *encoding, /* encoding */ + const char *errors /* error handling */ + ); + +/* Encodes a Unicode object and returns the result as Unicode + object. + + This API is DEPRECATED. The only supported standard encoding is rot13. + Use PyCodec_Encode() to encode with rot13 and non-standard codecs + that encode from str to str. */ + +Py_DEPRECATED(3.6) PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedUnicode( + PyObject *unicode, /* Unicode object */ + const char *encoding, /* encoding */ + const char *errors /* error handling */ + ); + +/* Build an encoding map.
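 + + (Illustrative sketch, assuming decoding_table is a length-256 str of the + kind generated for charmap codecs: + + PyObject *map = PyUnicode_BuildEncodingMap(decoding_table); + + the result is the reverse map used when encoding through the charmap + codec.)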
*/ + +PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap( + PyObject* string /* 256 character map */ + ); + +/* --- UTF-7 Codecs ------------------------------------------------------- */ + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7( + const char *string, /* UTF-7 encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors /* error handling */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7Stateful( + const char *string, /* UTF-7 encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors, /* error handling */ + Py_ssize_t *consumed /* bytes consumed */ + ); + +/* --- UTF-8 Codecs ------------------------------------------------------- */ + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8( + const char *string, /* UTF-8 encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors /* error handling */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF8Stateful( + const char *string, /* UTF-8 encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors, /* error handling */ + Py_ssize_t *consumed /* bytes consumed */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_AsUTF8String( + PyObject *unicode /* Unicode object */ + ); + +/* Returns a pointer to the default encoding (UTF-8) of the + Unicode object unicode and the size of the encoded representation + in bytes stored in *size. + + In case of an error, no *size is set. + + This function caches the UTF-8 encoded string in the unicodeobject + and subsequent calls will return the same string. The memory is released + when the unicodeobject is deallocated. +*/ + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030A0000 +PyAPI_FUNC(const char *) PyUnicode_AsUTF8AndSize( + PyObject *unicode, + Py_ssize_t *size); +#endif + +/* --- UTF-32 Codecs ------------------------------------------------------ */ + +/* Decodes length bytes from a UTF-32 encoded buffer string and returns + the corresponding Unicode object. + + errors (if non-NULL) defines the error handling. It defaults + to "strict". + + If byteorder is non-NULL, the decoder starts decoding using the + given byte order: + + *byteorder == -1: little endian + *byteorder == 0: native order + *byteorder == 1: big endian + + In native mode, the first four bytes of the stream are checked for a + BOM mark. If found, the BOM mark is analysed, the byte order + adjusted and the BOM skipped. In the other modes, no BOM mark + interpretation is done. After completion, *byteorder is set to the + current byte order at the end of input data. + + If byteorder is NULL, the codec starts in native order mode. + +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32( + const char *string, /* UTF-32 encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors, /* error handling */ + int *byteorder /* pointer to byteorder to use + 0=native;-1=LE,1=BE; updated on + exit */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF32Stateful( + const char *string, /* UTF-32 encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors, /* error handling */ + int *byteorder, /* pointer to byteorder to use + 0=native;-1=LE,1=BE; updated on + exit */ + Py_ssize_t *consumed /* bytes consumed */ + ); + +/* Returns a Python string using the UTF-32 encoding in native byte + order. The string always starts with a BOM mark. */ + +PyAPI_FUNC(PyObject*) PyUnicode_AsUTF32String( + PyObject *unicode /* Unicode object */ + ); + +/* Returns a Python string object holding the UTF-32 encoded value of + the Unicode data. 
+ + If byteorder is not 0, output is written according to the following + byte order: + + byteorder == -1: little endian + byteorder == 0: native byte order (writes a BOM mark) + byteorder == 1: big endian + + If byteorder is 0, the output string will always start with the + Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is + prepended. + +*/ + +/* --- UTF-16 Codecs ------------------------------------------------------ */ + +/* Decodes length bytes from a UTF-16 encoded buffer string and returns + the corresponding Unicode object. + + errors (if non-NULL) defines the error handling. It defaults + to "strict". + + If byteorder is non-NULL, the decoder starts decoding using the + given byte order: + + *byteorder == -1: little endian + *byteorder == 0: native order + *byteorder == 1: big endian + + In native mode, the first two bytes of the stream are checked for a + BOM mark. If found, the BOM mark is analysed, the byte order + adjusted and the BOM skipped. In the other modes, no BOM mark + interpretation is done. After completion, *byteorder is set to the + current byte order at the end of input data. + + If byteorder is NULL, the codec starts in native order mode. + +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16( + const char *string, /* UTF-16 encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors, /* error handling */ + int *byteorder /* pointer to byteorder to use + 0=native;-1=LE,1=BE; updated on + exit */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF16Stateful( + const char *string, /* UTF-16 encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors, /* error handling */ + int *byteorder, /* pointer to byteorder to use + 0=native;-1=LE,1=BE; updated on + exit */ + Py_ssize_t *consumed /* bytes consumed */ + ); + +/* Returns a Python string using the UTF-16 encoding in native byte + order. The string always starts with a BOM mark. */ + +PyAPI_FUNC(PyObject*) PyUnicode_AsUTF16String( + PyObject *unicode /* Unicode object */ + ); + +/* --- Unicode-Escape Codecs ---------------------------------------------- */ + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeUnicodeEscape( + const char *string, /* Unicode-Escape encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors /* error handling */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_AsUnicodeEscapeString( + PyObject *unicode /* Unicode object */ + ); + +/* --- Raw-Unicode-Escape Codecs ------------------------------------------ */ + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeRawUnicodeEscape( + const char *string, /* Raw-Unicode-Escape encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors /* error handling */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_AsRawUnicodeEscapeString( + PyObject *unicode /* Unicode object */ + ); + +/* --- Latin-1 Codecs ----------------------------------------------------- + + Note: Latin-1 corresponds to the first 256 Unicode ordinals. */ + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeLatin1( + const char *string, /* Latin-1 encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors /* error handling */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_AsLatin1String( + PyObject *unicode /* Unicode object */ + ); + +/* --- ASCII Codecs ------------------------------------------------------- + + Only 7-bit ASCII data is expected. All other codes generate errors. 
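 + + For instance (illustrative only): + + PyObject *s = PyUnicode_DecodeASCII("abc", 3, "strict"); + + succeeds, while any input byte >= 0x80 causes a UnicodeDecodeError under + the "strict" error handler.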
+ +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeASCII( + const char *string, /* ASCII encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors /* error handling */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_AsASCIIString( + PyObject *unicode /* Unicode object */ + ); + +/* --- Character Map Codecs ----------------------------------------------- + + This codec uses mappings to encode and decode characters. + + Decoding mappings must map byte ordinals (integers in the range from 0 to + 255) to Unicode strings, integers (which are then interpreted as Unicode + ordinals) or None. Unmapped data bytes (ones which cause a LookupError) + as well as mapped to None, 0xFFFE or '\ufffe' are treated as "undefined + mapping" and cause an error. + + Encoding mappings must map Unicode ordinal integers to bytes objects, + integers in the range from 0 to 255 or None. Unmapped character + ordinals (ones which cause a LookupError) as well as mapped to + None are treated as "undefined mapping" and cause an error. + +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeCharmap( + const char *string, /* Encoded string */ + Py_ssize_t length, /* size of string */ + PyObject *mapping, /* decoding mapping */ + const char *errors /* error handling */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_AsCharmapString( + PyObject *unicode, /* Unicode object */ + PyObject *mapping /* encoding mapping */ + ); + +/* --- MBCS codecs for Windows -------------------------------------------- */ + +#ifdef MS_WINDOWS +PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCS( + const char *string, /* MBCS encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors /* error handling */ + ); + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeMBCSStateful( + const char *string, /* MBCS encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors, /* error handling */ + Py_ssize_t *consumed /* bytes consumed */ + ); + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 +PyAPI_FUNC(PyObject*) PyUnicode_DecodeCodePageStateful( + int code_page, /* code page number */ + const char *string, /* encoded string */ + Py_ssize_t length, /* size of string */ + const char *errors, /* error handling */ + Py_ssize_t *consumed /* bytes consumed */ + ); +#endif + +PyAPI_FUNC(PyObject*) PyUnicode_AsMBCSString( + PyObject *unicode /* Unicode object */ + ); + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 +PyAPI_FUNC(PyObject*) PyUnicode_EncodeCodePage( + int code_page, /* code page number */ + PyObject *unicode, /* Unicode object */ + const char *errors /* error handling */ + ); +#endif + +#endif /* MS_WINDOWS */ + +/* --- Locale encoding --------------------------------------------------- */ + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 +/* Decode a string from the current locale encoding. The decoder is strict if + *surrogateescape* is equal to zero, otherwise it uses the 'surrogateescape' + error handler (PEP 383) to escape undecodable bytes. If a byte sequence can + be decoded as a surrogate character and *surrogateescape* is not equal to + zero, the byte sequence is escaped using the 'surrogateescape' error handler + instead of being decoded. *str* must end with a null character but cannot + contain embedded null characters. */ + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocaleAndSize( + const char *str, + Py_ssize_t len, + const char *errors); + +/* Similar to PyUnicode_DecodeLocaleAndSize(), but compute the string + length using strlen(). 
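 + + (Illustrative sketch, assuming the environment variable is set so the + pointer is non-NULL: + + PyObject *home = PyUnicode_DecodeLocale(getenv("HOME"), + "surrogateescape"); + + decodes the raw bytes with the current locale encoding.)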
*/ + +PyAPI_FUNC(PyObject*) PyUnicode_DecodeLocale( + const char *str, + const char *errors); + +/* Encode a Unicode object to the current locale encoding. The encoder is + strict if *surrogateescape* is equal to zero, otherwise the + "surrogateescape" error handler is used. Return a bytes object. The string + cannot contain embedded null characters. */ + +PyAPI_FUNC(PyObject*) PyUnicode_EncodeLocale( + PyObject *unicode, + const char *errors + ); +#endif + +/* --- File system encoding ---------------------------------------------- */ + +/* ParseTuple converter: encode str objects to bytes using + PyUnicode_EncodeFSDefault(); bytes objects are output as-is. */ + +PyAPI_FUNC(int) PyUnicode_FSConverter(PyObject*, void*); + +/* ParseTuple converter: decode bytes objects to unicode using + PyUnicode_DecodeFSDefaultAndSize(); str objects are output as-is. */ + +PyAPI_FUNC(int) PyUnicode_FSDecoder(PyObject*, void*); + +/* Decode a null-terminated string from the Python filesystem encoding + and error handler. + + If the string length is known, use PyUnicode_DecodeFSDefaultAndSize(). */ +PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefault( + const char *s /* encoded string */ + ); + +/* Decode a string from the Python filesystem encoding and error handler. */ +PyAPI_FUNC(PyObject*) PyUnicode_DecodeFSDefaultAndSize( + const char *s, /* encoded string */ + Py_ssize_t size /* size */ + ); + +/* Encode a Unicode object to the Python filesystem encoding and error handler. + Return bytes. */ +PyAPI_FUNC(PyObject*) PyUnicode_EncodeFSDefault( + PyObject *unicode + ); + +/* --- Methods & Slots ---------------------------------------------------- + + These are capable of handling Unicode objects and strings on input + (we refer to them as strings in the descriptions) and return + Unicode objects or integers as appropriate. */ + +/* Concat two strings giving a new Unicode string. */ + +PyAPI_FUNC(PyObject*) PyUnicode_Concat( + PyObject *left, /* Left string */ + PyObject *right /* Right string */ + ); + +/* Concat two strings and put the result in *pleft + (sets *pleft to NULL on error) */ + +PyAPI_FUNC(void) PyUnicode_Append( + PyObject **pleft, /* Pointer to left string */ + PyObject *right /* Right string */ + ); + +/* Concat two strings, put the result in *pleft and drop the right object + (sets *pleft to NULL on error) */ + +PyAPI_FUNC(void) PyUnicode_AppendAndDel( + PyObject **pleft, /* Pointer to left string */ + PyObject *right /* Right string */ + ); + +/* Split a string giving a list of Unicode strings. + + If sep is NULL, splitting will be done at all whitespace + substrings. Otherwise, splits occur at the given separator. + + At most maxsplit splits will be done. If negative, no limit is set. + + Separators are not included in the resulting list. + +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_Split( + PyObject *s, /* String to split */ + PyObject *sep, /* String separator */ + Py_ssize_t maxsplit /* Maxsplit count */ + ); + +/* Ditto, but split at line breaks. + + CRLF is considered to be one line break. Line breaks are not + included in the resulting list. */ + +PyAPI_FUNC(PyObject*) PyUnicode_Splitlines( + PyObject *s, /* String to split */ + int keepends /* If true, line end markers are included */ + ); + +/* Partition a string using a given separator. */ + +PyAPI_FUNC(PyObject*) PyUnicode_Partition( + PyObject *s, /* String to partition */ + PyObject *sep /* String separator */ + ); + +/* Partition a string using a given separator, searching from the end of the + string.
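 + + (Illustrative: for the string "a.b.c" and separator ".", the result is + the 3-tuple ("a.b", ".", "c"), mirroring str.rpartition().)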
*/ + +PyAPI_FUNC(PyObject*) PyUnicode_RPartition( + PyObject *s, /* String to partition */ + PyObject *sep /* String separator */ + ); + +/* Split a string giving a list of Unicode strings. + + If sep is NULL, splitting will be done at all whitespace + substrings. Otherwise, splits occur at the given separator. + + At most maxsplit splits will be done. But unlike PyUnicode_Split + PyUnicode_RSplit splits from the end of the string. If negative, + no limit is set. + + Separators are not included in the resulting list. + +*/ + +PyAPI_FUNC(PyObject*) PyUnicode_RSplit( + PyObject *s, /* String to split */ + PyObject *sep, /* String separator */ + Py_ssize_t maxsplit /* Maxsplit count */ + ); + +/* Translate a string by applying a character mapping table to it and + return the resulting Unicode object. + + The mapping table must map Unicode ordinal integers to Unicode strings, + Unicode ordinal integers or None (causing deletion of the character). + + Mapping tables may be dictionaries or sequences. Unmapped character + ordinals (ones which cause a LookupError) are left untouched and + are copied as-is. + +*/ + +PyAPI_FUNC(PyObject *) PyUnicode_Translate( + PyObject *str, /* String */ + PyObject *table, /* Translate table */ + const char *errors /* error handling */ + ); + +/* Join a sequence of strings using the given separator and return + the resulting Unicode string. */ + +PyAPI_FUNC(PyObject*) PyUnicode_Join( + PyObject *separator, /* Separator string */ + PyObject *seq /* Sequence object */ + ); + +/* Return 1 if substr matches str[start:end] at the given tail end, 0 + otherwise. */ + +PyAPI_FUNC(Py_ssize_t) PyUnicode_Tailmatch( + PyObject *str, /* String */ + PyObject *substr, /* Prefix or Suffix string */ + Py_ssize_t start, /* Start index */ + Py_ssize_t end, /* Stop index */ + int direction /* Tail end: -1 prefix, +1 suffix */ + ); + +/* Return the first position of substr in str[start:end] using the + given search direction or -1 if not found. -2 is returned in case + an error occurred and an exception is set. */ + +PyAPI_FUNC(Py_ssize_t) PyUnicode_Find( + PyObject *str, /* String */ + PyObject *substr, /* Substring to find */ + Py_ssize_t start, /* Start index */ + Py_ssize_t end, /* Stop index */ + int direction /* Find direction: +1 forward, -1 backward */ + ); + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03030000 +/* Like PyUnicode_Find, but search for single character only. */ +PyAPI_FUNC(Py_ssize_t) PyUnicode_FindChar( + PyObject *str, + Py_UCS4 ch, + Py_ssize_t start, + Py_ssize_t end, + int direction + ); +#endif + +/* Count the number of occurrences of substr in str[start:end]. */ + +PyAPI_FUNC(Py_ssize_t) PyUnicode_Count( + PyObject *str, /* String */ + PyObject *substr, /* Substring to count */ + Py_ssize_t start, /* Start index */ + Py_ssize_t end /* Stop index */ + ); + +/* Replace at most maxcount occurrences of substr in str with replstr + and return the resulting Unicode object. */ + +PyAPI_FUNC(PyObject *) PyUnicode_Replace( + PyObject *str, /* String */ + PyObject *substr, /* Substring to find */ + PyObject *replstr, /* Substring to replace */ + Py_ssize_t maxcount /* Max. number of replacements to apply; + -1 = all */ + ); + +/* Compare two strings and return -1, 0, 1 for less than, equal, + greater than resp. + Raise an exception and return -1 on error. 
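 + + Because -1 is also a valid "less than" result, callers distinguish errors + with PyErr_Occurred() (an illustrative sketch, not from the original + header): + + int r = PyUnicode_Compare(left, right); + if (r == -1 && PyErr_Occurred()) { + ... handle the error ... + }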
*/ + +PyAPI_FUNC(int) PyUnicode_Compare( + PyObject *left, /* Left string */ + PyObject *right /* Right string */ + ); + +/* Compare a Unicode object with C string and return -1, 0, 1 for less than, + equal, and greater than, respectively. It is best to pass only + ASCII-encoded strings, but the function interprets the input string as + ISO-8859-1 if it contains non-ASCII characters. + This function does not raise exceptions. */ + +PyAPI_FUNC(int) PyUnicode_CompareWithASCIIString( + PyObject *left, + const char *right /* ASCII-encoded string */ + ); + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030D0000 +/* Compare a Unicode object with UTF-8 encoded C string. + Return 1 if they are equal, or 0 otherwise. + This function does not raise exceptions. */ + +PyAPI_FUNC(int) PyUnicode_EqualToUTF8(PyObject *, const char *); +PyAPI_FUNC(int) PyUnicode_EqualToUTF8AndSize(PyObject *, const char *, Py_ssize_t); +#endif + +/* Rich compare two strings and return one of the following: + + - NULL in case an exception was raised + - Py_True or Py_False for successful comparisons + - Py_NotImplemented in case the type combination is unknown + + Possible values for op: + + Py_GT, Py_GE, Py_EQ, Py_NE, Py_LT, Py_LE + +*/ + +PyAPI_FUNC(PyObject *) PyUnicode_RichCompare( + PyObject *left, /* Left string */ + PyObject *right, /* Right string */ + int op /* Operation: Py_EQ, Py_NE, Py_GT, etc. */ + ); + +/* Apply an argument tuple or dictionary to a format string and return + the resulting Unicode string. */ + +PyAPI_FUNC(PyObject *) PyUnicode_Format( + PyObject *format, /* Format string */ + PyObject *args /* Argument tuple or dictionary */ + ); + +/* Checks whether element is contained in container and return 1/0 + accordingly. + + element has to coerce to a one element Unicode string. -1 is + returned in case of an error. */ + +PyAPI_FUNC(int) PyUnicode_Contains( + PyObject *container, /* Container string */ + PyObject *element /* Element string */ + ); + +/* Checks whether argument is a valid identifier. 
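 + For example, it returns 1 for "foo_1" and 0 for "1foo", mirroring + str.isidentifier(). (Illustrative note, not part of the original header.)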
*/ + +PyAPI_FUNC(int) PyUnicode_IsIdentifier(PyObject *s); + +/* === Characters Type APIs =============================================== */ + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_UNICODEOBJECT_H +# include "cpython/unicodeobject.h" +# undef Py_CPYTHON_UNICODEOBJECT_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_UNICODEOBJECT_H */ diff --git a/Include/warnings.h b/Include/warnings.h new file mode 100644 index 0000000000000000000000000000000000000000..18ac1543a3ca9e1d5a100badc221a14d61a1d58f --- /dev/null +++ b/Include/warnings.h @@ -0,0 +1,45 @@ +#ifndef Py_WARNINGS_H +#define Py_WARNINGS_H +#ifdef __cplusplus +extern "C" { +#endif + +PyAPI_FUNC(int) PyErr_WarnEx( + PyObject *category, + const char *message, /* UTF-8 encoded string */ + Py_ssize_t stack_level); + +PyAPI_FUNC(int) PyErr_WarnFormat( + PyObject *category, + Py_ssize_t stack_level, + const char *format, /* ASCII-encoded string */ + ...); + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x03060000 +/* Emit a ResourceWarning warning */ +PyAPI_FUNC(int) PyErr_ResourceWarning( + PyObject *source, + Py_ssize_t stack_level, + const char *format, /* ASCII-encoded string */ + ...); +#endif + +PyAPI_FUNC(int) PyErr_WarnExplicit( + PyObject *category, + const char *message, /* UTF-8 encoded string */ + const char *filename, /* decoded from the filesystem encoding */ + int lineno, + const char *module, /* UTF-8 encoded string */ + PyObject *registry); + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_WARNINGS_H +# include "cpython/warnings.h" +# undef Py_CPYTHON_WARNINGS_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_WARNINGS_H */ + diff --git a/Include/weakrefobject.h b/Include/weakrefobject.h new file mode 100644 index 0000000000000000000000000000000000000000..a6e71eb178b124b236b0668ddda4fc27f5b64a7b --- /dev/null +++ b/Include/weakrefobject.h @@ -0,0 +1,46 @@ +/* Weak references objects for Python. 
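+
+   Editorial aside (not part of the upstream header): the Python-level
+   counterpart of PyWeakref_NewRef / PyWeakref_GetRef below is the weakref
+   module. A minimal sketch of the same semantics:
+
+       import weakref
+
+       class Thing:
+           pass
+
+       t = Thing()
+       r = weakref.ref(t)    # ~ PyWeakref_NewRef(t, NULL)
+       assert r() is t       # live referent: caller gets a strong reference
+       del t
+       assert r() is None    # dead referent, as when PyWeakref_GetRef
+                             # stores NULL and returns 0
+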
*/ + +#ifndef Py_WEAKREFOBJECT_H +#define Py_WEAKREFOBJECT_H +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct _PyWeakReference PyWeakReference; + +PyAPI_DATA(PyTypeObject) _PyWeakref_RefType; +PyAPI_DATA(PyTypeObject) _PyWeakref_ProxyType; +PyAPI_DATA(PyTypeObject) _PyWeakref_CallableProxyType; + +#define PyWeakref_CheckRef(op) PyObject_TypeCheck((op), &_PyWeakref_RefType) +#define PyWeakref_CheckRefExact(op) \ + Py_IS_TYPE((op), &_PyWeakref_RefType) +#define PyWeakref_CheckProxy(op) \ + (Py_IS_TYPE((op), &_PyWeakref_ProxyType) \ + || Py_IS_TYPE((op), &_PyWeakref_CallableProxyType)) + +#define PyWeakref_Check(op) \ + (PyWeakref_CheckRef(op) || PyWeakref_CheckProxy(op)) + + +PyAPI_FUNC(PyObject *) PyWeakref_NewRef(PyObject *ob, + PyObject *callback); +PyAPI_FUNC(PyObject *) PyWeakref_NewProxy(PyObject *ob, + PyObject *callback); +Py_DEPRECATED(3.13) PyAPI_FUNC(PyObject *) PyWeakref_GetObject(PyObject *ref); + +#if !defined(Py_LIMITED_API) || Py_LIMITED_API+0 >= 0x030D0000 +PyAPI_FUNC(int) PyWeakref_GetRef(PyObject *ref, PyObject **pobj); +#endif + + +#ifndef Py_LIMITED_API +# define Py_CPYTHON_WEAKREFOBJECT_H +# include "cpython/weakrefobject.h" +# undef Py_CPYTHON_WEAKREFOBJECT_H +#endif + +#ifdef __cplusplus +} +#endif +#endif /* !Py_WEAKREFOBJECT_H */ diff --git a/reward_curve.png b/reward_curve.png new file mode 100644 index 0000000000000000000000000000000000000000..9aa011729699ed80354ac83ed85f98dfb819684d Binary files /dev/null and b/reward_curve.png differ diff --git a/src/rl/train_grpo.py b/src/rl/train_grpo.py index d2bddd4b23b95b5183e5745f78400d968da68d3d..82be3f1ad9490c13c451cafda019b2b5748ee6ef 100644 --- a/src/rl/train_grpo.py +++ b/src/rl/train_grpo.py @@ -29,10 +29,31 @@ def openenv_reward_func(prompts, completions, **kwargs): benign_samples = batch["benign"] env.reset(adv_samples, benign_samples) + if random.random() < 0.05: + logging.info(f"--- Sample Prompt ---\n{prompts[0]}\n---------------------") + logging.info(f"--- Sample Completion ---\n{completions[0][:200]}...\n-------------------------") + for comp in completions: + # Extract string if comp is a ChatML message list (e.g. 
[{"role": "assistant", "content": "..."}]) + if isinstance(comp, list): + if len(comp) > 0 and isinstance(comp[-1], dict) and "content" in comp[-1]: + comp_text = comp[-1]["content"] + else: + comp_text = str(comp) + else: + comp_text = str(comp) + + partial_reward = 0.0 + if '{' in comp_text: + partial_reward += 0.5 try: - clean_json = extract_and_clean_json(comp) + clean_json = extract_and_clean_json(comp_text) parsed_ast = json.loads(clean_json) + partial_reward += 1.0 # Valid JSON syntax + + if 'root' in parsed_ast or 'operator' in parsed_ast: + partial_reward += 2.0 # Has basic AST structure + # Validate AST ast_wrapper = GuardrailGraph.model_validate(parsed_ast) @@ -40,10 +61,13 @@ def openenv_reward_func(prompts, completions, **kwargs): action = Action(ast_json=clean_json) recall, fpr, syntax_error = env.step(action) r = r_engine.calculate(recall, fpr, syntax_error) - rewards.append(r) - except (json.JSONDecodeError, ValidationError): - # Massive negative reward for syntax errors - rewards.append(-10.0) + rewards.append(r + partial_reward) + except json.JSONDecodeError: + # Massive negative reward for syntax errors, but add partial + rewards.append(-10.0 + partial_reward) + except ValidationError: + # Valid JSON but invalid schema + rewards.append(-5.0 + partial_reward) return rewards @@ -80,9 +104,10 @@ def train(): dataset_cache.max_size = 10000 dataset_cache.ingest_dummy_data() # Which actually pulls the real HF datasets now - # Create simple dataset of prompts to trigger JSON AST generation - # Provide enough prompts for 250 steps with batch_size=4 and grad_accum=4 (4000 total) - prompts_data = [{"prompt": "You are a cyber security expert. Generate a JSON AST GuardrailGraph to block prompt injections but allow benign queries. Output strictly in ```json format."}] * 5000 + # Create simple dataset of prompts to trigger JSON AST generation using ChatML + system_prompt = "You are an autonomous Blue-Team engineer. Generate a highly constrained, Pydantic-validated JSON Guardrail Logic Graph to block prompt injections but allow benign queries. Output ONLY valid JSON inside ```json ... ``` blocks. Do not include conversational filler." + user_prompt = "Analyze the threat vectors and synthesize the GuardrailGraph now." + prompts_data = [{"prompt": [{"role": "system", "content": system_prompt}, {"role": "user", "content": user_prompt}]}] * 5000 train_dataset = Dataset.from_list(prompts_data) training_args = GRPOConfig( @@ -90,7 +115,10 @@ def train(): learning_rate=1e-5, per_device_train_batch_size=4, # Pushing 8GB VRAM to 95% util gradient_accumulation_steps=4, # Effective batch size 16 + num_generations=4, # Fix: Reduce from 8 to 4 to prevent OOM / Shared Memory Swapping max_steps=250, # 30-45 mins on RTX 4070 + max_completion_length=1024, # Fix: Prevent 256 token cutoff + max_prompt_length=512, logging_steps=1, save_steps=50, bf16=is_bfloat16_supported(), diff --git a/unsloth_compiled_cache/UnslothAlignPropTrainer.py b/unsloth_compiled_cache/UnslothAlignPropTrainer.py new file mode 100644 index 0000000000000000000000000000000000000000..51e1cfddffc3bfdf38c56fe4983cb53c28e00a13 --- /dev/null +++ b/unsloth_compiled_cache/UnslothAlignPropTrainer.py @@ -0,0 +1,848 @@ +""" +2025.11.2 +2025.11.1 +4.57.2 +0.23.0 +__UNSLOTH_VERSIONING__ +""" + +# Unsloth auto generated code +# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved. 
+# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . + +from torch import Tensor +import torch +import torch.nn as nn +from torch.nn import functional as F +from typing import Any, List, Optional, Tuple, Union, Dict, Set, Callable +from trl.trainer.alignprop_trainer import (Accelerator, AlignPropConfig, AlignPropTrainer, Any, Callable, DDPOStableDiffusionPipeline, Optional, Path, ProjectConfiguration, PyTorchModelHubMixin, Union, defaultdict, generate_model_card, get_comet_experiment_url, is_wandb_available, logger, logging, os, set_seed, textwrap, torch, warnings) + + +import os +from typing import * +from dataclasses import dataclass, field +from packaging.version import Version +import torch +import numpy as np +from contextlib import nullcontext +from torch.nn import functional as F +from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling as TransformersDataCollatorForLanguageModeling +from transformers.training_args import ParallelMode + +# Wrap trainer with padding to right and enable training mode +import functools +from types import MethodType +def prepare_for_training_mode(f): + @functools.wraps(f) + def wrapper(self, *args, **kwargs): + # Enable training mode + if hasattr(self, 'model') and hasattr(self.model, "for_training"): + self.model.for_training() + output = f(self, *args, **kwargs) + # Return inference mode + if hasattr(self, 'model') and hasattr(self.model, "for_inference"): + self.model.for_inference() + return output + return wrapper +pass + +torch_compile_options = { + "epilogue_fusion" : True, + "max_autotune" : False, + "shape_padding" : True, + "trace.enabled" : False, + "triton.cudagraphs" : False, +} + +@torch.compile(dynamic = True, fullgraph = True, options = torch_compile_options,) +def chunked_selective_log_softmax(logits, index): + # Split into 4 chunks only + chunked_logits = torch.chunk(logits.reshape(-1, logits.shape[-1]), chunks = 4, dim = 0) + chunked_index = torch.chunk(index.reshape(-1), chunks = 4, dim = 0) + all_per_token_logps = [] + # Below loop does the same as selective_log_softmax(chunk_logits, chunk_index) + for chunk_logits, chunk_index in zip(chunked_logits, chunked_index): + chunk_logits = chunk_logits.to(torch.float32) + selected_logits = torch.gather(chunk_logits, dim = -1, index = chunk_index.unsqueeze(-1)).squeeze(-1) + logsumexp_values = torch.logsumexp(chunk_logits, dim = -1) + per_token_logps = selected_logits - logsumexp_values + all_per_token_logps.append(per_token_logps) + pass + all_per_token_logps = torch.concat(all_per_token_logps) + all_per_token_logps = all_per_token_logps.reshape((logits.shape[0], logits.shape[1])) + return all_per_token_logps + +def calculate_pad_tokens_in_prompt( + input_ids: torch.Tensor, + logits_to_keep: int, + pad_token_id: int +) -> torch.Tensor: + """ + Given prompt tensor, it returns all the left padded tokens in that sequence. 
so [pad, pad, pad, cat] = 3 tokens + """ + if logits_to_keep >= input_ids.shape[1]: + raise ValueError("logits_to_keep must be smaller than the sequence length.") + + prompt_section = input_ids[:, :-logits_to_keep] + + padding_mask = (prompt_section == pad_token_id) + + pad_token_counts = padding_mask.sum(dim=1) + + return pad_token_counts + +def create_completion_attention_mask( + completion_input_ids: torch.Tensor, + left_pad_tokens_per_prompt: torch.Tensor, + max_left_pad: int, + pad_token_id: int +) -> torch.Tensor: + """ + Given that we have a sequence, [p,p,p,c,c,c,pad,pad,pad] + + Where p are extra prompt tokens we got from slicing the torch tensor, c is completion tokens + and pad are pad tokens, this function would make a completion mask that would 0 out the pad + and p tokens. so in this example [0,0,0,1,1,1,0,0,0] + """ + batch_size, completion_len = completion_input_ids.shape + device = completion_input_ids.device + + num_tokens_to_mask = max_left_pad - left_pad_tokens_per_prompt + + indices = torch.arange(completion_len, device=device).unsqueeze(0) + shift_mask = indices >= num_tokens_to_mask.unsqueeze(1) + + non_padding_mask = (completion_input_ids != pad_token_id) + + final_mask = shift_mask & non_padding_mask + + return final_mask + +def left_pack_padding(tensor: torch.Tensor, pad_id: int) -> torch.Tensor: + """ + Moves all padding tokens in each sequence of a batch to the right. + """ + mask = (tensor != pad_id) + # Must do stable=True since binary mark is unordered + sorted_indices = torch.argsort(mask, dim=1, descending=True, stable=True) + packed_tensor = torch.gather(tensor, 1, sorted_indices) + return packed_tensor + +def align_logprobs_with_mask( + logprob_tensor: torch.Tensor, + attention_mask: torch.Tensor, + pad_value: float = 0.0 +) -> torch.Tensor: + """ + Aligns a log probability tensor with a given attention mask. + """ + + device = logprob_tensor.device + batch_size, logprob_seq_len = logprob_tensor.shape + mask_seq_len = attention_mask.shape[1] + + padded_logprobs = torch.full( + attention_mask.shape, + fill_value=pad_value, + dtype=logprob_tensor.dtype, + device=device + ) + + left_pad_counts = torch.argmax(attention_mask, dim=1) + + cols = torch.arange(logprob_seq_len, device=device) + dest_indices = left_pad_counts.unsqueeze(1) + cols + + # Create destination row indices + # Shape: [batch_size, logprob_seq_len] + row_indices = torch.arange(batch_size, device=device).unsqueeze(1).expand_as(dest_indices) + + # --- 4. Filter out-of-bounds indices and perform assignment --- + # Create a mask to identify only the indices that are within the bounds + # of the target tensor's sequence length. + valid_mask = dest_indices < mask_seq_len + + # Use this mask to select only the valid row indices, column indices, + # and the corresponding values from the logprob tensor. + # This flattens the selected elements into 1D tensors. + valid_rows = row_indices[valid_mask] + valid_cols = dest_indices[valid_mask] + valid_vals = logprob_tensor[valid_mask] + + # Place the valid values into their correct positions in the padded tensor + # using a single, efficient advanced indexing operation. + padded_logprobs[valid_rows, valid_cols] = valid_vals + + return padded_logprobs +@dataclass +class UnslothAlignPropConfig(AlignPropConfig): + """ + +Configuration class for the [`AlignPropTrainer`]. 
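+
+Example (an illustrative sketch only; the import path assumes this generated
+cache directory is importable, and the values shown are not defaults):
+
+    from unsloth_compiled_cache.UnslothAlignPropTrainer import UnslothAlignPropConfig
+
+    config = UnslothAlignPropConfig(
+        train_learning_rate = 1e-4,
+        train_batch_size = 2,
+        sample_num_steps = 50,
+        mixed_precision = 'bf16',
+    )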
+ +Using [`~transformers.HfArgumentParser`] we can turn this class into +[argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the +command line. + +Parameters: + exp_name (`str`, *optional*, defaults to `os.path.basename(sys.argv[0])[: -len(".py")]`): + Name of this experiment (defaults to the file name without the extension). + run_name (`str`, *optional*, defaults to `""`): + Name of this run. + seed (`int`, *optional*, defaults to `0`): + Random seed for reproducibility. + log_with (`str` or `None`, *optional*, defaults to `None`): + Log with either `"wandb"` or `"tensorboard"`. Check + [tracking](https://huggingface.co/docs/accelerate/usage_guides/tracking) for more details. + log_image_freq (`int`, *optional*, defaults to `1`): + Frequency for logging images. + tracker_kwargs (`dict[str, Any]`, *optional*, defaults to `{}`): + Keyword arguments for the tracker (e.g., `wandb_project`). + accelerator_kwargs (`dict[str, Any]`, *optional*, defaults to `{}`): + Keyword arguments for the accelerator. + project_kwargs (`dict[str, Any]`, *optional*, defaults to `{}`): + Keyword arguments for the accelerator project config (e.g., `logging_dir`). + tracker_project_name (`str`, *optional*, defaults to `"trl"`): + Name of project to use for tracking. + logdir (`str`, *optional*, defaults to `"logs"`): + Top-level logging directory for checkpoint saving. + num_epochs (`int`, *optional*, defaults to `100`): + Number of epochs to train. + save_freq (`int`, *optional*, defaults to `1`): + Number of epochs between saving model checkpoints. + num_checkpoint_limit (`int`, *optional*, defaults to `5`): + Number of checkpoints to keep before overwriting old ones. + mixed_precision (`str`, *optional*, defaults to `"fp16"`): + Mixed precision training. + allow_tf32 (`bool`, *optional*, defaults to `True`): + Allow `tf32` on Ampere GPUs. + resume_from (`str`, *optional*, defaults to `""`): + Path to resume training from a checkpoint. + sample_num_steps (`int`, *optional*, defaults to `50`): + Number of sampler inference steps. + sample_eta (`float`, *optional*, defaults to `1.0`): + Eta parameter for the DDIM sampler. + sample_guidance_scale (`float`, *optional*, defaults to `5.0`): + Classifier-free guidance weight. + train_batch_size (`int`, *optional*, defaults to `1`): + Batch size for training. + train_use_8bit_adam (`bool`, *optional*, defaults to `False`): + Whether to use the 8bit Adam optimizer from `bitsandbytes`. + train_learning_rate (`float`, *optional*, defaults to `1e-3`): + Learning rate. + train_adam_beta1 (`float`, *optional*, defaults to `0.9`): + Beta1 for Adam optimizer. + train_adam_beta2 (`float`, *optional*, defaults to `0.999`): + Beta2 for Adam optimizer. + train_adam_weight_decay (`float`, *optional*, defaults to `1e-4`): + Weight decay for Adam optimizer. + train_adam_epsilon (`float`, *optional*, defaults to `1e-8`): + Epsilon value for Adam optimizer. + train_gradient_accumulation_steps (`int`, *optional*, defaults to `1`): + Number of gradient accumulation steps. + train_max_grad_norm (`float`, *optional*, defaults to `1.0`): + Maximum gradient norm for gradient clipping. + negative_prompts (`str` or `None`, *optional*, defaults to `None`): + Comma-separated list of prompts to use as negative examples. + truncated_backprop_rand (`bool`, *optional*, defaults to `True`): + If `True`, randomized truncation to different diffusion timesteps is used. 
+    truncated_backprop_timestep (`int`, *optional*, defaults to `49`):
+        Absolute timestep to which the gradients are backpropagated. Used only if `truncated_backprop_rand=False`.
+    truncated_rand_backprop_minmax (`tuple[int, int]`, *optional*, defaults to `(0, 50)`):
+        Range of diffusion timesteps for randomized truncated backpropagation.
+    push_to_hub (`bool`, *optional*, defaults to `False`):
+        Whether to push the final model to the Hub.
+
+    """
+    vllm_sampling_params: Optional[Any] = field(
+        default = None,
+        metadata = {'help': 'vLLM SamplingParams'},
+    )
+    unsloth_num_chunks : Optional[int] = field(
+        default = -1,
+        metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
+    )
+
+    def __init__(
+        self,
+        exp_name = 'train_grpo',
+        run_name = '',
+        seed = 3407,
+        log_with = None,
+        log_image_freq = 1,
+        tracker_project_name = 'trl',
+        logdir = 'logs',
+        num_epochs = 100,
+        save_freq = 1,
+        num_checkpoint_limit = 5,
+        mixed_precision = 'fp16',
+        allow_tf32 = True,
+        resume_from = '',
+        sample_num_steps = 50,
+        sample_eta = 1.0,
+        sample_guidance_scale = 5.0,
+        train_batch_size = 1,
+        train_use_8bit_adam = False,
+        train_learning_rate = 5e-05,
+        train_adam_beta1 = 0.9,
+        train_adam_beta2 = 0.999,
+        train_adam_weight_decay = 0.01,
+        train_adam_epsilon = 1e-08,
+        train_gradient_accumulation_steps = 2,
+        train_max_grad_norm = 1.0,
+        negative_prompts = None,
+        truncated_backprop_rand = True,
+        truncated_backprop_timestep = 49,
+        push_to_hub = False,
+        vllm_sampling_params = None,
+        unsloth_num_chunks = -1,
+
+        **kwargs,
+    ):
+        if train_learning_rate < 1e-7: print(f'Unsloth: Your learning rate of `{train_learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
+        if train_learning_rate > 1: print(f'Unsloth: Your learning rate of `{train_learning_rate}` is far too large (> 1)! Consider decreasing it to 1e-1, otherwise gradient updates will explode!')
+
+        super().__init__(
+            exp_name = exp_name,
+            run_name = run_name,
+            seed = seed,
+            log_with = log_with,
+            log_image_freq = log_image_freq,
+            tracker_project_name = tracker_project_name,
+            logdir = logdir,
+            num_epochs = num_epochs,
+            save_freq = save_freq,
+            num_checkpoint_limit = num_checkpoint_limit,
+            mixed_precision = mixed_precision,
+            allow_tf32 = allow_tf32,
+            resume_from = resume_from,
+            sample_num_steps = sample_num_steps,
+            sample_eta = sample_eta,
+            sample_guidance_scale = sample_guidance_scale,
+            train_batch_size = train_batch_size,
+            train_use_8bit_adam = train_use_8bit_adam,
+            train_learning_rate = train_learning_rate,
+            train_adam_beta1 = train_adam_beta1,
+            train_adam_beta2 = train_adam_beta2,
+            train_adam_weight_decay = train_adam_weight_decay,
+            train_adam_epsilon = train_adam_epsilon,
+            train_gradient_accumulation_steps = train_gradient_accumulation_steps,
+            train_max_grad_norm = train_max_grad_norm,
+            negative_prompts = negative_prompts,
+            truncated_backprop_rand = truncated_backprop_rand,
+            truncated_backprop_timestep = truncated_backprop_timestep,
+            push_to_hub = push_to_hub,**kwargs)
+        self.vllm_sampling_params = vllm_sampling_params
+        self.unsloth_num_chunks = unsloth_num_chunks
+
+pass
+
+class _UnslothAlignPropTrainer(PyTorchModelHubMixin):
+    """
+    The AlignPropTrainer uses Deep Diffusion Policy Optimization to optimise diffusion models.
Note, this trainer is + heavily inspired by the work here: https://github.com/mihirp1998/AlignProp/ As of now only Stable Diffusion based + pipelines are supported + + Args: + config (`AlignPropConfig`): + Configuration object for AlignPropTrainer. Check the documentation of `PPOConfig` for more details. + reward_function (`Callable[[torch.Tensor, tuple[str], tuple[Any]], torch.Tensor]`): + Reward function to be used + prompt_function (`Callable[[], tuple[str, Any]]`): + Function to generate prompts to guide model + sd_pipeline (`DDPOStableDiffusionPipeline`): + Stable Diffusion pipeline to be used for training. + image_samples_hook (`Optional[Callable[[Any, Any, Any], Any]]`): + Hook to be called to log images + """ + + _tag_names = ["trl", "alignprop"] + + def __init__( + self, + config: AlignPropConfig, + reward_function: Callable[[torch.Tensor, tuple[str], tuple[Any]], torch.Tensor], + prompt_function: Callable[[], tuple[str, Any]], + sd_pipeline: DDPOStableDiffusionPipeline, + image_samples_hook: Optional[Callable[[Any, Any, Any], Any]] = None, + ): + warnings.warn( + "AlignPropTrainer is deprecated and will be removed in version 0.23.0.", + DeprecationWarning, + ) + if image_samples_hook is None: + logger.warning("No image_samples_hook provided; no images will be logged") + + self.prompt_fn = prompt_function + self.reward_fn = reward_function + self.config = config + self.image_samples_callback = image_samples_hook + + accelerator_project_config = ProjectConfiguration(**self.config.project_kwargs) + + if self.config.resume_from: + self.config.resume_from = os.path.normpath(os.path.expanduser(self.config.resume_from)) + if "checkpoint_" not in os.path.basename(self.config.resume_from): + # get the most recent checkpoint in this directory + checkpoints = list( + filter( + lambda x: "checkpoint_" in x, + os.listdir(self.config.resume_from), + ) + ) + if len(checkpoints) == 0: + raise ValueError(f"No checkpoints found in {self.config.resume_from}") + checkpoint_numbers = sorted([int(x.split("_")[-1]) for x in checkpoints]) + self.config.resume_from = os.path.join( + self.config.resume_from, + f"checkpoint_{checkpoint_numbers[-1]}", + ) + + accelerator_project_config.iteration = checkpoint_numbers[-1] + 1 + + self.accelerator = Accelerator( + log_with=self.config.log_with, + mixed_precision=self.config.mixed_precision, + project_config=accelerator_project_config, + # we always accumulate gradients across timesteps; we want config.train.gradient_accumulation_steps to be the + # number of *samples* we accumulate across, so we need to multiply by the number of training timesteps to get + # the total number of optimizer steps to accumulate across. 
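+            # (Clarification, added for readability: each accumulate() context in
+            # step() below runs one full sampling + backward pass, so with
+            # train_gradient_accumulation_steps = 2 the optimizer steps once for
+            # every two generated batches.)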
+ gradient_accumulation_steps=self.config.train_gradient_accumulation_steps, + **self.config.accelerator_kwargs, + ) + + is_using_tensorboard = config.log_with is not None and config.log_with == "tensorboard" + + if self.accelerator.is_main_process: + self.accelerator.init_trackers( + self.config.tracker_project_name, + config=dict(alignprop_trainer_config=config.to_dict()) + if not is_using_tensorboard + else config.to_dict(), + init_kwargs=self.config.tracker_kwargs, + ) + + logger.info(f"\n{config}") + + set_seed(self.config.seed, device_specific=True) + + self.sd_pipeline = sd_pipeline + + self.sd_pipeline.set_progress_bar_config( + position=1, + disable=not self.accelerator.is_local_main_process, + leave=False, + desc="Timestep", + dynamic_ncols=True, + ) + + # For mixed precision training we cast all non-trainable weights [vae, non-lora text_encoder and non-lora unet] to half-precision + # as these weights are only used for inference, keeping weights in full precision is not required. + if self.accelerator.mixed_precision == "fp16": + inference_dtype = torch.float16 + elif self.accelerator.mixed_precision == "bf16": + inference_dtype = torch.bfloat16 + else: + inference_dtype = torch.float32 + + self.sd_pipeline.vae.to(self.accelerator.device, dtype=inference_dtype) + self.sd_pipeline.text_encoder.to(self.accelerator.device, dtype=inference_dtype) + self.sd_pipeline.unet.to(self.accelerator.device, dtype=inference_dtype) + + trainable_layers = self.sd_pipeline.get_trainable_layers() + + self.accelerator.register_save_state_pre_hook(self._save_model_hook) + self.accelerator.register_load_state_pre_hook(self._load_model_hook) + + # Enable TF32 for faster training on Ampere GPUs, + # cf https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices + if self.config.allow_tf32 and torch.cuda.is_available(): + torch.backends.cuda.matmul.allow_tf32 = True + + self.optimizer = self._setup_optimizer( + trainable_layers.parameters() if not isinstance(trainable_layers, list) else trainable_layers + ) + + self.neg_prompt_embed = self.sd_pipeline.text_encoder( + self.sd_pipeline.tokenizer( + [""] if self.config.negative_prompts is None else self.config.negative_prompts, + return_tensors="pt", + padding="max_length", + truncation=True, + max_length=self.sd_pipeline.tokenizer.model_max_length, + ).input_ids.to(self.accelerator.device) + )[0] + + # NOTE: for some reason, autocast is necessary for non-lora training but for lora training it isn't necessary and it uses + # more memory + self.autocast = self.sd_pipeline.autocast or self.accelerator.autocast + + if hasattr(self.sd_pipeline, "use_lora") and self.sd_pipeline.use_lora: + unet, self.optimizer = self.accelerator.prepare(trainable_layers, self.optimizer) + self.trainable_layers = list(filter(lambda p: p.requires_grad, unet.parameters())) + else: + self.trainable_layers, self.optimizer = self.accelerator.prepare(trainable_layers, self.optimizer) + + if config.resume_from: + logger.info(f"Resuming from {config.resume_from}") + self.accelerator.load_state(config.resume_from) + self.first_epoch = int(config.resume_from.split("_")[-1]) + 1 + else: + self.first_epoch = 0 + + def compute_rewards(self, prompt_image_pairs): + reward, reward_metadata = self.reward_fn( + prompt_image_pairs["images"], prompt_image_pairs["prompts"], prompt_image_pairs["prompt_metadata"] + ) + return reward + + def step(self, epoch: int, global_step: int): + """ + Perform a single step of training. + + Args: + epoch (int): The current epoch. 
+ global_step (int): The current global step. + + Side Effects: + - Model weights are updated + - Logs the statistics to the accelerator trackers. + - If `self.image_samples_callback` is not None, it will be called with the prompt_image_pairs, global_step, + and the accelerator tracker. + + Returns: + global_step (int): The updated global step. + """ + info = defaultdict(list) + + self.sd_pipeline.unet.train() + + for _ in range(self.config.train_gradient_accumulation_steps): + with self.accelerator.accumulate(self.sd_pipeline.unet), self.autocast(), torch.enable_grad(): + prompt_image_pairs = self._generate_samples( + batch_size=self.config.train_batch_size, + ) + + rewards = self.compute_rewards(prompt_image_pairs) + + prompt_image_pairs["rewards"] = rewards + + rewards_vis = self.accelerator.gather(rewards).detach().cpu().numpy() + + loss = self.calculate_loss(rewards) + + self.accelerator.backward(loss) + + if self.accelerator.sync_gradients: + self.accelerator.clip_grad_norm_( + self.trainable_layers.parameters() + if not isinstance(self.trainable_layers, list) + else self.trainable_layers, + self.config.train_max_grad_norm, + ) + + self.optimizer.step() + self.optimizer.zero_grad() + + info["reward_mean"].append(rewards_vis.mean()) + info["reward_std"].append(rewards_vis.std()) + info["loss"].append(loss.item()) + + # Checks if the accelerator has performed an optimization step behind the scenes + if self.accelerator.sync_gradients: + # log training-related stuff + info = {k: torch.mean(torch.tensor(v)) for k, v in info.items()} + info = self.accelerator.reduce(info, reduction="mean") + info.update({"epoch": epoch}) + self.accelerator.log(info, step=global_step) + global_step += 1 + info = defaultdict(list) + else: + raise ValueError( + "Optimization step should have been performed by this point. Please check calculated gradient accumulation settings." 
+ ) + # Logs generated images + if self.image_samples_callback is not None and global_step % self.config.log_image_freq == 0: + self.image_samples_callback(prompt_image_pairs, global_step, self.accelerator.trackers[0]) + + if epoch != 0 and epoch % self.config.save_freq == 0 and self.accelerator.is_main_process: + self.accelerator.save_state() + + return global_step + + def calculate_loss(self, rewards): + """ + Calculate the loss for a batch of an unpacked sample + + Args: + rewards (torch.Tensor): + Differentiable reward scalars for each generated image, shape: [batch_size] + + Returns: + loss (torch.Tensor) (all of these are of shape (1,)) + """ + # Loss is specific to Aesthetic Reward function used in AlignProp (https://huggingface.co/papers/2310.03739) + loss = 10.0 - (rewards).mean() + return loss + + def loss( + self, + advantages: torch.Tensor, + clip_range: float, + ratio: torch.Tensor, + ): + unclipped_loss = -advantages * ratio + clipped_loss = -advantages * torch.clamp( + ratio, + 1.0 - clip_range, + 1.0 + clip_range, + ) + return torch.mean(torch.maximum(unclipped_loss, clipped_loss)) + + def _setup_optimizer(self, trainable_layers_parameters): + if self.config.train_use_8bit_adam: + import bitsandbytes + + optimizer_cls = bitsandbytes.optim.AdamW8bit + else: + optimizer_cls = torch.optim.AdamW + + return optimizer_cls( + trainable_layers_parameters, + lr=self.config.train_learning_rate, + betas=(self.config.train_adam_beta1, self.config.train_adam_beta2), + weight_decay=self.config.train_adam_weight_decay, + eps=self.config.train_adam_epsilon, + ) + + def _save_model_hook(self, models, weights, output_dir): + self.sd_pipeline.save_checkpoint(models, weights, output_dir) + weights.pop() # ensures that accelerate doesn't try to handle saving of the model + + def _load_model_hook(self, models, input_dir): + self.sd_pipeline.load_checkpoint(models, input_dir) + models.pop() # ensures that accelerate doesn't try to handle loading of the model + + def _generate_samples(self, batch_size, with_grad=True, prompts=None): + """ + Generate samples from the model + + Args: + batch_size (int): Batch size to use for sampling + with_grad (bool): Whether the generated RGBs should have gradients attached to it. + prompts (list[str], *optional*): If provided, use these prompts instead of generating new ones. 
+ + Returns: + prompt_image_pairs (dict[Any]) + """ + prompt_image_pairs = {} + + sample_neg_prompt_embeds = self.neg_prompt_embed.repeat(batch_size, 1, 1) + + if prompts is None: + prompts, prompt_metadata = zip(*[self.prompt_fn() for _ in range(batch_size)]) + else: + prompt_metadata = [{} for _ in range(batch_size)] + + prompt_ids = self.sd_pipeline.tokenizer( + prompts, + return_tensors="pt", + padding="max_length", + truncation=True, + max_length=self.sd_pipeline.tokenizer.model_max_length, + ).input_ids.to(self.accelerator.device) + + prompt_embeds = self.sd_pipeline.text_encoder(prompt_ids)[0] + + if with_grad: + sd_output = self.sd_pipeline.rgb_with_grad( + prompt_embeds=prompt_embeds, + negative_prompt_embeds=sample_neg_prompt_embeds, + num_inference_steps=self.config.sample_num_steps, + guidance_scale=self.config.sample_guidance_scale, + eta=self.config.sample_eta, + truncated_backprop_rand=self.config.truncated_backprop_rand, + truncated_backprop_timestep=self.config.truncated_backprop_timestep, + truncated_rand_backprop_minmax=self.config.truncated_rand_backprop_minmax, + output_type="pt", + ) + else: + sd_output = self.sd_pipeline( + prompt_embeds=prompt_embeds, + negative_prompt_embeds=sample_neg_prompt_embeds, + num_inference_steps=self.config.sample_num_steps, + guidance_scale=self.config.sample_guidance_scale, + eta=self.config.sample_eta, + output_type="pt", + ) + + images = sd_output.images + + prompt_image_pairs["images"] = images + prompt_image_pairs["prompts"] = prompts + prompt_image_pairs["prompt_metadata"] = prompt_metadata + + return prompt_image_pairs + + def train(self, epochs: Optional[int] = None): + """ + Train the model for a given number of epochs + """ + global_step = 0 + if epochs is None: + epochs = self.config.num_epochs + for epoch in range(self.first_epoch, epochs): + global_step = self.step(epoch, global_step) + + def _save_pretrained(self, save_directory): + self.sd_pipeline.save_pretrained(save_directory) + self.create_model_card() + + # Ensure the model card is saved along with the checkpoint + def _save_checkpoint(self, model, trial): + if self.args.hub_model_id is None: + model_name = Path(self.args.output_dir).name + else: + model_name = self.args.hub_model_id.split("/")[-1] + self.create_model_card(model_name=model_name) + super()._save_checkpoint(model, trial) + + def create_model_card( + self, + model_name: Optional[str] = None, + dataset_name: Optional[str] = None, + tags: Union[str, list[str], None] = None, + ): + """ + Creates a draft of a model card using the information available to the `Trainer`. + + Args: + model_name (`str` or `None`, *optional*, defaults to `None`): + Name of the model. + dataset_name (`str` or `None`, *optional*, defaults to `None`): + Name of the dataset used for training. + tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`): + Tags to be associated with the model card. 
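+
+        Example (illustrative only; all names below are placeholders):
+
+            trainer.create_model_card(
+                model_name="alignprop-sd15",
+                dataset_name="my-prompt-set",
+                tags="diffusion",  # a plain string is normalized to a set below
+            )
+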
+ """ + if not self.is_world_process_zero(): + return + + if hasattr(self.model.config, "_name_or_path") and not os.path.isdir(self.model.config._name_or_path): + base_model = self.model.config._name_or_path + else: + base_model = None + + # normalize `tags` to a mutable set + if tags is None: + tags = set() + elif isinstance(tags, str): + tags = {tags} + else: + tags = set(tags) + + if hasattr(self.model.config, "unsloth_version"): + tags.add("unsloth") + + if "JOB_ID" in os.environ: + tags.add("hf_jobs") + + tags.update(self._tag_names) + + # docstyle-ignore + citation = textwrap.dedent("""\ + @article{prabhudesai2024aligning, + title = {{Aligning Text-to-Image Diffusion Models with Reward Backpropagation}}, + author = {Mihir Prabhudesai and Anirudh Goyal and Deepak Pathak and Katerina Fragkiadaki}, + year = 2024, + eprint = {arXiv:2310.03739} + }""") + + model_card = generate_model_card( + base_model=base_model, + model_name=model_name, + hub_model_id=self.hub_model_id, + dataset_name=dataset_name, + tags=tags, + wandb_url=wandb.run.url if is_wandb_available() and wandb.run is not None else None, + comet_url=get_comet_experiment_url(), + trainer_name="AlignProp", + trainer_citation=citation, + paper_title="Aligning Text-to-Image Diffusion Models with Reward Backpropagation", + paper_id="2310.03739", + ) + + model_card.save(os.path.join(self.args.output_dir, "README.md")) +class UnslothAlignPropTrainer(_UnslothAlignPropTrainer): + """ + +The AlignPropTrainer uses Deep Diffusion Policy Optimization to optimise diffusion models. Note, this trainer is +heavily inspired by the work here: https://github.com/mihirp1998/AlignProp/ As of now only Stable Diffusion based +pipelines are supported + +Args: + config (`AlignPropConfig`): + Configuration object for AlignPropTrainer. Check the documentation of `PPOConfig` for more details. + reward_function (`Callable[[torch.Tensor, tuple[str], tuple[Any]], torch.Tensor]`): + Reward function to be used + prompt_function (`Callable[[], tuple[str, Any]]`): + Function to generate prompts to guide model + sd_pipeline (`DDPOStableDiffusionPipeline`): + Stable Diffusion pipeline to be used for training. + image_samples_hook (`Optional[Callable[[Any, Any, Any], Any]]`): + Hook to be called to log images + + """ + def __init__( + self, + config, + reward_function, + prompt_function, + sd_pipeline, + image_samples_hook = None, + **kwargs + ): + if args is None: args = UnslothAlignPropConfig() + other_metrics = [] + + from unsloth_zoo.logging_utils import PatchRLStatistics + PatchRLStatistics('alignprop_trainer', other_metrics) + + # [TODO] Fix up DataParallel multiplying batch sizes + # [TODO] DDP works, but DP seems to not work? 
[TODO] + if getattr(args, "parallel_mode", None) == ParallelMode.NOT_DISTRIBUTED and args.n_gpu > 1: + if getattr(args, "_n_gpu", 1) != 1: + args._n_gpu = 1 + if "model" in locals() and hasattr(model, "for_training"): + model.for_training() + super().__init__( + config = config, + reward_function = reward_function, + prompt_function = prompt_function, + sd_pipeline = sd_pipeline, + image_samples_hook = image_samples_hook,**kwargs) + if "model" in locals() and hasattr(model, "for_inference"): + model.for_inference() + +pass + + +if hasattr(logger, "addFilter"): + import logging + class HideLoggingMessage(logging.Filter): + def __init__(self, text): self.text = text + def filter(self, x): return not (self.text in x.getMessage()) + pass + logger.addFilter(HideLoggingMessage("`use_cache=True`")) + diff --git a/unsloth_compiled_cache/UnslothBCOTrainer.py b/unsloth_compiled_cache/UnslothBCOTrainer.py new file mode 100644 index 0000000000000000000000000000000000000000..610c6ecae52a3912eff70ca8049ae9b6b4b8588c --- /dev/null +++ b/unsloth_compiled_cache/UnslothBCOTrainer.py @@ -0,0 +1,2095 @@ +""" +2025.11.2 +2025.11.1 +4.57.2 +0.23.0 +__UNSLOTH_VERSIONING__ +""" + +# Unsloth auto generated code +# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . 
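+
+# Editorial sketch (not part of the generated cache): the padding and
+# log-softmax helpers defined below are pure tensor transforms and can be
+# sanity-checked in isolation, e.g. with pad_token_id == 0:
+#
+#     import torch
+#     x = torch.tensor([[0, 0, 7, 8],
+#                       [0, 5, 0, 6]])
+#     mask = (x != 0)
+#     idx = torch.argsort(mask, dim=1, descending=True, stable=True)
+#     torch.gather(x, 1, idx)     # left_pack_padding: [[7, 8, 0, 0],
+#                                 #                     [5, 6, 0, 0]]
+#
+#     ids = torch.tensor([[0, 0, 0, 11, 21, 22]])  # 3 left pads, 2 completion tokens
+#     (ids[:, :-2] == 0).sum(dim=1)                # calculate_pad_tokens_in_prompt
+#                                                  # with logits_to_keep=2 -> tensor([3])
+#
+# chunked_selective_log_softmax below is likewise numerically equivalent to
+#     torch.log_softmax(logits.float(), -1).gather(-1, index.unsqueeze(-1)).squeeze(-1)
+# computed in four chunks to bound peak memory.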
+ +from torch import Tensor +import torch +import torch.nn as nn +from torch.nn import functional as F +from typing import Any, List, Optional, Tuple, Union, Dict, Set, Callable +from trl.trainer.bco_trainer import (Any, AutoModelForCausalLM, BCOConfig, BCOTrainer, BaseImageProcessor, CLF_NAME, Callable, DPODataCollatorWithPadding, DataCollator, DataLoader, Dataset, EvalLoopOutput, F, FeatureExtractionMixin, Literal, Optional, PartialState, Path, PeftModel, PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin, RUNNING_NAME, RunningMoments, SequentialSampler, Trainer, TrainerCallback, TrainingArguments, Union, _process_tokens, _tokenize, autocast, contextmanager, create_reference_model, defaultdict, disable_dropout_in_model, generate_model_card, get_comet_experiment_url, has_length, inspect, is_comet_available, is_joblib_available, is_peft_available, is_sklearn_available, is_wandb_available, itemgetter, log_table_to_comet_experiment, logger, logging, maybe_apply_chat_template, maybe_extract_prompt, maybe_unpair_preference_dataset, nn, np, nullcontext, os, pad_to_length, pd, peft_module_casting_to_bf16, prepare_deepspeed, prepare_model_for_kbit_training, random, selective_log_softmax, textwrap, torch, tqdm, F, Optional, PeftModel, PreTrainedModel, Trainer, is_peft_available, logger, os, torch) + + +import os +from typing import * +from dataclasses import dataclass, field +from packaging.version import Version +import torch +import numpy as np +from contextlib import nullcontext +from torch.nn import functional as F +from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling as TransformersDataCollatorForLanguageModeling +from transformers.training_args import ParallelMode + +# Wrap trainer with padding to right and enable training mode +import functools +from types import MethodType +def prepare_for_training_mode(f): + @functools.wraps(f) + def wrapper(self, *args, **kwargs): + # Enable training mode + if hasattr(self, 'model') and hasattr(self.model, "for_training"): + self.model.for_training() + output = f(self, *args, **kwargs) + # Return inference mode + if hasattr(self, 'model') and hasattr(self.model, "for_inference"): + self.model.for_inference() + return output + return wrapper +pass + +torch_compile_options = { + "epilogue_fusion" : True, + "max_autotune" : False, + "shape_padding" : True, + "trace.enabled" : False, + "triton.cudagraphs" : False, +} + +@torch.compile(dynamic = True, fullgraph = True, options = torch_compile_options,) +def chunked_selective_log_softmax(logits, index): + # Split into 4 chunks only + chunked_logits = torch.chunk(logits.reshape(-1, logits.shape[-1]), chunks = 4, dim = 0) + chunked_index = torch.chunk(index.reshape(-1), chunks = 4, dim = 0) + all_per_token_logps = [] + # Below loop does the same as selective_log_softmax(chunk_logits, chunk_index) + for chunk_logits, chunk_index in zip(chunked_logits, chunked_index): + chunk_logits = chunk_logits.to(torch.float32) + selected_logits = torch.gather(chunk_logits, dim = -1, index = chunk_index.unsqueeze(-1)).squeeze(-1) + logsumexp_values = torch.logsumexp(chunk_logits, dim = -1) + per_token_logps = selected_logits - logsumexp_values + all_per_token_logps.append(per_token_logps) + pass + all_per_token_logps = torch.concat(all_per_token_logps) + all_per_token_logps = all_per_token_logps.reshape((logits.shape[0], logits.shape[1])) + return all_per_token_logps + +def calculate_pad_tokens_in_prompt( + input_ids: torch.Tensor, + logits_to_keep: int, + pad_token_id: int +) -> torch.Tensor: 
+ """ + Given prompt tensor, it returns all the left padded tokens in that sequence. so [pad, pad, pad, cat] = 3 tokens + """ + if logits_to_keep >= input_ids.shape[1]: + raise ValueError("logits_to_keep must be smaller than the sequence length.") + + prompt_section = input_ids[:, :-logits_to_keep] + + padding_mask = (prompt_section == pad_token_id) + + pad_token_counts = padding_mask.sum(dim=1) + + return pad_token_counts + +def create_completion_attention_mask( + completion_input_ids: torch.Tensor, + left_pad_tokens_per_prompt: torch.Tensor, + max_left_pad: int, + pad_token_id: int +) -> torch.Tensor: + """ + Given that we have a sequence, [p,p,p,c,c,c,pad,pad,pad] + + Where p are extra prompt tokens we got from slicing the torch tensor, c is completion tokens + and pad are pad tokens, this function would make a completion mask that would 0 out the pad + and p tokens. so in this example [0,0,0,1,1,1,0,0,0] + """ + batch_size, completion_len = completion_input_ids.shape + device = completion_input_ids.device + + num_tokens_to_mask = max_left_pad - left_pad_tokens_per_prompt + + indices = torch.arange(completion_len, device=device).unsqueeze(0) + shift_mask = indices >= num_tokens_to_mask.unsqueeze(1) + + non_padding_mask = (completion_input_ids != pad_token_id) + + final_mask = shift_mask & non_padding_mask + + return final_mask + +def left_pack_padding(tensor: torch.Tensor, pad_id: int) -> torch.Tensor: + """ + Moves all padding tokens in each sequence of a batch to the right. + """ + mask = (tensor != pad_id) + # Must do stable=True since binary mark is unordered + sorted_indices = torch.argsort(mask, dim=1, descending=True, stable=True) + packed_tensor = torch.gather(tensor, 1, sorted_indices) + return packed_tensor + +def align_logprobs_with_mask( + logprob_tensor: torch.Tensor, + attention_mask: torch.Tensor, + pad_value: float = 0.0 +) -> torch.Tensor: + """ + Aligns a log probability tensor with a given attention mask. + """ + + device = logprob_tensor.device + batch_size, logprob_seq_len = logprob_tensor.shape + mask_seq_len = attention_mask.shape[1] + + padded_logprobs = torch.full( + attention_mask.shape, + fill_value=pad_value, + dtype=logprob_tensor.dtype, + device=device + ) + + left_pad_counts = torch.argmax(attention_mask, dim=1) + + cols = torch.arange(logprob_seq_len, device=device) + dest_indices = left_pad_counts.unsqueeze(1) + cols + + # Create destination row indices + # Shape: [batch_size, logprob_seq_len] + row_indices = torch.arange(batch_size, device=device).unsqueeze(1).expand_as(dest_indices) + + # --- 4. Filter out-of-bounds indices and perform assignment --- + # Create a mask to identify only the indices that are within the bounds + # of the target tensor's sequence length. + valid_mask = dest_indices < mask_seq_len + + # Use this mask to select only the valid row indices, column indices, + # and the corresponding values from the logprob tensor. + # This flattens the selected elements into 1D tensors. + valid_rows = row_indices[valid_mask] + valid_cols = dest_indices[valid_mask] + valid_vals = logprob_tensor[valid_mask] + + # Place the valid values into their correct positions in the padded tensor + # using a single, efficient advanced indexing operation. + padded_logprobs[valid_rows, valid_cols] = valid_vals + + return padded_logprobs +@dataclass +class UnslothBCOConfig(BCOConfig): + """ + +Configuration class for the [`BCOTrainer`]. + +This class includes only the parameters that are specific to BCO training. 
For a full list of training arguments, +please refer to the [`~transformers.TrainingArguments`] documentation. Note that default values in this class may +differ from those in [`~transformers.TrainingArguments`]. + +Using [`~transformers.HfArgumentParser`] we can turn this class into +[argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the +command line. + +Parameters: + max_length (`int` or `None`, *optional*, defaults to `1024`): + Maximum length of the sequences (prompt + completion) in the batch. This argument is required if you want + to use the default data collator. + max_prompt_length (`int` or `None`, *optional*, defaults to `512`): + Maximum length of the prompt. This argument is required if you want to use the default data collator. + max_completion_length (`int` or `None`, *optional*, defaults to `None`): + Maximum length of the completion. This argument is required if you want to use the default data collator + and your model is an encoder-decoder. + beta (`float`, *optional*, defaults to `0.1`): + Parameter controlling the deviation from the reference model. Higher β means less deviation from the + reference model. + label_pad_token_id (`int`, *optional*, defaults to `-100`): + Label pad token id. This argument is required if you want to use the default data collator. + padding_value (`int` or `None`, *optional*, defaults to `None`): + Padding value to use. If `None`, the padding value of the tokenizer is used. + truncation_mode (`str`, *optional*, defaults to `"keep_end"`): + Truncation mode to use when the prompt is too long. Possible values are `"keep_end"` or `"keep_start"`. + This argument is required if you want to use the default data collator. + disable_dropout (`bool`, *optional*, defaults to `True`): + Whether to disable dropout in the model and reference model. + generate_during_eval (`bool`, *optional*, defaults to `False`): + If `True`, generates and logs completions from both the model and the reference model to W&B or Comet + during evaluation. + is_encoder_decoder (`bool` or `None`, *optional*, defaults to `None`): + When using the `model_init` argument (callable) to instantiate the model instead of the `model` argument, + you need to specify if the model returned by the callable is an encoder-decoder model. + precompute_ref_log_probs (`bool`, *optional*, defaults to `False`): + Whether to precompute reference model log probabilities for training and evaluation datasets. This is + useful when training without the reference model to reduce the total GPU memory needed. + model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`): + Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the model from a + string. + ref_model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`): + Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the reference model + from a string. + dataset_num_proc (`int` or `None`, *optional*, defaults to `None`): + Number of processes to use for processing the dataset. + prompt_sample_size (`int`, *optional*, defaults to `1024`): + Number of prompts that are fed to density ratio classifier. + min_density_ratio (`float`, *optional*, defaults to `0.5`): + Minimum value of the density ratio. The estimated density ratio is clamped to this value. + max_density_ratio (`float`, *optional*, defaults to `10.0`): + Maximum value of the density ratio. 
The estimated density ratio is clamped to this value. + + """ + vllm_sampling_params: Optional[Any] = field( + default = None, + metadata = {'help': 'vLLM SamplingParams'}, + ) + unsloth_num_chunks : Optional[int] = field( + default = -1, + metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'}, + ) + max_seq_length : Optional[int] = field( + default = None, + metadata = {'help': 'Maximum sequence length to truncate to.'}, + ) + def __init__( + self, + output_dir = None, + overwrite_output_dir = None, + do_train = False, + do_eval = False, + do_predict = False, + eval_strategy = 'no', + prediction_loss_only = False, + per_device_train_batch_size = 4, + per_device_eval_batch_size = 4, + per_gpu_train_batch_size = None, + per_gpu_eval_batch_size = None, + gradient_accumulation_steps = 2, + eval_accumulation_steps = 2, + eval_delay = 0, + torch_empty_cache_steps = 250, + learning_rate = 5e-05, + weight_decay = 0.01, + adam_beta1 = 0.9, + adam_beta2 = 0.999, + adam_epsilon = 1e-08, + max_grad_norm = 1.0, + num_train_epochs = 3.0, + max_steps = -1, + lr_scheduler_type = 'linear', + warmup_ratio = 0.1, + warmup_steps = 0, + log_level = 'passive', + log_level_replica = 'warning', + log_on_each_node = True, + logging_dir = None, + logging_strategy = 'steps', + logging_first_step = False, + logging_steps = 1, + logging_nan_inf_filter = False, + save_strategy = 'steps', + save_steps = 500, + save_total_limit = None, + save_safetensors = True, + save_on_each_node = False, + save_only_model = False, + restore_callback_states_from_checkpoint = False, + no_cuda = False, + use_cpu = False, + use_mps_device = False, + seed = 3407, + data_seed = 3407, + jit_mode_eval = False, + bf16 = False, + fp16 = False, + fp16_opt_level = 'O1', + half_precision_backend = 'auto', + bf16_full_eval = False, + fp16_full_eval = False, + tf32 = None, + local_rank = -1, + ddp_backend = None, + tpu_num_cores = None, + tpu_metrics_debug = False, + debug = '', + dataloader_drop_last = False, + eval_steps = None, + dataloader_num_workers = 0, + dataloader_prefetch_factor = None, + past_index = -1, + run_name = None, + disable_tqdm = None, + remove_unused_columns = True, + label_names = None, + load_best_model_at_end = False, + metric_for_best_model = None, + greater_is_better = None, + ignore_data_skip = False, + fsdp = None, + fsdp_min_num_params = 0, + fsdp_config = None, + fsdp_transformer_layer_cls_to_wrap = None, + accelerator_config = None, + parallelism_config = None, + deepspeed = None, + label_smoothing_factor = 0.0, + optim = 'adamw_8bit', + optim_args = None, + adafactor = False, + group_by_length = False, + length_column_name = 'length', + report_to = None, + project = 'huggingface', + trackio_space_id = 'trackio', + ddp_find_unused_parameters = None, + ddp_bucket_cap_mb = None, + ddp_broadcast_buffers = None, + dataloader_pin_memory = True, + dataloader_persistent_workers = False, + skip_memory_metrics = True, + use_legacy_prediction_loop = False, + push_to_hub = False, + resume_from_checkpoint = None, + hub_model_id = None, + hub_strategy = 'every_save', + hub_token = None, + hub_private_repo = None, + hub_always_push = False, + hub_revision = None, + gradient_checkpointing = True, + gradient_checkpointing_kwargs = None, + include_inputs_for_metrics = False, + eval_do_concat_batches = True, + fp16_backend = 'auto', + push_to_hub_model_id = None, + push_to_hub_organization = None, + push_to_hub_token = None, + mp_parameters = '', + auto_find_batch_size = False, + full_determinism = False, + 
torchdynamo = None, + ray_scope = 'last', + ddp_timeout = 1800, + torch_compile = False, + torch_compile_backend = None, + torch_compile_mode = None, + include_tokens_per_second = False, + include_num_input_tokens_seen = False, + neftune_noise_alpha = None, + optim_target_modules = None, + batch_eval_metrics = False, + eval_on_start = False, + use_liger_kernel = False, + liger_kernel_config = None, + eval_use_gather_object = False, + average_tokens_across_devices = True, + max_length = 1024, + max_prompt_length = 512, + max_completion_length = None, + beta = 0.1, + label_pad_token_id = -100, + padding_value = None, + truncation_mode = 'keep_end', + disable_dropout = True, + generate_during_eval = False, + is_encoder_decoder = None, + precompute_ref_log_probs = False, + model_init_kwargs = None, + ref_model_init_kwargs = None, + dataset_num_proc = None, + prompt_sample_size = 1024, + min_density_ratio = 0.5, + max_density_ratio = 10.0, + vllm_sampling_params = None, + unsloth_num_chunks = -1, + max_seq_length = None, + **kwargs, + ): + if learning_rate < 1e-7: print(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!') + if learning_rate > 1: print(f'Unsloth: Your learning rate of `{learning_rate}` is way too larger > 1! Consider decreasing it to 1e-1, otherwise gradient updates will explode!') + if output_dir is None and save_strategy == 'steps' and save_steps == 500: + output_dir = 'unsloth_training_checkpoints' + save_strategy = 'no' + if dataset_num_proc is None: + from multiprocessing import cpu_count + dataset_num_proc = min(max(cpu_count()+4, 2), 64) + + super().__init__( + output_dir = output_dir, + overwrite_output_dir = overwrite_output_dir, + do_train = do_train, + do_eval = do_eval, + do_predict = do_predict, + eval_strategy = eval_strategy, + prediction_loss_only = prediction_loss_only, + per_device_train_batch_size = per_device_train_batch_size, + per_device_eval_batch_size = per_device_eval_batch_size, + per_gpu_train_batch_size = per_gpu_train_batch_size, + per_gpu_eval_batch_size = per_gpu_eval_batch_size, + gradient_accumulation_steps = gradient_accumulation_steps, + eval_accumulation_steps = eval_accumulation_steps, + eval_delay = eval_delay, + torch_empty_cache_steps = torch_empty_cache_steps, + learning_rate = learning_rate, + weight_decay = weight_decay, + adam_beta1 = adam_beta1, + adam_beta2 = adam_beta2, + adam_epsilon = adam_epsilon, + max_grad_norm = max_grad_norm, + num_train_epochs = num_train_epochs, + max_steps = max_steps, + lr_scheduler_type = lr_scheduler_type, + warmup_ratio = warmup_ratio, + warmup_steps = warmup_steps, + log_level = log_level, + log_level_replica = log_level_replica, + log_on_each_node = log_on_each_node, + logging_dir = logging_dir, + logging_strategy = logging_strategy, + logging_first_step = logging_first_step, + logging_steps = logging_steps, + logging_nan_inf_filter = logging_nan_inf_filter, + save_strategy = save_strategy, + save_steps = save_steps, + save_total_limit = save_total_limit, + save_safetensors = save_safetensors, + save_on_each_node = save_on_each_node, + save_only_model = save_only_model, + restore_callback_states_from_checkpoint = restore_callback_states_from_checkpoint, + no_cuda = no_cuda, + use_cpu = use_cpu, + use_mps_device = use_mps_device, + seed = seed, + data_seed = data_seed, + jit_mode_eval = jit_mode_eval, + bf16 = bf16, + fp16 = fp16, + fp16_opt_level = fp16_opt_level, + half_precision_backend = 
half_precision_backend, + bf16_full_eval = bf16_full_eval, + fp16_full_eval = fp16_full_eval, + tf32 = tf32, + local_rank = local_rank, + ddp_backend = ddp_backend, + tpu_num_cores = tpu_num_cores, + tpu_metrics_debug = tpu_metrics_debug, + debug = debug, + dataloader_drop_last = dataloader_drop_last, + eval_steps = eval_steps, + dataloader_num_workers = dataloader_num_workers, + dataloader_prefetch_factor = dataloader_prefetch_factor, + past_index = past_index, + run_name = run_name, + disable_tqdm = disable_tqdm, + remove_unused_columns = remove_unused_columns, + label_names = label_names, + load_best_model_at_end = load_best_model_at_end, + metric_for_best_model = metric_for_best_model, + greater_is_better = greater_is_better, + ignore_data_skip = ignore_data_skip, + fsdp = fsdp, + fsdp_min_num_params = fsdp_min_num_params, + fsdp_config = fsdp_config, + fsdp_transformer_layer_cls_to_wrap = fsdp_transformer_layer_cls_to_wrap, + accelerator_config = accelerator_config, + parallelism_config = parallelism_config, + deepspeed = deepspeed, + label_smoothing_factor = label_smoothing_factor, + optim = optim, + optim_args = optim_args, + adafactor = adafactor, + group_by_length = group_by_length, + length_column_name = length_column_name, + report_to = report_to, + project = project, + trackio_space_id = trackio_space_id, + ddp_find_unused_parameters = ddp_find_unused_parameters, + ddp_bucket_cap_mb = ddp_bucket_cap_mb, + ddp_broadcast_buffers = ddp_broadcast_buffers, + dataloader_pin_memory = dataloader_pin_memory, + dataloader_persistent_workers = dataloader_persistent_workers, + skip_memory_metrics = skip_memory_metrics, + use_legacy_prediction_loop = use_legacy_prediction_loop, + push_to_hub = push_to_hub, + resume_from_checkpoint = resume_from_checkpoint, + hub_model_id = hub_model_id, + hub_strategy = hub_strategy, + hub_token = hub_token, + hub_private_repo = hub_private_repo, + hub_always_push = hub_always_push, + hub_revision = hub_revision, + gradient_checkpointing = gradient_checkpointing, + gradient_checkpointing_kwargs = gradient_checkpointing_kwargs, + include_inputs_for_metrics = include_inputs_for_metrics, + eval_do_concat_batches = eval_do_concat_batches, + fp16_backend = fp16_backend, + push_to_hub_model_id = push_to_hub_model_id, + push_to_hub_organization = push_to_hub_organization, + push_to_hub_token = push_to_hub_token, + mp_parameters = mp_parameters, + auto_find_batch_size = auto_find_batch_size, + full_determinism = full_determinism, + torchdynamo = torchdynamo, + ray_scope = ray_scope, + ddp_timeout = ddp_timeout, + torch_compile = torch_compile, + torch_compile_backend = torch_compile_backend, + torch_compile_mode = torch_compile_mode, + include_tokens_per_second = include_tokens_per_second, + include_num_input_tokens_seen = include_num_input_tokens_seen, + neftune_noise_alpha = neftune_noise_alpha, + optim_target_modules = optim_target_modules, + batch_eval_metrics = batch_eval_metrics, + eval_on_start = eval_on_start, + use_liger_kernel = use_liger_kernel, + liger_kernel_config = liger_kernel_config, + eval_use_gather_object = eval_use_gather_object, + average_tokens_across_devices = average_tokens_across_devices, + max_length = max_length, + max_prompt_length = max_prompt_length, + max_completion_length = max_completion_length, + beta = beta, + label_pad_token_id = label_pad_token_id, + padding_value = padding_value, + truncation_mode = truncation_mode, + disable_dropout = disable_dropout, + generate_during_eval = generate_during_eval, + is_encoder_decoder = 
is_encoder_decoder, + precompute_ref_log_probs = precompute_ref_log_probs, + model_init_kwargs = model_init_kwargs, + ref_model_init_kwargs = ref_model_init_kwargs, + dataset_num_proc = dataset_num_proc, + prompt_sample_size = prompt_sample_size, + min_density_ratio = min_density_ratio, + max_density_ratio = max_density_ratio, **kwargs) + self.vllm_sampling_params = vllm_sampling_params + self.unsloth_num_chunks = unsloth_num_chunks + self.max_seq_length = max_seq_length +pass + +class _UnslothBCOTrainer(Trainer): + r""" + Initialize BCOTrainer from [BCO](https://huggingface.co/papers/2404.04656) paper. + + Args: + model (`transformers.PreTrainedModel`): + The model to train, preferably an `AutoModelForCausalLM`. + ref_model (`PreTrainedModelWrapper`): + Hugging Face transformer model with a causal language modelling head. Used for implicit reward computation + and loss. If no reference model is provided, the trainer will create a reference model with the same + architecture as the model to be optimized. + args (`BCOConfig`): + The arguments to use for training. + train_dataset (`datasets.Dataset`): + The dataset to use for training. + eval_dataset (`datasets.Dataset`): + The dataset to use for evaluation. + processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`): + Processing class used to process the data. If provided, will be used to automatically process the inputs + for the model, and it will be saved along with the model to make it easier to rerun an interrupted training or + reuse the fine-tuned model. + data_collator (`transformers.DataCollator`, *optional*, defaults to `None`): + The data collator to use for training. If None is specified, the default data collator + (`DPODataCollatorWithPadding`) will be used, which will pad the sequences to the maximum length of the + sequences in the batch, given a dataset of paired sequences. + model_init (`Callable[[], transformers.PreTrainedModel]`): + The model initializer to use for training. If None is specified, the default model initializer will be + used. + callbacks (`list[transformers.TrainerCallback]`): + The callbacks to use for training. + optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`): + The optimizer and scheduler to use for training. + preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`): + The function to use to preprocess the logits before computing the metrics. + peft_config (`dict`, defaults to `None`): + The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in + a PEFT model. + compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*): + The function to use to compute the metrics. Must take an `EvalPrediction` and return a dictionary mapping + strings to metric values. + model_adapter_name (`str`, defaults to `None`): + Name of the train target PEFT adapter, when using LoRA with multiple adapters. + ref_adapter_name (`str`, defaults to `None`): + Name of the reference PEFT adapter, when using LoRA with multiple adapters. 
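+ embedding_func (`Callable`, *optional*, defaults to `None`): + Embedding function used for underlying distribution matching (UDM); it maps embedding-tokenizer inputs to prompt embeddings. Requires the `scikit-learn` and `joblib` libraries. + embedding_tokenizer ([`~transformers.PreTrainedTokenizerBase`], *optional*, defaults to `None`): + Tokenizer used to prepare prompts for `embedding_func` when matching the underlying distribution. + + Example (a minimal usage sketch, not part of the generated code; `model`, `tokenizer` and `dataset` are placeholders for your own objects): + + ```python + training_args = BCOConfig(output_dir="bco-model", per_device_train_batch_size=1) + trainer = BCOTrainer(model=model, args=training_args, train_dataset=dataset, processing_class=tokenizer) + trainer.train() + ```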
+ """ + + _tag_names = ["trl", "bco"] + + def __init__( + self, + model: Union[PreTrainedModel, nn.Module, str] = None, + ref_model: Optional[Union[PreTrainedModel, nn.Module, str]] = None, + args: BCOConfig = None, + train_dataset: Optional[Dataset] = None, + eval_dataset: Optional[Union[Dataset, dict[str, Dataset]]] = None, + processing_class: Optional[ + Union[PreTrainedTokenizerBase, BaseImageProcessor, FeatureExtractionMixin, ProcessorMixin] + ] = None, + data_collator: Optional[DataCollator] = None, + model_init: Optional[Callable[[], PreTrainedModel]] = None, + callbacks: Optional[list[TrainerCallback]] = None, + optimizers: tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None), + preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None, + peft_config: Optional[dict] = None, + compute_metrics: Optional[Callable[[EvalLoopOutput], dict]] = None, + model_adapter_name: Optional[str] = None, + ref_adapter_name: Optional[str] = None, + embedding_func: Optional[Callable] = None, + embedding_tokenizer: Optional[PreTrainedTokenizerBase] = None, + ): + if embedding_func is not None and not (is_sklearn_available() and is_joblib_available()): + raise ImportError( + "BCOTrainer with UDM requires the scikit-learn and joblib libraries. Please install it with `pip install scikit-learn joblib`." + ) + + if type(args) is TrainingArguments: + raise ValueError("Please use `BCOConfig` instead `TrainingArguments`.") + + if not isinstance(model, str) and model is not None and ref_model is model: + raise ValueError( + "`model` and `ref_model` cannot be the same object. If you want `ref_model` to be the " + "same as `model`, you must mass a copy of it, or `None` if you use peft." + ) + + if args.model_init_kwargs is None: + model_init_kwargs = {} + elif not isinstance(model, str): + raise ValueError("You passed model_kwargs to the BCOTrainer. But your model is already instantiated.") + else: + model_init_kwargs = args.model_init_kwargs + dtype = model_init_kwargs.get("dtype") + if dtype is not None: + # Convert to `torch.dtype` if an str is passed + if isinstance(dtype, str) and dtype != "auto": + dtype = getattr(torch, dtype) + if dtype != "auto" and not isinstance(dtype, torch.dtype): + raise ValueError( + f"Invalid `dtype` passed to the BCOConfig. Expected a string with either `torch.dtype` or 'auto', but got {dtype}." + ) + model_init_kwargs["dtype"] = dtype + + if args.ref_model_init_kwargs is None: + ref_model_init_kwargs = {} + elif not isinstance(ref_model, str): + raise ValueError( + "You passed ref_model_kwargs to the BCOTrainer. But your ref_model is already instantiated." + ) + else: + ref_model_init_kwargs = args.ref_model_init_kwargs + dtype = ref_model_init_kwargs.get("dtype") + if dtype is not None: + # Convert to `torch.dtype` if an str is passed + if isinstance(dtype, str) and dtype != "auto": + dtype = getattr(torch, dtype) + if dtype != "auto" and not isinstance(dtype, torch.dtype): + raise ValueError( + f"Invalid `dtype` passed to the BCOConfig. Expected a string with either `torch.dtype` or 'auto', but got {dtype}." + ) + ref_model_init_kwargs["dtype"] = dtype + + if isinstance(model, str): + model = AutoModelForCausalLM.from_pretrained(model, **model_init_kwargs) + + if isinstance(ref_model, str): + ref_model = AutoModelForCausalLM.from_pretrained(ref_model, **ref_model_init_kwargs) + + # Initialize this variable to False. 
This helps track the case when `peft_module_casting_to_bf16` + # has been called in order to properly call autocast if needed. + self._peft_has_been_casted_to_bf16 = False + + if not is_peft_available() and peft_config is not None: + raise ValueError( + "PEFT is not installed and you passed a `peft_config` in the trainer's kwargs. Please install it with `pip install peft` to use PEFT models." + ) + elif is_peft_available() and peft_config is not None: + # if model is a peft model and we have a peft_config, we merge and unload it first + if isinstance(model, PeftModel): + model = model.merge_and_unload() + + if getattr(model, "is_loaded_in_8bit", False) or getattr(model, "is_loaded_in_4bit", False): + _support_gc_kwargs = hasattr( + args, "gradient_checkpointing_kwargs" + ) and "gradient_checkpointing_kwargs" in list( + inspect.signature(prepare_model_for_kbit_training).parameters + ) + + prepare_model_kwargs = {"use_gradient_checkpointing": args.gradient_checkpointing} + + if _support_gc_kwargs: + prepare_model_kwargs["gradient_checkpointing_kwargs"] = args.gradient_checkpointing_kwargs + + model = prepare_model_for_kbit_training(model, **prepare_model_kwargs) + elif args.gradient_checkpointing: + # For backward compatibility with older versions of transformers + if hasattr(model, "enable_input_require_grads"): + model.enable_input_require_grads() + else: + + def make_inputs_require_grad(module, input, output): + output.requires_grad_(True) + + model.get_input_embeddings().register_forward_hook(make_inputs_require_grad) + + # get peft model with the given config + model = model + if args.bf16 and getattr(model, "is_loaded_in_4bit", False): + peft_module_casting_to_bf16(model) + # If args.bf16, we need to explicitly call `generate` with a torch amp autocast context manager + self._peft_has_been_casted_to_bf16 = True + + # For models that use gradient_checkpointing, we need to attach a hook that enables input + # to explicitly have `requires_grad=True`, otherwise training will either silently + # fail or fail completely. + elif args.gradient_checkpointing: + # For backward compatibility with older versions of transformers + if hasattr(model, "enable_input_require_grads"): + model.enable_input_require_grads() + else: + + def make_inputs_require_grad(module, input, output): + output.requires_grad_(True) + + model.get_input_embeddings().register_forward_hook(make_inputs_require_grad) + + if args.generate_during_eval and not (is_wandb_available() or is_comet_available()): + raise ValueError( + "`generate_during_eval=True` requires Weights and Biases or Comet to be installed." + " Please install `wandb` or `comet-ml` to resolve." 
+ ) + + if model is not None: + self.is_encoder_decoder = model.config.is_encoder_decoder + elif args.is_encoder_decoder is None: + raise ValueError("When no model is provided, you need to pass the parameter is_encoder_decoder.") + else: + self.is_encoder_decoder = args.is_encoder_decoder + + self.is_peft_model = is_peft_available() and isinstance(model, PeftModel) + self.model_adapter_name = model_adapter_name + self.ref_adapter_name = ref_adapter_name + + if ref_model: + self.ref_model = ref_model + elif self.is_peft_model or args.precompute_ref_log_probs: + # The `model` with adapters turned off will be used as the reference model + self.ref_model = None + else: + self.ref_model = create_reference_model(model) + + if processing_class is None: + raise ValueError( + "max_length or a processing_class must be specified when using the default DPODataCollatorWithPadding" + ) + if args.max_length is None: + logger.warning( + "When using DPODataCollatorWithPadding, you should set `max_length` in the `BCOConfig`. " + "It will be set to `512` by default, but you should do it yourself in the future.", + ) + max_length = 512 + if args.max_length is not None: + max_length = args.max_length + + if args.max_prompt_length is None: + logger.warning( + "When using DPODataCollatorWithPadding, you should set `max_prompt_length` in the `BCOConfig`. " + "It will be set to `128` by default, but you should do it yourself in the future.", + ) + max_prompt_length = 128 + if args.max_prompt_length is not None: + max_prompt_length = args.max_prompt_length + + max_completion_length = None + if args.max_completion_length is None and self.is_encoder_decoder: + logger.warning( + "When using DPODataCollatorWithPadding with an encoder-decoder architecture, you should set `max_completion_length` in the BCOTrainer's init." + " It will be set to `128` by default, but you should do it yourself in the future.", + ) + max_completion_length = 128 + if args.max_completion_length is not None and self.is_encoder_decoder: + max_completion_length = args.max_completion_length + + if data_collator is None: + data_collator = DPODataCollatorWithPadding( + pad_token_id=processing_class.pad_token_id, + label_pad_token_id=args.label_pad_token_id, + is_encoder_decoder=self.is_encoder_decoder, + ) + + if args.remove_unused_columns: + args.remove_unused_columns = False + # warn users + logger.warning( + "When using DPODataCollatorWithPadding, you should set `remove_unused_columns=False` in your BCOConfig." + " We have set it for you, but you should do it yourself in the future.", + ) + + self.use_dpo_data_collator = True + else: + self.use_dpo_data_collator = False + + # Disable dropout in the model and reference model + if args.disable_dropout: + disable_dropout_in_model(model) + if self.ref_model is not None: + disable_dropout_in_model(self.ref_model) + + self.max_length = max_length + self.generate_during_eval = args.generate_during_eval + self.label_pad_token_id = args.label_pad_token_id + self.padding_value = args.padding_value if args.padding_value is not None else processing_class.pad_token_id + self.max_prompt_length = max_prompt_length + self.truncation_mode = args.truncation_mode + self.max_completion_length = max_completion_length + self.precompute_ref_log_probs = args.precompute_ref_log_probs + + # Since ref_logs are precomputed on the first call to get_train/eval_dataloader, + # keep track of the first call to avoid recomputation on future calls + self._precomputed_train_ref_log_probs = False + self._precomputed_eval_ref_log_probs = 
False + + # metric + self._stored_metrics = defaultdict(lambda: defaultdict(list)) + + # BCO parameter + self.beta = args.beta + self.aux_loss_enabled = getattr(model.config, "output_router_logits", False) + self.aux_loss_coef = getattr(model.config, "router_aux_loss_coef", 0.0) + if self.aux_loss_enabled and self.aux_loss_coef == 0.0: + logger.warning( + "You set `output_router_logits` to `True` in the model config, but `router_aux_loss_coef` is set to " + "`0.0`, meaning the auxiliary loss will not be used. Either set `router_aux_loss_coef` to a value " + "greater than `0.0`, or set `output_router_logits` to `False` if you don't want to use the auxiliary " + "loss.", + ) + + # Underlying Distribution Matching argument + self.embedding_func = embedding_func + self.embedding_tokenizer = embedding_tokenizer + + # The trainer estimates the number of FLOPs [floating-point operations] using the number of elements in the + # input tensor associated with the key "input_ids". However, in BCO, the sampled data does not include the + # "input_ids" key. Instead, the available keys are "prompt_input_ids" and "completion_input_ids". As a result, + # the trainer issues the warning: "Could not estimate the number of tokens of the input, floating-point + # operations will not be computed." To suppress this warning, we set the "estimate_tokens" key in the model's + # "warnings_issued" dictionary to True. This acts as a flag to indicate that the warning has already been + # issued. + model.warnings_issued["estimate_tokens"] = True + + with PartialState().main_process_first(): + # Extract the prompt if needed + train_dataset = train_dataset.map( + maybe_extract_prompt, num_proc=args.dataset_num_proc, desc="Extracting prompt from train dataset" + ) + # Unpair the dataset if needed + train_dataset = maybe_unpair_preference_dataset( + train_dataset, args.dataset_num_proc, desc="Unpairing train dataset" + ) + # Apply the chat template if needed + train_dataset = train_dataset.map( + maybe_apply_chat_template, fn_kwargs={"tokenizer": processing_class}, num_proc=args.dataset_num_proc + ) + if eval_dataset is not None: + # Extract the prompt if needed + eval_dataset = eval_dataset.map( + maybe_extract_prompt, num_proc=args.dataset_num_proc, desc="Extracting prompt from eval dataset" + ) + # Unpair the dataset if needed + eval_dataset = maybe_unpair_preference_dataset( + eval_dataset, args.dataset_num_proc, desc="Unpairing eval dataset" + ) + eval_dataset = eval_dataset.map( + maybe_apply_chat_template, + fn_kwargs={"tokenizer": processing_class}, + num_proc=args.dataset_num_proc, + ) + + # Tokenize and prepare the training datasets + train_dataset = train_dataset.map( + _tokenize, + batched=True, + fn_kwargs={"tokenizer": processing_class, "embedding_tokenizer": self.embedding_tokenizer}, + num_proc=args.dataset_num_proc, + desc="Tokenizing train dataset", + ) + + # Prepare the datasets + fn_kwargs = { + "prefix": "", + "is_encoder_decoder": self.is_encoder_decoder, + "tokenizer": processing_class, + "max_length": self.max_length, + "truncation_mode": self.truncation_mode, + "label_pad_token_id": self.label_pad_token_id, + "max_prompt_length": self.max_prompt_length, + "max_completion_length": self.max_completion_length, + } + train_dataset = train_dataset.map( + _process_tokens, + fn_kwargs=fn_kwargs, + num_proc=args.dataset_num_proc, + desc="Processing tokenized train dataset", + ) + + if eval_dataset is not None: + # Tokenize + eval_dataset = eval_dataset.map( + _tokenize, + fn_kwargs={"tokenizer": 
processing_class, "embedding_tokenizer": self.embedding_tokenizer}, + batched=True, + num_proc=args.dataset_num_proc, + desc="Tokenizing eval dataset", + ) + + # Process + fn_kwargs = { + "prefix": "", + "is_encoder_decoder": self.is_encoder_decoder, + "tokenizer": processing_class, + "max_length": self.max_length, + "truncation_mode": self.truncation_mode, + "label_pad_token_id": self.label_pad_token_id, + "max_prompt_length": self.max_prompt_length, + "max_completion_length": self.max_completion_length, + } + eval_dataset = eval_dataset.map( + _process_tokens, + fn_kwargs=fn_kwargs, + num_proc=args.dataset_num_proc, + desc="Processing tokenized eval dataset", + ) + + desirable = train_dataset.filter( + lambda x: x["label"], num_proc=args.dataset_num_proc, desc="Filtering desirable examples" + ) + undesirable = train_dataset.filter( + lambda x: not x["label"], num_proc=args.dataset_num_proc, desc="Filtering undesirable examples" + ) + + super().__init__( + model=model, + args=args, + data_collator=data_collator, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + processing_class=processing_class, + model_init=model_init, + compute_metrics=compute_metrics, + callbacks=callbacks, + optimizers=optimizers, + preprocess_logits_for_metrics=preprocess_logits_for_metrics, + ) + + # Gradient accumulation requires scaled loss. Normally, loss scaling in the parent class depends on whether the + # model accepts loss-related kwargs. Since we compute our own loss, this check is irrelevant. We set + # self.model_accepts_loss_kwargs to False to enable scaling. + self.model_accepts_loss_kwargs = False + + # Add tags for models that have been loaded with the correct transformers version + if hasattr(self.model, "add_model_tags"): + self.model.add_model_tags(self._tag_names) + + if not hasattr(self, "accelerator"): + raise AttributeError( + "Your `Trainer` does not have an `accelerator` object. Consider upgrading `transformers`." + ) + + # Deepspeed Zero-3 does not support precompute_ref_log_probs + if self.is_deepspeed_enabled: + if self.accelerator.state.deepspeed_plugin.zero_stage == 3 and self.precompute_ref_log_probs: + raise ValueError( + "You cannot use `precompute_ref_log_probs=True` with Deepspeed ZeRO-3. Please set `precompute_ref_log_probs=False`." + ) + + if self.ref_model is None: + if not (self.is_peft_model or self.precompute_ref_log_probs): + raise ValueError( + "No reference model and model is not a Peft model. 
Try setting `precompute_ref_log_probs=True`" + ) + else: + if self.is_deepspeed_enabled: + self.ref_model = prepare_deepspeed(self.ref_model, self.accelerator) + else: + self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True) + + self.running = RunningMoments(accelerator=self.accelerator) + + if self.embedding_func is None or args.resume_from_checkpoint: + return + + chosen_embeddings = self._get_sample_prompt_embeddings(desirable, sample_size=self.args.prompt_sample_size) + rejected_embeddings = self._get_sample_prompt_embeddings(undesirable, sample_size=self.args.prompt_sample_size) + + embeddings = torch.cat((chosen_embeddings, rejected_embeddings), dim=0) + labels = torch.cat( + (torch.ones_like(chosen_embeddings[:, 0]), torch.zeros_like(rejected_embeddings[:, 0])), dim=0 + ) + + self.clf = LogisticRegression(class_weight="balanced").fit( + embeddings.cpu().float().numpy(), labels.cpu().numpy() + ) + chosen_mean = self.clf.score( + chosen_embeddings.cpu().float().numpy(), torch.ones_like(chosen_embeddings[:, 0]).cpu().numpy() + ) + rejected_mean = self.clf.score( + rejected_embeddings.cpu().float().numpy(), torch.zeros_like(rejected_embeddings[:, 0]).cpu().numpy() + ) + logger.info(f"UDM classifier training scores: chosen: {chosen_mean}, rejected: {rejected_mean}") + + @property + def match_underlying_distribution(self): + return self.embedding_func is not None and self.embedding_tokenizer is not None + + def _get_chosen_prob(self, prompt_embeddings: torch.FloatTensor) -> torch.FloatTensor: + """ + Calculates the probability that the given prompt embeddings come from the desirable dataset. The + probability is computed on each process and then ensembled (averaged) across processes. + """ + dtype = prompt_embeddings.dtype + device = prompt_embeddings.device + rank = self.accelerator.process_index + + padded_prompt_embeddings = self.accelerator.pad_across_processes( + prompt_embeddings, pad_index=self.embedding_tokenizer.pad_token_id + ) + sample_size = padded_prompt_embeddings.shape[0] + nonzero = padded_prompt_embeddings.mean(dim=1) != self.embedding_tokenizer.pad_token_id + prompt_embeddings = self.accelerator.gather(padded_prompt_embeddings) + + # cannot predict when there are no values + if prompt_embeddings.shape[0] == 0: + return torch.tensor([], device=device, dtype=dtype) + + prob = self.clf.predict_proba(prompt_embeddings.cpu().float().numpy())[:, 1] + prob = torch.as_tensor(prob, dtype=dtype, device=device) + prob = self.accelerator.reduce(prob, reduction="mean") + + prob = prob[sample_size * rank : sample_size * (rank + 1)] + prob = prob[nonzero] + + return prob + + def _vectorize_prompt(self, input_ids: torch.LongTensor, attention_mask: torch.LongTensor) -> torch.FloatTensor: + """ + Replaces processing_class.pad_token_id with embedding_tokenizer.pad_token_id and applies self.embedding_func + """ + input_ids = torch.where( + input_ids == self.processing_class.pad_token_id, + self.embedding_tokenizer.pad_token_id, + input_ids, + ) + + with torch.no_grad(): + embeddings = self.embedding_func( + input_ids=input_ids, + attention_mask=attention_mask, + ) + + return embeddings + + def _get_prompt_embeddings( + self, batch: dict[str, Union[list, torch.LongTensor]] + ) -> tuple[torch.FloatTensor, torch.FloatTensor]: + """Extract embeddings from the frozen embedding model""" + + if not self.match_underlying_distribution: + return None, None + + embeddings = self._vectorize_prompt( + input_ids=batch["embedding_input_ids"], + attention_mask=batch["embedding_attention_mask"], + ) 
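+ # Split the prompt embeddings by label: True marks desirable (chosen) prompts, False marks undesirable (rejected) ones.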
+ + labels = torch.tensor(batch["label"], dtype=torch.bool, device=embeddings.device) + chosen_idx = torch.where(labels)[0] + rejected_idx = torch.where(~labels)[0] + + chosen_embeddings = embeddings[chosen_idx, ...] + rejected_embeddings = embeddings[rejected_idx, ...] + + return (chosen_embeddings, rejected_embeddings) + + def _get_sample_prompt_embeddings(self, dataset: Dataset, sample_size: int = 512) -> torch.FloatTensor: + """ + Sample instances from dataset and get prompt embeddings. Used for density ratio classifier training. + """ + n_samples = min(len(dataset), sample_size) + rand_indices = np.random.choice(len(dataset), size=(n_samples,)) + + embedding_dataset = dataset.select(rand_indices) + + dataloader_params = { + "batch_size": self.args.per_device_train_batch_size, + "collate_fn": self.data_collator, + "num_workers": self.args.dataloader_num_workers, + "pin_memory": self.args.dataloader_pin_memory, + "shuffle": False, + } + + # prepare dataloader + data_loader = self.accelerator.prepare(DataLoader(embedding_dataset, **dataloader_params)) + + with torch.no_grad(): + all_embeddings = torch.empty(0) + for padded_batch in tqdm(iterable=data_loader, desc="Building sample prompt embeddings"): + embeddings = self._vectorize_prompt( + input_ids=padded_batch["embedding_input_ids"], + attention_mask=padded_batch["embedding_attention_mask"], + ) + embeddings = self.accelerator.gather_for_metrics(embeddings) + all_embeddings = torch.cat((all_embeddings, embeddings.cpu())) + + return all_embeddings + + def _save_optimizer_and_scheduler(self, output_dir): + output_dir = output_dir if output_dir is not None else self.args.output_dir + super()._save_optimizer_and_scheduler(output_dir) + + if self.accelerator.is_main_process: + # When saving optimizer and scheduler to checkpoint, save also the running delta object. + self.running.save_to_json(os.path.join(output_dir, RUNNING_NAME)) + + if self.match_underlying_distribution: + joblib.dump(self.clf, os.path.join(output_dir, CLF_NAME), compress=True) + + def _load_optimizer_and_scheduler(self, checkpoint): + if checkpoint is None: + logger.warning_once(f"Missing Checkpoint {checkpoint}") + return + + super()._load_optimizer_and_scheduler(checkpoint) + + # when loading optimizer and scheduler from checkpoint, also load the running delta object. + running_file = os.path.join(checkpoint, RUNNING_NAME) + if os.path.isfile(running_file): + self.running = RunningMoments.load_from_json(self.accelerator, running_file) + + if self.match_underlying_distribution: + clf_file = os.path.join(checkpoint, CLF_NAME) + if os.path.isfile(clf_file): + self.clf = joblib.load(clf_file) + + @contextmanager + def null_ref_context(self): + """Context manager for handling null reference model (that is, peft adapter manipulation).""" + with ( + self.accelerator.unwrap_model(self.model).disable_adapter() + if self.is_peft_model and not self.ref_adapter_name + else nullcontext() + ): + if self.ref_adapter_name: + self.model.set_adapter(self.ref_adapter_name) + yield + if self.ref_adapter_name: + self.model.set_adapter(self.model_adapter_name or "default") + + def get_train_dataloader(self) -> DataLoader: + """ + Returns the training [`~torch.utils.data.DataLoader`]. + + Subclass of transformers.src.transformers.trainer.get_train_dataloader to precompute `ref_log_probs`. 
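+ + The reference log probabilities are computed once, stored in a `reference_logps` column on `self.train_dataset`, and reused on subsequent calls.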
+ """ + + if self.precompute_ref_log_probs and not self._precomputed_train_ref_log_probs: + dataloader_params = { + "batch_size": self.args.per_device_train_batch_size, + "collate_fn": self.data_collator, + "num_workers": self.args.dataloader_num_workers, + "pin_memory": self.args.dataloader_pin_memory, + "shuffle": False, + } + + # prepare dataloader + data_loader = self.accelerator.prepare(DataLoader(self.train_dataset, **dataloader_params)) + reference_completion_logps = [] + + for padded_batch in tqdm(iterable=data_loader, desc="Train dataset reference log probs"): + reference_completion_logp = self.compute_reference_log_probs(padded_batch) + + reference_completion_logp = self.accelerator.gather_for_metrics(reference_completion_logp) + reference_completion_logps.append(reference_completion_logp.cpu()) + + self.train_dataset = self.train_dataset.add_column( + name="reference_logps", column=torch.cat(reference_completion_logps).float().numpy() + ) + + self._precomputed_train_ref_log_probs = True + + return super().get_train_dataloader() + + def get_eval_dataloader(self, eval_dataset: Optional[Dataset] = None) -> DataLoader: + """ + Returns the evaluation [`~torch.utils.data.DataLoader`]. + + Subclass of transformers.src.transformers.trainer.get_eval_dataloader to precompute `ref_log_probs`. + + Args: + eval_dataset (`torch.utils.data.Dataset`, *optional*): + If provided, will override `self.eval_dataset`. If it is a [`~datasets.Dataset`], columns not accepted + by the `model.forward()` method are automatically removed. It must implement `__len__`. + """ + if eval_dataset is None and self.eval_dataset is None: + raise ValueError("Trainer: evaluation requires an eval_dataset.") + eval_dataset = eval_dataset if eval_dataset is not None else self.eval_dataset + + if self.precompute_ref_log_probs and not self._precomputed_eval_ref_log_probs: + dataloader_params = { + "batch_size": self.args.per_device_eval_batch_size, + "collate_fn": self.data_collator, + "num_workers": self.args.dataloader_num_workers, + "pin_memory": self.args.dataloader_pin_memory, + "shuffle": False, + } + + # prepare dataloader + data_loader = self.accelerator.prepare(DataLoader(eval_dataset, **dataloader_params)) + + reference_completion_logps = [] + + for padded_batch in tqdm(iterable=data_loader, desc="Eval dataset reference log probs"): + reference_completion_logp = self.compute_reference_log_probs(padded_batch) + + reference_completion_logp = self.accelerator.gather_for_metrics(reference_completion_logp) + reference_completion_logps.append(reference_completion_logp.cpu()) + + eval_dataset = eval_dataset.add_column( + name="reference_logps", column=torch.cat(reference_completion_logps).float().numpy() + ) + + # Save calculated reference_chosen_logps and reference_rejected_logps to the eval_dataset for subsequent runs + if self.eval_dataset is not None: + self.eval_dataset = eval_dataset + self._precomputed_eval_ref_log_probs = True + + return super().get_eval_dataloader(eval_dataset=eval_dataset) + + def compute_reference_log_probs(self, padded_batch: dict) -> dict: + """Computes log probabilities of the reference model for a single padded batch of a BCO specific dataset.""" + with torch.no_grad(): + if self.ref_model is None: + with self.null_ref_context(): + if self.is_encoder_decoder: + completion_logits = self.model( + padded_batch["prompt_input_ids"], + attention_mask=padded_batch["prompt_attention_mask"], + decoder_input_ids=padded_batch.get("completion_decoder_input_ids"), + 
labels=padded_batch["completion_labels"], + ).logits + + else: + completion_logits = self.model( + padded_batch["completion_input_ids"], + attention_mask=padded_batch["completion_attention_mask"], + ).logits + + else: + if self.is_encoder_decoder: + completion_logits = self.ref_model( + padded_batch["prompt_input_ids"], + attention_mask=padded_batch["prompt_attention_mask"], + decoder_input_ids=padded_batch.get("completion_decoder_input_ids"), + labels=padded_batch["completion_labels"], + ).logits + + else: + completion_logits = self.ref_model( + padded_batch["completion_input_ids"], attention_mask=padded_batch["completion_attention_mask"] + ).logits + + completion_logps = self.get_batch_logps( + completion_logits, + padded_batch["completion_labels"], + average_log_prob=False, + is_encoder_decoder=self.is_encoder_decoder, + label_pad_token_id=self.label_pad_token_id, + ) + + return completion_logps + + @staticmethod + def get_batch_logps( + logits: torch.FloatTensor, + labels: torch.LongTensor, + average_log_prob: bool = False, + label_pad_token_id: int = -100, + is_encoder_decoder: bool = False, + ) -> torch.FloatTensor: + """Compute the log probabilities of the given labels under the given logits. + + Args: + logits: Logits of the model (unnormalized). Shape: (batch_size, sequence_length, vocab_size) + labels: + Labels for which to compute the log probabilities. Label tokens with a value of label_pad_token_id are + ignored. Shape: (batch_size, sequence_length) + average_log_prob: + If True, return the average log probability per (non-masked) token. Otherwise, return the sum of the + log probabilities of the (non-masked) tokens. + label_pad_token_id: + The label value to ignore when computing log probabilities. + is_encoder_decoder: + Whether the model is an encoder-decoder model. If True, the labels are not shifted, and the logits are + assumed to already be aligned with the labels. If False, the labels are shifted to the right by one + position, and the logits are assumed to be aligned with the shifted labels. + + Returns: + A tensor of shape (batch_size,) containing the average/sum log probabilities of the given labels under the + given logits. 
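+ + Example (illustrative shapes only, not from the source): for a decoder-only model, `logits` of shape `(2, 10, 32000)` and `labels` of shape `(2, 10)` yield a tensor of shape `(2,)`; internally the labels are shifted by one position against the logits before the per-token log probabilities are summed.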
+ """ + if logits.shape[:-1] != labels.shape: + raise ValueError("Logits (batch and sequence length dim) and labels must have the same shape.") + + if not is_encoder_decoder: + labels = labels[:, 1:].clone() + logits = logits[:, :-1, :] + else: + # Fixes end-dec RuntimeError + labels = labels.clone() + + loss_mask = labels != label_pad_token_id + + # dummy token; we'll ignore the losses on these tokens later + labels[labels == label_pad_token_id] = 0 + + per_token_logps = selective_log_softmax(logits, labels) + + if average_log_prob: + return (per_token_logps * loss_mask).sum(-1) / loss_mask.sum(-1) + else: + return (per_token_logps * loss_mask).sum(-1) + + def forward( + self, model: nn.Module, batch: dict[str, Union[list, torch.LongTensor]] + ) -> tuple[torch.FloatTensor, torch.FloatTensor, torch.FloatTensor, torch.FloatTensor]: + model_kwargs = ( + { + "labels": batch["completion_labels"], + "decoder_input_ids": batch.get("completion_decoder_input_ids"), + } + if self.is_encoder_decoder + else {} + ) + if self.aux_loss_enabled: + model_kwargs["output_router_logits"] = True + + outputs = model( + batch["completion_input_ids"], + attention_mask=batch["completion_attention_mask"], + **model_kwargs, + ) + completion_logits = outputs.logits + + completion_logps = self.get_batch_logps( + completion_logits, + batch["completion_labels"], + average_log_prob=False, + is_encoder_decoder=self.is_encoder_decoder, + label_pad_token_id=self.label_pad_token_id, + ) + + if completion_logps.shape[0] != len(batch["label"]): + raise ValueError( + "There is a mismatch between the number of examples in this batch and the number of " + "examples for which an output sequence was predicted." + ) + + chosen_idx = [i for i in range(completion_logps.shape[0]) if batch["label"][i] is True] + rejected_idx = [i for i in range(completion_logps.shape[0]) if batch["label"][i] is False] + + chosen_logps = completion_logps[chosen_idx, ...] + rejected_logps = completion_logps[rejected_idx, ...] + + chosen_logits = completion_logits[chosen_idx, ...] + rejected_logits = completion_logits[rejected_idx, ...] + + if self.aux_loss_enabled: + return (chosen_logps, rejected_logps, chosen_logits, rejected_logits, outputs.aux_loss) + else: + return (chosen_logps, rejected_logps, chosen_logits, rejected_logits) + + def _get_udm_weight(self, rejected_embeddings: torch.FloatTensor) -> torch.FloatTensor: + prob_desirable = self._get_chosen_prob(rejected_embeddings) + min_ratio = self.args.min_density_ratio + max_ratio = self.args.max_density_ratio + + weight = (prob_desirable / (1 - prob_desirable + 1e-8)).clamp(min=min_ratio, max=max_ratio) + + return weight + + def bco_loss( + self, + policy_chosen_logps: torch.FloatTensor, + policy_rejected_logps: torch.FloatTensor, + reference_chosen_logps: torch.FloatTensor, + reference_rejected_logps: torch.FloatTensor, + chosen_embeddings: Optional[torch.FloatTensor], + rejected_embeddings: Optional[torch.FloatTensor], + do_train: bool = True, + ) -> tuple[torch.FloatTensor, torch.FloatTensor, torch.FloatTensor, torch.FloatTensor]: + """Compute the BCO loss for a batch of policy and reference model log probabilities. + + Args: + policy_chosen_logps: + Log probabilities of the policy model for the chosen responses. Shape: (num(chosen) in batch_size,) + policy_rejected_logps: + Log probabilities of the policy model for the rejected responses. Shape: (num(rejected) in batch_size,) + reference_chosen_logps: + Log probabilities of the reference model for the chosen responses. 
Shape: (num(chosen) in batch_size,) + reference_rejected_logps: + Log probabilities of the reference model for the rejected responses. Shape: (num(rejected) in + batch_size,) + chosen_embeddings: embeddings of desirable prompts + rejected_embeddings: embeddings of undesirable prompts + do_train: whether to update the running delta value. Default is True. + + Returns: + A tuple of four tensors: (losses, chosen_rewards, rejected_rewards, delta). The losses tensor contains the + BCO loss for each example in the batch. The chosen_rewards and rejected_rewards tensors contain the rewards + for the chosen and rejected responses, respectively. The delta value contains the moving average of all + implicit rewards. + """ + + chosen_logratios = policy_chosen_logps - reference_chosen_logps + chosen_rewards = self.beta * chosen_logratios + + rejected_logratios = policy_rejected_logps - reference_rejected_logps + rejected_rewards = self.beta * rejected_logratios + + if do_train: + self.running.update(torch.cat((chosen_rewards, rejected_rewards), 0).detach()) + delta = torch.as_tensor(self.running.mean, device=chosen_rewards.device) + + chosen_losses = -F.logsigmoid(chosen_rewards - delta) + rejected_losses = -F.logsigmoid(-(rejected_rewards - delta)) + + if self.match_underlying_distribution: + chosen_weight = torch.ones_like(chosen_losses) + rejected_weight = self._get_udm_weight(rejected_embeddings) + + losses = torch.cat((chosen_weight * chosen_losses, rejected_weight * rejected_losses), dim=0) + else: + losses = torch.cat((chosen_losses, rejected_losses), dim=0) + + return losses, chosen_rewards, rejected_rewards, delta + + def get_batch_loss_metrics( + self, + model, + batch: dict[str, Union[list, torch.LongTensor]], + do_train: bool = True, + ): + """Compute the BCO loss and other metrics for the given batch of inputs for train or test.""" + metrics = {} + batch = {k: (v.to(self.accelerator.device) if isinstance(v, torch.Tensor) else v) for k, v in batch.items()} + + forward_output = self.forward(model, batch) + ( + policy_chosen_logps, + policy_rejected_logps, + policy_chosen_logits, + policy_rejected_logits, + ) = forward_output[:4] + if self.aux_loss_enabled: + aux_loss = forward_output[4] + + # if reference_logps in batch use them, otherwise use the reference model + if "reference_logps" in batch: + chosen_idx = [i for i in range(batch["reference_logps"].shape[0]) if batch["label"][i] is True] + rejected_idx = [i for i in range(batch["reference_logps"].shape[0]) if batch["label"][i] is False] + + reference_chosen_logps = batch["reference_logps"][chosen_idx, ...] + reference_rejected_logps = batch["reference_logps"][rejected_idx, ...] 
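+ # No precomputed `reference_logps` in the batch: fall back to computing them below with the reference model (or the policy model with adapters disabled).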
+ else: + with torch.no_grad(): + if self.ref_model is None: + with self.null_ref_context(): + ( + reference_chosen_logps, + reference_rejected_logps, + _, + _, + ) = self.forward(self.model, batch)[:4] + else: + ( + reference_chosen_logps, + reference_rejected_logps, + _, + _, + ) = self.forward(self.ref_model, batch)[:4] + + chosen_embeddings, rejected_embeddings = self._get_prompt_embeddings(batch) + + losses, chosen_rewards, rejected_rewards, delta = self.bco_loss( + policy_chosen_logps, + policy_rejected_logps, + reference_chosen_logps, + reference_rejected_logps, + chosen_embeddings, + rejected_embeddings, + do_train=do_train, + ) + metrics["delta"] = self.accelerator.gather_for_metrics(delta).mean().item() + + num_chosen = torch.Tensor([len(chosen_rewards)]).to(self.accelerator.device) + num_rejected = torch.Tensor([len(rejected_rewards)]).to(self.accelerator.device) + + all_num_chosen = self.accelerator.gather_for_metrics(num_chosen).sum().item() + all_num_rejected = self.accelerator.gather_for_metrics(num_rejected).sum().item() + + if all_num_chosen > 0: + metrics["rewards/chosen_sum"] = ( + self.accelerator.gather_for_metrics(chosen_rewards.nansum()).nansum().item() + ) + metrics["logps/chosen_sum"] = ( + self.accelerator.gather_for_metrics(policy_chosen_logps.nansum()).nansum().item() + ) + metrics["logits/chosen_sum"] = ( + self.accelerator.gather_for_metrics(policy_chosen_logits.nansum()).nansum().item() + ) + metrics["count/chosen"] = all_num_chosen + + if all_num_rejected > 0: + metrics["rewards/rejected_sum"] = ( + self.accelerator.gather_for_metrics(rejected_rewards.nansum()).nansum().item() + ) + metrics["logps/rejected_sum"] = ( + self.accelerator.gather_for_metrics(policy_rejected_logps.nansum()).nansum().item() + ) + metrics["logits/rejected_sum"] = ( + self.accelerator.gather_for_metrics(policy_rejected_logits.nansum()).nansum().item() + ) + metrics["count/rejected"] = all_num_rejected + + loss = losses.nanmean() + if self.aux_loss_enabled: + loss += self.aux_loss_coef * aux_loss + + return loss, metrics + + def compute_loss( + self, + model: Union[PreTrainedModel, nn.Module], + inputs: dict[str, Union[torch.Tensor, Any]], + return_outputs=False, + num_items_in_batch=None, + ) -> Union[torch.Tensor, tuple[torch.Tensor, dict[str, torch.Tensor]]]: + compute_loss_context_manager = ( + autocast(self.accelerator.device.type) if self._peft_has_been_casted_to_bf16 else nullcontext() + ) + + with compute_loss_context_manager: + loss, metrics = self.get_batch_loss_metrics(model, inputs) + + # Make sure to move the loss to the device the original accumulating loss is at back in the `Trainer` class: + loss = loss.to(self.args.device) + # force log the metrics + if self.accelerator.is_main_process: + self.store_metrics(metrics, train_eval="train") + + if return_outputs: + return (loss, metrics) + return loss + + def store_metrics(self, metrics: dict[str, float], train_eval: Literal["train", "eval"] = "train") -> None: + for key, value in metrics.items(): + self._stored_metrics[train_eval][key].append(value) + + def _get_train_sampler(self, dataset: Optional[Dataset] = None) -> Optional[torch.utils.data.Sampler]: + if dataset is None: + dataset = self.train_dataset + if dataset is None or not has_length(dataset): + return None + return SequentialSampler(dataset) + + def generate_from_model_and_ref(self, model, batch: dict[str, torch.LongTensor]) -> tuple[str, str]: + """Generate samples from the model and reference model for the given batch of inputs.""" + + # If one uses 
`generate_during_eval` with peft + bf16, we need to explicitly call generate with + # the torch amp context manager as some hidden states are silently casted to full precision. + generate_context_manager = ( + autocast(self.accelerator.device.type) if self._peft_has_been_casted_to_bf16 else nullcontext() + ) + with generate_context_manager: + policy_output = model.generate( + input_ids=batch["prompt_input_ids"], + attention_mask=batch["prompt_attention_mask"], + max_length=self.max_length, + do_sample=True, + pad_token_id=self.processing_class.pad_token_id, + ) + + # if reference_output in batch use that otherwise use the reference model + if "reference_output" in batch: + reference_output = batch["reference_output"] + else: + if self.ref_model is None: + with self.null_ref_context(): + reference_output = self.model.generate( + input_ids=batch["prompt_input_ids"], + attention_mask=batch["prompt_attention_mask"], + max_length=self.max_length, + do_sample=True, + pad_token_id=self.processing_class.pad_token_id, + ) + else: + reference_output = self.ref_model.generate( + input_ids=batch["prompt_input_ids"], + attention_mask=batch["prompt_attention_mask"], + max_length=self.max_length, + do_sample=True, + pad_token_id=self.processing_class.pad_token_id, + ) + + policy_output = pad_to_length(policy_output, self.max_length, self.processing_class.pad_token_id) + policy_output_decoded = self.processing_class.batch_decode(policy_output, skip_special_tokens=True) + + reference_output = pad_to_length(reference_output, self.max_length, self.processing_class.pad_token_id) + reference_output_decoded = self.processing_class.batch_decode(reference_output, skip_special_tokens=True) + + return policy_output_decoded, reference_output_decoded + + def prediction_step( + self, + model: Union[PreTrainedModel, nn.Module], + inputs: dict[str, Union[torch.Tensor, Any]], + prediction_loss_only: bool, + ignore_keys: Optional[list[str]] = None, + ): + if ignore_keys is None: + if hasattr(model, "config"): + ignore_keys = getattr(model.config, "keys_to_ignore_at_inference", []) + else: + ignore_keys = [] + + prediction_context_manager = ( + autocast(self.accelerator.device.type) if self._peft_has_been_casted_to_bf16 else nullcontext() + ) + with torch.no_grad(), prediction_context_manager: + loss, metrics = self.get_batch_loss_metrics(model, inputs, do_train=False) + + # force log the metrics + if self.accelerator.is_main_process: + self.store_metrics(metrics, train_eval="eval") + + if prediction_loss_only: + return (loss.detach(), None, None) + + # logits for the chosen and rejected samples from model + logits_dict = {} + if "logits/chosen_sum" in metrics: + logits_dict["eval_logits/chosen"] = metrics["logits/chosen_sum"] + if "logits/rejected_sum" in metrics: + logits_dict["eval_logits/rejected"] = metrics["logits/rejected_sum"] + logits = [v for k, v in logits_dict.items() if k not in ignore_keys] + logits = torch.tensor(logits, device=self.accelerator.device) + labels = torch.zeros(logits.shape[0], device=self.accelerator.device) + + return (loss.detach(), logits, labels) + + def evaluation_loop( + self, + dataloader: DataLoader, + description: str, + prediction_loss_only: Optional[bool] = None, + ignore_keys: Optional[list[str]] = None, + metric_key_prefix: str = "eval", + ) -> EvalLoopOutput: + """ + Overriding built-in evaluation loop to store metrics for each batch. Prediction/evaluation loop, shared by + `Trainer.evaluate()` and `Trainer.predict()`. + + Works both with or without labels. 
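+ + When `generate_during_eval` is enabled, a single random batch is sampled, completions from the policy and reference model are decoded, and the results are logged as a table to Weights & Biases or Comet.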
+ """ + + # Sample and save to game log if requested (for one batch to save time) + if self.generate_during_eval: + # Generate random indices within the range of the total number of samples + num_samples = len(dataloader.dataset) + random_indices = random.sample(range(num_samples), k=self.args.eval_batch_size) + + # Use dataloader.dataset.select to get the random batch without iterating over the DataLoader + random_batch_dataset = dataloader.dataset.select(random_indices) + random_batch = self.data_collator(random_batch_dataset) + random_batch = self._prepare_inputs(random_batch) + + target_labels = torch.tensor(random_batch["label"], dtype=torch.bool, device=self.accelerator.device) + target_indices = torch.where(~target_labels)[0] + target_batch = { + "prompt_input_ids": random_batch["prompt_input_ids"][target_indices], + "prompt_attention_mask": random_batch["prompt_attention_mask"][target_indices], + "prompt": itemgetter(*target_indices)(random_batch["prompt"]), + } + policy_output_decoded, ref_output_decoded = self.generate_from_model_and_ref(self.model, target_batch) + + table = pd.DataFrame( + columns=["Prompt", "Policy", "Ref Model"], + data=[ + [prompt, pol[len(prompt) :], ref[len(prompt) :]] + for prompt, pol, ref in zip(target_batch["prompt"], policy_output_decoded, ref_output_decoded) + ], + ) + if "wandb" in self.args.report_to: + wandb.log({"game_log": wandb.Table(data=table)}) + + if "comet_ml" in self.args.report_to: + log_table_to_comet_experiment( + name="game_log.csv", + table=table, + ) + + # Base evaluation + initial_output = super().evaluation_loop( + dataloader, description, prediction_loss_only, ignore_keys, metric_key_prefix + ) + + return initial_output + + def log(self, logs: dict[str, float], start_time: Optional[float] = None) -> None: + """ + Log `logs` on the various objects watching training, including stored metrics. + + Args: + logs (`dict[str, float]`): + The values to log. + start_time (`float` or `None`, *optional*, defaults to `None`): + Start time of the training. 
+ """ + # logs either has 'loss' or 'eval_loss' + train_eval = "train" if "loss" in logs else "eval" + # train metrics should have no prefix, eval should have 'eval_' + prefix = "eval_" if train_eval == "eval" else "" + # accumulate average metrics from sums and lengths + for split in ["chosen", "rejected"]: + if f"count/{split}" in self._stored_metrics[train_eval]: + count_sum = torch.Tensor(self._stored_metrics[train_eval][f"count/{split}"]).sum().item() + for metric in ["rewards", "logps", "logits"]: + logs[f"{prefix}{metric}/{split}"] = ( + torch.Tensor(self._stored_metrics[train_eval][f"{metric}/{split}_sum"]).sum().item() + / count_sum + ) + # delete obsolete metric + del self._stored_metrics[train_eval][f"{metric}/{split}_sum"] + del self._stored_metrics[train_eval][f"count/{split}"] + # calculate reward margin + if f"{prefix}rewards/chosen" in logs and f"{prefix}rewards/rejected" in logs: + logs[f"{prefix}rewards/margins"] = logs[f"{prefix}rewards/chosen"] - logs[f"{prefix}rewards/rejected"] + # Add averaged stored metrics to logs + for key, metrics in self._stored_metrics[train_eval].items(): + logs[f"{prefix}{key}"] = torch.Tensor(metrics).mean().item() + del self._stored_metrics[train_eval] + return super().log(logs, start_time) + + # Ensure the model card is saved along with the checkpoint + def _save_checkpoint(self, model, trial): + if self.args.hub_model_id is None: + model_name = Path(self.args.output_dir).name + else: + model_name = self.args.hub_model_id.split("/")[-1] + self.create_model_card(model_name=model_name) + super()._save_checkpoint(model, trial) + + def create_model_card( + self, + model_name: Optional[str] = None, + dataset_name: Optional[str] = None, + tags: Union[str, list[str], None] = None, + ): + """ + Creates a draft of a model card using the information available to the `Trainer`. + + Args: + model_name (`str` or `None`, *optional*, defaults to `None`): + Name of the model. + dataset_name (`str` or `None`, *optional*, defaults to `None`): + Name of the dataset used for training. + tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`): + Tags to be associated with the model card. 
+ """ + if not self.is_world_process_zero(): + return + + if hasattr(self.model.config, "_name_or_path") and not os.path.isdir(self.model.config._name_or_path): + base_model = self.model.config._name_or_path + else: + base_model = None + + # normalize `tags` to a mutable set + if tags is None: + tags = set() + elif isinstance(tags, str): + tags = {tags} + else: + tags = set(tags) + + if hasattr(self.model.config, "unsloth_version"): + tags.add("unsloth") + + if "JOB_ID" in os.environ: + tags.add("hf_jobs") + + tags.update(self._tag_names) + + # docstyle-ignore + citation = textwrap.dedent("""\ + @article{jung2024binary, + title = {{Binary Classifier Optimization for Large Language Model Alignment}}, + author = {Seungjae Jung and Gunsoo Han and Daniel Wontae Nam and Kyoung{-}Woon On}, + year = 2024, + eprint = {arXiv:2404.04656} + }""") + + model_card = generate_model_card( + base_model=base_model, + model_name=model_name, + hub_model_id=self.hub_model_id, + dataset_name=dataset_name, + tags=tags, + wandb_url=wandb.run.url if is_wandb_available() and wandb.run is not None else None, + comet_url=get_comet_experiment_url(), + trainer_name="BCO", + trainer_citation=citation, + paper_title="Binary Classifier Optimization for Large Language Model Alignment", + paper_id="2404.04656", + ) + + model_card.save(os.path.join(self.args.output_dir, "README.md")) +class UnslothBCOTrainer(_UnslothBCOTrainer): + """ + +Initialize BCOTrainer from [BCO](https://huggingface.co/papers/2404.04656) paper. + +Args: + model (`transformers.PreTrainedModel`): + The model to train, preferably an `AutoModelForSequenceClassification`. + ref_model (`PreTrainedModelWrapper`): + Hugging Face transformer model with a casual language modelling head. Used for implicit reward computation + and loss. If no reference model is provided, the trainer will create a reference model with the same + architecture as the model to be optimized. + args (`BCOConfig`): + The arguments to use for training. + train_dataset (`datasets.Dataset`): + The dataset to use for training. + eval_dataset (`datasets.Dataset`): + The dataset to use for evaluation. + processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`): + Processing class used to process the data. If provided, will be used to automatically process the inputs + for the model, and it will be saved along the model to make it easier to rerun an interrupted training or + reuse the fine-tuned model. + data_collator (`transformers.DataCollator`, *optional*, defaults to `None`): + The data collator to use for training. If None is specified, the default data collator + (`DPODataCollatorWithPadding`) will be used which will pad the sequences to the maximum length of the + sequences in the batch, given a dataset of paired sequences. + model_init (`Callable[[], transformers.PreTrainedModel]`): + The model initializer to use for training. If None is specified, the default model initializer will be + used. + callbacks (`list[transformers.TrainerCallback]`): + The callbacks to use for training. + optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`): + The optimizer and scheduler to use for training. + preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`): + The function to use to preprocess the logits before computing the metrics. 
+ peft_config (`dict`, defaults to `None`): + The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in + a PEFT model. + compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*): + The function to use to compute the metrics. Must take an `EvalPrediction` and return a dictionary mapping + strings to metric values. + model_adapter_name (`str`, defaults to `None`): + Name of the train target PEFT adapter, when using LoRA with multiple adapters. + ref_adapter_name (`str`, defaults to `None`): + Name of the reference PEFT adapter, when using LoRA with multiple adapters. + + """ + def __init__( + self, + model = None, + ref_model = None, + args = None, + train_dataset = None, + eval_dataset = None, + processing_class = None, + data_collator = None, + model_init = None, + callbacks = None, + preprocess_logits_for_metrics = None, + peft_config = None, + compute_metrics = None, + model_adapter_name = None, + ref_adapter_name = None, + embedding_func = None, + embedding_tokenizer = None, + **kwargs + ): + if args is None: args = UnslothBCOConfig() + use_bf16 = getattr(args, 'bf16', False) + if type(use_bf16) is not bool: use_bf16 = False + use_fp16 = getattr(args, 'fp16', False) + if type(use_fp16) is not bool: use_fp16 = False + force_float32 = False + full_finetuning = os.environ.get('UNSLOTH_ENABLE_FULL_FINETUNING', '0') == '1' + if not full_finetuning and (os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '1'): + print('Unsloth: Switching to float32 training since model cannot work with float16') + force_float32 = True + mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') + dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None) + if dtype is None: dtype = model.get_input_embeddings().dtype + from unsloth_zoo.utils import _get_dtype + dtype = _get_dtype(dtype) + float16 = dtype == torch.float16 + if not force_float32 and (float16 and use_bf16): raise TypeError('Unsloth: Model is in float16 precision but you want to use bfloat16 precision. Set fp16 to `True` and bf16 to `False`') + if not force_float32 and (not float16 and use_fp16): raise TypeError('Unsloth: Model is in bfloat16 precision but you want to use float16 precision. 
Set fp16 to `False` and bf16 to `True`') + if force_float32: + # Forced float32 training + args.fp16 = False + args.bf16 = False + os.environ['ACCELERATE_MIXED_PRECISION'] = 'no' + elif (not use_bf16 and not use_fp16) and mixed_precision_dtype == 'float32': + # Mixed precision training + args.fp16 = float16 + args.bf16 = not float16 + os.environ['ACCELERATE_MIXED_PRECISION'] = 'fp16' if float16 else 'bf16' + if getattr(args, 'eval_dataset', None) is not None and getattr(args, 'eval_strategy', 'no') == 'no': + args.eval_strategy = 'steps' + if getattr(args, 'eval_steps', None) is None: args.eval_steps = 0.1 + ga_steps = getattr(args, 'gradient_accumulation_steps', None) + if ga_steps is not None and ga_steps > 1: + from transformers import __version__ as transformers_version + if Version(transformers_version) <= Version('4.45.2'): + print('**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!\n' + '`pip install --upgrade --no-cache-dir --force-reinstall --no-deps unsloth transformers trl unsloth_zoo`') + if getattr(args, 'eval_strategy', 'no') != 'no': + eval_bsz = getattr(args, 'per_device_eval_batch_size', 8) + if eval_bsz == 8 and args.per_device_train_batch_size < eval_bsz: args.per_device_eval_batch_size = args.per_device_train_batch_size + if getattr(args, 'eval_accumulation_steps', None) is None and ga_steps is not None: args.eval_accumulation_steps = ga_steps + fp16_full_eval = getattr(args, 'fp16_full_eval', False) + if type(fp16_full_eval) is not bool: fp16_full_eval = False + bf16_full_eval = getattr(args, 'bf16_full_eval', False) + if type(bf16_full_eval) is not bool: bf16_full_eval = False + if args.fp16 and bf16_full_eval: args.bf16_full_eval = False; args.fp16_full_eval = True + if args.bf16 and fp16_full_eval: args.bf16_full_eval = True; args.fp16_full_eval = False + if force_float32: + args.bf16_full_eval = False + args.fp16_full_eval = False + elif os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') == 'bfloat16': + args.bf16_full_eval = True + args.fp16_full_eval = False + elif not bf16_full_eval and not fp16_full_eval: + args.bf16_full_eval = args.bf16 + args.fp16_full_eval = args.fp16 + _output_logits = False + if locals().get('compute_metrics', None) is not None: _output_logits = True + if locals().get('preprocess_logits_for_metrics', None) is not None: _output_logits = True + if _output_logits: + os.environ['UNSLOTH_RETURN_LOGITS'] = '1' + if 'max_seq_length' not in locals() and not hasattr(args, 'max_seq_length'): + pass + else: + model_max_seq_length = getattr(model, 'max_seq_length', None) + args_max_seq_length = getattr(args, 'max_seq_length', None) + if args_max_seq_length is None and model_max_seq_length is not None: + max_seq_length = model.max_seq_length + if hasattr(args, 'max_seq_length'): args.max_seq_length = max_seq_length + if model is not None and hasattr(model, 'for_training'): + model.for_training() + if 'tokenizer' in locals() and hasattr(tokenizer, 'padding_side'): tokenizer.padding_side = 'right' + if 'processing_class' in locals(): + if hasattr(processing_class, 'padding_side'): processing_class.padding_side = 'right' + if hasattr(processing_class, 'tokenizer') and hasattr(processing_class.tokenizer, 'padding_side'): processing_class.tokenizer.padding_side = 'right' + __tokenizer = processing_class if 'processing_class' in locals() else tokenizer + from unsloth_zoo.vision_utils import UnslothVisionDataCollator + if not isinstance(data_collator, UnslothVisionDataCollator): + if 
isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names: + data_collator = DataCollatorForSeq2Seq( + __tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False + if hasattr(args, 'dataset_text_field'): args.dataset_text_field = '' + if hasattr(args, 'dataset_kwargs'): args.dataset_kwargs = {'skip_prepare_dataset': True} + if not isinstance(data_collator, UnslothVisionDataCollator): + if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'): + if isinstance(data_collator, DataCollatorForSeq2Seq): + data_collator = DataCollatorForSeq2Seq( + __tokenizer.tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer.tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + other_metrics = [] + + from unsloth_zoo.logging_utils import PatchRLStatistics + PatchRLStatistics('bco_trainer', other_metrics) + + # [TODO] Fix up DataParallel multiplying batch sizes + # [TODO] DDP works, but DP seems to not work? [TODO] + if getattr(args, "parallel_mode", None) == ParallelMode.NOT_DISTRIBUTED and args.n_gpu > 1: + if getattr(args, "_n_gpu", 1) != 1: + args._n_gpu = 1 + if "model" in locals() and hasattr(model, "for_training"): + model.for_training() + super().__init__( + model = model, + ref_model = ref_model, + args = args, + train_dataset = train_dataset, + eval_dataset = eval_dataset, + processing_class = processing_class, + data_collator = data_collator, + model_init = model_init, + callbacks = callbacks, + preprocess_logits_for_metrics = preprocess_logits_for_metrics, + peft_config = peft_config, + compute_metrics = compute_metrics, + model_adapter_name = model_adapter_name, + ref_adapter_name = ref_adapter_name, + embedding_func = embedding_func, + embedding_tokenizer = embedding_tokenizer,**kwargs) + if "model" in locals() and hasattr(model, "for_inference"): + model.for_inference() + if hasattr(self, 'neftune_hook_handle'): + self.neftune_hook_handle.remove() + if hasattr(self, 'neftune_hook_handle'): del self.neftune_hook_handle + if getattr(args, 'neftune_noise_alpha', None) is not None: + model.get_input_embeddings().neftune_noise_alpha = self.neftune_noise_alpha + pass + if hasattr(self, 'accelerator'): + scaler = self.accelerator.scaler + current_model = model + while hasattr(current_model, 'model'): + current_model.accelerator_scaler = scaler + current_model = current_model.model + current_model.accelerator_scaler = scaler + pass + if hasattr(self, 'train'): + self.train = MethodType(prepare_for_training_mode(self.__class__.train), self) + pass + +pass + + +if hasattr(logger, "addFilter"): + import logging + class HideLoggingMessage(logging.Filter): + def __init__(self, text): self.text = text + def filter(self, x): return not (self.text in x.getMessage()) + pass + logger.addFilter(HideLoggingMessage("`use_cache=True`")) + diff --git a/unsloth_compiled_cache/UnslothCPOTrainer.py b/unsloth_compiled_cache/UnslothCPOTrainer.py new file mode 100644 index 
0000000000000000000000000000000000000000..728a6072733c67e6a1d3c40192d28a5478f4b2c5
--- /dev/null
+++ b/unsloth_compiled_cache/UnslothCPOTrainer.py
@@ -0,0 +1,1866 @@
+"""
+2025.11.2
+2025.11.1
+4.57.2
+0.23.0
+__UNSLOTH_VERSIONING__
+"""
+
+# Unsloth auto generated code
+# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+from torch import Tensor
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+from typing import Any, List, Optional, Tuple, Union, Dict, Set, Callable
+from trl.trainer.cpo_trainer import (Any, AutoModelForCausalLM, BaseImageProcessor, CPOConfig, CPOTrainer, Callable, DPODataCollatorWithPadding, DataCollator, DataLoader, Dataset, EvalLoopOutput, F, FeatureExtractionMixin, Literal, Optional, PartialState, Path, PeftModel, PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin, Trainer, TrainerCallback, Union, add_bos_token_if_needed, add_eos_token_if_needed, autocast, defaultdict, disable_dropout_in_model, generate_model_card, get_comet_experiment_url, inspect, is_comet_available, is_peft_available, is_torch_fx_proxy, is_wandb_available, log_table_to_comet_experiment, logger, logging, maybe_apply_chat_template, maybe_extract_prompt, nn, np, nullcontext, os, pad_to_length, pd, peft_module_casting_to_bf16, prepare_model_for_kbit_training, random, selective_log_softmax, textwrap, torch, F, Optional, PeftModel, PreTrainedModel, Trainer, is_peft_available, logger, os, torch)
+
+
+import os
+from typing import *
+from dataclasses import dataclass, field
+from packaging.version import Version
+import torch
+import numpy as np
+from contextlib import nullcontext
+from torch.nn import functional as F
+from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling as TransformersDataCollatorForLanguageModeling
+from transformers.training_args import ParallelMode
+
+# Wrap trainer with padding to right and enable training mode
+import functools
+from types import MethodType
+def prepare_for_training_mode(f):
+    @functools.wraps(f)
+    def wrapper(self, *args, **kwargs):
+        # Enable training mode
+        if hasattr(self, 'model') and hasattr(self.model, "for_training"):
+            self.model.for_training()
+        output = f(self, *args, **kwargs)
+        # Return inference mode
+        if hasattr(self, 'model') and hasattr(self.model, "for_inference"):
+            self.model.for_inference()
+        return output
+    return wrapper
+pass
+
+torch_compile_options = {
+    "epilogue_fusion"   : True,
+    "max_autotune"      : False,
+    "shape_padding"     : True,
+    "trace.enabled"     : False,
+    "triton.cudagraphs" : False,
+}
+
+@torch.compile(dynamic = True, fullgraph = True, options = torch_compile_options,)
+def chunked_selective_log_softmax(logits, index):
+    # Split into 4 chunks only
+    chunked_logits = torch.chunk(logits.reshape(-1, logits.shape[-1]), chunks = 4, dim = 0)
+    chunked_index = torch.chunk(index.reshape(-1), chunks = 4, dim = 0)
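+    # The float32 upcast happens one chunk at a time in the loop below, so only
+    # about a quarter of the flattened logits is ever held in float32 for the
+    # gather/logsumexp, which bounds peak memory.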
+    all_per_token_logps = []
+    # Below loop does the same as selective_log_softmax(chunk_logits, chunk_index)
+    for chunk_logits, chunk_index in zip(chunked_logits, chunked_index):
+        chunk_logits = chunk_logits.to(torch.float32)
+        selected_logits = torch.gather(chunk_logits, dim = -1, index = chunk_index.unsqueeze(-1)).squeeze(-1)
+        logsumexp_values = torch.logsumexp(chunk_logits, dim = -1)
+        per_token_logps = selected_logits - logsumexp_values
+        all_per_token_logps.append(per_token_logps)
+    pass
+    all_per_token_logps = torch.concat(all_per_token_logps)
+    all_per_token_logps = all_per_token_logps.reshape((logits.shape[0], logits.shape[1]))
+    return all_per_token_logps
+
+def calculate_pad_tokens_in_prompt(
+    input_ids: torch.Tensor,
+    logits_to_keep: int,
+    pad_token_id: int
+) -> torch.Tensor:
+    """
+    Given a prompt tensor, return the number of left-padding tokens in each sequence, e.g. [pad, pad, pad, cat] -> 3 tokens.
+    """
+    if logits_to_keep >= input_ids.shape[1]:
+        raise ValueError("logits_to_keep must be smaller than the sequence length.")
+
+    prompt_section = input_ids[:, :-logits_to_keep]
+
+    padding_mask = (prompt_section == pad_token_id)
+
+    pad_token_counts = padding_mask.sum(dim=1)
+
+    return pad_token_counts
+
+def create_completion_attention_mask(
+    completion_input_ids: torch.Tensor,
+    left_pad_tokens_per_prompt: torch.Tensor,
+    max_left_pad: int,
+    pad_token_id: int
+) -> torch.Tensor:
+    """
+    Given a sequence [p,p,p,c,c,c,pad,pad,pad], where p are leftover prompt tokens from slicing the tensor,
+    c are completion tokens, and pad are padding tokens, build a completion mask that zeroes out the p and
+    pad tokens, i.e. [0,0,0,1,1,1,0,0,0] in this example.
+    """
+    batch_size, completion_len = completion_input_ids.shape
+    device = completion_input_ids.device
+
+    num_tokens_to_mask = max_left_pad - left_pad_tokens_per_prompt
+
+    indices = torch.arange(completion_len, device=device).unsqueeze(0)
+    shift_mask = indices >= num_tokens_to_mask.unsqueeze(1)
+
+    non_padding_mask = (completion_input_ids != pad_token_id)
+
+    final_mask = shift_mask & non_padding_mask
+
+    return final_mask
+
+def left_pack_padding(tensor: torch.Tensor, pad_id: int) -> torch.Tensor:
+    """
+    Moves all padding tokens in each sequence of a batch to the right.
+    """
+    mask = (tensor != pad_id)
+    # Must use stable=True: the boolean mask contains ties, and only a stable sort preserves the original token order
+    sorted_indices = torch.argsort(mask, dim=1, descending=True, stable=True)
+    packed_tensor = torch.gather(tensor, 1, sorted_indices)
+    return packed_tensor
+
+def align_logprobs_with_mask(
+    logprob_tensor: torch.Tensor,
+    attention_mask: torch.Tensor,
+    pad_value: float = 0.0
+) -> torch.Tensor:
+    """
+    Aligns a log probability tensor with a given attention mask.
+    """
+
+    device = logprob_tensor.device
+    batch_size, logprob_seq_len = logprob_tensor.shape
+    mask_seq_len = attention_mask.shape[1]
+
+    padded_logprobs = torch.full(
+        attention_mask.shape,
+        fill_value=pad_value,
+        dtype=logprob_tensor.dtype,
+        device=device
+    )
+
+    left_pad_counts = torch.argmax(attention_mask, dim=1)
+
+    cols = torch.arange(logprob_seq_len, device=device)
+    dest_indices = left_pad_counts.unsqueeze(1) + cols
+
+    # Create destination row indices
+    # Shape: [batch_size, logprob_seq_len]
+    row_indices = torch.arange(batch_size, device=device).unsqueeze(1).expand_as(dest_indices)
+
+    # Filter out-of-bounds indices and perform the assignment:
+    # create a mask to identify only the indices that are within the bounds
+    # of the target tensor's sequence length.
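+    # (Destination indices that would fall past mask_seq_len are dropped below
+    # rather than raising an out-of-bounds indexing error.)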
+ valid_mask = dest_indices < mask_seq_len + + # Use this mask to select only the valid row indices, column indices, + # and the corresponding values from the logprob tensor. + # This flattens the selected elements into 1D tensors. + valid_rows = row_indices[valid_mask] + valid_cols = dest_indices[valid_mask] + valid_vals = logprob_tensor[valid_mask] + + # Place the valid values into their correct positions in the padded tensor + # using a single, efficient advanced indexing operation. + padded_logprobs[valid_rows, valid_cols] = valid_vals + + return padded_logprobs +@dataclass +class UnslothCPOConfig(CPOConfig): + """ + +Configuration class for the [`CPOTrainer`]. + +This class includes only the parameters that are specific to CPO training. For a full list of training arguments, +please refer to the [`~transformers.TrainingArguments`] documentation. Note that default values in this class may +differ from those in [`~transformers.TrainingArguments`]. + +Using [`~transformers.HfArgumentParser`] we can turn this class into +[argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the +command line. + +Parameters: + max_length (`int` or `None`, *optional*, defaults to `1024`): + Maximum length of the sequences (prompt + completion) in the batch. This argument is required if you want + to use the default data collator. + max_prompt_length (`int` or `None`, *optional*, defaults to `512`): + Maximum length of the prompt. This argument is required if you want to use the default data collator. + max_completion_length (`int` or `None`, *optional*, defaults to `None`): + Maximum length of the completion. This argument is required if you want to use the default data collator + and your model is an encoder-decoder. + beta (`float`, *optional*, defaults to `0.1`): + Parameter controlling the deviation from the reference model. Higher β means less deviation from the + reference model. For the IPO loss (`loss_type="ipo"`), β is the regularization parameter denoted by τ in + the [paper](https://huggingface.co/papers/2310.12036). + label_smoothing (`float`, *optional*, defaults to `0.0`): + Label smoothing factor. This argument is required if you want to use the default data collator. + loss_type (`str`, *optional*, defaults to `"sigmoid"`): + Type of loss to use. Possible values are: + + - `"sigmoid"`: sigmoid loss from the original [DPO](https://huggingface.co/papers/2305.18290) paper. + - `"hinge"`: hinge loss on the normalized likelihood from the + [SLiC](https://huggingface.co/papers/2305.10425) paper. + - `"ipo"`: IPO loss from the [IPO](https://huggingface.co/papers/2310.12036) paper. + - `"simpo"`: SimPO loss from the [SimPO](https://huggingface.co/papers/2405.14734) paper. + - `"alphapo"`: AlphaPO loss from the [AlphaPO](https://huggingface.co/papers/2501.03884) paper. This + automatically sets `loss_type="simpo"` and `cpo_alpha=0.0`. + + disable_dropout (`bool`, *optional*, defaults to `True`): + Whether to disable dropout in the model. + cpo_alpha (`float`, *optional*, defaults to `1.0`): + Weight of the BC regularizer in CPO training. + simpo_gamma (`float`, *optional*, defaults to `0.5`): + Target reward margin for the SimPO loss, used only when the `loss_type="simpo"`. + alpha (`float`, *optional*, defaults to `0.0`): + Alpha parameter that controls reward function shape across all loss types. When alpha=0 (default), uses + standard log probability rewards. 
When `alpha != 0`, applies AlphaPO transformation: `r = (1 - p^(-alpha)) + / alpha` from the [AlphaPO paper](https://huggingface.co/papers/2501.03884). This parameter works with all + loss types. + label_pad_token_id (`int`, *optional*, defaults to `-100`): + Label pad token id. This argument is required if you want to use the default data collator. + padding_value (`int` or `None`, *optional*, defaults to `None`): + Padding value to use. If `None`, the padding value of the tokenizer is used. + truncation_mode (`str`,*optional*, defaults to `"keep_end"`): + Truncation mode to use when the prompt is too long. Possible values are `"keep_end"` or `"keep_start"`. + This argument is required if you want to use the default data collator. + generate_during_eval (`bool`, *optional*, defaults to `False`): + If `True`, generates and logs completions from the model to W&B or Comet during evaluation. + is_encoder_decoder (`bool` or `None`, *optional*, defaults to `None`): + When using the `model_init` argument (callable) to instantiate the model instead of the `model` argument, + you need to specify if the model returned by the callable is an encoder-decoder model. + model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`): + Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the model from a + string. + dataset_num_proc (`int` or `None`, *optional*, defaults to `None`): + Number of processes to use for processing the dataset. + + """ + vllm_sampling_params: Optional[Any] = field( + default = None, + metadata = {'help': 'vLLM SamplingParams'}, + ) + unsloth_num_chunks : Optional[int] = field( + default = -1, + metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'}, + ) + max_seq_length : Optional[int] = field( + default = None, + metadata = {'help': 'Maximum sequence length to truncate to.'}, + ) + def __init__( + self, + output_dir = None, + overwrite_output_dir = None, + do_train = False, + do_eval = False, + do_predict = False, + eval_strategy = 'no', + prediction_loss_only = False, + per_device_train_batch_size = 4, + per_device_eval_batch_size = 4, + per_gpu_train_batch_size = None, + per_gpu_eval_batch_size = None, + gradient_accumulation_steps = 2, + eval_accumulation_steps = 2, + eval_delay = 0, + torch_empty_cache_steps = 250, + learning_rate = 5e-05, + weight_decay = 0.01, + adam_beta1 = 0.9, + adam_beta2 = 0.999, + adam_epsilon = 1e-08, + max_grad_norm = 1.0, + num_train_epochs = 3.0, + max_steps = -1, + lr_scheduler_type = 'linear', + warmup_ratio = 0.1, + warmup_steps = 0, + log_level = 'passive', + log_level_replica = 'warning', + log_on_each_node = True, + logging_dir = None, + logging_strategy = 'steps', + logging_first_step = False, + logging_steps = 1, + logging_nan_inf_filter = False, + save_strategy = 'steps', + save_steps = 500, + save_total_limit = None, + save_safetensors = True, + save_on_each_node = False, + save_only_model = False, + restore_callback_states_from_checkpoint = False, + no_cuda = False, + use_cpu = False, + use_mps_device = False, + seed = 3407, + data_seed = 3407, + jit_mode_eval = False, + bf16 = False, + fp16 = False, + fp16_opt_level = 'O1', + half_precision_backend = 'auto', + bf16_full_eval = False, + fp16_full_eval = False, + tf32 = None, + local_rank = -1, + ddp_backend = None, + tpu_num_cores = None, + tpu_metrics_debug = False, + debug = '', + dataloader_drop_last = False, + eval_steps = None, + dataloader_num_workers = 0, + dataloader_prefetch_factor = None, + 
past_index = -1,
+        run_name = None,
+        disable_tqdm = None,
+        remove_unused_columns = True,
+        label_names = None,
+        load_best_model_at_end = False,
+        metric_for_best_model = None,
+        greater_is_better = None,
+        ignore_data_skip = False,
+        fsdp = None,
+        fsdp_min_num_params = 0,
+        fsdp_config = None,
+        fsdp_transformer_layer_cls_to_wrap = None,
+        accelerator_config = None,
+        parallelism_config = None,
+        deepspeed = None,
+        label_smoothing_factor = 0.0,
+        optim = 'adamw_8bit',
+        optim_args = None,
+        adafactor = False,
+        group_by_length = False,
+        length_column_name = 'length',
+        report_to = None,
+        project = 'huggingface',
+        trackio_space_id = 'trackio',
+        ddp_find_unused_parameters = None,
+        ddp_bucket_cap_mb = None,
+        ddp_broadcast_buffers = None,
+        dataloader_pin_memory = True,
+        dataloader_persistent_workers = False,
+        skip_memory_metrics = True,
+        use_legacy_prediction_loop = False,
+        push_to_hub = False,
+        resume_from_checkpoint = None,
+        hub_model_id = None,
+        hub_strategy = 'every_save',
+        hub_token = None,
+        hub_private_repo = None,
+        hub_always_push = False,
+        hub_revision = None,
+        gradient_checkpointing = True,
+        gradient_checkpointing_kwargs = None,
+        include_inputs_for_metrics = False,
+        eval_do_concat_batches = True,
+        fp16_backend = 'auto',
+        push_to_hub_model_id = None,
+        push_to_hub_organization = None,
+        push_to_hub_token = None,
+        mp_parameters = '',
+        auto_find_batch_size = False,
+        full_determinism = False,
+        torchdynamo = None,
+        ray_scope = 'last',
+        ddp_timeout = 1800,
+        torch_compile = False,
+        torch_compile_backend = None,
+        torch_compile_mode = None,
+        include_tokens_per_second = False,
+        include_num_input_tokens_seen = False,
+        neftune_noise_alpha = None,
+        optim_target_modules = None,
+        batch_eval_metrics = False,
+        eval_on_start = False,
+        use_liger_kernel = False,
+        liger_kernel_config = None,
+        eval_use_gather_object = False,
+        average_tokens_across_devices = True,
+        max_length = 1024,
+        max_prompt_length = 512,
+        max_completion_length = None,
+        beta = 0.1,
+        label_smoothing = 0.0,
+        loss_type = 'sigmoid',
+        disable_dropout = True,
+        cpo_alpha = 1.0,
+        simpo_gamma = 0.5,
+        alpha = 0.0,
+        label_pad_token_id = -100,
+        padding_value = None,
+        truncation_mode = 'keep_end',
+        generate_during_eval = False,
+        is_encoder_decoder = None,
+        model_init_kwargs = None,
+        dataset_num_proc = None,
+        vllm_sampling_params = None,
+        unsloth_num_chunks = -1,
+        max_seq_length = None,
+        **kwargs,
+    ):
+        if learning_rate < 1e-7: print(f'Unsloth: Your learning rate of `{learning_rate}` is too small (less than 1e-7)! Consider increasing it, otherwise gradient updates will be close to 0!')
+        if learning_rate > 1: print(f'Unsloth: Your learning rate of `{learning_rate}` is far too large (> 1)!
Consider decreasing it to 1e-1, otherwise gradient updates will explode!') + if output_dir is None and save_strategy == 'steps' and save_steps == 500: + output_dir = 'unsloth_training_checkpoints' + save_strategy = 'no' + if dataset_num_proc is None: + from multiprocessing import cpu_count + dataset_num_proc = min(max(cpu_count()+4, 2), 64) + + super().__init__( + output_dir = output_dir, + overwrite_output_dir = overwrite_output_dir, + do_train = do_train, + do_eval = do_eval, + do_predict = do_predict, + eval_strategy = eval_strategy, + prediction_loss_only = prediction_loss_only, + per_device_train_batch_size = per_device_train_batch_size, + per_device_eval_batch_size = per_device_eval_batch_size, + per_gpu_train_batch_size = per_gpu_train_batch_size, + per_gpu_eval_batch_size = per_gpu_eval_batch_size, + gradient_accumulation_steps = gradient_accumulation_steps, + eval_accumulation_steps = eval_accumulation_steps, + eval_delay = eval_delay, + torch_empty_cache_steps = torch_empty_cache_steps, + learning_rate = learning_rate, + weight_decay = weight_decay, + adam_beta1 = adam_beta1, + adam_beta2 = adam_beta2, + adam_epsilon = adam_epsilon, + max_grad_norm = max_grad_norm, + num_train_epochs = num_train_epochs, + max_steps = max_steps, + lr_scheduler_type = lr_scheduler_type, + warmup_ratio = warmup_ratio, + warmup_steps = warmup_steps, + log_level = log_level, + log_level_replica = log_level_replica, + log_on_each_node = log_on_each_node, + logging_dir = logging_dir, + logging_strategy = logging_strategy, + logging_first_step = logging_first_step, + logging_steps = logging_steps, + logging_nan_inf_filter = logging_nan_inf_filter, + save_strategy = save_strategy, + save_steps = save_steps, + save_total_limit = save_total_limit, + save_safetensors = save_safetensors, + save_on_each_node = save_on_each_node, + save_only_model = save_only_model, + restore_callback_states_from_checkpoint = restore_callback_states_from_checkpoint, + no_cuda = no_cuda, + use_cpu = use_cpu, + use_mps_device = use_mps_device, + seed = seed, + data_seed = data_seed, + jit_mode_eval = jit_mode_eval, + bf16 = bf16, + fp16 = fp16, + fp16_opt_level = fp16_opt_level, + half_precision_backend = half_precision_backend, + bf16_full_eval = bf16_full_eval, + fp16_full_eval = fp16_full_eval, + tf32 = tf32, + local_rank = local_rank, + ddp_backend = ddp_backend, + tpu_num_cores = tpu_num_cores, + tpu_metrics_debug = tpu_metrics_debug, + debug = debug, + dataloader_drop_last = dataloader_drop_last, + eval_steps = eval_steps, + dataloader_num_workers = dataloader_num_workers, + dataloader_prefetch_factor = dataloader_prefetch_factor, + past_index = past_index, + run_name = run_name, + disable_tqdm = disable_tqdm, + remove_unused_columns = remove_unused_columns, + label_names = label_names, + load_best_model_at_end = load_best_model_at_end, + metric_for_best_model = metric_for_best_model, + greater_is_better = greater_is_better, + ignore_data_skip = ignore_data_skip, + fsdp = fsdp, + fsdp_min_num_params = fsdp_min_num_params, + fsdp_config = fsdp_config, + fsdp_transformer_layer_cls_to_wrap = fsdp_transformer_layer_cls_to_wrap, + accelerator_config = accelerator_config, + parallelism_config = parallelism_config, + deepspeed = deepspeed, + label_smoothing_factor = label_smoothing_factor, + optim = optim, + optim_args = optim_args, + adafactor = adafactor, + group_by_length = group_by_length, + length_column_name = length_column_name, + report_to = report_to, + project = project, + trackio_space_id = trackio_space_id, + 
ddp_find_unused_parameters = ddp_find_unused_parameters,
+            ddp_bucket_cap_mb = ddp_bucket_cap_mb,
+            ddp_broadcast_buffers = ddp_broadcast_buffers,
+            dataloader_pin_memory = dataloader_pin_memory,
+            dataloader_persistent_workers = dataloader_persistent_workers,
+            skip_memory_metrics = skip_memory_metrics,
+            use_legacy_prediction_loop = use_legacy_prediction_loop,
+            push_to_hub = push_to_hub,
+            resume_from_checkpoint = resume_from_checkpoint,
+            hub_model_id = hub_model_id,
+            hub_strategy = hub_strategy,
+            hub_token = hub_token,
+            hub_private_repo = hub_private_repo,
+            hub_always_push = hub_always_push,
+            hub_revision = hub_revision,
+            gradient_checkpointing = gradient_checkpointing,
+            gradient_checkpointing_kwargs = gradient_checkpointing_kwargs,
+            include_inputs_for_metrics = include_inputs_for_metrics,
+            eval_do_concat_batches = eval_do_concat_batches,
+            fp16_backend = fp16_backend,
+            push_to_hub_model_id = push_to_hub_model_id,
+            push_to_hub_organization = push_to_hub_organization,
+            push_to_hub_token = push_to_hub_token,
+            mp_parameters = mp_parameters,
+            auto_find_batch_size = auto_find_batch_size,
+            full_determinism = full_determinism,
+            torchdynamo = torchdynamo,
+            ray_scope = ray_scope,
+            ddp_timeout = ddp_timeout,
+            torch_compile = torch_compile,
+            torch_compile_backend = torch_compile_backend,
+            torch_compile_mode = torch_compile_mode,
+            include_tokens_per_second = include_tokens_per_second,
+            include_num_input_tokens_seen = include_num_input_tokens_seen,
+            neftune_noise_alpha = neftune_noise_alpha,
+            optim_target_modules = optim_target_modules,
+            batch_eval_metrics = batch_eval_metrics,
+            eval_on_start = eval_on_start,
+            use_liger_kernel = use_liger_kernel,
+            liger_kernel_config = liger_kernel_config,
+            eval_use_gather_object = eval_use_gather_object,
+            average_tokens_across_devices = average_tokens_across_devices,
+            max_length = max_length,
+            max_prompt_length = max_prompt_length,
+            max_completion_length = max_completion_length,
+            beta = beta,
+            label_smoothing = label_smoothing,
+            loss_type = loss_type,
+            disable_dropout = disable_dropout,
+            cpo_alpha = cpo_alpha,
+            simpo_gamma = simpo_gamma,
+            alpha = alpha,
+            label_pad_token_id = label_pad_token_id,
+            padding_value = padding_value,
+            truncation_mode = truncation_mode,
+            generate_during_eval = generate_during_eval,
+            is_encoder_decoder = is_encoder_decoder,
+            model_init_kwargs = model_init_kwargs,
+            dataset_num_proc = dataset_num_proc,**kwargs)
+        self.vllm_sampling_params = vllm_sampling_params
+        self.unsloth_num_chunks = unsloth_num_chunks
+        self.max_seq_length = max_seq_length
+pass
+
+class _UnslothCPOTrainer(Trainer):
+    r"""
+    Initialize CPOTrainer.
+
+    Args:
+        model (`transformers.PreTrainedModel`):
+            The model to train, preferably an `AutoModelForCausalLM`.
+        args (`CPOConfig`):
+            The CPO config arguments to use for training.
+        data_collator (`transformers.DataCollator`):
+            The data collator to use for training. If None is specified, the default data collator
+            (`DPODataCollatorWithPadding`) will be used which will pad the sequences to the maximum length of the
+            sequences in the batch, given a dataset of paired sequences.
+        train_dataset (`datasets.Dataset`):
+            The dataset to use for training.
+        eval_dataset (`datasets.Dataset`):
+            The dataset to use for evaluation.
+        processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
+            Processing class used to process the data. If provided, will be used to automatically process the inputs
+            for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
+            reuse the fine-tuned model.
+        model_init (`Callable[[], transformers.PreTrainedModel]`):
+            The model initializer to use for training. If None is specified, the default model initializer will be
+            used.
+        callbacks (`list[transformers.TrainerCallback]`):
+            The callbacks to use for training.
+        optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`):
+            The optimizer and scheduler to use for training.
+        preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`):
+            The function to use to preprocess the logits before computing the metrics.
+        peft_config (`dict`, defaults to `None`):
+            The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in
+            a PEFT model.
+        compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
+            The function to use to compute the metrics. Must take an `EvalPrediction` and return a dictionary mapping
+            metric names to metric values.
+    """
+
+    _tag_names = ["trl", "cpo"]
+
+    def __init__(
+        self,
+        model: Optional[Union[PreTrainedModel, nn.Module, str]] = None,
+        args: Optional[CPOConfig] = None,
+        data_collator: Optional[DataCollator] = None,
+        train_dataset: Optional[Dataset] = None,
+        eval_dataset: Optional[Union[Dataset, dict[str, Dataset]]] = None,
+        processing_class: Optional[
+            Union[PreTrainedTokenizerBase, BaseImageProcessor, FeatureExtractionMixin, ProcessorMixin]
+        ] = None,
+        model_init: Optional[Callable[[], PreTrainedModel]] = None,
+        callbacks: Optional[list[TrainerCallback]] = None,
+        optimizers: tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
+        preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None,
+        peft_config: Optional[dict] = None,
+        compute_metrics: Optional[Callable[[EvalLoopOutput], dict]] = None,
+    ):
+        if args.model_init_kwargs is None:
+            model_init_kwargs = {}
+        elif not isinstance(model, str):
+            raise ValueError("You passed `model_init_kwargs` to the CPOConfig, but your model is already instantiated.")
+        else:
+            model_init_kwargs = args.model_init_kwargs
+            dtype = model_init_kwargs.get("dtype")
+            if dtype is not None:
+                # Convert to `torch.dtype` if a str is passed
+                if isinstance(dtype, str) and dtype != "auto":
+                    dtype = getattr(torch, dtype)
+                if dtype != "auto" and not isinstance(dtype, torch.dtype):
+                    raise ValueError(
+                        f"Invalid `dtype` passed to the CPOConfig. Expected a string with either `torch.dtype` or 'auto', but got {dtype}."
+                    )
+                model_init_kwargs["dtype"] = dtype
+
+        if isinstance(model, str):
+            model = AutoModelForCausalLM.from_pretrained(model, **model_init_kwargs)
+
+        # Initialize this variable to False. This helps tracking the case when `peft_module_casting_to_bf16`
+        # has been called in order to properly call autocast if needed.
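+        # The flag is consulted again below when compute_loss(), generate_from_model()
+        # and prediction_step() build their autocast context managers.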
+        self._peft_has_been_casted_to_bf16 = False
+
+        if not is_peft_available() and peft_config is not None:
+            raise ValueError(
+                "PEFT is not installed and you passed a `peft_config` in the trainer's kwargs, please install it to use the PEFT models"
+            )
+        elif is_peft_available() and peft_config is not None:
+            # if model is a peft model and we have a peft_config, we merge and unload it first
+            if isinstance(model, PeftModel):
+                model = model.merge_and_unload()
+
+            if getattr(model, "is_loaded_in_8bit", False) or getattr(model, "is_loaded_in_4bit", False):
+                _support_gc_kwargs = hasattr(
+                    args, "gradient_checkpointing_kwargs"
+                ) and "gradient_checkpointing_kwargs" in list(
+                    inspect.signature(prepare_model_for_kbit_training).parameters
+                )
+
+                prepare_model_kwargs = {"use_gradient_checkpointing": args.gradient_checkpointing}
+
+                if _support_gc_kwargs:
+                    prepare_model_kwargs["gradient_checkpointing_kwargs"] = args.gradient_checkpointing_kwargs
+
+                model = prepare_model_for_kbit_training(model, **prepare_model_kwargs)
+            elif args.gradient_checkpointing:
+                # For backward compatibility with older versions of transformers
+                if hasattr(model, "enable_input_require_grads"):
+                    model.enable_input_require_grads()
+                else:
+
+                    def make_inputs_require_grad(module, input, output):
+                        output.requires_grad_(True)
+
+                    model.get_input_embeddings().register_forward_hook(make_inputs_require_grad)
+
+            # get peft model with the given config (Unsloth is assumed to handle PEFT wrapping itself, so the model is kept as-is here)
+            model = model
+            if args.bf16 and getattr(model, "is_loaded_in_4bit", False):
+                peft_module_casting_to_bf16(model)
+                # If args.bf16 we need to explicitly call `generate` with torch amp autocast context manager
+                self._peft_has_been_casted_to_bf16 = True
+
+        # For models that use gradient_checkpointing, we need to attach a hook that enables input
+        # to explicitly have `requires_grad=True`, otherwise training will either silently
+        # fail or completely fail.
+        elif args.gradient_checkpointing:
+            # For backward compatibility with older versions of transformers
+            if hasattr(model, "enable_input_require_grads"):
+                model.enable_input_require_grads()
+            else:
+
+                def make_inputs_require_grad(module, input, output):
+                    output.requires_grad_(True)
+
+                model.get_input_embeddings().register_forward_hook(make_inputs_require_grad)
+
+        if args.generate_during_eval and not (is_wandb_available() or is_comet_available()):
+            raise ValueError(
+                "`generate_during_eval=True` requires Weights and Biases or Comet to be installed."
+                " Please install `wandb` or `comet-ml` to resolve."
+            )
+
+        if model is not None:
+            self.is_encoder_decoder = model.config.is_encoder_decoder
+        elif args.is_encoder_decoder is None:
+            raise ValueError("When no model is provided, you need to pass the parameter is_encoder_decoder.")
+        else:
+            self.is_encoder_decoder = args.is_encoder_decoder
+
+        if self.is_encoder_decoder:
+            self.decoder_start_token_id = model.config.decoder_start_token_id
+            self.pad_token_id = model.config.pad_token_id
+
+        if processing_class is None:
+            raise ValueError("processing_class must be specified to tokenize a CPO dataset.")
+        if args.max_length is None:
+            logger.warning(
+                "`max_length` is not set in the CPOConfig's init;"
+                " it will default to `512`, but you should set it yourself in the future.",
+            )
+            max_length = 512
+        else:
+            max_length = args.max_length
+        if args.max_prompt_length is None:
+            logger.warning(
+                "`max_prompt_length` is not set in the CPOConfig's init;"
+                " it will default to `128`, but you should set it yourself in the future.",
+            )
+            max_prompt_length = 128
+        else:
+            max_prompt_length = args.max_prompt_length
+
+        if not max_prompt_length < max_length:
+            raise ValueError(
+                f"max_prompt_length ({max_prompt_length}) should be strictly less than max_length ({max_length})."
+            )
+
+        if args.max_completion_length is None and self.is_encoder_decoder:
+            logger.warning(
+                "When using an encoder-decoder architecture, you should set `max_completion_length` in the CPOConfig's init;"
+                " it will default to `128`, but you should set it yourself in the future.",
+            )
+            max_completion_length = 128
+        else:
+            max_completion_length = args.max_completion_length
+
+        if data_collator is None:
+            data_collator = DPODataCollatorWithPadding(
+                pad_token_id=processing_class.pad_token_id,
+                label_pad_token_id=args.label_pad_token_id,
+                is_encoder_decoder=self.is_encoder_decoder,
+            )
+
+            if args.remove_unused_columns:
+                args.remove_unused_columns = False
+                # warn users
+                logger.warning(
+                    "When using DPODataCollatorWithPadding, you should set `remove_unused_columns=False` in your TrainingArguments;"
+                    " we have set it for you, but you should do it yourself in the future.",
+                )
+
+            self.use_dpo_data_collator = True
+        else:
+            self.use_dpo_data_collator = False
+
+        # Disable dropout in the model
+        if args.disable_dropout:
+            disable_dropout_in_model(model)
+
+        self.max_length = max_length
+        self.generate_during_eval = args.generate_during_eval
+        self.label_pad_token_id = args.label_pad_token_id
+        self.padding_value = args.padding_value if args.padding_value is not None else processing_class.pad_token_id
+        self.max_prompt_length = max_prompt_length
+        self.truncation_mode = args.truncation_mode
+        self.max_completion_length = max_completion_length
+        self.processing_class = processing_class
+
+        if args.loss_type in ["hinge", "ipo"] and args.label_smoothing > 0:
+            logger.warning(
+                f"You are using the {args.loss_type} loss type that does not support label smoothing. The "
+                "`label_smoothing` parameter will be ignored. Set `label_smoothing` to `0.0` to remove this warning.",
+            )
+        if args.loss_type == "kto_pair":
+            raise ValueError("Support for kto_pair has been removed in CPOTrainer.
Please use KTOTrainer.") + + self.beta = args.beta + self.label_smoothing = args.label_smoothing + self.loss_type = args.loss_type + self.cpo_alpha = args.cpo_alpha + self.aux_loss_enabled = getattr(model.config, "output_router_logits", False) + self.aux_loss_coef = getattr(model.config, "router_aux_loss_coef", 0.0) + if self.aux_loss_enabled and self.aux_loss_coef == 0.0: + logger.warning( + "You set `output_router_logits` to `True` in the model config, but `router_aux_loss_coef` is set to " + "`0.0`, meaning the auxiliary loss will not be used. Either set `router_aux_loss_coef` to a value " + "greater than `0.0`, or set `output_router_logits` to `False` if you don't want to use the auxiliary " + "loss.", + ) + + if args.loss_type == "simpo": + self.simpo_gamma = args.simpo_gamma + + # AlphaPO parameter for reward shaping + self.alpha = args.alpha + + self._stored_metrics = defaultdict(lambda: defaultdict(list)) + + # The trainer estimates the number of FLOPs [floating-point operations] using the number of elements in the + # input tensor associated with the key "input_ids". However, in CPO, the sampled data does not include the + # "input_ids" key. Instead, the available keys are "prompt_input_ids", "chosen_input_ids", and + # "rejected_input_ids". As a result, the trainer issues the warning: "Could not estimate the number of tokens + # of the input, floating-point operations will not be computed." To suppress this warning, we set the + # "estimate_tokens" key in the model's "warnings_issued" dictionary to True. This acts as a flag to indicate + # that the warning has already been issued. + model.warnings_issued["estimate_tokens"] = True + + # Compute that only on the main process for faster data processing. + # see: https://github.com/huggingface/trl/pull/1255 + with PartialState().main_process_first(): + # Extract the prompt if needed, and apply the chat template if needed + train_dataset = train_dataset.map(maybe_extract_prompt, num_proc=args.dataset_num_proc) + train_dataset = train_dataset.map( + maybe_apply_chat_template, fn_kwargs={"tokenizer": processing_class}, num_proc=args.dataset_num_proc + ) + if eval_dataset is not None: + eval_dataset = eval_dataset.map(maybe_extract_prompt, num_proc=args.dataset_num_proc) + eval_dataset = eval_dataset.map( + maybe_apply_chat_template, + fn_kwargs={"tokenizer": processing_class}, + num_proc=args.dataset_num_proc, + ) + + # tokenize the dataset + train_dataset = train_dataset.map(self.tokenize_row, num_proc=args.dataset_num_proc) + if eval_dataset is not None: + eval_dataset = eval_dataset.map(self.tokenize_row, num_proc=args.dataset_num_proc) + + super().__init__( + model=model, + args=args, + data_collator=data_collator, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + processing_class=processing_class, + model_init=model_init, + compute_metrics=compute_metrics, + callbacks=callbacks, + optimizers=optimizers, + preprocess_logits_for_metrics=preprocess_logits_for_metrics, + ) + + # Gradient accumulation requires scaled loss. Normally, loss scaling in the parent class depends on whether the + # model accepts loss-related kwargs. Since we compute our own loss, this check is irrelevant. We set + # self.model_accepts_loss_kwargs to False to enable scaling. 
+        self.model_accepts_loss_kwargs = False
+
+        # Add tags for models that have been loaded with the correct transformers version
+        if hasattr(self.model, "add_model_tags"):
+            self.model.add_model_tags(self._tag_names)
+
+        if not hasattr(self, "accelerator"):
+            raise AttributeError(
+                "Your `Trainer` does not have an `accelerator` object. Consider upgrading `transformers`."
+            )
+
+    def build_tokenized_answer(self, prompt, answer):
+        """
+        Llama tokenizer does not satisfy `enc(a + b) = enc(a) + enc(b)`. It does ensure
+        `enc(a + b) = enc(a) + enc(a + b)[len(enc(a)):]`. Reference:
+        https://github.com/EleutherAI/lm-evaluation-harness/pull/531#issuecomment-1595586257
+        """
+
+        full_tokenized = self.processing_class(prompt + answer, add_special_tokens=False)
+        prompt_input_ids = self.processing_class(prompt, add_special_tokens=False)["input_ids"]
+
+        answer_input_ids = full_tokenized["input_ids"][len(prompt_input_ids) :]
+        answer_attention_mask = full_tokenized["attention_mask"][len(prompt_input_ids) :]
+
+        # Concat tokens to form `enc(a) + enc(a + b)[len(enc(a)):]`
+        full_concat_input_ids = np.concatenate([prompt_input_ids, answer_input_ids])
+
+        # Prepare input tokens for token by token comparison
+        full_input_ids = np.array(full_tokenized["input_ids"])
+
+        if len(full_input_ids) != len(full_concat_input_ids):
+            raise ValueError("Prompt input ids and answer input ids should have the same length.")
+
+        # On some tokenizers, like Llama-2 tokenizer, there are occasions where tokens
+        # can be merged together when tokenizing prompt+answer. This could result
+        # in the last token from the prompt being different when tokenized on its own
+        # vs when done as prompt+answer.
+        response_token_ids_start_idx = len(prompt_input_ids)
+
+        # If the tokenized prompt differs from the prompt portion of prompt+answer,
+        # then the last token has changed due to merging.
+        if prompt_input_ids != full_tokenized["input_ids"][:response_token_ids_start_idx]:
+            response_token_ids_start_idx -= 1
+
+        prompt_input_ids = full_tokenized["input_ids"][:response_token_ids_start_idx]
+        prompt_attention_mask = full_tokenized["attention_mask"][:response_token_ids_start_idx]
+
+        if len(prompt_input_ids) != len(prompt_attention_mask):
+            raise ValueError("Prompt input ids and attention mask should have the same length.")
+
+        answer_input_ids = full_tokenized["input_ids"][response_token_ids_start_idx:]
+        answer_attention_mask = full_tokenized["attention_mask"][response_token_ids_start_idx:]
+
+        return dict(
+            prompt_input_ids=prompt_input_ids,
+            prompt_attention_mask=prompt_attention_mask,
+            input_ids=answer_input_ids,
+            attention_mask=answer_attention_mask,
+        )
+
+    def tokenize_row(self, feature, model: Optional[Union[PreTrainedModel, nn.Module]] = None) -> dict:
+        """Tokenize a single row from a CPO specific dataset.
+
+        At this stage, we don't convert to PyTorch tensors yet; we just handle the truncation in case the prompt +
+        chosen or prompt + rejected responses is/are too long. First we truncate the prompt; if we're still too long,
+        we truncate the chosen/rejected.
+
+        We also create the labels for the chosen/rejected responses, which are of length equal to the sum of the length
+        of the prompt and the chosen/rejected response, with label_pad_token_id for the prompt tokens.
+        """
+        batch = {}
+        prompt = feature["prompt"]
+        chosen = feature["chosen"]
+        rejected = feature["rejected"]
+
+        if not self.is_encoder_decoder:
+            # Check issues below for more details
+            # 1. https://github.com/huggingface/trl/issues/907
+            # 2. https://github.com/EleutherAI/lm-evaluation-harness/pull/531#issuecomment-1595586257
+            # 3. https://github.com/LianjiaTech/BELLE/issues/337
+
+            if not isinstance(prompt, str):
+                raise ValueError(f"prompt should be a str but got {type(prompt)}")
+            prompt_tokens = self.processing_class(prompt, add_special_tokens=False)
+            prompt_tokens = {f"prompt_{k}": v for k, v in prompt_tokens.items()}
+
+            if not isinstance(chosen, str):
+                raise ValueError(f"chosen should be a str but got {type(chosen)}")
+            chosen_tokens = self.build_tokenized_answer(prompt, chosen)
+
+            if not isinstance(rejected, str):
+                raise ValueError(f"rejected should be a str but got {type(rejected)}")
+            rejected_tokens = self.build_tokenized_answer(prompt, rejected)
+
+            # Last prompt token might get merged by tokenizer and
+            # it should not be included for generation if that happens
+            prompt_len_input_ids = len(prompt_tokens["prompt_input_ids"])
+
+            chosen_prompt_len_input_ids = len(chosen_tokens["prompt_input_ids"])
+            rejected_prompt_len_input_ids = len(rejected_tokens["prompt_input_ids"])
+            prompt_len_input_ids = min(chosen_prompt_len_input_ids, rejected_prompt_len_input_ids)
+
+            for k, v in prompt_tokens.items():
+                prompt_tokens[k] = v[:prompt_len_input_ids]
+
+            # Make sure prompts only have one different token at most,
+            # and length only differs by 1 at most
+            num_diff_tokens = sum(
+                [a != b for a, b in zip(chosen_tokens["prompt_input_ids"], rejected_tokens["prompt_input_ids"])]
+            )
+            num_diff_len = abs(chosen_prompt_len_input_ids - rejected_prompt_len_input_ids)
+            if num_diff_tokens > 1 or num_diff_len > 1:
+                raise ValueError(
+                    "Chosen and rejected prompt_input_ids might only differ on the "
+                    "last token due to tokenizer merge ops."
+                )
+
+            # add BOS token to head of prompt. Avoid adding if it's already there
+            prompt_tokens, chosen_tokens, rejected_tokens = add_bos_token_if_needed(
+                self.processing_class.bos_token_id,
+                prompt_len_input_ids,
+                prompt_tokens,
+                chosen_prompt_len_input_ids,
+                chosen_tokens,
+                rejected_prompt_len_input_ids,
+                rejected_tokens,
+            )
+
+            # add EOS token to end of answer.
Avoid adding if it's already there + chosen_tokens, rejected_tokens = add_eos_token_if_needed( + self.processing_class.eos_token_id, chosen_tokens, rejected_tokens + ) + + longer_response_length = max(len(chosen_tokens["input_ids"]), len(rejected_tokens["input_ids"])) + + # if combined sequence is too long, truncate the prompt + for answer_tokens in [chosen_tokens, rejected_tokens, prompt_tokens]: + if len(answer_tokens["prompt_input_ids"]) + longer_response_length > self.max_length: + if self.truncation_mode == "keep_start": + for k in ["prompt_input_ids", "prompt_attention_mask"]: + answer_tokens[k] = answer_tokens[k][: self.max_prompt_length] + elif self.truncation_mode == "keep_end": + for k in ["prompt_input_ids", "prompt_attention_mask"]: + answer_tokens[k] = answer_tokens[k][-self.max_prompt_length :] + else: + raise ValueError(f"Unknown truncation mode: {self.truncation_mode}") + + # if that's still too long, truncate the response + for answer_tokens in [chosen_tokens, rejected_tokens]: + if len(answer_tokens["prompt_input_ids"]) + longer_response_length > self.max_length: + for k in ["input_ids", "attention_mask"]: + answer_tokens[k] = answer_tokens[k][: self.max_length - self.max_prompt_length] + + # Create labels + chosen_sequence_tokens = { + k: chosen_tokens[f"prompt_{k}"] + chosen_tokens[k] for k in ["input_ids", "attention_mask"] + } + rejected_sequence_tokens = { + k: rejected_tokens[f"prompt_{k}"] + rejected_tokens[k] for k in ["input_ids", "attention_mask"] + } + chosen_sequence_tokens["labels"] = chosen_sequence_tokens["input_ids"][:] + chosen_sequence_tokens["labels"][: len(chosen_tokens["prompt_input_ids"])] = [ + self.label_pad_token_id + ] * len(chosen_tokens["prompt_input_ids"]) + rejected_sequence_tokens["labels"] = rejected_sequence_tokens["input_ids"][:] + rejected_sequence_tokens["labels"][: len(rejected_tokens["prompt_input_ids"])] = [ + self.label_pad_token_id + ] * len(rejected_tokens["prompt_input_ids"]) + + for k, toks in { + "chosen_": chosen_sequence_tokens, + "rejected_": rejected_sequence_tokens, + "": prompt_tokens, + }.items(): + for type_key, tokens in toks.items(): + if type_key == "token_type_ids": + continue + batch[f"{k}{type_key}"] = tokens + + else: + chosen_tokens = self.processing_class( + chosen, truncation=True, max_length=self.max_completion_length, add_special_tokens=True + ) + rejected_tokens = self.processing_class( + rejected, truncation=True, max_length=self.max_completion_length, add_special_tokens=True + ) + prompt_tokens = self.processing_class( + prompt, truncation=True, max_length=self.max_prompt_length, add_special_tokens=True + ) + + batch["chosen_labels"] = chosen_tokens["input_ids"] + batch["rejected_labels"] = rejected_tokens["input_ids"] + batch["prompt_input_ids"] = prompt_tokens["input_ids"] + batch["prompt_attention_mask"] = prompt_tokens["attention_mask"] + + if model is not None and hasattr(model, "prepare_decoder_input_ids_from_labels"): + batch["rejected_decoder_input_ids"] = model.prepare_decoder_input_ids_from_labels( + labels=torch.tensor(batch["rejected_labels"]) + ) + batch["chosen_decoder_input_ids"] = model.prepare_decoder_input_ids_from_labels( + labels=torch.tensor(batch["chosen_labels"]) + ) + + return batch + + @staticmethod + def concatenated_inputs( + batch: dict[str, Union[list, torch.LongTensor]], + is_encoder_decoder: bool = False, + label_pad_token_id: int = -100, + padding_value: int = 0, + device: Optional[torch.device] = None, + ) -> dict[str, torch.LongTensor]: + """Concatenate the chosen and 
rejected inputs into a single tensor. + + Args: + batch: + A batch of data. Must contain the keys 'chosen_input_ids' and 'rejected_input_ids', which are tensors + of shape (batch_size, sequence_length). + is_encoder_decoder: + Whether the model is an encoder-decoder model. + label_pad_token_id: + The label pad token id. + padding_value: + The padding value to use for the concatenated inputs_ids. + device: + The device for the concatenated inputs. + + Returns: + A dictionary containing the concatenated inputs under the key 'concatenated_input_ids'. + """ + concatenated_batch = {} + + if is_encoder_decoder: + max_length = max(batch["chosen_labels"].shape[1], batch["rejected_labels"].shape[1]) + else: + max_length = max(batch["chosen_input_ids"].shape[1], batch["rejected_input_ids"].shape[1]) + + for k in batch: + if k.startswith("chosen") and isinstance(batch[k], torch.Tensor): + if "labels" in k or is_encoder_decoder: + pad_value = label_pad_token_id + elif k.endswith("_input_ids"): + pad_value = padding_value + elif k.endswith("_attention_mask"): + pad_value = 0 + concatenated_key = k.replace("chosen", "concatenated") + concatenated_batch[concatenated_key] = pad_to_length(batch[k], max_length, pad_value=pad_value) + for k in batch: + if k.startswith("rejected") and isinstance(batch[k], torch.Tensor): + if "labels" in k or is_encoder_decoder: + pad_value = label_pad_token_id + elif k.endswith("_input_ids"): + pad_value = padding_value + elif k.endswith("_attention_mask"): + pad_value = 0 + concatenated_key = k.replace("rejected", "concatenated") + concatenated_batch[concatenated_key] = torch.cat( + ( + concatenated_batch[concatenated_key], + pad_to_length(batch[k], max_length, pad_value=pad_value), + ), + dim=0, + ).to(device=device) + + if is_encoder_decoder: + concatenated_batch["concatenated_input_ids"] = batch["prompt_input_ids"].repeat(2, 1).to(device=device) + concatenated_batch["concatenated_attention_mask"] = ( + batch["prompt_attention_mask"].repeat(2, 1).to(device=device) + ) + + return concatenated_batch + + def cpo_loss( + self, + policy_chosen_logps: torch.FloatTensor, + policy_rejected_logps: torch.FloatTensor, + ) -> tuple[torch.FloatTensor, torch.FloatTensor, torch.FloatTensor]: + """Compute the CPO loss for a batch of policy and reference model log probabilities. + + Args: + policy_chosen_logps: + Log probabilities of the policy model for the chosen responses. Shape: (batch_size,) + policy_rejected_logps: + Log probabilities of the policy model for the rejected responses. Shape: (batch_size,) + + Returns: + A tuple of three tensors: (losses, chosen_rewards, rejected_rewards). The losses tensor contains the CPO + loss for each example in the batch. The chosen_rewards and rejected_rewards tensors contain the rewards for + the chosen and rejected responses, respectively. 
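+
+        For reference, with the default `loss_type="sigmoid"` and `alpha=0.0`, the code
+        below computes, per example,
+
+            loss = -logsigmoid(beta * (logp_chosen - logp_rejected)) * (1 - label_smoothing)
+                   - logsigmoid(-beta * (logp_chosen - logp_rejected)) * label_smoothing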
+ """ + # Apply AlphaPO reward transformation if alpha != 0 + if self.alpha != 0.0: + # Compute probabilities + chosen_probs = torch.exp(policy_chosen_logps) + rejected_probs = torch.exp(policy_rejected_logps) + + # Apply AlphaPO transformation: r = (1 - p^(-alpha)) / alpha + policy_chosen_rewards = (1 - chosen_probs.pow(-self.alpha)) / self.alpha + policy_rejected_rewards = (1 - rejected_probs.pow(-self.alpha)) / self.alpha + + logits = (policy_chosen_rewards - policy_rejected_rewards).to(self.accelerator.device) + else: + # Standard log probability rewards when alpha = 0 + logits = (policy_chosen_logps - policy_rejected_logps).to(self.accelerator.device) + + # The beta is a temperature parameter for the CPO loss, typically something in the range of 0.1 to 0.5. + # We ignore the reference model as beta -> 0. The label_smoothing parameter encodes our uncertainty about the labels and + # calculates a conservative CPO loss. + + if self.loss_type == "simpo": + gamma_logratios = self.simpo_gamma / self.beta + logits = logits - gamma_logratios + # This reduces to Equation 3 from the CPO paper when label_smoothing -> 0. + losses = ( + -F.logsigmoid(self.beta * logits) * (1 - self.label_smoothing) + - F.logsigmoid(-self.beta * logits) * self.label_smoothing + ) + elif self.loss_type == "sigmoid": + # This reduces to Equation 3 from the CPO paper when label_smoothing -> 0. + losses = ( + -F.logsigmoid(self.beta * logits) * (1 - self.label_smoothing) + - F.logsigmoid(-self.beta * logits) * self.label_smoothing + ) + elif self.loss_type == "hinge": + losses = torch.relu(1 - self.beta * logits) + elif self.loss_type == "ipo": + # eqn (17) of the paper where beta is the regularization parameter for the IPO loss, denoted by tau in the paper. + losses = (logits - 1 / (2 * self.beta)) ** 2 + else: + raise ValueError( + f"Unknown loss type: {self.loss_type}. Should be one of ['sigmoid', 'hinge', 'ipo', 'simpo']" + ) + + # Calculate rewards for logging + if self.alpha != 0.0: + # When using AlphaPO transformation, use the transformed rewards + chosen_rewards = self.beta * policy_chosen_rewards.to(self.accelerator.device).detach() + rejected_rewards = self.beta * policy_rejected_rewards.to(self.accelerator.device).detach() + else: + # Standard log probability rewards + chosen_rewards = self.beta * (policy_chosen_logps.to(self.accelerator.device)).detach() + rejected_rewards = self.beta * (policy_rejected_logps.to(self.accelerator.device)).detach() + + return losses, chosen_rewards, rejected_rewards + + @staticmethod + def get_batch_logps( + logits: torch.FloatTensor, + labels: torch.LongTensor, + average_log_prob: bool = False, + label_pad_token_id: int = -100, + is_encoder_decoder: bool = False, + ) -> torch.FloatTensor: + """Compute the log probabilities of the given labels under the given logits. + + Args: + logits: Logits of the model (unnormalized). Shape: (batch_size, sequence_length, vocab_size) + labels: + Labels for which to compute the log probabilities. Label tokens with a value of label_pad_token_id are + ignored. Shape: (batch_size, sequence_length) + average_log_prob: + If True, return the average log probability per (non-masked) token. Otherwise, return the sum of the + log probabilities of the (non-masked) tokens. + label_pad_token_id: The label pad token id. + is_encoder_decoder: Whether the model is an encoder-decoder model. + + Returns: + A tensor of shape (batch_size,) containing the average/sum log probabilities of the given labels under the + given logits. 
+ """ + if logits.shape[:-1] != labels.shape: + raise ValueError("Logits (batch and sequence length dim) and labels must have the same shape.") + + if not is_encoder_decoder: + labels = labels[:, 1:].clone() + logits = logits[:, :-1, :] + loss_mask = labels != label_pad_token_id + + # dummy token; we'll ignore the losses on these tokens later + labels[labels == label_pad_token_id] = 0 + + per_token_logps = selective_log_softmax(logits, labels) + + if average_log_prob: + return (per_token_logps * loss_mask).sum(-1) / loss_mask.sum(-1) + else: + return (per_token_logps * loss_mask).sum(-1) + + def concatenated_forward( + self, model: nn.Module, batch: dict[str, Union[list, torch.LongTensor]] + ) -> tuple[torch.FloatTensor, torch.FloatTensor, torch.FloatTensor, torch.FloatTensor]: + """Run the given model on the given batch of inputs, concatenating the chosen and rejected inputs together. + + We do this to avoid doing two forward passes, because it's faster for FSDP. + """ + concatenated_batch = self.concatenated_inputs( + batch, + is_encoder_decoder=self.is_encoder_decoder, + label_pad_token_id=self.label_pad_token_id, + padding_value=self.padding_value, + device=self.accelerator.device, + ) + len_chosen = batch["chosen_labels"].shape[0] + + model_kwargs = ( + { + "decoder_input_ids": self._shift_right(concatenated_batch["concatenated_labels"]), + } + if self.is_encoder_decoder + else {} + ) + + if self.aux_loss_enabled: + model_kwargs["output_router_logits"] = True + + outputs = model( + concatenated_batch["concatenated_input_ids"], + attention_mask=concatenated_batch["concatenated_attention_mask"], + use_cache=False, + **model_kwargs, + ) + all_logits = outputs.logits + + def cross_entropy_loss(logits, labels): + if not self.is_encoder_decoder: + # Shift so that tokens < n predict n + logits = logits[..., :-1, :].contiguous() + labels = labels[..., 1:].contiguous() + # Flatten the tokens + loss_fct = nn.CrossEntropyLoss() + logits = logits.view(-1, logits.shape[-1]) + labels = labels.view(-1) + # Enable model parallelism + labels = labels.to(logits.device) + loss = loss_fct(logits, labels) + return loss + + labels = concatenated_batch["concatenated_labels"].clone() + + if self.cpo_alpha == 0: + nll_loss = torch.tensor(0.0).to(self.accelerator.device) + else: + nll_loss = cross_entropy_loss(all_logits[:len_chosen], labels[:len_chosen]) + + all_logps = self.get_batch_logps( + all_logits, + concatenated_batch["concatenated_labels"], + average_log_prob=self.loss_type in ["ipo", "simpo"], + is_encoder_decoder=self.is_encoder_decoder, + label_pad_token_id=self.label_pad_token_id, + ) + + chosen_logps = all_logps[:len_chosen] + rejected_logps = all_logps[len_chosen:] + + chosen_logits = all_logits[:len_chosen] + rejected_logits = all_logits[len_chosen:] + + if self.aux_loss_enabled: + return (chosen_logps, rejected_logps, chosen_logits, rejected_logits, nll_loss, outputs.aux_loss) + + return (chosen_logps, rejected_logps, chosen_logits, rejected_logits, nll_loss) + + def get_batch_loss_metrics( + self, + model, + batch: dict[str, Union[list, torch.LongTensor]], + train_eval: Literal["train", "eval"] = "train", + ): + """Compute the CPO loss and other metrics for the given batch of inputs for train or test.""" + metrics = {} + + forward_output = self.concatenated_forward(model, batch) + ( + policy_chosen_logps, + policy_rejected_logps, + policy_chosen_logits, + policy_rejected_logits, + policy_nll_loss, + ) = forward_output[:5] + if self.aux_loss_enabled: + aux_loss = forward_output[5] + + losses, 
chosen_rewards, rejected_rewards = self.cpo_loss( + policy_chosen_logps, + policy_rejected_logps, + ) + + loss = losses.mean() + self.cpo_alpha * policy_nll_loss + reward_accuracies = (chosen_rewards > rejected_rewards).float() + + prefix = "eval_" if train_eval == "eval" else "" + metrics[f"{prefix}rewards/chosen"] = self.accelerator.gather_for_metrics(chosen_rewards).mean().item() + metrics[f"{prefix}rewards/rejected"] = self.accelerator.gather_for_metrics(rejected_rewards).mean().item() + metrics[f"{prefix}rewards/accuracies"] = self.accelerator.gather_for_metrics(reward_accuracies).mean().item() + metrics[f"{prefix}rewards/margins"] = ( + self.accelerator.gather_for_metrics(chosen_rewards - rejected_rewards).mean().item() + ) + metrics[f"{prefix}logps/rejected"] = ( + self.accelerator.gather_for_metrics(policy_rejected_logps).detach().mean().item() + ) + metrics[f"{prefix}logps/chosen"] = ( + self.accelerator.gather_for_metrics(policy_chosen_logps).detach().mean().item() + ) + metrics[f"{prefix}logits/rejected"] = ( + self.accelerator.gather_for_metrics(policy_rejected_logits.detach().mean()).mean().item() + ) + metrics[f"{prefix}logits/chosen"] = ( + self.accelerator.gather_for_metrics(policy_chosen_logits.detach().mean()).mean().item() + ) + metrics[f"{prefix}nll_loss"] = self.accelerator.gather_for_metrics(policy_nll_loss).detach().mean().item() + + if self.aux_loss_enabled: + loss += self.aux_loss_coef * aux_loss + + return loss, metrics + + def compute_loss( + self, + model: Union[PreTrainedModel, nn.Module], + inputs: dict[str, Union[torch.Tensor, Any]], + return_outputs=False, + num_items_in_batch=None, + ) -> Union[torch.Tensor, tuple[torch.Tensor, dict[str, torch.Tensor]]]: + compute_loss_context_manager = ( + autocast(self.accelerator.device.type) if self._peft_has_been_casted_to_bf16 else nullcontext() + ) + + with compute_loss_context_manager: + loss, metrics = self.get_batch_loss_metrics(model, inputs, train_eval="train") + + # force log the metrics + self.store_metrics(metrics, train_eval="train") + + if return_outputs: + return (loss, metrics) + return loss + + def generate_from_model(self, model, batch: dict[str, torch.LongTensor]) -> str: + """Generate samples from the model and reference model for the given batch of inputs.""" + + # If one uses `generate_during_eval` with peft + bf16, we need to explicitly call generate with + # the torch amp context manager as some hidden states are silently casted to full precision. 
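+        # autocast(self.accelerator.device.type) wraps generation in a torch.amp mixed-precision
+        # context so the bf16-cast PEFT weights and any silently upcast float32 hidden states can
+        # be used together; nullcontext() leaves generation unchanged when no bf16 cast happened.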
+ generate_context_manager = ( + autocast(self.accelerator.device.type) if self._peft_has_been_casted_to_bf16 else nullcontext() + ) + + with generate_context_manager: + policy_output = model.generate( + input_ids=batch["prompt_input_ids"], + attention_mask=batch["prompt_attention_mask"], + max_length=self.max_length, + do_sample=True, + pad_token_id=self.processing_class.pad_token_id, + ) + + policy_output = pad_to_length(policy_output, self.max_length, self.processing_class.pad_token_id) + policy_output_decoded = self.processing_class.batch_decode(policy_output, skip_special_tokens=True) + + return policy_output_decoded + + def prediction_step( + self, + model: Union[PreTrainedModel, nn.Module], + inputs: dict[str, Union[torch.Tensor, Any]], + prediction_loss_only: bool, + ignore_keys: Optional[list[str]] = None, + ): + if ignore_keys is None: + if hasattr(model, "config"): + ignore_keys = getattr(model.config, "keys_to_ignore_at_inference", []) + else: + ignore_keys = [] + + prediction_context_manager = ( + autocast(self.accelerator.device.type) if self._peft_has_been_casted_to_bf16 else nullcontext() + ) + + with torch.no_grad(), prediction_context_manager: + loss, metrics = self.get_batch_loss_metrics(model, inputs, train_eval="eval") + + # force log the metrics + self.store_metrics(metrics, train_eval="eval") + + if prediction_loss_only: + return (loss.detach(), None, None) + + # logits for the chosen and rejected samples from model + logits_dict = { + "eval_logits/chosen": metrics["eval_logits/chosen"], + "eval_logits/rejected": metrics["eval_logits/rejected"], + } + logits = [v for k, v in logits_dict.items() if k not in ignore_keys] + logits = torch.tensor(logits, device=self.accelerator.device) + labels = torch.zeros(logits.shape[0], device=self.accelerator.device) + + return (loss.detach(), logits, labels) + + def store_metrics(self, metrics: dict[str, float], train_eval: Literal["train", "eval"] = "train") -> None: + for key, value in metrics.items(): + self._stored_metrics[train_eval][key].append(value) + + def evaluation_loop( + self, + dataloader: DataLoader, + description: str, + prediction_loss_only: Optional[bool] = None, + ignore_keys: Optional[list[str]] = None, + metric_key_prefix: str = "eval", + ) -> EvalLoopOutput: + """ + Overriding built-in evaluation loop to store metrics for each batch. Prediction/evaluation loop, shared by + `Trainer.evaluate()` and `Trainer.predict()`. + + Works both with or without labels. 
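+
+        When `generate_during_eval` is enabled, one randomly sampled batch is decoded via
+        `generate_from_model` and logged as a `game_log` table to Weights & Biases and/or Comet
+        before the base evaluation loop runs.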
+ """ + + # Sample and save to game log if requested (for one batch to save time) + if self.generate_during_eval: + # Generate random indices within the range of the total number of samples + num_samples = len(dataloader.dataset) + random_indices = random.sample(range(num_samples), k=self.args.eval_batch_size) + + # Use dataloader.dataset.select to get the random batch without iterating over the DataLoader + random_batch_dataset = dataloader.dataset.select(random_indices) + random_batch = self.data_collator(random_batch_dataset) + random_batch = self._prepare_inputs(random_batch) + + policy_output_decoded = self.generate_from_model(self.model, random_batch) + + table = pd.DataFrame( + columns=["Prompt", "Policy"], + data=[ + [prompt, pol[len(prompt) :]] for prompt, pol in zip(random_batch["prompt"], policy_output_decoded) + ], + ) + if "wandb" in self.args.report_to: + wandb.log({"game_log": wandb.Table(data=table)}) + + if "comet_ml" in self.args.report_to: + log_table_to_comet_experiment( + name="game_log.csv", + table=table, + ) + + # Base evaluation + initial_output = super().evaluation_loop( + dataloader, description, prediction_loss_only, ignore_keys, metric_key_prefix + ) + + return initial_output + + def log(self, logs: dict[str, float], start_time: Optional[float] = None) -> None: + """ + Log `logs` on the various objects watching training, including stored metrics. + + Args: + logs (`dict[str, float]`): + The values to log. + start_time (`float` or `None`, *optional*, defaults to `None`): + Start time of the training. + """ + # logs either has 'loss' or 'eval_loss' + train_eval = "train" if "loss" in logs else "eval" + # Add averaged stored metrics to logs + for key, metrics in self._stored_metrics[train_eval].items(): + logs[key] = torch.tensor(metrics).mean().item() + del self._stored_metrics[train_eval] + return super().log(logs, start_time) + + def _shift_right(self, input_ids): + if self.decoder_start_token_id is None: + raise ValueError( + "model.config.decoder_start_token_id has to be defined. It is usually set to the pad_token_id." + ) + + # shift inputs to the right + if is_torch_fx_proxy(input_ids): + # Item assignment is not supported natively for proxies. + shifted_input_ids = torch.full(input_ids.shape[:-1] + (1,), self.decoder_start_token_id) + shifted_input_ids = torch.cat([shifted_input_ids, input_ids[..., :-1]], dim=-1) + else: + shifted_input_ids = input_ids.new_zeros(input_ids.shape) + shifted_input_ids[..., 1:] = input_ids[..., :-1].clone() + shifted_input_ids[..., 0] = self.decoder_start_token_id + + if self.pad_token_id is None: + raise ValueError("model.config.pad_token_id has to be defined.") + # replace possible -100 values in labels by `pad_token_id` + shifted_input_ids.masked_fill_(shifted_input_ids == -100, self.pad_token_id) + + return shifted_input_ids + + # Ensure the model card is saved along with the checkpoint + def _save_checkpoint(self, model, trial): + if self.args.hub_model_id is None: + model_name = Path(self.args.output_dir).name + else: + model_name = self.args.hub_model_id.split("/")[-1] + self.create_model_card(model_name=model_name) + super()._save_checkpoint(model, trial) + + def create_model_card( + self, + model_name: Optional[str] = None, + dataset_name: Optional[str] = None, + tags: Union[str, list[str], None] = None, + ): + """ + Creates a draft of a model card using the information available to the `Trainer`. + + Args: + model_name (`str` or `None`, *optional*, defaults to `None`): + Name of the model. 
+ dataset_name (`str` or `None`, *optional*, defaults to `None`): + Name of the dataset used for training. + tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`): + Tags to be associated with the model card. + """ + if not self.is_world_process_zero(): + return + + if hasattr(self.model.config, "_name_or_path") and not os.path.isdir(self.model.config._name_or_path): + base_model = self.model.config._name_or_path + else: + base_model = None + + # normalize `tags` to a mutable set + if tags is None: + tags = set() + elif isinstance(tags, str): + tags = {tags} + else: + tags = set(tags) + + if hasattr(self.model.config, "unsloth_version"): + tags.add("unsloth") + + if "JOB_ID" in os.environ: + tags.add("hf_jobs") + + tags.update(self._tag_names) + + # docstyle-ignore + citation = textwrap.dedent("""\ + @inproceedings{xu2024contrastive, + title = {{Contrastive Preference Optimization: Pushing the Boundaries of LLM Performance in Machine Translation}}, + author = {Haoran Xu and Amr Sharaf and Yunmo Chen and Weiting Tan and Lingfeng Shen and Benjamin Van Durme and Kenton Murray and Young Jin Kim}, + year = 2024, + booktitle = {Forty-first International Conference on Machine Learning, {ICML} 2024, Vienna, Austria, July 21-27, 2024}, + publisher = {OpenReview.net}, + url = {https://openreview.net/forum?id=51iwkioZpn} + }""") + + model_card = generate_model_card( + base_model=base_model, + model_name=model_name, + hub_model_id=self.hub_model_id, + dataset_name=dataset_name, + tags=tags, + wandb_url=wandb.run.url if is_wandb_available() and wandb.run is not None else None, + comet_url=get_comet_experiment_url(), + trainer_name="CPO", + trainer_citation=citation, + paper_title="Contrastive Preference Optimization: Pushing the Boundaries of LLM Performance in Machine Translation", + paper_id="2401.08417", + ) + model_card.save(os.path.join(self.args.output_dir, "README.md")) +class UnslothCPOTrainer(_UnslothCPOTrainer): + """ + +Initialize CPOTrainer. + +Args: + model (`transformers.PreTrainedModel`): + The model to train, preferably an `AutoModelForSequenceClassification`. + args (`CPOConfig`): + The CPO config arguments to use for training. + data_collator (`transformers.DataCollator`): + The data collator to use for training. If None is specified, the default data collator + (`DPODataCollatorWithPadding`) will be used which will pad the sequences to the maximum length of the + sequences in the batch, given a dataset of paired sequences. + train_dataset (`datasets.Dataset`): + The dataset to use for training. + eval_dataset (`datasets.Dataset`): + The dataset to use for evaluation. + processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`): + Processing class used to process the data. If provided, will be used to automatically process the inputs + for the model, and it will be saved along the model to make it easier to rerun an interrupted training or + reuse the fine-tuned model. + model_init (`Callable[[], transformers.PreTrainedModel]`): + The model initializer to use for training. If None is specified, the default model initializer will be + used. + callbacks (`list[transformers.TrainerCallback]`): + The callbacks to use for training. + optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`): + The optimizer and scheduler to use for training. 
+    preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`):
+        The function to use to preprocess the logits before computing the metrics.
+    peft_config (`dict`, defaults to `None`):
+        The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in
+        a PEFT model.
+    compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
+        The function to use to compute the metrics. Must take an `EvalPrediction` and return a dictionary mapping
+        strings to metric values.
+
+    """
+    def __init__(
+        self,
+        model = None,
+        args = None,
+        data_collator = None,
+        train_dataset = None,
+        eval_dataset = None,
+        processing_class = None,
+        model_init = None,
+        callbacks = None,
+        preprocess_logits_for_metrics = None,
+        peft_config = None,
+        compute_metrics = None,
+        **kwargs
+    ):
+        if args is None: args = UnslothCPOConfig()
+        use_bf16 = getattr(args, 'bf16', False)
+        if type(use_bf16) is not bool: use_bf16 = False
+        use_fp16 = getattr(args, 'fp16', False)
+        if type(use_fp16) is not bool: use_fp16 = False
+        force_float32 = False
+        full_finetuning = os.environ.get('UNSLOTH_ENABLE_FULL_FINETUNING', '0') == '1'
+        if not full_finetuning and (os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '1'):
+            print('Unsloth: Switching to float32 training since model cannot work with float16')
+            force_float32 = True
+        mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
+        dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
+        if dtype is None: dtype = model.get_input_embeddings().dtype
+        from unsloth_zoo.utils import _get_dtype
+        dtype = _get_dtype(dtype)
+        float16 = dtype == torch.float16
+        if not force_float32 and (float16 and use_bf16): raise TypeError('Unsloth: Model is in float16 precision but you want to use bfloat16 precision. Set fp16 to `True` and bf16 to `False`')
+        if not force_float32 and (not float16 and use_fp16): raise TypeError('Unsloth: Model is in bfloat16 precision but you want to use float16 precision. 
Set fp16 to `False` and bf16 to `True`') + if force_float32: + # Forced float32 training + args.fp16 = False + args.bf16 = False + os.environ['ACCELERATE_MIXED_PRECISION'] = 'no' + elif (not use_bf16 and not use_fp16) and mixed_precision_dtype == 'float32': + # Mixed precision training + args.fp16 = float16 + args.bf16 = not float16 + os.environ['ACCELERATE_MIXED_PRECISION'] = 'fp16' if float16 else 'bf16' + if getattr(args, 'eval_dataset', None) is not None and getattr(args, 'eval_strategy', 'no') == 'no': + args.eval_strategy = 'steps' + if getattr(args, 'eval_steps', None) is None: args.eval_steps = 0.1 + ga_steps = getattr(args, 'gradient_accumulation_steps', None) + if ga_steps is not None and ga_steps > 1: + from transformers import __version__ as transformers_version + if Version(transformers_version) <= Version('4.45.2'): + print('**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!\n' + '`pip install --upgrade --no-cache-dir --force-reinstall --no-deps unsloth transformers trl unsloth_zoo`') + if getattr(args, 'eval_strategy', 'no') != 'no': + eval_bsz = getattr(args, 'per_device_eval_batch_size', 8) + if eval_bsz == 8 and args.per_device_train_batch_size < eval_bsz: args.per_device_eval_batch_size = args.per_device_train_batch_size + if getattr(args, 'eval_accumulation_steps', None) is None and ga_steps is not None: args.eval_accumulation_steps = ga_steps + fp16_full_eval = getattr(args, 'fp16_full_eval', False) + if type(fp16_full_eval) is not bool: fp16_full_eval = False + bf16_full_eval = getattr(args, 'bf16_full_eval', False) + if type(bf16_full_eval) is not bool: bf16_full_eval = False + if args.fp16 and bf16_full_eval: args.bf16_full_eval = False; args.fp16_full_eval = True + if args.bf16 and fp16_full_eval: args.bf16_full_eval = True; args.fp16_full_eval = False + if force_float32: + args.bf16_full_eval = False + args.fp16_full_eval = False + elif os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') == 'bfloat16': + args.bf16_full_eval = True + args.fp16_full_eval = False + elif not bf16_full_eval and not fp16_full_eval: + args.bf16_full_eval = args.bf16 + args.fp16_full_eval = args.fp16 + _output_logits = False + if locals().get('compute_metrics', None) is not None: _output_logits = True + if locals().get('preprocess_logits_for_metrics', None) is not None: _output_logits = True + if _output_logits: + os.environ['UNSLOTH_RETURN_LOGITS'] = '1' + if 'max_seq_length' not in locals() and not hasattr(args, 'max_seq_length'): + pass + else: + model_max_seq_length = getattr(model, 'max_seq_length', None) + args_max_seq_length = getattr(args, 'max_seq_length', None) + if args_max_seq_length is None and model_max_seq_length is not None: + max_seq_length = model.max_seq_length + if hasattr(args, 'max_seq_length'): args.max_seq_length = max_seq_length + if model is not None and hasattr(model, 'for_training'): + model.for_training() + if 'tokenizer' in locals() and hasattr(tokenizer, 'padding_side'): tokenizer.padding_side = 'right' + if 'processing_class' in locals(): + if hasattr(processing_class, 'padding_side'): processing_class.padding_side = 'right' + if hasattr(processing_class, 'tokenizer') and hasattr(processing_class.tokenizer, 'padding_side'): processing_class.tokenizer.padding_side = 'right' + __tokenizer = processing_class if 'processing_class' in locals() else tokenizer + from unsloth_zoo.vision_utils import UnslothVisionDataCollator + if not isinstance(data_collator, UnslothVisionDataCollator): + if 
isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names: + data_collator = DataCollatorForSeq2Seq( + __tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False + if hasattr(args, 'dataset_text_field'): args.dataset_text_field = '' + if hasattr(args, 'dataset_kwargs'): args.dataset_kwargs = {'skip_prepare_dataset': True} + if not isinstance(data_collator, UnslothVisionDataCollator): + if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'): + if isinstance(data_collator, DataCollatorForSeq2Seq): + data_collator = DataCollatorForSeq2Seq( + __tokenizer.tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer.tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + other_metrics = [] + + from unsloth_zoo.logging_utils import PatchRLStatistics + PatchRLStatistics('cpo_trainer', other_metrics) + + # [TODO] Fix up DataParallel multiplying batch sizes + # [TODO] DDP works, but DP seems to not work? [TODO] + if getattr(args, "parallel_mode", None) == ParallelMode.NOT_DISTRIBUTED and args.n_gpu > 1: + if getattr(args, "_n_gpu", 1) != 1: + args._n_gpu = 1 + if "model" in locals() and hasattr(model, "for_training"): + model.for_training() + super().__init__( + model = model, + args = args, + data_collator = data_collator, + train_dataset = train_dataset, + eval_dataset = eval_dataset, + processing_class = processing_class, + model_init = model_init, + callbacks = callbacks, + preprocess_logits_for_metrics = preprocess_logits_for_metrics, + peft_config = peft_config, + compute_metrics = compute_metrics,**kwargs) + if "model" in locals() and hasattr(model, "for_inference"): + model.for_inference() + if hasattr(self, 'neftune_hook_handle'): + self.neftune_hook_handle.remove() + if hasattr(self, 'neftune_hook_handle'): del self.neftune_hook_handle + if getattr(args, 'neftune_noise_alpha', None) is not None: + model.get_input_embeddings().neftune_noise_alpha = self.neftune_noise_alpha + pass + if hasattr(self, 'accelerator'): + scaler = self.accelerator.scaler + current_model = model + while hasattr(current_model, 'model'): + current_model.accelerator_scaler = scaler + current_model = current_model.model + current_model.accelerator_scaler = scaler + pass + if hasattr(self, 'train'): + self.train = MethodType(prepare_for_training_mode(self.__class__.train), self) + pass + +pass + + +if hasattr(logger, "addFilter"): + import logging + class HideLoggingMessage(logging.Filter): + def __init__(self, text): self.text = text + def filter(self, x): return not (self.text in x.getMessage()) + pass + logger.addFilter(HideLoggingMessage("`use_cache=True`")) + diff --git a/unsloth_compiled_cache/UnslothDDPOTrainer.py b/unsloth_compiled_cache/UnslothDDPOTrainer.py new file mode 100644 index 0000000000000000000000000000000000000000..b4aae1277c953246c182ac42ee1384ba7291ffab --- /dev/null +++ b/unsloth_compiled_cache/UnslothDDPOTrainer.py @@ -0,0 +1,1081 @@ +""" +2025.11.2 
+2025.11.1 +4.57.2 +0.23.0 +__UNSLOTH_VERSIONING__ +""" + +# Unsloth auto generated code +# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . + +from torch import Tensor +import torch +import torch.nn as nn +from torch.nn import functional as F +from typing import Any, List, Optional, Tuple, Union, Dict, Set, Callable +from trl.trainer.ddpo_trainer import (Accelerator, Any, Callable, DDPOConfig, DDPOStableDiffusionPipeline, DDPOTrainer, Optional, Path, PerPromptStatTracker, ProjectConfiguration, PyTorchModelHubMixin, Union, defaultdict, futures, generate_model_card, get_comet_experiment_url, is_wandb_available, logger, logging, os, set_seed, textwrap, torch, warnings) + + +import os +from typing import * +from dataclasses import dataclass, field +from packaging.version import Version +import torch +import numpy as np +from contextlib import nullcontext +from torch.nn import functional as F +from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling as TransformersDataCollatorForLanguageModeling +from transformers.training_args import ParallelMode + +# Wrap trainer with padding to right and enable training mode +import functools +from types import MethodType +def prepare_for_training_mode(f): + @functools.wraps(f) + def wrapper(self, *args, **kwargs): + # Enable training mode + if hasattr(self, 'model') and hasattr(self.model, "for_training"): + self.model.for_training() + output = f(self, *args, **kwargs) + # Return inference mode + if hasattr(self, 'model') and hasattr(self.model, "for_inference"): + self.model.for_inference() + return output + return wrapper +pass + +torch_compile_options = { + "epilogue_fusion" : True, + "max_autotune" : False, + "shape_padding" : True, + "trace.enabled" : False, + "triton.cudagraphs" : False, +} + +@torch.compile(dynamic = True, fullgraph = True, options = torch_compile_options,) +def chunked_selective_log_softmax(logits, index): + # Split into 4 chunks only + chunked_logits = torch.chunk(logits.reshape(-1, logits.shape[-1]), chunks = 4, dim = 0) + chunked_index = torch.chunk(index.reshape(-1), chunks = 4, dim = 0) + all_per_token_logps = [] + # Below loop does the same as selective_log_softmax(chunk_logits, chunk_index) + for chunk_logits, chunk_index in zip(chunked_logits, chunked_index): + chunk_logits = chunk_logits.to(torch.float32) + selected_logits = torch.gather(chunk_logits, dim = -1, index = chunk_index.unsqueeze(-1)).squeeze(-1) + logsumexp_values = torch.logsumexp(chunk_logits, dim = -1) + per_token_logps = selected_logits - logsumexp_values + all_per_token_logps.append(per_token_logps) + pass + all_per_token_logps = torch.concat(all_per_token_logps) + all_per_token_logps = all_per_token_logps.reshape((logits.shape[0], logits.shape[1])) + return all_per_token_logps + +def calculate_pad_tokens_in_prompt( + input_ids: torch.Tensor, + logits_to_keep: int, 
+    pad_token_id: int
+) -> torch.Tensor:
+    """
+    Given a prompt tensor, returns the number of left-padding tokens in each sequence,
+    e.g. [pad, pad, pad, cat] -> 3 tokens.
+    """
+    if logits_to_keep >= input_ids.shape[1]:
+        raise ValueError("logits_to_keep must be smaller than the sequence length.")
+
+    prompt_section = input_ids[:, :-logits_to_keep]
+
+    padding_mask = (prompt_section == pad_token_id)
+
+    pad_token_counts = padding_mask.sum(dim=1)
+
+    return pad_token_counts
+
+def create_completion_attention_mask(
+    completion_input_ids: torch.Tensor,
+    left_pad_tokens_per_prompt: torch.Tensor,
+    max_left_pad: int,
+    pad_token_id: int
+) -> torch.Tensor:
+    """
+    Given a sequence [p, p, p, c, c, c, pad, pad, pad], where p are extra prompt tokens
+    obtained from slicing the tensor, c are completion tokens, and pad are pad tokens,
+    builds a completion mask that zeroes out the pad and p tokens: here
+    [0, 0, 0, 1, 1, 1, 0, 0, 0].
+    """
+    batch_size, completion_len = completion_input_ids.shape
+    device = completion_input_ids.device
+
+    num_tokens_to_mask = max_left_pad - left_pad_tokens_per_prompt
+
+    indices = torch.arange(completion_len, device=device).unsqueeze(0)
+    shift_mask = indices >= num_tokens_to_mask.unsqueeze(1)
+
+    non_padding_mask = (completion_input_ids != pad_token_id)
+
+    final_mask = shift_mask & non_padding_mask
+
+    return final_mask
+
+def left_pack_padding(tensor: torch.Tensor, pad_id: int) -> torch.Tensor:
+    """
+    Moves all padding tokens in each sequence of a batch to the right.
+    """
+    mask = (tensor != pad_id)
+    # Must use stable=True since the binary mask is unordered
+    sorted_indices = torch.argsort(mask, dim=1, descending=True, stable=True)
+    packed_tensor = torch.gather(tensor, 1, sorted_indices)
+    return packed_tensor
+
+def align_logprobs_with_mask(
+    logprob_tensor: torch.Tensor,
+    attention_mask: torch.Tensor,
+    pad_value: float = 0.0
+) -> torch.Tensor:
+    """
+    Aligns a log probability tensor with a given attention mask by scattering each row of
+    log probabilities so that it starts at the row's first attended (non-padding) position.
+    """
+
+    device = logprob_tensor.device
+    batch_size, logprob_seq_len = logprob_tensor.shape
+    mask_seq_len = attention_mask.shape[1]
+
+    padded_logprobs = torch.full(
+        attention_mask.shape,
+        fill_value=pad_value,
+        dtype=logprob_tensor.dtype,
+        device=device
+    )
+
+    left_pad_counts = torch.argmax(attention_mask, dim=1)
+
+    cols = torch.arange(logprob_seq_len, device=device)
+    dest_indices = left_pad_counts.unsqueeze(1) + cols
+
+    # Create destination row indices
+    # Shape: [batch_size, logprob_seq_len]
+    row_indices = torch.arange(batch_size, device=device).unsqueeze(1).expand_as(dest_indices)
+
+    # Filter out-of-bounds indices and perform the assignment.
+    # Create a mask to identify only the indices that are within the bounds
+    # of the target tensor's sequence length.
+    valid_mask = dest_indices < mask_seq_len
+
+    # Use this mask to select only the valid row indices, column indices,
+    # and the corresponding values from the logprob tensor.
+    # This flattens the selected elements into 1D tensors.
+    valid_rows = row_indices[valid_mask]
+    valid_cols = dest_indices[valid_mask]
+    valid_vals = logprob_tensor[valid_mask]
+
+    # Place the valid values into their correct positions in the padded tensor
+    # using a single, efficient advanced indexing operation.
+    padded_logprobs[valid_rows, valid_cols] = valid_vals
+
+    return padded_logprobs
+@dataclass
+class UnslothDDPOConfig(DDPOConfig):
+    """
+
+Configuration class for the [`DDPOTrainer`].
+
+Using [`~transformers.HfArgumentParser`] we can turn this class into
+[argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the
+command line.
+
+Parameters:
+    exp_name (`str`, *optional*, defaults to `os.path.basename(sys.argv[0])[: -len(".py")]`):
+        Name of this experiment (by default, the file name without the extension).
+    run_name (`str`, *optional*, defaults to `""`):
+        Name of this run.
+    seed (`int`, *optional*, defaults to `0`):
+        Random seed.
+    log_with (`Literal["wandb", "tensorboard"]` or `None`, *optional*, defaults to `None`):
+        Log with either 'wandb' or 'tensorboard'; see
+        https://huggingface.co/docs/accelerate/usage_guides/tracking for more details.
+    tracker_kwargs (`Dict`, *optional*, defaults to `{}`):
+        Keyword arguments for the tracker (e.g. wandb_project).
+    accelerator_kwargs (`Dict`, *optional*, defaults to `{}`):
+        Keyword arguments for the accelerator.
+    project_kwargs (`Dict`, *optional*, defaults to `{}`):
+        Keyword arguments for the accelerator project config (e.g. `logging_dir`).
+    tracker_project_name (`str`, *optional*, defaults to `"trl"`):
+        Name of project to use for tracking.
+    logdir (`str`, *optional*, defaults to `"logs"`):
+        Top-level logging directory for checkpoint saving.
+    num_epochs (`int`, *optional*, defaults to `100`):
+        Number of epochs to train.
+    save_freq (`int`, *optional*, defaults to `1`):
+        Number of epochs between saving model checkpoints.
+    num_checkpoint_limit (`int`, *optional*, defaults to `5`):
+        Number of checkpoints to keep before overwriting old ones.
+    mixed_precision (`str`, *optional*, defaults to `"fp16"`):
+        Mixed precision training.
+    allow_tf32 (`bool`, *optional*, defaults to `True`):
+        Allow `tf32` on Ampere GPUs.
+    resume_from (`str`, *optional*, defaults to `""`):
+        Resume training from a checkpoint.
+    sample_num_steps (`int`, *optional*, defaults to `50`):
+        Number of sampler inference steps.
+    sample_eta (`float`, *optional*, defaults to `1.0`):
+        Eta parameter for the DDIM sampler.
+    sample_guidance_scale (`float`, *optional*, defaults to `5.0`):
+        Classifier-free guidance weight.
+    sample_batch_size (`int`, *optional*, defaults to `1`):
+        Batch size (per GPU) to use for sampling.
+    sample_num_batches_per_epoch (`int`, *optional*, defaults to `2`):
+        Number of batches to sample per epoch.
+    train_batch_size (`int`, *optional*, defaults to `1`):
+        Batch size (per GPU) to use for training.
+    train_use_8bit_adam (`bool`, *optional*, defaults to `False`):
+        Use 8bit Adam optimizer from bitsandbytes.
+    train_learning_rate (`float`, *optional*, defaults to `3e-4`):
+        Learning rate.
+    train_adam_beta1 (`float`, *optional*, defaults to `0.9`):
+        Adam beta1.
+    train_adam_beta2 (`float`, *optional*, defaults to `0.999`):
+        Adam beta2.
+    train_adam_weight_decay (`float`, *optional*, defaults to `1e-4`):
+        Adam weight decay.
+    train_adam_epsilon (`float`, *optional*, defaults to `1e-8`):
+        Adam epsilon.
+    train_gradient_accumulation_steps (`int`, *optional*, defaults to `1`):
+        Number of gradient accumulation steps.
+    train_max_grad_norm (`float`, *optional*, defaults to `1.0`):
+        Maximum gradient norm for gradient clipping.
+    train_num_inner_epochs (`int`, *optional*, defaults to `1`):
+        Number of inner epochs per outer epoch.
+    train_cfg (`bool`, *optional*, defaults to `True`):
+        Whether to use classifier-free guidance during training.
+    train_adv_clip_max (`float`, *optional*, defaults to `5.0`):
+        Clip advantages to the range `[-train_adv_clip_max, train_adv_clip_max]`.
+    train_clip_range (`float`, *optional*, defaults to `1e-4`):
+        PPO clip range.
+    train_timestep_fraction (`float`, *optional*, defaults to `1.0`):
+        Fraction of timesteps to train on.
+    per_prompt_stat_tracking (`bool`, *optional*, defaults to `False`):
+        Whether to track statistics for each prompt separately.
+    per_prompt_stat_tracking_buffer_size (`int`, *optional*, defaults to `16`):
+        Number of reward values to store in the buffer for each prompt.
+    per_prompt_stat_tracking_min_count (`int`, *optional*, defaults to `16`):
+        Minimum number of reward values to store in the buffer.
+    async_reward_computation (`bool`, *optional*, defaults to `False`):
+        Whether to compute rewards asynchronously.
+    max_workers (`int`, *optional*, defaults to `2`):
+        Maximum number of workers to use for async reward computation.
+    negative_prompts (`str`, *optional*, defaults to `""`):
+        Comma-separated list of prompts to use as negative examples.
+    push_to_hub (`bool`, *optional*, defaults to `False`):
+        Whether to push the final model checkpoint to the Hub.
+
+    """
+    vllm_sampling_params: Optional[Any] = field(
+        default = None,
+        metadata = {'help': 'vLLM SamplingParams'},
+    )
+    unsloth_num_chunks : Optional[int] = field(
+        default = -1,
+        metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'},
+    )
+
+    def __init__(
+        self,
+        exp_name = 'train_grpo',
+        run_name = '',
+        seed = 3407,
+        log_with = None,
+        tracker_project_name = 'trl',
+        logdir = 'logs',
+        num_epochs = 100,
+        save_freq = 1,
+        num_checkpoint_limit = 5,
+        mixed_precision = 'fp16',
+        allow_tf32 = True,
+        resume_from = '',
+        sample_num_steps = 50,
+        sample_eta = 1.0,
+        sample_guidance_scale = 5.0,
+        sample_batch_size = 1,
+        sample_num_batches_per_epoch = 2,
+        train_batch_size = 1,
+        train_use_8bit_adam = False,
+        train_learning_rate = 5e-05,
+        train_adam_beta1 = 0.9,
+        train_adam_beta2 = 0.999,
+        train_adam_weight_decay = 0.01,
+        train_adam_epsilon = 1e-08,
+        train_gradient_accumulation_steps = 2,
+        train_max_grad_norm = 1.0,
+        train_num_inner_epochs = 1,
+        train_cfg = True,
+        train_adv_clip_max = 5.0,
+        train_clip_range = 0.0001,
+        train_timestep_fraction = 1.0,
+        per_prompt_stat_tracking = False,
+        per_prompt_stat_tracking_buffer_size = 16,
+        per_prompt_stat_tracking_min_count = 16,
+        async_reward_computation = False,
+        max_workers = 2,
+        negative_prompts = '',
+        push_to_hub = False,
+        vllm_sampling_params = None,
+        unsloth_num_chunks = -1,
+
+        **kwargs,
+    ):
+        if train_learning_rate < 1e-7: print(f'Unsloth: Your learning rate of `{train_learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
+        if train_learning_rate > 1: print(f'Unsloth: Your learning rate of `{train_learning_rate}` is way too large (> 1)! 
Consider decreasing it to 1e-1, otherwise gradient updates will explode!') + + super().__init__( + exp_name = exp_name, + run_name = run_name, + seed = seed, + log_with = log_with, + tracker_project_name = tracker_project_name, + logdir = logdir, + num_epochs = num_epochs, + save_freq = save_freq, + num_checkpoint_limit = num_checkpoint_limit, + mixed_precision = mixed_precision, + allow_tf32 = allow_tf32, + resume_from = resume_from, + sample_num_steps = sample_num_steps, + sample_eta = sample_eta, + sample_guidance_scale = sample_guidance_scale, + sample_batch_size = sample_batch_size, + sample_num_batches_per_epoch = sample_num_batches_per_epoch, + train_batch_size = train_batch_size, + train_use_8bit_adam = train_use_8bit_adam, + train_learning_rate = train_learning_rate, + train_adam_beta1 = train_adam_beta1, + train_adam_beta2 = train_adam_beta2, + train_adam_weight_decay = train_adam_weight_decay, + train_adam_epsilon = train_adam_epsilon, + train_gradient_accumulation_steps = train_gradient_accumulation_steps, + train_max_grad_norm = train_max_grad_norm, + train_num_inner_epochs = train_num_inner_epochs, + train_cfg = train_cfg, + train_adv_clip_max = train_adv_clip_max, + train_clip_range = train_clip_range, + train_timestep_fraction = train_timestep_fraction, + per_prompt_stat_tracking = per_prompt_stat_tracking, + per_prompt_stat_tracking_buffer_size = per_prompt_stat_tracking_buffer_size, + per_prompt_stat_tracking_min_count = per_prompt_stat_tracking_min_count, + async_reward_computation = async_reward_computation, + max_workers = max_workers, + negative_prompts = negative_prompts, + push_to_hub = push_to_hub,**kwargs) + self.vllm_sampling_params = vllm_sampling_params + self.unsloth_num_chunks = unsloth_num_chunks + +pass + +class _UnslothDDPOTrainer(PyTorchModelHubMixin): + """ + The DDPOTrainer uses Deep Diffusion Policy Optimization to optimise diffusion models. Note, this trainer is heavily + inspired by the work here: https://github.com/kvablack/ddpo-pytorch As of now only Stable Diffusion based pipelines + are supported + + Args: + config ([`DDPOConfig`]): + Configuration object for DDPOTrainer. Check the documentation of [`PPOConfig`] for more details. + reward_function (`Callable[[torch.Tensor, tuple[str], tuple[Any]], torch.Tensor]`): + Reward function to be used. + prompt_function (`Callable[[], tuple[str, Any]]`): Function to generate prompts to guide model + sd_pipeline ([`DDPOStableDiffusionPipeline`]): Stable Diffusion pipeline to be used for training. + image_samples_hook (`Optional[Callable[[Any, Any, Any], Any]]`): Hook to be called to log images. 
+ """ + + _tag_names = ["trl", "ddpo"] + + def __init__( + self, + config: DDPOConfig, + reward_function: Callable[[torch.Tensor, tuple[str], tuple[Any]], torch.Tensor], + prompt_function: Callable[[], tuple[str, Any]], + sd_pipeline: DDPOStableDiffusionPipeline, + image_samples_hook: Optional[Callable[[Any, Any, Any], Any]] = None, + ): + warnings.warn( + "DDPOTrainer is deprecated and will be removed in version 0.23.0.", + DeprecationWarning, + ) + if image_samples_hook is None: + logger.warning("No image_samples_hook provided; no images will be logged") + + self.prompt_fn = prompt_function + self.reward_fn = reward_function + self.config = config + self.image_samples_callback = image_samples_hook + + accelerator_project_config = ProjectConfiguration(**self.config.project_kwargs) + + if self.config.resume_from: + self.config.resume_from = os.path.normpath(os.path.expanduser(self.config.resume_from)) + if "checkpoint_" not in os.path.basename(self.config.resume_from): + # get the most recent checkpoint in this directory + checkpoints = list( + filter( + lambda x: "checkpoint_" in x, + os.listdir(self.config.resume_from), + ) + ) + if len(checkpoints) == 0: + raise ValueError(f"No checkpoints found in {self.config.resume_from}") + checkpoint_numbers = sorted([int(x.split("_")[-1]) for x in checkpoints]) + self.config.resume_from = os.path.join( + self.config.resume_from, + f"checkpoint_{checkpoint_numbers[-1]}", + ) + + accelerator_project_config.iteration = checkpoint_numbers[-1] + 1 + + # number of timesteps within each trajectory to train on + self.num_train_timesteps = int(self.config.sample_num_steps * self.config.train_timestep_fraction) + + self.accelerator = Accelerator( + log_with=self.config.log_with, + mixed_precision=self.config.mixed_precision, + project_config=accelerator_project_config, + # we always accumulate gradients across timesteps; we want config.train.gradient_accumulation_steps to be the + # number of *samples* we accumulate across, so we need to multiply by the number of training timesteps to get + # the total number of optimizer steps to accumulate across. + gradient_accumulation_steps=self.config.train_gradient_accumulation_steps * self.num_train_timesteps, + **self.config.accelerator_kwargs, + ) + + is_okay, message = self._config_check() + if not is_okay: + raise ValueError(message) + + is_using_tensorboard = config.log_with is not None and config.log_with == "tensorboard" + + if self.accelerator.is_main_process: + self.accelerator.init_trackers( + self.config.tracker_project_name, + config=dict(ddpo_trainer_config=config.to_dict()) if not is_using_tensorboard else config.to_dict(), + init_kwargs=self.config.tracker_kwargs, + ) + + logger.info(f"\n{config}") + + set_seed(self.config.seed, device_specific=True) + + self.sd_pipeline = sd_pipeline + + self.sd_pipeline.set_progress_bar_config( + position=1, + disable=not self.accelerator.is_local_main_process, + leave=False, + desc="Timestep", + dynamic_ncols=True, + ) + + # For mixed precision training we cast all non-trainable weights [vae, non-lora text_encoder and non-lora unet] to half-precision + # as these weights are only used for inference, keeping weights in full precision is not required. 
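+        # The accelerator's mixed_precision string is mapped to the torch dtype used for these
+        # inference-only weights below: "fp16" -> torch.float16, "bf16" -> torch.bfloat16,
+        # anything else -> torch.float32.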
+ if self.accelerator.mixed_precision == "fp16": + inference_dtype = torch.float16 + elif self.accelerator.mixed_precision == "bf16": + inference_dtype = torch.bfloat16 + else: + inference_dtype = torch.float32 + + self.sd_pipeline.vae.to(self.accelerator.device, dtype=inference_dtype) + self.sd_pipeline.text_encoder.to(self.accelerator.device, dtype=inference_dtype) + self.sd_pipeline.unet.to(self.accelerator.device, dtype=inference_dtype) + + trainable_layers = self.sd_pipeline.get_trainable_layers() + + self.accelerator.register_save_state_pre_hook(self._save_model_hook) + self.accelerator.register_load_state_pre_hook(self._load_model_hook) + + # Enable TF32 for faster training on Ampere GPUs, + # cf https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices + if self.config.allow_tf32 and torch.cuda.is_available(): + torch.backends.cuda.matmul.allow_tf32 = True + + self.optimizer = self._setup_optimizer( + trainable_layers.parameters() if not isinstance(trainable_layers, list) else trainable_layers + ) + + self.neg_prompt_embed = self.sd_pipeline.text_encoder( + self.sd_pipeline.tokenizer( + [""] if self.config.negative_prompts is None else self.config.negative_prompts, + return_tensors="pt", + padding="max_length", + truncation=True, + max_length=self.sd_pipeline.tokenizer.model_max_length, + ).input_ids.to(self.accelerator.device) + )[0] + + if config.per_prompt_stat_tracking: + self.stat_tracker = PerPromptStatTracker( + config.per_prompt_stat_tracking_buffer_size, + config.per_prompt_stat_tracking_min_count, + ) + + # NOTE: for some reason, autocast is necessary for non-lora training but for lora training it isn't necessary and it uses + # more memory + self.autocast = self.sd_pipeline.autocast or self.accelerator.autocast + + if hasattr(self.sd_pipeline, "use_lora") and self.sd_pipeline.use_lora: + unet, self.optimizer = self.accelerator.prepare(trainable_layers, self.optimizer) + self.trainable_layers = list(filter(lambda p: p.requires_grad, unet.parameters())) + else: + self.trainable_layers, self.optimizer = self.accelerator.prepare(trainable_layers, self.optimizer) + + if self.config.async_reward_computation: + self.executor = futures.ThreadPoolExecutor(max_workers=config.max_workers) + + if config.resume_from: + logger.info(f"Resuming from {config.resume_from}") + self.accelerator.load_state(config.resume_from) + self.first_epoch = int(config.resume_from.split("_")[-1]) + 1 + else: + self.first_epoch = 0 + + def compute_rewards(self, prompt_image_pairs, is_async=False): + if not is_async: + rewards = [] + for images, prompts, prompt_metadata in prompt_image_pairs: + reward, reward_metadata = self.reward_fn(images, prompts, prompt_metadata) + rewards.append( + ( + torch.as_tensor(reward, device=self.accelerator.device), + reward_metadata, + ) + ) + else: + rewards = self.executor.map(lambda x: self.reward_fn(*x), prompt_image_pairs) + rewards = [ + (torch.as_tensor(reward.result(), device=self.accelerator.device), reward_metadata.result()) + for reward, reward_metadata in rewards + ] + + return zip(*rewards) + + def step(self, epoch: int, global_step: int): + """ + Perform a single step of training. + + Args: + epoch (int): The current epoch. + global_step (int): The current global step. + + Side Effects: + - Model weights are updated + - Logs the statistics to the accelerator trackers. + - If `self.image_samples_callback` is not None, it will be called with the prompt_image_pairs, global_step, + and the accelerator tracker. 
+ + Returns: + global_step (int): The updated global step. + + """ + samples, prompt_image_data = self._generate_samples( + iterations=self.config.sample_num_batches_per_epoch, + batch_size=self.config.sample_batch_size, + ) + + # collate samples into dict where each entry has shape (num_batches_per_epoch * sample.batch_size, ...) + samples = {k: torch.cat([s[k] for s in samples]) for k in samples[0].keys()} + rewards, rewards_metadata = self.compute_rewards( + prompt_image_data, is_async=self.config.async_reward_computation + ) + + for i, image_data in enumerate(prompt_image_data): + image_data.extend([rewards[i], rewards_metadata[i]]) + + if self.image_samples_callback is not None: + self.image_samples_callback(prompt_image_data, global_step, self.accelerator.trackers[0]) + + rewards = torch.cat(rewards) + rewards = self.accelerator.gather(rewards).cpu().numpy() + + self.accelerator.log( + { + "reward": rewards, + "epoch": epoch, + "reward_mean": rewards.mean(), + "reward_std": rewards.std(), + }, + step=global_step, + ) + + if self.config.per_prompt_stat_tracking: + # gather the prompts across processes + prompt_ids = self.accelerator.gather(samples["prompt_ids"]).cpu().numpy() + prompts = self.sd_pipeline.tokenizer.batch_decode(prompt_ids, skip_special_tokens=True) + advantages = self.stat_tracker.update(prompts, rewards) + else: + advantages = (rewards - rewards.mean()) / (rewards.std() + 1e-8) + + # ungather advantages; keep the entries corresponding to the samples on this process + samples["advantages"] = ( + torch.as_tensor(advantages) + .reshape(self.accelerator.num_processes, -1)[self.accelerator.process_index] + .to(self.accelerator.device) + ) + + del samples["prompt_ids"] + + total_batch_size, num_timesteps = samples["timesteps"].shape + + for inner_epoch in range(self.config.train_num_inner_epochs): + # shuffle samples along batch dimension + perm = torch.randperm(total_batch_size, device=self.accelerator.device) + samples = {k: v[perm] for k, v in samples.items()} + + # shuffle along time dimension independently for each sample + # still trying to understand the code below + perms = torch.stack( + [torch.randperm(num_timesteps, device=self.accelerator.device) for _ in range(total_batch_size)] + ) + + for key in ["timesteps", "latents", "next_latents", "log_probs"]: + samples[key] = samples[key][ + torch.arange(total_batch_size, device=self.accelerator.device)[:, None], + perms, + ] + + original_keys = samples.keys() + original_values = samples.values() + # rebatch them as user defined train_batch_size is different from sample_batch_size + reshaped_values = [v.reshape(-1, self.config.train_batch_size, *v.shape[1:]) for v in original_values] + + # Transpose the list of original values + transposed_values = zip(*reshaped_values) + # Create new dictionaries for each row of transposed values + samples_batched = [dict(zip(original_keys, row_values)) for row_values in transposed_values] + + self.sd_pipeline.unet.train() + global_step = self._train_batched_samples(inner_epoch, epoch, global_step, samples_batched) + # ensure optimization step at the end of the inner epoch + if not self.accelerator.sync_gradients: + raise ValueError( + "Optimization step should have been performed by this point. Please check calculated gradient accumulation settings." 
+ ) + + if epoch != 0 and epoch % self.config.save_freq == 0 and self.accelerator.is_main_process: + self.accelerator.save_state() + + return global_step + + def calculate_loss(self, latents, timesteps, next_latents, log_probs, advantages, embeds): + """ + Calculate the loss for a batch of an unpacked sample + + Args: + latents (torch.Tensor): + The latents sampled from the diffusion model, shape: [batch_size, num_channels_latents, height, width] + timesteps (torch.Tensor): + The timesteps sampled from the diffusion model, shape: [batch_size] + next_latents (torch.Tensor): + The next latents sampled from the diffusion model, shape: [batch_size, num_channels_latents, height, + width] + log_probs (torch.Tensor): + The log probabilities of the latents, shape: [batch_size] + advantages (torch.Tensor): + The advantages of the latents, shape: [batch_size] + embeds (torch.Tensor): + The embeddings of the prompts, shape: [2*batch_size or batch_size, ...] Note: the "or" is because if + train_cfg is True, the expectation is that negative prompts are concatenated to the embeds + + Returns: + loss (torch.Tensor), approx_kl (torch.Tensor), clipfrac (torch.Tensor) (all of these are of shape (1,)) + """ + with self.autocast(): + if self.config.train_cfg: + noise_pred = self.sd_pipeline.unet( + torch.cat([latents] * 2), + torch.cat([timesteps] * 2), + embeds, + ).sample + noise_pred_uncond, noise_pred_text = noise_pred.chunk(2) + noise_pred = noise_pred_uncond + self.config.sample_guidance_scale * ( + noise_pred_text - noise_pred_uncond + ) + else: + noise_pred = self.sd_pipeline.unet( + latents, + timesteps, + embeds, + ).sample + # compute the log prob of next_latents given latents under the current model + + scheduler_step_output = self.sd_pipeline.scheduler_step( + noise_pred, + timesteps, + latents, + eta=self.config.sample_eta, + prev_sample=next_latents, + ) + + log_prob = scheduler_step_output.log_probs + + advantages = torch.clamp( + advantages, + -self.config.train_adv_clip_max, + self.config.train_adv_clip_max, + ) + + ratio = torch.exp(log_prob - log_probs) + + loss = self.loss(advantages, self.config.train_clip_range, ratio) + + approx_kl = 0.5 * torch.mean((log_prob - log_probs) ** 2) + + clipfrac = torch.mean((torch.abs(ratio - 1.0) > self.config.train_clip_range).float()) + + return loss, approx_kl, clipfrac + + def loss( + self, + advantages: torch.Tensor, + clip_range: float, + ratio: torch.Tensor, + ): + unclipped_loss = -advantages * ratio + clipped_loss = -advantages * torch.clamp( + ratio, + 1.0 - clip_range, + 1.0 + clip_range, + ) + return torch.mean(torch.maximum(unclipped_loss, clipped_loss)) + + def _setup_optimizer(self, trainable_layers_parameters): + if self.config.train_use_8bit_adam: + import bitsandbytes + + optimizer_cls = bitsandbytes.optim.AdamW8bit + else: + optimizer_cls = torch.optim.AdamW + + return optimizer_cls( + trainable_layers_parameters, + lr=self.config.train_learning_rate, + betas=(self.config.train_adam_beta1, self.config.train_adam_beta2), + weight_decay=self.config.train_adam_weight_decay, + eps=self.config.train_adam_epsilon, + ) + + def _save_model_hook(self, models, weights, output_dir): + self.sd_pipeline.save_checkpoint(models, weights, output_dir) + weights.pop() # ensures that accelerate doesn't try to handle saving of the model + + def _load_model_hook(self, models, input_dir): + self.sd_pipeline.load_checkpoint(models, input_dir) + models.pop() # ensures that accelerate doesn't try to handle loading of the model + + def _generate_samples(self, 
iterations, batch_size): + """ + Generate samples from the model + + Args: + iterations (int): Number of iterations to generate samples for + batch_size (int): Batch size to use for sampling + + Returns: + samples (list[dict[str, torch.Tensor]]), prompt_image_pairs (list[list[Any]]) + """ + samples = [] + prompt_image_pairs = [] + self.sd_pipeline.unet.eval() + + sample_neg_prompt_embeds = self.neg_prompt_embed.repeat(batch_size, 1, 1) + + for _ in range(iterations): + prompts, prompt_metadata = zip(*[self.prompt_fn() for _ in range(batch_size)]) + + prompt_ids = self.sd_pipeline.tokenizer( + prompts, + return_tensors="pt", + padding="max_length", + truncation=True, + max_length=self.sd_pipeline.tokenizer.model_max_length, + ).input_ids.to(self.accelerator.device) + prompt_embeds = self.sd_pipeline.text_encoder(prompt_ids)[0] + + with self.autocast(): + sd_output = self.sd_pipeline( + prompt_embeds=prompt_embeds, + negative_prompt_embeds=sample_neg_prompt_embeds, + num_inference_steps=self.config.sample_num_steps, + guidance_scale=self.config.sample_guidance_scale, + eta=self.config.sample_eta, + output_type="pt", + ) + + images = sd_output.images + latents = sd_output.latents + log_probs = sd_output.log_probs + + latents = torch.stack(latents, dim=1) # (batch_size, num_steps + 1, ...) + log_probs = torch.stack(log_probs, dim=1) # (batch_size, num_steps, 1) + timesteps = self.sd_pipeline.scheduler.timesteps.repeat(batch_size, 1) # (batch_size, num_steps) + + samples.append( + { + "prompt_ids": prompt_ids, + "prompt_embeds": prompt_embeds, + "timesteps": timesteps, + "latents": latents[:, :-1], # each entry is the latent before timestep t + "next_latents": latents[:, 1:], # each entry is the latent after timestep t + "log_probs": log_probs, + "negative_prompt_embeds": sample_neg_prompt_embeds, + } + ) + prompt_image_pairs.append([images, prompts, prompt_metadata]) + + return samples, prompt_image_pairs + + def _train_batched_samples(self, inner_epoch, epoch, global_step, batched_samples): + """ + Train on a batch of samples. Main training segment + + Args: + inner_epoch (int): The current inner epoch + epoch (int): The current epoch + global_step (int): The current global step + batched_samples (list[dict[str, torch.Tensor]]): The batched samples to train on + + Side Effects: + - Model weights are updated + - Logs the statistics to the accelerator trackers. 
+ + Returns: + global_step (int): The updated global step + """ + info = defaultdict(list) + for _i, sample in enumerate(batched_samples): + if self.config.train_cfg: + # concat negative prompts to sample prompts to avoid two forward passes + embeds = torch.cat([sample["negative_prompt_embeds"], sample["prompt_embeds"]]) + else: + embeds = sample["prompt_embeds"] + + for j in range(self.num_train_timesteps): + with self.accelerator.accumulate(self.sd_pipeline.unet): + loss, approx_kl, clipfrac = self.calculate_loss( + sample["latents"][:, j], + sample["timesteps"][:, j], + sample["next_latents"][:, j], + sample["log_probs"][:, j], + sample["advantages"], + embeds, + ) + info["approx_kl"].append(approx_kl) + info["clipfrac"].append(clipfrac) + info["loss"].append(loss) + + self.accelerator.backward(loss) + if self.accelerator.sync_gradients: + self.accelerator.clip_grad_norm_( + self.trainable_layers.parameters() + if not isinstance(self.trainable_layers, list) + else self.trainable_layers, + self.config.train_max_grad_norm, + ) + self.optimizer.step() + self.optimizer.zero_grad() + + # Checks if the accelerator has performed an optimization step behind the scenes + if self.accelerator.sync_gradients: + # log training-related stuff + info = {k: torch.mean(torch.stack(v)) for k, v in info.items()} + info = self.accelerator.reduce(info, reduction="mean") + info.update({"epoch": epoch, "inner_epoch": inner_epoch}) + self.accelerator.log(info, step=global_step) + global_step += 1 + info = defaultdict(list) + return global_step + + def _config_check(self) -> tuple[bool, str]: + samples_per_epoch = ( + self.config.sample_batch_size * self.accelerator.num_processes * self.config.sample_num_batches_per_epoch + ) + total_train_batch_size = ( + self.config.train_batch_size + * self.accelerator.num_processes + * self.config.train_gradient_accumulation_steps + ) + + if not self.config.sample_batch_size >= self.config.train_batch_size: + return ( + False, + f"Sample batch size ({self.config.sample_batch_size}) must be greater than or equal to the train batch size ({self.config.train_batch_size})", + ) + if not self.config.sample_batch_size % self.config.train_batch_size == 0: + return ( + False, + f"Sample batch size ({self.config.sample_batch_size}) must be divisible by the train batch size ({self.config.train_batch_size})", + ) + if not samples_per_epoch % total_train_batch_size == 0: + return ( + False, + f"Number of samples per epoch ({samples_per_epoch}) must be divisible by the total train batch size ({total_train_batch_size})", + ) + return True, "" + + def train(self, epochs: Optional[int] = None): + """ + Train the model for a given number of epochs + """ + global_step = 0 + if epochs is None: + epochs = self.config.num_epochs + for epoch in range(self.first_epoch, epochs): + global_step = self.step(epoch, global_step) + + def _save_pretrained(self, save_directory): + self.sd_pipeline.save_pretrained(save_directory) + self.create_model_card() + + # Ensure the model card is saved along with the checkpoint + def _save_checkpoint(self, model, trial): + if self.args.hub_model_id is None: + model_name = Path(self.args.output_dir).name + else: + model_name = self.args.hub_model_id.split("/")[-1] + self.create_model_card(model_name=model_name) + super()._save_checkpoint(model, trial) + + def create_model_card( + self, + model_name: Optional[str] = None, + dataset_name: Optional[str] = None, + tags: Union[str, list[str], None] = None, + ): + """ + Creates a draft of a model card using the information 
available to the `Trainer`.
+
+        Args:
+            model_name (`str` or `None`, *optional*, defaults to `None`):
+                Name of the model.
+            dataset_name (`str` or `None`, *optional*, defaults to `None`):
+                Name of the dataset used for training.
+            tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
+                Tags to be associated with the model card.
+        """
+        if not self.is_world_process_zero():
+            return
+
+        if hasattr(self.model.config, "_name_or_path") and not os.path.isdir(self.model.config._name_or_path):
+            base_model = self.model.config._name_or_path
+        else:
+            base_model = None
+
+        # normalize `tags` to a mutable set
+        if tags is None:
+            tags = set()
+        elif isinstance(tags, str):
+            tags = {tags}
+        else:
+            tags = set(tags)
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.add("unsloth")
+
+        if "JOB_ID" in os.environ:
+            tags.add("hf_jobs")
+
+        tags.update(self._tag_names)
+
+        # docstyle-ignore
+        citation = textwrap.dedent("""\
+        @inproceedings{black2024training,
+            title = {{Training Diffusion Models with Reinforcement Learning}},
+            author = {Kevin Black and Michael Janner and Yilun Du and Ilya Kostrikov and Sergey Levine},
+            year = 2024,
+            booktitle = {The Twelfth International Conference on Learning Representations, {ICLR} 2024, Vienna, Austria, May 7-11, 2024},
+            publisher = {OpenReview.net},
+            url = {https://openreview.net/forum?id=YCWjhGrJFD},
+        }""")
+
+        model_card = generate_model_card(
+            base_model=base_model,
+            model_name=model_name,
+            hub_model_id=self.hub_model_id,
+            dataset_name=dataset_name,
+            tags=tags,
+            wandb_url=wandb.run.url if is_wandb_available() and wandb.run is not None else None,
+            comet_url=get_comet_experiment_url(),
+            trainer_name="DDPO",
+            trainer_citation=citation,
+            paper_title="Training Diffusion Models with Reinforcement Learning",
+            paper_id="2305.13301",
+        )
+
+        model_card.save(os.path.join(self.args.output_dir, "README.md"))
+class UnslothDDPOTrainer(_UnslothDDPOTrainer):
+    """
+
+The DDPOTrainer uses Deep Diffusion Policy Optimization to optimise diffusion models. Note, this trainer is heavily
+inspired by the work here: https://github.com/kvablack/ddpo-pytorch As of now only Stable Diffusion based pipelines
+are supported
+
+Args:
+    config ([`DDPOConfig`]):
+        Configuration object for DDPOTrainer. Check the documentation of [`PPOConfig`] for more details.
+    reward_function (`Callable[[torch.Tensor, tuple[str], tuple[Any]], torch.Tensor]`):
+        Reward function to be used.
+    prompt_function (`Callable[[], tuple[str, Any]]`): Function to generate prompts to guide model
+    sd_pipeline ([`DDPOStableDiffusionPipeline`]): Stable Diffusion pipeline to be used for training.
+    image_samples_hook (`Optional[Callable[[Any, Any, Any], Any]]`): Hook to be called to log images.
+
+    """
+    def __init__(
+        self,
+        config,
+        reward_function,
+        prompt_function,
+        sd_pipeline,
+        image_samples_hook = None,
+        **kwargs
+    ):
+        if config is None: config = UnslothDDPOConfig()
+        other_metrics = []
+
+        from unsloth_zoo.logging_utils import PatchRLStatistics
+        PatchRLStatistics('ddpo_trainer', other_metrics)
+
+        # [TODO] Fix up DataParallel multiplying batch sizes
+        # [TODO] DDP works, but DP seems to not work? 
[TODO] + if getattr(args, "parallel_mode", None) == ParallelMode.NOT_DISTRIBUTED and args.n_gpu > 1: + if getattr(args, "_n_gpu", 1) != 1: + args._n_gpu = 1 + if "model" in locals() and hasattr(model, "for_training"): + model.for_training() + super().__init__( + config = config, + reward_function = reward_function, + prompt_function = prompt_function, + sd_pipeline = sd_pipeline, + image_samples_hook = image_samples_hook,**kwargs) + if "model" in locals() and hasattr(model, "for_inference"): + model.for_inference() + +pass + + +if hasattr(logger, "addFilter"): + import logging + class HideLoggingMessage(logging.Filter): + def __init__(self, text): self.text = text + def filter(self, x): return not (self.text in x.getMessage()) + pass + logger.addFilter(HideLoggingMessage("`use_cache=True`")) + diff --git a/unsloth_compiled_cache/UnslothDPOTrainer.py b/unsloth_compiled_cache/UnslothDPOTrainer.py new file mode 100644 index 0000000000000000000000000000000000000000..330d0c2097e5e078dfec9ae8b5665697f9f73be4 --- /dev/null +++ b/unsloth_compiled_cache/UnslothDPOTrainer.py @@ -0,0 +1,2796 @@ +""" +2025.11.2 +2025.11.1 +4.57.2 +0.23.0 +__UNSLOTH_VERSIONING__ +""" + +# Unsloth auto generated code +# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . 
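+
+# A quick note on `chunked_selective_log_softmax` defined below: for each selected token id y it
+# computes log p(y) = logits[y] - logsumexp(logits) in float32, after splitting the flattened
+# (batch * seq_len, vocab) logits into 4 chunks. The logsumexp temporaries are the main memory
+# cost of DPO-style log-prob computation, so chunking bounds the peak usage.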
+ +from torch import Tensor +import torch +import torch.nn as nn +from torch.nn import functional as F +from typing import Any, List, Optional, Tuple, Union, Dict, Set, Callable +from trl.trainer.dpo_trainer import (Any, AutoModelForCausalLM, AutoTokenizer, BaseImageProcessor, Callable, DPOConfig, DPOTrainer, DataCollator, DataCollatorForPreference, DataLoader, Dataset, EvalLoopOutput, F, FDivergenceConstants, FDivergenceType, FeatureExtractionMixin, IterableDataset, Literal, MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES, Optional, PartialState, Path, PeftConfig, PeftModel, PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin, RunningMoments, SyncRefModelCallback, Trainer, TrainerCallback, Union, autocast, cap_exp, contextmanager, create_reference_model, dataclass, defaultdict, disable_dropout_in_model, empty_cache, flush_left, flush_right, generate_model_card, get_comet_experiment_url, get_peft_model, inspect, is_comet_available, is_liger_kernel_available, is_mlflow_available, is_peft_available, is_wandb_available, log_table_to_comet_experiment, logger, logging, maybe_apply_chat_template, maybe_extract_prompt, nn, nullcontext, os, pad, pad_to_length, pd, peft_module_casting_to_bf16, prepare_deepspeed, prepare_fsdp, prepare_model_for_kbit_training, random, selective_log_softmax, shift_tokens_right, textwrap, torch, tqdm, F, Optional, PeftModel, PreTrainedModel, Trainer, is_peft_available, logger, os, torch) + + +import os +from typing import * +from dataclasses import dataclass, field +from packaging.version import Version +import torch +import numpy as np +from contextlib import nullcontext +from torch.nn import functional as F +from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling as TransformersDataCollatorForLanguageModeling +from transformers.training_args import ParallelMode + +# Wrap trainer with padding to right and enable training mode +import functools +from types import MethodType +def prepare_for_training_mode(f): + @functools.wraps(f) + def wrapper(self, *args, **kwargs): + # Enable training mode + if hasattr(self, 'model') and hasattr(self.model, "for_training"): + self.model.for_training() + output = f(self, *args, **kwargs) + # Return inference mode + if hasattr(self, 'model') and hasattr(self.model, "for_inference"): + self.model.for_inference() + return output + return wrapper +pass + +torch_compile_options = { + "epilogue_fusion" : True, + "max_autotune" : False, + "shape_padding" : True, + "trace.enabled" : False, + "triton.cudagraphs" : False, +} + +@torch.compile(dynamic = True, fullgraph = True, options = torch_compile_options,) +def chunked_selective_log_softmax(logits, index): + # Split into 4 chunks only + chunked_logits = torch.chunk(logits.reshape(-1, logits.shape[-1]), chunks = 4, dim = 0) + chunked_index = torch.chunk(index.reshape(-1), chunks = 4, dim = 0) + all_per_token_logps = [] + # Below loop does the same as selective_log_softmax(chunk_logits, chunk_index) + for chunk_logits, chunk_index in zip(chunked_logits, chunked_index): + chunk_logits = chunk_logits.to(torch.float32) + selected_logits = torch.gather(chunk_logits, dim = -1, index = chunk_index.unsqueeze(-1)).squeeze(-1) + logsumexp_values = torch.logsumexp(chunk_logits, dim = -1) + per_token_logps = selected_logits - logsumexp_values + all_per_token_logps.append(per_token_logps) + pass + all_per_token_logps = torch.concat(all_per_token_logps) + all_per_token_logps = all_per_token_logps.reshape((logits.shape[0], logits.shape[1])) + return all_per_token_logps + +def 
calculate_pad_tokens_in_prompt(
+    input_ids: torch.Tensor,
+    logits_to_keep: int,
+    pad_token_id: int
+) -> torch.Tensor:
+    """
+    Given a left-padded prompt tensor, return the number of padding tokens in each sequence,
+    e.g. [pad, pad, pad, cat] -> 3.
+    """
+    if logits_to_keep >= input_ids.shape[1]:
+        raise ValueError("logits_to_keep must be smaller than the sequence length.")
+
+    prompt_section = input_ids[:, :-logits_to_keep]
+
+    padding_mask = (prompt_section == pad_token_id)
+
+    pad_token_counts = padding_mask.sum(dim=1)
+
+    return pad_token_counts
+
+def create_completion_attention_mask(
+    completion_input_ids: torch.Tensor,
+    left_pad_tokens_per_prompt: torch.Tensor,
+    max_left_pad: int,
+    pad_token_id: int
+) -> torch.Tensor:
+    """
+    Given a sequence like [p, p, p, c, c, c, pad, pad, pad], where p are extra prompt tokens
+    obtained from slicing the tensor, c are completion tokens, and pad are padding tokens,
+    build a completion mask that zeroes out the p and pad tokens: here [0, 0, 0, 1, 1, 1, 0, 0, 0].
+    """
+    batch_size, completion_len = completion_input_ids.shape
+    device = completion_input_ids.device
+
+    num_tokens_to_mask = max_left_pad - left_pad_tokens_per_prompt
+
+    indices = torch.arange(completion_len, device=device).unsqueeze(0)
+    shift_mask = indices >= num_tokens_to_mask.unsqueeze(1)
+
+    non_padding_mask = (completion_input_ids != pad_token_id)
+
+    final_mask = shift_mask & non_padding_mask
+
+    return final_mask
+
+def left_pack_padding(tensor: torch.Tensor, pad_id: int) -> torch.Tensor:
+    """
+    Moves all padding tokens in each sequence of a batch to the right.
+    """
+    mask = (tensor != pad_id)
+    # Must sort with stable=True: the key is a binary mask, so stability is what preserves
+    # the original token order within the padding and non-padding groups.
+    sorted_indices = torch.argsort(mask, dim=1, descending=True, stable=True)
+    packed_tensor = torch.gather(tensor, 1, sorted_indices)
+    return packed_tensor
+
+def align_logprobs_with_mask(
+    logprob_tensor: torch.Tensor,
+    attention_mask: torch.Tensor,
+    pad_value: float = 0.0
+) -> torch.Tensor:
+    """
+    Aligns a log probability tensor with a given (left-padded) attention mask.
+    """
+
+    device = logprob_tensor.device
+    batch_size, logprob_seq_len = logprob_tensor.shape
+    mask_seq_len = attention_mask.shape[1]
+
+    padded_logprobs = torch.full(
+        attention_mask.shape,
+        fill_value=pad_value,
+        dtype=logprob_tensor.dtype,
+        device=device
+    )
+
+    # The first non-zero position in each row of the mask equals the number of left-padding tokens.
+    left_pad_counts = torch.argmax(attention_mask, dim=1)
+
+    cols = torch.arange(logprob_seq_len, device=device)
+    dest_indices = left_pad_counts.unsqueeze(1) + cols
+
+    # Create destination row indices
+    # Shape: [batch_size, logprob_seq_len]
+    row_indices = torch.arange(batch_size, device=device).unsqueeze(1).expand_as(dest_indices)
+
+    # Filter out-of-bounds indices and perform the assignment: keep only the indices
+    # that fall within the bounds of the target tensor's sequence length.
+    valid_mask = dest_indices < mask_seq_len
+
+    # Use this mask to select only the valid row indices, column indices,
+    # and the corresponding values from the logprob tensor.
+    # This flattens the selected elements into 1D tensors.
+    valid_rows = row_indices[valid_mask]
+    valid_cols = dest_indices[valid_mask]
+    valid_vals = logprob_tensor[valid_mask]
+
+    # Place the valid values into their correct positions in the padded tensor
+    # using a single advanced indexing operation.
+    padded_logprobs[valid_rows, valid_cols] = valid_vals
+
+    return padded_logprobs
+
+@dataclass
+class UnslothDPOConfig(DPOConfig):
+    """
+
+Configuration class for the [`DPOTrainer`].
+ +This class includes only the parameters that are specific to DPO training. For a full list of training arguments, +please refer to the [`~transformers.TrainingArguments`] documentation. Note that default values in this class may +differ from those in [`~transformers.TrainingArguments`]. + +Using [`~transformers.HfArgumentParser`] we can turn this class into +[argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the +command line. + +Parameters: + > Parameters that control the model and reference model + + model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`): + Keyword arguments for `AutoModelForCausalLM.from_pretrained`, used when the `model` argument of the + [`DPOTrainer`] is provided as a string. + ref_model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`): + Keyword arguments for `AutoModelForCausalLM.from_pretrained`, used when the `ref_model` argument of the + [`DPOTrainer`] is provided as a string. + model_adapter_name (`str` or `None`, *optional*, defaults to `None`): + Name of the train target PEFT adapter, when using LoRA with multiple adapters. + ref_adapter_name (`str` or `None`, *optional*, defaults to `None`): + Name of the reference PEFT adapter, when using LoRA with multiple adapters. + force_use_ref_model (`bool`, *optional*, defaults to `False`): + If you provide a PEFT model as the active model and wish to use a different model for the `ref_model`, set + this flag to `True`. + disable_dropout (`bool`, *optional*, defaults to `True`): + Whether to disable dropout in the model and reference model. + use_logits_to_keep (`bool`, *optional*, defaults to `False`): + If `True`, only a specified number of logits are computed in the forward pass. This can be useful for + saving memory and speeding up training by not computing the logits for all tokens, especially in scenarios + when working with very long prompts where labels are ignored (-100). + + > Parameters that control the data preprocessing + + dataset_num_proc (`int` or `None`, *optional*, defaults to `None`): + Number of processes to use for processing the dataset. + padding_value (`int` or `None`, *optional*, defaults to `None`): + Padding value to use. If `None`, the padding value of the tokenizer is used. + label_pad_token_id (`int`, *optional*, defaults to `-100`): + Padding value to use for labels. + max_prompt_length (`int` or `None`, *optional*, defaults to `512`): + Maximum length of the prompt. + max_completion_length (`int` or `None`, *optional*, defaults to `None`): + Maximum length of the completion. + max_length (`int` or `None`, *optional*, defaults to `1024`): + Maximum length of the full sequence (prompt + completion). + truncation_mode (`str`, *optional*, defaults to `"keep_end"`): + Truncation mode to use when the sequence exceeds `max_length`. Possible values are `"keep_end"` and + `"keep_start"`. + padding_free (`bool`, *optional*, defaults to `False`): + Whether to perform forward passes without padding by flattening all sequences in the batch into a single + continuous sequence. This reduces memory usage by eliminating padding overhead. Currently, this is only + supported with the `flash_attention_2` attention implementation, which can efficiently handle the flattened + batch structure. + precompute_ref_log_probs (`bool`, *optional*, defaults to `False`): + Whether to precompute the log probabilities from the reference model. 
Setting this to `True` allows + training without needing the reference model during training, which can help reduce GPU memory usage. If + set to `False` (default), the reference model will be used during training to compute log probabilities + on-the-fly. + precompute_ref_batch_size (`int` or `None`, *optional*, defaults to `None`): + Batch size to use when precomputing reference model log probabilities. This can be set higher than the + training batch size to speed up preprocessing. If `None`, defaults to `per_device_train_batch_size` for + training and `per_device_eval_batch_size` for evaluation. + tools (`Optional[list[Union[dict, Callable]]]`, *optional*, defaults to `None`): + List of tools (callable functions) that will be accessible to the model. If the template does not support + function calling, this argument will have no effect. + + > Parameters that control the training + + loss_type (`str` or `list[str]`, *optional*, defaults to `"sigmoid"`): + Type of loss to use. Possible values are: + + - `"sigmoid"`: sigmoid loss from the original [DPO](https://huggingface.co/papers/2305.18290) paper. + - `"hinge"`: hinge loss on the normalized likelihood from the + [SLiC](https://huggingface.co/papers/2305.10425) paper. + - `"ipo"`: IPO loss from the [IPO](https://huggingface.co/papers/2310.12036) paper. + - `"exo_pair"`: pairwise EXO loss from the [EXO](https://huggingface.co/papers/2402.00856) paper. + - `"nca_pair"`: pairwise NCA loss from the [NCA](https://huggingface.co/papers/2402.05369) paper. + - `"robust"`: unbiased estimate of the DPO loss that is robust to preference noise from the [Robust + DPO](https://huggingface.co/papers/2403.00409) paper. + - `"bco_pair"`: pairwise BCO loss from the [BCO](https://huggingface.co/papers/2404.04656) paper. + - `"sppo_hard"`: SPPO loss with hard label from the [SPPO](https://huggingface.co/papers/2405.00675) + paper. + - `"aot"`: AOT loss for paired datasets from the [AOT](https://huggingface.co/papers/2406.05882) paper. + - `"aot_pair"`: AOT loss for unpaired datasets from the [AOT](https://huggingface.co/papers/2406.05882) + paper. + - `"discopop"`: DiscoPOP (a.k.a Log-Ratio Modulated Loss, LRML) loss from the + [DiscoPOP](https://huggingface.co/papers/2406.08414) paper. + - `"apo_zero"`: APO-zero loss from the [APO](https://huggingface.co/papers/2408.06266) paper. + - `"apo_down"`: APO-down loss from the [APO](https://huggingface.co/papers/2408.06266) paper. + - `"sft"`: Negative log-likelihood loss (standard supervised fine-tuning loss). + + Multiple loss types can be combined using comma separation (e.g., `["sigmoid", "bco_pair", "sft"]` for + [MPO](https://huggingface.co/papers/2411.10442)). The `loss_weights` parameter can be used to specify + corresponding weights for each loss type. + + use_liger_loss (`bool`, *optional*, defaults to `False`): + Whether to use Liger loss. + base_model_attribute_name (`str`, *optional*, defaults to `"model"`): + Name of the attribute in the model that contains the base model. This is used to get the base model from + the model when the model does not have a `get_decoder` method in the case when `use_liger_loss` is `True`. + beta (`float`, *optional*, defaults to `0.1`): + Parameter controlling the deviation from the reference model. Higher β means less deviation from the + reference model. For the IPO loss (`loss_type="ipo"`), β is the regularization parameter denoted by τ in + the [paper](https://huggingface.co/papers/2310.12036). 
+ f_divergence_type (`str`, *optional*, defaults to `FDivergenceType.REVERSE_KL`): + Type of f-divergence regularization function to compute divergence between policy and reference model. + f_alpha_divergence_coef (`float`, *optional*, defaults to `1.0`): + α coefficient in the α-divergence u^-α regularization function for DPO loss. + reference_free (`bool`, *optional*, defaults to `False`): + Whether to ignore the provided reference model and implicitly use a reference model that assigns equal + probability to all responses. + label_smoothing (`float`, *optional*, defaults to `0.0`): + Robust DPO label smoothing parameter from the [cDPO report](https://ericmitchell.ai/cdpo.pdf) and [Robust + DPO](https://huggingface.co/papers/2403.00409) paper that should be between `0.0` and `0.5`. + use_weighting (`bool`, *optional*, defaults to `False`): + Whether to weight the loss as done in the [WPO paper](https://huggingface.co/papers/2406.11827). + rpo_alpha (`float`, *optional*, defaults to `None`): + α parameter from the [RPO paper](https://huggingface.co/papers/2404.19733) (v3), which controls the + weighting of the NLL term in the loss. If `None`, no weighting is applied and the loss is the same as the + DPO loss. The paper recommends `rpo_alpha=1.0`. + ld_alpha (`float` or `None`, *optional*, defaults to `None`): + α parameter from the [LD-DPO paper](https://huggingface.co/papers/2409.06411), which controls the weighting + of the verbose token log-probabilities in responses. If `None`, no weighting is applied to the verbose + part, and the loss is equivalent to the standard DPO loss. The paper recommends setting `ld_alpha` between + `0.0` and `1.0`. + discopop_tau (`float`, *optional*, defaults to `0.05`): + τ/temperature parameter from the [DiscoPOP](https://huggingface.co/papers/2406.08414) paper, which controls + the shape of log ratio modulated loss. The paper recommends the default value `discopop_tau=0.05`. + loss_weights (`list[float]` or `None`, *optional*, defaults to `None`): + List of loss weights for multi-loss combinations. Used when combining multiple loss types. Example: `[0.8, + 0.2, 1.0]` for [MPO](https://huggingface.co/papers/2411.10442). If not provided, defaults to equal weights + (`1.0`) for all loss types. + sync_ref_model (`bool`, *optional*, defaults to `False`): + Whether to synchronize the reference model with the active model every `ref_model_sync_steps` steps, using + the `ref_model_mixup_alpha` parameter. This synchronization originates from the + [TR-DPO](https://huggingface.co/papers/2404.09656) paper. + ref_model_mixup_alpha (`float`, *optional*, defaults to `0.6`): + α parameter from the [TR-DPO](https://huggingface.co/papers/2404.09656) paper, which controls the mix + between the current policy and the previous reference policy during updates. The reference policy is + updated according to the equation: `π_ref = α * π_θ + (1 - α) * π_ref_prev`. To use this parameter, you + must set `sync_ref_model=True`. + ref_model_sync_steps (`int`, *optional*, defaults to `512`): + τ parameter from the [TR-DPO](https://huggingface.co/papers/2404.09656) paper, which determines how + frequently the current policy is synchronized with the reference policy. To use this parameter, you must + set `sync_ref_model=True`. + + > Parameters that control the logging + + generate_during_eval (`bool`, *optional*, defaults to `False`): + Whether to generate and log completions from both the model and the reference model to W&B or Comet during + evaluation. 
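+
+Example (a minimal sketch; `output_dir` and the hyperparameter values below are illustrative, not
+recommendations):
+
+```python
+from trl import DPOConfig
+
+# Combine several loss types with per-loss weights (MPO-style), as described above.
+training_args = DPOConfig(
+    output_dir="dpo-model",
+    beta=0.1,
+    loss_type=["sigmoid", "bco_pair", "sft"],
+    loss_weights=[0.8, 0.2, 1.0],
+    max_prompt_length=512,
+    max_length=1024,
+)
+```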
+ + """ + vllm_sampling_params: Optional[Any] = field( + default = None, + metadata = {'help': 'vLLM SamplingParams'}, + ) + unsloth_num_chunks : Optional[int] = field( + default = -1, + metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'}, + ) + max_seq_length : Optional[int] = field( + default = None, + metadata = {'help': 'Maximum sequence length to truncate to.'}, + ) + def __init__( + self, + output_dir = None, + overwrite_output_dir = None, + do_train = False, + do_eval = False, + do_predict = False, + eval_strategy = 'no', + prediction_loss_only = False, + per_device_train_batch_size = 4, + per_device_eval_batch_size = 4, + per_gpu_train_batch_size = None, + per_gpu_eval_batch_size = None, + gradient_accumulation_steps = 2, + eval_accumulation_steps = 2, + eval_delay = 0, + torch_empty_cache_steps = 250, + learning_rate = 5e-05, + weight_decay = 0.01, + adam_beta1 = 0.9, + adam_beta2 = 0.999, + adam_epsilon = 1e-08, + max_grad_norm = 1.0, + num_train_epochs = 3.0, + max_steps = -1, + lr_scheduler_type = 'linear', + warmup_ratio = 0.1, + warmup_steps = 0, + log_level = 'passive', + log_level_replica = 'warning', + log_on_each_node = True, + logging_dir = None, + logging_strategy = 'steps', + logging_first_step = False, + logging_steps = 1, + logging_nan_inf_filter = False, + save_strategy = 'steps', + save_steps = 500, + save_total_limit = None, + save_safetensors = True, + save_on_each_node = False, + save_only_model = False, + restore_callback_states_from_checkpoint = False, + no_cuda = False, + use_cpu = False, + use_mps_device = False, + seed = 3407, + data_seed = 3407, + jit_mode_eval = False, + bf16 = False, + fp16 = False, + fp16_opt_level = 'O1', + half_precision_backend = 'auto', + bf16_full_eval = False, + fp16_full_eval = False, + tf32 = None, + local_rank = -1, + ddp_backend = None, + tpu_num_cores = None, + tpu_metrics_debug = False, + debug = '', + dataloader_drop_last = False, + eval_steps = None, + dataloader_num_workers = 0, + dataloader_prefetch_factor = None, + past_index = -1, + run_name = None, + disable_tqdm = None, + remove_unused_columns = True, + label_names = None, + load_best_model_at_end = False, + metric_for_best_model = None, + greater_is_better = None, + ignore_data_skip = False, + fsdp = None, + fsdp_min_num_params = 0, + fsdp_config = None, + fsdp_transformer_layer_cls_to_wrap = None, + accelerator_config = None, + parallelism_config = None, + deepspeed = None, + label_smoothing_factor = 0.0, + optim = 'adamw_8bit', + optim_args = None, + adafactor = False, + group_by_length = False, + length_column_name = 'length', + report_to = None, + project = 'huggingface', + trackio_space_id = 'trackio', + ddp_find_unused_parameters = None, + ddp_bucket_cap_mb = None, + ddp_broadcast_buffers = None, + dataloader_pin_memory = True, + dataloader_persistent_workers = False, + skip_memory_metrics = True, + use_legacy_prediction_loop = False, + push_to_hub = False, + resume_from_checkpoint = None, + hub_model_id = None, + hub_strategy = 'every_save', + hub_token = None, + hub_private_repo = None, + hub_always_push = False, + hub_revision = None, + gradient_checkpointing = True, + gradient_checkpointing_kwargs = None, + include_inputs_for_metrics = False, + eval_do_concat_batches = True, + fp16_backend = 'auto', + push_to_hub_model_id = None, + push_to_hub_organization = None, + push_to_hub_token = None, + mp_parameters = '', + auto_find_batch_size = False, + full_determinism = False, + torchdynamo = None, + ray_scope = 'last', + ddp_timeout 
= 1800,
+        torch_compile = False,
+        torch_compile_backend = None,
+        torch_compile_mode = None,
+        include_tokens_per_second = False,
+        include_num_input_tokens_seen = False,
+        neftune_noise_alpha = None,
+        optim_target_modules = None,
+        batch_eval_metrics = False,
+        eval_on_start = False,
+        use_liger_kernel = False,
+        liger_kernel_config = None,
+        eval_use_gather_object = False,
+        average_tokens_across_devices = True,
+        model_init_kwargs = None,
+        ref_model_init_kwargs = None,
+        model_adapter_name = None,
+        ref_adapter_name = None,
+        force_use_ref_model = False,
+        disable_dropout = True,
+        use_logits_to_keep = False,
+        dataset_num_proc = None,
+        padding_value = None,
+        label_pad_token_id = -100,
+        max_prompt_length = 512,
+        max_completion_length = None,
+        max_length = 1024,
+        truncation_mode = 'keep_end',
+        padding_free = False,
+        precompute_ref_log_probs = False,
+        precompute_ref_batch_size = None,
+        tools = None,
+        use_liger_loss = False,
+        base_model_attribute_name = 'model',
+        beta = 0.1,
+        f_alpha_divergence_coef = 1.0,
+        reference_free = False,
+        label_smoothing = 0.0,
+        use_weighting = False,
+        rpo_alpha = None,
+        ld_alpha = None,
+        discopop_tau = 0.05,
+        loss_weights = None,
+        sync_ref_model = False,
+        ref_model_mixup_alpha = 0.6,
+        ref_model_sync_steps = 512,
+        generate_during_eval = False,
+        vllm_sampling_params = None,
+        unsloth_num_chunks = -1,
+        max_seq_length = None,
+        **kwargs,
+    ):
+        if learning_rate < 1e-7: print(f'Unsloth: Your learning rate of `{learning_rate}` is too small (less than 1e-7)! Consider increasing it, otherwise gradient updates will be close to 0!')
+        if learning_rate > 1: print(f'Unsloth: Your learning rate of `{learning_rate}` is way too large (> 1)! Consider decreasing it to 1e-1, otherwise gradient updates will explode!')
+        if output_dir is None and save_strategy == 'steps' and save_steps == 500:
+            output_dir = 'unsloth_training_checkpoints'
+            save_strategy = 'no'
+        if dataset_num_proc is None:
+            from multiprocessing import cpu_count
+            dataset_num_proc = min(max(cpu_count()+4, 2), 64)
+
+        super().__init__(
+            output_dir = output_dir,
+            overwrite_output_dir = overwrite_output_dir,
+            do_train = do_train,
+            do_eval = do_eval,
+            do_predict = do_predict,
+            eval_strategy = eval_strategy,
+            prediction_loss_only = prediction_loss_only,
+            per_device_train_batch_size = per_device_train_batch_size,
+            per_device_eval_batch_size = per_device_eval_batch_size,
+            per_gpu_train_batch_size = per_gpu_train_batch_size,
+            per_gpu_eval_batch_size = per_gpu_eval_batch_size,
+            gradient_accumulation_steps = gradient_accumulation_steps,
+            eval_accumulation_steps = eval_accumulation_steps,
+            eval_delay = eval_delay,
+            torch_empty_cache_steps = torch_empty_cache_steps,
+            learning_rate = learning_rate,
+            weight_decay = weight_decay,
+            adam_beta1 = adam_beta1,
+            adam_beta2 = adam_beta2,
+            adam_epsilon = adam_epsilon,
+            max_grad_norm = max_grad_norm,
+            num_train_epochs = num_train_epochs,
+            max_steps = max_steps,
+            lr_scheduler_type = lr_scheduler_type,
+            warmup_ratio = warmup_ratio,
+            warmup_steps = warmup_steps,
+            log_level = log_level,
+            log_level_replica = log_level_replica,
+            log_on_each_node = log_on_each_node,
+            logging_dir = logging_dir,
+            logging_strategy = logging_strategy,
+            logging_first_step = logging_first_step,
+            logging_steps = logging_steps,
+            logging_nan_inf_filter = logging_nan_inf_filter,
+            save_strategy = save_strategy,
+            save_steps = save_steps,
+            save_total_limit = save_total_limit,
+            save_safetensors = save_safetensors,
+            save_on_each_node = save_on_each_node,
+
save_only_model = save_only_model, + restore_callback_states_from_checkpoint = restore_callback_states_from_checkpoint, + no_cuda = no_cuda, + use_cpu = use_cpu, + use_mps_device = use_mps_device, + seed = seed, + data_seed = data_seed, + jit_mode_eval = jit_mode_eval, + bf16 = bf16, + fp16 = fp16, + fp16_opt_level = fp16_opt_level, + half_precision_backend = half_precision_backend, + bf16_full_eval = bf16_full_eval, + fp16_full_eval = fp16_full_eval, + tf32 = tf32, + local_rank = local_rank, + ddp_backend = ddp_backend, + tpu_num_cores = tpu_num_cores, + tpu_metrics_debug = tpu_metrics_debug, + debug = debug, + dataloader_drop_last = dataloader_drop_last, + eval_steps = eval_steps, + dataloader_num_workers = dataloader_num_workers, + dataloader_prefetch_factor = dataloader_prefetch_factor, + past_index = past_index, + run_name = run_name, + disable_tqdm = disable_tqdm, + remove_unused_columns = remove_unused_columns, + label_names = label_names, + load_best_model_at_end = load_best_model_at_end, + metric_for_best_model = metric_for_best_model, + greater_is_better = greater_is_better, + ignore_data_skip = ignore_data_skip, + fsdp = fsdp, + fsdp_min_num_params = fsdp_min_num_params, + fsdp_config = fsdp_config, + fsdp_transformer_layer_cls_to_wrap = fsdp_transformer_layer_cls_to_wrap, + accelerator_config = accelerator_config, + parallelism_config = parallelism_config, + deepspeed = deepspeed, + label_smoothing_factor = label_smoothing_factor, + optim = optim, + optim_args = optim_args, + adafactor = adafactor, + group_by_length = group_by_length, + length_column_name = length_column_name, + report_to = report_to, + project = project, + trackio_space_id = trackio_space_id, + ddp_find_unused_parameters = ddp_find_unused_parameters, + ddp_bucket_cap_mb = ddp_bucket_cap_mb, + ddp_broadcast_buffers = ddp_broadcast_buffers, + dataloader_pin_memory = dataloader_pin_memory, + dataloader_persistent_workers = dataloader_persistent_workers, + skip_memory_metrics = skip_memory_metrics, + use_legacy_prediction_loop = use_legacy_prediction_loop, + push_to_hub = push_to_hub, + resume_from_checkpoint = resume_from_checkpoint, + hub_model_id = hub_model_id, + hub_strategy = hub_strategy, + hub_token = hub_token, + hub_private_repo = hub_private_repo, + hub_always_push = hub_always_push, + hub_revision = hub_revision, + gradient_checkpointing = gradient_checkpointing, + gradient_checkpointing_kwargs = gradient_checkpointing_kwargs, + include_inputs_for_metrics = include_inputs_for_metrics, + eval_do_concat_batches = eval_do_concat_batches, + fp16_backend = fp16_backend, + push_to_hub_model_id = push_to_hub_model_id, + push_to_hub_organization = push_to_hub_organization, + push_to_hub_token = push_to_hub_token, + mp_parameters = mp_parameters, + auto_find_batch_size = auto_find_batch_size, + full_determinism = full_determinism, + torchdynamo = torchdynamo, + ray_scope = ray_scope, + ddp_timeout = ddp_timeout, + torch_compile = torch_compile, + torch_compile_backend = torch_compile_backend, + torch_compile_mode = torch_compile_mode, + include_tokens_per_second = include_tokens_per_second, + include_num_input_tokens_seen = include_num_input_tokens_seen, + neftune_noise_alpha = neftune_noise_alpha, + optim_target_modules = optim_target_modules, + batch_eval_metrics = batch_eval_metrics, + eval_on_start = eval_on_start, + use_liger_kernel = use_liger_kernel, + liger_kernel_config = liger_kernel_config, + eval_use_gather_object = eval_use_gather_object, + average_tokens_across_devices = 
average_tokens_across_devices,
+            model_init_kwargs = model_init_kwargs,
+            ref_model_init_kwargs = ref_model_init_kwargs,
+            model_adapter_name = model_adapter_name,
+            ref_adapter_name = ref_adapter_name,
+            force_use_ref_model = force_use_ref_model,
+            disable_dropout = disable_dropout,
+            use_logits_to_keep = use_logits_to_keep,
+            dataset_num_proc = dataset_num_proc,
+            padding_value = padding_value,
+            label_pad_token_id = label_pad_token_id,
+            max_prompt_length = max_prompt_length,
+            max_completion_length = max_completion_length,
+            max_length = max_length,
+            truncation_mode = truncation_mode,
+            padding_free = padding_free,
+            precompute_ref_log_probs = precompute_ref_log_probs,
+            precompute_ref_batch_size = precompute_ref_batch_size,
+            tools = tools,
+            use_liger_loss = use_liger_loss,
+            base_model_attribute_name = base_model_attribute_name,
+            beta = beta,
+            f_alpha_divergence_coef = f_alpha_divergence_coef,
+            reference_free = reference_free,
+            label_smoothing = label_smoothing,
+            use_weighting = use_weighting,
+            rpo_alpha = rpo_alpha,
+            ld_alpha = ld_alpha,
+            discopop_tau = discopop_tau,
+            loss_weights = loss_weights,
+            sync_ref_model = sync_ref_model,
+            ref_model_mixup_alpha = ref_model_mixup_alpha,
+            ref_model_sync_steps = ref_model_sync_steps,
+            generate_during_eval = generate_during_eval, **kwargs)
+        self.vllm_sampling_params = vllm_sampling_params
+        self.unsloth_num_chunks = unsloth_num_chunks
+        self.max_seq_length = max_seq_length
+pass
+
+class _UnslothDPOTrainer(Trainer):
+    """
+    Trainer for the Direct Preference Optimization (DPO) method.
+
+    This class is a wrapper around the [`transformers.Trainer`] class and inherits all of its attributes and methods.
+
+    Args:
+        model (`Union[str, PreTrainedModel]`):
+            Model to be trained. Can be either:
+
+            - A string, being the *model id* of a pretrained model hosted inside a model repo on huggingface.co, or a
+              path to a *directory* containing model weights saved using
+              [`~transformers.PreTrainedModel.save_pretrained`], e.g., `'./my_model_directory/'`. The model is loaded
+              using [`~transformers.AutoModelForCausalLM.from_pretrained`] with the keyword arguments in
+              `args.model_init_kwargs`.
+            - A [`~transformers.PreTrainedModel`] object. Only causal language models are supported.
+        ref_model (`PreTrainedModelWrapper`):
+            Hugging Face transformer model with a causal language modelling head. Used for implicit reward computation
+            and loss. If no reference model is provided, the trainer will create a reference model with the same
+            architecture as the model to be optimized.
+        args ([`DPOConfig`], *optional*, defaults to `None`):
+            Configuration for this trainer. If `None`, a default configuration is used.
+        data_collator (`DataCollator`, *optional*):
+            Function to use to form a batch from a list of elements of the processed `train_dataset` or `eval_dataset`.
+            Will default to [`DataCollatorForPreference`].
+        train_dataset ([`~datasets.Dataset`] or [`~datasets.IterableDataset`]):
+            Dataset to use for training. DPO supports [preference](#preference) type datasets. The format of the
+            samples can be either:
+
+            - [Standard](dataset_formats#standard): Each sample contains plain text.
+            - [Conversational](dataset_formats#conversational): Each sample contains structured messages (e.g., role
+              and content).
+        eval_dataset ([`~datasets.Dataset`], [`~datasets.IterableDataset`] or `dict[str, Union[Dataset, IterableDataset]]`):
+            Dataset to use for evaluation. It must meet the same requirements as `train_dataset`.
+ processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`): + Processing class used to process the data. If `None`, the processing class is loaded from the model's name + with [`~transformers.AutoTokenizer.from_pretrained`]. + compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*): + The function that will be used to compute metrics at evaluation. Must take a [`EvalPrediction`] and return + a dictionary string to metric values. *Note* When passing TrainingArgs with `batch_eval_metrics` set to + `True`, your compute_metrics function must take a boolean `compute_result` argument. This will be triggered + after the last eval batch to signal that the function needs to calculate and return the global summary + statistics rather than accumulating the batch-level statistics. + callbacks (list of [`~transformers.TrainerCallback`], *optional*, defaults to `None`): + List of callbacks to customize the training loop. Will add those to the list of default callbacks detailed + in [here](https://huggingface.co/docs/transformers/main_classes/callback). + + If you want to remove one of the default callbacks used, use the [`~transformers.Trainer.remove_callback`] + method. + optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`, *optional*, defaults to `(None, None)`): + A tuple containing the optimizer and the scheduler to use. Will default to an instance of [`AdamW`] on your + model and a scheduler given by [`get_linear_schedule_with_warmup`] controlled by `args`. + optimizer_cls_and_kwargs (`Tuple[Type[torch.optim.Optimizer], Dict[str, Any]]`, *optional*, defaults to `None`): + A tuple containing the optimizer class and keyword arguments to use. Overrides `optim` and `optim_args` in + `args`. Incompatible with the `optimizers` argument. + preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`, *optional*, defaults to `None`): + A function that preprocess the logits right before caching them at each evaluation step. Must take two + tensors, the logits and the labels, and return the logits once processed as desired. The modifications made + by this function will be reflected in the predictions received by `compute_metrics`. + + Note that the labels (second parameter) will be `None` if the dataset does not have them. + peft_config ([`~peft.PeftConfig`], *optional*, defaults to `None`): + PEFT configuration used to wrap the model. If `None`, the model is not wrapped. 
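+
+    Example (a minimal sketch, assuming the public preference dataset
+    `trl-lib/ultrafeedback_binarized` and a small instruct model; substitute your own model and data):
+
+    ```python
+    from datasets import load_dataset
+    from trl import DPOConfig, DPOTrainer
+
+    dataset = load_dataset("trl-lib/ultrafeedback_binarized", split="train")
+    training_args = DPOConfig(output_dir="Qwen2-0.5B-DPO")
+    trainer = DPOTrainer(model="Qwen/Qwen2-0.5B-Instruct", args=training_args, train_dataset=dataset)
+    trainer.train()
+    ```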
+ """ + + _tag_names = ["trl", "dpo"] + + def __init__( + self, + model: Union[str, nn.Module, PreTrainedModel], + ref_model: Optional[Union[PreTrainedModel, nn.Module, str]] = None, + args: Optional[DPOConfig] = None, + data_collator: Optional[DataCollator] = None, # type: ignore + train_dataset: Optional[Union[Dataset, IterableDataset]] = None, + eval_dataset: Optional[Union[Dataset, IterableDataset, dict[str, Union[Dataset, IterableDataset]]]] = None, + processing_class: Optional[ + Union[PreTrainedTokenizerBase, BaseImageProcessor, FeatureExtractionMixin, ProcessorMixin] + ] = None, + compute_metrics: Optional[Callable[[EvalLoopOutput], dict]] = None, + callbacks: Optional[list[TrainerCallback]] = None, + optimizers: tuple[Optional[torch.optim.Optimizer], Optional[torch.optim.lr_scheduler.LambdaLR]] = (None, None), + optimizer_cls_and_kwargs: Optional[tuple[type[torch.optim.Optimizer], dict[str, Any]]] = None, + preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None, + peft_config: Optional["PeftConfig"] = None, + ): + # Args + model_id = model if isinstance(model, str) else model.config._name_or_path + if args is None: + model_name = model_id.split("/")[-1] + args = DPOConfig(f"{model_name}-DPO") + + # Handle the tokenizer + if processing_class is None: + processing_class = AutoTokenizer.from_pretrained(model_id) + + if args.padding_value is not None: + self.padding_value = args.padding_value + else: + if hasattr(processing_class, "pad_token_id") and processing_class.pad_token_id is not None: + self.padding_value = processing_class.pad_token_id + elif hasattr(processing_class, "tokenizer") and processing_class.tokenizer.pad_token_id is not None: + self.padding_value = processing_class.tokenizer.pad_token_id + else: + raise ValueError( + "`padding_value` is not specified in `DPOConfig`, and `pad_token_id` is missing in the " + "`processing_class`. Please either set the `padding_value` argument in `DPOConfig`, or set " + "`tokenizer.pad_token` (e.g., `tokenizer.pad_token = tokenizer.eos_token`) before instantiating " + "the trainer." + ) + + # Model + if not isinstance(model, str) and ref_model is model: + raise ValueError( + "`model` and `ref_model` cannot be the same object. If you want `ref_model` to be the " + "same as `model`, you must mass a copy of it, or `None` if you use peft." + ) + + if args.model_init_kwargs is not None and not isinstance(model, str): + logger.warning( + "You passed model_init_kwargs to the `DPOConfig`, but your model is already instantiated. " + "The `model_init_kwargs` will be ignored." + ) + if isinstance(model, str): + model = self._create_model_from_path(model, args) + + if args.ref_model_init_kwargs is not None and not isinstance(ref_model, str): + logger.warning( + "You passed ref_model_init_kwargs to the `DPOConfig`, but your ref_model is already instantiated. " + "The `ref_model_init_kwargs` will be ignored." + ) + if isinstance(ref_model, str): + ref_model = self._create_model_from_path(ref_model, args, is_ref=True) + + # PEFT configuration and model wrapping + model = self._prepare_peft_model(model, ref_model, peft_config, args) + + if args.generate_during_eval and not (is_wandb_available() or is_comet_available() or is_mlflow_available()): + raise ValueError( + "`generate_during_eval=True` requires Weights and Biases, MLFlow or Comet to be installed." + " Please install `wandb`, `mlflow` or `comet-ml` to resolve." 
+ ) + + self.is_encoder_decoder = model.config.is_encoder_decoder + self.is_vision_model = model.config.model_type in MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES.keys() + self.is_peft_model = is_peft_available() and isinstance(model, PeftModel) + self.model_adapter_name = args.model_adapter_name + self.ref_adapter_name = args.ref_adapter_name + self.reference_free = args.reference_free + + if ref_model: + self.ref_model = ref_model + elif self.is_peft_model or args.precompute_ref_log_probs: + # The `model` with adapters turned off will be used as the reference model + self.ref_model = None + else: + self.ref_model = create_reference_model(model) + + # Disable dropout in the model and reference model + if args.disable_dropout: + disable_dropout_in_model(model) + if self.ref_model is not None: + disable_dropout_in_model(self.ref_model) + + # Liger kernel + if args.use_liger_loss: + if not is_liger_kernel_available(): + raise ImportError( + "You set `use_liger_loss=True` but the liger kernel is not available. " + "Please install liger-kernel first: `pip install liger-kernel`" + ) + if args.loss_type not in ["sigmoid", "apo_zero", "apo_down", "sppo_hard", "nca_pair"]: + raise ValueError( + "You set `use_liger_loss=True` but the loss type is not from `[sigmoid, apo_zero, apo_down, sppo_hard, nca_pair`. " + "Please set `loss_type='[sigmoid | apo_zero | apo_down | sppo_hard | nca_pair]'` to use the liger kernel." + ) + self.dpo_loss_fn = LigerFusedLinearDPOLoss( + ignore_index=args.label_pad_token_id, + beta=args.beta, + use_ref_model=not args.reference_free, + average_log_prob=False, + loss_type=args.loss_type, + ) + # The trainer estimates the number of FLOPs [floating-point operations] using the number of elements in the + # input tensor associated with the key "input_ids". However, in DPO, the sampled data does not include the + # "input_ids" key. Instead, the available keys are "prompt_input_ids", "chosen_input_ids", and + # "rejected_input_ids". As a result, the trainer issues the warning: "Could not estimate the number of tokens + # of the input, floating-point operations will not be computed." To suppress this warning, we set the + # "estimate_tokens" key in the model's "warnings_issued" dictionary to True. This acts as a flag to indicate + # that the warning has already been issued. + model.warnings_issued["estimate_tokens"] = True + + # Data collator + if data_collator is None: + data_collator = DataCollatorForPreference(pad_token_id=self.padding_value) + + self.generate_during_eval = args.generate_during_eval + self.label_pad_token_id = args.label_pad_token_id + self.max_prompt_length = args.max_prompt_length + self.max_completion_length = args.max_completion_length + self.max_length = args.max_length + self.truncation_mode = args.truncation_mode + self.precompute_ref_log_probs = args.precompute_ref_log_probs + self.use_logits_to_keep = args.use_logits_to_keep + + if args.padding_free: + if model.config._attn_implementation != "flash_attention_2": + logger.warning( + "Padding-free training is enabled, but the attention implementation is not set to " + "'flash_attention_2'. Padding-free training flattens batches into a single sequence, and " + "'flash_attention_2' is the only known attention mechanism that reliably supports this. Using " + "other implementations may lead to unexpected behavior. To ensure compatibility, set " + "`attn_implementation='flash_attention_2'` in the model configuration, or verify that your " + "attention mechanism can handle flattened sequences." 
+ ) + if args.per_device_train_batch_size == 1: + logger.warning( + "You are using a per_device_train_batch_size of 1 with padding-free training. Using a batch size " + "of 1 anihilate the benefits of padding-free training. Please consider increasing the batch size " + "to at least 2." + ) + self.padding_free = args.padding_free + + # Since ref_logs are precomputed on the first call to get_train/eval_dataloader + # keep track of first called to avoid computation of future calls + self._precomputed_train_ref_log_probs = False + self._precomputed_eval_ref_log_probs = False + + self.beta = args.beta + self.label_smoothing = args.label_smoothing + self.loss_type = args.loss_type if isinstance(args.loss_type, list) else [args.loss_type] + self.loss_weights = args.loss_weights + self.aux_loss_enabled = getattr(model.config, "output_router_logits", False) + self.use_weighting = args.use_weighting + self.aux_loss_coef = getattr(model.config, "router_aux_loss_coef", 0.0) + if self.aux_loss_enabled and self.aux_loss_coef == 0.0: + logger.warning( + "You set `output_router_logits` to `True` in the model config, but `router_aux_loss_coef` is set to " + "`0.0`, meaning the auxiliary loss will not be used. Either set `router_aux_loss_coef` to a value " + "greater than `0.0`, or set `output_router_logits` to `False` if you don't want to use the auxiliary " + "loss.", + ) + for loss_type in self.loss_type: + if ( + loss_type in ["hinge", "ipo", "bco_pair", "sppo_hard", "nca_pair", "apo_zero", "apo_down"] + and args.label_smoothing > 0 + ): + logger.warning( + f"You are using the {loss_type} loss type that does not support label smoothing. The " + "`label_smoothing` parameter will be ignored. Set `label_smoothing` to `0.0` to remove this " + "warning.", + ) + if loss_type == "kto_pair": + raise ValueError("Support for kto_pair has been removed in DPOTrainer. Please use KTOTrainer.") + + self._stored_metrics = defaultdict(lambda: defaultdict(list)) + self.f_divergence_type = args.f_divergence_type + self.f_divergence_params = {FDivergenceConstants.ALPHA_DIVERGENCE_COEF_KEY: args.f_alpha_divergence_coef} + self.dataset_num_proc = args.dataset_num_proc + + # Dataset preparation + train_dataset = self._prepare_dataset(train_dataset, processing_class, args, "train") + if eval_dataset is not None: + if isinstance(eval_dataset, dict): + eval_dataset = { + key: self._prepare_dataset(dataset, processing_class, args, key) + for key, dataset in eval_dataset.items() + } + else: + eval_dataset = self._prepare_dataset(eval_dataset, processing_class, args, "eval") + + super().__init__( + model=model, + args=args, + data_collator=data_collator, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + processing_class=processing_class, + compute_metrics=compute_metrics, + callbacks=callbacks, + optimizers=optimizers, + optimizer_cls_and_kwargs=optimizer_cls_and_kwargs, + preprocess_logits_for_metrics=preprocess_logits_for_metrics, + ) + + # Gradient accumulation requires scaled loss. Normally, loss scaling in the parent class depends on whether the + # model accepts loss-related kwargs. Since we compute our own loss, this check is irrelevant. We set + # self.model_accepts_loss_kwargs to False to enable scaling. 
+ self.model_accepts_loss_kwargs = False + + # Add tags for models that have been loaded with the correct transformers version + if hasattr(self.model, "add_model_tags"): + self.model.add_model_tags(self._tag_names) + + if not hasattr(self, "accelerator"): + raise AttributeError( + "Your `Trainer` does not have an `accelerator` object. Consider upgrading `transformers`." + ) + + # Deepspeed Zero-3 does not support precompute_ref_log_probs + if self.is_deepspeed_enabled: + if self.accelerator.state.deepspeed_plugin.zero_stage == 3 and self.precompute_ref_log_probs: + raise ValueError( + "You cannot use `precompute_ref_log_probs=True` with Deepspeed ZeRO-3. Please set `precompute_ref_log_probs=False`." + ) + + if self.ref_model is None: + if not (self.is_peft_model or self.precompute_ref_log_probs): + raise ValueError( + "No reference model and model is not a Peft model. Try setting `precompute_ref_log_probs=True`" + ) + if args.sync_ref_model: + raise ValueError( + "You currently cannot use `ref_model=None` with TR-DPO method. Please provide `ref_model`." + ) + else: + if self.is_deepspeed_enabled: + self.ref_model = prepare_deepspeed(self.ref_model, self.accelerator) + elif self.is_fsdp_enabled: + self.ref_model = prepare_fsdp(self.ref_model, self.accelerator) + else: + self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True) + + if args.sync_ref_model: + if self.precompute_ref_log_probs: + raise ValueError( + "You cannot use `precompute_ref_log_probs=True` with TR-DPO method. Please set `precompute_ref_log_probs=False`." + ) + + self.add_callback(SyncRefModelCallback(ref_model=self.ref_model, accelerator=self.accelerator)) + + if "bco_pair" in self.loss_type: + self.running = RunningMoments(self.accelerator) + + def _create_model_from_path(self, model_path: str, args: DPOConfig, is_ref: bool = False) -> PreTrainedModel: + """Creates a model from a path or model identifier.""" + if not is_ref: + model_init_kwargs = args.model_init_kwargs or {} + else: + model_init_kwargs = args.ref_model_init_kwargs or {} + + # Handle torch dtype + dtype = model_init_kwargs.get("dtype") + if isinstance(dtype, torch.dtype) or dtype == "auto" or dtype is None: + pass # dtype is already a torch.dtype or "auto" or None + elif isinstance(dtype, str): # it's a str, but not "auto" + dtype = getattr(torch, dtype) + model_init_kwargs["dtype"] = dtype + else: + raise ValueError( + "Invalid `dtype` passed to `DPOConfig`. Expected either 'auto' or a string representing " + f"a `torch.dtype` (e.g., 'float32'), but got {dtype}." + ) + + # Create model + model = AutoModelForCausalLM.from_pretrained(model_path, **model_init_kwargs) + return model + + def _prepare_peft_model( + self, model: PreTrainedModel, ref_model: PreTrainedModel, peft_config: Any, args: DPOConfig + ) -> PreTrainedModel: + """Prepares a model for PEFT training.""" + # Initialize this variable to False. This helps tracking the case when `peft_module_casting_to_bf16` + # has been called in order to properly call autocast if needed. 
+        self._peft_has_been_casted_to_bf16 = False
+
+        if not is_peft_available() and peft_config is not None:
+            raise ValueError(
+                "PEFT is not installed and you passed a `peft_config` in the trainer's kwargs, please install it to use the PEFT models"
+            )
+        elif is_peft_available() and peft_config is not None:
+            # if model is a peft model and we have a peft_config, we merge and unload it first
+            if isinstance(model, PeftModel):
+                model = model.merge_and_unload()
+
+            if ref_model is not None and not args.force_use_ref_model:
+                raise ValueError(
+                    "You passed both a ref_model and a peft_config. For training PEFT adapters with DPO there is no need to pass a reference"
+                    " model. Please pass `ref_model=None` if you want to train PEFT adapters, or pass a ref_model with"
+                    " `force_use_ref_model=True` in DPOTrainer's init if you want to use a different ref_model."
+                )
+
+            if getattr(model, "is_loaded_in_8bit", False) or getattr(model, "is_loaded_in_4bit", False):
+                _support_gc_kwargs = hasattr(
+                    args, "gradient_checkpointing_kwargs"
+                ) and "gradient_checkpointing_kwargs" in list(
+                    inspect.signature(prepare_model_for_kbit_training).parameters
+                )
+
+                prepare_model_kwargs = {"use_gradient_checkpointing": args.gradient_checkpointing}
+
+                if _support_gc_kwargs:
+                    prepare_model_kwargs["gradient_checkpointing_kwargs"] = args.gradient_checkpointing_kwargs
+
+                model = prepare_model_for_kbit_training(model, **prepare_model_kwargs)
+
+            else:
+                model = self._prepare_gradient_checkpointing(model, args)
+
+            # get peft model with the given config
+            model = get_peft_model(model, peft_config)
+            if args.bf16 and getattr(model, "is_loaded_in_4bit", False):
+                peft_module_casting_to_bf16(model)
+                # If args.bf16 we need to explicitly call `generate` with the torch amp autocast context manager
+                self._peft_has_been_casted_to_bf16 = True
+
+        else:
+            model = self._prepare_gradient_checkpointing(model, args)
+
+        return model
+
+    def _prepare_gradient_checkpointing(self, model: PreTrainedModel, args: DPOConfig):
+        """Prepare gradient checkpointing for the model."""
+        # For models that use gradient_checkpointing, we need to attach a hook that enables the inputs
+        # to explicitly have `requires_grad=True`, otherwise training will either fail silently
+        # or fail completely.
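+        # (With PEFT adapters the frozen embedding output is a leaf that does not require grad,
+        # so without `requires_grad=True` on the inputs the checkpointed segments would have no
+        # path to backpropagate through.)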
+ if args.gradient_checkpointing: + # For backward compatibility with older versions of transformers + if hasattr(model, "enable_input_require_grads"): + model.enable_input_require_grads() + else: + + def make_inputs_require_grad(module, input, output): + output.requires_grad_(True) + + model.get_input_embeddings().register_forward_hook(make_inputs_require_grad) + + return model + + def _prepare_dataset( + self, + dataset: Union[Dataset, IterableDataset], + processing_class: Union[PreTrainedTokenizerBase, BaseImageProcessor, FeatureExtractionMixin, ProcessorMixin], + args: DPOConfig, + dataset_name: str, + ) -> Union[Dataset, IterableDataset]: + # Build the kwargs for the `map` function + map_kwargs = {} + if isinstance(dataset, Dataset): # IterableDataset does not support num_proc nor writer_batch_size + map_kwargs["num_proc"] = args.dataset_num_proc + map_kwargs["writer_batch_size"] = 10 + + with PartialState().main_process_first(): + # Extract prompt if needed + if isinstance(dataset, Dataset): # `IterableDataset.map` does not support `desc` + map_kwargs["desc"] = f"Extracting prompt in {dataset_name} dataset" + dataset = dataset.map(maybe_extract_prompt, **map_kwargs) + + # Apply the chat template if needed + if isinstance(dataset, Dataset): # `IterableDataset.map` does not support `desc` + map_kwargs["desc"] = f"Applying chat template to {dataset_name} dataset" + dataset = dataset.map( + maybe_apply_chat_template, fn_kwargs={"tokenizer": processing_class, "tools": args.tools}, **map_kwargs + ) + + # Tokenize the dataset + if isinstance(dataset, Dataset): # `IterableDataset.map` does not support `desc` + map_kwargs["desc"] = f"Tokenizing {dataset_name} dataset" + + dataset = dataset.map( + self.tokenize_row if not self.is_vision_model else self.process_row, + remove_columns=["chosen", "rejected"], + fn_kwargs={ + "processing_class": processing_class, + "max_prompt_length": args.max_prompt_length, + "max_completion_length": args.max_completion_length, + # for enc-dec, we add the special tokens ([bos_token] + prompt + [eos_token]; completion + [eos_token]) + "add_special_tokens": False, + }, + **map_kwargs, + ) + + return dataset + + @staticmethod + def tokenize_row( + features: dict[str, str], + processing_class: PreTrainedTokenizerBase, + max_prompt_length: Optional[int] = None, + max_completion_length: Optional[int] = None, + add_special_tokens: bool = True, + ) -> dict[str, list[int]]: + """ + Tokenize a row of the dataset. + + Args: + features (`dict[str, str]`): + Row of the dataset, should contain the keys `"prompt"`, `"chosen"`, and `"rejected"`. + processing_class (`PreTrainedTokenizerBase`): + Processing class used to process the data. + max_prompt_length (`int` or `None`): + Maximum length of the prompt sequence. If `None`, the prompt sequence is not truncated. + max_completion_length (`int` or `None`): + Maximum length of the completion sequences. If `None`, the completion sequences are not truncated. + add_special_tokens (`bool`): + Whether to add special tokens to the sequences. Typically used for encoder-decoder models. If `True`, + the prompt sequence will have a bos token prepended and an eos token appended. In any case, the + completion sequences will have an eos token appended. + + Returns: + `dict[str, list[int]]`: + Tokenized sequences with the keys `"prompt_input_ids"`, `"chosen_input_ids"`, and + `"rejected_input_ids". 
+ + Example: + ```python + >>> from transformers import GPT2Tokenizer + + >>> tokenizer = GPT2Tokenizer.from_pretrained("gpt2") + >>> features = {"prompt": "The sky is", "chosen": " blue", "rejected": " green"} + >>> DPOTrainer.tokenize_row( + ... features, tokenizer, max_prompt_length=3, max_completion_length=3, add_special_tokens=False + ... ) + {'prompt_input_ids': [464, 6766, 318], 'chosen_input_ids': [4171, 50256], 'rejected_input_ids': [4077, 50256]} + ``` + """ + tokenizer = processing_class # the processing class is a tokenizer + prompt_input_ids = tokenizer(features["prompt"], add_special_tokens=False)["input_ids"] + chosen_input_ids = tokenizer(features["chosen"], add_special_tokens=False)["input_ids"] + rejected_input_ids = tokenizer(features["rejected"], add_special_tokens=False)["input_ids"] + + # Add special tokens (typically for encoder-decoder models) + if add_special_tokens: + if tokenizer.bos_token_id is not None: + prompt_input_ids = [tokenizer.bos_token_id] + prompt_input_ids + if tokenizer.eos_token_id is not None: + prompt_input_ids = prompt_input_ids + [tokenizer.eos_token_id] + chosen_input_ids = chosen_input_ids + [tokenizer.eos_token_id] + rejected_input_ids = rejected_input_ids + [tokenizer.eos_token_id] + + # Truncate prompt and completion sequences + if max_prompt_length is not None: + prompt_input_ids = prompt_input_ids[-max_prompt_length:] + if max_completion_length is not None: + chosen_input_ids = chosen_input_ids[:max_completion_length] + rejected_input_ids = rejected_input_ids[:max_completion_length] + + return { + "prompt_input_ids": prompt_input_ids, + "chosen_input_ids": chosen_input_ids, + "rejected_input_ids": rejected_input_ids, + } + + @staticmethod + def process_row( + features: dict[str, str], + processing_class: PreTrainedTokenizerBase, + max_prompt_length: Optional[int] = None, + max_completion_length: Optional[int] = None, + add_special_tokens: bool = True, + ) -> dict[str, list[int]]: + """ + Same as `tokenize_row` but for vision models. Please refer to `tokenize_row` for more information. 
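+
+        Example (a hypothetical sketch: `processor` is assumed to be a vision processor, e.g. one
+        returned by `AutoProcessor.from_pretrained`, and `image` a PIL image; the resulting token
+        ids depend on the processor, so no output is shown):
+
+        ```python
+        >>> features = {"images": [image], "prompt": "The sky is", "chosen": " blue", "rejected": " green"}
+        >>> row = DPOTrainer.process_row(
+        ...     features, processor, max_prompt_length=3, max_completion_length=3, add_special_tokens=False
+        ... )
+        ```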
+ """ + processor, tokenizer = processing_class, processing_class.tokenizer # the processing class is a processor + processed_features = processor(images=features["images"], text=features["prompt"], add_special_tokens=False) + + prompt_input_ids = processed_features["input_ids"][0] + pixel_values = processed_features["pixel_values"][0] + chosen_input_ids = tokenizer(features["chosen"], add_special_tokens=False)["input_ids"] + rejected_input_ids = tokenizer(features["rejected"], add_special_tokens=False)["input_ids"] + + # Add special tokens (typically for encoder-decoder models) + if add_special_tokens: + if tokenizer.bos_token_id is not None: + prompt_input_ids = [tokenizer.bos_token_id] + prompt_input_ids + if tokenizer.eos_token_id is not None: + prompt_input_ids = prompt_input_ids + [tokenizer.eos_token_id] + chosen_input_ids = chosen_input_ids + [tokenizer.eos_token_id] + rejected_input_ids = rejected_input_ids + [tokenizer.eos_token_id] + + # Truncate prompt and completion sequences + if max_prompt_length is not None: + prompt_input_ids = prompt_input_ids[-max_prompt_length:] + if max_completion_length is not None: + chosen_input_ids = chosen_input_ids[:max_completion_length] + rejected_input_ids = rejected_input_ids[:max_completion_length] + + output = { + "prompt_input_ids": prompt_input_ids, + "pixel_values": pixel_values, + "chosen_input_ids": chosen_input_ids, + "rejected_input_ids": rejected_input_ids, + } + + if "pixel_attention_mask" in processed_features: + output["pixel_attention_mask"] = processed_features["pixel_attention_mask"][0] + if "image_sizes" in processed_features: + output["image_sizes"] = processed_features["image_sizes"][0] + + return output + + def _set_signature_columns_if_needed(self): + # If `self.args.remove_unused_columns` is True, non-signature columns are removed. + # By default, this method sets `self._signature_columns` to the model's expected inputs. + # In DPOTrainer, we preprocess data, so using the model's signature columns doesn't work. + # Instead, we set them to the columns expected by `DataCollatorForPreference`, hence the override. + if self._signature_columns is None: + self._signature_columns = [ + "prompt_input_ids", + "chosen_input_ids", + "rejected_input_ids", + "image_sizes", + "ref_chosen_logps", + "ref_rejected_logps", + ] + + def get_train_dataloader(self) -> DataLoader: + """ + Returns the training [`~torch.utils.data.DataLoader`]. + + Subclass of transformers.src.transformers.trainer.get_train_dataloader to precompute `ref_log_probs`. 
+ """ + + if self.precompute_ref_log_probs and not self._precomputed_train_ref_log_probs: + batch_size = self.args.precompute_ref_batch_size or self.args.per_device_train_batch_size + dataloader_params = { + "batch_size": batch_size, + "collate_fn": self.data_collator, + "num_workers": self.args.dataloader_num_workers, + "pin_memory": self.args.dataloader_pin_memory, + "shuffle": False, + } + + # prepare dataloader + data_loader = self.accelerator.prepare(DataLoader(self.train_dataset, **dataloader_params)) + + ref_chosen_logps = [] + ref_rejected_logps = [] + for padded_batch in tqdm(iterable=data_loader, desc="Train dataset reference log probs"): + ref_chosen_logp, ref_rejected_logp = self.compute_ref_log_probs(padded_batch) + ref_chosen_logp, ref_rejected_logp = self.accelerator.gather_for_metrics( + (ref_chosen_logp, ref_rejected_logp) + ) + ref_chosen_logps.append(ref_chosen_logp.cpu()) + ref_rejected_logps.append(ref_rejected_logp.cpu()) + + # Unnecessary cache clearing to avoid OOM + empty_cache() + self.accelerator.free_memory() + + all_ref_chosen_logps = torch.cat(ref_chosen_logps).float().numpy() + all_ref_rejected_logps = torch.cat(ref_rejected_logps).float().numpy() + + self.train_dataset = self.train_dataset.add_column(name="ref_chosen_logps", column=all_ref_chosen_logps) + self.train_dataset = self.train_dataset.add_column( + name="ref_rejected_logps", column=all_ref_rejected_logps + ) + + self._precomputed_train_ref_log_probs = True + + return super().get_train_dataloader() + + def get_eval_dataloader(self, eval_dataset: Optional[Dataset] = None) -> DataLoader: + """ + Returns the evaluation [`~torch.utils.data.DataLoader`]. + + Subclass of transformers.src.transformers.trainer.get_eval_dataloader to precompute `ref_log_probs`. + + Args: + eval_dataset (`torch.utils.data.Dataset`, *optional*): + If provided, will override `self.eval_dataset`. If it is a [`~datasets.Dataset`], columns not accepted + by the `model.forward()` method are automatically removed. It must implement `__len__`. 
+ """ + if eval_dataset is None and self.eval_dataset is None: + raise ValueError("Trainer: evaluation requires an eval_dataset.") + eval_dataset = eval_dataset if eval_dataset is not None else self.eval_dataset + + if self.precompute_ref_log_probs and not self._precomputed_eval_ref_log_probs: + batch_size = self.args.precompute_ref_batch_size or self.args.per_device_eval_batch_size + dataloader_params = { + "batch_size": batch_size, + "collate_fn": self.data_collator, + "num_workers": self.args.dataloader_num_workers, + "pin_memory": self.args.dataloader_pin_memory, + "shuffle": False, + } + + # prepare dataloader + data_loader = self.accelerator.prepare(DataLoader(eval_dataset, **dataloader_params)) + + ref_chosen_logps = [] + ref_rejected_logps = [] + for padded_batch in tqdm(iterable=data_loader, desc="Eval dataset reference log probs"): + ref_chosen_logp, ref_rejected_logp = self.compute_ref_log_probs(padded_batch) + ref_chosen_logp, ref_rejected_logp = self.accelerator.gather_for_metrics( + (ref_chosen_logp, ref_rejected_logp) + ) + ref_chosen_logps.append(ref_chosen_logp.cpu()) + ref_rejected_logps.append(ref_rejected_logp.cpu()) + + all_ref_chosen_logps = torch.cat(ref_chosen_logps).float().numpy() + all_ref_rejected_logps = torch.cat(ref_rejected_logps).float().numpy() + + eval_dataset = eval_dataset.add_column(name="ref_chosen_logps", column=all_ref_chosen_logps) + eval_dataset = eval_dataset.add_column(name="ref_rejected_logps", column=all_ref_rejected_logps) + + # Save calculated ref_chosen_logps and ref_rejected_logps to the eval_dataset for subsequent runs + if self.eval_dataset is not None: + self.eval_dataset = eval_dataset + self._precomputed_eval_ref_log_probs = True + + return super().get_eval_dataloader(eval_dataset=eval_dataset) + + @contextmanager + def null_ref_context(self): + """Context manager for handling null reference model (that is, peft adapter manipulation).""" + with ( + self.accelerator.unwrap_model(self.model).disable_adapter() + if self.is_peft_model and not self.ref_adapter_name + else nullcontext() + ): + if self.ref_adapter_name: + self.model.set_adapter(self.ref_adapter_name) + yield + if self.ref_adapter_name: + self.model.set_adapter(self.model_adapter_name or "default") + + def compute_ref_log_probs(self, batch: dict[str, torch.LongTensor]) -> tuple[torch.Tensor, torch.Tensor]: + """Computes log probabilities of the reference model for a single padded batch of a DPO specific dataset.""" + compte_ref_context_manager = ( + autocast(self.accelerator.device.type) if self._peft_has_been_casted_to_bf16 else nullcontext() + ) + with torch.no_grad(), compte_ref_context_manager: + if self.ref_model is None: + with self.null_ref_context(): + ref_model_output = self.concatenated_forward(self.model, batch, is_ref_model=True) + else: + ref_model_output = self.concatenated_forward(self.ref_model, batch, is_ref_model=True) + return ref_model_output["chosen_logps"], ref_model_output["rejected_logps"] + + @staticmethod + def concatenated_inputs( + batch: dict[str, Union[list, torch.LongTensor]], padding_value: int + ) -> dict[str, torch.LongTensor]: + """ + Concatenate the `chosen` and `rejected` inputs from the batch into a single tensor for both the prompt and + completion sequences. + + Args: + batch (`dict[str, Union[list, torch.LongTensor]]`): + A batch of input data. The batch must contain the following keys: + + - `"prompt_input_ids"`: Tensor of shape `(batch_size, prompt_length)` representing the prompt input + IDs. 
+ - `"chosen_input_ids"`: Tensor of shape `(batch_size, chosen_length)` representing the chosen + completion input IDs. + - `"rejected_input_ids"`: Tensor of shape `(batch_size, rejected_length)` representing the rejected + completion input IDs. + - `"prompt_pixel_values"` (optional): Tensor for pixel values, if available. + - `"prompt_pixel_attention_mask"` (optional): Tensor for pixel attention masks, if available. + + padding_value (`int`): + The padding value to use for the concatenated completion sequences (`chosen_input_ids` and + `rejected_input_ids`). + + Returns: + `dict[str, torch.LongTensor]`: A dictionary containing: + + - `"prompt_input_ids"`: Concatenated prompt input IDs of shape `(2 * batch_size, prompt_length)`. + - `"completion_input_ids"`: Concatenated chosen and rejected completion input IDs of shape `(2 * + batch_size, max_completion_length)`. + - `"prompt_attention_mask"`: Concatenated prompt attention masks of shape `(2 * batch_size, + prompt_length)`. + - `"completion_attention_mask"`: Concatenated chosen and rejected attention masks of shape `(2 * + batch_size, max_completion_length)`. + - `"pixel_values"` (optional): Concatenated pixel values if `"prompt_pixel_values"` are present. + - `"pixel_attention_mask"` (optional): Concatenated pixel attention masks if + `"prompt_pixel_attention_mask"` are present. + + Notes: + The completion input IDs and attention masks are padded to the maximum completion length of the chosen or + rejected sequences. + """ + output = {} + + # For the prompt, the input_ids are the same for both the chosen and rejected responses + output["prompt_input_ids"] = torch.cat([batch["prompt_input_ids"], batch["prompt_input_ids"]], dim=0) + output["prompt_attention_mask"] = torch.cat( + [batch["prompt_attention_mask"], batch["prompt_attention_mask"]], dim=0 + ) + if "pixel_values" in batch: + output["pixel_values"] = torch.cat([batch["pixel_values"], batch["pixel_values"]], dim=0) + + if "pixel_attention_mask" in batch: + output["pixel_attention_mask"] = torch.cat( + [batch["pixel_attention_mask"], batch["pixel_attention_mask"]], dim=0 + ) + if "image_sizes" in batch: + output["image_sizes"] = torch.cat([batch["image_sizes"], batch["image_sizes"]], dim=0) + + # Concatenate the chosen and rejected completions + max_completion_length = max(batch["chosen_input_ids"].shape[1], batch["rejected_input_ids"].shape[1]) + output["completion_input_ids"] = torch.cat( + ( + pad_to_length(batch["chosen_input_ids"], max_completion_length, pad_value=padding_value), + pad_to_length(batch["rejected_input_ids"], max_completion_length, pad_value=padding_value), + ), + ) + output["completion_attention_mask"] = torch.cat( + ( + pad_to_length(batch["chosen_attention_mask"], max_completion_length, pad_value=0), + pad_to_length(batch["rejected_attention_mask"], max_completion_length, pad_value=0), + ), + ) + + return output + + def dpo_loss( + self, + chosen_logps: torch.FloatTensor, + rejected_logps: torch.FloatTensor, + ref_chosen_logps: torch.FloatTensor, + ref_rejected_logps: torch.FloatTensor, + loss_type: str = "sigmoid", + model_output: dict[str, torch.FloatTensor] = None, + ) -> tuple[torch.FloatTensor, torch.FloatTensor, torch.FloatTensor]: + """ + Compute the DPO loss for a batch of policy and reference model log probabilities. + + Args: + chosen_logps (`torch.FloatTensor`): + Log probabilities of the model for the chosen responses. Shape: `(batch_size,)`. + rejected_logps (`torch.FloatTensor`): + Log probabilities of the model for the rejected responses. 
Shape: `(batch_size,)`. + ref_chosen_logps (`torch.FloatTensor`): + Log probabilities of the reference model for the chosen responses. Shape: `(batch_size,)`. + ref_rejected_logps (`torch.FloatTensor`): + Log probabilities of the reference model for the rejected responses. Shape: `(batch_size,)`. + loss_type (`str`, defaults to `"sigmoid"`): + The type of loss to compute. One of: + - `"sigmoid"`: Sigmoid loss from the original [DPO](https://huggingface.co/papers/2305.18290) paper. + - `"hinge"`: Hinge loss on the normalized likelihood from the + [SLiC](https://huggingface.co/papers/2305.10425) paper. + - `"ipo"`: IPO loss from the [IPO](https://huggingface.co/papers/2310.12036) paper. + - `"exo_pair"`: Pairwise EXO loss from the [EXO](https://huggingface.co/papers/2402.00856) paper. + - `"nca_pair"`: Pairwise NCA loss from the [NCA](https://huggingface.co/papers/2402.05369) paper. + - `"robust"`: Unbiased estimate of the DPO loss that is robust to preference noise from the [Robust + DPO](https://huggingface.co/papers/2403.00409) paper. + - `"bco_pair"`: Pairwise BCO loss from the [BCO](https://huggingface.co/papers/2404.04656) paper. + - `"sppo_hard"`: SPPO loss with hard label from the [SPPO](https://huggingface.co/papers/2405.00675) + paper. + - `"aot"`: AOT loss for paired datasets from the [AOT](https://huggingface.co/papers/2406.05882) paper. + - `"aot_pair"`: AOT loss for unpaired datasets from the [AOT](https://huggingface.co/papers/2406.05882) + paper. + - `"discopop"`: DiscoPOP (a.k.a Log-Ratio Modulated Loss, LRML) loss from the + [DiscoPOP](https://huggingface.co/papers/2406.08414) paper. + - `"apo_zero"`: APO-zero loss from the [APO](https://huggingface.co/papers/2408.06266) paper. + - `"apo_down"`: APO-down loss from the [APO](https://huggingface.co/papers/2408.06266) paper. + - `"sft"`: Negative log-likelihood loss (standard supervised fine-tuning loss). + model_output (`dict[str, torch.FloatTensor]`, *optional*): + The output of the model's forward pass. This is used to compute auxiliary losses if enabled. + + Returns: + A tuple of three tensors: `(losses, chosen_rewards, rejected_rewards)`. The losses tensor contains the DPO + loss for each example in the batch. The `chosen_rewards` and `rejected_rewards` tensors contain the rewards + for the chosen and rejected responses, respectively. + """ + device = self.accelerator.device + + # Get the log ratios for the chosen and rejected responses + chosen_logratios = chosen_logps.to(device) - (not self.reference_free) * ref_chosen_logps.to(device) + rejected_logratios = rejected_logps.to(device) - (not self.reference_free) * ref_rejected_logps.to(device) + + if self.f_divergence_type == FDivergenceType.ALPHA_DIVERGENCE.value: + # The alpha-divergence formula: (1 - u^-alpha) / alpha + # The divergence difference between the chosen and rejected sample is: + # (1 - u[w]^-alpha) / alpha - (1 - u[l]^-alpha) / alpha + # = (u[l]^-alpha - u[w]^-alpha) / alpha + # where u[w] and u[l] are the policy/reference probability ratios + # for the chosen and rejected samples, respectively. 
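+            # Note: `cap_exp` exponentiates a clamped argument, which is meant to guard against float
+            # overflow when the log-ratios are large.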
+ alpha_coef = FDivergenceConstants.ALPHA_DIVERGENCE_COEF_DEFAULT + if self.f_divergence_params and FDivergenceConstants.ALPHA_DIVERGENCE_COEF_KEY in self.f_divergence_params: + alpha_coef = float(self.f_divergence_params[FDivergenceConstants.ALPHA_DIVERGENCE_COEF_KEY]) + logits = (cap_exp(rejected_logratios * -alpha_coef) - cap_exp(chosen_logratios * -alpha_coef)) / alpha_coef + else: + logratios = chosen_logps - rejected_logps + if self.reference_free: + ref_logratios = torch.tensor([0], dtype=logratios.dtype, device=logratios.device) + else: + ref_logratios = ref_chosen_logps - ref_rejected_logps + + logratios = logratios.to(self.accelerator.device) + ref_logratios = ref_logratios.to(self.accelerator.device) + logits = logratios - ref_logratios + + if self.f_divergence_type == FDivergenceType.JS_DIVERGENCE.value: + # The js-divergence formula: log(2 * u / (1 + u)) + # The divergence difference between the chosen and rejected sample is: + # log(2 * u[w] / (1 + u[w])) - log(2 * u[l] / (1 + u[l])) + # = log(u[w]) - log(u[l]) - (log(1 + u[w]) - log(1 + u[l])) + # where u[w] and u[l] are the policy/reference probability ratios + # for the chosen and rejected samples, respectively. + logits -= F.softplus(chosen_logratios) - F.softplus(rejected_logratios) + + # The beta is a temperature parameter for the DPO loss, typically something in the range of 0.1 to 0.5. + # We ignore the reference model as beta -> 0. The label_smoothing parameter encodes our uncertainty about the + # labels and calculates a conservative DPO loss. + if loss_type == "sigmoid": + losses = ( + -F.logsigmoid(self.beta * logits) * (1 - self.label_smoothing) + - F.logsigmoid(-self.beta * logits) * self.label_smoothing + ) + + elif loss_type == "robust": + losses = ( + -F.logsigmoid(self.beta * logits) * (1 - self.label_smoothing) + + F.logsigmoid(-self.beta * logits) * self.label_smoothing + ) / (1 - 2 * self.label_smoothing) + + elif loss_type == "exo_pair": + # eqn (16) of the EXO paper: https://huggingface.co/papers/2402.00856 + import math + + if self.label_smoothing == 0: + self.label_smoothing = 1e-3 + losses = (self.beta * logits).sigmoid() * ( + F.logsigmoid(self.beta * logits) - math.log(1 - self.label_smoothing) + ) + (-self.beta * logits).sigmoid() * (F.logsigmoid(-self.beta * logits) - math.log(self.label_smoothing)) + + elif loss_type == "hinge": + losses = torch.relu(1 - self.beta * logits) + + elif loss_type == "ipo": + # eqn (17) of the paper where beta is the regularization parameter for the IPO loss, denoted by tau in the paper. + losses = (logits - 1 / (2 * self.beta)) ** 2 + + elif loss_type == "bco_pair": + chosen_logratios = chosen_logps - ref_chosen_logps + rejected_logratios = rejected_logps - ref_rejected_logps + chosen_rewards = self.beta * chosen_logratios + rejected_rewards = self.beta * rejected_logratios + rewards = torch.cat((chosen_rewards, rejected_rewards), 0).mean().detach() + self.running.update(rewards) + delta = self.running.mean + losses = -F.logsigmoid((self.beta * chosen_logratios) - delta) - F.logsigmoid( + -(self.beta * rejected_logratios - delta) + ) + + elif loss_type == "sppo_hard": + # In the paper (https://huggingface.co/papers/2405.00675), SPPO employs a soft probability approach, + # estimated using the PairRM score. The probability calculation is conducted outside of the trainer class. + # The version described here is the hard probability version, where P in Equation (4.7) of Algorithm 1 is + # set to 1 for the winner and 0 for the loser. 
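+            # Concretely, the squared penalties below drive the chosen log-ratio toward +1/(2*beta) and the
+            # rejected log-ratio toward -1/(2*beta).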
+ a = chosen_logps - ref_chosen_logps + b = rejected_logps - ref_rejected_logps + losses = (a - 0.5 / self.beta) ** 2 + (b + 0.5 / self.beta) ** 2 + + elif loss_type == "nca_pair": + chosen_rewards = (chosen_logps - ref_chosen_logps) * self.beta + rejected_rewards = (rejected_logps - ref_rejected_logps) * self.beta + losses = ( + -F.logsigmoid(chosen_rewards) + - 0.5 * F.logsigmoid(-chosen_rewards) + - 0.5 * F.logsigmoid(-rejected_rewards) + ) + + elif loss_type == "aot_pair": + chosen_logratios = chosen_logps - ref_chosen_logps + rejected_logratios = rejected_logps - ref_rejected_logps + chosen_logratios_sorted, _ = torch.sort(chosen_logratios, dim=0) + rejected_logratios_sorted, _ = torch.sort(rejected_logratios, dim=0) + delta = chosen_logratios_sorted - rejected_logratios_sorted + losses = ( + -F.logsigmoid(self.beta * delta) * (1 - self.label_smoothing) + - F.logsigmoid(-self.beta * delta) * self.label_smoothing + ) + + elif loss_type == "aot": + logratios = chosen_logps - rejected_logps + ref_logratios = ref_chosen_logps - ref_rejected_logps + logratios_sorted, _ = torch.sort(logratios, dim=0) + ref_logratios_sorted, _ = torch.sort(ref_logratios, dim=0) + delta = logratios_sorted - ref_logratios_sorted + losses = ( + -F.logsigmoid(self.beta * delta) * (1 - self.label_smoothing) + - F.logsigmoid(-self.beta * delta) * self.label_smoothing + ) + + elif loss_type == "apo_zero": + # Eqn (7) of the APO paper (https://huggingface.co/papers/2408.06266) + # Use this loss when you believe the chosen outputs are better than your model's default output + losses_chosen = 1 - F.sigmoid(self.beta * chosen_logratios) # Increase chosen likelihood + losses_rejected = F.sigmoid(self.beta * rejected_logratios) # Decrease rejected likelihood + losses = losses_chosen + losses_rejected + + elif loss_type == "apo_down": + # Eqn (8) of the APO paper (https://huggingface.co/papers/2408.06266) + # Use this loss when you believe the chosen outputs are worse than your model's default output. 
+            # Decrease chosen likelihood and decrease rejected likelihood more
+            losses_chosen = F.sigmoid(self.beta * chosen_logratios)
+            losses_rejected = 1 - F.sigmoid(self.beta * (chosen_logratios - rejected_logratios))
+            losses = losses_chosen + losses_rejected
+
+        elif loss_type == "discopop":
+            # Eqn (5) of the DiscoPOP paper (https://huggingface.co/papers/2406.08414)
+            # This objective was discovered automatically via LLM-driven discovery
+            logratios = chosen_logps - rejected_logps
+            ref_logratios = ref_chosen_logps - ref_rejected_logps
+            logits = logratios - ref_logratios
+            logits = logits * self.beta
+            # Modulate the mixing coefficient based on the log ratio magnitudes
+            log_ratio_modulation = torch.sigmoid(logits / self.args.discopop_tau)
+            logistic_component = -F.logsigmoid(logits)
+            exp_component = torch.exp(-logits)
+            # Blend between the logistic and exponential components based on the log ratio modulation
+            losses = logistic_component * (1 - log_ratio_modulation) + exp_component * log_ratio_modulation
+
+        elif loss_type == "sft":
+            # SFT loss is the negative log likelihood loss on the chosen responses
+            # This acts as the generation loss component in MPO
+            sft_loss = model_output["nll_loss"]
+            # Create a losses tensor with the same per-sample shape as the other losses
+            batch_size = chosen_logps.shape[0]
+            losses = sft_loss.expand(batch_size)
+            # For SFT, we don't have preference rewards, so use zeros
+            # (these placeholders are superseded by the unconditional reward computation below)
+            chosen_rewards = torch.zeros_like(chosen_logps)
+            rejected_rewards = torch.zeros_like(rejected_logps)
+
+        else:
+            raise ValueError(
+                f"Unknown loss type: {loss_type}. Should be one of ['sigmoid', 'hinge', 'ipo', 'exo_pair', "
+                "'nca_pair', 'robust', 'bco_pair', 'sppo_hard', 'aot', 'aot_pair', 'discopop', 'apo_zero', "
+                "'apo_down', 'sft']"
+            )
+
+        chosen_rewards = self.beta * (chosen_logps.to(device) - ref_chosen_logps.to(device)).detach()
+        rejected_rewards = self.beta * (rejected_logps.to(device) - ref_rejected_logps.to(device)).detach()
+
+        return losses, chosen_rewards, rejected_rewards
+
+    def _compute_loss_liger(
+        self, model: nn.Module, batch: dict[str, Union[list, torch.LongTensor]]
+    ) -> dict[str, torch.Tensor]:
+        unwrapped_model = self.accelerator.unwrap_model(model)
+        concatenated_batch = self.concatenated_inputs(batch, padding_value=self.padding_value)
+
+        model_kwargs = {}
+        if self.aux_loss_enabled:
+            model_kwargs["output_router_logits"] = True
+
+        # Add the pixel values and attention masks for vision models
+        if "pixel_values" in concatenated_batch:
+            model_kwargs["pixel_values"] = concatenated_batch["pixel_values"]
+        if "pixel_attention_mask" in concatenated_batch:
+            model_kwargs["pixel_attention_mask"] = concatenated_batch["pixel_attention_mask"]
+        if "image_sizes" in concatenated_batch:
+            model_kwargs["image_sizes"] = concatenated_batch["image_sizes"]
+
+        prompt_attention_mask = concatenated_batch["prompt_attention_mask"]
+        completion_attention_mask = concatenated_batch["completion_attention_mask"]
+
+        if self.is_encoder_decoder:
+            # 1. Get encoder outputs
+            encoder_outputs = unwrapped_model.get_encoder()(
+                concatenated_batch["prompt_input_ids"],
+                attention_mask=concatenated_batch["prompt_attention_mask"],
+                return_dict=True,
+            )
+            # 2. Prepare decoder inputs
+            decoder_input_ids = shift_tokens_right(
+                concatenated_batch["completion_input_ids"],
+                unwrapped_model.config.decoder_start_token_id,
+            )
+            # 3.
Get decoder outputs + decoder_outputs = unwrapped_model.get_decoder()( + input_ids=decoder_input_ids, + attention_mask=concatenated_batch["completion_attention_mask"], + encoder_hidden_states=encoder_outputs.last_hidden_state, + encoder_attention_mask=concatenated_batch["prompt_attention_mask"], + use_cache=False, + ) + hidden_states = decoder_outputs.last_hidden_state + + ref_hidden_states = None + if not self.reference_free and self.ref_model is not None: + unwrapped_ref_model = self.accelerator.unwrap_model(self.ref_model) + ref_encoder_outputs = unwrapped_ref_model.get_encoder()( + concatenated_batch["prompt_input_ids"], + attention_mask=concatenated_batch["prompt_attention_mask"], + return_dict=True, + ) + ref_decoder_outputs = unwrapped_ref_model.get_decoder()( + input_ids=decoder_input_ids, + attention_mask=concatenated_batch["completion_attention_mask"], + encoder_hidden_states=ref_encoder_outputs.last_hidden_state, + encoder_attention_mask=concatenated_batch["prompt_attention_mask"], + use_cache=False, + ) + ref_hidden_states = ref_decoder_outputs.last_hidden_state + elif not self.reference_free: + with self.null_ref_context(): + ref_encoder_outputs = unwrapped_model.get_encoder()( + concatenated_batch["prompt_input_ids"], + attention_mask=concatenated_batch["prompt_attention_mask"], + return_dict=True, + ) + ref_decoder_outputs = unwrapped_model.get_decoder()( + input_ids=decoder_input_ids, + attention_mask=concatenated_batch["completion_attention_mask"], + encoder_hidden_states=ref_encoder_outputs.last_hidden_state, + encoder_attention_mask=concatenated_batch["prompt_attention_mask"], + use_cache=False, + ) + ref_hidden_states = ref_decoder_outputs.last_hidden_state + + labels = concatenated_batch["completion_input_ids"] + loss_mask = completion_attention_mask.bool() + else: + # For decoder-only models + input_ids = torch.cat( + (concatenated_batch["prompt_input_ids"], concatenated_batch["completion_input_ids"]), dim=1 + ) + attention_mask = torch.cat( + (concatenated_batch["prompt_attention_mask"], concatenated_batch["completion_attention_mask"]), + dim=1, + ) + # Mask the prompt but not the completion for the loss + loss_mask = torch.cat( + (torch.zeros_like(prompt_attention_mask), completion_attention_mask), + dim=1, + ) + + # Flush and truncate + if self.max_length is not None and self.max_length < attention_mask.size(1): + if self.truncation_mode == "keep_start": + # Flush left to reduce the memory usage + # [[0, 0, x, x, x, x], -> [[x, x, x, x], + # [0, x, x, x, 0, 0]] [x, x, x, 0]] + attention_mask, input_ids, loss_mask = flush_left(attention_mask, input_ids, loss_mask) + attention_mask = attention_mask[:, : self.max_length] + input_ids = input_ids[:, : self.max_length] + loss_mask = loss_mask[:, : self.max_length] + elif self.truncation_mode == "keep_end": + # Flush right before truncating left, then flush left + # [[0, 0, x, x, x, x], -> [[0, 0, x, x], + # [0, x, x, x, 0, 0]] [0, x, x, x]] + attention_mask, input_ids, loss_mask = flush_right(attention_mask, input_ids, loss_mask) + input_ids = input_ids[:, -self.max_length :] + attention_mask = attention_mask[:, -self.max_length :] + loss_mask = loss_mask[:, -self.max_length :] + attention_mask, input_ids, loss_mask = flush_left(attention_mask, input_ids, loss_mask) + else: + raise ValueError( + f"Unknown truncation mode: '{self.truncation_mode}'. Should be one of ['keep_end', " + "'keep_start']." 
+                    )
+            else:
+                # Flush left to reduce the memory usage
+                # [[0, 0, x, x, x, x],  ->  [[x, x, x, x],
+                #  [0, x, x, x, 0, 0]]       [x, x, x, 0]]
+                attention_mask, input_ids, loss_mask = flush_left(attention_mask, input_ids, loss_mask)
+
+            # Add logits_to_keep optimization
+            if self.use_logits_to_keep:
+                first_compute_index = loss_mask.nonzero(as_tuple=True)[1].min()
+                logits_to_keep = (loss_mask.shape[1] - first_compute_index).item() + 1
+                model_kwargs["logits_to_keep"] = logits_to_keep
+
+            model_kwargs["output_hidden_states"] = True
+
+            # Add padding-free training support
+            if self.padding_free:
+                input_ids = input_ids[attention_mask.bool()].unsqueeze(0)
+                loss_mask = loss_mask[attention_mask.bool()].unsqueeze(0)
+                position_ids = attention_mask.cumsum(1)[attention_mask.bool()].unsqueeze(0) - 1
+                model_kwargs["position_ids"] = position_ids
+            else:
+                model_kwargs["attention_mask"] = attention_mask
+
+            # Get the base model outputs (before LM head)
+            if hasattr(unwrapped_model, "get_decoder") and unwrapped_model.get_decoder() is not None:
+                base_model = unwrapped_model.get_decoder()
+            else:
+                base_attr = getattr(unwrapped_model, "base_model_prefix", self.args.base_model_attribute_name)
+                base_model = getattr(unwrapped_model, base_attr, unwrapped_model)
+
+            outputs = base_model(
+                input_ids,
+                use_cache=False,
+                **model_kwargs,
+            )
+            hidden_states = outputs.last_hidden_state[:, :-1]
+
+            # Get reference hidden states if needed
+            ref_hidden_states = None
+            if not self.reference_free and self.ref_model is not None:
+                unwrapped_ref_model = self.accelerator.unwrap_model(self.ref_model)
+                if hasattr(unwrapped_ref_model, "get_decoder") and unwrapped_ref_model.get_decoder() is not None:
+                    ref_base_model = unwrapped_ref_model.get_decoder()
+                else:
+                    ref_attr = getattr(unwrapped_ref_model, "base_model_prefix", self.args.base_model_attribute_name)
+                    ref_base_model = getattr(unwrapped_ref_model, ref_attr, unwrapped_ref_model)
+
+                ref_outputs = ref_base_model(
+                    input_ids,
+                    use_cache=False,
+                    **model_kwargs,
+                )
+                ref_hidden_states = ref_outputs.last_hidden_state[:, :-1]
+            elif not self.reference_free:
+                if hasattr(unwrapped_model, "get_decoder") and unwrapped_model.get_decoder() is not None:
+                    ref_base_model = unwrapped_model.get_decoder()
+                else:
+                    ref_attr = getattr(unwrapped_model, "base_model_prefix", self.args.base_model_attribute_name)
+                    ref_base_model = getattr(unwrapped_model, ref_attr, unwrapped_model)
+                with self.null_ref_context():
+                    ref_outputs = ref_base_model(
+                        input_ids,
+                        use_cache=False,
+                        **model_kwargs,
+                    )
+                    ref_hidden_states = ref_outputs.last_hidden_state[:, :-1]
+
+            masked_input_ids = torch.where(loss_mask != 0, input_ids, self.label_pad_token_id)
+            labels = masked_input_ids[:, 1:]  # Shift labels one step left for causal-LM next-token prediction
+
+        # Get the LM head
+        lm_head = unwrapped_model.get_output_embeddings()
+
+        # Get reference model weights if needed
+        ref_weight = None
+        ref_bias = None
+        if not self.reference_free:
+            if self.ref_model is not None:
+                unwrapped_ref_model = self.accelerator.unwrap_model(self.ref_model)
+                ref_lm_head = unwrapped_ref_model.get_output_embeddings()
+            else:
+                with self.null_ref_context():
+                    ref_lm_head = unwrapped_model.get_output_embeddings()
+            ref_weight = ref_lm_head.weight
+            ref_bias = ref_lm_head.bias if hasattr(ref_lm_head, "bias") else None
+
+        # Compute loss using Liger kernel
+        loss_output = self.dpo_loss_fn(
+            lm_head.weight,
+            hidden_states,
+            labels,
+            bias=lm_head.bias if hasattr(lm_head, "bias") else None,
+            ref_input=ref_hidden_states if not self.reference_free else None,
ref_weight=ref_weight if not self.reference_free else None, + ref_bias=ref_bias if not self.reference_free else None, + ) + ( + loss, + (chosen_logps, rejected_logps, chosen_logits_mean, rejected_logits_mean, nll_loss, *aux_outputs), + ) = loss_output + + output = { + "loss": loss, + "chosen_logps": chosen_logps, + "rejected_logps": rejected_logps, + "mean_chosen_logits": chosen_logits_mean, + "mean_rejected_logits": rejected_logits_mean, + "nll_loss": nll_loss, + "chosen_rewards": aux_outputs[0], + "rejected_rewards": aux_outputs[1], + } + if self.aux_loss_enabled: + output["aux_loss"] = outputs.aux_loss + + return output + + def concatenated_forward( + self, model: nn.Module, batch: dict[str, Union[list, torch.LongTensor]], is_ref_model: bool = False + ) -> dict[str, torch.Tensor]: + """ + Runs the given model on the given batch of inputs, concatenating the chosen and rejected inputs together. + + We do this to avoid doing two forward passes, because it's faster for FSDP. + + Args: + model: + Model to run the forward pass on. + batch: + Batch of input data. + is_ref_model: + Whether this method is being called for the reference model. If `True`, length desensitization is not + applied. + """ + num_examples = batch["prompt_input_ids"].shape[0] + + concatenated_batch = self.concatenated_inputs(batch, padding_value=self.padding_value) + + model_kwargs = {"use_cache": False} + if self.aux_loss_enabled: + model_kwargs["output_router_logits"] = True + + # Add the pixel values and attention masks for vision models + if "pixel_values" in concatenated_batch: + model_kwargs["pixel_values"] = concatenated_batch["pixel_values"] + if "pixel_attention_mask" in concatenated_batch: + model_kwargs["pixel_attention_mask"] = concatenated_batch["pixel_attention_mask"] + if "image_sizes" in concatenated_batch: + model_kwargs["image_sizes"] = concatenated_batch["image_sizes"] + + prompt_input_ids = concatenated_batch["prompt_input_ids"] + prompt_attention_mask = concatenated_batch["prompt_attention_mask"] + completion_input_ids = concatenated_batch["completion_input_ids"] + completion_attention_mask = concatenated_batch["completion_attention_mask"] + if self.is_encoder_decoder: + labels = completion_input_ids + labels[completion_attention_mask == 0] = self.label_pad_token_id + outputs = model( + input_ids=prompt_input_ids, + attention_mask=prompt_attention_mask, + labels=labels, # we need the labels for the logits to be returned + **model_kwargs, + ) + logits = outputs.logits + loss_mask = completion_attention_mask.bool() + else: + # Concatenate the prompt and completion inputs + input_ids = torch.cat((prompt_input_ids, completion_input_ids), dim=1) + attention_mask = torch.cat((prompt_attention_mask, completion_attention_mask), dim=1) + # Mask the prompt but not the completion for the loss + loss_mask = torch.cat( + (torch.zeros_like(prompt_attention_mask), completion_attention_mask), + dim=1, + ) + + # Flush and truncate + if self.max_length is not None and self.max_length < attention_mask.size(1): + if self.truncation_mode == "keep_start": + # Flush left to reduce the memory usage + # [[0, 0, x, x, x, x], -> [[x, x, x, x], + # [0, x, x, x, 0, 0]] [x, x, x, 0]] + attention_mask, input_ids, loss_mask = flush_left(attention_mask, input_ids, loss_mask) + attention_mask = attention_mask[:, : self.max_length] + input_ids = input_ids[:, : self.max_length] + loss_mask = loss_mask[:, : self.max_length] + elif self.truncation_mode == "keep_end": + # Flush right before truncating left, then flush left + # [[0, 0, x, 
x, x, x], -> [[0, 0, x, x], + # [0, x, x, x, 0, 0]] [0, x, x, x]] + attention_mask, input_ids, loss_mask = flush_right(attention_mask, input_ids, loss_mask) + input_ids = input_ids[:, -self.max_length :] + attention_mask = attention_mask[:, -self.max_length :] + loss_mask = loss_mask[:, -self.max_length :] + attention_mask, input_ids, loss_mask = flush_left(attention_mask, input_ids, loss_mask) + else: + raise ValueError( + f"Unknown truncation mode: '{self.truncation_mode}'. Should be one of ['keep_end', " + "'keep_start']." + ) + else: + # Flush left to reduce the memory usage + # [[0, 0, x, x, x, x], -> [[x, x, x, x], + # [0, x, x, x, 0, 0]] [x, x, x, 0]] + attention_mask, input_ids, loss_mask = flush_left(attention_mask, input_ids, loss_mask) + + if self.use_logits_to_keep: + # Compute logits_to_keep based on loss_mask pattern: + # [[0, 0, 0, x, x, x, x], + # [0, 0, 0, x, x, x, 0]] + # ^ start computing logits from here ([:, -(7-3+1):]) + first_compute_index = loss_mask.nonzero(as_tuple=True)[1].min() + logits_to_keep = (loss_mask.shape[1] - first_compute_index).item() + 1 # +1 for the first label + model_kwargs["logits_to_keep"] = logits_to_keep + + model_kwargs["output_hidden_states"] = True + + if self.padding_free: + # Flatten the input_ids, position_ids, and loss_mask + # input_ids = [[a, b, c, 0], -> input_ids = [[a, b, c, d, e, f, g]] + # [d, e, f, g]] position_ids = [[0, 1, 2, 0, 1, 2, 3]] + input_ids = input_ids[attention_mask.bool()].unsqueeze(0) + loss_mask = loss_mask[attention_mask.bool()].unsqueeze(0) + position_ids = attention_mask.cumsum(1)[attention_mask.bool()].unsqueeze(0) - 1 + model_kwargs["position_ids"] = position_ids + else: + model_kwargs["attention_mask"] = attention_mask + + outputs = model(input_ids, **model_kwargs) + logits = outputs.logits + + # Offset the logits by one to align with the labels + labels = torch.roll(input_ids, shifts=-1, dims=1) + loss_mask = torch.roll(loss_mask, shifts=-1, dims=1).bool() + + if self.use_logits_to_keep: + # Align labels with logits + # logits: -, -, [x2, x3, x4, x5, x6] + # ^ --------- ^ after logits[:, :-1, :] + # labels: [y0, y1, y2, y3, y4, y5, y6] + # ^ --------- ^ with logits_to_keep=4, [:, -4:] + # loss_mask: [0, 0, 0, 1, 1, 1, 1] + labels = labels[:, -logits_to_keep:] + loss_mask = loss_mask[:, -logits_to_keep:] + + if logits.shape[:2] != labels.shape[:2]: + # for LLaVA, the returned logits include the image tokens (placed before the text tokens) + seq_len = labels.shape[1] + logits = logits[:, -seq_len:] + + # Compute the log probabilities of the labels + labels[~loss_mask] = 0 # dummy token; we'll ignore the losses on these tokens later + per_token_logps = selective_log_softmax(logits, labels) + per_token_logps[~loss_mask] = 0 + per_token_logps = torch.roll(per_token_logps, shifts=1, dims=1) + + if self.padding_free: + # Unflatten the per_token_logps (shape: [1, sum_seq_len] -> [batch_size, seq_len]) + batch_size, seq_len = attention_mask.shape + per_token_logps_ = torch.zeros( + batch_size, seq_len, device=outputs.logits.device, dtype=outputs.logits.dtype + ) + per_token_logps_[attention_mask.bool()] = per_token_logps + per_token_logps = per_token_logps_ + + all_logps = per_token_logps[:, 1:].sum(-1) + + output = {} + + if self.use_weighting: + with torch.no_grad(): + # Eq (2) of the WPO paper: https://huggingface.co/papers/2406.11827 + logprobs = F.log_softmax(logits, dim=-1) + weights_adjustment_factor = torch.logsumexp(2 * logprobs, dim=-1) # same as sum(probs**2) in log space + per_token_logps_adjusted = 
per_token_logps - weights_adjustment_factor + all_weights = (per_token_logps_adjusted * loss_mask).sum(-1) / loss_mask.sum(-1) + chosen_weights = all_weights[:num_examples] + rejected_weights = all_weights[num_examples:] + output["policy_weights"] = torch.clamp(torch.exp(chosen_weights + rejected_weights), max=1) + + if self.args.rpo_alpha is not None or "sft" in self.loss_type: + # Only use the chosen logits for the RPO loss or SFT loss + chosen_logits = logits[:num_examples, :-1] if not self.is_encoder_decoder else logits[:num_examples] + chosen_labels = labels[:num_examples, :-1] if not self.is_encoder_decoder else labels[:num_examples] + + # Compute the log probabilities of the labels + output["nll_loss"] = F.cross_entropy( + torch.flatten(chosen_logits, end_dim=1), torch.flatten(chosen_labels, end_dim=1), ignore_index=0 + ) + + if "ipo" in self.loss_type: + all_logps = all_logps / loss_mask.sum(-1) + + if self.args.ld_alpha is not None and not is_ref_model: + # Compute response lengths based on loss_mask + completion_lengths = loss_mask.sum(dim=1) + + chosen_lengths = completion_lengths[:num_examples] + rejected_lengths = completion_lengths[num_examples:] + public_lengths = torch.min(chosen_lengths, rejected_lengths) # l_p in the paper + public_lengths = torch.cat([public_lengths, public_lengths], dim=0) + + seq_len = per_token_logps.size(1) + position_ids = torch.arange(seq_len, device=per_token_logps.device).expand_as(per_token_logps) + + ld_mask = position_ids < public_lengths.unsqueeze(1) + mask = position_ids < completion_lengths.unsqueeze(1) + + front_mask = (ld_mask & mask).float() + rear_mask = (~ld_mask & mask).float() + front_logps = (per_token_logps * front_mask).sum(dim=1) + rear_logps = (per_token_logps * rear_mask).sum(dim=1) + + all_logps = front_logps + self.args.ld_alpha * rear_logps + + output["chosen_logps"] = all_logps[:num_examples] + output["rejected_logps"] = all_logps[num_examples:] + + # Compute the mean logits + if self.padding_free: + # position_ids contains a sequence of range identifiers (e.g., [[0, 1, 2, 0, 1, 2, 3, ...]]). + # There are 2*num_examples ranges in total: the first half corresponds to the chosen tokens, + # and the second half to the rejected tokens. + # To find the start of the rejected tokens, we look for the num_examples+1-th zero in pos_id. 
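+            # For example, with num_examples=2 and position_ids = [[0, 1, 2, 0, 1, 0, 1, 2, 3, 0, 1]], the zeros
+            # sit at flat indices {0, 3, 5, 9}; taking the zero at list index num_examples (= 2) gives split_idx=5,
+            # the first token of the first rejected sequence.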
+ split_idx = (position_ids == 0).nonzero(as_tuple=True)[1][num_examples] + mean_chosen_logits = logits[0, :split_idx][loss_mask[0, :split_idx]].mean() + mean_rejected_logits = logits[0, split_idx:][loss_mask[0, split_idx:]].mean() + else: + mean_chosen_logits = logits[:num_examples][loss_mask[:num_examples]].mean() + mean_rejected_logits = logits[num_examples:][loss_mask[num_examples:]].mean() + + output["mean_chosen_logits"] = mean_chosen_logits + output["mean_rejected_logits"] = mean_rejected_logits + + if self.aux_loss_enabled: + output["aux_loss"] = outputs.aux_loss + + return output + + def get_batch_loss_metrics( + self, + model: Union[PreTrainedModel, nn.Module], + batch: dict[str, Union[list, torch.LongTensor]], + train_eval: Literal["train", "eval"] = "train", + ) -> tuple[torch.Tensor, dict[str, float]]: + """Compute the DPO loss and other metrics for the given batch of inputs for train or test.""" + metrics = {} + + if self.args.use_liger_loss: + model_output = self._compute_loss_liger(model, batch) + losses = model_output["loss"] + chosen_rewards = model_output["chosen_rewards"] + rejected_rewards = model_output["rejected_rewards"] + else: + model_output = self.concatenated_forward(model, batch) + + # if ref_chosen_logps and ref_rejected_logps in batch use them, otherwise use the reference model + if "ref_chosen_logps" in batch and "ref_rejected_logps" in batch: + ref_chosen_logps = batch["ref_chosen_logps"] + ref_rejected_logps = batch["ref_rejected_logps"] + else: + ref_chosen_logps, ref_rejected_logps = self.compute_ref_log_probs(batch) + + # Initialize combined losses + losses = 0 + chosen_rewards = 0 + rejected_rewards = 0 + + # Compute losses for each loss type + for idx, loss_type in enumerate(self.loss_type): + # Compute individual loss using standard DPO loss function + _losses, _chosen_rewards, _rejected_rewards = self.dpo_loss( + model_output["chosen_logps"], + model_output["rejected_logps"], + ref_chosen_logps, + ref_rejected_logps, + loss_type, + model_output, + ) + + # Add weighted contributions + weight = self.loss_weights[idx] if self.loss_weights else 1.0 + losses = losses + _losses * weight + chosen_rewards = chosen_rewards + _chosen_rewards * weight + rejected_rewards = rejected_rewards + _rejected_rewards * weight + + reward_accuracies = (chosen_rewards > rejected_rewards).float() + + if self.args.rpo_alpha is not None: + losses = losses + self.args.rpo_alpha * model_output["nll_loss"] # RPO loss from V3 of the paper + + if self.use_weighting: + losses = losses * model_output["policy_weights"] + + if self.aux_loss_enabled: + losses = losses + self.aux_loss_coef * model_output["aux_loss"] + + prefix = "eval_" if train_eval == "eval" else "" + metrics[f"{prefix}rewards/chosen"] = self.accelerator.gather_for_metrics(chosen_rewards).mean().item() + metrics[f"{prefix}rewards/rejected"] = self.accelerator.gather_for_metrics(rejected_rewards).mean().item() + metrics[f"{prefix}rewards/accuracies"] = self.accelerator.gather_for_metrics(reward_accuracies).mean().item() + metrics[f"{prefix}rewards/margins"] = ( + self.accelerator.gather_for_metrics(chosen_rewards - rejected_rewards).mean().item() + ) + metrics[f"{prefix}logps/chosen"] = ( + self.accelerator.gather_for_metrics(model_output["chosen_logps"]).detach().mean().item() + ) + metrics[f"{prefix}logps/rejected"] = ( + self.accelerator.gather_for_metrics(model_output["rejected_logps"]).detach().mean().item() + ) + metrics[f"{prefix}logits/chosen"] = ( + 
+            self.accelerator.gather_for_metrics(model_output["mean_chosen_logits"]).detach().mean().item()
+        )
+        metrics[f"{prefix}logits/rejected"] = (
+            self.accelerator.gather_for_metrics(model_output["mean_rejected_logits"]).detach().mean().item()
+        )
+        if self.args.rpo_alpha is not None or "sft" in self.loss_type:
+            metrics[f"{prefix}nll_loss"] = (
+                self.accelerator.gather_for_metrics(model_output["nll_loss"]).detach().mean().item()
+            )
+        if self.aux_loss_enabled:
+            metrics[f"{prefix}aux_loss"] = (
+                self.accelerator.gather_for_metrics(model_output["aux_loss"]).detach().mean().item()
+            )
+
+        return losses.mean(), metrics
+
+    def compute_loss(
+        self,
+        model: Union[PreTrainedModel, nn.Module],
+        inputs: dict[str, Union[torch.Tensor, Any]],
+        return_outputs=False,
+        num_items_in_batch=None,
+    ) -> Union[torch.Tensor, tuple[torch.Tensor, dict[str, float]]]:
+        compute_loss_context_manager = (
+            autocast(self.accelerator.device.type) if self._peft_has_been_casted_to_bf16 else nullcontext()
+        )
+        with compute_loss_context_manager:
+            loss, metrics = self.get_batch_loss_metrics(model, inputs, train_eval="train")
+
+        # Make sure the loss is moved back to the device where the parent `Trainer` class accumulates the loss:
+        loss = loss.to(self.args.device)
+        # force log the metrics
+        self.store_metrics(metrics, train_eval="train")
+
+        if return_outputs:
+            return loss, metrics
+
+        return loss
+
+    def generate_from_model_and_ref(self, model, batch: dict[str, torch.LongTensor]) -> tuple[str, str]:
+        """Generate samples from the model and reference model for the given batch of inputs."""
+
+        # If one uses `generate_during_eval` with peft + bf16, we need to explicitly call generate with
+        # the torch amp context manager, as some hidden states are silently cast to full precision.
+        generate_context_manager = (
+            autocast(self.accelerator.device.type) if self._peft_has_been_casted_to_bf16 else nullcontext()
+        )
+
+        with generate_context_manager:
+            policy_output = model.generate(
+                input_ids=batch["prompt_input_ids"],
+                attention_mask=batch["prompt_attention_mask"],
+                max_length=self.max_length,
+                do_sample=True,
+                pad_token_id=self.padding_value,
+            )
+
+            # If "ref_output" is already in the batch, use it; otherwise generate from the reference model
+            if "ref_output" in batch:
+                ref_output = batch["ref_output"]
+            else:
+                if self.ref_model is None:
+                    with self.null_ref_context():
+                        ref_output = self.model.generate(
+                            input_ids=batch["prompt_input_ids"],
+                            attention_mask=batch["prompt_attention_mask"],
+                            max_length=self.max_length,
+                            do_sample=True,
+                            pad_token_id=self.padding_value,
+                        )
+                else:
+                    ref_output = self.ref_model.generate(
+                        input_ids=batch["prompt_input_ids"],
+                        attention_mask=batch["prompt_attention_mask"],
+                        max_length=self.max_length,
+                        do_sample=True,
+                        pad_token_id=self.padding_value,
+                    )
+
+        policy_output = pad_to_length(policy_output, self.max_length, self.padding_value)
+        policy_output_decoded = self.processing_class.batch_decode(policy_output, skip_special_tokens=True)
+
+        ref_output = pad_to_length(ref_output, self.max_length, self.padding_value)
+        ref_output_decoded = self.processing_class.batch_decode(ref_output, skip_special_tokens=True)
+
+        return policy_output_decoded, ref_output_decoded
+
+    def prediction_step(
+        self,
+        model: Union[PreTrainedModel, nn.Module],
+        inputs: dict[str, Union[torch.Tensor, Any]],
+        prediction_loss_only: bool,
+        ignore_keys: Optional[list[str]] = None,
+    ) -> tuple[torch.Tensor, Optional[torch.Tensor], Optional[torch.Tensor]]:
+        if ignore_keys is None:
+            if hasattr(model, "config"):
+                ignore_keys = getattr(model.config, "keys_to_ignore_at_inference", [])
+            else:
+                ignore_keys = []
+
+        prediction_context_manager = (
+            autocast(self.accelerator.device.type) if self._peft_has_been_casted_to_bf16 else nullcontext()
+        )
+
+        with torch.no_grad(), prediction_context_manager:
+            loss, metrics = self.get_batch_loss_metrics(model, inputs, train_eval="eval")
+
+        # force log the metrics
+        self.store_metrics(metrics, train_eval="eval")
+
+        if prediction_loss_only:
+            return loss.detach(), None, None
+
+        # logits for the chosen and rejected samples from the model
+        logits_dict = {
+            "eval_logits/chosen": metrics["eval_logits/chosen"],
+            "eval_logits/rejected": metrics["eval_logits/rejected"],
+        }
+        logits = [v for k, v in logits_dict.items() if k not in ignore_keys]
+        logits = torch.tensor(logits, device=self.accelerator.device)
+        labels = torch.zeros(logits.shape[0], device=self.accelerator.device)
+
+        return (loss.detach(), logits, labels)
+
+    def store_metrics(self, metrics: dict[str, float], train_eval: Literal["train", "eval"] = "train") -> None:
+        for key, value in metrics.items():
+            self._stored_metrics[train_eval][key].append(value)
+
+    def evaluation_loop(
+        self,
+        dataloader: DataLoader,
+        description: str,
+        prediction_loss_only: Optional[bool] = None,
+        ignore_keys: Optional[list[str]] = None,
+        metric_key_prefix: str = "eval",
+    ) -> EvalLoopOutput:
+        """
+        Overriding the built-in evaluation loop to store metrics for each batch. Prediction/evaluation loop, shared
+        by `Trainer.evaluate()` and `Trainer.predict()`.
+
+        Works both with and without labels.
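+
+        When `generate_during_eval=True`, one randomly sampled eval batch is additionally decoded from both the
+        policy and the reference model and logged as a "game_log" table to whichever trackers are configured
+        (wandb, comet_ml, or mlflow).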
+ """ + + # Sample and save to game log if requested (for one batch to save time) + if self.generate_during_eval: + # Generate random indices within the range of the total number of samples + num_samples = len(dataloader.dataset) + random_indices = random.sample(range(num_samples), k=self.args.eval_batch_size) + + # Use dataloader.dataset.select to get the random batch without iterating over the DataLoader + random_batch_dataset = dataloader.dataset.select(random_indices) + random_batch = self.data_collator(random_batch_dataset) + random_batch = self._prepare_inputs(random_batch) + + policy_output_decoded, ref_output_decoded = self.generate_from_model_and_ref(self.model, random_batch) + + table = pd.DataFrame( + columns=["Prompt", "Policy", "Ref Model"], + data=[ + [prompt, pol[len(prompt) :], ref[len(prompt) :]] + for prompt, pol, ref in zip( + random_batch_dataset["prompt"], policy_output_decoded, ref_output_decoded + ) + ], + ) + if "wandb" in self.args.report_to and self.accelerator.is_main_process: + wandb.log({"game_log": wandb.Table(data=table)}) + + if "comet_ml" in self.args.report_to: + log_table_to_comet_experiment( + name="game_log.csv", + table=table, + ) + + if "mlflow" in self.args.report_to and self.accelerator.is_main_process: + mlflow.log_table(data=table, artifact_file="game_log.json") + + # Base evaluation + initial_output = super().evaluation_loop( + dataloader, description, prediction_loss_only, ignore_keys, metric_key_prefix + ) + + return initial_output + + def log(self, logs: dict[str, float], start_time: Optional[float] = None) -> None: + """ + Log `logs` on the various objects watching training, including stored metrics. + + Args: + logs (`dict[str, float]`): + The values to log. + start_time (`float` or `None`, *optional*, defaults to `None`): + Start time of the training. + """ + # logs either has 'loss' or 'eval_loss' + train_eval = "train" if "loss" in logs else "eval" + # Add averaged stored metrics to logs + for key, metrics in self._stored_metrics[train_eval].items(): + logs[key] = torch.tensor(metrics).mean().item() + del self._stored_metrics[train_eval] + return super().log(logs, start_time) + + # Ensure the model card is saved along with the checkpoint + def _save_checkpoint(self, model, trial): + if self.args.hub_model_id is None: + model_name = Path(self.args.output_dir).name + else: + model_name = self.args.hub_model_id.split("/")[-1] + self.create_model_card(model_name=model_name) + super()._save_checkpoint(model, trial) + + def create_model_card( + self, + model_name: Optional[str] = None, + dataset_name: Optional[str] = None, + tags: Union[str, list[str], None] = None, + ): + """ + Creates a draft of a model card using the information available to the `Trainer`. + + Args: + model_name (`str` or `None`, *optional*, defaults to `None`): + Name of the model. + dataset_name (`str` or `None`, *optional*, defaults to `None`): + Name of the dataset used for training. + tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`): + Tags to be associated with the model card. 
+ """ + if not self.is_world_process_zero(): + return + + if hasattr(self.model.config, "_name_or_path") and not os.path.isdir(self.model.config._name_or_path): + base_model = self.model.config._name_or_path + else: + base_model = None + + # normalize `tags` to a mutable set + if tags is None: + tags = set() + elif isinstance(tags, str): + tags = {tags} + else: + tags = set(tags) + + if hasattr(self.model.config, "unsloth_version"): + tags.add("unsloth") + + if "JOB_ID" in os.environ: + tags.add("hf_jobs") + + tags.update(self._tag_names) + + # docstyle-ignore + citation = textwrap.dedent( + """\ + @inproceedings{rafailov2023direct, + title = {{Direct Preference Optimization: Your Language Model is Secretly a Reward Model}}, + author = {Rafael Rafailov and Archit Sharma and Eric Mitchell and Christopher D. Manning and Stefano Ermon and Chelsea Finn}, + year = 2023, + booktitle = {Advances in Neural Information Processing Systems 36: Annual Conference on Neural Information Processing Systems 2023, NeurIPS 2023, New Orleans, LA, USA, December 10 - 16, 2023}, + url = {http://papers.nips.cc/paper_files/paper/2023/hash/a85b405ed65c6477a4fe8302b5e06ce7-Abstract-Conference.html}, + editor = {Alice Oh and Tristan Naumann and Amir Globerson and Kate Saenko and Moritz Hardt and Sergey Levine}, + }""" + ) + + model_card = generate_model_card( + base_model=base_model, + model_name=model_name, + hub_model_id=self.hub_model_id, + dataset_name=dataset_name, + tags=tags, + wandb_url=wandb.run.url if is_wandb_available() and wandb.run is not None else None, + comet_url=get_comet_experiment_url(), + trainer_name="DPO", + trainer_citation=citation, + paper_title="Direct Preference Optimization: Your Language Model is Secretly a Reward Model", + paper_id="2305.18290", + ) + + model_card.save(os.path.join(self.args.output_dir, "README.md")) +class UnslothDPOTrainer(_UnslothDPOTrainer): + """ + +Trainer for Direct Preference Optimization (DPO) method. + +This class is a wrapper around the [`transformers.Trainer`] class and inherits all of its attributes and methods. + +Args: + model (`Union[str, PreTrainedModel]`): + Model to be trained. Can be either: + + - A string, being the *model id* of a pretrained model hosted inside a model repo on huggingface.co, or a + path to a *directory* containing model weights saved using + [`~transformers.PreTrainedModel.save_pretrained`], e.g., `'./my_model_directory/'`. The model is loaded + using [`~transformers.AutoModelForCausalLM.from_pretrained`] with the keyword arguments in + `args.model_init_kwargs`. + - A [`~transformers.PreTrainedModel`] object. Only causal language models are supported. + ref_model (`PreTrainedModelWrapper`): + Hugging Face transformer model with a casual language modelling head. Used for implicit reward computation + and loss. If no reference model is provided, the trainer will create a reference model with the same + architecture as the model to be optimized. + args ([`DPOConfig`], *optional*, defaults to `None`): + Configuration for this trainer. If `None`, a default configuration is used. + data_collator (`DataCollator`, *optional*): + Function to use to form a batch from a list of elements of the processed `train_dataset` or `eval_dataset`. + Will default to [`DataCollatorForPreference`]. + train_dataset ([`~datasets.Dataset`] or [`~datasets.IterableDataset`]): + Dataset to use for training. DPO supports [preference](#preference) type and. 
+
+            - [Standard](dataset_formats#standard): Each sample contains plain text.
+            - [Conversational](dataset_formats#conversational): Each sample contains structured messages (e.g., role
+              and content).
+        eval_dataset ([`~datasets.Dataset`], [`~datasets.IterableDataset`] or `dict[str, Union[Dataset, IterableDataset]]`):
+            Dataset to use for evaluation. It must meet the same requirements as `train_dataset`.
+        processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
+            Processing class used to process the data. If `None`, the processing class is loaded from the model's
+            name with [`~transformers.AutoTokenizer.from_pretrained`].
+        compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
+            The function that will be used to compute metrics at evaluation. Must take an [`EvalPrediction`] and
+            return a dictionary mapping metric names to metric values. *Note*: when passing `TrainingArgs` with
+            `batch_eval_metrics` set to `True`, your `compute_metrics` function must take a boolean `compute_result`
+            argument. This will be triggered after the last eval batch to signal that the function needs to calculate
+            and return the global summary statistics rather than accumulating the batch-level statistics.
+        callbacks (list of [`~transformers.TrainerCallback`], *optional*, defaults to `None`):
+            List of callbacks to customize the training loop. Will add those to the list of default callbacks
+            detailed [here](https://huggingface.co/docs/transformers/main_classes/callback).
+
+            If you want to remove one of the default callbacks used, use the [`~transformers.Trainer.remove_callback`]
+            method.
+        optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`, *optional*, defaults to `(None, None)`):
+            A tuple containing the optimizer and the scheduler to use. Will default to an instance of [`AdamW`] on
+            your model and a scheduler given by [`get_linear_schedule_with_warmup`] controlled by `args`.
+        optimizer_cls_and_kwargs (`Tuple[Type[torch.optim.Optimizer], Dict[str, Any]]`, *optional*, defaults to `None`):
+            A tuple containing the optimizer class and keyword arguments to use. Overrides `optim` and `optim_args`
+            in `args`. Incompatible with the `optimizers` argument.
+        preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`, *optional*, defaults to `None`):
+            A function that preprocesses the logits right before caching them at each evaluation step. Must take two
+            tensors, the logits and the labels, and return the logits once processed as desired. The modifications
+            made by this function will be reflected in the predictions received by `compute_metrics`.
+
+            Note that the labels (second parameter) will be `None` if the dataset does not have them.
+        peft_config ([`~peft.PeftConfig`], *optional*, defaults to `None`):
+            PEFT configuration used to wrap the model. If `None`, the model is not wrapped.
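+
+    Example (a minimal usage sketch; the dataset name, `model`, and `tokenizer` are illustrative placeholders rather
+    than part of this class's contract):
+
+    ```python
+    >>> from datasets import load_dataset
+
+    >>> train_dataset = load_dataset("trl-lib/ultrafeedback_binarized", split="train")
+    >>> trainer = UnslothDPOTrainer(
+    ...     model=model,  # e.g. a model prepared by Unsloth's FastLanguageModel
+    ...     args=UnslothDPOConfig(output_dir="out"),
+    ...     train_dataset=train_dataset,
+    ...     processing_class=tokenizer,
+    ... )
+    >>> trainer.train()
+    ```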
+ + """ + def __init__( + self, + model, + ref_model = None, + args = None, + data_collator = None, + train_dataset = None, + eval_dataset = None, + processing_class = None, + compute_metrics = None, + callbacks = None, + optimizer_cls_and_kwargs = None, + preprocess_logits_for_metrics = None, + peft_config = None, + **kwargs + ): + if args is None: args = UnslothDPOConfig() + use_bf16 = getattr(args, 'bf16', False) + if type(use_bf16) is not bool: use_bf16 = False + use_fp16 = getattr(args, 'fp16', False) + if type(use_fp16) is not bool: use_fp16 = False + force_float32 = False + full_finetuning = os.environ.get('UNSLOTH_ENABLE_FULL_FINETUNING', '0') == '1' + if not full_finetuning and (os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '1'): + print('Unsloth: Switching to float32 training since model cannot work with float16') + force_float32 = True + mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') + dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None) + if dtype is None: dtype = model.get_input_embeddings().dtype + from unsloth_zoo.utils import _get_dtype + dtype = _get_dtype(dtype) + float16 = dtype == torch.float16 + if not force_float32 and (float16 and use_bf16): raise TypeError('Unsloth: Model is in float16 precision but you want to use bfloat16 precision. Set fp16 to `True` and bf16 to `False`') + if not force_float32 and (not float16 and use_fp16): raise TypeError('Unsloth: Model is in bfloat16 precision but you want to use float16 precision. Set fp16 to `False` and bf16 to `True`') + if force_float32: + # Forced float32 training + args.fp16 = False + args.bf16 = False + os.environ['ACCELERATE_MIXED_PRECISION'] = 'no' + elif (not use_bf16 and not use_fp16) and mixed_precision_dtype == 'float32': + # Mixed precision training + args.fp16 = float16 + args.bf16 = not float16 + os.environ['ACCELERATE_MIXED_PRECISION'] = 'fp16' if float16 else 'bf16' + if getattr(args, 'eval_dataset', None) is not None and getattr(args, 'eval_strategy', 'no') == 'no': + args.eval_strategy = 'steps' + if getattr(args, 'eval_steps', None) is None: args.eval_steps = 0.1 + ga_steps = getattr(args, 'gradient_accumulation_steps', None) + if ga_steps is not None and ga_steps > 1: + from transformers import __version__ as transformers_version + if Version(transformers_version) <= Version('4.45.2'): + print('**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!\n' + '`pip install --upgrade --no-cache-dir --force-reinstall --no-deps unsloth transformers trl unsloth_zoo`') + if getattr(args, 'eval_strategy', 'no') != 'no': + eval_bsz = getattr(args, 'per_device_eval_batch_size', 8) + if eval_bsz == 8 and args.per_device_train_batch_size < eval_bsz: args.per_device_eval_batch_size = args.per_device_train_batch_size + if getattr(args, 'eval_accumulation_steps', None) is None and ga_steps is not None: args.eval_accumulation_steps = ga_steps + fp16_full_eval = getattr(args, 'fp16_full_eval', False) + if type(fp16_full_eval) is not bool: fp16_full_eval = False + bf16_full_eval = getattr(args, 'bf16_full_eval', False) + if type(bf16_full_eval) is not bool: bf16_full_eval = False + if args.fp16 and bf16_full_eval: args.bf16_full_eval = False; args.fp16_full_eval = True + if args.bf16 and fp16_full_eval: args.bf16_full_eval = True; args.fp16_full_eval = False + if force_float32: + args.bf16_full_eval = False + args.fp16_full_eval = False + elif os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') == 
'bfloat16': + args.bf16_full_eval = True + args.fp16_full_eval = False + elif not bf16_full_eval and not fp16_full_eval: + args.bf16_full_eval = args.bf16 + args.fp16_full_eval = args.fp16 + _output_logits = False + if locals().get('compute_metrics', None) is not None: _output_logits = True + if locals().get('preprocess_logits_for_metrics', None) is not None: _output_logits = True + if _output_logits: + os.environ['UNSLOTH_RETURN_LOGITS'] = '1' + if 'max_seq_length' not in locals() and not hasattr(args, 'max_seq_length'): + pass + else: + model_max_seq_length = getattr(model, 'max_seq_length', None) + args_max_seq_length = getattr(args, 'max_seq_length', None) + if args_max_seq_length is None and model_max_seq_length is not None: + max_seq_length = model.max_seq_length + if hasattr(args, 'max_seq_length'): args.max_seq_length = max_seq_length + if model is not None and hasattr(model, 'for_training'): + model.for_training() + if 'tokenizer' in locals() and hasattr(tokenizer, 'padding_side'): tokenizer.padding_side = 'right' + if 'processing_class' in locals(): + if hasattr(processing_class, 'padding_side'): processing_class.padding_side = 'right' + if hasattr(processing_class, 'tokenizer') and hasattr(processing_class.tokenizer, 'padding_side'): processing_class.tokenizer.padding_side = 'right' + __tokenizer = processing_class if 'processing_class' in locals() else tokenizer + from unsloth_zoo.vision_utils import UnslothVisionDataCollator + if not isinstance(data_collator, UnslothVisionDataCollator): + if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names: + data_collator = DataCollatorForSeq2Seq( + __tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False + if hasattr(args, 'dataset_text_field'): args.dataset_text_field = '' + if hasattr(args, 'dataset_kwargs'): args.dataset_kwargs = {'skip_prepare_dataset': True} + if not isinstance(data_collator, UnslothVisionDataCollator): + if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'): + if isinstance(data_collator, DataCollatorForSeq2Seq): + data_collator = DataCollatorForSeq2Seq( + __tokenizer.tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer.tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + other_metrics = [] + + from unsloth_zoo.logging_utils import PatchRLStatistics + PatchRLStatistics('dpo_trainer', other_metrics) + if hasattr(train_dataset, 'column_names'): + column_names = set(train_dataset.column_names) + check = ['chosen', 'rejected', 'prompt', 'chosen_input_ids', 'chosen_attention_mask', + 'chosen_labels', 'rejected_input_ids', 'rejected_attention_mask', 'rejected_labels', + 'prompt_input_ids', 'prompt_attention_mask'] + if all(x in column_names for x in check): + train_dataset = train_dataset.remove_columns(['chosen', 'rejected', 'prompt']) + del check, column_names + + # [TODO] Fix up DataParallel multiplying batch sizes + # [TODO] DDP works, but DP seems to not 
work? [TODO]
+        if getattr(args, "parallel_mode", None) == ParallelMode.NOT_DISTRIBUTED and args.n_gpu > 1:
+            if getattr(args, "_n_gpu", 1) != 1:
+                args._n_gpu = 1
+        if "model" in locals() and hasattr(model, "for_training"):
+            model.for_training()
+        super().__init__(
+            model = model,
+            ref_model = ref_model,
+            args = args,
+            data_collator = data_collator,
+            train_dataset = train_dataset,
+            eval_dataset = eval_dataset,
+            processing_class = processing_class,
+            compute_metrics = compute_metrics,
+            callbacks = callbacks,
+            optimizer_cls_and_kwargs = optimizer_cls_and_kwargs,
+            preprocess_logits_for_metrics = preprocess_logits_for_metrics,
+            peft_config = peft_config,**kwargs)
+        if "model" in locals() and hasattr(model, "for_inference"):
+            model.for_inference()
+        if hasattr(self, 'neftune_hook_handle'):
+            self.neftune_hook_handle.remove()
+        if hasattr(self, 'neftune_hook_handle'): del self.neftune_hook_handle
+        if getattr(args, 'neftune_noise_alpha', None) is not None:
+            model.get_input_embeddings().neftune_noise_alpha = self.neftune_noise_alpha
+        pass
+        if hasattr(self, 'accelerator'):
+            scaler = self.accelerator.scaler
+            current_model = model
+            while hasattr(current_model, 'model'):
+                current_model.accelerator_scaler = scaler
+                current_model = current_model.model
+            current_model.accelerator_scaler = scaler
+        pass
+        if hasattr(self, 'train'):
+            self.train = MethodType(prepare_for_training_mode(self.__class__.train), self)
+        pass
+
+pass
+
+
+if hasattr(logger, "addFilter"):
+    import logging
+    class HideLoggingMessage(logging.Filter):
+        def __init__(self, text): self.text = text
+        def filter(self, x): return not (self.text in x.getMessage())
+    pass
+    logger.addFilter(HideLoggingMessage("`use_cache=True`"))
+
diff --git a/unsloth_compiled_cache/UnslothGKDTrainer.py b/unsloth_compiled_cache/UnslothGKDTrainer.py
new file mode 100644
index 0000000000000000000000000000000000000000..70a6a78061a2b11073b086a5c965d3dba78fb272
--- /dev/null
+++ b/unsloth_compiled_cache/UnslothGKDTrainer.py
@@ -0,0 +1,1220 @@
+"""
+2025.11.2
+2025.11.1
+4.57.2
+0.23.0
+__UNSLOTH_VERSIONING__
+"""
+
+# Unsloth auto generated code
+# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+ +from torch import Tensor +import torch +import torch.nn as nn +from torch.nn import functional as F +from typing import Any, List, Optional, Tuple, Union, Dict, Set, Callable +from trl.trainer.gkd_trainer import (Any, AutoModelForCausalLM, BaseImageProcessor, Callable, DataCollator, DataCollatorForChatML, Dataset, EvalPrediction, F, FeatureExtractionMixin, GKDConfig, GKDTrainer, GenerationConfig, Optional, PeftConfig, PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin, SFTTrainer, TrainerCallback, Union, disable_dropout_in_model, empty_cache, generate_model_card, get_comet_experiment_url, is_wandb_available, nn, os, prepare_deepspeed, random, textwrap, torch, unwrap_model_for_generation) + + +import os +from typing import * +from dataclasses import dataclass, field +from packaging.version import Version +import torch +import numpy as np +from contextlib import nullcontext +from torch.nn import functional as F +from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling as TransformersDataCollatorForLanguageModeling +from transformers.training_args import ParallelMode + +# Wrap trainer with padding to right and enable training mode +import functools +from types import MethodType +def prepare_for_training_mode(f): + @functools.wraps(f) + def wrapper(self, *args, **kwargs): + # Enable training mode + if hasattr(self, 'model') and hasattr(self.model, "for_training"): + self.model.for_training() + output = f(self, *args, **kwargs) + # Return inference mode + if hasattr(self, 'model') and hasattr(self.model, "for_inference"): + self.model.for_inference() + return output + return wrapper +pass + +torch_compile_options = { + "epilogue_fusion" : True, + "max_autotune" : False, + "shape_padding" : True, + "trace.enabled" : False, + "triton.cudagraphs" : False, +} + +@torch.compile(dynamic = True, fullgraph = True, options = torch_compile_options,) +def chunked_selective_log_softmax(logits, index): + # Split into 4 chunks only + chunked_logits = torch.chunk(logits.reshape(-1, logits.shape[-1]), chunks = 4, dim = 0) + chunked_index = torch.chunk(index.reshape(-1), chunks = 4, dim = 0) + all_per_token_logps = [] + # Below loop does the same as selective_log_softmax(chunk_logits, chunk_index) + for chunk_logits, chunk_index in zip(chunked_logits, chunked_index): + chunk_logits = chunk_logits.to(torch.float32) + selected_logits = torch.gather(chunk_logits, dim = -1, index = chunk_index.unsqueeze(-1)).squeeze(-1) + logsumexp_values = torch.logsumexp(chunk_logits, dim = -1) + per_token_logps = selected_logits - logsumexp_values + all_per_token_logps.append(per_token_logps) + pass + all_per_token_logps = torch.concat(all_per_token_logps) + all_per_token_logps = all_per_token_logps.reshape((logits.shape[0], logits.shape[1])) + return all_per_token_logps + +def calculate_pad_tokens_in_prompt( + input_ids: torch.Tensor, + logits_to_keep: int, + pad_token_id: int +) -> torch.Tensor: + """ + Given prompt tensor, it returns all the left padded tokens in that sequence. 
For example, [pad, pad, pad, cat] counts as 3 left-pad tokens.
+    """
+    if logits_to_keep >= input_ids.shape[1]:
+        raise ValueError("logits_to_keep must be smaller than the sequence length.")
+
+    prompt_section = input_ids[:, :-logits_to_keep]
+
+    padding_mask = (prompt_section == pad_token_id)
+
+    pad_token_counts = padding_mask.sum(dim=1)
+
+    return pad_token_counts
+
+def create_completion_attention_mask(
+    completion_input_ids: torch.Tensor,
+    left_pad_tokens_per_prompt: torch.Tensor,
+    max_left_pad: int,
+    pad_token_id: int
+) -> torch.Tensor:
+    """
+    Given a sequence [p, p, p, c, c, c, pad, pad, pad], where p are extra prompt tokens left over
+    from slicing the tensor, c are completion tokens, and pad are padding tokens, this function
+    builds a completion mask that zeroes out the p and pad tokens: in this example,
+    [0, 0, 0, 1, 1, 1, 0, 0, 0].
+    """
+    batch_size, completion_len = completion_input_ids.shape
+    device = completion_input_ids.device
+
+    num_tokens_to_mask = max_left_pad - left_pad_tokens_per_prompt
+
+    indices = torch.arange(completion_len, device=device).unsqueeze(0)
+    shift_mask = indices >= num_tokens_to_mask.unsqueeze(1)
+
+    non_padding_mask = (completion_input_ids != pad_token_id)
+
+    final_mask = shift_mask & non_padding_mask
+
+    return final_mask
+
+def left_pack_padding(tensor: torch.Tensor, pad_id: int) -> torch.Tensor:
+    """
+    Moves all padding tokens in each sequence of a batch to the right (i.e., left-packs the real tokens).
+    """
+    mask = (tensor != pad_id)
+    # Must use stable=True: the boolean mask has many ties, and a stable sort preserves token order
+    sorted_indices = torch.argsort(mask, dim=1, descending=True, stable=True)
+    packed_tensor = torch.gather(tensor, 1, sorted_indices)
+    return packed_tensor
+
+def align_logprobs_with_mask(
+    logprob_tensor: torch.Tensor,
+    attention_mask: torch.Tensor,
+    pad_value: float = 0.0
+) -> torch.Tensor:
+    """
+    Aligns a log probability tensor with a given attention mask.
+    """
+
+    device = logprob_tensor.device
+    batch_size, logprob_seq_len = logprob_tensor.shape
+    mask_seq_len = attention_mask.shape[1]
+
+    padded_logprobs = torch.full(
+        attention_mask.shape,
+        fill_value=pad_value,
+        dtype=logprob_tensor.dtype,
+        device=device
+    )
+
+    left_pad_counts = torch.argmax(attention_mask, dim=1)
+
+    cols = torch.arange(logprob_seq_len, device=device)
+    dest_indices = left_pad_counts.unsqueeze(1) + cols
+
+    # Create destination row indices
+    # Shape: [batch_size, logprob_seq_len]
+    row_indices = torch.arange(batch_size, device=device).unsqueeze(1).expand_as(dest_indices)
+
+    # Filter out-of-bounds indices and perform the assignment.
+    # Create a mask to identify only the indices that are within the bounds
+    # of the target tensor's sequence length.
+    valid_mask = dest_indices < mask_seq_len
+
+    # Use this mask to select only the valid row indices, column indices,
+    # and the corresponding values from the logprob tensor.
+    # This flattens the selected elements into 1D tensors.
+    valid_rows = row_indices[valid_mask]
+    valid_cols = dest_indices[valid_mask]
+    valid_vals = logprob_tensor[valid_mask]
+
+    # Place the valid values into their correct positions in the padded tensor
+    # using a single, efficient advanced indexing operation.
+    padded_logprobs[valid_rows, valid_cols] = valid_vals
+
+    return padded_logprobs
+@dataclass
+class UnslothGKDConfig(GKDConfig):
+    """
+
+Configuration class for [`GKDTrainer`].
+
+This class includes only the parameters that are specific to GKD training. For a full list of training arguments,
+please refer to the [`~transformers.TrainingArguments`] and [`SFTConfig`] documentation.
+ +Args: + temperature (`float`, *optional*, defaults to `0.9`): + Temperature for sampling. The higher the temperature, the more random the completions. + lmbda (`float`, *optional*, defaults to `0.5`): + Lambda parameter that controls the student data fraction (i.e., the proportion of on-policy + student-generated outputs). + beta (`float`, *optional*, defaults to `0.5`): + Interpolation coefficient between `0.0` and `1.0` of the Generalized Jensen-Shannon Divergence loss. When + beta is `0.0`, the loss is the KL divergence. When beta is `1.0`, the loss is the Inverse KL Divergence. + max_new_tokens (`int`, *optional*, defaults to `128`): + Maximum number of tokens to generate per completion. + teacher_model_name_or_path (`str` or `None`, *optional*, defaults to `None`): + Model name or path of the teacher model. If `None`, the teacher model will be the same as the model being + trained. + teacher_model_init_kwargs (`dict[str, Any]]` or `None`, *optional*, defaults to `None`): + Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the teacher model + from a string. + disable_dropout (`bool`, *optional*, defaults to `True`): + Whether to disable dropout in the model. + seq_kd (`bool`, *optional*, defaults to `False`): + Seq_kd parameter that controls whether to perform Sequence-Level KD (can be viewed as supervised FT on + teacher-generated output). + + """ + vllm_sampling_params: Optional[Any] = field( + default = None, + metadata = {'help': 'vLLM SamplingParams'}, + ) + unsloth_num_chunks : Optional[int] = field( + default = -1, + metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'}, + ) + max_seq_length : Optional[int] = field( + default = None, + metadata = {'help': 'Maximum sequence length to truncate to.'}, + ) + def __init__( + self, + output_dir = None, + overwrite_output_dir = None, + do_train = False, + do_eval = False, + do_predict = False, + eval_strategy = 'no', + prediction_loss_only = False, + per_device_train_batch_size = 4, + per_device_eval_batch_size = 4, + per_gpu_train_batch_size = None, + per_gpu_eval_batch_size = None, + gradient_accumulation_steps = 2, + eval_accumulation_steps = 2, + eval_delay = 0, + torch_empty_cache_steps = 250, + learning_rate = 5e-05, + weight_decay = 0.01, + adam_beta1 = 0.9, + adam_beta2 = 0.999, + adam_epsilon = 1e-08, + max_grad_norm = 1.0, + num_train_epochs = 3.0, + max_steps = -1, + lr_scheduler_type = 'linear', + warmup_ratio = 0.1, + warmup_steps = 0, + log_level = 'passive', + log_level_replica = 'warning', + log_on_each_node = True, + logging_dir = None, + logging_strategy = 'steps', + logging_first_step = False, + logging_steps = 1, + logging_nan_inf_filter = False, + save_strategy = 'steps', + save_steps = 500, + save_total_limit = None, + save_safetensors = True, + save_on_each_node = False, + save_only_model = False, + restore_callback_states_from_checkpoint = False, + no_cuda = False, + use_cpu = False, + use_mps_device = False, + seed = 3407, + data_seed = 3407, + jit_mode_eval = False, + bf16 = False, + fp16 = False, + fp16_opt_level = 'O1', + half_precision_backend = 'auto', + bf16_full_eval = False, + fp16_full_eval = False, + tf32 = None, + local_rank = -1, + ddp_backend = None, + tpu_num_cores = None, + tpu_metrics_debug = False, + debug = '', + dataloader_drop_last = False, + eval_steps = None, + dataloader_num_workers = 0, + dataloader_prefetch_factor = None, + past_index = -1, + run_name = None, + disable_tqdm = None, + remove_unused_columns = True, + 
label_names = None,
+        load_best_model_at_end = False,
+        metric_for_best_model = None,
+        greater_is_better = None,
+        ignore_data_skip = False,
+        fsdp = None,
+        fsdp_min_num_params = 0,
+        fsdp_config = None,
+        fsdp_transformer_layer_cls_to_wrap = None,
+        accelerator_config = None,
+        parallelism_config = None,
+        deepspeed = None,
+        label_smoothing_factor = 0.0,
+        optim = 'adamw_8bit',
+        optim_args = None,
+        adafactor = False,
+        group_by_length = False,
+        length_column_name = 'length',
+        report_to = None,
+        project = 'huggingface',
+        trackio_space_id = 'trackio',
+        ddp_find_unused_parameters = None,
+        ddp_bucket_cap_mb = None,
+        ddp_broadcast_buffers = None,
+        dataloader_pin_memory = True,
+        dataloader_persistent_workers = False,
+        skip_memory_metrics = True,
+        use_legacy_prediction_loop = False,
+        push_to_hub = False,
+        resume_from_checkpoint = None,
+        hub_model_id = None,
+        hub_strategy = 'every_save',
+        hub_token = None,
+        hub_private_repo = None,
+        hub_always_push = False,
+        hub_revision = None,
+        gradient_checkpointing = True,
+        gradient_checkpointing_kwargs = None,
+        include_inputs_for_metrics = False,
+        eval_do_concat_batches = True,
+        fp16_backend = 'auto',
+        push_to_hub_model_id = None,
+        push_to_hub_organization = None,
+        push_to_hub_token = None,
+        mp_parameters = '',
+        auto_find_batch_size = False,
+        full_determinism = False,
+        torchdynamo = None,
+        ray_scope = 'last',
+        ddp_timeout = 1800,
+        torch_compile = False,
+        torch_compile_backend = None,
+        torch_compile_mode = None,
+        include_tokens_per_second = False,
+        include_num_input_tokens_seen = False,
+        neftune_noise_alpha = None,
+        optim_target_modules = None,
+        batch_eval_metrics = False,
+        eval_on_start = False,
+        use_liger_kernel = False,
+        liger_kernel_config = None,
+        eval_use_gather_object = False,
+        average_tokens_across_devices = True,
+        model_init_kwargs = None,
+        chat_template_path = None,
+        dataset_text_field = 'text',
+        dataset_kwargs = None,
+        dataset_num_proc = None,
+        eos_token = None,
+        pad_token = None,
+        max_length = 1024,
+        packing = False,
+        packing_strategy = 'bfd',
+        padding_free = False,
+        pad_to_multiple_of = None,
+        eval_packing = None,
+        completion_only_loss = None,
+        assistant_only_loss = False,
+        loss_type = 'nll',
+        activation_offloading = False,
+        temperature = 0.9,
+        lmbda = 0.5,
+        beta = 0.5,
+        max_new_tokens = 128,
+        teacher_model_name_or_path = None,
+        teacher_model_init_kwargs = None,
+        disable_dropout = True,
+        seq_kd = False,
+        vllm_sampling_params = None,
+        unsloth_num_chunks = -1,
+        max_seq_length = None,
+        **kwargs,
+    ):
+        if learning_rate < 1e-7: print(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
+        if learning_rate > 1: print(f'Unsloth: Your learning rate of `{learning_rate}` is way too large (> 1)! Consider decreasing it to 1e-1, otherwise gradient updates will explode!')
+        if output_dir is None and save_strategy == 'steps' and save_steps == 500:
+            output_dir = 'unsloth_training_checkpoints'
+            save_strategy = 'no'
+        if dataset_num_proc is None:
+            from multiprocessing import cpu_count
+            dataset_num_proc = min(max(cpu_count()+4, 2), 64)
+        if os.environ.get('UNSLOTH_ENABLE_FLEX_ATTENTION', '0') == '1':
+            from unsloth_zoo.flex_attention import HAS_FLEX_ATTENTION
+            if HAS_FLEX_ATTENTION and pad_to_multiple_of is None:
+                from unsloth_zoo.flex_attention import FLEX_ATTENTION_BLOCK_SIZE
+                pad_to_multiple_of = FLEX_ATTENTION_BLOCK_SIZE
+
+        # ValueError is used here; the original generated code raised an undefined `MathError`.
+        if temperature <= 0:
+            raise ValueError('Unsloth: Please set a positive non-zero temperature since your results will be wrong.')
+        elif temperature >= 10:
+            raise ValueError('Unsloth: Please set a positive non-zero temperature less than 10, since sampling will be quite erratic.')
+
+
+        super().__init__(
+            output_dir = output_dir,
+            overwrite_output_dir = overwrite_output_dir,
+            do_train = do_train,
+            do_eval = do_eval,
+            do_predict = do_predict,
+            eval_strategy = eval_strategy,
+            prediction_loss_only = prediction_loss_only,
+            per_device_train_batch_size = per_device_train_batch_size,
+            per_device_eval_batch_size = per_device_eval_batch_size,
+            per_gpu_train_batch_size = per_gpu_train_batch_size,
+            per_gpu_eval_batch_size = per_gpu_eval_batch_size,
+            gradient_accumulation_steps = gradient_accumulation_steps,
+            eval_accumulation_steps = eval_accumulation_steps,
+            eval_delay = eval_delay,
+            torch_empty_cache_steps = torch_empty_cache_steps,
+            learning_rate = learning_rate,
+            weight_decay = weight_decay,
+            adam_beta1 = adam_beta1,
+            adam_beta2 = adam_beta2,
+            adam_epsilon = adam_epsilon,
+            max_grad_norm = max_grad_norm,
+            num_train_epochs = num_train_epochs,
+            max_steps = max_steps,
+            lr_scheduler_type = lr_scheduler_type,
+            warmup_ratio = warmup_ratio,
+            warmup_steps = warmup_steps,
+            log_level = log_level,
+            log_level_replica = log_level_replica,
+            log_on_each_node = log_on_each_node,
+            logging_dir = logging_dir,
+            logging_strategy = logging_strategy,
+            logging_first_step = logging_first_step,
+            logging_steps = logging_steps,
+            logging_nan_inf_filter = logging_nan_inf_filter,
+            save_strategy = save_strategy,
+            save_steps = save_steps,
+            save_total_limit = save_total_limit,
+            save_safetensors = save_safetensors,
+            save_on_each_node = save_on_each_node,
+            save_only_model = save_only_model,
+            restore_callback_states_from_checkpoint = restore_callback_states_from_checkpoint,
+            no_cuda = no_cuda,
+            use_cpu = use_cpu,
+            use_mps_device = use_mps_device,
+            seed = seed,
+            data_seed = data_seed,
+            jit_mode_eval = jit_mode_eval,
+            bf16 = bf16,
+            fp16 = fp16,
+            fp16_opt_level = fp16_opt_level,
+            half_precision_backend = half_precision_backend,
+            bf16_full_eval = bf16_full_eval,
+            fp16_full_eval = fp16_full_eval,
+            tf32 = tf32,
+            local_rank = local_rank,
+            ddp_backend = ddp_backend,
+            tpu_num_cores = tpu_num_cores,
+            tpu_metrics_debug = tpu_metrics_debug,
+            debug = debug,
+            dataloader_drop_last = dataloader_drop_last,
+            eval_steps = eval_steps,
+            dataloader_num_workers = dataloader_num_workers,
+            dataloader_prefetch_factor = dataloader_prefetch_factor,
+            past_index = past_index,
+            run_name = run_name,
+            disable_tqdm = disable_tqdm,
+            remove_unused_columns = remove_unused_columns,
+            label_names = label_names,
+            load_best_model_at_end = load_best_model_at_end,
+            metric_for_best_model = metric_for_best_model,
+            greater_is_better = greater_is_better,
+            ignore_data_skip =
ignore_data_skip, + fsdp = fsdp, + fsdp_min_num_params = fsdp_min_num_params, + fsdp_config = fsdp_config, + fsdp_transformer_layer_cls_to_wrap = fsdp_transformer_layer_cls_to_wrap, + accelerator_config = accelerator_config, + parallelism_config = parallelism_config, + deepspeed = deepspeed, + label_smoothing_factor = label_smoothing_factor, + optim = optim, + optim_args = optim_args, + adafactor = adafactor, + group_by_length = group_by_length, + length_column_name = length_column_name, + report_to = report_to, + project = project, + trackio_space_id = trackio_space_id, + ddp_find_unused_parameters = ddp_find_unused_parameters, + ddp_bucket_cap_mb = ddp_bucket_cap_mb, + ddp_broadcast_buffers = ddp_broadcast_buffers, + dataloader_pin_memory = dataloader_pin_memory, + dataloader_persistent_workers = dataloader_persistent_workers, + skip_memory_metrics = skip_memory_metrics, + use_legacy_prediction_loop = use_legacy_prediction_loop, + push_to_hub = push_to_hub, + resume_from_checkpoint = resume_from_checkpoint, + hub_model_id = hub_model_id, + hub_strategy = hub_strategy, + hub_token = hub_token, + hub_private_repo = hub_private_repo, + hub_always_push = hub_always_push, + hub_revision = hub_revision, + gradient_checkpointing = gradient_checkpointing, + gradient_checkpointing_kwargs = gradient_checkpointing_kwargs, + include_inputs_for_metrics = include_inputs_for_metrics, + eval_do_concat_batches = eval_do_concat_batches, + fp16_backend = fp16_backend, + push_to_hub_model_id = push_to_hub_model_id, + push_to_hub_organization = push_to_hub_organization, + push_to_hub_token = push_to_hub_token, + mp_parameters = mp_parameters, + auto_find_batch_size = auto_find_batch_size, + full_determinism = full_determinism, + torchdynamo = torchdynamo, + ray_scope = ray_scope, + ddp_timeout = ddp_timeout, + torch_compile = torch_compile, + torch_compile_backend = torch_compile_backend, + torch_compile_mode = torch_compile_mode, + include_tokens_per_second = include_tokens_per_second, + include_num_input_tokens_seen = include_num_input_tokens_seen, + neftune_noise_alpha = neftune_noise_alpha, + optim_target_modules = optim_target_modules, + batch_eval_metrics = batch_eval_metrics, + eval_on_start = eval_on_start, + use_liger_kernel = use_liger_kernel, + liger_kernel_config = liger_kernel_config, + eval_use_gather_object = eval_use_gather_object, + average_tokens_across_devices = average_tokens_across_devices, + model_init_kwargs = model_init_kwargs, + chat_template_path = chat_template_path, + dataset_text_field = dataset_text_field, + dataset_kwargs = dataset_kwargs, + dataset_num_proc = dataset_num_proc, + eos_token = eos_token, + pad_token = pad_token, + max_length = max_length, + packing = packing, + packing_strategy = packing_strategy, + padding_free = padding_free, + pad_to_multiple_of = pad_to_multiple_of, + eval_packing = eval_packing, + completion_only_loss = completion_only_loss, + assistant_only_loss = assistant_only_loss, + loss_type = loss_type, + activation_offloading = activation_offloading, + temperature = temperature, + lmbda = lmbda, + beta = beta, + max_new_tokens = max_new_tokens, + teacher_model_name_or_path = teacher_model_name_or_path, + teacher_model_init_kwargs = teacher_model_init_kwargs, + disable_dropout = disable_dropout, + seq_kd = seq_kd,**kwargs) + self.vllm_sampling_params = vllm_sampling_params + self.unsloth_num_chunks = unsloth_num_chunks + self.max_seq_length = max_seq_length +pass + +class _UnslothGKDTrainer(SFTTrainer): + """Trainer for Generalized Knowledge 
Distillation (GKD) of language models.
+
+    For details on GKD, see the paper: [On-Policy Distillation of Language Models: Learning from Self-Generated
+    Mistakes](https://huggingface.co/papers/2306.13649).
+
+    Args:
+        model ([`~transformers.PreTrainedModel`] or `torch.nn.Module` or `str`, *optional*):
+            Model to be trained, or the string identifier of the model to be instantiated from a pretrained model.
+        teacher_model ([`~transformers.PreTrainedModel`] or `torch.nn.Module` or `str`, *optional*):
+            Teacher model for knowledge distillation, or the string identifier of the model to be instantiated from a
+            pretrained model.
+        args ([`GKDConfig`], *optional*):
+            Training arguments.
+        data_collator ([`~transformers.DataCollator`], *optional*):
+            Data collator to batch samples from the dataset. It defaults to a [`DataCollatorForChatML`] using the
+            `processing_class`.
+        train_dataset ([`~datasets.Dataset`], *optional*):
+            Dataset for training.
+        eval_dataset ([`~datasets.Dataset`] or `dict` of [`~datasets.Dataset`], *optional*):
+            Dataset for evaluation.
+        processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*):
+            Class to process the data.
+        compute_metrics (`Callable`, *optional*):
+            Function to compute metrics at evaluation. Must take in an [`~transformers.EvalPrediction`] and return a
+            dictionary mapping metric names to float values.
+        callbacks (`list` of [`~transformers.TrainerCallback`], *optional*):
+            Callbacks to use during training.
+        optimizers (`tuple` of `torch.optim.Optimizer` and `torch.optim.lr_scheduler.LambdaLR`, *optional*, defaults to `(None, None)`):
+            Tuple containing the optimizer and the learning rate scheduler to use for training.
+        preprocess_logits_for_metrics (`Callable`, *optional*):
+            Function to preprocess the logits before computing the metrics. Must take in the `logits` and `labels` and
+            return the logits to be used for metrics computation.
+        peft_config ([`~peft.config.PeftConfig`], *optional*):
+            PEFT configuration to use PEFT for training. If `None`, PEFT is not used. If provided, the `model` will be
+            wrapped with the specified PEFT adapter.
+        formatting_func (`Callable`, *optional*):
+            Function to format the dataset. Must take in an example and return an example.
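+
+    Example (minimal illustrative sketch; the model and dataset ids below are placeholders, not
+    something this file prescribes):
+
+    ```python
+    from datasets import load_dataset
+    from transformers import AutoTokenizer
+
+    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
+    train_dataset = load_dataset("trl-lib/chatbot_arena_completions", split="train")
+
+    trainer = UnslothGKDTrainer(
+        model = "Qwen/Qwen2-0.5B-Instruct",          # student (placeholder id)
+        teacher_model = "Qwen/Qwen2-1.5B-Instruct",  # teacher (placeholder id)
+        args = UnslothGKDConfig(output_dir = "gkd-out", per_device_train_batch_size = 1),
+        processing_class = tokenizer,
+        train_dataset = train_dataset,
+    )
+    trainer.train()
+    ```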
+ """ + + _tag_names = ["trl", "gkd"] + + def __init__( + self, + model: Optional[Union[PreTrainedModel, nn.Module, str]] = None, + teacher_model: Union[PreTrainedModel, nn.Module, str] = None, + args: Optional[GKDConfig] = None, + data_collator: Optional[DataCollator] = None, # type: ignore + train_dataset: Optional[Dataset] = None, + eval_dataset: Optional[Union[Dataset, dict[str, Dataset]]] = None, + processing_class: Optional[ + Union[PreTrainedTokenizerBase, BaseImageProcessor, FeatureExtractionMixin, ProcessorMixin] + ] = None, + compute_metrics: Optional[Callable[[EvalPrediction], dict]] = None, + callbacks: Optional[list[TrainerCallback]] = None, + optimizers: tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None), + preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None, + peft_config: Optional["PeftConfig"] = None, + formatting_func: Optional[Callable] = None, + ): + # Ensure Trainer does not drop non-signature columns used by the collator [e.g., "prompts"] + args.remove_unused_columns = False + # Respect a user-provided data_collator; otherwise, provide a ChatML collator that + if data_collator is None: + data_collator = DataCollatorForChatML(tokenizer=processing_class, max_length=args.max_length) + + # Ensure SFTTrainer does not pre-process the dataset when using a ChatML collator, + # so that raw conversational fields [e.g., "messages"] remain available to the collator. + if args.dataset_kwargs is None: + args.dataset_kwargs = {"skip_prepare_dataset": True} + else: + args.dataset_kwargs["skip_prepare_dataset"] = True + + # Liger fused GKD loss [JSD] + self.use_liger_gkd_loss = False + if args.use_liger_kernel: + self.liger_jsd_loss = LigerFusedLinearJSDLoss( + beta=args.beta, + ignore_index=-100, + temperature=args.temperature, + compiled=False, + ) + self.use_liger_gkd_loss = True + + super().__init__( + model, + args=args, + data_collator=data_collator, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + processing_class=processing_class, + compute_metrics=compute_metrics, + callbacks=callbacks, + optimizers=optimizers, + preprocess_logits_for_metrics=preprocess_logits_for_metrics, + peft_config=peft_config, + formatting_func=formatting_func, + ) + + if args.teacher_model_init_kwargs is None: + teacher_model_init_kwargs = {} + elif not isinstance(teacher_model, str): + raise ValueError( + "You passed teacher_model_init_kwargs to the GKDConfig, but your teacher_model is already instantiated." 
+ ) + else: + teacher_model_init_kwargs = args.teacher_model_init_kwargs + teacher_model_init_kwargs["dtype"] = ( + teacher_model_init_kwargs["dtype"] + if teacher_model_init_kwargs["dtype"] in ["auto", None] + else getattr(torch, teacher_model_init_kwargs["dtype"]) + ) + + if isinstance(teacher_model, str): + teacher_model = AutoModelForCausalLM.from_pretrained(teacher_model, **teacher_model_init_kwargs) + + # Disable dropout in the model + if args.disable_dropout: + disable_dropout_in_model(self.model) + + if self.is_deepspeed_enabled: + self.teacher_model = prepare_deepspeed(teacher_model, self.accelerator) + else: + self.teacher_model = self.accelerator.prepare_model(teacher_model, evaluation_mode=True) + + self.lmbda = args.lmbda + self.beta = args.beta + self.temperature = args.temperature + self.seq_kd = args.seq_kd + + self.generation_config = GenerationConfig( + max_new_tokens=args.max_new_tokens, + temperature=args.temperature, + do_sample=True, + top_k=0, + use_cache=False if args.gradient_checkpointing else True, + pad_token_id=self.processing_class.pad_token_id, + ) + # Set custom EOS tokens if they are specified by the model's generation + # config. This is important for models with the Llama 3 chat template, + # which use special tokens <|eot_id|> and <|eom_id|> to mark the end of + # turns or messages. + if ( + hasattr(self.model.generation_config, "eos_token_id") + and self.model.generation_config.eos_token_id is not None + ): + self.generation_config.eos_token_id = self.model.generation_config.eos_token_id + + @staticmethod + def generalized_jsd_loss( + student_logits, teacher_logits, labels=None, beta=0.5, temperature=1.0, reduction="batchmean" + ): + """ + Compute the generalized Jensen-Shannon Divergence loss for knowledge distillation using F.kl_div. See Eq. (1) + of https://huggingface.co/papers/2306.13649 for the definition. + + Args: + student_logits: + Tensor of shape (batch_size, sequence_length, vocab_size) + teacher_logits: + Tensor of shape (batch_size, sequence_length, vocab_size) + labels: + Tensor of shape (batch_size, sequence_length) with -100 for padding tokens to ignore when computing + loss + beta: + Interpolation coefficient between 0 and 1 (default: 0.5) + temperature: + Softmax temperature (default: 1.0) + reduction: + Specifies the reduction to apply to the output (default: 'batchmean') + + Returns: + loss: Scalar tensor with the generalized JSD loss + """ + + # Apply temperature scaling + student_logits = student_logits / temperature + teacher_logits = teacher_logits / temperature + + # Compute log probabilities for student and probabilities for teacher + student_log_probs = F.log_softmax(student_logits, dim=-1) + teacher_log_probs = F.log_softmax(teacher_logits, dim=-1) + + if beta == 0: + jsd = F.kl_div(student_log_probs, teacher_log_probs, reduction="none", log_target=True) + elif beta == 1: + jsd = F.kl_div(teacher_log_probs, student_log_probs, reduction="none", log_target=True) + else: + # Compute the log of the mixture distribution + # log(a + b) = log(exp(log(a)) + exp(log(b))) -> for mixture + beta = torch.tensor(beta, dtype=student_log_probs.dtype) + mixture_log_probs = torch.logsumexp( + torch.stack([student_log_probs + torch.log(1 - beta), teacher_log_probs + torch.log(beta)]), + dim=0, + ) + + # Compute KL divergences using F.kl_div + # PyTorch differs from the standard mathematical definition, so the order of the probability distributions is swapped compared to that defined in the paper. 
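+            # Concretely, F.kl_div(input=log_q, target=log_p, log_target=True) computes
+            # exp(log_p) * (log_p - log_q) = KL(p || q) elementwise, so passing the mixture
+            # as `input` below yields KL(teacher || mixture) and KL(student || mixture).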
+ kl_teacher = F.kl_div(mixture_log_probs, teacher_log_probs, reduction="none", log_target=True) + kl_student = F.kl_div(mixture_log_probs, student_log_probs, reduction="none", log_target=True) + + # Compute the Generalized Jensen-Shannon Divergence + jsd = beta * kl_teacher + (1 - beta) * kl_student + + # Masking + if labels is not None: + mask = labels != -100 + jsd = jsd[mask] + + # Apply reduction + if reduction == "batchmean": + return jsd.sum() / mask.sum() if labels is not None else jsd.sum() / (jsd.size(0) * jsd.size(1)) + elif reduction == "sum": + return jsd.sum() + elif reduction == "mean": + return jsd.mean() + else: + return jsd + + def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None): + if self.use_liger_gkd_loss: + # Forward only through the base models (avoid lm_head to save memory) + unwrapped_student = self.accelerator.unwrap_model(model) + if hasattr(unwrapped_student, "get_decoder") and unwrapped_student.get_decoder() is not None: + base_student = unwrapped_student.get_decoder() + else: + base_student = getattr( + unwrapped_student, getattr(unwrapped_student, "base_model_prefix", "model"), unwrapped_student + ) + + student_outputs = base_student( + input_ids=inputs["input_ids"], + attention_mask=inputs["attention_mask"], + output_hidden_states=True, + use_cache=False, + ) + + self.teacher_model.eval() + unwrapped_teacher = self.accelerator.unwrap_model(self.teacher_model) + if hasattr(unwrapped_teacher, "get_decoder") and unwrapped_teacher.get_decoder() is not None: + base_teacher = unwrapped_teacher.get_decoder() + else: + base_teacher = getattr( + unwrapped_teacher, getattr(unwrapped_teacher, "base_model_prefix", "model"), unwrapped_teacher + ) + with torch.no_grad(): + teacher_outputs = base_teacher( + input_ids=inputs["input_ids"], + attention_mask=inputs["attention_mask"], + output_hidden_states=True, + use_cache=False, + ) + + # hidden states (shifted) + student_hidden = student_outputs.last_hidden_state[:, :-1].contiguous() + teacher_hidden = teacher_outputs.last_hidden_state[:, :-1].contiguous() + + # labels mask and labels (shifted) + labels_mask = inputs["labels"] != -100 + masked_input_ids = torch.where( + labels_mask, inputs["input_ids"], torch.full_like(inputs["input_ids"], -100) + ) + true_labels = masked_input_ids[:, 1:].contiguous() + + # heads + student_head = unwrapped_student.get_output_embeddings() + teacher_head = unwrapped_teacher.get_output_embeddings() + + # liger fused jsd loss + loss = self.liger_jsd_loss( + student_input=student_hidden, + student_weight=student_head.weight, + teacher_input=teacher_hidden, + teacher_weight=teacher_head.weight, + true_labels=true_labels, + student_bias=getattr(student_head, "bias", None), + teacher_bias=getattr(teacher_head, "bias", None), + ) + else: + # compute student output + student_outputs = model( + input_ids=inputs["input_ids"], + attention_mask=inputs["attention_mask"], + ) + + # compute teacher output in eval mode + self.teacher_model.eval() + with torch.no_grad(): + teacher_outputs = self.teacher_model( + input_ids=inputs["input_ids"], + attention_mask=inputs["attention_mask"], + ) + + # slice the logits for the generated tokens using the inputs["prompts"] lengths + prompt_lengths = inputs["prompts"].shape[1] + shifted_student_logits = student_outputs.logits[:, prompt_lengths - 1 : -1, :] + shifted_teacher_logits = teacher_outputs.logits[:, prompt_lengths - 1 : -1, :] + shifted_labels = inputs["labels"][:, prompt_lengths:] + + # compute loss + loss = 
self.generalized_jsd_loss( + student_logits=shifted_student_logits, + teacher_logits=shifted_teacher_logits, + labels=shifted_labels, + beta=self.beta, + ) + + # empty cache + empty_cache() + + # Return loss + return (loss, student_outputs) if return_outputs else loss + + @staticmethod + def generate_on_policy_outputs(model, inputs, generation_config, pad_token_id=None): + # Generate output with respect to the prompt-only + generated_outputs = model.generate( + input_ids=inputs["prompts"], + attention_mask=inputs.get("prompt_attention_mask", None), + generation_config=generation_config, + return_dict_in_generate=True, + ) + + # Get the generated token IDs + generated_tokens = generated_outputs.sequences + # Calculate new attention mask + new_attention_mask = torch.ones_like(generated_tokens) + new_labels = generated_tokens.clone() + + # If there's pad_token_id, set attention mask to 0 for padding tokens + if pad_token_id is not None: + new_labels[new_labels == pad_token_id] = -100 + new_attention_mask[generated_tokens == pad_token_id] = 0 + + return generated_tokens, new_attention_mask, new_labels + + def training_step( + self, model: nn.Module, inputs: dict[str, Union[torch.Tensor, Any]], num_items_in_batch: Optional[int] = None + ) -> torch.Tensor: + """ + Perform a training step for the Generalized Knowledge Distillation (GKD) model. + + This method implements the on-policy learning approach described in the GKD paper. With probability + `self.lmbda`, it generates new responses using the student model, which are then used for training instead of + the original inputs. + """ + if self.seq_kd: + with unwrap_model_for_generation(self.teacher_model, self.accelerator) as unwrapped_model: + new_input_ids, new_attention_mask, new_labels = self.generate_on_policy_outputs( + unwrapped_model, inputs, self.generation_config, self.processing_class.pad_token_id + ) + inputs["input_ids"] = new_input_ids + inputs["attention_mask"] = new_attention_mask + inputs["labels"] = new_labels + if random.random() <= self.lmbda: + with unwrap_model_for_generation(model, self.accelerator) as unwrapped_model: + new_input_ids, new_attention_mask, new_labels = self.generate_on_policy_outputs( + unwrapped_model, inputs, self.generation_config, self.processing_class.pad_token_id + ) + inputs["input_ids"] = new_input_ids + inputs["attention_mask"] = new_attention_mask + inputs["labels"] = new_labels + + loss = super().training_step(model, inputs, num_items_in_batch) + return loss + + def create_model_card( + self, + model_name: Optional[str] = None, + dataset_name: Optional[str] = None, + tags: Union[str, list[str], None] = None, + ): + """ + Creates a draft of a model card using the information available to the `Trainer`. + + Args: + model_name (`str` or `None`, *optional*, defaults to `None`): + Name of the model. + dataset_name (`str` or `None`, *optional*, defaults to `None`): + Name of the dataset used for training. + tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`): + Tags to be associated with the model card. 
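+
+        Example (illustrative call): `trainer.create_model_card(model_name="my-gkd-model", tags="gkd")`
+        writes the generated card to `README.md` inside `args.output_dir`.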
+ """ + if not self.is_world_process_zero(): + return + + if hasattr(self.model.config, "_name_or_path") and not os.path.isdir(self.model.config._name_or_path): + base_model = self.model.config._name_or_path + else: + base_model = None + + # normalize `tags` to a mutable set + if tags is None: + tags = set() + elif isinstance(tags, str): + tags = {tags} + else: + tags = set(tags) + + if hasattr(self.model.config, "unsloth_version"): + tags.add("unsloth") + + if "JOB_ID" in os.environ: + tags.add("hf_jobs") + + tags.update(self._tag_names) + + # docstyle-ignore + citation = textwrap.dedent("""\ + @inproceedings{agarwal2024on-policy, + title = {{On-Policy Distillation of Language Models: Learning from Self-Generated Mistakes}}, + author = {Rishabh Agarwal and Nino Vieillard and Yongchao Zhou and Piotr Stanczyk and Sabela Ramos Garea and Matthieu Geist and Olivier Bachem}, + year = 2024, + booktitle = {The Twelfth International Conference on Learning Representations, {ICLR} 2024, Vienna, Austria, May 7-11, 2024}, + publisher = {OpenReview.net}, + url = {https://openreview.net/forum?id=3zKtaqxLhW}, + }""") + + model_card = generate_model_card( + base_model=base_model, + model_name=model_name, + hub_model_id=self.hub_model_id, + dataset_name=dataset_name, + tags=tags, + wandb_url=wandb.run.url if is_wandb_available() and wandb.run is not None else None, + comet_url=get_comet_experiment_url(), + trainer_name="GKD", + trainer_citation=citation, + paper_title="On-Policy Distillation of Language Models: Learning from Self-Generated Mistakes", + paper_id="2306.13649", + ) + + model_card.save(os.path.join(self.args.output_dir, "README.md")) +class UnslothGKDTrainer(_UnslothGKDTrainer): + """ + Trainer for Generalized Knowledge Distillation (GKD) of language models. + +For details on GKD, see the paper: [On-Policy Distillation of Language Models: Learning from Self-Generated +Mistakes](https://huggingface.co/papers/2306.13649). + +Args: + model ([`~transformers.PreTrainedModel`] or `torch.nn.Module` or `str`, *optional*): + Model to be trained, or the string identifier of the model to be instantiated from a pretrained model. + teacher_model ([`~transformers.PreTrainedModel`] or `torch.nn.Module` or `str`, *optional*): + Teacher model for knowledge distillation, or the string identifier of the model to be instantiated from a + pretrained model. + args ([`GKDConfig`], *optional*): + Training arguments. + data_collator ([`~transformers.DataCollator`], *optional*): + Data collator to batch samples from the dataset. It defaults to a [`DataCollatorForChatML`] using the + `processing_class`. + train_dataset ([`~datasets.Dataset`], *optional*): + Dataset for training. + eval_dataset ([`~datasets.Dataset`] or `dict` of [`~datasets.Dataset`], *optional*): + Dataset for evaluation. + processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*): + Class to process the data. + compute_metrics (`Callable`, *optional*): + Function to compute metrics at evaluation. Must take in an [`~transformers.EvalPrediction`] and return a + dictionary string to float. + callbacks (`list` of [`~transformers.TrainerCallback`], *optional*): + Callbacks to use during training. + optimizers (`tuple` of `torch.optim.Optimizer` and `torch.optim.lr_scheduler.LambdaLR`, *optional*, defaults to `(None, None)`): + Tuple containing the optimizer and the learning rate scheduler to use for training. 
+ preprocess_logits_for_metrics (`Callable`, *optional*): + Function to preprocess the logits before computing the metrics. Must take in the `logits` and `labels` and + return the logits to be used for metrics computation. + peft_config ([`~peft.config.PeftConfig`], *optional*): + PEFT configuration to use PEFT for training. If `None`, PEFT is not used. If provided, the `model` will be + wrapped with the specified PEFT adapter. + formatting_func (`Callable`, *optional*): + Function to format the dataset. Must take in an example and return an example. + + """ + def __init__( + self, + model = None, + teacher_model = None, + args = None, + data_collator = None, + train_dataset = None, + eval_dataset = None, + processing_class = None, + compute_metrics = None, + callbacks = None, + preprocess_logits_for_metrics = None, + peft_config = None, + formatting_func = None, + **kwargs + ): + if args is None: args = UnslothGKDConfig() + use_bf16 = getattr(args, 'bf16', False) + if type(use_bf16) is not bool: use_bf16 = False + use_fp16 = getattr(args, 'fp16', False) + if type(use_fp16) is not bool: use_fp16 = False + force_float32 = False + full_finetuning = os.environ.get('UNSLOTH_ENABLE_FULL_FINETUNING', '0') == '1' + if not full_finetuning and (os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '1'): + print('Unsloth: Switching to float32 training since model cannot work with float16') + force_float32 = True + mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') + dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None) + if dtype is None: dtype = model.get_input_embeddings().dtype + from unsloth_zoo.utils import _get_dtype + dtype = _get_dtype(dtype) + float16 = dtype == torch.float16 + if not force_float32 and (float16 and use_bf16): raise TypeError('Unsloth: Model is in float16 precision but you want to use bfloat16 precision. Set fp16 to `True` and bf16 to `False`') + if not force_float32 and (not float16 and use_fp16): raise TypeError('Unsloth: Model is in bfloat16 precision but you want to use float16 precision. 
Set fp16 to `False` and bf16 to `True`') + if force_float32: + # Forced float32 training + args.fp16 = False + args.bf16 = False + os.environ['ACCELERATE_MIXED_PRECISION'] = 'no' + elif (not use_bf16 and not use_fp16) and mixed_precision_dtype == 'float32': + # Mixed precision training + args.fp16 = float16 + args.bf16 = not float16 + os.environ['ACCELERATE_MIXED_PRECISION'] = 'fp16' if float16 else 'bf16' + if getattr(args, 'eval_dataset', None) is not None and getattr(args, 'eval_strategy', 'no') == 'no': + args.eval_strategy = 'steps' + if getattr(args, 'eval_steps', None) is None: args.eval_steps = 0.1 + ga_steps = getattr(args, 'gradient_accumulation_steps', None) + if ga_steps is not None and ga_steps > 1: + from transformers import __version__ as transformers_version + if Version(transformers_version) <= Version('4.45.2'): + print('**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!\n' + '`pip install --upgrade --no-cache-dir --force-reinstall --no-deps unsloth transformers trl unsloth_zoo`') + if getattr(args, 'eval_strategy', 'no') != 'no': + eval_bsz = getattr(args, 'per_device_eval_batch_size', 8) + if eval_bsz == 8 and args.per_device_train_batch_size < eval_bsz: args.per_device_eval_batch_size = args.per_device_train_batch_size + if getattr(args, 'eval_accumulation_steps', None) is None and ga_steps is not None: args.eval_accumulation_steps = ga_steps + fp16_full_eval = getattr(args, 'fp16_full_eval', False) + if type(fp16_full_eval) is not bool: fp16_full_eval = False + bf16_full_eval = getattr(args, 'bf16_full_eval', False) + if type(bf16_full_eval) is not bool: bf16_full_eval = False + if args.fp16 and bf16_full_eval: args.bf16_full_eval = False; args.fp16_full_eval = True + if args.bf16 and fp16_full_eval: args.bf16_full_eval = True; args.fp16_full_eval = False + if force_float32: + args.bf16_full_eval = False + args.fp16_full_eval = False + elif os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') == 'bfloat16': + args.bf16_full_eval = True + args.fp16_full_eval = False + elif not bf16_full_eval and not fp16_full_eval: + args.bf16_full_eval = args.bf16 + args.fp16_full_eval = args.fp16 + _output_logits = False + if locals().get('compute_metrics', None) is not None: _output_logits = True + if locals().get('preprocess_logits_for_metrics', None) is not None: _output_logits = True + if _output_logits: + os.environ['UNSLOTH_RETURN_LOGITS'] = '1' + if 'max_seq_length' not in locals() and not hasattr(args, 'max_seq_length'): + pass + else: + model_max_seq_length = getattr(model, 'max_seq_length', None) + args_max_seq_length = getattr(args, 'max_seq_length', None) + if args_max_seq_length is None and model_max_seq_length is not None: + max_seq_length = model.max_seq_length + if hasattr(args, 'max_seq_length'): args.max_seq_length = max_seq_length + if model is not None and hasattr(model, 'for_training'): + model.for_training() + if 'tokenizer' in locals() and hasattr(tokenizer, 'padding_side'): tokenizer.padding_side = 'right' + if 'processing_class' in locals(): + if hasattr(processing_class, 'padding_side'): processing_class.padding_side = 'right' + if hasattr(processing_class, 'tokenizer') and hasattr(processing_class.tokenizer, 'padding_side'): processing_class.tokenizer.padding_side = 'right' + __tokenizer = processing_class if 'processing_class' in locals() else tokenizer + from unsloth_zoo.vision_utils import UnslothVisionDataCollator + if not isinstance(data_collator, UnslothVisionDataCollator): + if 
isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names: + data_collator = DataCollatorForSeq2Seq( + __tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False + if hasattr(args, 'dataset_text_field'): args.dataset_text_field = '' + if hasattr(args, 'dataset_kwargs'): args.dataset_kwargs = {'skip_prepare_dataset': True} + if not isinstance(data_collator, UnslothVisionDataCollator): + if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'): + if isinstance(data_collator, DataCollatorForSeq2Seq): + data_collator = DataCollatorForSeq2Seq( + __tokenizer.tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer.tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + other_metrics = [] + + from unsloth_zoo.logging_utils import PatchRLStatistics + PatchRLStatistics('gkd_trainer', other_metrics) + + # [TODO] Fix up DataParallel multiplying batch sizes + # [TODO] DDP works, but DP seems to not work? [TODO] + if getattr(args, "parallel_mode", None) == ParallelMode.NOT_DISTRIBUTED and args.n_gpu > 1: + if getattr(args, "_n_gpu", 1) != 1: + args._n_gpu = 1 + if "model" in locals() and hasattr(model, "for_training"): + model.for_training() + super().__init__( + model = model, + teacher_model = teacher_model, + args = args, + data_collator = data_collator, + train_dataset = train_dataset, + eval_dataset = eval_dataset, + processing_class = processing_class, + compute_metrics = compute_metrics, + callbacks = callbacks, + preprocess_logits_for_metrics = preprocess_logits_for_metrics, + peft_config = peft_config, + formatting_func = formatting_func,**kwargs) + if "model" in locals() and hasattr(model, "for_inference"): + model.for_inference() + if hasattr(self, 'neftune_hook_handle'): + self.neftune_hook_handle.remove() + if hasattr(self, 'neftune_hook_handle'): del self.neftune_hook_handle + if getattr(args, 'neftune_noise_alpha', None) is not None: + model.get_input_embeddings().neftune_noise_alpha = self.neftune_noise_alpha + pass + if hasattr(self, 'accelerator'): + scaler = self.accelerator.scaler + current_model = model + while hasattr(current_model, 'model'): + current_model.accelerator_scaler = scaler + current_model = current_model.model + current_model.accelerator_scaler = scaler + pass + if hasattr(self, 'train'): + self.train = MethodType(prepare_for_training_mode(self.__class__.train), self) + pass + +pass diff --git a/unsloth_compiled_cache/UnslothGRPOTrainer.py b/unsloth_compiled_cache/UnslothGRPOTrainer.py new file mode 100644 index 0000000000000000000000000000000000000000..4d5dece359447f8b2b4e4253df228bef8199a9e4 --- /dev/null +++ b/unsloth_compiled_cache/UnslothGRPOTrainer.py @@ -0,0 +1,3597 @@ +""" +2025.11.2 +2025.11.1 +4.57.2 +0.23.0 +__UNSLOTH_VERSIONING__ +""" + +# Unsloth auto generated code +# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved. 
+# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . + +from torch import Tensor +import torch +import torch.nn as nn +from torch.nn import functional as F +from typing import Any, List, Optional, Tuple, Union, Dict, Set, Callable +from trl.trainer.grpo_trainer import (Any, AutoConfig, AutoModelForSequenceClassification, AutoProcessor, AutoTokenizer, DataLoader, Dataset, FSDP, GRPOConfig, GRPOTrainer, GenerationConfig, IterableDataset, Optional, Path, PeftConfig, PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin, RepeatSampler, RewardFunc, Sampler, SyncRefModelCallback, Trainer, TrainerCallback, Union, VLLMClient, _ForwardRedirection, apply_chat_template, broadcast_object_list, copy, datasets, defaultdict, deque, disable_dropout_in_model, entropy_from_logits, gather, gather_object, generate_model_card, get_comet_experiment_url, identity, inspect, is_conversational, is_datasets_available, is_flash_attn_2_available, is_liger_kernel_available, is_peft_model, is_rich_available, is_vllm_available, is_wandb_available, logger, logging, maybe_apply_chat_template, nanmax, nanmin, nanstd, nn, nullcontext, os, pad, partial, prepare_deepspeed, prepare_fsdp, prepare_multimodal_messages, prepare_peft_model, print_prompt_completions_sample, profiling_context, profiling_decorator, re, seed_worker, selective_log_softmax, set_seed, shuffle_sequence_dict, split_pixel_values_by_grid, split_tensor_dict, textwrap, torch, transformers, truncate_with_protected_tokens, unsplit_pixel_values_by_grid, unwrap_model_for_generation, Any, FSDP, Union, apply_chat_template, broadcast_object_list, copy, gather, gather_object, is_conversational, is_flash_attn_2_available, logging, maybe_apply_chat_template, nanmax, nanmin, nanstd, nullcontext, os, pad, prepare_multimodal_messages, profiling_context, re, torch, transformers, truncate_with_protected_tokens, unwrap_model_for_generation, entropy_from_logits, os, pad, re, selective_log_softmax, torch, transformers, re, Any, Union, profiling_decorator, re, shuffle_sequence_dict, split_pixel_values_by_grid, split_tensor_dict, torch, unsplit_pixel_values_by_grid, Optional, PreTrainedModel, Trainer, logger, os, re, torch, FSDP, nn, os, re, FSDP, nn, re, torch, GRPOTrainer, Trainer, gather, nanmax, nanmin, os, re, torch) + + +import os +from typing import * +from dataclasses import dataclass, field +from packaging.version import Version +import torch +import numpy as np +from contextlib import nullcontext +from torch.nn import functional as F +from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling as TransformersDataCollatorForLanguageModeling +from transformers.training_args import ParallelMode + +# Wrap trainer with padding to right and enable training mode +import functools +from types import MethodType +def prepare_for_training_mode(f): + @functools.wraps(f) + def wrapper(self, *args, **kwargs): + # Enable training mode + if hasattr(self, 'model') and hasattr(self.model, 
"for_training"): + self.model.for_training() + output = f(self, *args, **kwargs) + # Return inference mode + if hasattr(self, 'model') and hasattr(self.model, "for_inference"): + self.model.for_inference() + return output + return wrapper +pass + +torch_compile_options = { + "epilogue_fusion" : True, + "max_autotune" : False, + "shape_padding" : True, + "trace.enabled" : False, + "triton.cudagraphs" : False, +} + +@torch.compile(dynamic = True, fullgraph = True, options = torch_compile_options,) +def chunked_selective_log_softmax(logits, index): + # Split into 4 chunks only + chunked_logits = torch.chunk(logits.reshape(-1, logits.shape[-1]), chunks = 4, dim = 0) + chunked_index = torch.chunk(index.reshape(-1), chunks = 4, dim = 0) + all_per_token_logps = [] + # Below loop does the same as selective_log_softmax(chunk_logits, chunk_index) + for chunk_logits, chunk_index in zip(chunked_logits, chunked_index): + chunk_logits = chunk_logits.to(torch.float32) + selected_logits = torch.gather(chunk_logits, dim = -1, index = chunk_index.unsqueeze(-1)).squeeze(-1) + logsumexp_values = torch.logsumexp(chunk_logits, dim = -1) + per_token_logps = selected_logits - logsumexp_values + all_per_token_logps.append(per_token_logps) + pass + all_per_token_logps = torch.concat(all_per_token_logps) + all_per_token_logps = all_per_token_logps.reshape((logits.shape[0], logits.shape[1])) + return all_per_token_logps + +def calculate_pad_tokens_in_prompt( + input_ids: torch.Tensor, + logits_to_keep: int, + pad_token_id: int +) -> torch.Tensor: + """ + Given prompt tensor, it returns all the left padded tokens in that sequence. so [pad, pad, pad, cat] = 3 tokens + """ + if logits_to_keep >= input_ids.shape[1]: + raise ValueError("logits_to_keep must be smaller than the sequence length.") + + prompt_section = input_ids[:, :-logits_to_keep] + + padding_mask = (prompt_section == pad_token_id) + + pad_token_counts = padding_mask.sum(dim=1) + + return pad_token_counts + +def create_completion_attention_mask( + completion_input_ids: torch.Tensor, + left_pad_tokens_per_prompt: torch.Tensor, + max_left_pad: int, + pad_token_id: int +) -> torch.Tensor: + """ + Given that we have a sequence, [p,p,p,c,c,c,pad,pad,pad] + + Where p are extra prompt tokens we got from slicing the torch tensor, c is completion tokens + and pad are pad tokens, this function would make a completion mask that would 0 out the pad + and p tokens. so in this example [0,0,0,1,1,1,0,0,0] + """ + batch_size, completion_len = completion_input_ids.shape + device = completion_input_ids.device + + num_tokens_to_mask = max_left_pad - left_pad_tokens_per_prompt + + indices = torch.arange(completion_len, device=device).unsqueeze(0) + shift_mask = indices >= num_tokens_to_mask.unsqueeze(1) + + non_padding_mask = (completion_input_ids != pad_token_id) + + final_mask = shift_mask & non_padding_mask + + return final_mask + +def left_pack_padding(tensor: torch.Tensor, pad_id: int) -> torch.Tensor: + """ + Moves all padding tokens in each sequence of a batch to the right. + """ + mask = (tensor != pad_id) + # Must do stable=True since binary mark is unordered + sorted_indices = torch.argsort(mask, dim=1, descending=True, stable=True) + packed_tensor = torch.gather(tensor, 1, sorted_indices) + return packed_tensor + +def align_logprobs_with_mask( + logprob_tensor: torch.Tensor, + attention_mask: torch.Tensor, + pad_value: float = 0.0 +) -> torch.Tensor: + """ + Aligns a log probability tensor with a given attention mask. 
+ """ + + device = logprob_tensor.device + batch_size, logprob_seq_len = logprob_tensor.shape + mask_seq_len = attention_mask.shape[1] + + padded_logprobs = torch.full( + attention_mask.shape, + fill_value=pad_value, + dtype=logprob_tensor.dtype, + device=device + ) + + left_pad_counts = torch.argmax(attention_mask, dim=1) + + cols = torch.arange(logprob_seq_len, device=device) + dest_indices = left_pad_counts.unsqueeze(1) + cols + + # Create destination row indices + # Shape: [batch_size, logprob_seq_len] + row_indices = torch.arange(batch_size, device=device).unsqueeze(1).expand_as(dest_indices) + + # --- 4. Filter out-of-bounds indices and perform assignment --- + # Create a mask to identify only the indices that are within the bounds + # of the target tensor's sequence length. + valid_mask = dest_indices < mask_seq_len + + # Use this mask to select only the valid row indices, column indices, + # and the corresponding values from the logprob tensor. + # This flattens the selected elements into 1D tensors. + valid_rows = row_indices[valid_mask] + valid_cols = dest_indices[valid_mask] + valid_vals = logprob_tensor[valid_mask] + + # Place the valid values into their correct positions in the padded tensor + # using a single, efficient advanced indexing operation. + padded_logprobs[valid_rows, valid_cols] = valid_vals + + return padded_logprobs +def grpo_compute_loss( + ref_logits, + new_logits, + old_logits, + sampling_per_token_logps, + input_ids, + mask, + beta, + advantages, + **kwargs +): + # All Unsloth Zoo code licensed under LGPLv3 + # Set defaults for optional arguments + loss_type = kwargs.get("loss_type", "grpo") + epsilon_low = kwargs.get("epsilon_low", 0.2) + epsilon_high = kwargs.get("epsilon_high", 0.2) + max_completion_length = kwargs.get("max_completion_length", 8192) + delta = kwargs.get("delta", None) + temperature = kwargs.get("temperature", 1.0) + logit_scale_multiply = kwargs.get("logit_scale_multiply", 0.0) + logit_scale_divide = kwargs.get("logit_scale_divide", 0.0) + logit_softcapping = kwargs.get("logit_softcapping", 0.0) + importance_sampling_level = kwargs.get("importance_sampling_level", "token") + num_items_in_batch = kwargs.get("num_items_in_batch", None) + current_gradient_accumulation_steps = kwargs.get("current_gradient_accumulation_steps", 1) + num_processes = kwargs.get("num_processes", 1) + use_vllm = kwargs.get("use_vllm", False) + vllm_importance_sampling_cap = kwargs.get("vllm_importance_sampling_cap", 2.0) + input_ids = input_ids.unsqueeze(-1) + + # Optional logit softcapping and logit dividing + if logit_scale_multiply != 0: new_logits = new_logits * logit_scale_multiply + if logit_scale_divide != 0: new_logits = new_logits / logit_scale_divide + if logit_softcapping != 0: new_logits = new_logits * torch.tanh(new_logits / logit_softcapping) + + new_logits = new_logits.to(torch.float32) + # See https://huggingface.co/blog/the_n_implementation_details_of_rlhf_with_ppo#policy-training-implementation-details + if temperature != 1.0: new_logits = new_logits / temperature + new_x = torch.gather(new_logits, dim = -1, index = input_ids).squeeze(-1) + new = new_x - torch.logsumexp(new_logits, dim = -1) + # x_i - logsumexp(x_i) + with torch.no_grad(): + if beta != 0.0: + assert ref_logits is not None, "ref_logits should not be None when beta != 0.0" + + # Optional logit softcapping and logit dividing + if logit_scale_multiply != 0: ref_logits = ref_logits * logit_scale_multiply + if logit_scale_divide != 0: ref_logits = ref_logits / logit_scale_divide + if 
logit_softcapping != 0: ref_logits = ref_logits * torch.tanh(ref_logits / logit_softcapping) + + ref_logits = ref_logits.to(torch.float32) + # See https://huggingface.co/blog/the_n_implementation_details_of_rlhf_with_ppo#policy-training-implementation-details + if temperature != 1.0: ref_logits = ref_logits / temperature + ref_x = torch.gather(ref_logits, dim = -1, index = input_ids).squeeze(-1) + ref = ref_x - torch.logsumexp(ref_logits, dim = -1) + pass + + if old_logits is not None: + # Optional logit softcapping and logit dividing + if logit_scale_multiply != 0: old_logits = old_logits * logit_scale_multiply + if logit_scale_divide != 0: old_logits = old_logits / logit_scale_divide + if logit_softcapping != 0: old_logits = old_logits * torch.tanh(old_logits / logit_softcapping) + + old_logits = old_logits.to(torch.float32) + # See https://huggingface.co/blog/the_n_implementation_details_of_rlhf_with_ppo#policy-training-implementation-details + if temperature != 1.0: old_logits = old_logits / temperature + old_x = torch.gather(old_logits, dim = -1, index = input_ids).squeeze(-1) + old = old_x - torch.logsumexp(old_logits, dim = -1) + pass + if use_vllm and sampling_per_token_logps is not None: + #must filter out extra prompt tokens in begining after making input_ids left padded + importance_sampling_ratio = torch.exp((old * mask) - sampling_per_token_logps) + importance_sampling_ratio = torch.clamp( + importance_sampling_ratio, max=vllm_importance_sampling_cap + ) + pass + pass + + # Reverse KL + # Note that this is a low variance low bias estimator for the KL divergence as used in GRPO paper + if beta != 0.0: + kl_i = torch.exp(ref - new) - (ref - new) - 1.0 + + else: + # set kl_i to a tensor of zeros with the correct shape + if importance_sampling_level == "sequence": + kl_i = new.new_zeros(new.size(0), 1) + else: + kl_i = torch.zeros_like(new) + # Full correct reverse KL divergence?? Missing term maybe? + # kl_i = torch.exp(new) * kl_i + + # Below is forward KL (normal KL) + # kl_i = torch.exp(old) * (old - new) + if old_logits is not None: + log_ratio = new - old + else: + log_ratio = new - new.detach() + + if importance_sampling_level == "token": + log_importance_weights = log_ratio + elif importance_sampling_level == "sequence": + log_importance_weights = (log_ratio * mask).sum(-1) / mask.sum(-1).clamp(min=1.0) + log_importance_weights = log_importance_weights.unsqueeze(-1) + else: + raise ValueError( + f"Unknown importance sampling level: {importance_sampling_level}. Possible values are 'token' " + "and 'sequence'." + ) + + coef_1 = torch.exp(log_importance_weights) + + coef_2 = torch.clamp(coef_1, 1 - epsilon_low, 1 + epsilon_high) + + if delta is not None: + loss_1 = torch.clamp(coef_1, max=delta) * advantages.unsqueeze(1) + else: + loss_1 = coef_1 * advantages.unsqueeze(1) + pass + + # Must detach - otherwise gradients are not propagated correctly! 
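+    # The surrogate below is the standard PPO/GRPO clipped objective: with ratio
+    # r = exp(log_importance_weights) and advantage A, each token contributes
+    # min(r * A, clip(r, 1 - epsilon_low, 1 + epsilon_high) * A). Illustrative
+    # numbers: r = 1.5, A = 1, epsilon_high = 0.2 gives min(1.5, 1.2) = 1.2, so
+    # for positive advantages a ratio beyond the clip range adds no extra gradient.
+    # In the on-policy case (old_logits is None) the ratio reduces to
+    # exp(new - new.detach()), which is identically 1 in the forward pass: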
+ # exp(x - x) == 1 + # loss_i = torch.exp(new - new.detach()) * advantages.unsqueeze(1) + + loss_2 = coef_2 * advantages.unsqueeze(1) + loss_i = -torch.min(loss_1, loss_2) + + if use_vllm and sampling_per_token_logps is not None: + loss_i = loss_i * importance_sampling_ratio + #delta for metric + with torch.no_grad(): + delta = torch.abs(old - sampling_per_token_logps) + delta = delta * mask + flat_is_ratio = importance_sampling_ratio * mask + else: + delta = torch.tensor([]).detach() + flat_is_ratio = torch.tensor([]).detach() + if beta != 0.0: + loss_i = loss_i + beta * kl_i + + mask = mask.to(torch.float32) + n_mask_per_reward = mask.sum(1) + + # https://github.com/huggingface/trl/blob/e8b8499f1f8d76838155b515e414ee98f757d6d5/trl/trainer/grpo_trainer.py#L1624 + if loss_type == "grpo": + loss = ((loss_i * mask).sum(-1) / mask.sum(-1).clamp(min=1.0)).mean() + loss = loss / current_gradient_accumulation_steps + elif loss_type == "bnpo": + loss = (loss_i * mask).sum() / mask.sum().clamp(min=1.0) + loss = loss / current_gradient_accumulation_steps + elif loss_type == "dr_grpo": + loss = (loss_i * mask).sum() / (loss_i.size(0) * max_completion_length) + loss = loss / current_gradient_accumulation_steps + elif loss_type == "dapo": + normalizer = num_items_in_batch/ num_processes + loss = (loss_i * mask).sum() / normalizer + else: + raise ValueError(f"Unknown loss type: {loss_type}") + + # loss = (loss_i * mask).sum() / mask.sum() + + # Get metrics as well which are folded + def masked_batch_mean(x): + with torch.inference_mode(): + completion_length = n_mask_per_reward.mean() + if x.shape[1] == 1: # when importance_sampling_level == "sequence" + return completion_length, x.mean() + else: + mean_kl_per_reward = (x * mask).sum(1) / n_mask_per_reward + mean_kl = mean_kl_per_reward.mean() + return completion_length, mean_kl + completion_length, mean_kl = masked_batch_mean(kl_i) + return loss, completion_length, mean_kl, delta, flat_is_ratio + +class UnslothEfficientGRPO(torch.autograd.Function): + # All Unsloth Zoo code licensed under LGPLv3 + @staticmethod + def forward(ctx, _new_hidden_states, _old_hidden_states, _ref_hidden_states, _sampling_per_token_logps, lm_head, _input_ids, _mask, _advantages, beta, scaler = None, n_chunks = 1, extra_kwargs=None): + if extra_kwargs is None: + extra_kwargs = {} + def compute_loss(new_hidden_states, old_hidden_states, ref_hidden_states, sampling_per_token_logps, input_ids, mask, advantages, scaling): + new_logits = torch.matmul(new_hidden_states.to(lm_head.dtype), lm_head.t()) + new_logits = new_logits[:, :-1, :] # exclude the last logit: it corresponds to the next token pred + with torch.no_grad(): + if beta != 0.0: + ref_logits = torch.matmul(ref_hidden_states.to(lm_head.dtype), lm_head.t()) + ref_logits = ref_logits[:, :-1, :] # exclude the last logit: it corresponds to the next token pred + else: + ref_logits = None + if old_hidden_states is not None: + old_logits = torch.matmul(old_hidden_states.to(lm_head.dtype), lm_head.t()) + old_logits = old_logits[:, :-1, :] # exclude the last logit: it corresponds to the next token pred + else: + old_logits = None + # if old_hidden_states is not None: + # old_logits = torch.matmul(old_hidden_states, lm_head.t()) #last logit already excluded + # old_logits = old_logits[:, :-1, :] # exclude the last logit: it corresponds to the next token pred + # else: + # old_logits = None + # unsloth_zoo/rl_replacements.py + loss, completion_length, mean_kl, delta, flat_is_ratio = grpo_compute_loss( + ref_logits, + new_logits, + 
old_logits, + sampling_per_token_logps, + input_ids, + mask, + beta, + advantages, + **extra_kwargs, + ) + + # Scale loss if needed for mixed precision training + scaled_loss = loss * scaling + # Must add .loss.detach otherwise autograd uses 2x VRAM + return scaled_loss, (loss.detach(), completion_length, mean_kl, delta, flat_is_ratio) + pass + + device =_new_hidden_states.device + grad_inputs = torch.empty_like(_new_hidden_states) + accumulated_loss = torch.zeros(1, device = device) + accumulated_completion_length = torch.zeros(1, device = device) + accumulated_mean_kl = torch.zeros(1, device = device) + accumulated_delta = [] + accumulated_flat_is_ratio = [] + def accumulate_chunk( + new_hidden_states_j, + old_hidden_states_j, + ref_hidden_states_j, + sampling_per_token_logps_j, + input_ids_j, + mask_j, + advantages_j, + scaling, + grad_inputs_j, + ): + (chunk_grad_input,), (chunk_loss, (unscaled_loss, chunk_completion_length, chunk_mean_kl, chunk_delta, chunk_flat_is_ratio)) = torch.func.grad_and_value( + compute_loss, + argnums = (0,), + has_aux = True, + )(new_hidden_states_j, old_hidden_states_j, ref_hidden_states_j, sampling_per_token_logps_j, input_ids_j, mask_j, advantages_j, scaling) + accumulated_loss .add_(unscaled_loss) + accumulated_completion_length.add_(chunk_completion_length) + accumulated_mean_kl .add_(chunk_mean_kl) + accumulated_delta .append(chunk_delta) + accumulated_flat_is_ratio .append(chunk_flat_is_ratio) + grad_inputs_j[:] = chunk_grad_input + pass + + accumulate_chunk = torch.compile( + accumulate_chunk, + fullgraph = True, + # [TODO] Dynamic marking causes torch.compile errors if sequence length is long + dynamic = True, + options = torch_compile_options, + ) + + grad_inputs_chunks = torch.chunk(grad_inputs, chunks = n_chunks, dim = 0) + new_hidden_states = torch.chunk(_new_hidden_states, chunks = n_chunks, dim = 0) + if _old_hidden_states is not None: + old_hidden_states = torch.chunk(_old_hidden_states, chunks = n_chunks, dim = 0) + else: + old_hidden_states = [None] * n_chunks + if _ref_hidden_states is not None: + ref_hidden_states = torch.chunk(_ref_hidden_states, chunks = n_chunks, dim = 0) + else: + ref_hidden_states = [None] * n_chunks + if _sampling_per_token_logps is not None: + sampling_per_token_logps = torch.chunk(_sampling_per_token_logps, chunks = n_chunks, dim = 0) + else: + sampling_per_token_logps = [None] * n_chunks + input_ids = torch.chunk(_input_ids, chunks = n_chunks, dim = 0) + mask = torch.chunk(_mask, chunks = n_chunks, dim = 0) + advantages = torch.chunk(_advantages, chunks = n_chunks, dim = 0) + + # Get mixed precision scaling if seen + scaling = scaler.get_scale() if scaler is not None else 1.0 + + # Force torch.compile to use dynamic shapes for seqlen dim + # mark_dynamic = lambda x: torch._dynamo.mark_dynamic(x, 1) + + for (grad_inputs_j, new_hidden_states_j, old_hidden_states_j, ref_hidden_states_j, sampling_per_token_logps_j, input_ids_j, mask_j, advantages_j, ) in \ + zip(grad_inputs_chunks, new_hidden_states, old_hidden_states, ref_hidden_states, sampling_per_token_logps, input_ids, mask, advantages): + + # [TODO] Dynamic marking causes torch.compile errors if sequence length is long + + # mark_dynamic(new_hidden_states_j) + # mark_dynamic(ref_hidden_states_j) + # if old_hidden_states_j is not None: + # mark_dynamic(old_hidden_states_j) + # mark_dynamic(input_ids_j) + # mark_dynamic(mask_j) + + accumulate_chunk( + new_hidden_states_j, + old_hidden_states_j, + ref_hidden_states_j, + sampling_per_token_logps_j, + input_ids_j, + 
mask_j, + advantages_j, + scaling, + grad_inputs_j, + ) + pass + + grad_inputs .div_(n_chunks) + accumulated_loss .div_(n_chunks) + accumulated_completion_length.div_(n_chunks) + accumulated_mean_kl .div_(n_chunks) + + if _sampling_per_token_logps is not None: + accumulated_delta = torch.cat(accumulated_delta, dim=0) + accumulated_flat_is_ratio = torch.cat(accumulated_flat_is_ratio, dim=0) + else: + accumulated_delta = None + accumulated_flat_is_ratio = None + ctx.save_for_backward(grad_inputs) + return ( + accumulated_loss, + accumulated_completion_length, + accumulated_mean_kl, + accumulated_delta, + accumulated_flat_is_ratio + ) + pass + + @staticmethod + def backward(ctx, grad_output, dcompletion_length, dmean_kl, ddelta, ddflat_is_ratio): + (grad_input,) = ctx.saved_tensors + return (grad_input, None, None, None, None, None, None, None, None, None, None, None) + pass + +def grpo_accumulated_loss( + trainer, + input_ids, + attention_mask, + logits_to_keep, + completion_mask, + advantages, + old_hidden_states, + ref_hidden_states, + n_chunks = -1, + **kwargs, +): + # All Unsloth Zoo code licensed under LGPLv3 + bsz, qlen = input_ids.shape + + pixel_values = kwargs.get('pixel_values',None) + image_grid_thw = kwargs.get('image_grid_thw',None) + pixel_attention_mask = kwargs.get('pixel_attention_mask',None) + image_sizes = kwargs.get('image_sizes',None) + sampling_per_token_logps = kwargs.get("sampling_per_token_logps", None) + #delete this from kwargs so less issues + del kwargs["sampling_per_token_logps"] + kwargs["vllm_importance_sampling_cap"] = trainer.vllm_importance_sampling_cap if sampling_per_token_logps is not None else None + kwargs["use_vllm"] = trainer.use_vllm + # Find closest multiple + factors = [i for i in range(1, bsz + 1) if bsz % i == 0] + if n_chunks == -1: n_chunks = bsz + n_chunks = factors[min(np.searchsorted(factors, n_chunks), len(factors)-1)] + + if not hasattr(trainer, '_autocast_dtype'): + trainer._autocast_dtype = torch.float16 if os.environ.get('ACCELERATE_MIXED_PRECISION', 'fp16') == 'fp16' else torch.bfloat16 + if os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '1': trainer._autocast_dtype = None + pass + os.environ["UNSLOTH_RETURN_HIDDEN_STATES"] = "1" + + lm_head = trainer.model.get_output_embeddings().weight + + if pixel_values is None: + left_pad_tokens_per_prompt = calculate_pad_tokens_in_prompt(input_ids, logits_to_keep, trainer.processing_class.pad_token_id) + + max_left_pad = max(left_pad_tokens_per_prompt).item() + + input_ids = left_pack_padding(input_ids, trainer.processing_class.pad_token_id) + + completion_input_ids = input_ids[:, -(logits_to_keep +max_left_pad):] + + completion_mask = create_completion_attention_mask(completion_input_ids, left_pad_tokens_per_prompt, max_left_pad, trainer.processing_class.pad_token_id).to(attention_mask.dtype) + #TODO given the completion mask here we need to, handle the left pad tokens so the sizes of completion + #token or old logprobs are compatible with the importance sampling logprobs + if trainer.use_vllm and sampling_per_token_logps is not None: + sampling_per_token_logps = align_logprobs_with_mask(sampling_per_token_logps, completion_mask) + attention_mask = input_ids != trainer.processing_class.pad_token_id + attention_mask = attention_mask.to(attention_mask.dtype) + else: + completion_input_ids = input_ids[:, -logits_to_keep:] + + unwrapped_model = trainer.accelerator.unwrap_model(trainer.model, keep_fp32_wrapper = False) + + # Do not move hidden_states from device 1 to device 0: + for module in 
unwrapped_model.modules():
+ if hasattr(module, "_hf_hook") and hasattr(module._hf_hook, "io_same_device"):
+ module._hf_hook.io_same_device = False
+ pass
+ # Get autocaster
+ if trainer._autocast_dtype is None:
+ autocaster = nullcontext()
+ else:
+ autocaster = torch.amp.autocast(device_type = trainer.model.device.type, dtype = trainer._autocast_dtype)
+ with autocaster:
+ if pixel_values is None:
+ new_hidden_states = unwrapped_model(
+ input_ids = input_ids,
+ attention_mask = attention_mask,
+ pixel_values = pixel_values,
+ image_grid_thw = image_grid_thw,
+ pixel_attention_mask = pixel_attention_mask,
+ image_sizes = image_sizes,
+ # logits_to_keep = logits_to_keep + 1,
+ ).logits
+
+ #keep extra logit as we generated a new token
+ new_hidden_states = new_hidden_states[:, -(logits_to_keep +max_left_pad+1): , :]
+ if ref_hidden_states is not None:
+ ref_hidden_states = ref_hidden_states[:, -(logits_to_keep +max_left_pad+1): , :]
+ if old_hidden_states is not None:
+ old_hidden_states = old_hidden_states[:, -(logits_to_keep +max_left_pad+1): , :]
+ else:
+ new_hidden_states = unwrapped_model(
+ input_ids = input_ids,
+ attention_mask = attention_mask,
+ pixel_values = pixel_values,
+ image_grid_thw = image_grid_thw,
+ pixel_attention_mask = pixel_attention_mask,
+ image_sizes = image_sizes,
+ logits_to_keep = logits_to_keep + 1,
+ ).logits
+ loss, completion_length, mean_kl, delta, flat_is_ratio = UnslothEfficientGRPO.apply(
+ new_hidden_states,
+ old_hidden_states,
+ ref_hidden_states,
+ sampling_per_token_logps,
+ lm_head,
+ completion_input_ids,
+ completion_mask,
+ advantages,
+ trainer.beta,
+ trainer.accelerator.scaler,
+ n_chunks,
+ kwargs # pass kwargs as a dict
+ )
+
+
+ # Must force not returning hidden states but logits otherwise gibberish
+ os.environ["UNSLOTH_RETURN_HIDDEN_STATES"] = "0"
+
+ return loss, completion_length, mean_kl, delta, flat_is_ratio
+ # Old non efficient code path
+ new_logits = torch.matmul(new_hidden_states, lm_head.t())
+ new_logits = new_logits[:, :-1, :] # exclude the last logit: it corresponds to the next token pred
+ old_logits = torch.matmul(old_hidden_states, lm_head.t())
+ old_logits = old_logits[:, :-1, :] # exclude the last logit: it corresponds to the next token pred
+ loss, completion_length, mean_kl = grpo_compute_loss(
+ old_logits,
+ new_logits,
+ completion_input_ids,
+ completion_mask,
+ trainer.beta,
+ advantages,
+ )
+ return loss, completion_length, mean_kl
+ pass
+
+@torch.compile(dynamic = True, fullgraph = True, options = torch_compile_options)
+def grpo_compute_loss_slow(
+ ref_logits,
+ new_logits,
+ old_logits,
+ sampling_per_token_logps,
+ input_ids,
+ mask,
+ beta,
+ advantages,
+ **kwargs
+):
+ # All Unsloth Zoo code licensed under LGPLv3
+ # Set defaults for optional arguments
+ loss_type = kwargs.get("loss_type", "grpo")
+ epsilon_low = kwargs.get("epsilon_low", 0.2)
+ epsilon_high = kwargs.get("epsilon_high", 0.2)
+ max_completion_length = kwargs.get("max_completion_length", 8192)
+ delta = kwargs.get("delta", None)
+ temperature = kwargs.get("temperature", 1.0)
+ logit_scale_multiply = kwargs.get("logit_scale_multiply", 0.0)
+ logit_scale_divide = kwargs.get("logit_scale_divide", 0.0)
+ logit_softcapping = kwargs.get("logit_softcapping", 0.0)
+ importance_sampling_level = kwargs.get("importance_sampling_level", "token")
+ num_items_in_batch = kwargs.get("num_items_in_batch", None)
+ current_gradient_accumulation_steps = kwargs.get("current_gradient_accumulation_steps", 1)
+ num_processes = kwargs.get("num_processes",
1) + use_vllm = kwargs.get("use_vllm", False) + vllm_importance_sampling_cap = kwargs.get("vllm_importance_sampling_cap", 2.0) + input_ids = input_ids.unsqueeze(-1) + + # Optional logit softcapping and logit dividing + if logit_scale_multiply != 0: new_logits = new_logits * logit_scale_multiply + if logit_scale_divide != 0: new_logits = new_logits / logit_scale_divide + if logit_softcapping != 0: new_logits = new_logits * torch.tanh(new_logits / logit_softcapping) + + new_logits = new_logits.to(torch.float32) + # See https://huggingface.co/blog/the_n_implementation_details_of_rlhf_with_ppo#policy-training-implementation-details + if temperature != 1.0: new_logits = new_logits / temperature + new_x = torch.gather(new_logits, dim = -1, index = input_ids).squeeze(-1) + new = new_x - torch.logsumexp(new_logits, dim = -1) + # x_i - logsumexp(x_i) + with torch.no_grad(): + if beta != 0.0: + assert ref_logits is not None, "ref_logits should not be None when beta != 0.0" + + # Optional logit softcapping and logit dividing + if logit_scale_multiply != 0: ref_logits = ref_logits * logit_scale_multiply + if logit_scale_divide != 0: ref_logits = ref_logits / logit_scale_divide + if logit_softcapping != 0: ref_logits = ref_logits * torch.tanh(ref_logits / logit_softcapping) + + ref_logits = ref_logits.to(torch.float32) + # See https://huggingface.co/blog/the_n_implementation_details_of_rlhf_with_ppo#policy-training-implementation-details + if temperature != 1.0: ref_logits = ref_logits / temperature + ref_x = torch.gather(ref_logits, dim = -1, index = input_ids).squeeze(-1) + ref = ref_x - torch.logsumexp(ref_logits, dim = -1) + pass + + if old_logits is not None: + # Optional logit softcapping and logit dividing + if logit_scale_multiply != 0: old_logits = old_logits * logit_scale_multiply + if logit_scale_divide != 0: old_logits = old_logits / logit_scale_divide + if logit_softcapping != 0: old_logits = old_logits * torch.tanh(old_logits / logit_softcapping) + + old_logits = old_logits.to(torch.float32) + # See https://huggingface.co/blog/the_n_implementation_details_of_rlhf_with_ppo#policy-training-implementation-details + if temperature != 1.0: old_logits = old_logits / temperature + old_x = torch.gather(old_logits, dim = -1, index = input_ids).squeeze(-1) + old = old_x - torch.logsumexp(old_logits, dim = -1) + pass + if use_vllm and sampling_per_token_logps is not None: + #must filter out extra prompt tokens in begining after making input_ids left padded + importance_sampling_ratio = torch.exp((old * mask) - sampling_per_token_logps) + importance_sampling_ratio = torch.clamp( + importance_sampling_ratio, max=vllm_importance_sampling_cap + ) + pass + pass + + # Reverse KL + # Note that this is a low variance low bias estimator for the KL divergence as used in GRPO paper + if beta != 0.0: + kl_i = torch.exp(ref - new) - (ref - new) - 1.0 + + else: + # set kl_i to a tensor of zeros with the correct shape + if importance_sampling_level == "sequence": + kl_i = new.new_zeros(new.size(0), 1) + else: + kl_i = torch.zeros_like(new) + # Full correct reverse KL divergence?? Missing term maybe? 
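+    # For reference, kl_i above is Schulman's k3 estimator
+    # exp(ref - new) - (ref - new) - 1, which is always non-negative and has
+    # low variance and low bias (see "Approximating KL Divergence",
+    # http://joschu.net/blog/kl-approx.html).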
+ # kl_i = torch.exp(new) * kl_i + + # Below is forward KL (normal KL) + # kl_i = torch.exp(old) * (old - new) + if old_logits is not None: + log_ratio = new - old + else: + log_ratio = new - new.detach() + + if importance_sampling_level == "token": + log_importance_weights = log_ratio + elif importance_sampling_level == "sequence": + log_importance_weights = (log_ratio * mask).sum(-1) / mask.sum(-1).clamp(min=1.0) + log_importance_weights = log_importance_weights.unsqueeze(-1) + else: + raise ValueError( + f"Unknown importance sampling level: {importance_sampling_level}. Possible values are 'token' " + "and 'sequence'." + ) + + coef_1 = torch.exp(log_importance_weights) + + coef_2 = torch.clamp(coef_1, 1 - epsilon_low, 1 + epsilon_high) + + if delta is not None: + loss_1 = torch.clamp(coef_1, max=delta) * advantages.unsqueeze(1) + else: + loss_1 = coef_1 * advantages.unsqueeze(1) + pass + + # Must detach - otherwise gradients are not propagated correctly! + # exp(x - x) == 1 + # loss_i = torch.exp(new - new.detach()) * advantages.unsqueeze(1) + + loss_2 = coef_2 * advantages.unsqueeze(1) + loss_i = -torch.min(loss_1, loss_2) + + if use_vllm and sampling_per_token_logps is not None: + loss_i = loss_i * importance_sampling_ratio + #delta for metric + with torch.no_grad(): + delta = torch.abs(old - sampling_per_token_logps) + delta = delta * mask + flat_is_ratio = importance_sampling_ratio * mask + else: + delta = torch.tensor([]).detach() + flat_is_ratio = torch.tensor([]).detach() + if beta != 0.0: + loss_i = loss_i + beta * kl_i + + mask = mask.to(torch.float32) + n_mask_per_reward = mask.sum(1) + + # https://github.com/huggingface/trl/blob/e8b8499f1f8d76838155b515e414ee98f757d6d5/trl/trainer/grpo_trainer.py#L1624 + if loss_type == "grpo": + loss = ((loss_i * mask).sum(-1) / mask.sum(-1).clamp(min=1.0)).mean() + loss = loss / current_gradient_accumulation_steps + elif loss_type == "bnpo": + loss = (loss_i * mask).sum() / mask.sum().clamp(min=1.0) + loss = loss / current_gradient_accumulation_steps + elif loss_type == "dr_grpo": + loss = (loss_i * mask).sum() / (loss_i.size(0) * max_completion_length) + loss = loss / current_gradient_accumulation_steps + elif loss_type == "dapo": + normalizer = num_items_in_batch/ num_processes + loss = (loss_i * mask).sum() / normalizer + else: + raise ValueError(f"Unknown loss type: {loss_type}") + + # loss = (loss_i * mask).sum() / mask.sum() + + # Get metrics as well which are folded + def masked_batch_mean(x): + with torch.inference_mode(): + completion_length = n_mask_per_reward.mean() + if x.shape[1] == 1: # when importance_sampling_level == "sequence" + return completion_length, x.mean() + else: + mean_kl_per_reward = (x * mask).sum(1) / n_mask_per_reward + mean_kl = mean_kl_per_reward.mean() + return completion_length, mean_kl + completion_length, mean_kl = masked_batch_mean(kl_i) + return loss, completion_length, mean_kl, delta, flat_is_ratio + +def vLLMSamplingParams(**kwargs): + from vllm import SamplingParams + sampling_params = SamplingParams(**kwargs) + sampling_params._set_kwargs = kwargs + return sampling_params +@dataclass +class UnslothGRPOConfig(GRPOConfig): + """ + +Configuration class for the [`GRPOTrainer`]. + +This class includes only the parameters that are specific to GRPO training. For a full list of training arguments, +please refer to the [`~transformers.TrainingArguments`] documentation. Note that default values in this class may +differ from those in [`~transformers.TrainingArguments`]. 
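+
+A minimal construction sketch (illustrative values; every field is documented
+below):
+
+```python
+config = UnslothGRPOConfig(
+    output_dir = "outputs",
+    per_device_train_batch_size = 8,
+    num_generations = 8,
+    max_prompt_length = 512,
+    max_completion_length = 256,
+)
+```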
+ +Using [`~transformers.HfArgumentParser`] we can turn this class into +[argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the +command line. + +Parameters: + > Parameters that control the model and reference model + + model_init_kwargs (`str`, `dict[str, Any]` or `None`, *optional*, defaults to `None`): + Keyword arguments for [`~transformers.AutoModelForCausalLM.from_pretrained`], used when the `model` + argument of the [`GRPOTrainer`] is provided as a string. + disable_dropout (`bool`, *optional*, defaults to `False`): + Whether to disable dropout in the model. This is useful for training with a reference model, as it prevents + the model from generating different logprobs for the same input. + + > Parameters that control the data preprocessing + + remove_unused_columns (`bool`, *optional*, defaults to `False`): + Whether to only keep the column `"prompt"` in the dataset. If you use a custom reward function that + requires any column other than `"prompts"` and `"completions"`, you should keep this to `False`. + max_prompt_length (`int` or `None`, *optional*, defaults to `512`): + Maximum length of the prompt. If the prompt is longer than this value, it will be truncated left. + num_generations (`int` or `None`, *optional*, defaults to `8`): + Number of generations per prompt to sample. The effective batch size (num_processes * per_device_batch_size + * gradient_accumulation_steps) must be evenly divisible by this value. + max_completion_length (`int` or `None`, *optional*, defaults to `256`): + Maximum length of the generated completion. + ds3_gather_for_generation (`bool`, *optional*, defaults to `True`): + This setting applies to DeepSpeed ZeRO-3. If enabled, the policy model weights are gathered for generation, + improving generation speed. However, disabling this option allows training models that exceed the VRAM + capacity of a single GPU, albeit at the cost of slower generation. Disabling this option is not compatible + with vLLM generation. + shuffle_dataset (`bool`, *optional*, defaults to `True`): + Whether to shuffle the training dataset. + + > Parameters that control generation + + generation_batch_size: (`int` or `None`, *optional*, defaults to `None`): + Batch size to use for generation. If `None`, it defaults to the effective training batch size: + `per_device_train_batch_size * num_processes * steps_per_generation`. In other words, there is one + generation batch processed per optimization step. Mutually exclusive with `steps_per_generation`. + steps_per_generation: (`int` or `None`, *optional*, defaults to `None`): + Number of steps per generation. If `None`, it defaults to `gradient_accumulation_steps`. Mutually exclusive + with `generation_batch_size`. + temperature (`float`, defaults to `1.0`): + Temperature for sampling. The higher the temperature, the more random the completions. + top_p (`float`, *optional*, defaults to `1.0`): + Float that controls the cumulative probability of the top tokens to consider. Must be in (0, 1]. Set to + `1.0` to consider all tokens. + top_k (`int` or `None`, *optional*, defaults to `None`): + Number of highest probability vocabulary tokens to keep for top-k-filtering. If `None`, top-k-filtering is + disabled and all tokens are considered. + min_p (`float` or `None`, *optional*, defaults to `None`): + Minimum token probability, which will be scaled by the probability of the most likely token. It must be a + value between `0.0` and `1.0`. Typical values are in the `0.01-0.2` range. 
+ repetition_penalty (`float`, *optional*, defaults to `1.0`): + Float that penalizes new tokens based on whether they appear in the prompt and the generated text so far. + Values > `1.0` encourage the model to use new tokens, while values < `1.0` encourage the model to repeat + tokens. + use_transformers_paged (`bool`, *optional*, defaults to `False`): + Whether to use the `transformers` paged implementation for generation. If set to `True`, the `transformers` + paged implementation will be used for generation instead of the default padded implementation. This + parameter is only effective when `use_vllm` is set to `False`. + cache_implementation (`str` or `None`, *optional*, defaults to `None`): + Implementation of the cache method for faster generation when `use_vllm` is set to `False`. + generation_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`): + Additional keyword arguments to pass to `GenerationConfig` (if using transformers) or `SamplingParams` (if + using vLLM) when sampling completions. This can be used to further customize the generation behavior, such + as setting `suppress_tokens`, `num_beams`, etc. If it contains keys that conflict with the other generation + parameters (like `min_p`, `top_p`, etc.), they will override them. + + > Parameters that control generation acceleration powered by vLLM + + use_vllm (`bool`, *optional*, defaults to `False`): + Whether to use vLLM for generating completions. If set to `True`, the trainer will use vLLM for generation + instead of the default model.generate(). Requires `vllm` to be installed. + vllm_mode (`str`, *optional*, defaults to `"server"`): + Mode to use for vLLM integration when `use_vllm` is set to `True`. Must be one of `"server"` or + `"colocate"`. + + - `"server"`: The trainer will send generation requests to a separate vLLM server. Make sure a TRL vLLM + server is running (start with `trl vllm-serve`). + - `"colocate"`: vLLM will run in the same process and share the training GPUs. This avoids the need for a + separate server but may cause resource contention with training. + vllm_model_impl (`str`, *optional*, defaults to `"vllm"`): + Model implementation to use for vLLM. Must be one of `"transformers"` or `"vllm"`. `"transformers"`: Use + the `transformers` backend for model implementation. `"vllm"`: Use the `vllm` library for model + implementation. + vllm_guided_decoding_regex (`str` or `None`, *optional*, defaults to `None`): + Regex for vLLM guided decoding. If `None` (default), guided decoding is disabled. + + > Parameters that control the vLLM server (only used when `vllm_mode` is `"server"`) + + vllm_server_base_url (`str` or `None`, *optional*, defaults to `None`): + Base URL for the vLLM server (e.g., `"http://localhost:8000"`). If provided, `vllm_server_host` and + `vllm_server_port` are ignored. + vllm_server_host (`str`, *optional*, defaults to `"0.0.0.0"`): + Host of the vLLM server to connect to. Ignored if `vllm_server_base_url` is provided. + vllm_server_port (`int`, *optional*, defaults to `8000`): + Port of the vLLM server to connect to. Ignored if `vllm_server_base_url` is provided. + vllm_server_timeout (`float`, *optional*, defaults to `240.0`): + Total timeout duration in seconds to wait for the vLLM server to be up. If the server is not up after the + timeout, a `ConnectionError` is raised. 
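+
+    As an example, a server-mode setup (illustrative values) combines
+    `use_vllm=True`, `vllm_mode="server"` and
+    `vllm_server_base_url="http://localhost:8000"`, with the TRL vLLM server
+    started separately via `trl vllm-serve`.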
+ + > Parameters that control colocated vLLM execution (only used when `vllm_mode` is `"colocate"`) + + vllm_gpu_memory_utilization (`float`, *optional*, defaults to `0.3`): + Control the GPU memory utilization for vLLM. This setting only applies when `vllm_mode` is set to + `"colocate"`. If you are using `vllm_mode="server"`, this parameter must be passed separately when + launching the vLLM server via the `--vllm_gpu_memory_utilization` flag. + vllm_tensor_parallel_size (`int`, *optional*, defaults to `1`): + Control the tensor parallel size for vLLM. This setting only applies when `vllm_mode` is set to + `"colocate"`. If you are using `vllm_mode="server"`, this parameter must be passed separately when + launching the vLLM server via the `--vllm_tensor_parallel_size` flag. + vllm_enable_sleep_mode (`bool`, *optional*, defaults to `False`): + Whether to enable sleep mode for vLLM. If `True`, vLLM will sleep during the optimization step and woken + for weight sync and generation. + + > Parameters that control the training + + beta (`float`, *optional*, defaults to `0.0`): + KL coefficient. If `0.0` (default), the reference model is not loaded, reducing memory usage and improving + training speed. + num_iterations (`int`, *optional*, defaults to `1`): + Number of iterations per batch (denoted as μ in the algorithm). + epsilon (`float`, *optional*, defaults to `0.2`): + Epsilon value for clipping. + delta (`float` or `None`, *optional*, defaults to `None`): + Enables the upper clipping bound in two-sided GRPO loss when set to a float. If `None` (default), standard + GRPO clipping is used. Recommended to be greater than `1 + ε` when enabled. This method is introduced in + the [INTELLECT-2 tech report](https://huggingface.co/papers/2505.07291). + epsilon_high (`float` or `None`, *optional*, defaults to `None`): + Upper-bound epsilon value for clipping. If not specified, it defaults to the same value as the lower-bound + specified in argument `epsilon`. Paper [DAPO](https://huggingface.co/papers/2503.14476) recommends `0.28`. + importance_sampling_level (`str`, *optional*, defaults to `"token"`): + Controls whether importance sampling ratios are computed at the `"token"` or `"sequence"` level. `"token"` + keeps the raw per-token log-probability ratios (one weight per token). `"sequence"` averages the + log-probability ratios across valid tokens to produce a single ratio per sequence. The [GSPO + paper](https://huggingface.co/papers/2507.18071) shows that sequence-level sampling often yields more + stable training and better alignment with sequence-level rewards. + reward_weights (`list[float]` or `None`, *optional*, defaults to `None`): + Weights for each reward function. Must match the number of reward functions. If `None`, all rewards are + weighted equally with weight `1.0`. + scale_rewards (`str` or `bool`, *optional*, defaults to `"group"`): + Specifies the scaling strategy for rewards. Supported values are: + + - `True` or `"group"` (default): rewards are scaled by the standard deviation within each group, ensuring + unit variance within a group. + - `"batch"`: rewards are scaled by the standard deviation across the entire batch, as recommended in the + [PPO Lite paper](https://huggingface.co/papers/2508.08221). + - `False` or `"none"`: no scaling is applied. The [Dr. GRPO + paper](https://huggingface.co/papers/2503.20783) recommends not scaling rewards, as scaling by the + standard deviation introduces a question-level difficulty bias. 
+ loss_type (`str`, *optional*, defaults to `"dapo"`): + Specifies the loss formulation to use. Supported values are: + + - `"grpo"`: Aggregates token-level losses by normalizing over sequence length. Not recommended due to + length bias—this approach tends to prefer shorter completions with positive advantages and longer ones + with negative advantages. + - `"dr_grpo"`: Aggregates token-level losses by normalizing with a global constant. This method was + introduced in the [Dr. GRPO paper](https://huggingface.co/papers/2503.20783) to eliminate length bias. + The value of the constant corresponds to `max_completion_length`. + - `"dapo"` (default): Aggregates token-level losses by normalizing with the number of active token in the + global accumulated batch. This method was introduced in the [DAPO + paper](https://huggingface.co/papers/2503.14476) to eliminate length bias. + - `"bnpo"`: Aggregates token-level losses by normalizing with the number of active token in the local + batch. Note that normalization is performed over the local batch only, so results may slightly vary + depending on the local batch size, despite a constant effective batch size. When using + `per_device_train_batch_size==1`, the loss is equivalent to the GRPO loss. + mask_truncated_completions (`bool`, *optional*, defaults to `False`): + When enabled, truncated completions are excluded from the loss calculation, preventing them from being + incorrectly penalized and introducing noise during training. According to the + [DAPO](https://huggingface.co/papers/2503.14476) paper, this is a good practice for training stability. + sync_ref_model (`bool`, *optional*, defaults to `False`): + Whether to synchronize the reference model with the active model every `ref_model_sync_steps` steps, using + the `ref_model_mixup_alpha` parameter. This synchronization originates from the + [TR-DPO](https://huggingface.co/papers/2404.09656) paper. + ref_model_mixup_alpha (`float`, *optional*, defaults to `0.6`): + α parameter from the [TR-DPO](https://huggingface.co/papers/2404.09656) paper, which controls the mix + between the current policy and the previous reference policy during updates. The reference policy is + updated according to the equation: `π_ref = α * π_θ + (1 - α) * π_ref_prev`. To use this parameter, you + must set `sync_ref_model=True`. + ref_model_sync_steps (`int`, *optional*, defaults to `512`): + τ parameter from the [TR-DPO](https://huggingface.co/papers/2404.09656) paper, which determines how + frequently the current policy is synchronized with the reference policy. To use this parameter, you must + set `sync_ref_model=True`. + top_entropy_quantile (`float`, *optional*, defaults to `1.0`): + ρ parameter from [Beyond the 80/20 Rule](https://huggingface.co/papers/2506.01939). Keeps in the policy + loss term only the top-ρ quantile of tokens by entropy of the probability distribution at each sequence + position, improving results. Range: `[0.0-1.0]`. A value of `0.0` masks all but the highest entropy token; + `1.0` keeps all tokens. The paper recommends a value of `0.2`. If used with + `mask_truncated_completions=True`, only tokens from non-truncated completions are considered. + use_liger_loss (`bool`, *optional*, defaults to `False`): + Whether to use the Liger GRPO loss. + vllm_importance_sampling_correction (`bool`, *optional*, defaults to `True`): + Whether to apply Truncated Importance Sampling (TIS) between vLLM completion logprobs and recomputed + logprobs. 
[Your Efficient RL Framework Secretly Brings You Off-Policy RL + Training](https://fengyao.notion.site/off-policy-rl) highlights that using a separate generation framework + (such as vLLM) can introduce off-policy effects due to subtle implementation differences between generation + and training backends. TIS is proposed as a remedy for this issue. + vllm_importance_sampling_cap (`float`, *optional*, defaults to `2.0`): + Truncation parameter C for Truncated Importance Sampling (TIS). This sets an upper bound on the importance + sampling ratio, improving training stability. + + > Parameters that control the logging + + log_completions (`bool`, *optional*, defaults to `False`): + Whether to log a sample of (prompt, completion) pairs every `logging_steps` steps. If `rich` is installed, + it prints the sample. If `wandb` logging is enabled, it logs it to `wandb`. + num_completions_to_print (`int` or `None`, *optional*, defaults to `None`): + Number of completions to print with `rich`. If `None`, all completions are logged. + wandb_log_unique_prompts (`bool`, *optional*, defaults to `False`): + Whether to log unique prompts in wandb. If `True`, only unique prompts are logged. If `False`, all prompts + are logged. + + """ + vllm_sampling_params: Optional[Any] = field( + default = None, + metadata = {'help': 'vLLM SamplingParams'}, + ) + unsloth_num_chunks : Optional[int] = field( + default = -1, + metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'}, + ) + + def __init__( + self, + output_dir = None, + overwrite_output_dir = None, + do_train = False, + do_eval = False, + do_predict = False, + eval_strategy = 'no', + prediction_loss_only = False, + per_device_train_batch_size = 4, + per_device_eval_batch_size = 4, + per_gpu_train_batch_size = None, + per_gpu_eval_batch_size = None, + gradient_accumulation_steps = 2, + eval_accumulation_steps = 2, + eval_delay = 0, + torch_empty_cache_steps = 250, + learning_rate = 5e-05, + weight_decay = 0.01, + adam_beta1 = 0.9, + adam_beta2 = 0.999, + adam_epsilon = 1e-08, + max_grad_norm = 1.0, + num_train_epochs = 3.0, + max_steps = -1, + lr_scheduler_type = 'linear', + warmup_ratio = 0.1, + warmup_steps = 0, + log_level = 'passive', + log_level_replica = 'warning', + log_on_each_node = True, + logging_dir = None, + logging_strategy = 'steps', + logging_first_step = False, + logging_steps = 1, + logging_nan_inf_filter = False, + save_strategy = 'steps', + save_steps = 500, + save_total_limit = None, + save_safetensors = True, + save_on_each_node = False, + save_only_model = False, + restore_callback_states_from_checkpoint = False, + no_cuda = False, + use_cpu = False, + use_mps_device = False, + seed = 3407, + data_seed = 3407, + jit_mode_eval = False, + bf16 = False, + fp16 = False, + fp16_opt_level = 'O1', + half_precision_backend = 'auto', + bf16_full_eval = False, + fp16_full_eval = False, + tf32 = None, + local_rank = -1, + ddp_backend = None, + tpu_num_cores = None, + tpu_metrics_debug = False, + debug = '', + dataloader_drop_last = False, + eval_steps = None, + dataloader_num_workers = 0, + dataloader_prefetch_factor = None, + past_index = -1, + run_name = None, + disable_tqdm = None, + remove_unused_columns = False, + label_names = None, + load_best_model_at_end = False, + metric_for_best_model = None, + greater_is_better = None, + ignore_data_skip = False, + fsdp = None, + fsdp_min_num_params = 0, + fsdp_config = None, + fsdp_transformer_layer_cls_to_wrap = None, + accelerator_config = None, + parallelism_config = None, + 
deepspeed = None, + label_smoothing_factor = 0.0, + optim = 'adamw_8bit', + optim_args = None, + adafactor = False, + group_by_length = False, + length_column_name = 'length', + report_to = None, + project = 'huggingface', + trackio_space_id = 'trackio', + ddp_find_unused_parameters = None, + ddp_bucket_cap_mb = None, + ddp_broadcast_buffers = None, + dataloader_pin_memory = True, + dataloader_persistent_workers = False, + skip_memory_metrics = True, + use_legacy_prediction_loop = False, + push_to_hub = False, + resume_from_checkpoint = None, + hub_model_id = None, + hub_strategy = 'every_save', + hub_token = None, + hub_private_repo = None, + hub_always_push = False, + hub_revision = None, + gradient_checkpointing = True, + gradient_checkpointing_kwargs = None, + include_inputs_for_metrics = False, + eval_do_concat_batches = True, + fp16_backend = 'auto', + push_to_hub_model_id = None, + push_to_hub_organization = None, + push_to_hub_token = None, + mp_parameters = '', + auto_find_batch_size = False, + full_determinism = False, + torchdynamo = None, + ray_scope = 'last', + ddp_timeout = 1800, + torch_compile = False, + torch_compile_backend = None, + torch_compile_mode = None, + include_tokens_per_second = False, + include_num_input_tokens_seen = False, + neftune_noise_alpha = None, + optim_target_modules = None, + batch_eval_metrics = False, + eval_on_start = False, + use_liger_kernel = False, + liger_kernel_config = None, + eval_use_gather_object = False, + average_tokens_across_devices = True, + model_init_kwargs = None, + disable_dropout = False, + max_prompt_length = 512, + num_generations = 8, + max_completion_length = 256, + ds3_gather_for_generation = True, + shuffle_dataset = True, + generation_batch_size = None, + steps_per_generation = None, + temperature = 1.0, + top_p = 1.0, + top_k = None, + min_p = None, + generation_kwargs = {}, + repetition_penalty = 1.0, + use_transformers_paged = False, + cache_implementation = None, + use_vllm = False, + vllm_mode = 'colocate', + vllm_model_impl = 'vllm', + vllm_enable_sleep_mode = False, + vllm_guided_decoding_regex = None, + vllm_server_base_url = None, + vllm_server_host = '0.0.0.0', + vllm_server_port = 8000, + vllm_server_timeout = 240.0, + vllm_gpu_memory_utilization = 0.3, + vllm_tensor_parallel_size = 1, + beta = 0.001, + num_iterations = 1, + epsilon = 0.2, + delta = None, + epsilon_high = None, + importance_sampling_level = 'token', + reward_weights = None, + scale_rewards = 'group', + loss_type = 'bnpo', + mask_truncated_completions = False, + sync_ref_model = False, + ref_model_mixup_alpha = 0.6, + ref_model_sync_steps = 512, + top_entropy_quantile = 1.0, + use_liger_loss = False, + vllm_importance_sampling_correction = False, + vllm_importance_sampling_cap = 2.0, + log_completions = False, + num_completions_to_print = None, + wandb_log_unique_prompts = False, + vllm_sampling_params = None, + unsloth_num_chunks = -1, + + **kwargs, + ): + if learning_rate < 1e-7: print(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!') + if learning_rate > 1: print(f'Unsloth: Your learning rate of `{learning_rate}` is way too larger > 1! 
Consider decreasing it to 1e-1, otherwise gradient updates will explode!')
+ if output_dir is None and save_strategy == 'steps' and save_steps == 500:
+ output_dir = 'unsloth_training_checkpoints'
+ save_strategy = 'no'
+ if loss_type.lower() == 'dr_grpo':
+ loss_type = 'dr_grpo'
+ elif loss_type.lower() == 'dapo':
+ loss_type = 'dapo'
+ if loss_type.lower() == 'dr_grpo':
+ if scale_rewards == None:
+ scale_rewards = True
+ elif scale_rewards == True:
+ print('Unsloth: The Dr GRPO paper recommends setting `scale_rewards` to False! Will override. Set it to `None` to force False.')
+ scale_rewards = False
+ elif loss_type.lower() == 'dapo':
+ if mask_truncated_completions != True:
+ print('Unsloth: The DAPO paper recommends `mask_truncated_completions = True` - we will set it.')
+ if epsilon_high != 0.28:
+ print('Unsloth: The DAPO paper recommends `epsilon_high = 0.28` - we will set it.')
+ if beta != 0.0:
+ print('Unsloth: The DAPO paper recommends setting `beta = 0.0` to remove the KL term - we will set it.')
+ mask_truncated_completions = True
+ epsilon_high = 0.28
+ beta = 0.0
+
+ if (per_device_train_batch_size // num_generations) * num_generations != per_device_train_batch_size:
+ print('Unsloth: We now expect `per_device_train_batch_size` to be a multiple of `num_generations`.\nWe will change the batch size of ' + str(per_device_train_batch_size) + ' to the `num_generations` of ' + str(num_generations))
+ per_device_train_batch_size = num_generations
+
+ if temperature <= 0:
+ raise ValueError('Unsloth: Please set a positive non-zero temperature since your results will be wrong.')
+ elif temperature >= 10:
+ raise ValueError('Unsloth: Please set a positive non-zero temperature less than 10, since sampling will be quite erratic.')
+
+
+ super().__init__(
+ output_dir = output_dir,
+ overwrite_output_dir = overwrite_output_dir,
+ do_train = do_train,
+ do_eval = do_eval,
+ do_predict = do_predict,
+ eval_strategy = eval_strategy,
+ prediction_loss_only = prediction_loss_only,
+ per_device_train_batch_size = per_device_train_batch_size,
+ per_device_eval_batch_size = per_device_eval_batch_size,
+ per_gpu_train_batch_size = per_gpu_train_batch_size,
+ per_gpu_eval_batch_size = per_gpu_eval_batch_size,
+ gradient_accumulation_steps = gradient_accumulation_steps,
+ eval_accumulation_steps = eval_accumulation_steps,
+ eval_delay = eval_delay,
+ torch_empty_cache_steps = torch_empty_cache_steps,
+ learning_rate = learning_rate,
+ weight_decay = weight_decay,
+ adam_beta1 = adam_beta1,
+ adam_beta2 = adam_beta2,
+ adam_epsilon = adam_epsilon,
+ max_grad_norm = max_grad_norm,
+ num_train_epochs = num_train_epochs,
+ max_steps = max_steps,
+ lr_scheduler_type = lr_scheduler_type,
+ warmup_ratio = warmup_ratio,
+ warmup_steps = warmup_steps,
+ log_level = log_level,
+ log_level_replica = log_level_replica,
+ log_on_each_node = log_on_each_node,
+ logging_dir = logging_dir,
+ logging_strategy = logging_strategy,
+ logging_first_step = logging_first_step,
+ logging_steps = logging_steps,
+ logging_nan_inf_filter = logging_nan_inf_filter,
+ save_strategy = save_strategy,
+ save_steps = save_steps,
+ save_total_limit = save_total_limit,
+ save_safetensors = save_safetensors,
+ save_on_each_node = save_on_each_node,
+ save_only_model = save_only_model,
+ restore_callback_states_from_checkpoint = restore_callback_states_from_checkpoint,
+ no_cuda = no_cuda,
+ use_cpu = use_cpu,
+ use_mps_device = use_mps_device,
+ seed = seed,
+ data_seed = data_seed,
+ jit_mode_eval = jit_mode_eval,
+ bf16 = bf16,
+
fp16 = fp16, + fp16_opt_level = fp16_opt_level, + half_precision_backend = half_precision_backend, + bf16_full_eval = bf16_full_eval, + fp16_full_eval = fp16_full_eval, + tf32 = tf32, + local_rank = local_rank, + ddp_backend = ddp_backend, + tpu_num_cores = tpu_num_cores, + tpu_metrics_debug = tpu_metrics_debug, + debug = debug, + dataloader_drop_last = dataloader_drop_last, + eval_steps = eval_steps, + dataloader_num_workers = dataloader_num_workers, + dataloader_prefetch_factor = dataloader_prefetch_factor, + past_index = past_index, + run_name = run_name, + disable_tqdm = disable_tqdm, + remove_unused_columns = remove_unused_columns, + label_names = label_names, + load_best_model_at_end = load_best_model_at_end, + metric_for_best_model = metric_for_best_model, + greater_is_better = greater_is_better, + ignore_data_skip = ignore_data_skip, + fsdp = fsdp, + fsdp_min_num_params = fsdp_min_num_params, + fsdp_config = fsdp_config, + fsdp_transformer_layer_cls_to_wrap = fsdp_transformer_layer_cls_to_wrap, + accelerator_config = accelerator_config, + parallelism_config = parallelism_config, + deepspeed = deepspeed, + label_smoothing_factor = label_smoothing_factor, + optim = optim, + optim_args = optim_args, + adafactor = adafactor, + group_by_length = group_by_length, + length_column_name = length_column_name, + report_to = report_to, + project = project, + trackio_space_id = trackio_space_id, + ddp_find_unused_parameters = ddp_find_unused_parameters, + ddp_bucket_cap_mb = ddp_bucket_cap_mb, + ddp_broadcast_buffers = ddp_broadcast_buffers, + dataloader_pin_memory = dataloader_pin_memory, + dataloader_persistent_workers = dataloader_persistent_workers, + skip_memory_metrics = skip_memory_metrics, + use_legacy_prediction_loop = use_legacy_prediction_loop, + push_to_hub = push_to_hub, + resume_from_checkpoint = resume_from_checkpoint, + hub_model_id = hub_model_id, + hub_strategy = hub_strategy, + hub_token = hub_token, + hub_private_repo = hub_private_repo, + hub_always_push = hub_always_push, + hub_revision = hub_revision, + gradient_checkpointing = gradient_checkpointing, + gradient_checkpointing_kwargs = gradient_checkpointing_kwargs, + include_inputs_for_metrics = include_inputs_for_metrics, + eval_do_concat_batches = eval_do_concat_batches, + fp16_backend = fp16_backend, + push_to_hub_model_id = push_to_hub_model_id, + push_to_hub_organization = push_to_hub_organization, + push_to_hub_token = push_to_hub_token, + mp_parameters = mp_parameters, + auto_find_batch_size = auto_find_batch_size, + full_determinism = full_determinism, + torchdynamo = torchdynamo, + ray_scope = ray_scope, + ddp_timeout = ddp_timeout, + torch_compile = torch_compile, + torch_compile_backend = torch_compile_backend, + torch_compile_mode = torch_compile_mode, + include_tokens_per_second = include_tokens_per_second, + include_num_input_tokens_seen = include_num_input_tokens_seen, + neftune_noise_alpha = neftune_noise_alpha, + optim_target_modules = optim_target_modules, + batch_eval_metrics = batch_eval_metrics, + eval_on_start = eval_on_start, + use_liger_kernel = use_liger_kernel, + liger_kernel_config = liger_kernel_config, + eval_use_gather_object = eval_use_gather_object, + average_tokens_across_devices = average_tokens_across_devices, + model_init_kwargs = model_init_kwargs, + disable_dropout = disable_dropout, + max_prompt_length = max_prompt_length, + num_generations = num_generations, + max_completion_length = max_completion_length, + ds3_gather_for_generation = ds3_gather_for_generation, + shuffle_dataset = 
shuffle_dataset, + generation_batch_size = generation_batch_size, + steps_per_generation = steps_per_generation, + temperature = temperature, + top_p = top_p, + top_k = top_k, + min_p = min_p, + generation_kwargs = generation_kwargs, + repetition_penalty = repetition_penalty, + use_transformers_paged = use_transformers_paged, + cache_implementation = cache_implementation, + use_vllm = use_vllm, + vllm_mode = vllm_mode, + vllm_model_impl = vllm_model_impl, + vllm_enable_sleep_mode = vllm_enable_sleep_mode, + vllm_guided_decoding_regex = vllm_guided_decoding_regex, + vllm_server_base_url = vllm_server_base_url, + vllm_server_host = vllm_server_host, + vllm_server_port = vllm_server_port, + vllm_server_timeout = vllm_server_timeout, + vllm_gpu_memory_utilization = vllm_gpu_memory_utilization, + vllm_tensor_parallel_size = vllm_tensor_parallel_size, + beta = beta, + num_iterations = num_iterations, + epsilon = epsilon, + delta = delta, + epsilon_high = epsilon_high, + importance_sampling_level = importance_sampling_level, + reward_weights = reward_weights, + scale_rewards = scale_rewards, + loss_type = loss_type, + mask_truncated_completions = mask_truncated_completions, + sync_ref_model = sync_ref_model, + ref_model_mixup_alpha = ref_model_mixup_alpha, + ref_model_sync_steps = ref_model_sync_steps, + top_entropy_quantile = top_entropy_quantile, + use_liger_loss = use_liger_loss, + vllm_importance_sampling_correction = vllm_importance_sampling_correction, + vllm_importance_sampling_cap = vllm_importance_sampling_cap, + log_completions = log_completions, + num_completions_to_print = num_completions_to_print, + wandb_log_unique_prompts = wandb_log_unique_prompts,**kwargs) + self.vllm_sampling_params = vllm_sampling_params + self.unsloth_num_chunks = unsloth_num_chunks + +pass + +class _UnslothGRPOTrainer(Trainer): + """ + Trainer for the Group Relative Policy Optimization (GRPO) method. This algorithm was initially proposed in the + paper [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language + Models](https://huggingface.co/papers/2402.03300). + + Example: + + ```python + from datasets import load_dataset + from trl import GRPOTrainer + + dataset = load_dataset("trl-lib/tldr", split="train") + def reward_func(completions, **kwargs): + # Dummy reward function that rewards completions with more unique letters. + return [float(len(set(completion))) for completion in completions] + trainer = GRPOTrainer( + model="Qwen/Qwen2-0.5B-Instruct", + reward_funcs=reward_func, + train_dataset=dataset, + ) + + trainer.train() + ``` + + Args: + model (`Union[str, PreTrainedModel]`): + Model to be trained. Can be either: + + - A string, being the *model id* of a pretrained model hosted inside a model repo on huggingface.co, or a + path to a *directory* containing model weights saved using + [`~transformers.PreTrainedModel.save_pretrained`], e.g., `'./my_model_directory/'`. The model is loaded + using [`~transformers.AutoModelForCausalLM.from_pretrained`] with the keyword arguments in + `args.model_init_kwargs`. + - A [`~transformers.PreTrainedModel`] object. Only causal language models are supported. + reward_funcs (`Union[RewardFunc, list[RewardFunc]]`): + Reward functions to be used for computing the rewards. To compute the rewards, we call all the reward + functions with the prompts and completions and sum the rewards. 
Can be either:
+
+            - A single reward function, such as:
+                - A string: The *model ID* of a pretrained model hosted inside a model repo on huggingface.co, or a
+                  path to a *directory* containing model weights saved using
+                  [`~transformers.PreTrainedModel.save_pretrained`], e.g., `'./my_model_directory/'`. The model is loaded
+                  using [`~transformers.AutoModelForSequenceClassification.from_pretrained`] with `num_labels=1` and the
+                  keyword arguments in `args.model_init_kwargs`.
+                - A [`~transformers.PreTrainedModel`] object: Only sequence classification models are supported.
+                - A custom reward function: The function is provided with the prompts and the generated completions,
+                  plus any additional columns in the dataset. It should return a list of rewards. Custom reward
+                  functions can also return `None` when the reward is not applicable to those samples. This is useful
+                  for multi-task training where different reward functions apply to different types of samples. When a
+                  reward function returns `None` for a sample, that reward function is excluded from the reward
+                  calculation for that sample. For more details, see [Using a custom reward
+                  function](#using-a-custom-reward-function).
+
+                  The trainer's state is also passed to the reward function; it is an instance of
+                  [`~transformers.TrainerState`] and can be accessed via the `trainer_state` argument in the reward
+                  function's signature.
+            - A list of reward functions, where each item can independently be any of the above types. Mixing different
+              types within the list (e.g., a string model ID and a custom reward function) is allowed.
+        args ([`GRPOConfig`], *optional*, defaults to `None`):
+            Configuration for this trainer. If `None`, a default configuration is used.
+        train_dataset ([`~datasets.Dataset`] or [`~datasets.IterableDataset`]):
+            Dataset to use for training. It must include a column `"prompt"`. Any additional columns in the dataset are
+            ignored. The format of the samples can be either:
+
+            - [Standard](dataset_formats#standard): Each sample contains plain text.
+            - [Conversational](dataset_formats#conversational): Each sample contains structured messages (e.g., role
+              and content).
+        eval_dataset ([`~datasets.Dataset`], [`~datasets.IterableDataset`] or `dict[str, Union[Dataset, IterableDataset]]`):
+            Dataset to use for evaluation. It must meet the same requirements as `train_dataset`.
+        processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.ProcessorMixin`] or `None`, *optional*, defaults to `None`):
+            Processing class used to process the data. The padding side must be set to "left". If `None`, the
+            processing class is loaded from the model's name with [`~transformers.AutoProcessor.from_pretrained`]. A
+            padding token, `tokenizer.pad_token`, must be set. If the processing class has not set a padding token,
+            `tokenizer.eos_token` will be used as the default.
+        reward_processing_classes (`Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]`, *optional*, defaults to `None`):
+            Processing classes corresponding to the reward functions specified in `reward_funcs`. Can be either:
+
+            - A single processing class: Used when `reward_funcs` contains only one reward function.
+            - A list of processing classes: Must match the order and length of the reward functions in `reward_funcs`.
+ If set to `None`, or if an element of the list corresponding to a [`~transformers.PreTrainedModel`] is + `None`, the tokenizer for the model is automatically loaded using + [`~transformers.AutoTokenizer.from_pretrained`]. For elements in `reward_funcs` that are custom reward + functions (not [`~transformers.PreTrainedModel`]), the corresponding entries in `reward_processing_classes` + are ignored. + callbacks (list of [`~transformers.TrainerCallback`], *optional*, defaults to `None`): + List of callbacks to customize the training loop. Will add those to the list of default callbacks detailed + in [here](https://huggingface.co/docs/transformers/main_classes/callback). + + If you want to remove one of the default callbacks used, use the [`~transformers.Trainer.remove_callback`] + method. + optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`, *optional*, defaults to `(None, None)`): + A tuple containing the optimizer and the scheduler to use. Will default to an instance of [`AdamW`] on your + model and a scheduler given by [`get_linear_schedule_with_warmup`] controlled by `args`. + peft_config ([`~peft.PeftConfig`], *optional*, defaults to `None`): + PEFT configuration used to wrap the model. If `None`, the model is not wrapped. + """ + + _tag_names = ["trl", "grpo"] + + def __init__( + self, + model: Union[str, PreTrainedModel], + reward_funcs: Union[RewardFunc, list[RewardFunc]], + args: Optional[GRPOConfig] = None, + train_dataset: Optional[Union[Dataset, IterableDataset]] = None, + eval_dataset: Optional[Union[Dataset, IterableDataset, dict[str, Union[Dataset, IterableDataset]]]] = None, + processing_class: Optional[Union[PreTrainedTokenizerBase, ProcessorMixin]] = None, + reward_processing_classes: Optional[Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]] = None, + callbacks: Optional[list[TrainerCallback]] = None, + optimizers: tuple[Optional[torch.optim.Optimizer], Optional[torch.optim.lr_scheduler.LambdaLR]] = (None, None), + peft_config: Optional["PeftConfig"] = None, + ): + + if hasattr(model, 'vllm_engine') and hasattr(args, 'use_vllm'): + if (getattr(args, 'use_vllm', False) == False): + args.use_vllm = True + args.vllm_mode='colocate' + # Args + if args is None: + model_name = model if isinstance(model, str) else model.config._name_or_path + model_name = model_name.split("/")[-1] + args = GRPOConfig(f"{model_name}-GRPO") + + # Models + # Trained model + model_init_kwargs = args.model_init_kwargs or {} + if isinstance(model, str): + model_id = model + dtype = model_init_kwargs.get("dtype") + if isinstance(dtype, torch.dtype) or dtype == "auto" or dtype is None: + pass # dtype is already a torch.dtype or "auto" or None + elif isinstance(dtype, str): # it's a str, but not "auto" + dtype = getattr(torch, dtype) + model_init_kwargs["dtype"] = dtype + else: + raise ValueError( + "Invalid `dtype` passed to `GRPOConfig`. Expected either 'auto' or a string representing " + f"a `torch.dtype` (e.g., 'float32'), but got {dtype}." + ) + # Disable caching if gradient checkpointing is enabled [not supported] + config = AutoConfig.from_pretrained(model_id) + architecture = getattr(transformers, config.architectures[0]) + model = architecture.from_pretrained(model_id, **model_init_kwargs) + else: + model_id = model.config._name_or_path + if args.model_init_kwargs is not None: + logger.warning( + "You passed `model_init_kwargs` to the `GRPOConfig`, but your model is already instantiated. " + "The `model_init_kwargs` will be ignored." 
+ ) + + # Some models [SmolVLM/Idefics3] don't support `logits_to_keep` argument and error out if we pass it + # Inspect the forward method before we wrap the model with PEFT + self.model_kwarg_keys = ( + inspect.signature(model.forward).parameters.keys() + if not hasattr(model, "get_base_model") + else inspect.signature(model.get_base_model().forward).parameters.keys() + ) + + if False: + model = prepare_peft_model(model, peft_config, args) + + # Processing class + if processing_class is None: + processing_class = AutoProcessor.from_pretrained(model.config._name_or_path) + + # Handle pad token for processors or tokenizers + if isinstance(processing_class, ProcessorMixin): + tokenizer = processing_class.tokenizer + elif isinstance(processing_class, PreTrainedTokenizerBase): + tokenizer = processing_class + else: + raise TypeError("The `processing_class` must be either a `PreTrainedTokenizerBase` or a `ProcessorMixin`") + + if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token + + self.pad_token = tokenizer.pad_token + self.pad_token_id = tokenizer.pad_token_id + self.eos_token_id = tokenizer.eos_token_id + self.image_token = getattr(processing_class, "image_token", None) + self.image_token_id = getattr(processing_class, "image_token_id", None) + self.vision_start_token_id = getattr(model.config, "vision_start_token_id", None) + self.vision_end_token_id = getattr(model.config, "vision_end_token_id", None) + + # Reward functions + if not isinstance(reward_funcs, list): + reward_funcs = [reward_funcs] + self.reward_func_names = [] + for i, reward_func in enumerate(reward_funcs): + if isinstance(reward_func, str): + reward_funcs[i] = AutoModelForSequenceClassification.from_pretrained( + reward_func, num_labels=1, **model_init_kwargs + ) + if isinstance(reward_funcs[i], nn.Module): # Use Module over PretrainedModel for compat w/ compiled models + self.reward_func_names.append(reward_funcs[i].config._name_or_path.split("/")[-1]) + else: + self.reward_func_names.append(reward_funcs[i].__name__) + self.reward_funcs = reward_funcs + + # Reward weights + if args.reward_weights is not None: + if len(args.reward_weights) != len(reward_funcs): + raise ValueError( + f"Number of reward weights ({len(args.reward_weights)}) must match number of reward " + f"functions ({len(reward_funcs)})" + ) + self.reward_weights = torch.tensor(args.reward_weights, dtype=torch.float32) + else: + self.reward_weights = torch.ones(len(reward_funcs), dtype=torch.float32) + + # Reward processing class + if reward_processing_classes is None: + reward_processing_classes = [None] * len(reward_funcs) + elif not isinstance(reward_processing_classes, list): + reward_processing_classes = [reward_processing_classes] + if len(reward_processing_classes) != len(reward_funcs): + raise ValueError( + f"The number of reward processing classes ({len(reward_processing_classes)}) must match the number of " + f"reward functions ({len(reward_funcs)})." + ) + + for i, (reward_processing_class, reward_func) in enumerate(zip(reward_processing_classes, reward_funcs)): + if isinstance(reward_func, PreTrainedModel): + if reward_processing_class is None: + reward_processing_class = AutoTokenizer.from_pretrained(reward_func.config._name_or_path) + if reward_processing_class.pad_token_id is None: + reward_processing_class.pad_token = reward_processing_class.eos_token + # The reward model computes the reward for the latest non-padded token in the input sequence. 
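+                # (Reward inputs are tokenized with `padding_side="right"` in `_calculate_rewards` below, so the
+                # "latest non-padded token" is the completion's final token.)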
+                # So it's important to set the pad token ID to the padding token ID of the processing class.
+                reward_func.config.pad_token_id = reward_processing_class.pad_token_id
+                reward_processing_classes[i] = reward_processing_class
+
+        self.reward_processing_classes = reward_processing_classes
+
+        # Training arguments
+        self.max_prompt_length = args.max_prompt_length
+        self.max_completion_length = args.max_completion_length  # = |o_i| in the GRPO paper
+        self.num_generations = args.num_generations  # = G in the GRPO paper
+        self.temperature = args.temperature
+        self.top_p = args.top_p
+        self.top_k = args.top_k
+        self.min_p = args.min_p
+        self.repetition_penalty = args.repetition_penalty
+        self.use_transformers_paged = args.use_transformers_paged
+        self.use_vllm = args.use_vllm
+        self.vllm_mode = args.vllm_mode
+        self.vllm_gpu_memory_utilization = args.vllm_gpu_memory_utilization  # only applies to colocation mode
+        self.vllm_tensor_parallel_size = args.vllm_tensor_parallel_size  # only applies to colocation mode
+        self.vllm_importance_sampling_correction = args.vllm_importance_sampling_correction
+        self.vllm_importance_sampling_cap = args.vllm_importance_sampling_cap
+        self.use_liger_loss = args.use_liger_loss
+        self.loss_type = args.loss_type
+        self.scale_rewards = args.scale_rewards
+        self.importance_sampling_level = args.importance_sampling_level
+        self.mask_truncated_completions = args.mask_truncated_completions
+        self.top_entropy_quantile = args.top_entropy_quantile
+        if self.use_liger_loss and self.top_entropy_quantile < 1.0:
+            raise NotImplementedError(
+                "Liger Kernels don't currently support masking token positions based on entropy."
+            )
+        if self.use_liger_loss and self.importance_sampling_level != "token":
+            raise NotImplementedError(
+                "Liger Kernels currently only support token-level importance sampling. Please set "
+                "`importance_sampling_level` to 'token'."
+            )
+
+        # Datasets
+        self.shuffle_dataset = args.shuffle_dataset
+
+        if (
+            isinstance(train_dataset, IterableDataset)
+            or isinstance(eval_dataset, IterableDataset)
+            or (
+                isinstance(eval_dataset, dict) and any(isinstance(ds, IterableDataset) for ds in eval_dataset.values())
+            )
+        ):
+            # See https://github.com/huggingface/trl/issues/3213
+            raise NotImplementedError(
+                "Iterable datasets are not yet supported in GRPOTrainer. Please use a standard dataset instead."
+            )
+
+        # Multi-step
+        self.num_iterations = args.num_iterations  # = 𝜇 in the GRPO paper
+        self.epsilon_low = args.epsilon
+        self.epsilon_high = args.epsilon_high if args.epsilon_high is not None else args.epsilon
+        # Tracks the number of iterations [forward + backward passes], including those within a grad accum cycle
+        self._step = 0
+        # Buffer the batch to reuse generated outputs across multiple updates. For more details, see
+        # `_get_train_sampler` and `_prepare_inputs`.
+        self._buffered_inputs = None
+
+        # The trainer estimates the number of FLOPs [floating-point operations] using the number of elements in the
+        # input tensor associated with the key "input_ids". However, in GRPO, the sampled data does not include the
+        # "input_ids" key. Instead, the only available key is "prompt". As a result, the trainer issues the warning:
+        # "Could not estimate the number of tokens of the input, floating-point operations will not be computed." To
+        # suppress this warning, we set the "estimate_tokens" key in the model's "warnings_issued" dictionary to True.
+        # This acts as a flag to indicate that the warning has already been issued.
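+        # (`warnings_issued` is a plain dict kept on transformers model instances, keyed by warning name;
+        # flipping the flag here is enough for the Trainer to skip the FLOPs warning.)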
+ model.warnings_issued["estimate_tokens"] = True + + super().__init__( + model=model, + args=args, + data_collator=identity, # No data collation is needed in GRPO + train_dataset=train_dataset, + eval_dataset=eval_dataset, + processing_class=processing_class, + callbacks=callbacks, + optimizers=optimizers, + # In Trainer, `training_step` scales the loss by `gradient_accumulation_steps` only if `compute_loss_func` + # is None. For DAPO, loss scaling instead depends on the total number of completions tokens across the + # global accumulated batch. To control scaling ourselves, we must disable Trainer’s built-in scaling. The + # simplest [though a bit hacky] way is to set `compute_loss_func` to any non-None value, which bypasses + # that behavior without rewriting `training_step`. + compute_loss_func="non-None value to disable scaling", + ) + + # Reference model + self.beta = args.beta + if self.beta == 0.0: + # If beta is 0.0, the reference model is not needed + self.ref_model = None + elif is_peft_model(model): + # If PEFT is used, the reference model is not needed since the adapter can be disabled + # to revert to the initial model. + self.ref_model = None + else: + # For deepspeed, fsdp or non-distributed models, create a reference model from scratch + config = AutoConfig.from_pretrained(model_id) + architecture = getattr(transformers, config.architectures[0]) + self.ref_model = architecture.from_pretrained(model_id, **model_init_kwargs) + + # Disable dropout in the models + if args.disable_dropout: + disable_dropout_in_model(model) + if self.ref_model is not None: + disable_dropout_in_model(self.ref_model) + + # Liger loss + if self.use_liger_loss: + if not is_liger_kernel_available(): + raise ImportError( + "Liger is required to use `liger_loss` as the GRPO loss. Run `pip install liger-kernel`." + ) + # redirect the model.module forward to the model forward to ensure pre-forward hooks are called + self._forward_redirection = _ForwardRedirection() + + self.liger_grpo_loss = LigerFusedLinearGRPOLoss( + beta=self.beta, + epsilon_low=self.epsilon_low, + epsilon_high=self.epsilon_high, + temperature=self.temperature, + use_ref_model=self.beta != 0.0, + loss_type=self.loss_type, + max_completion_length=self.max_completion_length, + ) + + # Initialize the metrics + self._metrics = {"train": defaultdict(list), "eval": defaultdict(list)} + self._total_train_tokens = 0 + self.log_completions = args.log_completions + self.wandb_log_unique_prompts = args.wandb_log_unique_prompts + self.num_completions_to_print = args.num_completions_to_print + # Keep logs sized to the generation batch to record only outputs from the latest model update. + self._logs = { + "image": deque(maxlen=args.generation_batch_size), + "prompt": deque(maxlen=args.generation_batch_size), + "completion": deque(maxlen=args.generation_batch_size), + "rewards": defaultdict(lambda: deque(maxlen=args.generation_batch_size)), + "advantages": deque(maxlen=args.generation_batch_size), + } + + # Ensure each process receives a unique seed to prevent duplicate completions when generating with + # transformers if num_generations exceeds per_device_train_batch_size. We could skip it if we use vLLM, but + # it's safer to set it in all cases. + set_seed(args.seed, device_specific=True) + + if self.use_vllm: + if not is_vllm_available(): + raise ImportError( + "vLLM is not available and `use_vllm` is set to True. Please install vLLM with " + "`pip install vllm` to use it." 
+ ) + + if self.vllm_mode == "server": + if self.accelerator.is_main_process: + if args.vllm_server_base_url is not None: + base_url = args.vllm_server_base_url + else: + base_url = f"http://{args.vllm_server_host}:{args.vllm_server_port}" + self.vllm_client = VLLMClient(base_url=base_url, connection_timeout=args.vllm_server_timeout) + self.vllm_client.init_communicator(device=torch.cuda.current_device()) + + elif self.vllm_mode == "colocate": + if not self.accelerator.num_processes % self.vllm_tensor_parallel_size == 0: + raise ValueError( + f"vllm_tensor_parallel_size ({self.vllm_tensor_parallel_size}) must divide world size " + f"({self.accelerator.num_processes}) evenly." + ) + + if self.vllm_tensor_parallel_size > 1: + self.tp_group, _ = torch.distributed.new_subgroups_by_enumeration( + [ + list(range(i * self.vllm_tensor_parallel_size, (i + 1) * self.vllm_tensor_parallel_size)) + for i in range(self.accelerator.num_processes // self.vllm_tensor_parallel_size) + ] + ) + os.environ["RANK"] = str(self.accelerator.process_index) + os.environ["LOCAL_RANK"] = str(self.accelerator.local_process_index) + os.environ["WORLD_SIZE"] = str(self.accelerator.num_processes) + os.environ["MASTER_ADDR"] = os.environ.get("MASTER_ADDR", "localhost") + os.environ["MASTER_PORT"] = os.environ.get("MASTER_PORT", "12345") + + if self.max_prompt_length is not None and self.max_completion_length is not None: + max_model_len = self.max_prompt_length + self.max_completion_length + else: + max_model_len = None + self.llm = model.vllm_engine + if self.args.vllm_enable_sleep_mode: + self.llm.sleep(level=1) + else: + raise ValueError(f"vllm_mode must be either 'server' or 'colocate', got '{self.vllm_mode}'.") + self.guided_decoding_regex = args.vllm_guided_decoding_regex + + self._last_loaded_step = -1 + self.accelerator.wait_for_everyone() + else: + generation_kwargs = { + "max_new_tokens": self.max_completion_length, + "do_sample": True, + "pad_token_id": tokenizer.pad_token_id, + "bos_token_id": tokenizer.bos_token_id, + "eos_token_id": tokenizer.eos_token_id, + "temperature": self.temperature, + "top_p": self.top_p, + "top_k": self.top_k, + "min_p": self.min_p, + "repetition_penalty": self.repetition_penalty, + "cache_implementation": args.cache_implementation, + } + if args.use_transformers_paged: + generation_kwargs["max_batch_tokens"] = 512 + generation_kwargs["num_blocks"] = 1024 + generation_kwargs["block_size"] = 128 + if args.generation_kwargs is not None: + generation_kwargs.update(args.generation_kwargs) + self.generation_config = GenerationConfig(**generation_kwargs) + + # Gradient accumulation requires scaled loss. Normally, loss scaling in the parent class depends on whether the + # model accepts loss-related kwargs. Since we compute our own loss, this check is irrelevant. We set + # self.model_accepts_loss_kwargs to False to enable scaling. 
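+        # (In recent transformers versions, Trainer skips dividing the loss by `gradient_accumulation_steps`
+        # when the model accepts loss kwargs; forcing this flag to False restores that scaling.)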
+        self.model_accepts_loss_kwargs = False
+
+        # Add tags to the model
+        self.model.add_model_tags(self._tag_names)
+
+        if self.ref_model is not None:
+            if self.is_deepspeed_enabled:
+                self.ref_model = prepare_deepspeed(self.ref_model, self.accelerator)
+            elif self.is_fsdp_enabled:
+                self.ref_model = prepare_fsdp(self.ref_model, self.accelerator)
+            else:
+                self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True)
+
+        if args.sync_ref_model:
+            self.add_callback(SyncRefModelCallback(ref_model=self.ref_model, accelerator=self.accelerator))
+
+        for i, reward_func in enumerate(self.reward_funcs):
+            if isinstance(reward_func, PreTrainedModel):
+                if self.is_deepspeed_enabled:
+                    self.reward_funcs[i] = prepare_deepspeed(reward_func, self.accelerator)
+                else:
+                    # set device placement to True to make `prepare_model` move `reward_func` to device when using fsdp
+                    self.reward_funcs[i] = self.accelerator.prepare_model(
+                        reward_func, evaluation_mode=True, device_placement=True
+                    )
+
+    def _set_signature_columns_if_needed(self):
+        # If `self.args.remove_unused_columns` is True, non-signature columns are removed.
+        # By default, this method sets `self._signature_columns` to the model's expected inputs.
+        # In GRPOTrainer, we preprocess data, so using the model's signature columns doesn't work.
+        # Instead, we set them to the columns expected by the `training_step` method, hence the override.
+        if self._signature_columns is None:
+            self._signature_columns = ["prompt", "image"]
+
+    # This method overrides `Trainer.get_train_dataloader` to support our custom batching strategy.
+    # Instead of returning a standard per-step batch (i.e., `per_device_batch_size`), our dataloader loads a
+    # *generation* batch (i.e., `per_device_batch_size × steps_per_generation`). This allows us to generate completions
+    # once every `steps_per_generation` steps, rather than once per accumulation step, which is significantly more
+    # efficient. The only change from the original implementation is multiplying the batch size by
+    # `steps_per_generation`. Thus, `_prepare_inputs` is called with this *generation* batch, and it handles the
+    # splitting internally.
+    # Maintenance note: This method is a copy-paste of the original `Trainer.get_train_dataloader` with only one line
+    # modified. As a result, some parts of the method aren't relevant to GRPO, but we keep them to stay one line
+    # apart from the super method, ensuring easier maintenance in the future.
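+    # Example with hypothetical numbers: `per_device_train_batch_size=4` and `steps_per_generation=4`
+    # give a dataloader batch of 16 prompts; completions are generated once for all 16 and then
+    # consumed over the next 4 training steps.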
+ def get_train_dataloader(self): + if self.train_dataset is None: + raise ValueError("Trainer: training requires a train_dataset.") + + train_dataset = self.train_dataset + data_collator = self.data_collator + if is_datasets_available() and isinstance(train_dataset, datasets.Dataset): + train_dataset = self._remove_unused_columns(train_dataset, description="training") + else: + data_collator = self._get_collator_with_removed_columns(data_collator, description="training") + + dataloader_params = { + "batch_size": self._train_batch_size * self.args.steps_per_generation, # < this is the change + "collate_fn": data_collator, + "num_workers": self.args.dataloader_num_workers, + "pin_memory": self.args.dataloader_pin_memory, + "persistent_workers": self.args.dataloader_persistent_workers, + } + + if not isinstance(train_dataset, torch.utils.data.IterableDataset): + dataloader_params["sampler"] = self._get_train_sampler() + dataloader_params["drop_last"] = self.args.dataloader_drop_last + dataloader_params["worker_init_fn"] = partial( + seed_worker, num_workers=self.args.dataloader_num_workers, rank=self.args.process_index + ) + + dataloader_params["prefetch_factor"] = self.args.dataloader_prefetch_factor + + return self.accelerator.prepare(DataLoader(train_dataset, **dataloader_params)) + + def _get_train_sampler(self, dataset: Optional[Dataset] = None) -> Sampler: + # Returns a sampler that + # 1. ensures each prompt is repeated across multiple processes. This guarantees that identical prompts are + # distributed to different GPUs, allowing rewards to be computed and normalized correctly within each prompt + # group. Using the same seed across processes ensures consistent prompt assignment, preventing discrepancies + # in group formation. + # 2. repeats the batch multiple times to allow reusing generations across multiple updates. Refer to + # _prepare_inputs to see how the generations are stored and reused. + + # In the following figure, the values are the prompt indices. The first row shows the first sampled batch, the + # second row shows the second sampled batch, and so on. + # + # | GPU 0 | GPU 1 | + # + # global_step step <-───> num_generations=2 + # <-───────> per_device_train_batch_size=3 + # grad_accum ▲ ▲ 0 0 0 0 1 1 2 2 <- Generate for the first `steps_per_generation` (prompts 0 to 11); store the completions; use the first slice to compute the loss + # =2 ▼ | 0 1 3 3 4 4 5 5 <- Take the stored generations and use the second slice to compute the loss + # | + # | 1 2 6 6 7 7 8 8 <- Take the stored generations and use the third slice to compute the loss + # steps_per_gen=4 ▼ 1 3 9 9 10 10 11 11 <- Take the stored generations and use the fourth slice to compute the loss + # + # 2 4 12 12 13 13 14 14 <- Generate for the second `steps_per_generation` (prompts 12 to 23); store the completions; use the first slice to compute the loss + # 2 5 15 15 16 16 17 17 <- Take the stored generations and use the second slice to compute the loss + # ... + if dataset is None: + dataset = self.train_dataset + return RepeatSampler( + data_source=dataset, + mini_repeat_count=self.num_generations, + batch_size=self.args.generation_batch_size // self.num_generations, + repeat_count=self.num_iterations * self.args.steps_per_generation, + shuffle=self.shuffle_dataset, + seed=self.args.seed, + ) + + def _get_eval_sampler(self, eval_dataset) -> Sampler: + # See _get_train_sampler for an explanation of the sampler. 
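+        # Evaluation only needs each prompt repeated `num_generations` times; completions are not
+        # reused across steps, so no `repeat_count` is passed here.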
+ return RepeatSampler( + data_source=eval_dataset, + mini_repeat_count=self.num_generations, + seed=self.args.seed, + ) + + @profiling_decorator + def _get_last_hidden_state( + self, + unwrapped_model, + input_ids, + attention_mask, + logits_to_keep, + pixel_values=None, + image_grid_thw=None, + pixel_attention_mask=None, + image_sizes=None, + ): + if is_peft_model(unwrapped_model): + unwrapped_model = unwrapped_model.base_model.model + + # Build model inputs - check if the model supports logits_to_keep (some models and VLMs don't) + model_inputs = {"input_ids": input_ids, "attention_mask": attention_mask} + + # For Qwen models: + if image_grid_thw is not None and pixel_values is not None: + model_inputs["image_grid_thw"] = image_grid_thw + # For Gemma, SmolVLM2, LLaVa-Next etc.: + if pixel_values is not None: + model_inputs["pixel_values"] = pixel_values + # For SmolVLM2 + if pixel_attention_mask is not None: + model_inputs["pixel_attention_mask"] = pixel_attention_mask + # For LLaVa-Next + if image_sizes is not None: + model_inputs["image_sizes"] = image_sizes + + # Only add logits_to_keep if the model supports it + if "logits_to_keep" in self.model_kwarg_keys: + # We add 1 to `logits_to_keep` because the last logits of the sequence is later excluded + model_inputs["logits_to_keep"] = logits_to_keep + 1 + + model_inputs["use_cache"] = False # only used in generation; set False to suppress warnings + + last_hidden_state = unwrapped_model.model(**model_inputs).last_hidden_state + # Exclude the last value: it corresponds to the next token pred + last_hidden_state = last_hidden_state[:, :-1, :] # (B, L-1, H) + # Only keep the last logits_to_keep. For model that support logits_to_keep, this is a no-op. + last_hidden_state = last_hidden_state[:, -logits_to_keep:, :] # (B, logits_to_keep, H) + return last_hidden_state + + def get_high_entropy_mask(self, entropies: torch.Tensor, mask: torch.Tensor, threshold: float) -> torch.Tensor: + """ + Returns a binary mask identifying tokens whose entropy exceeds a given quantile threshold. + + Args: + entropies (`torch.Tensor`): + Tensor of shape (batch_size, seq_len) with per-token entropy values. + mask (`torch.Tensor`): + Binary mask of the same shape as `entropies`, where `1` indicates valid tokens and `0` padding. + threshold (`float`): + Quantile threshold between `0.0` and `1.0` to select high-entropy tokens. + + Returns: + `torch.Tensor`: + Boolean mask of shape (batch_size, seq_len), where `True` indicates tokens with entropy >= threshold + and `False` otherwise. + """ + non_pad_entropies = entropies[mask.bool()].float() + if non_pad_entropies.numel() == 0: + return torch.zeros_like(entropies, dtype=torch.bool) + + # The shape of non_pad_entropies can be different on each gpu/device. + # this can cause the gather operation to hang. So we first gather the lengths + # of non_pad_entropies and pad them to the max length before doing a gather. 
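+        # That is: pad each rank's entropy vector to the global maximum length, gather across
+        # processes, then drop the padded positions again before taking the quantile.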
+ non_pad_entropies_seq_length = torch.tensor([non_pad_entropies.numel()], device=entropies.device) + max_non_pad_entropies_seq_length = self.accelerator.gather(non_pad_entropies_seq_length).max().item() + padding = torch.zeros( + max_non_pad_entropies_seq_length - non_pad_entropies.numel(), device=non_pad_entropies.device + ) + padded_entropies = torch.cat([non_pad_entropies, padding]) + padded_entropies_mask = torch.cat([torch.ones_like(non_pad_entropies), padding]) + all_padded_entropies = self.accelerator.gather(padded_entropies) + all_padded_entropies_mask = self.accelerator.gather(padded_entropies_mask) + # Filter out entropies corresponding to padding. + all_non_padded_entropies = all_padded_entropies[all_padded_entropies_mask.bool()] + entropy_threshold = torch.quantile(all_non_padded_entropies, threshold) + masked_entropies = entropies * mask.float() + entropy_mask = masked_entropies >= entropy_threshold + return entropy_mask & mask.bool() # ensure padding tokens are always masked out + + def _get_per_token_logps_and_entropies(self, model, input_ids, attention_mask, logits_to_keep, batch_size = None, + compute_entropy = False, compute_efficient = False, *args, **kwargs): + # if True: # os.environ.get('UNSLOTH_USE_NEW_MODEL', '0') == '0': + # return None, None # logps, entropies Unsloth efficient GRPO + if compute_efficient: + return None, None + else: + # Otherwise, calculate normally: + if not hasattr(self, '_autocast_dtype'): + self._autocast_dtype = torch.float16 if os.environ.get('ACCELERATE_MIXED_PRECISION', 'fp16') == 'fp16' else torch.bfloat16 + if os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '1': self._autocast_dtype = torch.float16 + + pixel_values, image_grid_thw = kwargs.get("pixel_values", None), kwargs.get("image_grid_thw", None) + pixel_attention_mask, image_sizes = kwargs.get('pixel_attention_mask',None), kwargs.get('image_sizes',None) + + os.environ["UNSLOTH_RETURN_HIDDEN_STATES"] = "1" + + unwrapped_model = self.accelerator.unwrap_model(model, keep_fp32_wrapper=False) + + with torch.amp.autocast(device_type = 'cuda', dtype = self._autocast_dtype): + with torch.inference_mode(): + if pixel_values is None: + attention_mask = input_ids != self.processing_class.pad_token_id + attention_mask = attention_mask.to(attention_mask.dtype) + # We add 1 to `logits_to_keep` because the last logits of the sequence is later excluded + logits = unwrapped_model( + input_ids = input_ids, + attention_mask = attention_mask, + pixel_values = pixel_values, + image_grid_thw = image_grid_thw, + pixel_attention_mask = pixel_attention_mask, + image_sizes = image_sizes, + #logits_to_keep = logits_to_keep + 1, + ).logits + else: + logits = unwrapped_model( + input_ids = input_ids, + attention_mask = attention_mask, + pixel_values = pixel_values, + image_grid_thw = image_grid_thw, + pixel_attention_mask = pixel_attention_mask, + image_sizes = image_sizes, + logits_to_keep = logits_to_keep + 1, + ).logits + entropies = None + if compute_entropy: + from trl.trainer.utils import entropy_from_logits + entropies = entropy_from_logits(logits) + os.environ["UNSLOTH_RETURN_HIDDEN_STATES"] = "0" + # logits = logits[:, :-1, :] # (B, L-1, V), exclude the last logit: it corresponds to the next token pred + return logits, entropies # logps, entropies + # input_ids = input_ids[:, -logits_to_keep:] + # For transformers<=4.48, logits_to_keep argument isn't supported, so here we drop logits ourselves. 
+            # See https://github.com/huggingface/trl/issues/2770
+            # logits = logits[:, -logits_to_keep:]
+            # return logits
+            # See https://huggingface.co/blog/the_n_implementation_details_of_rlhf_with_ppo#policy-training-implementation-details
+            # logits = logits / self.temperature
+            # logps = selective_log_softmax(logits, input_ids)
+
+            # row_indices, col_indices = torch.where(logps < -20)
+
+            # # Method 1: Check if tensors have elements
+            # if len(row_indices) > 0 and len(col_indices) > 0:
+            #     breakpoint()  # Breakpoint triggered here
+            #     print("Found high values!")
+            # return logps  # compute logprobs for the input tokens
+        pass
+
+    def _fix_param_name_to_vllm(self, name, extra_prefixes: Optional[list[str]] = None):
+        extra_prefixes = extra_prefixes or []
+        prefixes = ["_checkpoint_wrapped_module."] + extra_prefixes
+        for prefix in prefixes:
+            name = name.replace(prefix, "")
+        return name
+
+    def _sync_fsdp1_params_to_vllm(self, module: nn.Module, prefix: str = "", visited=None):
+        """Memory-efficient post-order traversal of FSDP modules to extract full parameters and sync with vLLM."""
+        # For FSDP1, we need to recurse into children and also use summon_full_params
+        if visited is None:
+            visited = set()
+        for child_name, child_module in module.named_children():
+            child_prefix = f"{prefix}.{child_name}" if prefix else child_name
+            self._sync_fsdp1_params_to_vllm(
+                child_module, prefix=child_prefix, visited=visited
+            )  # recurse into the child
+
+        if isinstance(module, FSDP):
+            with FSDP.summon_full_params(module, recurse=False, writeback=False):
+                for param_name, param in module.named_parameters():
+                    full_name = f"{prefix}.{param_name}" if prefix else param_name
+                    full_name = self._fix_param_name_to_vllm(full_name, extra_prefixes=["_fsdp_wrapped_module."])
+
+                    if full_name in visited:
+                        continue  # skip FSDP subtrees already traversed
+                    visited.add(full_name)
+
+                    if self.vllm_mode == "server" and self.accelerator.is_main_process:
+                        self.vllm_client.update_named_param(full_name, param.data)
+                    elif self.vllm_mode == "colocate":
+                        pass
+        pass
+
+    def _sync_fsdp2_params_to_vllm(self, module: nn.Module):
+        # For FSDP2, module.state_dict() already covers all parameters, so no need for recursion
+        for name, param in module.state_dict().items():
+            if param.is_cpu:
+                param = param.to(torch.device("cuda"))
+            param = param.full_tensor()
+
+            if self.vllm_mode == "server" and self.accelerator.is_main_process:
+                self.vllm_client.update_named_param(name, param)
+            elif self.vllm_mode == "colocate":
+                pass
+
+    def _move_model_to_vllm(self, *args, **kwargs): return None
+
+    @profiling_decorator
+    def _prepare_inputs(
+        self, generation_batch: dict[str, Union[torch.Tensor, Any]]
+    ) -> dict[str, Union[torch.Tensor, Any]]:
+        # Prepares inputs for model training/evaluation by managing completion generation and batch handling.
+ # During training: + # - Receives the local generation batch (Per-GPU batch size × steps per generation) + # from the modified training dataloader instead of the standard local batch + # - Generates completions once for the entire generation batch and splits it into batches of size + # `per_device_train_batch_size` + # - Buffers these completions and returns the appropriate slice for the current accumulation step + # - Optimizes by regenerating completions only periodically (every steps_per_generation * num_iterations) + # During evaluation: + # - The input is treated as a standard local batch (no accumulation, no multiple iterations) + # - Completions are generated for each batch without buffering or reuse + # Returns a single local batch in both cases. + + mode = "train" if self.model.training else "eval" + if mode == "train": + generate_every = self.args.steps_per_generation * self.num_iterations + if self._step % generate_every == 0 or self._buffered_inputs is None: + # self._buffered_inputs=None can occur when resuming from a checkpoint + generation_batch = self._generate_and_score_completions(generation_batch) + generation_batch = split_pixel_values_by_grid(generation_batch) + + try: generation_batch = shuffle_sequence_dict(generation_batch) + + except: pass + generation_batches = split_tensor_dict(generation_batch, self.args.steps_per_generation) + self._buffered_inputs = [unsplit_pixel_values_by_grid(batch) for batch in generation_batches] + inputs = self._buffered_inputs[self._step % self.args.steps_per_generation] + self._step += 1 + else: + # In evaluation, there is neither batch grouping for generation, nor multiple iterations, hence + # local generation batch == local eval batch + inputs = self._generate_and_score_completions(generation_batch) + return inputs + + @profiling_decorator + def _calculate_rewards(self, inputs, prompts, completions, completion_ids_list): + device = self.accelerator.device + rewards_per_func = torch.zeros(len(prompts), len(self.reward_funcs), device=device) + + # Repeat all input columns (but "prompt", "completion", and "completion_ids") to match the num of generations + keys = [key for key in inputs[0] if key not in ["prompt", "completion", "completion_ids"]] + reward_kwargs = {key: [example[key] for example in inputs] for key in keys} + + # This allows for dynamic reward shaping based on training progress. 
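+        # For instance, a (hypothetical) custom reward function could anneal a length penalty
+        # over training:
+        #
+        #     def length_penalty(completions, trainer_state=None, **kwargs):
+        #         frac = trainer_state.global_step / max(trainer_state.max_steps, 1)
+        #         return [-frac * len(completion) for completion in completions]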
+ reward_kwargs["trainer_state"] = self.state + + for i, (reward_func, reward_processing_class, reward_func_name) in enumerate( + zip(self.reward_funcs, self.reward_processing_classes, self.reward_func_names) + ): + with profiling_context(self, reward_func_name): + if isinstance(reward_func, nn.Module): # Module (no PretrainedModel) for compat with compiled models + if is_conversational(inputs[0]): + messages = [{"messages": p + c} for p, c in zip(prompts, completions)] + texts = [apply_chat_template(x, reward_processing_class)["text"] for x in messages] + else: + texts = [p + c for p, c in zip(prompts, completions)] + reward_inputs = reward_processing_class( + text=texts, return_tensors="pt", padding=True, padding_side="right", add_special_tokens=False + ) + reward_inputs = super()._prepare_inputs(reward_inputs) + with torch.inference_mode(): + rewards_per_func[:, i] = reward_func(**reward_inputs).logits[:, 0] # Shape (B*G,) + else: + output_reward_func = reward_func( + prompts=prompts, completions=completions, completion_ids=completion_ids_list, **reward_kwargs + ) + # Convert None values to NaN + output_reward_func = [reward if reward is not None else torch.nan for reward in output_reward_func] + + rewards_per_func[:, i] = torch.tensor(output_reward_func, dtype=torch.float32, device=device) + + # If all reward functions return None for a given row, issue a detailed warning + if torch.isnan(rewards_per_func).all(dim=1).any(): + nan_row_idx = torch.isnan(rewards_per_func).all(dim=1).nonzero(as_tuple=True)[0][0] + row_reward_kwargs = { + key: value[nan_row_idx] for key, value in reward_kwargs.items() if key != "trainer_state" + } + row_reward_kwargs["prompt"] = prompts[nan_row_idx] + row_reward_kwargs["completion"] = completions[nan_row_idx] + logger.warning( + f"All reward functions returned None for the following kwargs:\n{row_reward_kwargs}\n" + "Please ensure that at least one reward function returns a valid reward." + ) + + # Gather the reward per function: this part is crucial, because the rewards are normalized per group and the + # completions may be distributed across processes + rewards_per_func = gather(rewards_per_func) + return rewards_per_func + + def _generate_and_score_completions( + self, inputs: list[dict[str, Union[torch.Tensor, Any]]] + ) -> dict[str, Union[torch.Tensor, Any]]: + device = self.accelerator.device + mode = "train" if self.model.training else "eval" + + prompts = [x["prompt"] for x in inputs] + + # We don't yet support visual reward models/function, so we keep a copy of the original text-only prompts for + # later use in the reward computation. If images are present, we insert {"type": "image"} as required by the + # VLM chat template. 
+ original_prompts = copy.deepcopy(prompts) + + # If the prompts are conversational and the inputs contain images, we need to convert the prompts from + # [{"role": "user", "content": "What color is the sky?"}] to + # [{"role": "user", "content": [{"type": "image"}, {"type": "text", "text": "What color is the sky?"}]}] + kwargs = {} + has_images = "image" in inputs[0] + if has_images: + images = [example.get("image") for example in inputs] + kwargs = {"images": [[img] for img in images]} + for prompt in prompts: + if isinstance(prompt, list): # i.e., when using conversational data + prepare_multimodal_messages(prompt, num_images=1) + + + _chat_template_ = getattr(self.processing_class, "chat_template", None) + if _chat_template_ is None: _chat_template_ = "" + _supported_keys_ = set(("prompt", "chosen", "rejected", "completion", "messages", "label")) + + prompts_text = [] + for _example_ in inputs: + _tokenizer_kwargs_ = {} + if type(_example_) is not dict: + _example_ = {"prompt": _example_} + _left_keys_ = _example_.keys() - _supported_keys_ + for k in _left_keys_: + if k in _chat_template_: + v = _example_[k] + if type(v) is str: + _tokenizer_kwargs_[k] = v + _x_ = maybe_apply_chat_template(_example_, self.processing_class, **_tokenizer_kwargs_)["prompt"] + prompts_text.append(_x_) + prompt_inputs = self.processing_class( + text=prompts_text, + return_tensors="pt", + padding=True, + padding_side="left", + add_special_tokens=False, + **kwargs, + ) + prompt_inputs = super()._prepare_inputs(prompt_inputs) + prompt_ids, prompt_mask = prompt_inputs["input_ids"], prompt_inputs["attention_mask"] + if self.max_prompt_length is not None: + # If max_prompt_length is set, we trim the prompt to keep only the last `max_prompt_length` tokens. + # Then we decode those tokens back into text. We manually remove leading pad tokens from the decoded text, + # because we can't use `skip_special_tokens=True` (some special tokens are still needed for generation). + protected = [self.image_token_id, self.vision_start_token_id, self.vision_end_token_id] + protected = [token for token in protected if token is not None] + prompt_ids, prompt_mask = truncate_with_protected_tokens( + prompt_ids, prompt_mask, self.max_prompt_length, protected + ) + + prompts_text = [re.sub(rf"^({re.escape(self.pad_token)})+", "", text) for text in prompts_text] + + # The chat template inserts a single image token into the prompt text. However, when this text is later + # tokenized, the single image token string is expanded into multiple image token IDs, depending on the + # image size. Since we're detokenizing here, we may see repeated image tokens in the decoded text. We + # collapse them back into a single token string to match the original template. 
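+        # e.g. a decoded prompt like "<image><image><image>Describe this" collapses back to
+        # "<image>Describe this" (token string shown for illustration; the actual string is model-specific).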
+ if self.image_token is not None: + prompts_text = [ + re.sub(rf"({re.escape(self.image_token)})+", self.image_token, text) for text in prompts_text + ] + # Generate completions using either vLLM or regular generation + if self.use_vllm: + if self.vllm_mode == "colocate" and self.args.vllm_enable_sleep_mode: + # wake up colocated vLLM instances if needed + torch.cuda.empty_cache() # required to avoid OOM in some cases + self.llm.wake_up() + + # First, update the vLLM weights if needed + if self.state.global_step != self._last_loaded_step: + self._move_model_to_vllm() + self._last_loaded_step = self.state.global_step + + # Generate completions using vLLM: gather all prompts and use them in a single call in the main process + if self.vllm_mode == "server": + all_prompts_text = gather_object(prompts_text) + if has_images: + all_images = gather_object(images) + + if self.accelerator.is_main_process: + # Since 'prompts' contains 'num_generations' duplicates, we first take unique prompts, and generate + # num_generations outputs for each one. This is faster than generating outputs for each duplicate + # prompt individually. + ordered_set_of_prompts = all_prompts_text[:: self.num_generations] + + if has_images: + ordered_set_of_images = all_images[:: self.num_generations] + else: + ordered_set_of_images = None + + with profiling_context(self, "vLLM.generate"): + output = self.vllm_client.generate( + prompts=ordered_set_of_prompts, + images=ordered_set_of_images, + n=self.num_generations, + repetition_penalty=self.repetition_penalty, + temperature=self.temperature, + top_p=self.top_p, + top_k=-1 if self.top_k is None else self.top_k, + min_p=0.0 if self.min_p is None else self.min_p, + max_tokens=self.max_completion_length, + guided_decoding_regex=self.guided_decoding_regex, + generation_kwargs=self.args.generation_kwargs, + ) + payload = (output["completion_ids"], output["logprobs"]) + else: + payload = None + + # Broadcast the completions from the main process to all processes, ensuring each process receives its corresponding slice. + obj_list = [payload] + broadcast_object_list(obj_list, from_process=0) + completion_ids, all_logprobs = obj_list[0] + + process_slice = slice( + self.accelerator.process_index * len(prompts), + (self.accelerator.process_index + 1) * len(prompts), + ) + completion_ids = completion_ids[process_slice] + all_logprobs = all_logprobs[process_slice] + + # Generate completions using colocated vLLM instances: each device holds vLLM copy and work on their own batch of prompts + elif self.vllm_mode == "colocate": + if self.guided_decoding_regex: + guided_decoding = GuidedDecodingParams(regex=self.guided_decoding_regex) + else: + guided_decoding = None + + generation_kwargs = { + "n": 1, # vLLM on each GPU generates only 1 in colocate mode + "repetition_penalty": self.repetition_penalty, + "temperature": self.temperature, + "top_p": self.top_p, + "top_k": -1 if self.top_k is None else self.top_k, + "min_p": 0.0 if self.min_p is None else self.min_p, + "max_tokens": self.max_completion_length, + "guided_decoding": guided_decoding, + "logprobs": 0, # only return the logprob of the generated token + } + if self.args.generation_kwargs is not None: + generation_kwargs.update(self.args.generation_kwargs) + sampling_params = SamplingParams(**generation_kwargs) + + if self.vllm_tensor_parallel_size > 1: + # Gather prompts from all ranks in the TP group and flatten. + # Each rank starts with its own prompts; after gathering, all ranks see the full group set. 
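+                    # e.g. with `vllm_tensor_parallel_size=2`: rank 0 holds [p0, p1] and rank 1 holds
+                    # [p2, p3]; after `all_gather_object` both ranks see [p0, p1, p2, p3].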
+ orig_size = len(prompts_text) + gathered_prompts = [None for _ in range(self.vllm_tensor_parallel_size)] + torch.distributed.all_gather_object(gathered_prompts, prompts_text, group=self.tp_group) + all_prompts_text = [p for sublist in gathered_prompts for p in sublist] + + if has_images: + gathered_images = [None for _ in range(self.vllm_tensor_parallel_size)] + torch.distributed.all_gather_object(gathered_images, images, group=self.tp_group) + all_images = [img for sublist in gathered_images for img in sublist] + else: + all_images = None + else: + all_prompts_text = prompts_text + all_images = images if has_images else None + + if has_images and all_images: + vllm_inputs = [] + for prompt, image in zip(all_prompts_text, all_images): + if image is not None: + vllm_inputs.append({"prompt": prompt, "multi_modal_data": {"image": image}}) + else: + vllm_inputs.append(prompt) + else: + vllm_inputs = all_prompts_text + + with profiling_context(self, "vLLM.generate"): + all_outputs = self.llm.generate(vllm_inputs, sampling_params=sampling_params, use_tqdm=False, lora_request = self.model.load_lora('grpo_trainer_lora_model', load_tensors = True)) + + completion_ids = [output.token_ids for outputs in all_outputs for output in outputs.outputs] + from trl.scripts.vllm_serve import sanitize_logprob + all_logprobs = [ + [sanitize_logprob(next(iter(logprob.values()))) for logprob in output.logprobs] + for outputs in all_outputs + for output in outputs.outputs + ] + + if self.vllm_tensor_parallel_size > 1: + # Slice completions for this rank within its TP group. + # Each rank generates all outputs — we keep only our share. + local_rank_in_group = torch.distributed.get_rank(group=self.tp_group) + tp_slice = slice(local_rank_in_group * orig_size, (local_rank_in_group + 1) * orig_size) + completion_ids = completion_ids[tp_slice] + all_logprobs = all_logprobs[tp_slice] + + if self.args.vllm_enable_sleep_mode: + self.llm.sleep(level=1) + + # Pad the completions, and concatenate them with the prompts + completion_ids = [torch.tensor(ids, device=device) for ids in completion_ids] + completion_ids = pad(completion_ids, padding_value=self.pad_token_id) + prompt_completion_ids = torch.cat([prompt_ids, completion_ids], dim=1) + sampling_per_token_logps = [ + torch.tensor(logprobs, device=device, dtype=torch.float32) for logprobs in all_logprobs + ] + sampling_per_token_logps = pad(sampling_per_token_logps, padding_value=0.0) + + elif self.use_transformers_paged: + # Re-process inputs for paged generation if needed + # Note: images are already validated and preprocessed above + paged_prompt_inputs = self.processing_class(text=prompts_text, **kwargs) + previous_attn = self.model_wrapped.config._attn_implementation + + if is_flash_attn_2_available(): + self.model_wrapped.config._attn_implementation = "paged_attention" + else: + self.model_wrapped.config._attn_implementation = "sdpa_paged" + with ( + profiling_context(self, "transformers.generate_batch"), + unwrap_model_for_generation( + self.model_wrapped, self.accelerator, gather_deepspeed3_params=self.args.ds3_gather_for_generation + ) as unwrapped_model, + torch.no_grad(), + FSDP.summon_full_params(self.model_wrapped, recurse=False) if self.is_fsdp_enabled else nullcontext(), + ): + # Cast to the appropriate dtype based on training configuration + if self.args.bf16: + unwrapped_model.to(torch.bfloat16) + elif self.args.fp16: + unwrapped_model.to(torch.float16) + with torch.inference_mode(): + all_outputs = unwrapped_model.generate_batch( + 
paged_prompt_inputs.input_ids, generation_config=self.generation_config, progress_bar=False
+                    )
+            completion_ids = [output.generated_tokens for output in all_outputs.values()]
+            completion_ids = [torch.tensor(ids, device=device) for ids in completion_ids]
+            completion_ids = pad(completion_ids, padding_value=self.pad_token_id, padding_side="right")
+            prompt_ids = [torch.tensor(ids, device=device) for ids in paged_prompt_inputs.input_ids]
+            prompt_ids = pad(prompt_ids, padding_value=self.pad_token_id, padding_side="left")
+            prompt_completion_ids = torch.cat([prompt_ids, completion_ids], dim=1)
+            # Restore the original attention implementation
+            self.model_wrapped.config._attn_implementation = previous_attn
+        else:
+            # Regular generation path
+            with (
+                profiling_context(self, "transformers.generate"),
+                unwrap_model_for_generation(
+                    self.model_wrapped, self.accelerator, gather_deepspeed3_params=self.args.ds3_gather_for_generation
+                ) as unwrapped_model,
+                torch.no_grad(),
+                FSDP.summon_full_params(self.model_wrapped, recurse=False) if self.is_fsdp_enabled else nullcontext(),
+            ):
+                prompt_inputs["input_ids"], prompt_inputs["attention_mask"] = prompt_ids, prompt_mask
+                prompt_completion_ids = unwrapped_model.generate(
+                    **prompt_inputs, generation_config=self.generation_config, disable_compile=True
+                )
+        # Compute prompt length and extract completion ids
+        prompt_length = prompt_ids.size(1)
+        prompt_ids = prompt_completion_ids[:, :prompt_length]
+        completion_ids = prompt_completion_ids[:, prompt_length:]
+
+        # Mask everything after the first EOS token
+        is_eos = completion_ids == self.eos_token_id
+        eos_idx = torch.full((is_eos.size(0),), is_eos.size(1), dtype=torch.long, device=device)
+        eos_idx[is_eos.any(dim=1)] = is_eos.int().argmax(dim=1)[is_eos.any(dim=1)]
+        sequence_indices = torch.arange(is_eos.size(1), device=device).expand(is_eos.size(0), -1)
+        completion_mask = (sequence_indices <= eos_idx.unsqueeze(1)).int()
+
+        # Convert tensor to a list of lists of token IDs. This will be passed to the reward function, avoiding the need
+        # to re-tokenize completions if the reward is computed from tokens.
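+        # Illustrative example: a completion row [1212, 8, eos, pad, pad] with mask [1, 1, 1, 0, 0]
+        # becomes the plain Python list [1212, 8, eos] (the EOS token itself is kept by the mask).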
+        completion_ids_list = [row[mask_row].tolist() for row, mask_row in zip(completion_ids, completion_mask.bool())]
+
+        # Sum along sequence dimension (dim=1) to get completion length per sequence, used for logging
+        completion_lengths = completion_mask.sum(1)
+        agg_completion_lengths = self.accelerator.gather(completion_lengths)
+        num_items_in_batch = agg_completion_lengths.sum()  # this is required for the DAPO loss
+
+        # If mask_truncated_completions is enabled, zero out truncated completions in completion_mask
+        if self.mask_truncated_completions:
+            truncated_completions = ~is_eos.any(dim=1)
+            completion_mask = completion_mask * (~truncated_completions).unsqueeze(1).int()
+
+        # Concatenate prompt_mask with completion_mask for logit computation
+        attention_mask = torch.cat([prompt_mask, completion_mask], dim=1)  # (B, P+C)
+
+        logits_to_keep = completion_ids.size(1)  # we only need to compute the logits for the completion tokens
+
+        batch_size = self.args.per_device_train_batch_size if mode == "train" else self.args.per_device_eval_batch_size
+        try:
+            # TRL 0.23.1 and below path
+            if not has_images:
+                # Left-pack the ids (move padding to the right) before computing old and ref hidden states
+                prompt_completion_ids = left_pack_padding(prompt_completion_ids, self.processing_class.pad_token_id)
+            self.model.for_training()
+        except Exception:
+            # TRL 0.24.0 and above path
+            if images is None:
+                # Left-pack the ids (move padding to the right) before computing old and ref hidden states
+                prompt_completion_ids = left_pack_padding(prompt_completion_ids, self.processing_class.pad_token_id)
+            self.model.for_training()
+
+        with torch.no_grad():
+            # If the generation and optimization steps are misaligned—i.e., if generation does not occur at the end of
+            # a full optimizer step (when gradient_accumulation_steps is not a multiple of generate_every)—then the
+            # samples may come from an earlier version of the model. In that case, we need to track old_per_token_logps
+            # for importance sampling. If the steps are aligned, importance sampling isn't necessary and we set
+            # old_per_token_logps to None.
+            # When using vLLM, we always compute old_per_token_logps for importance sampling; it has been shown that the
+            # distribution mismatch between vLLM and the training model can be large and harm the training.
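+            # Rough sketch of the correction computed below (not the exact code):
+            #   ratio_t = clamp(exp(old_per_token_logps_t - sampling_per_token_logps_t),
+            #                   max=self.vllm_importance_sampling_cap)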
+ generate_every = self.args.steps_per_generation * self.num_iterations # generation frequency + + if self.args.gradient_accumulation_steps % generate_every != 0 or ( + self.use_vllm + ): + old_per_token_logps, _ = self._get_per_token_logps_and_entropies( + self.model, + prompt_completion_ids, + attention_mask, + logits_to_keep, + batch_size, + pixel_values=prompt_inputs.get("pixel_values"), + image_grid_thw=prompt_inputs.get("image_grid_thw"), + pixel_attention_mask=prompt_inputs.get("pixel_attention_mask"), + image_sizes=prompt_inputs.get("image_sizes"), + ) + else: + old_per_token_logps = None + + # Compute the importance sampling ratio when using vLLM, to correct for potential distribution mismatch + if self.use_vllm and self.vllm_importance_sampling_correction: + importance_sampling_ratio = torch.exp(old_per_token_logps - sampling_per_token_logps) + importance_sampling_ratio = torch.clamp( + importance_sampling_ratio, max=self.vllm_importance_sampling_cap + ) + + # Compute the per-token log probabilities for the reference model + if self.beta != 0.0: + if self.ref_model is not None: + ref_per_token_logps, _ = self._get_per_token_logps_and_entropies( + self.ref_model, + prompt_completion_ids, + attention_mask, + logits_to_keep, + batch_size=batch_size, + pixel_values=prompt_inputs.get("pixel_values"), + image_grid_thw=prompt_inputs.get("image_grid_thw"), + pixel_attention_mask=prompt_inputs.get("pixel_attention_mask"), + image_sizes=prompt_inputs.get("image_sizes"), + ) + else: + with self.accelerator.unwrap_model(self.model).disable_adapter(): + ref_per_token_logps, _ = self._get_per_token_logps_and_entropies( + self.model, + prompt_completion_ids, + attention_mask, + logits_to_keep, + batch_size=batch_size, + pixel_values=prompt_inputs.get("pixel_values"), + image_grid_thw=prompt_inputs.get("image_grid_thw"), + pixel_attention_mask=prompt_inputs.get("pixel_attention_mask"), + image_sizes=prompt_inputs.get("image_sizes"), + ) + else: + ref_per_token_logps = None + + # Decode the generated completions + completions_text = self.processing_class.batch_decode(completion_ids, skip_special_tokens=True) + if is_conversational(inputs[0]): + completions = [] + for prompt, completion in zip(prompts, completions_text): + bootstrap = prompt.pop()["content"] if prompt[-1]["role"] == "assistant" else "" + completions.append([{"role": "assistant", "content": bootstrap + completion}]) + else: + completions = completions_text + + # Calculate rewards for each reward function. rewards_per_func aggregates rewards across all processes. This is + # important because rewards will be normalized per group, and completions are distributed. We will later slice + # rewards_per_func to extract each process's subset. 
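+        # Shape note (illustrative): with N processes, B completions per process and R reward
+        # functions, rewards_per_func is gathered to shape (N*B, R); the advantages derived from
+        # it are sliced back down to this process's B rows further below.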
+        rewards_per_func = self._calculate_rewards(inputs, original_prompts, completions, completion_ids_list)
+
+        # Apply weights to each reward function's output and sum
+        rewards = (rewards_per_func * self.reward_weights.to(device).unsqueeze(0)).nansum(dim=1)
+
+        # Compute group-wise rewards
+        mean_grouped_rewards = rewards.view(-1, self.num_generations).mean(dim=1)
+
+        # Normalize the rewards to compute the advantages
+        mean_grouped_rewards = mean_grouped_rewards.repeat_interleave(self.num_generations, dim=0)
+        advantages = rewards - mean_grouped_rewards
+
+        if self.scale_rewards in ["group", "none"]:
+            # If self.scale_rewards == "none", we'll still log the group-level std
+            std_rewards = rewards.view(-1, self.num_generations).std(dim=1)
+            std_rewards = std_rewards.repeat_interleave(self.num_generations, dim=0)
+        elif self.scale_rewards == "batch":
+            # Compute global std
+            std_rewards = rewards.std().expand_as(rewards)
+        else:
+            raise ValueError(
+                f"Invalid value for scale_rewards: {self.scale_rewards}. Must be one of 'batch', 'group', or 'none'."
+            )
+
+        is_std_zero = torch.isclose(std_rewards, torch.zeros_like(std_rewards))
+        if self.scale_rewards != "none":
+            advantages = advantages / (std_rewards + 1e-4)
+
+        # Slice to keep only the local part of the data
+        process_slice = slice(
+            self.accelerator.process_index * len(prompts),
+            (self.accelerator.process_index + 1) * len(prompts),
+        )
+        all_process_advantages = advantages.clone()  # keep the aggregated advantages for logging
+        advantages = advantages[process_slice]
+
+        # Log the metrics
+        if mode == "train":
+            self.state.num_input_tokens_seen += self.accelerator.gather(attention_mask.sum()).sum().item()
+            self._metrics[mode]["num_tokens"] = [self.state.num_input_tokens_seen]
+
+        # Log completion lengths, mean, min, max
+        self._metrics[mode]["completions/mean_length"].append(agg_completion_lengths.float().mean().item())
+        self._metrics[mode]["completions/min_length"].append(agg_completion_lengths.float().min().item())
+        self._metrics[mode]["completions/max_length"].append(agg_completion_lengths.float().max().item())
+
+        # Identify sequences that terminated with EOS and log their lengths
+        agg_terminated_with_eos = self.accelerator.gather(is_eos.any(dim=1))
+        term_completion_lengths = agg_completion_lengths[agg_terminated_with_eos]
+        clipped_completions_ratio = 1 - len(term_completion_lengths) / len(agg_completion_lengths)
+        self._metrics[mode]["completions/clipped_ratio"].append(clipped_completions_ratio)
+        if len(term_completion_lengths) == 0:  # edge case where no terminated sequences are found
+            term_completion_lengths = torch.zeros(1, device=device)
+        self._metrics[mode]["completions/mean_terminated_length"].append(term_completion_lengths.float().mean().item())
+        self._metrics[mode]["completions/min_terminated_length"].append(term_completion_lengths.float().min().item())
+        self._metrics[mode]["completions/max_terminated_length"].append(term_completion_lengths.float().max().item())
+
+        # Calculate mean reward per function, but only for samples where the function was applied (non-NaN values)
+        for i, reward_func_name in enumerate(self.reward_func_names):
+            mean_rewards = torch.nanmean(rewards_per_func[:, i]).item()
+            self._metrics[mode][f"rewards/{reward_func_name}/mean"].append(mean_rewards)
+            std_func_rewards = nanstd(rewards_per_func[:, i]).item()
+            self._metrics[mode][f"rewards/{reward_func_name}/std"].append(std_func_rewards)
+        self._metrics[mode]["reward"].append(mean_grouped_rewards.mean().item())
+
self._metrics[mode]["reward_std"].append(std_rewards.mean().item()) + self._metrics[mode]["frac_reward_zero_std"].append(is_std_zero.float().mean().item()) + + # Log prompt and completion texts + self._logs["prompt"].extend(gather_object(prompts_text)) + self._logs["completion"].extend(gather_object(completions_text)) + for i, name in enumerate(self.reward_func_names): + self._logs["rewards"][name].extend(rewards_per_func[:, i].tolist()) + self._logs["advantages"].extend(all_process_advantages.tolist()) + + if has_images: + self._logs["image"].extend(gather_object(images)) + + if self.use_vllm and self.vllm_importance_sampling_correction: + delta = torch.abs(old_per_token_logps - sampling_per_token_logps) + delta = delta[completion_mask.bool()] + mean_delta = torch.mean(delta) if delta.numel() > 0 else torch.tensor(0.0, device=device) + max_delta = torch.max(delta) if delta.numel() > 0 else torch.tensor(0.0, device=device) + self._metrics[mode]["sampling/sampling_logp_difference/mean"].append( + self.accelerator.gather(mean_delta).mean().item() + ) + self._metrics[mode]["sampling/sampling_logp_difference/max"].append( + self.accelerator.gather(max_delta).max().item() + ) + + flat_is_ratio = importance_sampling_ratio[completion_mask.bool()] + min_importance_sampling_ratio = ( + torch.min(flat_is_ratio) if flat_is_ratio.numel() > 0 else torch.tensor(0.0, device=device) + ) + mean_importance_sampling_ratio = ( + torch.mean(flat_is_ratio) if flat_is_ratio.numel() > 0 else torch.tensor(0.0, device=device) + ) + max_importance_sampling_ratio = ( + torch.max(flat_is_ratio) if flat_is_ratio.numel() > 0 else torch.tensor(0.0, device=device) + ) + self._metrics[mode]["sampling/importance_sampling_ratio/min"].append( + nanmin(self.accelerator.gather(min_importance_sampling_ratio)).item() + ) + self._metrics[mode]["sampling/importance_sampling_ratio/mean"].append( + self.accelerator.gather(mean_importance_sampling_ratio).nanmean().item() + ) + self._metrics[mode]["sampling/importance_sampling_ratio/max"].append( + nanmax(self.accelerator.gather(max_importance_sampling_ratio)).item() + ) + + output = { + "prompt_ids": prompt_ids, + "prompt_mask": prompt_mask, + "completion_ids": completion_ids, + "completion_mask": completion_mask, + "advantages": advantages, + "num_items_in_batch": num_items_in_batch, + } + if old_per_token_logps is not None: + output["old_per_token_logps"] = old_per_token_logps + if self.use_vllm and self.vllm_importance_sampling_correction: + output["importance_sampling_ratio"] = importance_sampling_ratio + if ref_per_token_logps is not None: + output["ref_per_token_logps"] = ref_per_token_logps + if "pixel_values" in prompt_inputs: + output["pixel_values"] = prompt_inputs["pixel_values"] + if "image_grid_thw" in prompt_inputs: + output["image_grid_thw"] = prompt_inputs["image_grid_thw"] + if "pixel_attention_mask" in prompt_inputs: + output["pixel_attention_mask"] = prompt_inputs["pixel_attention_mask"] + if "image_sizes" in prompt_inputs: + output["image_sizes"] = prompt_inputs["image_sizes"] + + if self.use_vllm: + try: + output["sampling_per_token_logps"] = sampling_per_token_logps + except NameError: + output["sampling_per_token_logps"] = None + return output + + def compute_liger_loss(self, unwrapped_model, inputs): + # Compute the per-token log probabilities for the model + prompt_ids, prompt_mask = inputs["prompt_ids"], inputs["prompt_mask"] + completion_ids, completion_mask = inputs["completion_ids"], inputs["completion_mask"] + input_ids = torch.cat([prompt_ids, 
completion_ids], dim=1) + attention_mask = torch.cat([prompt_mask, completion_mask], dim=1) + logits_to_keep = completion_ids.size(1) # we only need to compute the logits for the completion tokens + + # Get the last hidden state of the model + last_hidden_state = self._get_last_hidden_state( + unwrapped_model, + input_ids, + attention_mask, + logits_to_keep, + inputs.get("pixel_values"), + inputs.get("image_grid_thw"), + inputs.get("pixel_attention_mask"), + inputs.get("image_sizes"), + ) + + # compute loss and metrics using liger grpo loss + loss, metrics = self.liger_grpo_loss( + _input=last_hidden_state, + lin_weight=unwrapped_model.lm_head.weight, + selected_token_ids=completion_ids, + attention_mask=completion_mask, + advantages=inputs["advantages"], + bias=unwrapped_model.lm_head.bias, + old_per_token_logps=inputs.get("old_per_token_logps"), + ref_per_token_logps=inputs.get("ref_per_token_logps"), + ) + # Extract metrics from the liger_grpo_loss output + # KL divergence is the first metric when beta is non-zero + mean_kl = metrics[0] if self.beta != 0.0 else None + clip_ratio = metrics[-1] + + mode = "train" if self.model.training else "eval" + if self.beta != 0.0: + self._metrics[mode]["kl"].append(self.accelerator.gather(mean_kl).mean().item()) + self._metrics[mode]["clip_ratio"].append(self.accelerator.gather(clip_ratio).mean().item()) + return loss / self.current_gradient_accumulation_steps + + def compute_loss(self, model, inputs, return_outputs = False, num_items_in_batch = None): + if return_outputs: + raise ValueError("The GRPOTrainer does not support returning outputs") + # Compute the per-token log probabilities for the model + prompt_ids, prompt_mask = inputs["prompt_ids"], inputs["prompt_mask"] + completion_ids, completion_mask = inputs["completion_ids"], inputs["completion_mask"] + pixel_values, image_grid_thw = inputs.get("pixel_values", None), inputs.get("image_grid_thw", None) + pixel_attention_mask, image_sizes = inputs.get('pixel_attention_mask',None), inputs.get('image_sizes',None) + num_items_in_batch = inputs.get("num_items_in_batch", None) + sampling_per_token_logps = inputs.get("sampling_per_token_logps", None) + current_gradient_accumulation_steps = self.current_gradient_accumulation_steps + num_processes = self.accelerator.num_processes + + input_ids = torch.cat([prompt_ids, completion_ids], dim=1) + bsz, qlen = input_ids.shape + attention_mask = torch.cat([prompt_mask, completion_mask], dim=1) + # attention_mask = None + logits_to_keep = completion_ids.size(1) # we only need to compute the logits for the completion tokens + _input_ids = input_ids + _logits_to_keep = logits_to_keep + + get_logps_func = \ + lambda model, input_ids, attention_mask, logits_to_keep, batch_size=None, compute_entropy=False, compute_efficient = False: \ + self._get_per_token_logps(model, input_ids, attention_mask, logits_to_keep, compute_efficient) \ + if hasattr(self, "_get_per_token_logps") else \ + self._get_per_token_logps_and_entropies(model, input_ids, attention_mask, logits_to_keep, batch_size, compute_entropy, compute_efficient)[0] # logps + + per_token_logps = get_logps_func(model, input_ids, attention_mask, logits_to_keep, compute_efficient = True) + # Compute the KL divergence between the model and the reference model + # _prepare_inputs doesn't return reference log probs anymore. We need to calculate it ourselves. 
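+        # (When self.beta != 0.0 this would be the k3-style KL estimator,
+        #  exp(ref_logp - logp) - (ref_logp - logp) - 1 per token, as in the
+        #  commented-out reference snippet below.)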
+ # https://github.com/huggingface/trl/blob/05bc43e960396581e458195b8388efe6b82cae1f/trl/trainer/grpo_trainer.py#L1328 + # if self.beta != 0.0: + # with torch.inference_mode(), model.disable_adapter(): + # ref_per_token_logps = per_token_logps = get_logps_func(model, input_ids, attention_mask, logits_to_keep) + # else: + # ref_per_token_logps = None + ref_hidden_states = inputs.get("ref_per_token_logps", None) + # per_token_kl = torch.exp(ref_per_token_logps - per_token_logps) - (ref_per_token_logps - per_token_logps) - 1 + # x - x.detach() allows for preserving gradients from x + advantages = inputs["advantages"] + # per_token_loss = torch.exp(per_token_logps - per_token_logps.detach()) * advantages.unsqueeze(1) + # per_token_loss = -(per_token_loss - self.beta * per_token_kl) + # loss = ((per_token_loss * completion_mask).sum(dim=1) / completion_mask.sum(dim=1)).mean() + old_hidden_states = inputs.get("old_per_token_logps", None) + + input_ids = input_ids[:, -logits_to_keep:] + + # Get logit softcapping and logit scale + logit_softcapping = getattr(model.config, "final_logit_softcapping", 0) # Gemma + if logit_softcapping is None: logit_softcapping = 0 + logit_scale_multiply = getattr(model.config, "logit_scale", 0) # Cohere + if logit_scale_multiply is None: logit_scale_multiply = 0 + logit_scale_divide = getattr(model.config, "logits_scaling", 0) # Granite + if logit_scale_divide is None: logit_scale_divide = 0 + + if per_token_logps is not None: + + if ref_hidden_states is not None: + ref_hidden_states = ref_hidden_states[:, :-1, :] # (B, L-1, V), exclude the last logit: it corresponds to the next token pred + if old_hidden_states is not None: + old_hidden_states = old_hidden_states[:, :-1, :] # (B, L-1, V), exclude the last logit: it corresponds to the next token pred + per_token_logps = per_token_logps[:, :-1, :] # (B, L-1, V), exclude the last logit: it corresponds to the next token pred + + loss, completion_length, mean_kl, delta, flat_is_ratio = grpo_compute_loss_slow( + ref_hidden_states, + per_token_logps, + old_hidden_states, + input_ids, + completion_mask, + self.beta, + advantages, + pixel_values = pixel_values, + image_grid_thw = image_grid_thw, + loss_type = self.args.loss_type, + importance_sampling_level = self.importance_sampling_level, + epsilon_low = self.epsilon_low, + epsilon_high = self.epsilon_high, + max_completion_length = self.args.max_completion_length, + delta = self.args.delta, + temperature = self.args.temperature, + logit_softcapping = logit_softcapping, + logit_scale_multiply = logit_scale_multiply, + logit_scale_divide = logit_scale_divide, + num_items_in_batch = num_items_in_batch, + current_gradient_accumulation_steps = current_gradient_accumulation_steps, + num_processes = num_processes, + sampling_per_token_logps = sampling_per_token_logps, + ) + else: + if hasattr(self.args, "loss_type"): + loss, completion_length, mean_kl, delta, flat_is_ratio = grpo_accumulated_loss( + trainer = self, + input_ids = _input_ids, + pixel_values = pixel_values, + image_grid_thw = image_grid_thw, + logits_to_keep = logits_to_keep, + completion_mask = completion_mask, + advantages = advantages, + old_hidden_states = old_hidden_states, + ref_hidden_states = ref_hidden_states, + n_chunks = self.args.unsloth_num_chunks, + loss_type = self.args.loss_type, + importance_sampling_level = self.importance_sampling_level, + epsilon_low = self.epsilon_low, + epsilon_high = self.epsilon_high, + max_completion_length = self.args.max_completion_length, + delta = self.args.delta, + 
temperature = self.args.temperature, + logit_softcapping = logit_softcapping, + logit_scale_multiply = logit_scale_multiply, + logit_scale_divide = logit_scale_divide, + attention_mask = attention_mask, + num_items_in_batch = num_items_in_batch, + current_gradient_accumulation_steps = current_gradient_accumulation_steps, + num_processes = num_processes, + sampling_per_token_logps = sampling_per_token_logps, + ) + else: + # to ensure backwards compatibility with trl 0.15.2 and maybe even 0.17 + loss, completion_length, mean_kl = grpo_accumulated_loss( + trainer = self, + input_ids = _input_ids, + logits_to_keep = logits_to_keep, + completion_mask = completion_mask, + advantages = advantages, + old_hidden_states = old_hidden_states, + ref_hidden_states = ref_hidden_states, + n_chunks = self.args.unsloth_num_chunks, + temperature = self.args.temperature, + logit_softcapping = logit_softcapping, + logit_scale_multiply = logit_scale_multiply, + logit_scale_divide = logit_scale_divide, + attention_mask = attention_mask, + ) + pass + pass + + if "train" in self._metrics: + mode = "eval" if self.control.should_evaluate else "train" + self._metrics[mode]["completion_length"].append(completion_length.item()) + self._metrics[mode]["kl"].append(mean_kl.item()) + else: + self._metrics["completion_length"].append(completion_length.item()) + self._metrics["kl"].append(mean_kl.item()) + + if self.use_vllm and delta is not None: + mean_delta = torch.mean(delta) if delta.numel() > 0 else torch.tensor(0.0, device=self.model.device) + max_delta = torch.max(delta) if delta.numel() > 0 else torch.tensor(0.0, device=self.model.device) + self._metrics[mode]["sampling/sampling_logp_difference/mean"].append( + self.accelerator.gather(mean_delta).mean().item() + ) + self._metrics[mode]["sampling/sampling_logp_difference/max"].append( + self.accelerator.gather(max_delta).max().item() + ) + + min_importance_sampling_ratio = ( + torch.min(flat_is_ratio) if flat_is_ratio.numel() > 0 else torch.tensor(0.0, device=self.model.device) + ) + mean_importance_sampling_ratio = ( + torch.mean(flat_is_ratio) if flat_is_ratio.numel() > 0 else torch.tensor(0.0, device=self.model.device) + ) + max_importance_sampling_ratio = ( + torch.max(flat_is_ratio) if flat_is_ratio.numel() > 0 else torch.tensor(0.0, device=self.model.device) + ) + self._metrics[mode]["sampling/importance_sampling_ratio/min"].append( + nanmin(self.accelerator.gather(min_importance_sampling_ratio)).item() + ) + self._metrics[mode]["sampling/importance_sampling_ratio/mean"].append( + self.accelerator.gather(mean_importance_sampling_ratio).nanmean().item() + ) + self._metrics[mode]["sampling/importance_sampling_ratio/max"].append( + nanmax(self.accelerator.gather(max_importance_sampling_ratio)).item() + ) + + return loss + + def _compute_loss(self, model, inputs): + # Compute the per-token log probabilities for the model + prompt_ids, prompt_mask = inputs["prompt_ids"], inputs["prompt_mask"] + completion_ids, completion_mask = inputs["completion_ids"], inputs["completion_mask"] + input_ids = torch.cat([prompt_ids, completion_ids], dim=1) + attention_mask = torch.cat([prompt_mask, completion_mask], dim=1) + logits_to_keep = completion_ids.size(1) # we only need to compute the logits for the completion tokens + + # Compute the per_token_logps and the entropy at each position in the completion + per_token_logps, entropies = self._get_per_token_logps_and_entropies( + model, + input_ids, + attention_mask, + logits_to_keep, + compute_entropy=True, + 
pixel_values=inputs.get("pixel_values"), + image_grid_thw=inputs.get("image_grid_thw"), + pixel_attention_mask=inputs.get("pixel_attention_mask"), + image_sizes=inputs.get("image_sizes"), + ) + + if self.top_entropy_quantile < 1.0: + entropy_mask = self.get_high_entropy_mask(entropies, completion_mask, 1 - self.top_entropy_quantile) + else: + entropy_mask = None + + # Compute the KL divergence between the model and the reference model + if self.beta != 0.0: + ref_per_token_logps = inputs["ref_per_token_logps"] + per_token_kl = ( + torch.exp(ref_per_token_logps - per_token_logps) - (ref_per_token_logps - per_token_logps) - 1 + ) + + # Compute the loss + advantages = inputs["advantages"] + # When num_iterations == 1 and steps_per_generation <= gradient_accumulation_steps, + # old_per_token_logps == per_token_logps. In this case we can skip its computation + # (see _generate_and_score_completions) and instead use per_token_logps.detach(). + # The exception is when using vLLM, where we always compute old_per_token_logps + # for importance sampling + old_per_token_logps = inputs.get("old_per_token_logps") + old_per_token_logps = per_token_logps.detach() if old_per_token_logps is None else old_per_token_logps + + log_ratio = per_token_logps - old_per_token_logps + if self.importance_sampling_level == "token": + log_importance_weights = log_ratio + elif self.importance_sampling_level == "sequence": + log_importance_weights = (log_ratio * completion_mask).sum(-1) / completion_mask.sum(-1).clamp(min=1.0) + log_importance_weights = log_importance_weights.unsqueeze(-1) + else: + raise ValueError( + f"Unknown importance sampling level: {self.importance_sampling_level}. Possible values are 'token' " + "and 'sequence'." + ) + # From here, log_importance_weights (and all subsequent tensors, coef_1, coef_2, etc.) 
shape depends on + # importance_sampling_level: "token" level: (B, T); "sequence" level: (B, 1) + + coef_1 = torch.exp(log_importance_weights) + coef_2 = torch.clamp(coef_1, 1 - self.epsilon_low, 1 + self.epsilon_high) + + # Two-sided clipping + if self.args.delta is not None: + coef_1 = torch.clamp(coef_1, max=self.args.delta) + + per_token_loss1 = coef_1 * advantages.unsqueeze(1) + per_token_loss2 = coef_2 * advantages.unsqueeze(1) + per_token_loss = -torch.min(per_token_loss1, per_token_loss2) + if entropy_mask is not None: + per_token_loss = per_token_loss * entropy_mask + + if self.use_vllm and self.vllm_importance_sampling_correction: + per_token_loss = per_token_loss * inputs["importance_sampling_ratio"] + + if self.beta != 0.0: + per_token_loss = per_token_loss + self.beta * per_token_kl + + if self.loss_type == "grpo": + loss = ((per_token_loss * completion_mask).sum(-1) / completion_mask.sum(-1).clamp(min=1.0)).mean() + loss = loss / self.current_gradient_accumulation_steps + elif self.loss_type == "bnpo": + loss = (per_token_loss * completion_mask).sum() / completion_mask.sum().clamp(min=1.0) + loss = loss / self.current_gradient_accumulation_steps + elif self.loss_type == "dr_grpo": + loss = (per_token_loss * completion_mask).sum() / (per_token_loss.size(0) * self.max_completion_length) + loss = loss / self.current_gradient_accumulation_steps + elif self.loss_type == "dapo": + normalizer = inputs["num_items_in_batch"] / self.accelerator.num_processes + loss = (per_token_loss * completion_mask).sum() / normalizer + else: + raise ValueError(f"Unknown loss type: {self.loss_type}") + + # Log the metrics + mode = "train" if self.model.training else "eval" + + completion_token_count = completion_mask.sum().clamp(min=1.0) + + def masked_batch_mean(x): + if x.shape[1] == 1: # when importance_sampling_level == "sequence" + return x.mean() + else: + return (x * completion_mask).sum() / completion_token_count + + if self.beta != 0.0: + mean_kl = masked_batch_mean(per_token_kl) + self._metrics[mode]["kl"].append(self.accelerator.gather(mean_kl).nanmean().item()) + + mean_entropy = masked_batch_mean(entropies) + self._metrics[mode]["entropy"].append(self.accelerator.gather(mean_entropy).nanmean().item()) + + # Compute the clipped probability ratios + is_low_clipped = (coef_1 < 1 - self.epsilon_low) & (advantages.unsqueeze(1) < 0) + is_high_clipped = (coef_1 > 1 + self.epsilon_high) & (advantages.unsqueeze(1) > 0) + is_region_clipped = is_low_clipped | is_high_clipped + + low_clip = masked_batch_mean(is_low_clipped.float()) + high_clip = masked_batch_mean(is_high_clipped.float()) + clip_ratio = masked_batch_mean(is_region_clipped.float()) + + gathered_low_clip = self.accelerator.gather(low_clip) + self._metrics[mode]["clip_ratio/low_mean"].append(gathered_low_clip.nanmean().item()) + self._metrics[mode]["clip_ratio/low_min"].append(nanmin(gathered_low_clip).item()) + gathered_high_clip = self.accelerator.gather(high_clip) + self._metrics[mode]["clip_ratio/high_mean"].append(gathered_high_clip.nanmean().item()) + self._metrics[mode]["clip_ratio/high_max"].append(nanmax(gathered_high_clip).item()) + gathered_clip_ratio = self.accelerator.gather(clip_ratio) + self._metrics[mode]["clip_ratio/region_mean"].append(gathered_clip_ratio.nanmean().item()) + return loss + + def prediction_step(self, model, inputs, prediction_loss_only, ignore_keys: Optional[list[str]] = None): + inputs = self._prepare_inputs(inputs) + with torch.no_grad(): + with self.compute_loss_context_manager(): + loss = 
self.compute_loss(model, inputs) + loss = loss.mean().detach() + return loss, None, None + + def log(self, logs: dict[str, float], start_time: Optional[float] = None) -> None: + mode = "train" if self.model.training else "eval" + metrics = {key: sum(val) / len(val) for key, val in self._metrics[mode].items()} # average the metrics + + # This method can be called both in training and evaluation. When called in evaluation, the keys in `logs` + # start with "eval_". We need to add the prefix "eval_" to the keys in `metrics` to match the format. + if mode == "eval": + metrics = {f"eval_{key}": val for key, val in metrics.items()} + + logs = {**logs, **metrics} + super().log(logs, start_time) + self._metrics[mode].clear() + + if self.accelerator.is_main_process and self.log_completions: + if is_rich_available(): + print_prompt_completions_sample( + self._logs["prompt"], + self._logs["completion"], + self._logs["rewards"], + self._logs["advantages"], + self.state.global_step, + self.num_completions_to_print, + ) + + if self.args.report_to and "wandb" in self.args.report_to and wandb.run is not None: + import pandas as pd + + table = { + "step": [str(self.state.global_step)] * len(self._logs["prompt"]), + "prompt": self._logs["prompt"], + "completion": self._logs["completion"], + **self._logs["rewards"], + "advantage": self._logs["advantages"], + } + + if self._logs["image"]: + table["image"] = [] + for img in self._logs["image"]: + if img is not None: + # Convert images to wandb Image objects for proper visualization + table["image"].append(wandb.Image(img)) + else: + table["image"].append(None) + + df = pd.DataFrame(table) + if self.wandb_log_unique_prompts: + df = df.drop_duplicates(subset=["prompt"]) + wandb.log({"completions": wandb.Table(dataframe=df)}) + + # Ensure the model card is saved along with the checkpoint + def _save_checkpoint(self, model, trial): + if self.args.hub_model_id is None: + model_name = Path(self.args.output_dir).name + else: + model_name = self.args.hub_model_id.split("/")[-1] + self.create_model_card(model_name=model_name) + super()._save_checkpoint(model, trial) + + def create_model_card( + self, + model_name: Optional[str] = None, + dataset_name: Optional[str] = None, + tags: Union[str, list[str], None] = None, + ): + """ + Creates a draft of a model card using the information available to the `Trainer`. + + Args: + model_name (`str` or `None`, *optional*, defaults to `None`): + Name of the model. + dataset_name (`str` or `None`, *optional*, defaults to `None`): + Name of the dataset used for training. + tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`): + Tags to be associated with the model card. + """ + if not self.is_world_process_zero(): + return + + if hasattr(self.model.config, "_name_or_path") and not os.path.isdir(self.model.config._name_or_path): + base_model = self.model.config._name_or_path + else: + base_model = None + + # normalize `tags` to a mutable set + if tags is None: + tags = set() + elif isinstance(tags, str): + tags = {tags} + else: + tags = set(tags) + + if hasattr(self.model.config, "unsloth_version"): + tags.add("unsloth") + + if "JOB_ID" in os.environ: + tags.add("hf_jobs") + + tags.update(self._tag_names) + + # docstyle-ignore + citation = textwrap.dedent( + """\ + @article{shao2024deepseekmath, + title = {{DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models}}, + author = {Zhihong Shao and Peiyi Wang and Qihao Zhu and Runxin Xu and Junxiao Song and Mingchuan Zhang and Y. K. Li and Y. 
Wu and Daya Guo}, + year = 2024, + eprint = {arXiv:2402.03300}, + } + """ + ) + + model_card = generate_model_card( + base_model=base_model, + model_name=model_name, + hub_model_id=self.hub_model_id, + dataset_name=dataset_name, + tags=tags, + wandb_url=wandb.run.url if is_wandb_available() and wandb.run is not None else None, + comet_url=get_comet_experiment_url(), + trainer_name="GRPO", + trainer_citation=citation, + paper_title="DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models", + paper_id="2402.03300", + ) + + model_card.save(os.path.join(self.args.output_dir, "README.md")) +class UnslothGRPOTrainer(_UnslothGRPOTrainer): + """ + +Trainer for the Group Relative Policy Optimization (GRPO) method. This algorithm was initially proposed in the +paper [DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language +Models](https://huggingface.co/papers/2402.03300). + +Example: + +```python +from datasets import load_dataset +from trl import GRPOTrainer + +dataset = load_dataset("trl-lib/tldr", split="train") +def reward_func(completions, **kwargs): + # Dummy reward function that rewards completions with more unique letters. + return [float(len(set(completion))) for completion in completions] +trainer = GRPOTrainer( + model="Qwen/Qwen2-0.5B-Instruct", + reward_funcs=reward_func, + train_dataset=dataset, +) + +trainer.train() +``` + +Args: + model (`Union[str, PreTrainedModel]`): + Model to be trained. Can be either: + + - A string, being the *model id* of a pretrained model hosted inside a model repo on huggingface.co, or a + path to a *directory* containing model weights saved using + [`~transformers.PreTrainedModel.save_pretrained`], e.g., `'./my_model_directory/'`. The model is loaded + using [`~transformers.AutoModelForCausalLM.from_pretrained`] with the keyword arguments in + `args.model_init_kwargs`. + - A [`~transformers.PreTrainedModel`] object. Only causal language models are supported. + reward_funcs (`Union[RewardFunc, list[RewardFunc]]`): + Reward functions to be used for computing the rewards. To compute the rewards, we call all the reward + functions with the prompts and completions and sum the rewards. Can be either: + + - A single reward function, such as: + - A string: The *model ID* of a pretrained model hosted inside a model repo on huggingface.co, or a + path to a *directory* containing model weights saved using + [`~transformers.PreTrainedModel.save_pretrained`], e.g., `'./my_model_directory/'`. The model is loaded + using [`~transformers.AutoModelForSequenceClassification.from_pretrained`] with `num_labels=1` and the + keyword arguments in `args.model_init_kwargs`. + - A [`~transformers.PreTrainedModel`] object: Only sequence classification models are supported. + - A custom reward function: The function is provided with the prompts and the generated completions, + plus any additional columns in the dataset. It should return a list of rewards. Custom reward + functions can also return `None` when the reward is not applicable to those samples. This is useful + for multi-task training where different reward functions apply to different types of samples. When a + reward function returns `None` for a sample, that reward function is excluded from the reward + calculation for that sample. For more details, see [Using a custom reward + function](#using-a-custom-reward-function). + + The trainer's state is also passed to the reward function. 
The trainer's state is an instance of
+            [`~transformers.TrainerState`] and can be accessed via the `trainer_state` argument in the
+            reward function's signature.
+        - A list of reward functions, where each item can independently be any of the above types. Mixing different
+          types within the list (e.g., a string model ID and a custom reward function) is allowed.
+    args ([`GRPOConfig`], *optional*, defaults to `None`):
+        Configuration for this trainer. If `None`, a default configuration is used.
+    train_dataset ([`~datasets.Dataset`] or [`~datasets.IterableDataset`]):
+        Dataset to use for training. It must include a column `"prompt"`. Any additional columns in the dataset are
+        ignored. The format of the samples can be either:
+
+        - [Standard](dataset_formats#standard): Each sample contains plain text.
+        - [Conversational](dataset_formats#conversational): Each sample contains structured messages (e.g., role
+          and content).
+    eval_dataset ([`~datasets.Dataset`], [`~datasets.IterableDataset`] or `dict[str, Union[Dataset, IterableDataset]]`):
+        Dataset to use for evaluation. It must meet the same requirements as `train_dataset`.
+    processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.ProcessorMixin`] or `None`, *optional*, defaults to `None`):
+        Processing class used to process the data. The padding side must be set to "left". If `None`, the
+        processing class is loaded from the model's name with [`~transformers.AutoProcessor.from_pretrained`]. A
+        padding token, `tokenizer.pad_token`, must be set. If the processing class has not set a padding token,
+        `tokenizer.eos_token` will be used as the default.
+    reward_processing_classes (`Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]`, *optional*, defaults to `None`):
+        Processing classes corresponding to the reward functions specified in `reward_funcs`. Can be either:
+
+        - A single processing class: Used when `reward_funcs` contains only one reward function.
+        - A list of processing classes: Must match the order and length of the reward functions in `reward_funcs`.
+          If set to `None`, or if an element of the list corresponding to a [`~transformers.PreTrainedModel`] is
+          `None`, the tokenizer for the model is automatically loaded using
+          [`~transformers.AutoTokenizer.from_pretrained`]. For elements in `reward_funcs` that are custom reward
+          functions (not [`~transformers.PreTrainedModel`]), the corresponding entries in `reward_processing_classes`
+          are ignored.
+    callbacks (list of [`~transformers.TrainerCallback`], *optional*, defaults to `None`):
+        List of callbacks to customize the training loop. Will add those to the list of default callbacks detailed
+        [here](https://huggingface.co/docs/transformers/main_classes/callback).
+
+        If you want to remove one of the default callbacks used, use the [`~transformers.Trainer.remove_callback`]
+        method.
+    optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`, *optional*, defaults to `(None, None)`):
+        A tuple containing the optimizer and the scheduler to use. Will default to an instance of [`AdamW`] on your
+        model and a scheduler given by [`get_linear_schedule_with_warmup`] controlled by `args`.
+    peft_config ([`~peft.PeftConfig`], *optional*, defaults to `None`):
+        PEFT configuration used to wrap the model. If `None`, the model is not wrapped.
+ + """ + def __init__( + self, + model, + reward_funcs, + args = None, + train_dataset = None, + eval_dataset = None, + processing_class = None, + reward_processing_classes = None, + callbacks = None, + peft_config = None, + **kwargs + ): + if args is None: args = UnslothGRPOConfig() + use_bf16 = getattr(args, 'bf16', False) + if type(use_bf16) is not bool: use_bf16 = False + use_fp16 = getattr(args, 'fp16', False) + if type(use_fp16) is not bool: use_fp16 = False + force_float32 = False + full_finetuning = os.environ.get('UNSLOTH_ENABLE_FULL_FINETUNING', '0') == '1' + if not full_finetuning and (os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '1'): + print('Unsloth: Switching to float32 training since model cannot work with float16') + force_float32 = True + mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') + dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None) + if dtype is None: dtype = model.get_input_embeddings().dtype + from unsloth_zoo.utils import _get_dtype + dtype = _get_dtype(dtype) + float16 = dtype == torch.float16 + if not force_float32 and (float16 and use_bf16): raise TypeError('Unsloth: Model is in float16 precision but you want to use bfloat16 precision. Set fp16 to `True` and bf16 to `False`') + if not force_float32 and (not float16 and use_fp16): raise TypeError('Unsloth: Model is in bfloat16 precision but you want to use float16 precision. Set fp16 to `False` and bf16 to `True`') + if force_float32: + # Forced float32 training + args.fp16 = False + args.bf16 = False + os.environ['ACCELERATE_MIXED_PRECISION'] = 'no' + elif (not use_bf16 and not use_fp16) and mixed_precision_dtype == 'float32': + # Mixed precision training + args.fp16 = float16 + args.bf16 = not float16 + os.environ['ACCELERATE_MIXED_PRECISION'] = 'fp16' if float16 else 'bf16' + if getattr(args, 'eval_dataset', None) is not None and getattr(args, 'eval_strategy', 'no') == 'no': + args.eval_strategy = 'steps' + if getattr(args, 'eval_steps', None) is None: args.eval_steps = 0.1 + ga_steps = getattr(args, 'gradient_accumulation_steps', None) + if ga_steps is not None and ga_steps > 1: + from transformers import __version__ as transformers_version + if Version(transformers_version) <= Version('4.45.2'): + print('**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!\n' + '`pip install --upgrade --no-cache-dir --force-reinstall --no-deps unsloth transformers trl unsloth_zoo`') + if getattr(args, 'eval_strategy', 'no') != 'no': + eval_bsz = getattr(args, 'per_device_eval_batch_size', 8) + if eval_bsz == 8 and args.per_device_train_batch_size < eval_bsz: args.per_device_eval_batch_size = args.per_device_train_batch_size + if getattr(args, 'eval_accumulation_steps', None) is None and ga_steps is not None: args.eval_accumulation_steps = ga_steps + fp16_full_eval = getattr(args, 'fp16_full_eval', False) + if type(fp16_full_eval) is not bool: fp16_full_eval = False + bf16_full_eval = getattr(args, 'bf16_full_eval', False) + if type(bf16_full_eval) is not bool: bf16_full_eval = False + if args.fp16 and bf16_full_eval: args.bf16_full_eval = False; args.fp16_full_eval = True + if args.bf16 and fp16_full_eval: args.bf16_full_eval = True; args.fp16_full_eval = False + if force_float32: + args.bf16_full_eval = False + args.fp16_full_eval = False + elif os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') == 'bfloat16': + args.bf16_full_eval = True + args.fp16_full_eval = False + elif not bf16_full_eval 
and not fp16_full_eval: + args.bf16_full_eval = args.bf16 + args.fp16_full_eval = args.fp16 + _output_logits = False + if locals().get('compute_metrics', None) is not None: _output_logits = True + if locals().get('preprocess_logits_for_metrics', None) is not None: _output_logits = True + if _output_logits: + os.environ['UNSLOTH_RETURN_LOGITS'] = '1' + if 'max_seq_length' not in locals() and not hasattr(args, 'max_seq_length'): + pass + else: + model_max_seq_length = getattr(model, 'max_seq_length', None) + args_max_seq_length = getattr(args, 'max_seq_length', None) + if args_max_seq_length is None and model_max_seq_length is not None: + max_seq_length = model.max_seq_length + if hasattr(args, 'max_seq_length'): args.max_seq_length = max_seq_length + if model is not None and hasattr(model, 'for_training'): + model.for_training() + if 'tokenizer' in locals() and hasattr(tokenizer, 'padding_side'): tokenizer.padding_side = 'right' + if 'processing_class' in locals(): + if hasattr(processing_class, 'padding_side'): processing_class.padding_side = 'right' + if hasattr(processing_class, 'tokenizer') and hasattr(processing_class.tokenizer, 'padding_side'): processing_class.tokenizer.padding_side = 'right' + other_metrics = [] + if not isinstance(reward_funcs, list): _reward_funcs = [reward_funcs] + else: _reward_funcs = reward_funcs + for reward_func in _reward_funcs: + try: + reward_func_name = reward_func.__name__ + if True: + other_metrics.append(f'rewards/{reward_func_name}/mean') + if True: + other_metrics.append(f'rewards/{reward_func_name}/std') + if False: + other_metrics.append(f'rewards/{reward_func_name}') + except: pass + + from unsloth_zoo.logging_utils import PatchRLStatistics + PatchRLStatistics('grpo_trainer', other_metrics) + + # [TODO] Fix up DataParallel multiplying batch sizes + # [TODO] DDP works, but DP seems to not work? 
[TODO]
+        if getattr(args, "parallel_mode", None) == ParallelMode.NOT_DISTRIBUTED and args.n_gpu > 1:
+            if getattr(args, "_n_gpu", 1) != 1:
+                args._n_gpu = 1
+        if "model" in locals() and hasattr(model, "for_training"):
+            model.for_training()
+        super().__init__(
+            model = model,
+            reward_funcs = reward_funcs,
+            args = args,
+            train_dataset = train_dataset,
+            eval_dataset = eval_dataset,
+            processing_class = processing_class,
+            reward_processing_classes = reward_processing_classes,
+            callbacks = callbacks,
+            peft_config = peft_config,**kwargs)
+        if "model" in locals() and hasattr(model, "for_inference"):
+            model.for_inference()
+        if hasattr(self, 'neftune_hook_handle'):
+            self.neftune_hook_handle.remove()
+        if hasattr(self, 'neftune_hook_handle'): del self.neftune_hook_handle
+        if getattr(args, 'neftune_noise_alpha', None) is not None:
+            model.get_input_embeddings().neftune_noise_alpha = self.neftune_noise_alpha
+        pass
+        if hasattr(self, 'accelerator'):
+            scaler = self.accelerator.scaler
+            current_model = model
+            while hasattr(current_model, 'model'):
+                current_model.accelerator_scaler = scaler
+                current_model = current_model.model
+            current_model.accelerator_scaler = scaler
+        pass
+        if hasattr(self, 'train'):
+            self.train = MethodType(prepare_for_training_mode(self.__class__.train), self)
+        pass
+
+pass
+
+
+if hasattr(logger, "addFilter"):
+    import logging
+    class HideLoggingMessage(logging.Filter):
+        def __init__(self, text): self.text = text
+        def filter(self, x): return not (self.text in x.getMessage())
+    pass
+    logger.addFilter(HideLoggingMessage("`use_cache=True`"))
+
diff --git a/unsloth_compiled_cache/UnslothIterativeSFTTrainer.py b/unsloth_compiled_cache/UnslothIterativeSFTTrainer.py
new file mode 100644
index 0000000000000000000000000000000000000000..de27975bb36ea45104a2fcf9190ffdb5fae524c0
--- /dev/null
+++ b/unsloth_compiled_cache/UnslothIterativeSFTTrainer.py
@@ -0,0 +1,1149 @@
+"""
+2025.11.2
+2025.11.1
+4.57.2
+0.23.0
+__UNSLOTH_VERSIONING__
+"""
+
+# Unsloth auto generated code
+# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU Lesser General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+from torch import Tensor
+import torch
+import torch.nn as nn
+from torch.nn import functional as F
+from typing import Any, List, Optional, Tuple, Union, Dict, Set, Callable
+from trl.trainer.iterative_sft_trainer import (AutoModelForCausalLM, AutoTokenizer, BaseImageProcessor, Callable, DataCollator, DataCollatorForLanguageModeling, DataCollatorForSeq2Seq, DataLoader, Dataset, EvalLoopOutput, FeatureExtractionMixin, IterativeSFTConfig, IterativeSFTTrainer, Optional, PPODecorators, Path, PeftModel, PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin, Trainer, TrainingArguments, Union, generate_model_card, get_comet_experiment_url, is_peft_available, is_wandb_available, logger, logging, os, torch, warnings, Optional, PeftModel, PreTrainedModel, Trainer, is_peft_available, logger, os, torch)
+
+
+import os
+from typing import *
+from dataclasses import dataclass, field
+from packaging.version import Version
+import torch
+import numpy as np
+from contextlib import nullcontext
+from torch.nn import functional as F
+from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling as TransformersDataCollatorForLanguageModeling
+from transformers.training_args import ParallelMode
+
+# Wrap the trainer so padding goes to the right and training mode is enabled
+import functools
+from types import MethodType
+def prepare_for_training_mode(f):
+    @functools.wraps(f)
+    def wrapper(self, *args, **kwargs):
+        # Enable training mode
+        if hasattr(self, 'model') and hasattr(self.model, "for_training"):
+            self.model.for_training()
+        output = f(self, *args, **kwargs)
+        # Return to inference mode
+        if hasattr(self, 'model') and hasattr(self.model, "for_inference"):
+            self.model.for_inference()
+        return output
+    return wrapper
+pass
+
+torch_compile_options = {
+    "epilogue_fusion"   : True,
+    "max_autotune"      : False,
+    "shape_padding"     : True,
+    "trace.enabled"     : False,
+    "triton.cudagraphs" : False,
+}
+
+@torch.compile(dynamic = True, fullgraph = True, options = torch_compile_options,)
+def chunked_selective_log_softmax(logits, index):
+    # Split into 4 chunks only
+    chunked_logits = torch.chunk(logits.reshape(-1, logits.shape[-1]), chunks = 4, dim = 0)
+    chunked_index  = torch.chunk(index.reshape(-1), chunks = 4, dim = 0)
+    all_per_token_logps = []
+    # Below loop does the same as selective_log_softmax(chunk_logits, chunk_index)
+    for chunk_logits, chunk_index in zip(chunked_logits, chunked_index):
+        chunk_logits = chunk_logits.to(torch.float32)
+        selected_logits = torch.gather(chunk_logits, dim = -1, index = chunk_index.unsqueeze(-1)).squeeze(-1)
+        logsumexp_values = torch.logsumexp(chunk_logits, dim = -1)
+        per_token_logps = selected_logits - logsumexp_values
+        all_per_token_logps.append(per_token_logps)
+    pass
+    all_per_token_logps = torch.concat(all_per_token_logps)
+    all_per_token_logps = all_per_token_logps.reshape((logits.shape[0], logits.shape[1]))
+    return all_per_token_logps
+
+def calculate_pad_tokens_in_prompt(
+    input_ids: torch.Tensor,
+    logits_to_keep: int,
+    pad_token_id: int
+) -> torch.Tensor:
+    """
+    Given a prompt tensor, returns the number of left-padding tokens in each sequence,
so [pad, pad, pad, cat] = 3 tokens.
+    """
+    if logits_to_keep >= input_ids.shape[1]:
+        raise ValueError("logits_to_keep must be smaller than the sequence length.")
+
+    prompt_section = input_ids[:, :-logits_to_keep]
+
+    padding_mask = (prompt_section == pad_token_id)
+
+    pad_token_counts = padding_mask.sum(dim=1)
+
+    return pad_token_counts
+
+def create_completion_attention_mask(
+    completion_input_ids: torch.Tensor,
+    left_pad_tokens_per_prompt: torch.Tensor,
+    max_left_pad: int,
+    pad_token_id: int
+) -> torch.Tensor:
+    """
+    Given a sequence [p, p, p, c, c, c, pad, pad, pad], where p are extra prompt tokens obtained
+    from slicing the torch tensor, c are completion tokens, and pad are pad tokens, this function
+    builds a completion mask that zeroes out the pad and p tokens: in this example,
+    [0, 0, 0, 1, 1, 1, 0, 0, 0].
+    """
+    batch_size, completion_len = completion_input_ids.shape
+    device = completion_input_ids.device
+
+    num_tokens_to_mask = max_left_pad - left_pad_tokens_per_prompt
+
+    indices = torch.arange(completion_len, device=device).unsqueeze(0)
+    shift_mask = indices >= num_tokens_to_mask.unsqueeze(1)
+
+    non_padding_mask = (completion_input_ids != pad_token_id)
+
+    final_mask = shift_mask & non_padding_mask
+
+    return final_mask
+
+def left_pack_padding(tensor: torch.Tensor, pad_id: int) -> torch.Tensor:
+    """
+    Moves all padding tokens in each sequence of a batch to the right.
+    """
+    mask = (tensor != pad_id)
+    # Must use stable=True since argsort on a binary mask is otherwise unordered
+    sorted_indices = torch.argsort(mask, dim=1, descending=True, stable=True)
+    packed_tensor = torch.gather(tensor, 1, sorted_indices)
+    return packed_tensor
+
+def align_logprobs_with_mask(
+    logprob_tensor: torch.Tensor,
+    attention_mask: torch.Tensor,
+    pad_value: float = 0.0
+) -> torch.Tensor:
+    """
+    Aligns a log probability tensor with a given attention mask.
+    """
+
+    device = logprob_tensor.device
+    batch_size, logprob_seq_len = logprob_tensor.shape
+    mask_seq_len = attention_mask.shape[1]
+
+    padded_logprobs = torch.full(
+        attention_mask.shape,
+        fill_value=pad_value,
+        dtype=logprob_tensor.dtype,
+        device=device
+    )
+
+    left_pad_counts = torch.argmax(attention_mask, dim=1)
+
+    cols = torch.arange(logprob_seq_len, device=device)
+    dest_indices = left_pad_counts.unsqueeze(1) + cols
+
+    # Create destination row indices
+    # Shape: [batch_size, logprob_seq_len]
+    row_indices = torch.arange(batch_size, device=device).unsqueeze(1).expand_as(dest_indices)
+
+    # Filter out-of-bounds indices and perform the assignment.
+    # Create a mask to identify only the indices that are within the bounds
+    # of the target tensor's sequence length.
+    valid_mask = dest_indices < mask_seq_len
+
+    # Use this mask to select only the valid row indices, column indices,
+    # and the corresponding values from the logprob tensor.
+    # This flattens the selected elements into 1D tensors.
+    valid_rows = row_indices[valid_mask]
+    valid_cols = dest_indices[valid_mask]
+    valid_vals = logprob_tensor[valid_mask]
+
+    # Place the valid values into their correct positions in the padded tensor
+    # using a single, efficient advanced indexing operation.
+    padded_logprobs[valid_rows, valid_cols] = valid_vals
+
+    return padded_logprobs
+
+@dataclass
+class UnslothIterativeSFTConfig(IterativeSFTConfig):
+    """
+
+Configuration class for the [`IterativeSFTTrainer`].
+
+
+
+The [`IterativeSFTTrainer`] is deprecated and will be removed in version 0.24.0. Please use the [`SFTTrainer`].
+ + + +This class includes only the parameters that are specific to Iterative SFT training. For a full list of training +arguments, please refer to the [`~transformers.TrainingArguments`] documentation. Note that default values in this +class may differ from those in [`~transformers.TrainingArguments`]. + +Using [`~transformers.HfArgumentParser`] we can turn this class into +[argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the +command line. + +Parameters: + > Parameters that control the model + + model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`): + Keyword arguments for [`~transformers.AutoModelForCausalLM.from_pretrained`], used when the `model` + argument of the [`IterativeSFTTrainer`] is provided as a string. + + > Parameters that control the data preprocessing + + max_length (`int` or `None`, *optional*, defaults to `None`): + Maximum length of the tokenized sequence. Sequences longer than `max_length` are truncated. + truncation_mode (`str`, *optional*, defaults to `"keep_end"`): + The truncation mode to use, either `"keep_end"` or `"keep_start"`. + optimize_device_cache (`bool`, *optional*, defaults to `False`): + Whether to optimize accelerator cache for slightly more memory-efficient training. + + """ + vllm_sampling_params: Optional[Any] = field( + default = None, + metadata = {'help': 'vLLM SamplingParams'}, + ) + unsloth_num_chunks : Optional[int] = field( + default = -1, + metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'}, + ) + max_seq_length : Optional[int] = field( + default = None, + metadata = {'help': 'Maximum sequence length to truncate to.'}, + ) + def __init__( + self, + output_dir = None, + overwrite_output_dir = None, + do_train = False, + do_eval = False, + do_predict = False, + eval_strategy = 'no', + prediction_loss_only = False, + per_device_train_batch_size = 4, + per_device_eval_batch_size = 4, + per_gpu_train_batch_size = None, + per_gpu_eval_batch_size = None, + gradient_accumulation_steps = 2, + eval_accumulation_steps = 2, + eval_delay = 0, + torch_empty_cache_steps = 250, + learning_rate = 5e-05, + weight_decay = 0.01, + adam_beta1 = 0.9, + adam_beta2 = 0.999, + adam_epsilon = 1e-08, + max_grad_norm = 1.0, + num_train_epochs = 3.0, + max_steps = -1, + lr_scheduler_type = 'linear', + warmup_ratio = 0.1, + warmup_steps = 0, + log_level = 'passive', + log_level_replica = 'warning', + log_on_each_node = True, + logging_dir = None, + logging_strategy = 'steps', + logging_first_step = False, + logging_steps = 1, + logging_nan_inf_filter = False, + save_strategy = 'steps', + save_steps = 500, + save_total_limit = None, + save_safetensors = True, + save_on_each_node = False, + save_only_model = False, + restore_callback_states_from_checkpoint = False, + no_cuda = False, + use_cpu = False, + use_mps_device = False, + seed = 3407, + data_seed = 3407, + jit_mode_eval = False, + bf16 = False, + fp16 = False, + fp16_opt_level = 'O1', + half_precision_backend = 'auto', + bf16_full_eval = False, + fp16_full_eval = False, + tf32 = None, + local_rank = -1, + ddp_backend = None, + tpu_num_cores = None, + tpu_metrics_debug = False, + debug = '', + dataloader_drop_last = False, + eval_steps = None, + dataloader_num_workers = 0, + dataloader_prefetch_factor = None, + past_index = -1, + run_name = None, + disable_tqdm = None, + remove_unused_columns = True, + label_names = None, + load_best_model_at_end = False, + metric_for_best_model = None, + greater_is_better 
= None, + ignore_data_skip = False, + fsdp = None, + fsdp_min_num_params = 0, + fsdp_config = None, + fsdp_transformer_layer_cls_to_wrap = None, + accelerator_config = None, + parallelism_config = None, + deepspeed = None, + label_smoothing_factor = 0.0, + optim = 'adamw_8bit', + optim_args = None, + adafactor = False, + group_by_length = False, + length_column_name = 'length', + report_to = None, + project = 'huggingface', + trackio_space_id = 'trackio', + ddp_find_unused_parameters = None, + ddp_bucket_cap_mb = None, + ddp_broadcast_buffers = None, + dataloader_pin_memory = True, + dataloader_persistent_workers = False, + skip_memory_metrics = True, + use_legacy_prediction_loop = False, + push_to_hub = False, + resume_from_checkpoint = None, + hub_model_id = None, + hub_strategy = 'every_save', + hub_token = None, + hub_private_repo = None, + hub_always_push = False, + hub_revision = None, + gradient_checkpointing = True, + gradient_checkpointing_kwargs = None, + include_inputs_for_metrics = False, + eval_do_concat_batches = True, + fp16_backend = 'auto', + push_to_hub_model_id = None, + push_to_hub_organization = None, + push_to_hub_token = None, + mp_parameters = '', + auto_find_batch_size = False, + full_determinism = False, + torchdynamo = None, + ray_scope = 'last', + ddp_timeout = 1800, + torch_compile = False, + torch_compile_backend = None, + torch_compile_mode = None, + include_tokens_per_second = False, + include_num_input_tokens_seen = False, + neftune_noise_alpha = None, + optim_target_modules = None, + batch_eval_metrics = False, + eval_on_start = False, + use_liger_kernel = False, + liger_kernel_config = None, + eval_use_gather_object = False, + average_tokens_across_devices = True, + model_init_kwargs = None, + max_length = None, + truncation_mode = 'keep_end', + optimize_device_cache = False, + vllm_sampling_params = None, + unsloth_num_chunks = -1, + max_seq_length = None, + **kwargs, + ): + if learning_rate < 1e-7: print(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!') + if learning_rate > 1: print(f'Unsloth: Your learning rate of `{learning_rate}` is way too larger > 1! 
Consider decreasing it to 1e-1, otherwise gradient updates will explode!') + if output_dir is None and save_strategy == 'steps' and save_steps == 500: + output_dir = 'unsloth_training_checkpoints' + save_strategy = 'no' + + super().__init__( + output_dir = output_dir, + overwrite_output_dir = overwrite_output_dir, + do_train = do_train, + do_eval = do_eval, + do_predict = do_predict, + eval_strategy = eval_strategy, + prediction_loss_only = prediction_loss_only, + per_device_train_batch_size = per_device_train_batch_size, + per_device_eval_batch_size = per_device_eval_batch_size, + per_gpu_train_batch_size = per_gpu_train_batch_size, + per_gpu_eval_batch_size = per_gpu_eval_batch_size, + gradient_accumulation_steps = gradient_accumulation_steps, + eval_accumulation_steps = eval_accumulation_steps, + eval_delay = eval_delay, + torch_empty_cache_steps = torch_empty_cache_steps, + learning_rate = learning_rate, + weight_decay = weight_decay, + adam_beta1 = adam_beta1, + adam_beta2 = adam_beta2, + adam_epsilon = adam_epsilon, + max_grad_norm = max_grad_norm, + num_train_epochs = num_train_epochs, + max_steps = max_steps, + lr_scheduler_type = lr_scheduler_type, + warmup_ratio = warmup_ratio, + warmup_steps = warmup_steps, + log_level = log_level, + log_level_replica = log_level_replica, + log_on_each_node = log_on_each_node, + logging_dir = logging_dir, + logging_strategy = logging_strategy, + logging_first_step = logging_first_step, + logging_steps = logging_steps, + logging_nan_inf_filter = logging_nan_inf_filter, + save_strategy = save_strategy, + save_steps = save_steps, + save_total_limit = save_total_limit, + save_safetensors = save_safetensors, + save_on_each_node = save_on_each_node, + save_only_model = save_only_model, + restore_callback_states_from_checkpoint = restore_callback_states_from_checkpoint, + no_cuda = no_cuda, + use_cpu = use_cpu, + use_mps_device = use_mps_device, + seed = seed, + data_seed = data_seed, + jit_mode_eval = jit_mode_eval, + bf16 = bf16, + fp16 = fp16, + fp16_opt_level = fp16_opt_level, + half_precision_backend = half_precision_backend, + bf16_full_eval = bf16_full_eval, + fp16_full_eval = fp16_full_eval, + tf32 = tf32, + local_rank = local_rank, + ddp_backend = ddp_backend, + tpu_num_cores = tpu_num_cores, + tpu_metrics_debug = tpu_metrics_debug, + debug = debug, + dataloader_drop_last = dataloader_drop_last, + eval_steps = eval_steps, + dataloader_num_workers = dataloader_num_workers, + dataloader_prefetch_factor = dataloader_prefetch_factor, + past_index = past_index, + run_name = run_name, + disable_tqdm = disable_tqdm, + remove_unused_columns = remove_unused_columns, + label_names = label_names, + load_best_model_at_end = load_best_model_at_end, + metric_for_best_model = metric_for_best_model, + greater_is_better = greater_is_better, + ignore_data_skip = ignore_data_skip, + fsdp = fsdp, + fsdp_min_num_params = fsdp_min_num_params, + fsdp_config = fsdp_config, + fsdp_transformer_layer_cls_to_wrap = fsdp_transformer_layer_cls_to_wrap, + accelerator_config = accelerator_config, + parallelism_config = parallelism_config, + deepspeed = deepspeed, + label_smoothing_factor = label_smoothing_factor, + optim = optim, + optim_args = optim_args, + adafactor = adafactor, + group_by_length = group_by_length, + length_column_name = length_column_name, + report_to = report_to, + project = project, + trackio_space_id = trackio_space_id, + ddp_find_unused_parameters = ddp_find_unused_parameters, + ddp_bucket_cap_mb = ddp_bucket_cap_mb, + ddp_broadcast_buffers = 
ddp_broadcast_buffers, + dataloader_pin_memory = dataloader_pin_memory, + dataloader_persistent_workers = dataloader_persistent_workers, + skip_memory_metrics = skip_memory_metrics, + use_legacy_prediction_loop = use_legacy_prediction_loop, + push_to_hub = push_to_hub, + resume_from_checkpoint = resume_from_checkpoint, + hub_model_id = hub_model_id, + hub_strategy = hub_strategy, + hub_token = hub_token, + hub_private_repo = hub_private_repo, + hub_always_push = hub_always_push, + hub_revision = hub_revision, + gradient_checkpointing = gradient_checkpointing, + gradient_checkpointing_kwargs = gradient_checkpointing_kwargs, + include_inputs_for_metrics = include_inputs_for_metrics, + eval_do_concat_batches = eval_do_concat_batches, + fp16_backend = fp16_backend, + push_to_hub_model_id = push_to_hub_model_id, + push_to_hub_organization = push_to_hub_organization, + push_to_hub_token = push_to_hub_token, + mp_parameters = mp_parameters, + auto_find_batch_size = auto_find_batch_size, + full_determinism = full_determinism, + torchdynamo = torchdynamo, + ray_scope = ray_scope, + ddp_timeout = ddp_timeout, + torch_compile = torch_compile, + torch_compile_backend = torch_compile_backend, + torch_compile_mode = torch_compile_mode, + include_tokens_per_second = include_tokens_per_second, + include_num_input_tokens_seen = include_num_input_tokens_seen, + neftune_noise_alpha = neftune_noise_alpha, + optim_target_modules = optim_target_modules, + batch_eval_metrics = batch_eval_metrics, + eval_on_start = eval_on_start, + use_liger_kernel = use_liger_kernel, + liger_kernel_config = liger_kernel_config, + eval_use_gather_object = eval_use_gather_object, + average_tokens_across_devices = average_tokens_across_devices, + model_init_kwargs = model_init_kwargs, + max_length = max_length, + truncation_mode = truncation_mode, + optimize_device_cache = optimize_device_cache,**kwargs) + self.vllm_sampling_params = vllm_sampling_params + self.unsloth_num_chunks = unsloth_num_chunks + self.max_seq_length = max_seq_length +pass + +class _UnslothIterativeSFTTrainer(Trainer): + """ + The IterativeSFTTrainer can be used to finetune models with methods that requires some steps between optimization. + + + + The [`IterativeSFTTrainer`] is deprecated and will be removed in version 0.24.0. Please use the [`SFTTrainer`]. + + + + Args: + model (`Union[str, PreTrainedModel]`): + Model to be trained. Can be either: + + - A string, being the *model id* of a pretrained model hosted inside a model repo on huggingface.co, or a + path to a *directory* containing model weights saved using + [`~transformers.PreTrainedModel.save_pretrained`], e.g., `'./my_model_directory/'`. The model is loaded + using [`~transformers.AutoModelForCausalLM.from_pretrained`] with the keyword arguments in + `args.model_init_kwargs`. + - A [`~transformers.PreTrainedModel`] object. Only causal language models are supported. + args ([`IterativeSFTConfig`], *optional*, defaults to `None`): + Configuration for this trainer. If `None`, a default configuration is used. + data_collator (`DataCollator`, *optional*): + Function to use to form a batch from a list of elements of the processed `train_dataset` or `eval_dataset`. + Will default to [`~transformers.default_data_collator`] if no `processing_class` is provided, an instance + of [`~transformers.DataCollatorWithPadding`] otherwise if the processing_class is a feature extractor or + tokenizer. + eval_dataset (`datasets.Dataset`): + The dataset to use for evaluation. 
+ processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`): + Processing class used to process the data. If `None`, the processing class is loaded from the model's name + with [`~transformers.AutoTokenizer.from_pretrained`]. + optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`): + The optimizer and scheduler to use for training. + preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`): + The function to use to preprocess the logits before computing the metrics. + compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*): + The function to use to compute the metrics. Must take a `EvalPrediction` and return a dictionary string to + metric values. + """ + + _tag_names = ["trl", "iterative-sft"] + + def __init__( + self, + model: Union[str, PreTrainedModel], + args: Optional[Union[IterativeSFTConfig, TrainingArguments]] = None, + data_collator: Optional[DataCollator] = None, + eval_dataset: Optional[Union[Dataset, dict[str, Dataset]]] = None, + processing_class: Optional[ + Union[PreTrainedTokenizerBase, BaseImageProcessor, FeatureExtractionMixin, ProcessorMixin] + ] = None, + optimizers: tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = ( + None, + None, + ), + preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None, + compute_metrics: Optional[Callable[[EvalLoopOutput], dict]] = None, + ): + warnings.warn( + "The `IterativeSFTTrainer` is deprecated and will be removed in version 0.24.0. Please use the " + "`SFTTrainer`.", + FutureWarning, + ) + + # Args + model_id = model if isinstance(model, str) else model.config._name_or_path + if args is None: + model_name = model_id.split("/")[-1] + args = IterativeSFTConfig(f"{model_name}-IterativeSFT") + elif isinstance(args, TrainingArguments) and not isinstance(args, IterativeSFTConfig): + dict_args = args.to_dict() + dict_args["hub_token"] = args.hub_token # to_dict hides the hub_token + dict_args.pop("push_to_hub_token") + args = IterativeSFTConfig(**dict_args) + + # Handle the tokenizer + if processing_class is None: + processing_class = AutoTokenizer.from_pretrained(model_id) + + # Model + if args.model_init_kwargs is not None and not isinstance(model, str): + logger.warning( + "You passed model_init_kwargs to the `IterativeSFTConfig`, but your model is already instantiated. " + "The `model_init_kwargs` will be ignored." 
+ ) + if isinstance(model, str): + model = self._create_model_from_path(model, args) + + # PEFT configuration and model wrapping + if is_peft_available() and isinstance(model, PeftModel): + self.is_peft_model = True + else: + self.is_peft_model = False + + self.processing_class = processing_class + self.is_encoder_decoder = getattr(model.config, "is_encoder_decoder", False) + + if data_collator is None: + if self.is_encoder_decoder: + self.data_collator = DataCollatorForSeq2Seq( + processing_class, label_pad_token_id=-100, pad_to_multiple_of=8 + ) + else: + self.data_collator = DataCollatorForLanguageModeling(self.processing_class, mlm=False) + else: + self.data_collator = data_collator + + self.max_length = args.max_length + self.truncation_mode = args.truncation_mode + self.optimize_device_cache = args.optimize_device_cache + + super().__init__( + model=model, + args=args, + data_collator=self.data_collator, + eval_dataset=eval_dataset, + processing_class=processing_class, + compute_metrics=compute_metrics, + optimizers=optimizers, + preprocess_logits_for_metrics=preprocess_logits_for_metrics, + ) + + # Add tags for models that have been loaded with the correct transformers version + if hasattr(self.model, "add_model_tags"): + self.model.add_model_tags(self._tag_names) + + self.create_optimizer_and_scheduler(self.args.max_steps) + + # prepare model, optimizer and lr_scheduler + self.model, self.optimizer, self.lr_scheduler = self.accelerator.prepare( + self.model, self.optimizer, self.lr_scheduler + ) + + self.processing_class.truncation_side = "left" if self.truncation_mode == "keep_end" else "right" + + if not hasattr(self, "accelerator"): + raise AttributeError( + "Your `Trainer` does not have an `accelerator` object. Consider upgrading `transformers`." 
+ ) + + PPODecorators.optimize_device_cache = self.optimize_device_cache + + def _create_model_from_path(self, model_path: str, args: IterativeSFTConfig) -> PreTrainedModel: + """Creates a model from a path or model identifier.""" + model_init_kwargs = args.model_init_kwargs or {} + return AutoModelForCausalLM.from_pretrained(model_path, **model_init_kwargs) + + def prepare_model_inputs(self, input_ids: torch.Tensor, attention_mask: torch.Tensor, labels: torch.Tensor): + if attention_mask is None: + attention_mask = [torch.ones_like(ids) for ids in input_ids] + + if self.is_encoder_decoder: + input_data = self.data_collator( + [ + {"input_ids": ids, "attention_mask": att, "labels": lab} + for ids, att, lab in zip(input_ids, attention_mask, labels) + ] + ).to(self.model.device) + + input_data.pop("decoder_input_ids", None) # This is directly computed inside the model + + input_data["labels"][input_data["labels"] == self.processing_class.pad_token_id] = -100 + + else: + input_data = self.data_collator( + [{"input_ids": ids, "attention_mask": att} for ids, att in zip(input_ids, attention_mask)] + ).to(self.model.device) + + # truncate in case the user has provided input_ids, attention_mask and labels + if self.max_length is not None: + if self.truncation_mode == "keep_start": + input_data = {k: v[: self.max_length] for k, v in input_data.items()} + elif self.truncation_mode == "keep_end": + input_data = {k: v[-self.max_length :] for k, v in input_data.items()} + else: + raise ValueError(f"Unknown truncation mode: {self.truncation_mode}") + + return input_data + + @staticmethod + def _step_safety_checker( + input_ids: list[torch.LongTensor], + attention_mask: list[torch.LongTensor], + labels: list[torch.LongTensor], + texts: list[str], + texts_labels: list[str], + ): + """ + Check if the input data is valid for training. + + Args: + input_ids (list[`torch.LongTensor`]): + List of tensors containing the input_ids + attention_mask (list[`torch.LongTensor`]): + List of tensors containing the attention_mask + labels (list[`torch.FloatTensor`]): + List of tensors containing the labels + texts (list[`str`]): + List of string containing the text input. + texts_labels (list[`str`]): + List of string containing the text labels. + + Returns: + `tuple`: The input data. 
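+
+        Example (illustrative only; the tensors and values below are arbitrary placeholders):
+
+            >>> ids = [torch.tensor([1, 2, 3]), torch.tensor([4, 5, 6])]
+            >>> mask = [torch.ones_like(t) for t in ids]
+            >>> _ = _UnslothIterativeSFTTrainer._step_safety_checker(ids, mask, ids, None, None)
+            >>> # Well-formed inputs are returned unchanged; malformed ones raise a ValueError.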
+ """ + if texts is None: + if attention_mask is None: + for name, tensor_list in zip(["input_ids", "labels"], [input_ids, labels]): + if not isinstance(tensor_list, list): + raise ValueError(f"{name} must be a list of tensors - got {type(tensor_list)}") + if not isinstance(tensor_list[0], torch.Tensor): + raise ValueError(f"Elements in {name} must be tensors - got {type(tensor_list[0])}") + else: + for name, tensor_list in zip( + ["input_ids", "attention_mask", "labels"], [input_ids, attention_mask, labels] + ): + if not isinstance(tensor_list, list): + raise ValueError(f"{name} must be a list of tensors - got {type(tensor_list)}") + if not isinstance(tensor_list[0], torch.Tensor): + raise ValueError(f"Elements in {name} must be tensors - got {type(tensor_list[0])}") + else: + if not isinstance(texts, list): + raise ValueError(f"'text' must be a list of strings - got {type(texts)}") + if not isinstance(texts[0], str): + raise ValueError(f"Elements in 'text' must be strings - got {type(texts[0])}") + if texts_labels is not None: + if not isinstance(texts_labels, list): + raise ValueError(f"'text_labels' must be a list of strings - got {type(texts_labels)}") + if not isinstance(texts_labels[0], str): + raise ValueError(f"Elements in 'text_labels' must be strings - got {type(texts_labels[0])}") + + return input_ids, attention_mask, labels, texts, texts_labels + + @PPODecorators.empty_device_cache() + def step( + self, + input_ids: Optional[list[torch.LongTensor]] = None, + attention_mask: Optional[list[torch.LongTensor]] = None, + labels: Optional[list[torch.LongTensor]] = None, + texts: Optional[list[str]] = None, + texts_labels: Optional[list[str]] = None, + ): + """ + Run an optimisation step given a list of input_ids, attention_mask, and labels or a list of text and + text_labels. + + Args: + input_ids (list[`torch.LongTensor`]): + List of tensors containing the input_ids (if not provided, text will be used) + attention_mask (list[`torch.LongTensor`], , *optional*): + List of tensors containing the attention_mask + labels (list[`torch.FloatTensor`], *optional*): + List of tensors containing the labels (if set to None, will default to input_ids) + texts (list[`str`], *optional*): + List of strings containing the text input (if not provided, input_ids will directly be used) + texts_labels (list[`str`], *optional*): + List of strings containing the text labels (if set to None, will default to text) + + Returns: + `dict[str, Any]`: A summary of the training statistics + """ + self.model.train() + + if self.state.global_step == 0: + self.tr_loss = torch.tensor(0.0).to(self.args.device) + self._globalstep_last_logged = self.state.global_step + + if input_ids is None and texts is None: + raise ValueError("Step should include `input_ids` or `texts` as keyword arguments.") + elif input_ids is not None and texts is not None: + logger.warning( + "Both `input_ids` and `texts` argument are provided. `input_ids` will be ignored. " + "Please provide only one of the two.", + ) + + if labels is None and texts_labels is None and self.is_encoder_decoder: + raise ValueError( + "No 'labels' or 'text_labels' are provided. When using an encoder-decoder architecture, 'labels' or 'text_labels' must be passed." 
+ ) + + # Convert Column to list if not already + input_ids = input_ids[:] if input_ids is not None else None + attention_mask = attention_mask[:] if attention_mask is not None else None + labels = labels[:] if labels is not None else None + texts = texts[:] if texts is not None else None + texts_labels = texts_labels[:] if texts_labels is not None else None + + input_ids, attention_mask, labels, texts, texts_labels = self._step_safety_checker( + input_ids, attention_mask, labels, texts, texts_labels + ) + + if texts is not None: + model_inputs = self.processing_class( + texts, max_length=self.max_length, truncation=True, padding=True, return_tensors="pt" + ) + + input_ids, attention_mask = model_inputs["input_ids"], model_inputs["attention_mask"] + + if texts_labels is not None: + labels = self.processing_class( + texts, max_length=self.max_length, truncation=True, padding=True, return_tensors="pt" + )["input_ids"] + + if labels is None: + labels = input_ids + + model_inputs = self.prepare_model_inputs(input_ids, attention_mask, labels) + + model_inputs_names = list(model_inputs.keys()) + + batch_dict = {} + batch_dict.update(model_inputs) + + def collator(data): + return_dict = dict() + for key in data[0]: + if key in ["input_ids", "attention_mask", "labels"]: + return_dict[key] = torch.stack([d[key] for d in data]).to(self.model.device) + return return_dict + + batch_data = Dataset.from_dict(batch_dict) + batch_data.set_format("torch") + + step_dataloader = DataLoader( + batch_data, + batch_size=self.args.per_device_train_batch_size, + shuffle=True, + collate_fn=collator, + ) + + for _, batch in enumerate(step_dataloader): + with self.accelerator.accumulate(self.model): + model_inputs = {k: batch[k] for k in model_inputs_names} + loss = self.compute_loss(self.model, model_inputs) + + if self.args.n_gpu > 1: + loss = loss.mean() + + tr_loss_step = loss.detach() + + self.accelerator.backward(loss) + + if self.accelerator.sync_gradients and self.args.max_grad_norm is not None: + self.accelerator.clip_grad_norm_( + self.model.parameters(), + self.args.max_grad_norm, + ) + + self.optimizer.step() + self.optimizer.zero_grad() + if self.lr_scheduler is not None: + self.lr_scheduler.step() + + self.state.global_step += 1 + + # update stats etc + self.tr_loss += tr_loss_step + + self._maybe_log_save_evaluate() + + def _maybe_log_save_evaluate(self): + # check if eval is required + if self.args.eval_steps is not None: + if self.state.global_step % self.args.eval_steps == 0 and self.state.global_step != 0: + self.evaluate(self.eval_dataset) + + # check if logging is required + if self.args.logging_steps is not None: + if self.state.global_step % self.args.logging_steps == 0 and self.state.global_step != 0: + logs: dict[str, float] = {} + + tr_loss_scalar = self._nested_gather(self.tr_loss).mean().item() + + # reset tr_loss to zero + self.tr_loss -= self.tr_loss + + logs["loss"] = round(tr_loss_scalar / (self.state.global_step - self._globalstep_last_logged), 4) + logs["learning_rate"] = self._get_learning_rate() + + self._globalstep_last_logged = self.state.global_step + + self.log(logs) + + # Ensure the model card is saved along with the checkpoint + def _save_checkpoint(self, model, trial): + if self.args.hub_model_id is None: + model_name = Path(self.args.output_dir).name + else: + model_name = self.args.hub_model_id.split("/")[-1] + self.create_model_card(model_name=model_name) + super()._save_checkpoint(model, trial) + + def create_model_card( + self, + model_name: Optional[str] = None, + 
dataset_name: Optional[str] = None, + tags: Union[str, list[str], None] = None, + ): + """ + Creates a draft of a model card using the information available to the `Trainer`. + + Args: + model_name (`str` or `None`, *optional*, defaults to `None`): + Name of the model. + dataset_name (`str` or `None`, *optional*, defaults to `None`): + Name of the dataset used for training. + tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`): + Tags to be associated with the model card. + """ + if not self.is_world_process_zero(): + return + + if hasattr(self.model.config, "_name_or_path") and not os.path.isdir(self.model.config._name_or_path): + base_model = self.model.config._name_or_path + else: + base_model = None + + # normalize `tags` to a mutable set + if tags is None: + tags = set() + elif isinstance(tags, str): + tags = {tags} + else: + tags = set(tags) + + if hasattr(self.model.config, "unsloth_version"): + tags.add("unsloth") + + if "JOB_ID" in os.environ: + tags.add("hf_jobs") + + tags.update(self._tag_names) + + model_card = generate_model_card( + base_model=base_model, + model_name=model_name, + hub_model_id=self.hub_model_id, + dataset_name=dataset_name, + tags=tags, + wandb_url=wandb.run.url if is_wandb_available() and wandb.run is not None else None, + comet_url=get_comet_experiment_url(), + trainer_name="Iterative SFT", + ) + + model_card.save(os.path.join(self.args.output_dir, "README.md")) +class UnslothIterativeSFTTrainer(_UnslothIterativeSFTTrainer): + """ + +The IterativeSFTTrainer can be used to finetune models with methods that requires some steps between optimization. + + + +The [`IterativeSFTTrainer`] is deprecated and will be removed in version 0.24.0. Please use the [`SFTTrainer`]. + + + +Args: + model (`Union[str, PreTrainedModel]`): + Model to be trained. Can be either: + + - A string, being the *model id* of a pretrained model hosted inside a model repo on huggingface.co, or a + path to a *directory* containing model weights saved using + [`~transformers.PreTrainedModel.save_pretrained`], e.g., `'./my_model_directory/'`. The model is loaded + using [`~transformers.AutoModelForCausalLM.from_pretrained`] with the keyword arguments in + `args.model_init_kwargs`. + - A [`~transformers.PreTrainedModel`] object. Only causal language models are supported. + args ([`IterativeSFTConfig`], *optional*, defaults to `None`): + Configuration for this trainer. If `None`, a default configuration is used. + data_collator (`DataCollator`, *optional*): + Function to use to form a batch from a list of elements of the processed `train_dataset` or `eval_dataset`. + Will default to [`~transformers.default_data_collator`] if no `processing_class` is provided, an instance + of [`~transformers.DataCollatorWithPadding`] otherwise if the processing_class is a feature extractor or + tokenizer. + eval_dataset (`datasets.Dataset`): + The dataset to use for evaluation. + processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`): + Processing class used to process the data. If `None`, the processing class is loaded from the model's name + with [`~transformers.AutoTokenizer.from_pretrained`]. + optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`): + The optimizer and scheduler to use for training. 
+ preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`): + The function to use to preprocess the logits before computing the metrics. + compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*): + The function to use to compute the metrics. Must take a `EvalPrediction` and return a dictionary string to + metric values. + + """ + def __init__( + self, + model, + args = None, + data_collator = None, + eval_dataset = None, + processing_class = None, + preprocess_logits_for_metrics = None, + compute_metrics = None, + **kwargs + ): + if args is None: args = UnslothIterativeSFTConfig() + use_bf16 = getattr(args, 'bf16', False) + if type(use_bf16) is not bool: use_bf16 = False + use_fp16 = getattr(args, 'fp16', False) + if type(use_fp16) is not bool: use_fp16 = False + force_float32 = False + full_finetuning = os.environ.get('UNSLOTH_ENABLE_FULL_FINETUNING', '0') == '1' + if not full_finetuning and (os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '1'): + print('Unsloth: Switching to float32 training since model cannot work with float16') + force_float32 = True + mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') + dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None) + if dtype is None: dtype = model.get_input_embeddings().dtype + from unsloth_zoo.utils import _get_dtype + dtype = _get_dtype(dtype) + float16 = dtype == torch.float16 + if not force_float32 and (float16 and use_bf16): raise TypeError('Unsloth: Model is in float16 precision but you want to use bfloat16 precision. Set fp16 to `True` and bf16 to `False`') + if not force_float32 and (not float16 and use_fp16): raise TypeError('Unsloth: Model is in bfloat16 precision but you want to use float16 precision. 
Set fp16 to `False` and bf16 to `True`') + if force_float32: + # Forced float32 training + args.fp16 = False + args.bf16 = False + os.environ['ACCELERATE_MIXED_PRECISION'] = 'no' + elif (not use_bf16 and not use_fp16) and mixed_precision_dtype == 'float32': + # Mixed precision training + args.fp16 = float16 + args.bf16 = not float16 + os.environ['ACCELERATE_MIXED_PRECISION'] = 'fp16' if float16 else 'bf16' + if getattr(args, 'eval_dataset', None) is not None and getattr(args, 'eval_strategy', 'no') == 'no': + args.eval_strategy = 'steps' + if getattr(args, 'eval_steps', None) is None: args.eval_steps = 0.1 + ga_steps = getattr(args, 'gradient_accumulation_steps', None) + if ga_steps is not None and ga_steps > 1: + from transformers import __version__ as transformers_version + if Version(transformers_version) <= Version('4.45.2'): + print('**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!\n' + '`pip install --upgrade --no-cache-dir --force-reinstall --no-deps unsloth transformers trl unsloth_zoo`') + if getattr(args, 'eval_strategy', 'no') != 'no': + eval_bsz = getattr(args, 'per_device_eval_batch_size', 8) + if eval_bsz == 8 and args.per_device_train_batch_size < eval_bsz: args.per_device_eval_batch_size = args.per_device_train_batch_size + if getattr(args, 'eval_accumulation_steps', None) is None and ga_steps is not None: args.eval_accumulation_steps = ga_steps + fp16_full_eval = getattr(args, 'fp16_full_eval', False) + if type(fp16_full_eval) is not bool: fp16_full_eval = False + bf16_full_eval = getattr(args, 'bf16_full_eval', False) + if type(bf16_full_eval) is not bool: bf16_full_eval = False + if args.fp16 and bf16_full_eval: args.bf16_full_eval = False; args.fp16_full_eval = True + if args.bf16 and fp16_full_eval: args.bf16_full_eval = True; args.fp16_full_eval = False + if force_float32: + args.bf16_full_eval = False + args.fp16_full_eval = False + elif os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') == 'bfloat16': + args.bf16_full_eval = True + args.fp16_full_eval = False + elif not bf16_full_eval and not fp16_full_eval: + args.bf16_full_eval = args.bf16 + args.fp16_full_eval = args.fp16 + _output_logits = False + if locals().get('compute_metrics', None) is not None: _output_logits = True + if locals().get('preprocess_logits_for_metrics', None) is not None: _output_logits = True + if _output_logits: + os.environ['UNSLOTH_RETURN_LOGITS'] = '1' + if 'max_seq_length' not in locals() and not hasattr(args, 'max_seq_length'): + pass + else: + model_max_seq_length = getattr(model, 'max_seq_length', None) + args_max_seq_length = getattr(args, 'max_seq_length', None) + if args_max_seq_length is None and model_max_seq_length is not None: + max_seq_length = model.max_seq_length + if hasattr(args, 'max_seq_length'): args.max_seq_length = max_seq_length + if model is not None and hasattr(model, 'for_training'): + model.for_training() + if 'tokenizer' in locals() and hasattr(tokenizer, 'padding_side'): tokenizer.padding_side = 'right' + if 'processing_class' in locals(): + if hasattr(processing_class, 'padding_side'): processing_class.padding_side = 'right' + if hasattr(processing_class, 'tokenizer') and hasattr(processing_class.tokenizer, 'padding_side'): processing_class.tokenizer.padding_side = 'right' + other_metrics = [] + + from unsloth_zoo.logging_utils import PatchRLStatistics + PatchRLStatistics('iterative_sft_trainer', other_metrics) + + # [TODO] Fix up DataParallel multiplying batch sizes + # [TODO] DDP works, but DP seems 
to not work? [TODO] + if getattr(args, "parallel_mode", None) == ParallelMode.NOT_DISTRIBUTED and args.n_gpu > 1: + if getattr(args, "_n_gpu", 1) != 1: + args._n_gpu = 1 + if "model" in locals() and hasattr(model, "for_training"): + model.for_training() + super().__init__( + model = model, + args = args, + data_collator = data_collator, + eval_dataset = eval_dataset, + processing_class = processing_class, + preprocess_logits_for_metrics = preprocess_logits_for_metrics, + compute_metrics = compute_metrics,**kwargs) + if "model" in locals() and hasattr(model, "for_inference"): + model.for_inference() + if hasattr(self, 'neftune_hook_handle'): + self.neftune_hook_handle.remove() + if hasattr(self, 'neftune_hook_handle'): del self.neftune_hook_handle + if getattr(args, 'neftune_noise_alpha', None) is not None: + model.get_input_embeddings().neftune_noise_alpha = self.neftune_noise_alpha + pass + if hasattr(self, 'accelerator'): + scaler = self.accelerator.scaler + current_model = model + while hasattr(current_model, 'model'): + current_model.accelerator_scaler = scaler + current_model = current_model.model + current_model.accelerator_scaler = scaler + pass + if hasattr(self, 'train'): + self.train = MethodType(prepare_for_training_mode(self.__class__.train), self) + pass + +pass + + +if hasattr(logger, "addFilter"): + import logging + class HideLoggingMessage(logging.Filter): + def __init__(self, text): self.text = text + def filter(self, x): return not (self.text in x.getMessage()) + pass + logger.addFilter(HideLoggingMessage("`use_cache=True`")) + diff --git a/unsloth_compiled_cache/UnslothKTOTrainer.py b/unsloth_compiled_cache/UnslothKTOTrainer.py new file mode 100644 index 0000000000000000000000000000000000000000..6226aa246255dcc18933e3bc5c236ca4fefe513f --- /dev/null +++ b/unsloth_compiled_cache/UnslothKTOTrainer.py @@ -0,0 +1,2289 @@ +""" +2025.11.2 +2025.11.1 +4.57.2 +0.23.0 +__UNSLOTH_VERSIONING__ +""" + +# Unsloth auto generated code +# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . 
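+# Note on `chunked_selective_log_softmax` below: for every position t it computes
+#   logp[t] = logits[t, index[t]] - logsumexp(logits[t, :])
+# which equals log_softmax(logits) gathered at `index`, evaluated in four float32
+# chunks to bound peak memory. A minimal unchunked reference, for intuition only
+# (this hypothetical helper is not part of the generated code path):
+#
+#   def selective_log_softmax_reference(logits, index):
+#       logits = logits.float()
+#       return torch.gather(
+#           logits.log_softmax(dim=-1), dim=-1, index=index.unsqueeze(-1)
+#       ).squeeze(-1)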
+ +from torch import Tensor +import torch +import torch.nn as nn +from torch.nn import functional as F +from typing import Any, List, Optional, Tuple, Union, Dict, Set, Callable +from trl.trainer.kto_trainer import (Any, AutoModelForCausalLM, BaseImageProcessor, Callable, DPODataCollatorWithPadding, DataCollator, DataLoader, Dataset, EvalLoopOutput, F, FeatureExtractionMixin, KTOConfig, KTOTrainer, Literal, Optional, PartialState, Path, PeftModel, PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin, SequentialSampler, Trainer, TrainerCallback, TrainingArguments, Union, _get_kl_dataset, _process_tokens, _tokenize, autocast, concatenate_datasets, contextmanager, create_reference_model, defaultdict, disable_dropout_in_model, generate_model_card, get_comet_experiment_url, has_length, inspect, is_comet_available, is_liger_kernel_available, is_peft_available, is_wandb_available, itemgetter, log_table_to_comet_experiment, logger, logging, maybe_apply_chat_template, maybe_extract_prompt, maybe_unpair_preference_dataset, nn, np, nullcontext, os, pad_to_length, pd, peft_module_casting_to_bf16, prepare_deepspeed, prepare_model_for_kbit_training, random, selective_log_softmax, textwrap, torch, tqdm, F, Optional, PeftModel, PreTrainedModel, Trainer, is_peft_available, logger, os, torch) + + +import os +from typing import * +from dataclasses import dataclass, field +from packaging.version import Version +import torch +import numpy as np +from contextlib import nullcontext +from torch.nn import functional as F +from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling as TransformersDataCollatorForLanguageModeling +from transformers.training_args import ParallelMode + +# Wrap trainer with padding to right and enable training mode +import functools +from types import MethodType +def prepare_for_training_mode(f): + @functools.wraps(f) + def wrapper(self, *args, **kwargs): + # Enable training mode + if hasattr(self, 'model') and hasattr(self.model, "for_training"): + self.model.for_training() + output = f(self, *args, **kwargs) + # Return inference mode + if hasattr(self, 'model') and hasattr(self.model, "for_inference"): + self.model.for_inference() + return output + return wrapper +pass + +torch_compile_options = { + "epilogue_fusion" : True, + "max_autotune" : False, + "shape_padding" : True, + "trace.enabled" : False, + "triton.cudagraphs" : False, +} + +@torch.compile(dynamic = True, fullgraph = True, options = torch_compile_options,) +def chunked_selective_log_softmax(logits, index): + # Split into 4 chunks only + chunked_logits = torch.chunk(logits.reshape(-1, logits.shape[-1]), chunks = 4, dim = 0) + chunked_index = torch.chunk(index.reshape(-1), chunks = 4, dim = 0) + all_per_token_logps = [] + # Below loop does the same as selective_log_softmax(chunk_logits, chunk_index) + for chunk_logits, chunk_index in zip(chunked_logits, chunked_index): + chunk_logits = chunk_logits.to(torch.float32) + selected_logits = torch.gather(chunk_logits, dim = -1, index = chunk_index.unsqueeze(-1)).squeeze(-1) + logsumexp_values = torch.logsumexp(chunk_logits, dim = -1) + per_token_logps = selected_logits - logsumexp_values + all_per_token_logps.append(per_token_logps) + pass + all_per_token_logps = torch.concat(all_per_token_logps) + all_per_token_logps = all_per_token_logps.reshape((logits.shape[0], logits.shape[1])) + return all_per_token_logps + +def calculate_pad_tokens_in_prompt( + input_ids: torch.Tensor, + logits_to_keep: int, + pad_token_id: int +) -> torch.Tensor: + """ + Given 
a prompt tensor, return the number of left-padding tokens in each sequence, e.g. [pad, pad, pad, cat] -> 3.
+    """
+    if logits_to_keep >= input_ids.shape[1]:
+        raise ValueError("logits_to_keep must be smaller than the sequence length.")
+
+    prompt_section = input_ids[:, :-logits_to_keep]
+
+    padding_mask = (prompt_section == pad_token_id)
+
+    pad_token_counts = padding_mask.sum(dim=1)
+
+    return pad_token_counts
+
+def create_completion_attention_mask(
+    completion_input_ids: torch.Tensor,
+    left_pad_tokens_per_prompt: torch.Tensor,
+    max_left_pad: int,
+    pad_token_id: int
+) -> torch.Tensor:
+    """
+    Given a sequence laid out as [p, p, p, c, c, c, pad, pad, pad], where `p` are extra prompt
+    tokens left over from slicing the tensor, `c` are completion tokens, and `pad` are padding
+    tokens, build a completion mask that zeroes out the `p` and `pad` positions, giving
+    [0, 0, 0, 1, 1, 1, 0, 0, 0] in this example.
+    """
+    batch_size, completion_len = completion_input_ids.shape
+    device = completion_input_ids.device
+
+    num_tokens_to_mask = max_left_pad - left_pad_tokens_per_prompt
+
+    indices = torch.arange(completion_len, device=device).unsqueeze(0)
+    shift_mask = indices >= num_tokens_to_mask.unsqueeze(1)
+
+    non_padding_mask = (completion_input_ids != pad_token_id)
+
+    final_mask = shift_mask & non_padding_mask
+
+    return final_mask
+
+def left_pack_padding(tensor: torch.Tensor, pad_id: int) -> torch.Tensor:
+    """
+    Moves all padding tokens in each sequence of a batch to the right.
+    """
+    mask = (tensor != pad_id)
+    # Must sort with stable=True since a binary mask has no unique ordering
+    sorted_indices = torch.argsort(mask, dim=1, descending=True, stable=True)
+    packed_tensor = torch.gather(tensor, 1, sorted_indices)
+    return packed_tensor
+
+def align_logprobs_with_mask(
+    logprob_tensor: torch.Tensor,
+    attention_mask: torch.Tensor,
+    pad_value: float = 0.0
+) -> torch.Tensor:
+    """
+    Aligns a log probability tensor with a given attention mask.
+    """
+
+    device = logprob_tensor.device
+    batch_size, logprob_seq_len = logprob_tensor.shape
+    mask_seq_len = attention_mask.shape[1]
+
+    padded_logprobs = torch.full(
+        attention_mask.shape,
+        fill_value=pad_value,
+        dtype=logprob_tensor.dtype,
+        device=device
+    )
+
+    left_pad_counts = torch.argmax(attention_mask, dim=1)
+
+    cols = torch.arange(logprob_seq_len, device=device)
+    dest_indices = left_pad_counts.unsqueeze(1) + cols
+
+    # Create destination row indices
+    # Shape: [batch_size, logprob_seq_len]
+    row_indices = torch.arange(batch_size, device=device).unsqueeze(1).expand_as(dest_indices)
+
+    # Filter out-of-bounds indices and perform the assignment: keep only the
+    # destination indices that fall within the target tensor's sequence length.
+    valid_mask = dest_indices < mask_seq_len
+
+    # Select the valid row indices, column indices, and the corresponding values
+    # from the logprob tensor; this flattens the selected elements into 1D tensors.
+    valid_rows = row_indices[valid_mask]
+    valid_cols = dest_indices[valid_mask]
+    valid_vals = logprob_tensor[valid_mask]
+
+    # Place the valid values into their correct positions in the padded tensor
+    # using a single advanced-indexing assignment.
+    padded_logprobs[valid_rows, valid_cols] = valid_vals
+
+    return padded_logprobs
+
+@dataclass
+class UnslothKTOConfig(KTOConfig):
+    """
+
+Configuration class for the [`KTOTrainer`].
+
+This class includes only the parameters that are specific to KTO training. For a full list of training arguments,
+please refer to the [`~transformers.TrainingArguments`] documentation. Note that default values in this class may
+differ from those in [`~transformers.TrainingArguments`].
+
+Using [`~transformers.HfArgumentParser`] we can turn this class into
+[argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the
+command line.
+
+Parameters:
+    max_length (`int` or `None`, *optional*, defaults to `1024`):
+        Maximum length of the sequences (prompt + completion) in the batch. This argument is required if you want
+        to use the default data collator.
+    max_prompt_length (`int` or `None`, *optional*, defaults to `512`):
+        Maximum length of the prompt. This argument is required if you want to use the default data collator.
+    max_completion_length (`int` or `None`, *optional*, defaults to `None`):
+        Maximum length of the completion. This argument is required if you want to use the default data collator
+        and your model is an encoder-decoder.
+    beta (`float`, *optional*, defaults to `0.1`):
+        Parameter controlling the deviation from the reference model. Higher β means less deviation from the
+        reference model.
+    loss_type (`str`, *optional*, defaults to `"kto"`):
+        Type of loss to use. Possible values are:
+
+        - `"kto"`: KTO loss from the [KTO](https://huggingface.co/papers/2402.01306) paper.
+        - `"apo_zero_unpaired"`: Unpaired variant of APO-zero loss from the
+          [APO](https://huggingface.co/papers/2408.06266) paper.
+
+    desirable_weight (`float`, *optional*, defaults to `1.0`):
+        Desirable losses are weighted by this factor to counter an unequal number of desirable and undesirable
+        pairs.
+    undesirable_weight (`float`, *optional*, defaults to `1.0`):
+        Undesirable losses are weighted by this factor to counter an unequal number of desirable and undesirable
+        pairs.
+    label_pad_token_id (`int`, *optional*, defaults to `-100`):
+        Label pad token id. This argument is required if you want to use the default data collator.
+    padding_value (`int` or `None`, *optional*, defaults to `None`):
+        Padding value to use. If `None`, the padding value of the tokenizer is used.
+    truncation_mode (`str`, *optional*, defaults to `"keep_end"`):
+        Truncation mode to use when the prompt is too long. Possible values are `"keep_end"` or `"keep_start"`.
+        This argument is required if you want to use the default data collator.
+    generate_during_eval (`bool`, *optional*, defaults to `False`):
+        If `True`, generates and logs completions from both the model and the reference model to W&B or Comet
+        during evaluation.
+    is_encoder_decoder (`bool` or `None`, *optional*, defaults to `None`):
+        When using the `model_init` argument (callable) to instantiate the model instead of the `model` argument,
+        you need to specify if the model returned by the callable is an encoder-decoder model.
+    precompute_ref_log_probs (`bool`, *optional*, defaults to `False`):
+        Whether to precompute reference model log probabilities for training and evaluation datasets. This is
+        useful when training without the reference model to reduce the total GPU memory needed.
+    model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
+        Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the model from a
+        string.
+    ref_model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
+        Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the reference model
+        from a string.
+ dataset_num_proc: (`int` or `None`, *optional*, defaults to `None`): + Number of processes to use for processing the dataset. + disable_dropout (`bool`, *optional*, defaults to `True`): + Whether to disable dropout in the model and reference model. + use_liger_loss (`bool`, *optional*, defaults to `False`): + Whether to use Liger loss. It requires liger-kernel to be installed. + base_model_attribute_name (`str`, *optional*, defaults to `"model"`): + Name of the attribute in the model that contains the base model. This is used to get the base model from + the model when the model does not have a `get_decoder` method in the case when `use_liger_loss` is `True`. + + """ + vllm_sampling_params: Optional[Any] = field( + default = None, + metadata = {'help': 'vLLM SamplingParams'}, + ) + unsloth_num_chunks : Optional[int] = field( + default = -1, + metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'}, + ) + max_seq_length : Optional[int] = field( + default = None, + metadata = {'help': 'Maximum sequence length to truncate to.'}, + ) + def __init__( + self, + output_dir = None, + overwrite_output_dir = None, + do_train = False, + do_eval = False, + do_predict = False, + eval_strategy = 'no', + prediction_loss_only = False, + per_device_train_batch_size = 4, + per_device_eval_batch_size = 4, + per_gpu_train_batch_size = None, + per_gpu_eval_batch_size = None, + gradient_accumulation_steps = 2, + eval_accumulation_steps = 2, + eval_delay = 0, + torch_empty_cache_steps = 250, + learning_rate = 5e-05, + weight_decay = 0.01, + adam_beta1 = 0.9, + adam_beta2 = 0.999, + adam_epsilon = 1e-08, + max_grad_norm = 1.0, + num_train_epochs = 3.0, + max_steps = -1, + lr_scheduler_type = 'linear', + warmup_ratio = 0.1, + warmup_steps = 0, + log_level = 'passive', + log_level_replica = 'warning', + log_on_each_node = True, + logging_dir = None, + logging_strategy = 'steps', + logging_first_step = False, + logging_steps = 1, + logging_nan_inf_filter = False, + save_strategy = 'steps', + save_steps = 500, + save_total_limit = None, + save_safetensors = True, + save_on_each_node = False, + save_only_model = False, + restore_callback_states_from_checkpoint = False, + no_cuda = False, + use_cpu = False, + use_mps_device = False, + seed = 3407, + data_seed = 3407, + jit_mode_eval = False, + bf16 = False, + fp16 = False, + fp16_opt_level = 'O1', + half_precision_backend = 'auto', + bf16_full_eval = False, + fp16_full_eval = False, + tf32 = None, + local_rank = -1, + ddp_backend = None, + tpu_num_cores = None, + tpu_metrics_debug = False, + debug = '', + dataloader_drop_last = False, + eval_steps = None, + dataloader_num_workers = 0, + dataloader_prefetch_factor = None, + past_index = -1, + run_name = None, + disable_tqdm = None, + remove_unused_columns = True, + label_names = None, + load_best_model_at_end = False, + metric_for_best_model = None, + greater_is_better = None, + ignore_data_skip = False, + fsdp = None, + fsdp_min_num_params = 0, + fsdp_config = None, + fsdp_transformer_layer_cls_to_wrap = None, + accelerator_config = None, + parallelism_config = None, + deepspeed = None, + label_smoothing_factor = 0.0, + optim = 'adamw_8bit', + optim_args = None, + adafactor = False, + group_by_length = False, + length_column_name = 'length', + report_to = None, + project = 'huggingface', + trackio_space_id = 'trackio', + ddp_find_unused_parameters = None, + ddp_bucket_cap_mb = None, + ddp_broadcast_buffers = None, + dataloader_pin_memory = True, + dataloader_persistent_workers = False, + 
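+        # Note: several defaults in this signature intentionally differ from stock
+        # `transformers.TrainingArguments` (e.g. `seed = 3407`, `optim = 'adamw_8bit'`,
+        # `logging_steps = 1`), as flagged in the class docstring above.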
skip_memory_metrics = True, + use_legacy_prediction_loop = False, + push_to_hub = False, + resume_from_checkpoint = None, + hub_model_id = None, + hub_strategy = 'every_save', + hub_token = None, + hub_private_repo = None, + hub_always_push = False, + hub_revision = None, + gradient_checkpointing = True, + gradient_checkpointing_kwargs = None, + include_inputs_for_metrics = False, + eval_do_concat_batches = True, + fp16_backend = 'auto', + push_to_hub_model_id = None, + push_to_hub_organization = None, + push_to_hub_token = None, + mp_parameters = '', + auto_find_batch_size = False, + full_determinism = False, + torchdynamo = None, + ray_scope = 'last', + ddp_timeout = 1800, + torch_compile = False, + torch_compile_backend = None, + torch_compile_mode = None, + include_tokens_per_second = False, + include_num_input_tokens_seen = False, + neftune_noise_alpha = None, + optim_target_modules = None, + batch_eval_metrics = False, + eval_on_start = False, + use_liger_kernel = False, + liger_kernel_config = None, + eval_use_gather_object = False, + average_tokens_across_devices = True, + max_length = 1024, + max_prompt_length = 512, + max_completion_length = None, + beta = 0.1, + loss_type = 'kto', + desirable_weight = 1.0, + undesirable_weight = 1.0, + label_pad_token_id = -100, + padding_value = None, + truncation_mode = 'keep_end', + generate_during_eval = False, + is_encoder_decoder = None, + disable_dropout = True, + precompute_ref_log_probs = False, + model_init_kwargs = None, + ref_model_init_kwargs = None, + dataset_num_proc = None, + use_liger_loss = False, + base_model_attribute_name = 'model', + vllm_sampling_params = None, + unsloth_num_chunks = -1, + max_seq_length = None, + **kwargs, + ): + if learning_rate < 1e-7: print(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!') + if learning_rate > 1: print(f'Unsloth: Your learning rate of `{learning_rate}` is way too larger > 1! 
Consider decreasing it to 1e-1, otherwise gradient updates will explode!') + if output_dir is None and save_strategy == 'steps' and save_steps == 500: + output_dir = 'unsloth_training_checkpoints' + save_strategy = 'no' + if dataset_num_proc is None: + from multiprocessing import cpu_count + dataset_num_proc = min(max(cpu_count()+4, 2), 64) + + super().__init__( + output_dir = output_dir, + overwrite_output_dir = overwrite_output_dir, + do_train = do_train, + do_eval = do_eval, + do_predict = do_predict, + eval_strategy = eval_strategy, + prediction_loss_only = prediction_loss_only, + per_device_train_batch_size = per_device_train_batch_size, + per_device_eval_batch_size = per_device_eval_batch_size, + per_gpu_train_batch_size = per_gpu_train_batch_size, + per_gpu_eval_batch_size = per_gpu_eval_batch_size, + gradient_accumulation_steps = gradient_accumulation_steps, + eval_accumulation_steps = eval_accumulation_steps, + eval_delay = eval_delay, + torch_empty_cache_steps = torch_empty_cache_steps, + learning_rate = learning_rate, + weight_decay = weight_decay, + adam_beta1 = adam_beta1, + adam_beta2 = adam_beta2, + adam_epsilon = adam_epsilon, + max_grad_norm = max_grad_norm, + num_train_epochs = num_train_epochs, + max_steps = max_steps, + lr_scheduler_type = lr_scheduler_type, + warmup_ratio = warmup_ratio, + warmup_steps = warmup_steps, + log_level = log_level, + log_level_replica = log_level_replica, + log_on_each_node = log_on_each_node, + logging_dir = logging_dir, + logging_strategy = logging_strategy, + logging_first_step = logging_first_step, + logging_steps = logging_steps, + logging_nan_inf_filter = logging_nan_inf_filter, + save_strategy = save_strategy, + save_steps = save_steps, + save_total_limit = save_total_limit, + save_safetensors = save_safetensors, + save_on_each_node = save_on_each_node, + save_only_model = save_only_model, + restore_callback_states_from_checkpoint = restore_callback_states_from_checkpoint, + no_cuda = no_cuda, + use_cpu = use_cpu, + use_mps_device = use_mps_device, + seed = seed, + data_seed = data_seed, + jit_mode_eval = jit_mode_eval, + bf16 = bf16, + fp16 = fp16, + fp16_opt_level = fp16_opt_level, + half_precision_backend = half_precision_backend, + bf16_full_eval = bf16_full_eval, + fp16_full_eval = fp16_full_eval, + tf32 = tf32, + local_rank = local_rank, + ddp_backend = ddp_backend, + tpu_num_cores = tpu_num_cores, + tpu_metrics_debug = tpu_metrics_debug, + debug = debug, + dataloader_drop_last = dataloader_drop_last, + eval_steps = eval_steps, + dataloader_num_workers = dataloader_num_workers, + dataloader_prefetch_factor = dataloader_prefetch_factor, + past_index = past_index, + run_name = run_name, + disable_tqdm = disable_tqdm, + remove_unused_columns = remove_unused_columns, + label_names = label_names, + load_best_model_at_end = load_best_model_at_end, + metric_for_best_model = metric_for_best_model, + greater_is_better = greater_is_better, + ignore_data_skip = ignore_data_skip, + fsdp = fsdp, + fsdp_min_num_params = fsdp_min_num_params, + fsdp_config = fsdp_config, + fsdp_transformer_layer_cls_to_wrap = fsdp_transformer_layer_cls_to_wrap, + accelerator_config = accelerator_config, + parallelism_config = parallelism_config, + deepspeed = deepspeed, + label_smoothing_factor = label_smoothing_factor, + optim = optim, + optim_args = optim_args, + adafactor = adafactor, + group_by_length = group_by_length, + length_column_name = length_column_name, + report_to = report_to, + project = project, + trackio_space_id = trackio_space_id, + 
ddp_find_unused_parameters = ddp_find_unused_parameters, + ddp_bucket_cap_mb = ddp_bucket_cap_mb, + ddp_broadcast_buffers = ddp_broadcast_buffers, + dataloader_pin_memory = dataloader_pin_memory, + dataloader_persistent_workers = dataloader_persistent_workers, + skip_memory_metrics = skip_memory_metrics, + use_legacy_prediction_loop = use_legacy_prediction_loop, + push_to_hub = push_to_hub, + resume_from_checkpoint = resume_from_checkpoint, + hub_model_id = hub_model_id, + hub_strategy = hub_strategy, + hub_token = hub_token, + hub_private_repo = hub_private_repo, + hub_always_push = hub_always_push, + hub_revision = hub_revision, + gradient_checkpointing = gradient_checkpointing, + gradient_checkpointing_kwargs = gradient_checkpointing_kwargs, + include_inputs_for_metrics = include_inputs_for_metrics, + eval_do_concat_batches = eval_do_concat_batches, + fp16_backend = fp16_backend, + push_to_hub_model_id = push_to_hub_model_id, + push_to_hub_organization = push_to_hub_organization, + push_to_hub_token = push_to_hub_token, + mp_parameters = mp_parameters, + auto_find_batch_size = auto_find_batch_size, + full_determinism = full_determinism, + torchdynamo = torchdynamo, + ray_scope = ray_scope, + ddp_timeout = ddp_timeout, + torch_compile = torch_compile, + torch_compile_backend = torch_compile_backend, + torch_compile_mode = torch_compile_mode, + include_tokens_per_second = include_tokens_per_second, + include_num_input_tokens_seen = include_num_input_tokens_seen, + neftune_noise_alpha = neftune_noise_alpha, + optim_target_modules = optim_target_modules, + batch_eval_metrics = batch_eval_metrics, + eval_on_start = eval_on_start, + use_liger_kernel = use_liger_kernel, + liger_kernel_config = liger_kernel_config, + eval_use_gather_object = eval_use_gather_object, + average_tokens_across_devices = average_tokens_across_devices, + max_length = max_length, + max_prompt_length = max_prompt_length, + max_completion_length = max_completion_length, + beta = beta, + loss_type = loss_type, + desirable_weight = desirable_weight, + undesirable_weight = undesirable_weight, + label_pad_token_id = label_pad_token_id, + padding_value = padding_value, + truncation_mode = truncation_mode, + generate_during_eval = generate_during_eval, + is_encoder_decoder = is_encoder_decoder, + disable_dropout = disable_dropout, + precompute_ref_log_probs = precompute_ref_log_probs, + model_init_kwargs = model_init_kwargs, + ref_model_init_kwargs = ref_model_init_kwargs, + dataset_num_proc = dataset_num_proc, + use_liger_loss = use_liger_loss, + base_model_attribute_name = base_model_attribute_name,**kwargs) + self.vllm_sampling_params = vllm_sampling_params + self.unsloth_num_chunks = unsloth_num_chunks + self.max_seq_length = max_seq_length +pass + +class _UnslothKTOTrainer(Trainer): + r""" + Initialize KTOTrainer. + + Args: + model (`transformers.PreTrainedModel`): + The model to train, preferably an `AutoModelForSequenceClassification`. + ref_model (`PreTrainedModelWrapper`): + Hugging Face transformer model with a casual language modelling head. Used for implicit reward computation + and loss. If no reference model is provided, the trainer will create a reference model with the same + architecture as the model to be optimized. + args (`KTOConfig`): + The arguments to use for training. + train_dataset (`datasets.Dataset`): + The dataset to use for training. + eval_dataset (`datasets.Dataset`): + The dataset to use for evaluation. 
+ processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`): + Processing class used to process the data. If provided, will be used to automatically process the inputs + for the model, and it will be saved along the model to make it easier to rerun an interrupted training or + reuse the fine-tuned model. + data_collator (`transformers.DataCollator`, *optional*, defaults to `None`): + The data collator to use for training. If None is specified, the default data collator + (`DPODataCollatorWithPadding`) will be used which will pad the sequences to the maximum length of the + sequences in the batch, given a dataset of paired sequences. + model_init (`Callable[[], transformers.PreTrainedModel]`): + The model initializer to use for training. If None is specified, the default model initializer will be + used. + callbacks (`list[transformers.TrainerCallback]`): + The callbacks to use for training. + optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`): + The optimizer and scheduler to use for training. + preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`): + The function to use to preprocess the logits before computing the metrics. + peft_config (`dict`, defaults to `None`): + The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in + a PEFT model. + compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*): + The function to use to compute the metrics. Must take an `EvalPrediction` and return a dictionary mapping strings to + metric values. + model_adapter_name (`str`, defaults to `None`): + Name of the train target PEFT adapter, when using LoRA with multiple adapters. + ref_adapter_name (`str`, defaults to `None`): + Name of the reference PEFT adapter, when using LoRA with multiple adapters. + """ + + _tag_names = ["trl", "kto"] + + def __init__( + self, + model: Union[PreTrainedModel, nn.Module, str] = None, + ref_model: Optional[Union[PreTrainedModel, nn.Module, str]] = None, + args: KTOConfig = None, + train_dataset: Optional[Dataset] = None, + eval_dataset: Optional[Union[Dataset, dict[str, Dataset]]] = None, + processing_class: Optional[ + Union[PreTrainedTokenizerBase, BaseImageProcessor, FeatureExtractionMixin, ProcessorMixin] + ] = None, + data_collator: Optional[DataCollator] = None, + model_init: Optional[Callable[[], PreTrainedModel]] = None, + callbacks: Optional[list[TrainerCallback]] = None, + optimizers: tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None), + preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None, + peft_config: Optional[dict] = None, + compute_metrics: Optional[Callable[[EvalLoopOutput], dict]] = None, + model_adapter_name: Optional[str] = None, + ref_adapter_name: Optional[str] = None, + ): + if type(args) is TrainingArguments: + raise ValueError("Please use `KTOConfig` instead of `TrainingArguments`.") + + if not isinstance(model, str) and ref_model is model: + raise ValueError( + "`model` and `ref_model` cannot be the same object. If you want `ref_model` to be the " + "same as `model`, you must pass a copy of it, or `None` if you use peft." + ) + + if args.model_init_kwargs is None: + model_init_kwargs = {} + elif not isinstance(model, str): + raise ValueError("You passed `model_init_kwargs` to the KTOTrainer. 
But your model is already instantiated.") + else: + model_init_kwargs = args.model_init_kwargs + dtype = model_init_kwargs.get("dtype") + if dtype is not None: + # Convert to `torch.dtype` if a str is passed + if isinstance(dtype, str) and dtype != "auto": + dtype = getattr(torch, dtype) + if dtype != "auto" and not isinstance(dtype, torch.dtype): + raise ValueError( + f"Invalid `dtype` passed to the KTOConfig. Expected either a string representing a `torch.dtype` or 'auto', but got {dtype}." + ) + model_init_kwargs["dtype"] = dtype + + if args.ref_model_init_kwargs is None: + ref_model_init_kwargs = {} + elif not isinstance(ref_model, str): + raise ValueError( + "You passed `ref_model_init_kwargs` to the KTOTrainer. But your ref_model is already instantiated." + ) + else: + ref_model_init_kwargs = args.ref_model_init_kwargs + dtype = ref_model_init_kwargs.get("dtype") + if dtype is not None: + # Convert to `torch.dtype` if a str is passed + if isinstance(dtype, str) and dtype != "auto": + dtype = getattr(torch, dtype) + if dtype != "auto" and not isinstance(dtype, torch.dtype): + raise ValueError( + f"Invalid `dtype` passed to the KTOConfig. Expected either a string representing a `torch.dtype` or 'auto', but got {dtype}." + ) + ref_model_init_kwargs["dtype"] = dtype + + if isinstance(model, str): + model = AutoModelForCausalLM.from_pretrained(model, **model_init_kwargs) + + if isinstance(ref_model, str): + ref_model = AutoModelForCausalLM.from_pretrained(ref_model, **ref_model_init_kwargs) + + # Initialize this variable to False. This helps tracking the case when `peft_module_casting_to_bf16` + # has been called in order to properly call autocast if needed. + self._peft_has_been_casted_to_bf16 = False + + if not is_peft_available() and peft_config is not None: + raise ValueError( + "PEFT is not installed and you passed a `peft_config` in the trainer's kwargs, please install it with `pip install peft` to use the PEFT models" + ) + elif is_peft_available() and peft_config is not None: + # if model is a peft model and we have a peft_config, we merge and unload it first + if isinstance(model, PeftModel): + model = model.merge_and_unload() + + if getattr(model, "is_loaded_in_8bit", False) or getattr(model, "is_loaded_in_4bit", False): + _support_gc_kwargs = hasattr( + args, "gradient_checkpointing_kwargs" + ) and "gradient_checkpointing_kwargs" in list( + inspect.signature(prepare_model_for_kbit_training).parameters + ) + + prepare_model_kwargs = {"use_gradient_checkpointing": args.gradient_checkpointing} + + if _support_gc_kwargs: + prepare_model_kwargs["gradient_checkpointing_kwargs"] = args.gradient_checkpointing_kwargs + + model = prepare_model_for_kbit_training(model, **prepare_model_kwargs) + elif args.gradient_checkpointing: + # For backward compatibility with older versions of transformers + if hasattr(model, "enable_input_require_grads"): + model.enable_input_require_grads() + else: + + def make_inputs_require_grad(module, input, output): + output.requires_grad_(True) + + model.get_input_embeddings().register_forward_hook(make_inputs_require_grad) + + # get peft model with the given config (Unsloth applies the PEFT config upstream, so the model is kept as-is here) + model = model + if args.bf16 and getattr(model, "is_loaded_in_4bit", False): + peft_module_casting_to_bf16(model) + # If args.bf16 we need to explicitly call `generate` with torch amp autocast context manager + self._peft_has_been_casted_to_bf16 = True + + # For models that use gradient_checkpointing, we need to attach a hook that enables input + # to explicitly have `requires_grad=True`, otherwise training will either 
silently + fail or completely fail. + elif args.gradient_checkpointing: + # For backward compatibility with older versions of transformers + if hasattr(model, "enable_input_require_grads"): + model.enable_input_require_grads() + else: + + def make_inputs_require_grad(module, input, output): + output.requires_grad_(True) + + model.get_input_embeddings().register_forward_hook(make_inputs_require_grad) + + if args.generate_during_eval and not (is_wandb_available() or is_comet_available()): + raise ValueError( + "`generate_during_eval=True` requires Weights and Biases or Comet to be installed." + " Please install `wandb` or `comet-ml` to resolve." + ) + + if model is not None: + self.is_encoder_decoder = model.config.is_encoder_decoder + elif args.is_encoder_decoder is None: + raise ValueError("When no model is provided, you need to pass the parameter is_encoder_decoder.") + else: + self.is_encoder_decoder = args.is_encoder_decoder + + self.is_peft_model = is_peft_available() and isinstance(model, PeftModel) + self.model_adapter_name = model_adapter_name + self.ref_adapter_name = ref_adapter_name + + if ref_model: + self.ref_model = ref_model + elif self.is_peft_model or args.precompute_ref_log_probs: + # The `model` with adapters turned off will be used as the reference model + self.ref_model = None + else: + self.ref_model = create_reference_model(model) + + if processing_class is None: + raise ValueError( + "A processing_class must be specified when using the default DPODataCollatorWithPadding" + ) + if args.max_length is None: + logger.warning( + "When using DPODataCollatorWithPadding, you should set `max_length` in the KTOTrainer's init;" + " it will be set to `512` by default, but you should do it yourself in the future.", + ) + max_length = 512 + if args.max_length is not None: + max_length = args.max_length + + if args.max_prompt_length is None: + logger.warning( + "When using DPODataCollatorWithPadding, you should set `max_prompt_length` in the KTOTrainer's init;" + " it will be set to `128` by default, but you should do it yourself in the future.", + ) + max_prompt_length = 128 + if args.max_prompt_length is not None: + max_prompt_length = args.max_prompt_length + + max_completion_length = None + if args.max_completion_length is None and self.is_encoder_decoder: + logger.warning( + "When using DPODataCollatorWithPadding with an encoder-decoder architecture, you should set `max_completion_length` in the KTOTrainer's init;" + " it will be set to `128` by default, but you should do it yourself in the future.", + ) + max_completion_length = 128 + if args.max_completion_length is not None and self.is_encoder_decoder: + max_completion_length = args.max_completion_length + + if data_collator is None: + data_collator = DPODataCollatorWithPadding( + pad_token_id=processing_class.pad_token_id, + label_pad_token_id=args.label_pad_token_id, + is_encoder_decoder=self.is_encoder_decoder, + ) + + if args.remove_unused_columns: + args.remove_unused_columns = False + # warn users + logger.warning( + "When using DPODataCollatorWithPadding, you should set `remove_unused_columns=False` in your KTOConfig;" + " we have set it for you, but you should do it yourself in the future.", + ) + + self.use_dpo_data_collator = True + else: + self.use_dpo_data_collator = False + + # Disable dropout in the model and reference model + if args.disable_dropout: + disable_dropout_in_model(model) + if self.ref_model is not None: + disable_dropout_in_model(self.ref_model) + + self.loss_type = args.loss_type + 
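# Note (illustrative, not executed): with the default `loss_type="kto"`, `kto_loss` below roughly implements
+        # Eq. (7) of the KTO paper (https://huggingface.co/papers/2402.01306):
+        #     chosen:   1 - sigmoid(beta * ((policy_logps - ref_logps) - KL))
+        #     rejected: 1 - sigmoid(beta * (KL - (policy_logps - ref_logps)))
+        # `apo_zero_unpaired` drops the KL term, which is why `calculate_KL` is switched off for it just below. +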
self.max_length = max_length + self.generate_during_eval = args.generate_during_eval + self.label_pad_token_id = args.label_pad_token_id + self.padding_value = args.padding_value if args.padding_value is not None else processing_class.pad_token_id + self.max_prompt_length = max_prompt_length + self.truncation_mode = args.truncation_mode + self.max_completion_length = max_completion_length + self.processing_class = processing_class + self.precompute_ref_log_probs = args.precompute_ref_log_probs + + # Not all losses require a KL calculation + self.calculate_KL = True + if self.loss_type in ["apo_zero_unpaired"]: + self.calculate_KL = False + + # Since ref log probs are precomputed on the first call to get_train/eval_dataloader, + # keep track of the first call to avoid recomputing them on subsequent calls + self._precomputed_train_ref_log_probs = False + self._precomputed_eval_ref_log_probs = False + + # metrics + self._stored_metrics = defaultdict(lambda: defaultdict(list)) + + # KTO parameters + self.beta = args.beta + self.desirable_weight = args.desirable_weight + self.undesirable_weight = args.undesirable_weight + self.aux_loss_enabled = getattr(model.config, "output_router_logits", False) + self.aux_loss_coef = getattr(model.config, "router_aux_loss_coef", 0.0) + if self.aux_loss_enabled and self.aux_loss_coef == 0.0: + logger.warning( + "You set `output_router_logits` to `True` in the model config, but `router_aux_loss_coef` is set to " + "`0.0`, meaning the auxiliary loss will not be used. Either set `router_aux_loss_coef` to a value " + "greater than `0.0`, or set `output_router_logits` to `False` if you don't want to use the auxiliary " + "loss.", + ) + + # The trainer estimates the number of FLOPs [floating-point operations] using the number of elements in the + # input tensor associated with the key "input_ids". However, in KTO, the sampled data does not include the + # "input_ids" key. Instead, the available keys are "prompt_input_ids" and "completion_input_ids". As a result, + # the trainer issues the warning: "Could not estimate the number of tokens of the input, floating-point + # operations will not be computed." To suppress this warning, we set the "estimate_tokens" key in the model's + # "warnings_issued" dictionary to True. This acts as a flag to indicate that the warning has already been + # issued. + model.warnings_issued["estimate_tokens"] = True + + # Compute that only on the main process for faster data processing. 
+ # see: https://github.com/huggingface/trl/pull/1255 + with PartialState().main_process_first(): + # Extract the prompt if needed + train_dataset = train_dataset.map( + maybe_extract_prompt, num_proc=args.dataset_num_proc, desc="Extracting prompt from train dataset" + ) + # Unpair the dataset if needed + train_dataset = maybe_unpair_preference_dataset( + train_dataset, args.dataset_num_proc, desc="Unpairing train dataset" + ) + # Apply the chat template if needed + train_dataset = train_dataset.map( + maybe_apply_chat_template, + fn_kwargs={"tokenizer": processing_class}, + num_proc=args.dataset_num_proc, + desc="Applying chat template to train dataset", + ) + if eval_dataset is not None: + eval_dataset = eval_dataset.map( + maybe_extract_prompt, num_proc=args.dataset_num_proc, desc="Extracting prompt from eval dataset" + ) + eval_dataset = maybe_unpair_preference_dataset( + eval_dataset, args.dataset_num_proc, desc="Unpairing eval dataset" + ) + eval_dataset = eval_dataset.map( + maybe_apply_chat_template, + fn_kwargs={"tokenizer": processing_class}, + num_proc=args.dataset_num_proc, + desc="Applying chat template to eval dataset", + ) + + # Tokenize and prepare the training datasets + train_dataset = train_dataset.map( + _tokenize, + batched=True, + fn_kwargs={"tokenizer": self.processing_class}, + num_proc=args.dataset_num_proc, + desc="Tokenizing train dataset", + ) + + fn_kwargs = { + "prefix": "", + "is_encoder_decoder": self.is_encoder_decoder, + "tokenizer": self.processing_class, + "max_length": self.max_length, + "truncation_mode": self.truncation_mode, + "label_pad_token_id": self.label_pad_token_id, + "max_prompt_length": self.max_prompt_length, + "max_completion_length": self.max_completion_length, + } + + train_dataset = train_dataset.map( + _process_tokens, + fn_kwargs=fn_kwargs, + num_proc=args.dataset_num_proc, + desc="Processing tokenized train dataset", + ) + + # Tokenize and prepare the eval datasets + if eval_dataset is not None: + eval_dataset = eval_dataset.map( + _tokenize, + fn_kwargs={"tokenizer": self.processing_class}, + batched=True, + num_proc=args.dataset_num_proc, + desc="Tokenizing eval dataset", + ) + + eval_dataset = eval_dataset.map( + _process_tokens, + fn_kwargs=fn_kwargs, + num_proc=args.dataset_num_proc, + desc="Processing tokenized eval dataset", + ) + + # Get KL datasets if needed + if self.calculate_KL: + if args.per_device_train_batch_size <= 1: + raise ValueError( + "Actual (not effective) batch size must be > 1. KTO will not work properly because the KL term will be equivalent to the implied reward." 
+ ) + + # create pairs for estimating the KL term by flipping the matched pairs in each batch of size total_batch_size + # i.e., [x_1, y_1], ..., [x_n, y_n] --> [x_1, y_n], ..., [x_n, y_1] = [x'_1, y'_1], ..., [x'_n, y'_n] + train_kl_dataset = train_dataset.map( + _get_kl_dataset, + batched=True, + batch_size=args.per_device_train_batch_size, + num_proc=args.dataset_num_proc, + desc="Extracting KL train dataset", + ) + + fn_kwargs["prefix"] = "KL_" + train_kl_dataset = train_kl_dataset.map( + _process_tokens, + fn_kwargs=fn_kwargs, + num_proc=args.dataset_num_proc, + remove_columns=[c for c in train_kl_dataset.column_names if c in train_dataset.column_names], + desc="Processing tokenized train KL dataset", + ) + + # merge the datasets + train_dataset = concatenate_datasets([train_dataset, train_kl_dataset], axis=1) + + if eval_dataset is not None: + # Get KL dataset + eval_kl_dataset = eval_dataset.map( + _get_kl_dataset, + batched=True, + batch_size=args.per_device_train_batch_size, + num_proc=args.dataset_num_proc, + desc="Extracting eval KL dataset", + ) + + eval_kl_dataset = eval_kl_dataset.map( + _process_tokens, + fn_kwargs=fn_kwargs, + num_proc=args.dataset_num_proc, + remove_columns=[c for c in eval_kl_dataset.column_names if c in eval_dataset.column_names], + desc="Processing tokenized eval KL dataset", + ) + + # merge the datasets + eval_dataset = concatenate_datasets([eval_dataset, eval_kl_dataset], axis=1) + + # calculate dataset desirability balance + num_desirable = max(sum(train_dataset["label"]), 1) + num_undesirable = max(len(train_dataset["label"]) - num_desirable, 1) # "label" is binary + + if num_desirable != num_undesirable: + # The lower and upper bounds come from Eq. [8] of https://huggingface.co/papers/2402.01306 + des_weight_lower_bound = round((num_undesirable * self.undesirable_weight / num_desirable) * 1, 2) + des_weight_upper_bound = round((num_undesirable * self.undesirable_weight / num_desirable) * 1.33, 2) + und_weight_lower_bound = round((num_desirable * self.desirable_weight / num_undesirable) / 1.33, 2) + und_weight_upper_bound = round((num_desirable * self.desirable_weight / num_undesirable) / 1, 2) + + des_weight_in_range = des_weight_lower_bound <= self.desirable_weight <= des_weight_upper_bound + und_weight_in_range = und_weight_lower_bound <= self.undesirable_weight <= und_weight_upper_bound + + if not (des_weight_in_range or und_weight_in_range): + logger.warning( + "You have different amounts of desirable/positive and undesirable/negative examples but the " + "weights on the desirable and undesirable losses don't seem to be in an ideal range. Based " + f"on your data, we recommend EITHER " + f"desirable_weight in [{des_weight_lower_bound}, {des_weight_upper_bound}] or " + f"undesirable_weight in [{und_weight_lower_bound}, {und_weight_upper_bound}] (but NOT BOTH). " + "See the documentation on how to optimally set these weights.", + ) + + super().__init__( + model=model, + args=args, + data_collator=data_collator, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + processing_class=processing_class, + model_init=model_init, + compute_metrics=compute_metrics, + callbacks=callbacks, + optimizers=optimizers, + preprocess_logits_for_metrics=preprocess_logits_for_metrics, + ) + + # Gradient accumulation requires scaled loss. Normally, loss scaling in the parent class depends on whether the + # model accepts loss-related kwargs. Since we compute our own loss, this check is irrelevant. 
We set + self.model_accepts_loss_kwargs to False to enable scaling. + self.model_accepts_loss_kwargs = False + + # Add tags for models that have been loaded with the correct transformers version + if hasattr(self.model, "add_model_tags"): + self.model.add_model_tags(self._tag_names) + + if not hasattr(self, "accelerator"): + raise AttributeError( + "Your `Trainer` does not have an `accelerator` object. Consider upgrading `transformers`." + ) + + # Deepspeed Zero-3 does not support precompute_ref_log_probs + if self.is_deepspeed_enabled: + if self.accelerator.state.deepspeed_plugin.zero_stage == 3 and self.precompute_ref_log_probs: + raise ValueError( + "You cannot use `precompute_ref_log_probs=True` with Deepspeed ZeRO-3. Please set `precompute_ref_log_probs=False`." + ) + + if self.ref_model is None: + if not (self.is_peft_model or self.precompute_ref_log_probs): + raise ValueError( + "No reference model and model is not a Peft model. Try setting `precompute_ref_log_probs=True`" + ) + else: + if self.is_deepspeed_enabled: + self.ref_model = prepare_deepspeed(self.ref_model, self.accelerator) + else: + self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True) + + # Import Liger loss if enabled + if self.args.use_liger_loss: + if not is_liger_kernel_available(): + raise ImportError( + "You set `use_liger_loss=True` but the liger kernel is not available. " + "Please install liger-kernel first: `pip install liger-kernel`" + ) + if self.loss_type in ["apo_zero_unpaired"]: + raise ValueError( + "You cannot set `loss_type='apo_zero_unpaired'` with liger-kernel. " + "Only KTO loss is supported with liger-kernel." + ) + if self.precompute_ref_log_probs: + raise ValueError( + "You cannot use `precompute_ref_log_probs=True` with liger kernel. Please set " + "`precompute_ref_log_probs=False`." + ) + if self.is_peft_model or self.ref_adapter_name is not None: + raise ValueError( + "You cannot use `use_liger_loss=True` with Peft models. Please set `use_liger_loss=False`." + ) + self.kto_loss_fn = LigerFusedLinearKTOLoss( + ignore_index=self.label_pad_token_id, beta=self.beta, use_ref_model=(self.ref_model is not None) + ) + + @contextmanager + def null_ref_context(self): + """Context manager for handling null reference model (that is, peft adapter manipulation).""" + with ( + self.accelerator.unwrap_model(self.model).disable_adapter() + if self.is_peft_model and not self.ref_adapter_name + else nullcontext() + ): + if self.ref_adapter_name: + self.model.set_adapter(self.ref_adapter_name) + yield + if self.ref_adapter_name: + self.model.set_adapter(self.model_adapter_name or "default") + + def get_train_dataloader(self) -> DataLoader: + """ + Returns the training [`~torch.utils.data.DataLoader`]. + + Overrides `transformers.Trainer.get_train_dataloader` to precompute `ref_log_probs`. 
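+
+        The precomputed reference log probs are gathered across processes and attached to `self.train_dataset`
+        as new columns (`reference_logps` and, when the loss requires a KL term, `reference_KL_logps`).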
+ """ + + if self.precompute_ref_log_probs and not self._precomputed_train_ref_log_probs: + dataloader_params = { + "batch_size": self.args.per_device_train_batch_size, + "collate_fn": self.data_collator, + "num_workers": self.args.dataloader_num_workers, + "pin_memory": self.args.dataloader_pin_memory, + "shuffle": False, + } + + # prepare dataloader + data_loader = self.accelerator.prepare(DataLoader(self.train_dataset, **dataloader_params)) + reference_completion_logps = [] + reference_KL_logps = [] + + for padded_batch in tqdm(iterable=data_loader, desc="Train dataset reference log probs"): + reference_completion_logp, reference_KL_logp = self.compute_reference_log_probs(padded_batch) + + reference_completion_logp = self.accelerator.gather_for_metrics(reference_completion_logp) + reference_completion_logps.append(reference_completion_logp.cpu()) + + if self.calculate_KL: + reference_KL_logp = self.accelerator.gather_for_metrics(reference_KL_logp) + reference_KL_logps.append(reference_KL_logp.cpu()) + + self.train_dataset = self.train_dataset.add_column( + name="reference_logps", column=torch.cat(reference_completion_logps).float().numpy() + ) + + if self.calculate_KL: + self.train_dataset = self.train_dataset.add_column( + name="reference_KL_logps", column=torch.cat(reference_KL_logps).float().numpy() + ) + + self._precomputed_train_ref_log_probs = True + + return super().get_train_dataloader() + + def get_eval_dataloader(self, eval_dataset: Optional[Dataset] = None) -> DataLoader: + """ + Returns the evaluation [`~torch.utils.data.DataLoader`]. + + Subclass of transformers.src.transformers.trainer.get_eval_dataloader to precompute `ref_log_probs`. + + Args: + eval_dataset (`torch.utils.data.Dataset`, *optional*): + If provided, will override `self.eval_dataset`. If it is a [`~datasets.Dataset`], columns not accepted + by the `model.forward()` method are automatically removed. It must implement `__len__`. 
+ """ + if eval_dataset is None and self.eval_dataset is None: + raise ValueError("Trainer: evaluation requires an eval_dataset.") + eval_dataset = eval_dataset if eval_dataset is not None else self.eval_dataset + + if self.precompute_ref_log_probs and not self._precomputed_eval_ref_log_probs: + dataloader_params = { + "batch_size": self.args.per_device_eval_batch_size, + "collate_fn": self.data_collator, + "num_workers": self.args.dataloader_num_workers, + "pin_memory": self.args.dataloader_pin_memory, + "shuffle": False, + } + + # prepare dataloader + data_loader = self.accelerator.prepare(DataLoader(eval_dataset, **dataloader_params)) + + reference_completion_logps = [] + reference_KL_logps = [] + + for padded_batch in tqdm(iterable=data_loader, desc="Eval dataset reference log probs"): + reference_completion_logp, reference_KL_logp = self.compute_reference_log_probs(padded_batch) + + reference_completion_logp = self.accelerator.gather_for_metrics(reference_completion_logp) + reference_completion_logps.append(reference_completion_logp.cpu()) + + if self.calculate_KL: + reference_KL_logp = self.accelerator.gather_for_metrics(reference_KL_logp) + reference_KL_logps.append(reference_KL_logp.cpu()) + + eval_dataset = eval_dataset.add_column( + name="reference_logps", column=torch.cat(reference_completion_logps).float().numpy() + ) + if self.calculate_KL: + eval_dataset = eval_dataset.add_column( + name="reference_KL_logps", column=torch.cat(reference_KL_logps).float().numpy() + ) + + # Save calculated reference_chosen_logps and reference_rejected_logps to the eval_dataset for subsequent runs + if self.eval_dataset is not None: + self.eval_dataset = eval_dataset + self._precomputed_eval_ref_log_probs = True + + return super().get_eval_dataloader(eval_dataset=eval_dataset) + + def compute_reference_log_probs(self, padded_batch: dict) -> dict: + """Computes log probabilities of the reference model for a single padded batch of a KTO specific dataset.""" + with torch.no_grad(): + if self.ref_model is None: + with self.null_ref_context(): + if self.is_encoder_decoder: + completion_logits = self.model( + padded_batch["prompt_input_ids"], + attention_mask=padded_batch["prompt_attention_mask"], + decoder_input_ids=padded_batch.get("completion_decoder_input_ids"), + labels=padded_batch["completion_labels"], + ).logits + + if self.calculate_KL: + KL_logits = self.model( + padded_batch["KL_prompt_input_ids"], + attention_mask=padded_batch["KL_prompt_attention_mask"], + decoder_input_ids=padded_batch.get("KL_completion_decoder_input_ids"), + labels=padded_batch["KL_completion_labels"], + ).logits + else: + completion_logits = self.model( + padded_batch["completion_input_ids"], + attention_mask=padded_batch["completion_attention_mask"], + ).logits + + if self.calculate_KL: + KL_logits = self.model( + padded_batch["KL_completion_input_ids"], + attention_mask=padded_batch["KL_completion_attention_mask"], + ).logits + else: + if self.is_encoder_decoder: + completion_logits = self.ref_model( + padded_batch["prompt_input_ids"], + attention_mask=padded_batch["prompt_attention_mask"], + decoder_input_ids=padded_batch.get("completion_decoder_input_ids"), + labels=padded_batch["completion_labels"], + ).logits + + if self.calculate_KL: + KL_logits = self.ref_model( + padded_batch["KL_prompt_input_ids"], + attention_mask=padded_batch["KL_prompt_attention_mask"], + decoder_input_ids=padded_batch.get("KL_completion_decoder_input_ids"), + labels=padded_batch["KL_completion_labels"], + ).logits + else: + 
completion_logits = self.ref_model( + padded_batch["completion_input_ids"], attention_mask=padded_batch["completion_attention_mask"] + ).logits + + if self.calculate_KL: + KL_logits = self.ref_model( + padded_batch["KL_completion_input_ids"], + attention_mask=padded_batch["KL_completion_attention_mask"], + ).logits + + completion_logps = self.get_batch_logps( + completion_logits, + padded_batch["completion_labels"], + average_log_prob=False, + is_encoder_decoder=self.is_encoder_decoder, + label_pad_token_id=self.label_pad_token_id, + ) + + if self.calculate_KL: + KL_logps = self.get_batch_logps( + KL_logits, + padded_batch["KL_completion_labels"], + average_log_prob=False, + is_encoder_decoder=self.is_encoder_decoder, + label_pad_token_id=self.label_pad_token_id, + ) + else: + KL_logps = None + + return completion_logps, KL_logps + + @staticmethod + def get_batch_logps( + logits: torch.FloatTensor, + labels: torch.LongTensor, + average_log_prob: bool = False, + label_pad_token_id: int = -100, + is_encoder_decoder: bool = False, + ) -> torch.FloatTensor: + """Compute the log probabilities of the given labels under the given logits. + + Args: + logits: + Logits of the model (unnormalized). Shape: (batch_size, sequence_length, vocab_size) + labels: + Labels for which to compute the log probabilities. Label tokens with a value of label_pad_token_id are + ignored. Shape: (batch_size, sequence_length) + average_log_prob: + If True, return the average log probability per (non-masked) token. Otherwise, return the sum of the + log probabilities of the (non-masked) tokens. + label_pad_token_id: + The label value to ignore when computing log probabilities. + is_encoder_decoder: + Whether the model is an encoder-decoder model. If True, the labels are not shifted and the logits are + assumed to already be aligned with the labels. If False, the labels are shifted to the right by one + position, and the logits are assumed to be aligned with the shifted labels. + + Returns: + A tensor of shape (batch_size,) containing the average/sum log probabilities of the given labels under the + given logits. 
+ """ + if logits.shape[:-1] != labels.shape: + raise ValueError("Logits (batch and sequence length dim) and labels must have the same shape.") + + if not is_encoder_decoder: + labels = labels[:, 1:].clone() + logits = logits[:, :-1, :] + else: + # Fixes end-dec RuntimeError + labels = labels.clone() + + loss_mask = labels != label_pad_token_id + + # dummy token; we'll ignore the losses on these tokens later + labels[labels == label_pad_token_id] = 0 + + per_token_logps = selective_log_softmax(logits, labels) + + if average_log_prob: + return (per_token_logps * loss_mask).sum(-1) / loss_mask.sum(-1) + else: + return (per_token_logps * loss_mask).sum(-1) + + def forward( + self, model: nn.Module, batch: dict[str, Union[list, torch.LongTensor]] + ) -> tuple[torch.FloatTensor, torch.FloatTensor, torch.FloatTensor, torch.FloatTensor]: + KL_logps = self._compute_kl_logps(model, batch) + + model_kwargs = ( + { + "labels": batch["completion_labels"], + "decoder_input_ids": batch.get("completion_decoder_input_ids"), + } + if self.is_encoder_decoder + else {} + ) + if self.aux_loss_enabled: + model_kwargs["output_router_logits"] = True + + outputs = model( + batch["completion_input_ids"], + attention_mask=batch["completion_attention_mask"], + **model_kwargs, + ) + completion_logits = outputs.logits + + completion_logps = self.get_batch_logps( + completion_logits, + batch["completion_labels"], + average_log_prob=False, + is_encoder_decoder=self.is_encoder_decoder, + label_pad_token_id=self.label_pad_token_id, + ) + + if completion_logps.shape[0] != len(batch["label"]): + raise ValueError( + "There is a mismatch between the number of examples in this batch and the number of " + "examples for which an output sequence was predicted." + ) + + chosen_idx = [i for i in range(completion_logps.shape[0]) if batch["label"][i] is True] + rejected_idx = [i for i in range(completion_logps.shape[0]) if batch["label"][i] is False] + + chosen_logps = completion_logps[chosen_idx, ...] + rejected_logps = completion_logps[rejected_idx, ...] + + chosen_logits = completion_logits[chosen_idx, ...] + rejected_logits = completion_logits[rejected_idx, ...] + + if self.aux_loss_enabled: + return (chosen_logps, rejected_logps, chosen_logits, rejected_logits, KL_logps, outputs.aux_loss) + else: + return (chosen_logps, rejected_logps, chosen_logits, rejected_logits, KL_logps) + + def kto_loss( + self, + policy_chosen_logps: torch.FloatTensor, + policy_rejected_logps: torch.FloatTensor, + policy_KL_logps: torch.FloatTensor, + reference_chosen_logps: torch.FloatTensor, + reference_rejected_logps: torch.FloatTensor, + reference_KL_logps: torch.FloatTensor, + ) -> tuple[torch.FloatTensor, torch.FloatTensor, torch.FloatTensor, torch.FloatTensor]: + """Compute the KTO loss for a batch of policy and reference model log probabilities. + + Args: + policy_chosen_logps: + Log probabilities of the policy model for the chosen responses. Shape: (num(chosen) in batch_size,) + policy_rejected_logps: + Log probabilities of the policy model for the rejected responses. Shape: (num(rejected) in batch_size,) + policy_KL_logps: Log probabilities of the policy model for the KL responses. Shape: (batch_size,) + reference_chosen_logps: + Log probabilities of the reference model for the chosen responses. Shape: (num(chosen) in batch_size,) + reference_rejected_logps: + Log probabilities of the reference model for the rejected responses. 
Shape: (num(rejected) in + batch_size,) + reference_KL_logps: Log probabilities of the reference model for the KL responses. Shape: (batch_size,) + + Returns: + A tuple of four tensors: (losses, chosen_rewards, rejected_rewards, KL). The losses tensor contains the KTO + loss for each example in the batch. The chosen_rewards and rejected_rewards tensors contain the rewards for + the chosen and rejected responses, respectively. The KL tensor contains the detached KL divergence estimate + between the policy and reference models. + """ + if self.calculate_KL: + kl = (policy_KL_logps - reference_KL_logps).mean().detach() + kl = self.accelerator.gather_for_metrics(kl).mean().clamp(min=0) + else: + kl = torch.zeros(1).to(policy_chosen_logps.device) + + # Chosen losses + if policy_chosen_logps.shape[0] != 0 or reference_chosen_logps.shape[0] != 0: + chosen_logratios = policy_chosen_logps - reference_chosen_logps + + if self.loss_type == "kto": + # Eqn (7) of the KTO paper (https://huggingface.co/papers/2402.01306) + chosen_losses = 1 - F.sigmoid(self.beta * (chosen_logratios - kl)) + elif self.loss_type == "apo_zero_unpaired": + # Unpaired variant of Eqn (7) of the APO paper (https://huggingface.co/papers/2408.06266) + # Use this loss when you believe the chosen outputs are better than your model's default output + chosen_losses = 1 - F.sigmoid(self.beta * chosen_logratios) + + chosen_rewards = self.beta * chosen_logratios.detach() + + else: + # lists can't be empty -- if they are, then accelerate.gather will hang + chosen_losses = torch.Tensor([]).to(self.accelerator.device) + chosen_rewards = torch.Tensor([]).to(self.accelerator.device) + + # Rejected losses + if policy_rejected_logps.shape[0] != 0 or reference_rejected_logps.shape[0] != 0: + rejected_logratios = policy_rejected_logps - reference_rejected_logps + + if self.loss_type == "kto": + rejected_losses = 1 - F.sigmoid(self.beta * (kl - rejected_logratios)) + elif self.loss_type == "apo_zero_unpaired": + rejected_losses = F.sigmoid(self.beta * rejected_logratios) + + rejected_rewards = self.beta * rejected_logratios.detach() + else: + # lists can't be empty -- if they are, then accelerate.gather will hang + rejected_losses = torch.Tensor([]).to(self.accelerator.device) + rejected_rewards = torch.Tensor([]).to(self.accelerator.device) + + losses = torch.cat( + (self.desirable_weight * chosen_losses, self.undesirable_weight * rejected_losses), + 0, + ) + + return losses, chosen_rewards, rejected_rewards, kl + + def _compute_kl_logps(self, model, batch): + """Compute KL log probabilities for a given batch.""" + KL_logps = None + if self.calculate_KL: + if self.is_encoder_decoder: + KL_model_kwargs = { + "input_ids": batch["KL_prompt_input_ids"], + "attention_mask": batch["KL_prompt_attention_mask"], + "labels": batch["KL_completion_labels"], + "decoder_input_ids": batch.get("KL_completion_decoder_input_ids"), + } + else: + KL_model_kwargs = { + "input_ids": batch["KL_completion_input_ids"], + "attention_mask": batch["KL_completion_attention_mask"], + } + + with torch.no_grad(): + KL_logits = model(**KL_model_kwargs).logits + + KL_logps = self.get_batch_logps( + KL_logits, + batch["KL_completion_labels"], + average_log_prob=False, + is_encoder_decoder=self.is_encoder_decoder, + label_pad_token_id=self.label_pad_token_id, + ) + return KL_logps + + def _compute_loss_liger(self, model, batch): + """ + Compute the KTO loss using the Liger-Kernel's LigerFusedLinearKTOLoss. 
+ + Args: + model: + The policy model used for generating log probabilities and outputs. It could be an encoder-decoder + model or a regular language model. + batch: A dictionary containing the input data and labels for the batch. + + Returns: + A dictionary containing the following keys: + - "loss": The computed KTO loss for the batch. + - "chosen_logits_sum": Sum of the logits for the chosen responses from the policy model. + - "rejected_logits_sum": Sum of the logits for the rejected responses from the policy model. + - "chosen_logps": Log probabilities of the chosen responses from the policy model. + - "rejected_logps": Log probabilities of the rejected responses from the policy model. + - "chosen_rewards": Rewards for the chosen responses. + - "rejected_rewards": Rewards for the rejected responses. + - "kl": The KL divergence between the policy and reference models (detached). + + If auxiliary loss is enabled, the dictionary will also include: + - "aux_loss": The auxiliary loss from the model outputs. + """ + policy_KL_logps = self._compute_kl_logps(model, batch) + reference_KL_logps = self._compute_kl_logps(self.ref_model, batch) + if self.calculate_KL: + kl = (policy_KL_logps - reference_KL_logps).mean().detach() + kl = self.accelerator.gather_for_metrics(kl).mean().clamp(min=0) + else: + kl = torch.zeros(1).to(self.accelerator.device) + + model_kwargs = ( + { + "labels": batch["completion_labels"], + "decoder_input_ids": batch.get("completion_decoder_input_ids"), + } + if self.is_encoder_decoder + else {} + ) + if self.aux_loss_enabled: + model_kwargs["output_router_logits"] = True + + if self.is_encoder_decoder: + # 1. Get encoder outputs + encoder_outputs = model.get_encoder()( + batch["completion_input_ids"], + attention_mask=batch["completion_attention_mask"], + return_dict=True, + **model_kwargs, + ) + # 2. Get decoder outputs + outputs = model.get_decoder()( + input_ids=model_kwargs["decoder_input_ids"], + encoder_hidden_states=encoder_outputs.last_hidden_state, + use_cache=False, + **model_kwargs, + ) + # 1. Get reference encoder outputs + ref_encoder_outputs = self.ref_model.get_encoder()( + batch["completion_input_ids"], + attention_mask=batch["completion_attention_mask"], + return_dict=True, + **model_kwargs, + ) + # 2. 
Get reference decoder outputs + ref_outputs = self.ref_model.get_decoder()( + input_ids=model_kwargs["decoder_input_ids"], + encoder_hidden_states=ref_encoder_outputs.last_hidden_state, + use_cache=False, + **model_kwargs, + ) + else: + # skip the lm head and get the last hidden state + if hasattr(model, "get_decoder") and model.get_decoder() is not None: + base_model = model.get_decoder() + else: + base_attr = getattr(model, "base_model_prefix", self.args.base_model_attribute_name) + base_model = getattr(model, base_attr, model) + outputs = base_model( + batch["completion_input_ids"], + attention_mask=batch["completion_attention_mask"], + use_cache=False, + **model_kwargs, + ) + + # reference model + if hasattr(self.ref_model, "get_decoder") and self.ref_model.get_decoder() is not None: + ref_base_model = self.ref_model.get_decoder() + else: + ref_attr = getattr(self.ref_model, "base_model_prefix", self.args.base_model_attribute_name) + ref_base_model = getattr(self.ref_model, ref_attr, self.ref_model) + ref_outputs = ref_base_model( + batch["completion_input_ids"], + attention_mask=batch["completion_attention_mask"], + use_cache=False, + **model_kwargs, + ) + lm_head = model.get_output_embeddings() + ref_lm_head = self.ref_model.get_output_embeddings() + + ( + loss, + ( + chosen_logps_sum, + rejected_logps_sum, + chosen_logits_sum, + rejected_logits_sum, + chosen_rewards_sum, + rejected_rewards_sum, + ), + ) = self.kto_loss_fn( + _input=outputs.last_hidden_state[:, :-1] if not self.is_encoder_decoder else outputs.last_hidden_state, + lin_weight=lm_head.weight, + target=batch["completion_labels"][:, 1:], + bias=lm_head.bias if hasattr(lm_head, "bias") else None, + preference_labels=torch.tensor(batch["label"], dtype=torch.bool).to(self.accelerator.device), + ref_input=ref_outputs.last_hidden_state[:, :-1] + if not self.is_encoder_decoder + else ref_outputs.last_hidden_state, + ref_weight=ref_lm_head.weight, + ref_bias=ref_lm_head.bias if hasattr(ref_lm_head, "bias") else None, + kl=kl, + ) + + output = { + "loss": loss, + "chosen_logits_sum": chosen_logits_sum, + "rejected_logits_sum": rejected_logits_sum, + "chosen_logps_sum": chosen_logps_sum, + "rejected_logps_sum": rejected_logps_sum, + "chosen_rewards_sum": chosen_rewards_sum, + "rejected_rewards_sum": rejected_rewards_sum, + "kl": kl, + } + if self.aux_loss_enabled: + output["aux_loss"] = outputs.aux_loss + + return output + + def get_batch_loss_metrics( + self, + model, + batch: dict[str, Union[list, torch.LongTensor]], + ): + """Compute the KTO loss and other metrics for the given batch of inputs for train or test.""" + metrics = {} + batch = {k: (v.to(self.accelerator.device) if isinstance(v, torch.Tensor) else v) for k, v in batch.items()} + + labels = torch.tensor(batch["label"]) + num_chosen = labels.sum().to(self.accelerator.device) + num_rejected = (len(labels) - num_chosen).to(self.accelerator.device) + + if self.args.use_liger_loss: + model_output = self._compute_loss_liger(model, batch) + losses = model_output["loss"] + policy_chosen_logits = model_output["chosen_logits_sum"] + policy_rejected_logits = model_output["rejected_logits_sum"] + policy_chosen_logps = model_output["chosen_logps_sum"] + policy_rejected_logps = model_output["rejected_logps_sum"] + chosen_rewards = model_output["chosen_rewards_sum"] + rejected_rewards = model_output["rejected_rewards_sum"] + kl = model_output["kl"] + if self.aux_loss_enabled: + aux_loss = model_output["aux_loss"] + else: + forward_output = self.forward(model, batch) + ( + 
policy_chosen_logps, + policy_rejected_logps, + policy_chosen_logits, + policy_rejected_logits, + policy_KL_logps, + ) = forward_output[:5] + if self.aux_loss_enabled: + aux_loss = forward_output[5] + + # if reference_logps in batch use them, otherwise use the reference model + if "reference_logps" in batch: + chosen_idx = [i for i in range(batch["reference_logps"].shape[0]) if batch["label"][i] is True] + rejected_idx = [i for i in range(batch["reference_logps"].shape[0]) if batch["label"][i] is False] + + reference_chosen_logps = batch["reference_logps"][chosen_idx, ...] + reference_rejected_logps = batch["reference_logps"][rejected_idx, ...] + if self.calculate_KL: + reference_KL_logps = batch["reference_KL_logps"] + else: + reference_KL_logps = None + else: + with torch.no_grad(): + if self.ref_model is None: + with self.null_ref_context(): + ( + reference_chosen_logps, + reference_rejected_logps, + _, + _, + reference_KL_logps, + ) = self.forward(self.model, batch)[:5] + else: + ( + reference_chosen_logps, + reference_rejected_logps, + _, + _, + reference_KL_logps, + ) = self.forward(self.ref_model, batch)[:5] + + losses, chosen_rewards, rejected_rewards, kl = self.kto_loss( + policy_chosen_logps, + policy_rejected_logps, + policy_KL_logps, + reference_chosen_logps, + reference_rejected_logps, + reference_KL_logps, + ) + + metrics["kl"] = kl.item() + + all_num_chosen = self.accelerator.gather_for_metrics(num_chosen).sum().item() + all_num_rejected = self.accelerator.gather_for_metrics(num_rejected).sum().item() + + if all_num_chosen > 0: + metrics["rewards/chosen_sum"] = ( + self.accelerator.gather_for_metrics(chosen_rewards.nansum()).nansum().item() + ) + metrics["logps/chosen_sum"] = ( + self.accelerator.gather_for_metrics(policy_chosen_logps.nansum()).nansum().item() + ) + metrics["logits/chosen_sum"] = ( + self.accelerator.gather_for_metrics(policy_chosen_logits.nansum()).nansum().item() + ) + metrics["count/chosen"] = all_num_chosen + + if all_num_rejected > 0: + metrics["rewards/rejected_sum"] = ( + self.accelerator.gather_for_metrics(rejected_rewards.nansum()).nansum().item() + ) + metrics["logps/rejected_sum"] = ( + self.accelerator.gather_for_metrics(policy_rejected_logps.nansum()).nansum().item() + ) + metrics["logits/rejected_sum"] = ( + self.accelerator.gather_for_metrics(policy_rejected_logits.nansum()).nansum().item() + ) + metrics["count/rejected"] = all_num_rejected + + loss = losses.nanmean() + if self.aux_loss_enabled: + loss += self.aux_loss_coef * aux_loss + + return loss, metrics + + def compute_loss( + self, + model: Union[PreTrainedModel, nn.Module], + inputs: dict[str, Union[torch.Tensor, Any]], + return_outputs=False, + num_items_in_batch=None, + ) -> Union[torch.Tensor, tuple[torch.Tensor, dict[str, torch.Tensor]]]: + compute_loss_context_manager = ( + autocast(self.accelerator.device.type) if self._peft_has_been_casted_to_bf16 else nullcontext() + ) + + with compute_loss_context_manager: + loss, metrics = self.get_batch_loss_metrics(model, inputs) + + # Make sure to move the loss to the device the original accumulating loss is at back in the `Trainer` class: + loss = loss.to(self.args.device) + # force log the metrics + if self.accelerator.is_main_process: + self.store_metrics(metrics, train_eval="train") + + if return_outputs: + return (loss, metrics) + return loss + + def store_metrics(self, metrics: dict[str, float], train_eval: Literal["train", "eval"] = "train") -> None: + for key, value in metrics.items(): + 
self._stored_metrics[train_eval][key].append(value) + + def _get_train_sampler(self, dataset: Optional[Dataset] = None) -> Optional[torch.utils.data.Sampler]: + if dataset is None: + dataset = self.train_dataset + if dataset is None or not has_length(dataset): + return None + return SequentialSampler(dataset) + + def generate_from_model_and_ref(self, model, batch: dict[str, torch.LongTensor]) -> tuple[str, str]: + """Generate samples from the model and reference model for the given batch of inputs.""" + + # If one uses `generate_during_eval` with peft + bf16, we need to explicitly call generate with + # the torch amp context manager as some hidden states are silently casted to full precision. + generate_context_manager = ( + autocast(self.accelerator.device.type) if self._peft_has_been_casted_to_bf16 else nullcontext() + ) + + with generate_context_manager: + policy_output = model.generate( + input_ids=batch["prompt_input_ids"], + attention_mask=batch["prompt_attention_mask"], + max_length=self.max_length, + do_sample=True, + pad_token_id=self.processing_class.pad_token_id, + ) + + # if reference_output in batch use that otherwise use the reference model + if "reference_output" in batch: + reference_output = batch["reference_output"] + else: + if self.ref_model is None: + with self.null_ref_context(): + reference_output = self.model.generate( + input_ids=batch["prompt_input_ids"], + attention_mask=batch["prompt_attention_mask"], + max_length=self.max_length, + do_sample=True, + pad_token_id=self.processing_class.pad_token_id, + ) + else: + reference_output = self.ref_model.generate( + input_ids=batch["prompt_input_ids"], + attention_mask=batch["prompt_attention_mask"], + max_length=self.max_length, + do_sample=True, + pad_token_id=self.processing_class.pad_token_id, + ) + + policy_output = pad_to_length(policy_output, self.max_length, self.processing_class.pad_token_id) + policy_output_decoded = self.processing_class.batch_decode(policy_output, skip_special_tokens=True) + + reference_output = pad_to_length(reference_output, self.max_length, self.processing_class.pad_token_id) + reference_output_decoded = self.processing_class.batch_decode(reference_output, skip_special_tokens=True) + + return policy_output_decoded, reference_output_decoded + + def prediction_step( + self, + model: Union[PreTrainedModel, nn.Module], + inputs: dict[str, Union[torch.Tensor, Any]], + prediction_loss_only: bool, + ignore_keys: Optional[list[str]] = None, + ): + if ignore_keys is None: + if hasattr(model, "config"): + ignore_keys = getattr(model.config, "keys_to_ignore_at_inference", []) + else: + ignore_keys = [] + + prediction_context_manager = ( + autocast(self.accelerator.device.type) if self._peft_has_been_casted_to_bf16 else nullcontext() + ) + with torch.no_grad(), prediction_context_manager: + loss, metrics = self.get_batch_loss_metrics(model, inputs) + + # force log the metrics + if self.accelerator.is_main_process: + self.store_metrics(metrics, train_eval="eval") + + if prediction_loss_only: + return (loss.detach(), None, None) + + # logits for the chosen and rejected samples from model + logits_dict = {} + if "logits/chosen_sum" in metrics: + logits_dict["eval_logits/chosen"] = metrics["logits/chosen_sum"] + if "logits/rejected_sum" in metrics: + logits_dict["eval_logits/rejected"] = metrics["logits/rejected_sum"] + logits = [v for k, v in logits_dict.items() if k not in ignore_keys] + logits = torch.tensor(logits, device=self.accelerator.device) + labels = torch.zeros(logits.shape[0], 
device=self.accelerator.device) + + return (loss.detach(), logits, labels) + + def evaluation_loop( + self, + dataloader: DataLoader, + description: str, + prediction_loss_only: Optional[bool] = None, + ignore_keys: Optional[list[str]] = None, + metric_key_prefix: str = "eval", + ) -> EvalLoopOutput: + """ + Overriding built-in evaluation loop to store metrics for each batch. Prediction/evaluation loop, shared by + `Trainer.evaluate()` and `Trainer.predict()`. + + Works both with or without labels. + """ + + # Sample and save to game log if requested (for one batch to save time) + if self.generate_during_eval: + # Generate random indices within the range of the total number of samples + num_samples = len(dataloader.dataset) + random_indices = random.sample(range(num_samples), k=self.args.eval_batch_size) + + # Use dataloader.dataset.select to get the random batch without iterating over the DataLoader + random_batch_dataset = dataloader.dataset.select(random_indices) + random_batch = self.data_collator(random_batch_dataset) + random_batch = self._prepare_inputs(random_batch) + + target_labels = torch.tensor(random_batch["label"], dtype=torch.bool, device=self.accelerator.device) + target_indices = torch.where(~target_labels)[0] + target_batch = { + "prompt_input_ids": random_batch["prompt_input_ids"][target_indices], + "prompt_attention_mask": random_batch["prompt_attention_mask"][target_indices], + "prompt": itemgetter(*target_indices)(random_batch["prompt"]), + } + policy_output_decoded, ref_output_decoded = self.generate_from_model_and_ref(self.model, target_batch) + + table = pd.DataFrame( + columns=["Prompt", "Policy", "Ref Model"], + data=[ + [prompt, pol[len(prompt) :], ref[len(prompt) :]] + for prompt, pol, ref in zip(target_batch["prompt"], policy_output_decoded, ref_output_decoded) + ], + ) + if "wandb" in self.args.report_to: + wandb.log({"game_log": wandb.Table(data=table)}) + + if "comet_ml" in self.args.report_to: + log_table_to_comet_experiment( + name="game_log.csv", + table=table, + ) + + # Base evaluation + initial_output = super().evaluation_loop( + dataloader, description, prediction_loss_only, ignore_keys, metric_key_prefix + ) + + return initial_output + + def log(self, logs: dict[str, float], start_time: Optional[float] = None) -> None: + """ + Log `logs` on the various objects watching training, including stored metrics. + + Args: + logs (`dict[str, float]`): + The values to log. + start_time (`float` or `None`, *optional*, defaults to `None`): + Start time of the training. 
+ """ + # logs either has 'loss' or 'eval_loss' + train_eval = "train" if "loss" in logs else "eval" + # train metrics should have no prefix, eval should have 'eval_' + prefix = "eval_" if train_eval == "eval" else "" + # accumulate average metrics from sums and lengths + for split in ["chosen", "rejected"]: + if f"count/{split}" in self._stored_metrics[train_eval]: + count_sum = torch.Tensor(self._stored_metrics[train_eval][f"count/{split}"]).sum().item() + for metric in ["rewards", "logps", "logits"]: + logs[f"{prefix}{metric}/{split}"] = ( + torch.Tensor(self._stored_metrics[train_eval][f"{metric}/{split}_sum"]).sum().item() + / count_sum + ) + # delete obsolete metric + del self._stored_metrics[train_eval][f"{metric}/{split}_sum"] + del self._stored_metrics[train_eval][f"count/{split}"] + # calculate reward margin + if f"{prefix}rewards/chosen" in logs and f"{prefix}rewards/rejected" in logs: + logs[f"{prefix}rewards/margins"] = logs[f"{prefix}rewards/chosen"] - logs[f"{prefix}rewards/rejected"] + # Add averaged stored metrics to logs + for key, metrics in self._stored_metrics[train_eval].items(): + logs[f"{prefix}{key}"] = torch.Tensor(metrics).mean().item() + del self._stored_metrics[train_eval] + return super().log(logs, start_time) + + # Ensure the model card is saved along with the checkpoint + def _save_checkpoint(self, model, trial): + if self.args.hub_model_id is None: + model_name = Path(self.args.output_dir).name + else: + model_name = self.args.hub_model_id.split("/")[-1] + self.create_model_card(model_name=model_name) + super()._save_checkpoint(model, trial) + + def create_model_card( + self, + model_name: Optional[str] = None, + dataset_name: Optional[str] = None, + tags: Union[str, list[str], None] = None, + ): + """ + Creates a draft of a model card using the information available to the `Trainer`. + + Args: + model_name (`str` or `None`, *optional*, defaults to `None`): + Name of the model. + dataset_name (`str` or `None`, *optional*, defaults to `None`): + Name of the dataset used for training. + tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`): + Tags to be associated with the model card. 
+ """ + if not self.is_world_process_zero(): + return + + if hasattr(self.model.config, "_name_or_path") and not os.path.isdir(self.model.config._name_or_path): + base_model = self.model.config._name_or_path + else: + base_model = None + + # normalize `tags` to a mutable set + if tags is None: + tags = set() + elif isinstance(tags, str): + tags = {tags} + else: + tags = set(tags) + + if hasattr(self.model.config, "unsloth_version"): + tags.add("unsloth") + + if "JOB_ID" in os.environ: + tags.add("hf_jobs") + + tags.update(self._tag_names) + + # docstyle-ignore + citation = textwrap.dedent("""\ + @article{ethayarajh2024kto, + title = {{KTO: Model Alignment as Prospect Theoretic Optimization}}, + author = {Kawin Ethayarajh and Winnie Xu and Niklas Muennighoff and Dan Jurafsky and Douwe Kiela}, + year = 2024, + eprint = {arXiv:2402.01306}, + }""") + + model_card = generate_model_card( + base_model=base_model, + model_name=model_name, + hub_model_id=self.hub_model_id, + dataset_name=dataset_name, + tags=tags, + wandb_url=wandb.run.url if is_wandb_available() and wandb.run is not None else None, + comet_url=get_comet_experiment_url(), + trainer_name="KTO", + trainer_citation=citation, + paper_title="KTO: Model Alignment as Prospect Theoretic Optimization", + paper_id="2402.01306", + ) + + model_card.save(os.path.join(self.args.output_dir, "README.md")) +class UnslothKTOTrainer(_UnslothKTOTrainer): + """ + +Initialize KTOTrainer. + +Args: + model (`transformers.PreTrainedModel`): + The model to train, preferably an `AutoModelForSequenceClassification`. + ref_model (`PreTrainedModelWrapper`): + Hugging Face transformer model with a casual language modelling head. Used for implicit reward computation + and loss. If no reference model is provided, the trainer will create a reference model with the same + architecture as the model to be optimized. + args (`KTOConfig`): + The arguments to use for training. + train_dataset (`datasets.Dataset`): + The dataset to use for training. + eval_dataset (`datasets.Dataset`): + The dataset to use for evaluation. + processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`): + Processing class used to process the data. If provided, will be used to automatically process the inputs + for the model, and it will be saved along the model to make it easier to rerun an interrupted training or + reuse the fine-tuned model. + data_collator (`transformers.DataCollator`, *optional*, defaults to `None`): + The data collator to use for training. If None is specified, the default data collator + (`DPODataCollatorWithPadding`) will be used which will pad the sequences to the maximum length of the + sequences in the batch, given a dataset of paired sequences. + model_init (`Callable[[], transformers.PreTrainedModel]`): + The model initializer to use for training. If None is specified, the default model initializer will be + used. + callbacks (`list[transformers.TrainerCallback]`): + The callbacks to use for training. + optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`): + The optimizer and scheduler to use for training. + preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`): + The function to use to preprocess the logits before computing the metrics. + peft_config (`dict`, defaults to `None`): + The PEFT configuration to use for training. 
If you pass a PEFT configuration, the model will be wrapped in + a PEFT model. + compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*): + The function to use to compute the metrics. Must take a `EvalPrediction` and return a dictionary string to + metric values. + model_adapter_name (`str`, defaults to `None`): + Name of the train target PEFT adapter, when using LoRA with multiple adapters. + ref_adapter_name (`str`, defaults to `None`): + Name of the reference PEFT adapter, when using LoRA with multiple adapters. + + """ + def __init__( + self, + model = None, + ref_model = None, + args = None, + train_dataset = None, + eval_dataset = None, + processing_class = None, + data_collator = None, + model_init = None, + callbacks = None, + preprocess_logits_for_metrics = None, + peft_config = None, + compute_metrics = None, + model_adapter_name = None, + ref_adapter_name = None, + **kwargs + ): + if args is None: args = UnslothKTOConfig() + use_bf16 = getattr(args, 'bf16', False) + if type(use_bf16) is not bool: use_bf16 = False + use_fp16 = getattr(args, 'fp16', False) + if type(use_fp16) is not bool: use_fp16 = False + force_float32 = False + full_finetuning = os.environ.get('UNSLOTH_ENABLE_FULL_FINETUNING', '0') == '1' + if not full_finetuning and (os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '1'): + print('Unsloth: Switching to float32 training since model cannot work with float16') + force_float32 = True + mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') + dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None) + if dtype is None: dtype = model.get_input_embeddings().dtype + from unsloth_zoo.utils import _get_dtype + dtype = _get_dtype(dtype) + float16 = dtype == torch.float16 + if not force_float32 and (float16 and use_bf16): raise TypeError('Unsloth: Model is in float16 precision but you want to use bfloat16 precision. Set fp16 to `True` and bf16 to `False`') + if not force_float32 and (not float16 and use_fp16): raise TypeError('Unsloth: Model is in bfloat16 precision but you want to use float16 precision. 
Set fp16 to `False` and bf16 to `True`') + if force_float32: + # Forced float32 training + args.fp16 = False + args.bf16 = False + os.environ['ACCELERATE_MIXED_PRECISION'] = 'no' + elif (not use_bf16 and not use_fp16) and mixed_precision_dtype == 'float32': + # Mixed precision training + args.fp16 = float16 + args.bf16 = not float16 + os.environ['ACCELERATE_MIXED_PRECISION'] = 'fp16' if float16 else 'bf16' + if getattr(args, 'eval_dataset', None) is not None and getattr(args, 'eval_strategy', 'no') == 'no': + args.eval_strategy = 'steps' + if getattr(args, 'eval_steps', None) is None: args.eval_steps = 0.1 + ga_steps = getattr(args, 'gradient_accumulation_steps', None) + if ga_steps is not None and ga_steps > 1: + from transformers import __version__ as transformers_version + if Version(transformers_version) <= Version('4.45.2'): + print('**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!\n' + '`pip install --upgrade --no-cache-dir --force-reinstall --no-deps unsloth transformers trl unsloth_zoo`') + if getattr(args, 'eval_strategy', 'no') != 'no': + eval_bsz = getattr(args, 'per_device_eval_batch_size', 8) + if eval_bsz == 8 and args.per_device_train_batch_size < eval_bsz: args.per_device_eval_batch_size = args.per_device_train_batch_size + if getattr(args, 'eval_accumulation_steps', None) is None and ga_steps is not None: args.eval_accumulation_steps = ga_steps + fp16_full_eval = getattr(args, 'fp16_full_eval', False) + if type(fp16_full_eval) is not bool: fp16_full_eval = False + bf16_full_eval = getattr(args, 'bf16_full_eval', False) + if type(bf16_full_eval) is not bool: bf16_full_eval = False + if args.fp16 and bf16_full_eval: args.bf16_full_eval = False; args.fp16_full_eval = True + if args.bf16 and fp16_full_eval: args.bf16_full_eval = True; args.fp16_full_eval = False + if force_float32: + args.bf16_full_eval = False + args.fp16_full_eval = False + elif os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') == 'bfloat16': + args.bf16_full_eval = True + args.fp16_full_eval = False + elif not bf16_full_eval and not fp16_full_eval: + args.bf16_full_eval = args.bf16 + args.fp16_full_eval = args.fp16 + _output_logits = False + if locals().get('compute_metrics', None) is not None: _output_logits = True + if locals().get('preprocess_logits_for_metrics', None) is not None: _output_logits = True + if _output_logits: + os.environ['UNSLOTH_RETURN_LOGITS'] = '1' + if 'max_seq_length' not in locals() and not hasattr(args, 'max_seq_length'): + pass + else: + model_max_seq_length = getattr(model, 'max_seq_length', None) + args_max_seq_length = getattr(args, 'max_seq_length', None) + if args_max_seq_length is None and model_max_seq_length is not None: + max_seq_length = model.max_seq_length + if hasattr(args, 'max_seq_length'): args.max_seq_length = max_seq_length + if model is not None and hasattr(model, 'for_training'): + model.for_training() + if 'tokenizer' in locals() and hasattr(tokenizer, 'padding_side'): tokenizer.padding_side = 'right' + if 'processing_class' in locals(): + if hasattr(processing_class, 'padding_side'): processing_class.padding_side = 'right' + if hasattr(processing_class, 'tokenizer') and hasattr(processing_class.tokenizer, 'padding_side'): processing_class.tokenizer.padding_side = 'right' + __tokenizer = processing_class if 'processing_class' in locals() else tokenizer + from unsloth_zoo.vision_utils import UnslothVisionDataCollator + if not isinstance(data_collator, UnslothVisionDataCollator): + if 
isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names: + data_collator = DataCollatorForSeq2Seq( + __tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False + if hasattr(args, 'dataset_text_field'): args.dataset_text_field = '' + if hasattr(args, 'dataset_kwargs'): args.dataset_kwargs = {'skip_prepare_dataset': True} + if not isinstance(data_collator, UnslothVisionDataCollator): + if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'): + if isinstance(data_collator, DataCollatorForSeq2Seq): + data_collator = DataCollatorForSeq2Seq( + __tokenizer.tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer.tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + other_metrics = [] + + from unsloth_zoo.logging_utils import PatchRLStatistics + PatchRLStatistics('kto_trainer', other_metrics) + + # [TODO] Fix up DataParallel multiplying batch sizes + # [TODO] DDP works, but DP seems to not work? [TODO] + if getattr(args, "parallel_mode", None) == ParallelMode.NOT_DISTRIBUTED and args.n_gpu > 1: + if getattr(args, "_n_gpu", 1) != 1: + args._n_gpu = 1 + if "model" in locals() and hasattr(model, "for_training"): + model.for_training() + super().__init__( + model = model, + ref_model = ref_model, + args = args, + train_dataset = train_dataset, + eval_dataset = eval_dataset, + processing_class = processing_class, + data_collator = data_collator, + model_init = model_init, + callbacks = callbacks, + preprocess_logits_for_metrics = preprocess_logits_for_metrics, + peft_config = peft_config, + compute_metrics = compute_metrics, + model_adapter_name = model_adapter_name, + ref_adapter_name = ref_adapter_name,**kwargs) + if "model" in locals() and hasattr(model, "for_inference"): + model.for_inference() + if hasattr(self, 'neftune_hook_handle'): + self.neftune_hook_handle.remove() + if hasattr(self, 'neftune_hook_handle'): del self.neftune_hook_handle + if getattr(args, 'neftune_noise_alpha', None) is not None: + model.get_input_embeddings().neftune_noise_alpha = self.neftune_noise_alpha + pass + if hasattr(self, 'accelerator'): + scaler = self.accelerator.scaler + current_model = model + while hasattr(current_model, 'model'): + current_model.accelerator_scaler = scaler + current_model = current_model.model + current_model.accelerator_scaler = scaler + pass + if hasattr(self, 'train'): + self.train = MethodType(prepare_for_training_mode(self.__class__.train), self) + pass + +pass + + +if hasattr(logger, "addFilter"): + import logging + class HideLoggingMessage(logging.Filter): + def __init__(self, text): self.text = text + def filter(self, x): return not (self.text in x.getMessage()) + pass + logger.addFilter(HideLoggingMessage("`use_cache=True`")) + diff --git a/unsloth_compiled_cache/UnslothNashMDTrainer.py b/unsloth_compiled_cache/UnslothNashMDTrainer.py new file mode 100644 index 
0000000000000000000000000000000000000000..9cb41b8ce12b59ccc8dbda7ab14ce02d3d27977e --- /dev/null +++ b/unsloth_compiled_cache/UnslothNashMDTrainer.py @@ -0,0 +1,1285 @@ +""" +2025.11.2 +2025.11.1 +4.57.2 +0.23.0 +__UNSLOTH_VERSIONING__ +""" + +# Unsloth auto generated code +# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . + +from torch import Tensor +import torch +import torch.nn as nn +from torch.nn import functional as F +from typing import Any, List, Optional, Tuple, Union, Dict, Set, Callable +from trl.trainer.nash_md_trainer import (Any, BaseImageProcessor, BasePairwiseJudge, Callable, Dataset, EvalPrediction, F, FeatureExtractionMixin, GeometricMixtureWrapper, IterableDataset, NashMDConfig, NashMDTrainer, OnlineDPOTrainer, OptimizerNames, Optional, PeftModel, PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin, SIMPLE_CHAT_TEMPLATE, TrainerCallback, Union, empty_cache, generate_model_card, get_comet_experiment_url, get_reward, is_conversational, is_peft_available, is_wandb_available, jinja2, maybe_apply_chat_template, nn, os, selective_log_softmax, textwrap, torch, truncate_right, unwrap_model_for_generation) + + +import os +from typing import * +from dataclasses import dataclass, field +from packaging.version import Version +import torch +import numpy as np +from contextlib import nullcontext +from torch.nn import functional as F +from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling as TransformersDataCollatorForLanguageModeling +from transformers.training_args import ParallelMode + +# Wrap trainer with padding to right and enable training mode +import functools +from types import MethodType +def prepare_for_training_mode(f): + @functools.wraps(f) + def wrapper(self, *args, **kwargs): + # Enable training mode + if hasattr(self, 'model') and hasattr(self.model, "for_training"): + self.model.for_training() + output = f(self, *args, **kwargs) + # Return inference mode + if hasattr(self, 'model') and hasattr(self.model, "for_inference"): + self.model.for_inference() + return output + return wrapper +pass + +torch_compile_options = { + "epilogue_fusion" : True, + "max_autotune" : False, + "shape_padding" : True, + "trace.enabled" : False, + "triton.cudagraphs" : False, +} + +@torch.compile(dynamic = True, fullgraph = True, options = torch_compile_options,) +def chunked_selective_log_softmax(logits, index): + # Split into 4 chunks only + chunked_logits = torch.chunk(logits.reshape(-1, logits.shape[-1]), chunks = 4, dim = 0) + chunked_index = torch.chunk(index.reshape(-1), chunks = 4, dim = 0) + all_per_token_logps = [] + # Below loop does the same as selective_log_softmax(chunk_logits, chunk_index) + for chunk_logits, chunk_index in zip(chunked_logits, chunked_index): + chunk_logits = chunk_logits.to(torch.float32) + selected_logits = torch.gather(chunk_logits, dim = -1, index = 
chunk_index.unsqueeze(-1)).squeeze(-1)
+        logsumexp_values = torch.logsumexp(chunk_logits, dim = -1)
+        per_token_logps = selected_logits - logsumexp_values
+        all_per_token_logps.append(per_token_logps)
+    pass
+    all_per_token_logps = torch.concat(all_per_token_logps)
+    all_per_token_logps = all_per_token_logps.reshape((logits.shape[0], logits.shape[1]))
+    return all_per_token_logps
+
+def calculate_pad_tokens_in_prompt(
+    input_ids: torch.Tensor,
+    logits_to_keep: int,
+    pad_token_id: int
+) -> torch.Tensor:
+    """
+    Given a prompt tensor, returns the number of left-padding tokens in each sequence, so [pad, pad, pad, cat] counts as 3 tokens.
+    """
+    if logits_to_keep >= input_ids.shape[1]:
+        raise ValueError("logits_to_keep must be smaller than the sequence length.")
+
+    prompt_section = input_ids[:, :-logits_to_keep]
+
+    padding_mask = (prompt_section == pad_token_id)
+
+    pad_token_counts = padding_mask.sum(dim=1)
+
+    return pad_token_counts
+
+def create_completion_attention_mask(
+    completion_input_ids: torch.Tensor,
+    left_pad_tokens_per_prompt: torch.Tensor,
+    max_left_pad: int,
+    pad_token_id: int
+) -> torch.Tensor:
+    """
+    Given a sequence [p,p,p,c,c,c,pad,pad,pad],
+
+    where p are extra prompt tokens left over from slicing the torch tensor, c are completion tokens,
+    and pad are pad tokens, this function builds a completion mask that zeroes out the pad
+    and p tokens, so in this example [0,0,0,1,1,1,0,0,0].
+    """
+    batch_size, completion_len = completion_input_ids.shape
+    device = completion_input_ids.device
+
+    num_tokens_to_mask = max_left_pad - left_pad_tokens_per_prompt
+
+    indices = torch.arange(completion_len, device=device).unsqueeze(0)
+    shift_mask = indices >= num_tokens_to_mask.unsqueeze(1)
+
+    non_padding_mask = (completion_input_ids != pad_token_id)
+
+    final_mask = shift_mask & non_padding_mask
+
+    return final_mask
+
+def left_pack_padding(tensor: torch.Tensor, pad_id: int) -> torch.Tensor:
+    """
+    Moves all padding tokens in each sequence of a batch to the right.
+    """
+    mask = (tensor != pad_id)
+    # Must use stable=True since the binary mask is unordered
+    sorted_indices = torch.argsort(mask, dim=1, descending=True, stable=True)
+    packed_tensor = torch.gather(tensor, 1, sorted_indices)
+    return packed_tensor
+
+def align_logprobs_with_mask(
+    logprob_tensor: torch.Tensor,
+    attention_mask: torch.Tensor,
+    pad_value: float = 0.0
+) -> torch.Tensor:
+    """
+    Aligns a log probability tensor with a given attention mask.
+    """
+
+    device = logprob_tensor.device
+    batch_size, logprob_seq_len = logprob_tensor.shape
+    mask_seq_len = attention_mask.shape[1]
+
+    padded_logprobs = torch.full(
+        attention_mask.shape,
+        fill_value=pad_value,
+        dtype=logprob_tensor.dtype,
+        device=device
+    )
+
+    left_pad_counts = torch.argmax(attention_mask, dim=1)
+
+    cols = torch.arange(logprob_seq_len, device=device)
+    dest_indices = left_pad_counts.unsqueeze(1) + cols
+
+    # Create destination row indices
+    # Shape: [batch_size, logprob_seq_len]
+    row_indices = torch.arange(batch_size, device=device).unsqueeze(1).expand_as(dest_indices)
+
+    # --- Filter out-of-bounds indices and perform assignment ---
+    # Create a mask to identify only the indices that are within the bounds
+    # of the target tensor's sequence length.
+    valid_mask = dest_indices < mask_seq_len
+
+    # Use this mask to select only the valid row indices, column indices,
+    # and the corresponding values from the logprob tensor.
+    # This flattens the selected elements into 1D tensors.
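+    # Example: for a row with attention_mask [0, 0, 1, 1, 1] and logprobs [a, b, c],
+    # left_pad_counts is 2, dest_indices is [2, 3, 4], and the output row becomes
+    # [pad, pad, a, b, c].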
+ valid_rows = row_indices[valid_mask] + valid_cols = dest_indices[valid_mask] + valid_vals = logprob_tensor[valid_mask] + + # Place the valid values into their correct positions in the padded tensor + # using a single, efficient advanced indexing operation. + padded_logprobs[valid_rows, valid_cols] = valid_vals + + return padded_logprobs +@dataclass +class UnslothNashMDConfig(NashMDConfig): + """ + +Configuration class for the [`NashMDTrainer`]. + +Subclass of [`OnlineDPOConfig`] we can use all its arguments and add the following: + +Parameters: + mixture_coef (`float` or `list[float]`, *optional*, defaults to `0.5`): + Logit mixture coefficient for the model and reference model. If a list of floats is provided then the + mixture coefficient is selected for each new epoch and the last coefficient is used for the rest of the + epochs. + + """ + vllm_sampling_params: Optional[Any] = field( + default = None, + metadata = {'help': 'vLLM SamplingParams'}, + ) + unsloth_num_chunks : Optional[int] = field( + default = -1, + metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'}, + ) + max_seq_length : Optional[int] = field( + default = None, + metadata = {'help': 'Maximum sequence length to truncate to.'}, + ) + def __init__( + self, + output_dir = None, + overwrite_output_dir = None, + do_train = False, + do_eval = False, + do_predict = False, + eval_strategy = 'no', + prediction_loss_only = False, + per_device_train_batch_size = 4, + per_device_eval_batch_size = 4, + per_gpu_train_batch_size = None, + per_gpu_eval_batch_size = None, + gradient_accumulation_steps = 2, + eval_accumulation_steps = 2, + eval_delay = 0, + torch_empty_cache_steps = 250, + learning_rate = 5e-05, + weight_decay = 0.01, + adam_beta1 = 0.9, + adam_beta2 = 0.999, + adam_epsilon = 1e-08, + max_grad_norm = 1.0, + num_train_epochs = 3.0, + max_steps = -1, + lr_scheduler_type = 'linear', + warmup_ratio = 0.1, + warmup_steps = 0, + log_level = 'passive', + log_level_replica = 'warning', + log_on_each_node = True, + logging_dir = None, + logging_strategy = 'steps', + logging_first_step = False, + logging_steps = 1, + logging_nan_inf_filter = False, + save_strategy = 'steps', + save_steps = 500, + save_total_limit = None, + save_safetensors = True, + save_on_each_node = False, + save_only_model = False, + restore_callback_states_from_checkpoint = False, + no_cuda = False, + use_cpu = False, + use_mps_device = False, + seed = 3407, + data_seed = 3407, + jit_mode_eval = False, + bf16 = False, + fp16 = False, + fp16_opt_level = 'O1', + half_precision_backend = 'auto', + bf16_full_eval = False, + fp16_full_eval = False, + tf32 = None, + local_rank = -1, + ddp_backend = None, + tpu_num_cores = None, + tpu_metrics_debug = False, + debug = '', + dataloader_drop_last = False, + eval_steps = None, + dataloader_num_workers = 0, + dataloader_prefetch_factor = None, + past_index = -1, + run_name = None, + disable_tqdm = None, + remove_unused_columns = True, + label_names = None, + load_best_model_at_end = False, + metric_for_best_model = None, + greater_is_better = None, + ignore_data_skip = False, + fsdp = None, + fsdp_min_num_params = 0, + fsdp_config = None, + fsdp_transformer_layer_cls_to_wrap = None, + accelerator_config = None, + parallelism_config = None, + deepspeed = None, + label_smoothing_factor = 0.0, + optim = 'adamw_8bit', + optim_args = None, + adafactor = False, + group_by_length = False, + length_column_name = 'length', + report_to = None, + project = 'huggingface', + trackio_space_id = 'trackio', 
+        ddp_find_unused_parameters = None,
+        ddp_bucket_cap_mb = None,
+        ddp_broadcast_buffers = None,
+        dataloader_pin_memory = True,
+        dataloader_persistent_workers = False,
+        skip_memory_metrics = True,
+        use_legacy_prediction_loop = False,
+        push_to_hub = False,
+        resume_from_checkpoint = None,
+        hub_model_id = None,
+        hub_strategy = 'every_save',
+        hub_token = None,
+        hub_private_repo = None,
+        hub_always_push = False,
+        hub_revision = None,
+        gradient_checkpointing = True,
+        gradient_checkpointing_kwargs = None,
+        include_inputs_for_metrics = False,
+        eval_do_concat_batches = True,
+        fp16_backend = 'auto',
+        push_to_hub_model_id = None,
+        push_to_hub_organization = None,
+        push_to_hub_token = None,
+        mp_parameters = '',
+        auto_find_batch_size = False,
+        full_determinism = False,
+        torchdynamo = None,
+        ray_scope = 'last',
+        ddp_timeout = 1800,
+        torch_compile = False,
+        torch_compile_backend = None,
+        torch_compile_mode = None,
+        include_tokens_per_second = False,
+        include_num_input_tokens_seen = False,
+        neftune_noise_alpha = None,
+        optim_target_modules = None,
+        batch_eval_metrics = False,
+        eval_on_start = False,
+        use_liger_kernel = False,
+        liger_kernel_config = None,
+        eval_use_gather_object = False,
+        average_tokens_across_devices = True,
+        reward_model_path = None,
+        judge = None,
+        max_new_tokens = 64,
+        max_length = 512,
+        temperature = 0.9,
+        top_p = 1.0,
+        top_k = None,
+        min_p = None,
+        repetition_penalty = 1.0,
+        generation_kwargs = {},
+        use_transformers_paged = False,
+        cache_implementation = None,
+        missing_eos_penalty = None,
+        loss_type = 'sigmoid',
+        disable_dropout = True,
+        use_vllm = False,
+        vllm_model_impl = 'vllm',
+        vllm_guided_decoding_regex = None,
+        vllm_gpu_memory_utilization = 0.55,
+        vllm_mode = 'colocate',
+        vllm_server_base_url = None,
+        vllm_server_host = '0.0.0.0',
+        vllm_server_port = 8000,
+        vllm_server_timeout = 240.0,
+        vllm_tensor_parallel_size = 1,
+        ds3_gather_for_generation = True,
+        model_init_kwargs = None,
+        reward_weights = None,
+        dataset_num_proc = None,
+        gpu_memory_utilization = None,
+        vllm_sampling_params = None,
+        unsloth_num_chunks = -1,
+        max_seq_length = None,
+        **kwargs,
+    ):
+        if learning_rate < 1e-7: print(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
+        if learning_rate > 1: print(f'Unsloth: Your learning rate of `{learning_rate}` is way too large (> 1)! Consider decreasing it to 1e-1, otherwise gradient updates will explode!')
+        if output_dir is None and save_strategy == 'steps' and save_steps == 500:
+            output_dir = 'unsloth_training_checkpoints'
+            save_strategy = 'no'
+        if dataset_num_proc is None:
+            from multiprocessing import cpu_count
+            dataset_num_proc = min(max(cpu_count()+4, 2), 64)
+        if temperature <= 0:
+            raise ValueError('Unsloth: Please set a positive non-zero temperature since your results will be wrong.')
+        elif temperature >= 10:
+            raise ValueError('Unsloth: Please set a positive non-zero temperature less than 10, since sampling will be quite erratic.')
+
+
+        super().__init__(
+            output_dir = output_dir,
+            overwrite_output_dir = overwrite_output_dir,
+            do_train = do_train,
+            do_eval = do_eval,
+            do_predict = do_predict,
+            eval_strategy = eval_strategy,
+            prediction_loss_only = prediction_loss_only,
+            per_device_train_batch_size = per_device_train_batch_size,
+            per_device_eval_batch_size = per_device_eval_batch_size,
+            per_gpu_train_batch_size = per_gpu_train_batch_size,
+            per_gpu_eval_batch_size = per_gpu_eval_batch_size,
+            gradient_accumulation_steps = gradient_accumulation_steps,
+            eval_accumulation_steps = eval_accumulation_steps,
+            eval_delay = eval_delay,
+            torch_empty_cache_steps = torch_empty_cache_steps,
+            learning_rate = learning_rate,
+            weight_decay = weight_decay,
+            adam_beta1 = adam_beta1,
+            adam_beta2 = adam_beta2,
+            adam_epsilon = adam_epsilon,
+            max_grad_norm = max_grad_norm,
+            num_train_epochs = num_train_epochs,
+            max_steps = max_steps,
+            lr_scheduler_type = lr_scheduler_type,
+            warmup_ratio = warmup_ratio,
+            warmup_steps = warmup_steps,
+            log_level = log_level,
+            log_level_replica = log_level_replica,
+            log_on_each_node = log_on_each_node,
+            logging_dir = logging_dir,
+            logging_strategy = logging_strategy,
+            logging_first_step = logging_first_step,
+            logging_steps = logging_steps,
+            logging_nan_inf_filter = logging_nan_inf_filter,
+            save_strategy = save_strategy,
+            save_steps = save_steps,
+            save_total_limit = save_total_limit,
+            save_safetensors = save_safetensors,
+            save_on_each_node = save_on_each_node,
+            save_only_model = save_only_model,
+            restore_callback_states_from_checkpoint = restore_callback_states_from_checkpoint,
+            no_cuda = no_cuda,
+            use_cpu = use_cpu,
+            use_mps_device = use_mps_device,
+            seed = seed,
+            data_seed = data_seed,
+            jit_mode_eval = jit_mode_eval,
+            bf16 = bf16,
+            fp16 = fp16,
+            fp16_opt_level = fp16_opt_level,
+            half_precision_backend = half_precision_backend,
+            bf16_full_eval = bf16_full_eval,
+            fp16_full_eval = fp16_full_eval,
+            tf32 = tf32,
+            local_rank = local_rank,
+            ddp_backend = ddp_backend,
+            tpu_num_cores = tpu_num_cores,
+            tpu_metrics_debug = tpu_metrics_debug,
+            debug = debug,
+            dataloader_drop_last = dataloader_drop_last,
+            eval_steps = eval_steps,
+            dataloader_num_workers = dataloader_num_workers,
+            dataloader_prefetch_factor = dataloader_prefetch_factor,
+            past_index = past_index,
+            run_name = run_name,
+            disable_tqdm = disable_tqdm,
+            remove_unused_columns = remove_unused_columns,
+            label_names = label_names,
+            load_best_model_at_end = load_best_model_at_end,
+            metric_for_best_model = metric_for_best_model,
+            greater_is_better = greater_is_better,
+            ignore_data_skip = ignore_data_skip,
+            fsdp = fsdp,
+            fsdp_min_num_params = fsdp_min_num_params,
+            fsdp_config = fsdp_config,
+            fsdp_transformer_layer_cls_to_wrap = fsdp_transformer_layer_cls_to_wrap,
+            accelerator_config = accelerator_config,
+            parallelism_config = parallelism_config,
+            deepspeed = deepspeed,
+
label_smoothing_factor = label_smoothing_factor, + optim = optim, + optim_args = optim_args, + adafactor = adafactor, + group_by_length = group_by_length, + length_column_name = length_column_name, + report_to = report_to, + project = project, + trackio_space_id = trackio_space_id, + ddp_find_unused_parameters = ddp_find_unused_parameters, + ddp_bucket_cap_mb = ddp_bucket_cap_mb, + ddp_broadcast_buffers = ddp_broadcast_buffers, + dataloader_pin_memory = dataloader_pin_memory, + dataloader_persistent_workers = dataloader_persistent_workers, + skip_memory_metrics = skip_memory_metrics, + use_legacy_prediction_loop = use_legacy_prediction_loop, + push_to_hub = push_to_hub, + resume_from_checkpoint = resume_from_checkpoint, + hub_model_id = hub_model_id, + hub_strategy = hub_strategy, + hub_token = hub_token, + hub_private_repo = hub_private_repo, + hub_always_push = hub_always_push, + hub_revision = hub_revision, + gradient_checkpointing = gradient_checkpointing, + gradient_checkpointing_kwargs = gradient_checkpointing_kwargs, + include_inputs_for_metrics = include_inputs_for_metrics, + eval_do_concat_batches = eval_do_concat_batches, + fp16_backend = fp16_backend, + push_to_hub_model_id = push_to_hub_model_id, + push_to_hub_organization = push_to_hub_organization, + push_to_hub_token = push_to_hub_token, + mp_parameters = mp_parameters, + auto_find_batch_size = auto_find_batch_size, + full_determinism = full_determinism, + torchdynamo = torchdynamo, + ray_scope = ray_scope, + ddp_timeout = ddp_timeout, + torch_compile = torch_compile, + torch_compile_backend = torch_compile_backend, + torch_compile_mode = torch_compile_mode, + include_tokens_per_second = include_tokens_per_second, + include_num_input_tokens_seen = include_num_input_tokens_seen, + neftune_noise_alpha = neftune_noise_alpha, + optim_target_modules = optim_target_modules, + batch_eval_metrics = batch_eval_metrics, + eval_on_start = eval_on_start, + use_liger_kernel = use_liger_kernel, + liger_kernel_config = liger_kernel_config, + eval_use_gather_object = eval_use_gather_object, + average_tokens_across_devices = average_tokens_across_devices, + reward_model_path = reward_model_path, + judge = judge, + max_new_tokens = max_new_tokens, + max_length = max_length, + temperature = temperature, + top_p = top_p, + top_k = top_k, + min_p = min_p, + repetition_penalty = repetition_penalty, + generation_kwargs = generation_kwargs, + use_transformers_paged = use_transformers_paged, + cache_implementation = cache_implementation, + missing_eos_penalty = missing_eos_penalty, + loss_type = loss_type, + disable_dropout = disable_dropout, + use_vllm = use_vllm, + vllm_model_impl = vllm_model_impl, + vllm_guided_decoding_regex = vllm_guided_decoding_regex, + vllm_gpu_memory_utilization = vllm_gpu_memory_utilization, + vllm_mode = vllm_mode, + vllm_server_base_url = vllm_server_base_url, + vllm_server_host = vllm_server_host, + vllm_server_port = vllm_server_port, + vllm_server_timeout = vllm_server_timeout, + vllm_tensor_parallel_size = vllm_tensor_parallel_size, + ds3_gather_for_generation = ds3_gather_for_generation, + model_init_kwargs = model_init_kwargs, + reward_weights = reward_weights, + dataset_num_proc = dataset_num_proc, + gpu_memory_utilization = gpu_memory_utilization,**kwargs) + self.vllm_sampling_params = vllm_sampling_params + self.unsloth_num_chunks = unsloth_num_chunks + self.max_seq_length = max_seq_length +pass + +class _UnslothNashMDTrainer(OnlineDPOTrainer): + r""" + Initialize NashMDTrainer as a subclass of 
[`OnlineDPOTrainer`].
+
+    Args:
+        model (`transformers.PreTrainedModel`):
+            The model to train, preferably an `AutoModelForCausalLM`.
+        ref_model (`PreTrainedModelWrapper`):
+            Hugging Face transformer model with a causal language modeling head. Used for implicit reward computation
+            and loss. If no reference model is provided, the trainer will create a reference model with the same
+            architecture as the model to be optimized.
+        reward_funcs (`transformers.PreTrainedModel`):
+            The reward model to score completions with, preferably an `AutoModelForSequenceClassification`.
+        judge (`BasePairwiseJudge`):
+            The judge to use for pairwise comparison of model completions.
+        args (`NashMDConfig`):
+            The NashMD config arguments to use for training.
+        data_collator (`transformers.DataCollator`):
+            The data collator to use for training. If None is specified, the default data collator
+            (`DPODataCollatorWithPadding`) will be used which will pad the sequences to the maximum length of the
+            sequences in the batch, given a dataset of paired sequences.
+        train_dataset (`datasets.Dataset`):
+            The dataset to use for training.
+        eval_dataset (`datasets.Dataset`):
+            The dataset to use for evaluation.
+        processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
+            Processing class used to process the data. If provided, will be used to automatically process the inputs
+            for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
+            reuse the fine-tuned model.
+        peft_config (`dict`):
+            The PEFT config to use for training.
+        compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
+            The function to use to compute the metrics. Must take an `EvalPrediction` and return a dictionary mapping
+            strings to metric values.
+        callbacks (`list[transformers.TrainerCallback]`):
+            The callbacks to use for training.
+        optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`):
+            The optimizer and scheduler to use for training.
+        preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`):
+            The function to use to preprocess the logits before computing the metrics.
+
+    .. deprecated:: 0.22.0
+        The following parameters are deprecated and will be removed in a future version:
+
+        * `reward_model`: Use `reward_funcs` instead. For example, change `reward_model=model` to `reward_funcs=model`.
+ """ + + _tag_names = ["trl", "nash-md"] + + def __init__( + self, + model: Union[PreTrainedModel, nn.Module] = None, + ref_model: Union[PreTrainedModel, nn.Module] = None, + reward_funcs: Union[PreTrainedModel, nn.Module, None] = None, + judge: Optional[BasePairwiseJudge] = None, + args: Optional[NashMDConfig] = None, + data_collator: Optional[Callable] = None, + train_dataset: Optional[Union[Dataset, IterableDataset]] = None, + eval_dataset: Optional[Union[Dataset, dict[str, Dataset]]] = None, + processing_class: Optional[ + Union[PreTrainedTokenizerBase, BaseImageProcessor, FeatureExtractionMixin, ProcessorMixin] + ] = None, + peft_config: Optional[dict] = None, + compute_metrics: Optional[Callable[[EvalPrediction], dict]] = None, + callbacks: Optional[list[TrainerCallback]] = None, + optimizers: tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None), + preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None, + # Deprecated parameters + reward_model: Optional[Union[PreTrainedModel, nn.Module]] = None, + ) -> None: + super().__init__( + model=model, + ref_model=ref_model, + reward_funcs=reward_funcs, + judge=judge, + args=args, + data_collator=data_collator, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + processing_class=processing_class, + reward_processing_classes=processing_class, + peft_config=peft_config, + compute_metrics=compute_metrics, + callbacks=callbacks, + optimizers=optimizers, + preprocess_logits_for_metrics=preprocess_logits_for_metrics, + reward_model=reward_model, + ) + + self._mixture_coef = self.args.mixture_coef + + # Overwrite the stats dictionary to include NashMD specific statistics + self.stats = { + # Remove "non_score_reward", "rlhf_reward", "scores_margin" + # Add "mixture_coef" + "loss/kl": [], + "objective/entropy": [], + "loss/score": [], + "rewards/probabilities": [], + "rewards/accuracies": [], + "rewards/margins": [], + "logps/chosen": [], + "logps/rejected": [], + "val/model_contain_eos_token": [], + "val/ref_contain_eos_token": [], + "beta": [], + "mixture_coef": [], + } + if self.reward_funcs is not None: + if len(self.reward_funcs) != 1: + raise ValueError("NashMDTrainer only supports one reward function/model.") + self.reward_funcs = self.reward_funcs[0] + self.stats["rewards/chosen"] = [] + self.stats["rewards/rejected"] = [] + + @property + def mixture_coef(self): + if isinstance(self._mixture_coef, list): + epoch = self.state.epoch + return self._mixture_coef[epoch] if epoch < len(self._mixture_coef) else self._mixture_coef[-1] + else: + return self._mixture_coef + + def _generate_completions(self, model, prompts): + # Generate completions from the policy model. + with unwrap_model_for_generation(model, self.accelerator) as unwrapped_policy_for_gen_ctx: + model_output = unwrapped_policy_for_gen_ctx.generate( + input_ids=prompts["input_ids"], + attention_mask=prompts["attention_mask"], + generation_config=self.generation_config, + ) + + # Get the DDP/FSDP unwrapped version of the main model. + # This will be the policy model for GeometricMixtureWrapper (PEFT adapters active if PEFT is used). + policy_model_for_gmw = self.accelerator.unwrap_model(model) + + # Determine the correct reference model for GeometricMixtureWrapper. + # This also needs to be DDP/FSDP unwrapped. + ref_model_for_gmw: torch.nn.Module + if self.ref_model is None: + # No explicit ref_model is provided. + # Use the base of the main `model` if it's a PEFT model. 
+ # policy_model_for_gmw is already DDP-unwrapped. + if is_peft_available() and isinstance(policy_model_for_gmw, PeftModel): + ref_model_for_gmw = policy_model_for_gmw.get_base_model() + else: + # Not a PEFT model (or PEFT not available), or already a base model. + # Use the DDP-unwrapped policy model itself as the reference. + ref_model_for_gmw = policy_model_for_gmw + else: + # An explicit ref_model is provided. Unwrap it for DDP/FSDP. + ref_model_for_gmw = self.accelerator.unwrap_model(self.ref_model) + + # Both models given to GeometricMixtureWrapper (policy_model_for_gmw and ref_model_for_gmw) are DDP-unwrapped. + with torch.no_grad(): # Ensure no_grad context for mixture model generation + mixture_model = GeometricMixtureWrapper( + model=policy_model_for_gmw, + ref_model=ref_model_for_gmw, + generation_config=self.generation_config, + mixture_coef=self.mixture_coef, + device=self.accelerator.device, + ) + + mixture_output = mixture_model.generate( + input_ids=prompts["input_ids"], + attention_mask=prompts["attention_mask"], + generation_config=self.generation_config, + ) + + return model_output, mixture_output + + def _process_completions(self, model_output, mixture_output, prompts): + context_length = prompts["input_ids"].shape[1] + + # Process model completions + model_completion_ids = model_output[:, context_length:] + model_completion_ids, model_completion_mask = truncate_right( + model_completion_ids, self.processing_class.eos_token_id, self.processing_class.pad_token_id + ) + model_data = { + "input_ids": torch.cat((prompts["input_ids"], model_completion_ids), dim=1), + "attention_mask": torch.cat((prompts["attention_mask"], model_completion_mask), dim=1), + "raw": prompts["raw"], + } + + # Process reference model completions + mixture_completion_ids = mixture_output[:, context_length:] + mixture_completion_ids, mixture_completion_mask = truncate_right( + mixture_completion_ids, self.processing_class.eos_token_id, self.processing_class.pad_token_id + ) + mixture_data = { + "input_ids": torch.cat((prompts["input_ids"], mixture_completion_ids), dim=1), + "attention_mask": torch.cat((prompts["attention_mask"], mixture_completion_mask), dim=1), + "raw": prompts["raw"], + } + + return model_data, mixture_data + + def _compute_rewards(self, model_data, mixture_data, context_length): + with torch.no_grad(): + _, model_scores, _ = get_reward( + self.reward_funcs, model_data["input_ids"], self.processing_class.pad_token_id, context_length + ) + _, mixture_scores, _ = get_reward( + self.reward_funcs, mixture_data["input_ids"], self.processing_class.pad_token_id, context_length + ) + + # Apply EOS penalty if needed + if self.args.missing_eos_penalty is not None: + model_contain_eos = torch.any(model_data["input_ids"] == self.processing_class.eos_token_id, dim=-1) + mixture_contain_eos = torch.any(mixture_data["input_ids"] == self.processing_class.eos_token_id, dim=-1) + model_scores[~model_contain_eos] -= self.args.missing_eos_penalty + mixture_scores[~mixture_contain_eos] -= self.args.missing_eos_penalty + + return model_scores, mixture_scores + + def _compute_judge(self, model_data, mixture_data, context_length): + prompts = model_data["raw"] + model_data_completions = self.processing_class.batch_decode( + model_data["input_ids"][:, context_length:], skip_special_tokens=True + ) + model_data_completions = [completion.strip() for completion in model_data_completions] + + mixture_data_completions = self.processing_class.batch_decode( + mixture_data["input_ids"][:, context_length:], 
skip_special_tokens=True + ) + mixture_data_completions = [completion.strip() for completion in mixture_data_completions] + if is_conversational({"prompt": prompts[0]}): + model_data_completions = [ + [{"role": "assistant", "content": completion}] for completion in model_data_completions + ] + environment = jinja2.Environment() + template = environment.from_string(SIMPLE_CHAT_TEMPLATE) + prompts = [template.render(messages=message) for message in prompts] + model_data_completions = [template.render(messages=completion) for completion in model_data_completions] + + mixture_data_completions = [ + [{"role": "assistant", "content": completion}] for completion in mixture_data_completions + ] + mixture_data_completions = [ + template.render(messages=completion) for completion in mixture_data_completions + ] + + probability = self.judge.judge( + prompts, + list(zip(model_data_completions, mixture_data_completions)), + return_scores=True, + ) + return torch.tensor(probability, device=model_data["input_ids"].device) + + def _compute_logprobs(self, model, model_data, context_length): + def compute_logprobs_for_data(m, data): + output = m(data["input_ids"], attention_mask=data["attention_mask"]) + logits = output.logits[:, context_length - 1 : -1] + token_logprobs = selective_log_softmax(logits, data["input_ids"][:, context_length:]) + return token_logprobs + + # Compute logprobs for model completions under the model + model_logprobs_model_data = compute_logprobs_for_data(model, model_data) + + # Compute logprobs of model completions under the reference model + with torch.no_grad(): + if self.ref_model is None: + with model.disable_adapter(): + ref_logprobs_model_data = compute_logprobs_for_data(model, model_data) + else: + ref_logprobs_model_data = compute_logprobs_for_data(self.ref_model, model_data) + + # Mask padding tokens + model_padding_mask = model_data["attention_mask"][:, context_length:] == 0 + model_logprobs_model_data = model_logprobs_model_data.masked_fill(model_padding_mask, 0.0) + ref_logprobs_model_data = ref_logprobs_model_data.masked_fill(model_padding_mask, 0.0) + + return (model_logprobs_model_data, ref_logprobs_model_data) + + def _compute_losses( + self, + model_logprobs_model_data, + ref_logprobs_model_data, + probability, + ): + # reinforce score where 0.5 is a control variate + score = (probability - 0.5) * model_logprobs_model_data.sum(1) + + # kl divergence via reinforce + with torch.no_grad(): + log_ratio = model_logprobs_model_data - ref_logprobs_model_data + kl_div_log = log_ratio.sum(1) + kl_div_loss = (log_ratio * model_logprobs_model_data).sum(1) + + # final loss + loss = self.beta * kl_div_loss - score + + return loss.mean(), score, kl_div_log + + def _log_statistics( + self, + model_data, + mixture_data, + model_logprobs_model_data, + ref_logprobs_model_data, + probability, + score, + kl_div, + context_length, + model_scores=None, + mixture_scores=None, + ): + # Helper function to gather and compute mean + def gather_mean(tensor): + return self.accelerator.gather_for_metrics(tensor).mean().item() + + # Log score + self.stats["loss/score"].append(gather_mean(score)) + # Log KL divergence + self.stats["loss/kl"].append(gather_mean(kl_div)) + + # Log logprobs + model_logprobs_model_data_sum = model_logprobs_model_data.sum(1) + ref_logprobs_model_data_sum = ref_logprobs_model_data.sum(1) + + self.stats["logps/chosen"].append(gather_mean(model_logprobs_model_data_sum)) + self.stats["logps/rejected"].append(gather_mean(ref_logprobs_model_data_sum)) + + # Log rewards + if 
self.reward_funcs is not None: + self.stats["rewards/chosen"].append(gather_mean(model_scores)) + self.stats["rewards/rejected"].append(gather_mean(mixture_scores)) + + # Log probabilities + self.stats["rewards/probabilities"].append(gather_mean(probability)) + + # Calculate entropy for model data + entropy_model_data = -model_logprobs_model_data.sum(1) + self.stats["objective/entropy"].append(gather_mean(entropy_model_data)) + + # Calculate margins + margin = model_logprobs_model_data_sum - ref_logprobs_model_data_sum + self.stats["rewards/margins"].append(gather_mean(margin)) + + # Calculate accuracy + accuracy = (margin > 0).float() + self.stats["rewards/accuracies"].append(gather_mean(accuracy)) + + # Log EOS token statistics + model_eos = (model_data["input_ids"][:, context_length:] == self.processing_class.eos_token_id).any(dim=1) + mixture_eos = (mixture_data["input_ids"][:, context_length:] == self.processing_class.eos_token_id).any(dim=1) + self.stats["val/model_contain_eos_token"].append(gather_mean(model_eos.float())) + self.stats["val/ref_contain_eos_token"].append(gather_mean(mixture_eos.float())) + + # Log beta and mixture coef + self.stats["beta"].append(self.beta) + self.stats["mixture_coef"].append(self.mixture_coef) + + def training_step( + self, model: nn.Module, inputs: dict[str, Union[torch.Tensor, Any]], num_items_in_batch: Optional[int] = None + ) -> torch.Tensor: + model.train() + + # Apply chat template and tokenize the input + batch_size = len(next(iter(inputs.values()))) + prompts = inputs["prompt"] + inputs = [{k: v[i] for k, v in inputs.items()} for i in range(batch_size)] + inputs = [maybe_apply_chat_template(x, self.processing_class) for x in inputs] + inputs = [self.tokenize_row(x, self.model.config.is_encoder_decoder, self.processing_class) for x in inputs] + inputs = self.data_collator(inputs) + + # need the prompt_ only + inputs = self._prepare_inputs(inputs) + context_length = inputs["prompt_input_ids"].shape[1] + prompts = { + "input_ids": inputs["prompt_input_ids"], + "attention_mask": inputs["prompt_attention_mask"], + "raw": prompts, + } + del inputs + + # Sample completions from both the model and the reference model + model_output, mixture_output = self._generate_completions(model, prompts) + + # Process model completions + model_data, mixture_data = self._process_completions(model_output, mixture_output, prompts) + + # Compute rewards + if self.reward_funcs is not None: + model_scores, mixture_scores = self._compute_rewards(model_data, mixture_data, context_length) + # probability of the model data vs the mixture data + probability = F.sigmoid(model_scores - mixture_scores) + else: + model_scores, mixture_scores = None, None + probability = self._compute_judge(model_data, mixture_data, context_length) + + # Compute logprobs + model_logprobs_model_data, ref_logprobs_model_data = self._compute_logprobs(model, model_data, context_length) + + # Compute loss + loss, score, kl_div = self._compute_losses(model_logprobs_model_data, ref_logprobs_model_data, probability) + + # Log everything + self._log_statistics( + model_data, + mixture_data, + model_logprobs_model_data.detach(), + ref_logprobs_model_data, + probability, + score.detach(), + kl_div.detach(), + context_length, + model_scores, + mixture_scores, + ) + + if ( + self.args.torch_empty_cache_steps is not None + and self.state.global_step % self.args.torch_empty_cache_steps == 0 + ): + empty_cache() + + kwargs = {} + # For LOMO optimizers you need to explicitly use the learning rate + if 
self.args.optim in [OptimizerNames.LOMO, OptimizerNames.ADALOMO]: + kwargs["learning_rate"] = self._get_learning_rate() + + if self.args.n_gpu > 1: + loss = loss.mean() # mean() to average on multi-gpu parallel training + + if self.use_apex: + with amp.scale_loss(loss, self.optimizer) as scaled_loss: + scaled_loss.backward() + else: + self.accelerator.backward(loss, **kwargs) + + return loss.detach() / self.args.gradient_accumulation_steps + + def create_model_card( + self, + model_name: Optional[str] = None, + dataset_name: Optional[str] = None, + tags: Union[str, list[str], None] = None, + ): + """ + Creates a draft of a model card using the information available to the `Trainer`. + + Args: + model_name (`str` or `None`, *optional*, defaults to `None`): + Name of the model. + dataset_name (`str` or `None`, *optional*, defaults to `None`): + Name of the dataset used for training. + tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`): + Tags to be associated with the model card. + """ + if not self.is_world_process_zero(): + return + + if hasattr(self.model.config, "_name_or_path") and not os.path.isdir(self.model.config._name_or_path): + base_model = self.model.config._name_or_path + else: + base_model = None + + # normalize `tags` to a mutable set + if tags is None: + tags = set() + elif isinstance(tags, str): + tags = {tags} + else: + tags = set(tags) + + if hasattr(self.model.config, "unsloth_version"): + tags.add("unsloth") + + if "JOB_ID" in os.environ: + tags.add("hf_jobs") + + tags.update(self._tag_names) + + # docstyle-ignore + citation = textwrap.dedent("""\ + @inproceedings{munos2024nash, + title = {{Nash Learning from Human Feedback}}, + author = {R{\'{e}}mi Munos and Michal Valko and Daniele Calandriello and Mohammad Gheshlaghi Azar and Mark Rowland and Zhaohan Daniel Guo and Yunhao Tang and Matthieu Geist and Thomas Mesnard and C{\\^{o}}me Fiegel and Andrea Michi and Marco Selvi and Sertan Girgin and Nikola Momchev and Olivier Bachem and Daniel J. Mankowitz and Doina Precup and Bilal Piot}, + year = 2024, + booktitle = {Forty-first International Conference on Machine Learning, {ICML} 2024, Vienna, Austria, July 21-27, 2024}, + publisher = {OpenReview.net}, + url = {https://openreview.net/forum?id=Y5AmNYiyCQ} + }""") + + model_card = generate_model_card( + base_model=base_model, + model_name=model_name, + hub_model_id=self.hub_model_id, + dataset_name=dataset_name, + tags=tags, + wandb_url=wandb.run.url if is_wandb_available() and wandb.run is not None else None, + comet_url=get_comet_experiment_url(), + trainer_name="Nash-MD", + trainer_citation=citation, + paper_title="Nash Learning from Human Feedback", + paper_id="2312.00886", + ) + + model_card.save(os.path.join(self.args.output_dir, "README.md")) +class UnslothNashMDTrainer(_UnslothNashMDTrainer): + """ + +Initialize NashMDTrainer as a subclass of [`OnlineDPOConfig`]. + +Args: + model (`transformers.PreTrainedModel`): + The model to train, preferably an `AutoModelForCausalLM`. + ref_model (`PreTrainedModelWrapper`): + Hugging Face transformer model with a casual language modelling head. Used for implicit reward computation + and loss. If no reference model is provided, the trainer will create a reference model with the same + architecture as the model to be optimized. + reward_funcs (`transformers.PreTrainedModel`): + The reward model to score completions with, preferably an `AutoModelForSequenceClassification`. 
+ judge (`BasePairwiseJudge`): + The judge to use for pairwise comparison of model completions. + args (`NashMDConfig`): + The NashMD config arguments to use for training. + data_collator (`transformers.DataCollator`): + The data collator to use for training. If None is specified, the default data collator + (`DPODataCollatorWithPadding`) will be used which will pad the sequences to the maximum length of the + sequences in the batch, given a dataset of paired sequences. + train_dataset (`datasets.Dataset`): + The dataset to use for training. + eval_dataset (`datasets.Dataset`): + The dataset to use for evaluation. + processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`): + Processing class used to process the data. If provided, will be used to automatically process the inputs + for the model, and it will be saved along the model to make it easier to rerun an interrupted training or + reuse the fine-tuned model. + peft_config (`dict`): + The peft config to use for training. + compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*): + The function to use to compute the metrics. Must take a `EvalPrediction` and return a dictionary string to + metric values. + callbacks (`list[transformers.TrainerCallback]`): + The callbacks to use for training. + optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`): + The optimizer and scheduler to use for training. + preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`): + The function to use to preprocess the logits before computing the metrics. + +.. deprecated:: 0.22.0 + The following parameters are deprecated and will be removed in a future version: + + * `reward_model`: Use `reward_funcs` instead. For example, change `reward_model=model` to `reward_funcs=model`. + + """ + def __init__( + self, + model = None, + ref_model = None, + reward_funcs = None, + judge = None, + args = None, + data_collator = None, + train_dataset = None, + eval_dataset = None, + processing_class = None, + peft_config = None, + compute_metrics = None, + callbacks = None, + preprocess_logits_for_metrics = None, + reward_model = None, + **kwargs + ): + if args is None: args = UnslothNashMDConfig() + use_bf16 = getattr(args, 'bf16', False) + if type(use_bf16) is not bool: use_bf16 = False + use_fp16 = getattr(args, 'fp16', False) + if type(use_fp16) is not bool: use_fp16 = False + force_float32 = False + full_finetuning = os.environ.get('UNSLOTH_ENABLE_FULL_FINETUNING', '0') == '1' + if not full_finetuning and (os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '1'): + print('Unsloth: Switching to float32 training since model cannot work with float16') + force_float32 = True + mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') + dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None) + if dtype is None: dtype = model.get_input_embeddings().dtype + from unsloth_zoo.utils import _get_dtype + dtype = _get_dtype(dtype) + float16 = dtype == torch.float16 + if not force_float32 and (float16 and use_bf16): raise TypeError('Unsloth: Model is in float16 precision but you want to use bfloat16 precision. Set fp16 to `True` and bf16 to `False`') + if not force_float32 and (not float16 and use_fp16): raise TypeError('Unsloth: Model is in bfloat16 precision but you want to use float16 precision. 
Set fp16 to `False` and bf16 to `True`') + if force_float32: + # Forced float32 training + args.fp16 = False + args.bf16 = False + os.environ['ACCELERATE_MIXED_PRECISION'] = 'no' + elif (not use_bf16 and not use_fp16) and mixed_precision_dtype == 'float32': + # Mixed precision training + args.fp16 = float16 + args.bf16 = not float16 + os.environ['ACCELERATE_MIXED_PRECISION'] = 'fp16' if float16 else 'bf16' + if getattr(args, 'eval_dataset', None) is not None and getattr(args, 'eval_strategy', 'no') == 'no': + args.eval_strategy = 'steps' + if getattr(args, 'eval_steps', None) is None: args.eval_steps = 0.1 + ga_steps = getattr(args, 'gradient_accumulation_steps', None) + if ga_steps is not None and ga_steps > 1: + from transformers import __version__ as transformers_version + if Version(transformers_version) <= Version('4.45.2'): + print('**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!\n' + '`pip install --upgrade --no-cache-dir --force-reinstall --no-deps unsloth transformers trl unsloth_zoo`') + if getattr(args, 'eval_strategy', 'no') != 'no': + eval_bsz = getattr(args, 'per_device_eval_batch_size', 8) + if eval_bsz == 8 and args.per_device_train_batch_size < eval_bsz: args.per_device_eval_batch_size = args.per_device_train_batch_size + if getattr(args, 'eval_accumulation_steps', None) is None and ga_steps is not None: args.eval_accumulation_steps = ga_steps + fp16_full_eval = getattr(args, 'fp16_full_eval', False) + if type(fp16_full_eval) is not bool: fp16_full_eval = False + bf16_full_eval = getattr(args, 'bf16_full_eval', False) + if type(bf16_full_eval) is not bool: bf16_full_eval = False + if args.fp16 and bf16_full_eval: args.bf16_full_eval = False; args.fp16_full_eval = True + if args.bf16 and fp16_full_eval: args.bf16_full_eval = True; args.fp16_full_eval = False + if force_float32: + args.bf16_full_eval = False + args.fp16_full_eval = False + elif os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') == 'bfloat16': + args.bf16_full_eval = True + args.fp16_full_eval = False + elif not bf16_full_eval and not fp16_full_eval: + args.bf16_full_eval = args.bf16 + args.fp16_full_eval = args.fp16 + _output_logits = False + if locals().get('compute_metrics', None) is not None: _output_logits = True + if locals().get('preprocess_logits_for_metrics', None) is not None: _output_logits = True + if _output_logits: + os.environ['UNSLOTH_RETURN_LOGITS'] = '1' + if 'max_seq_length' not in locals() and not hasattr(args, 'max_seq_length'): + pass + else: + model_max_seq_length = getattr(model, 'max_seq_length', None) + args_max_seq_length = getattr(args, 'max_seq_length', None) + if args_max_seq_length is None and model_max_seq_length is not None: + max_seq_length = model.max_seq_length + if hasattr(args, 'max_seq_length'): args.max_seq_length = max_seq_length + if model is not None and hasattr(model, 'for_training'): + model.for_training() + if 'tokenizer' in locals() and hasattr(tokenizer, 'padding_side'): tokenizer.padding_side = 'right' + if 'processing_class' in locals(): + if hasattr(processing_class, 'padding_side'): processing_class.padding_side = 'right' + if hasattr(processing_class, 'tokenizer') and hasattr(processing_class.tokenizer, 'padding_side'): processing_class.tokenizer.padding_side = 'right' + __tokenizer = processing_class if 'processing_class' in locals() else tokenizer + from unsloth_zoo.vision_utils import UnslothVisionDataCollator + if not isinstance(data_collator, UnslothVisionDataCollator): + if 
isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names: + data_collator = DataCollatorForSeq2Seq( + __tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False + if hasattr(args, 'dataset_text_field'): args.dataset_text_field = '' + if hasattr(args, 'dataset_kwargs'): args.dataset_kwargs = {'skip_prepare_dataset': True} + if not isinstance(data_collator, UnslothVisionDataCollator): + if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'): + if isinstance(data_collator, DataCollatorForSeq2Seq): + data_collator = DataCollatorForSeq2Seq( + __tokenizer.tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer.tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + other_metrics = [] + + from unsloth_zoo.logging_utils import PatchRLStatistics + PatchRLStatistics('nash_md_trainer', other_metrics) + + # [TODO] Fix up DataParallel multiplying batch sizes + # [TODO] DDP works, but DP seems to not work? [TODO] + if getattr(args, "parallel_mode", None) == ParallelMode.NOT_DISTRIBUTED and args.n_gpu > 1: + if getattr(args, "_n_gpu", 1) != 1: + args._n_gpu = 1 + if "model" in locals() and hasattr(model, "for_training"): + model.for_training() + super().__init__( + model = model, + ref_model = ref_model, + reward_funcs = reward_funcs, + judge = judge, + args = args, + data_collator = data_collator, + train_dataset = train_dataset, + eval_dataset = eval_dataset, + processing_class = processing_class, + peft_config = peft_config, + compute_metrics = compute_metrics, + callbacks = callbacks, + preprocess_logits_for_metrics = preprocess_logits_for_metrics, + reward_model = reward_model,**kwargs) + if "model" in locals() and hasattr(model, "for_inference"): + model.for_inference() + if hasattr(self, 'neftune_hook_handle'): + self.neftune_hook_handle.remove() + if hasattr(self, 'neftune_hook_handle'): del self.neftune_hook_handle + if getattr(args, 'neftune_noise_alpha', None) is not None: + model.get_input_embeddings().neftune_noise_alpha = self.neftune_noise_alpha + pass + if hasattr(self, 'accelerator'): + scaler = self.accelerator.scaler + current_model = model + while hasattr(current_model, 'model'): + current_model.accelerator_scaler = scaler + current_model = current_model.model + current_model.accelerator_scaler = scaler + pass + if hasattr(self, 'train'): + self.train = MethodType(prepare_for_training_mode(self.__class__.train), self) + pass + +pass diff --git a/unsloth_compiled_cache/UnslothORPOTrainer.py b/unsloth_compiled_cache/UnslothORPOTrainer.py new file mode 100644 index 0000000000000000000000000000000000000000..c5f59af66c69e1fb3c1ddba9e897e5bd4ac0b7a5 --- /dev/null +++ b/unsloth_compiled_cache/UnslothORPOTrainer.py @@ -0,0 +1,1791 @@ +""" +2025.11.2 +2025.11.1 +4.57.2 +0.23.0 +__UNSLOTH_VERSIONING__ +""" + +# Unsloth auto generated code +# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved. 
+# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . + +from torch import Tensor +import torch +import torch.nn as nn +from torch.nn import functional as F +from typing import Any, List, Optional, Tuple, Union, Dict, Set, Callable +from trl.trainer.orpo_trainer import (Any, AutoModelForCausalLM, BaseImageProcessor, Callable, DPODataCollatorWithPadding, DataCollator, DataLoader, Dataset, EvalLoopOutput, F, FeatureExtractionMixin, Literal, ORPOConfig, ORPOTrainer, Optional, PartialState, Path, PeftModel, PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin, Trainer, TrainerCallback, Union, add_bos_token_if_needed, add_eos_token_if_needed, autocast, defaultdict, disable_dropout_in_model, generate_model_card, get_comet_experiment_url, inspect, is_comet_available, is_peft_available, is_torch_fx_proxy, is_torch_xla_available, is_wandb_available, log_table_to_comet_experiment, logger, logging, maybe_apply_chat_template, maybe_extract_prompt, nn, np, nullcontext, os, pad_to_length, pd, peft_module_casting_to_bf16, prepare_model_for_kbit_training, random, selective_log_softmax, textwrap, torch, F, Optional, PeftModel, PreTrainedModel, Trainer, is_peft_available, logger, os, torch) + + +import os +from typing import * +from dataclasses import dataclass, field +from packaging.version import Version +import torch +import numpy as np +from contextlib import nullcontext +from torch.nn import functional as F +from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling as TransformersDataCollatorForLanguageModeling +from transformers.training_args import ParallelMode + +# Wrap trainer with padding to right and enable training mode +import functools +from types import MethodType +def prepare_for_training_mode(f): + @functools.wraps(f) + def wrapper(self, *args, **kwargs): + # Enable training mode + if hasattr(self, 'model') and hasattr(self.model, "for_training"): + self.model.for_training() + output = f(self, *args, **kwargs) + # Return inference mode + if hasattr(self, 'model') and hasattr(self.model, "for_inference"): + self.model.for_inference() + return output + return wrapper +pass + +torch_compile_options = { + "epilogue_fusion" : True, + "max_autotune" : False, + "shape_padding" : True, + "trace.enabled" : False, + "triton.cudagraphs" : False, +} + +@torch.compile(dynamic = True, fullgraph = True, options = torch_compile_options,) +def chunked_selective_log_softmax(logits, index): + # Split into 4 chunks only + chunked_logits = torch.chunk(logits.reshape(-1, logits.shape[-1]), chunks = 4, dim = 0) + chunked_index = torch.chunk(index.reshape(-1), chunks = 4, dim = 0) + all_per_token_logps = [] + # Below loop does the same as selective_log_softmax(chunk_logits, chunk_index) + for chunk_logits, chunk_index in zip(chunked_logits, chunked_index): + chunk_logits = chunk_logits.to(torch.float32) + selected_logits = torch.gather(chunk_logits, dim = -1, index = chunk_index.unsqueeze(-1)).squeeze(-1) + 
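+ # Per-token log-probability via the identity log p(i) = logits[i] - logsumexp(logits):
+ # the gather above selects each target token's logit, and the float32 upcast keeps the
+ # logsumexp below numerically stable while only one 4x-smaller chunk is live at a time.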
logsumexp_values = torch.logsumexp(chunk_logits, dim = -1) + per_token_logps = selected_logits - logsumexp_values + all_per_token_logps.append(per_token_logps) + pass + all_per_token_logps = torch.concat(all_per_token_logps) + all_per_token_logps = all_per_token_logps.reshape((logits.shape[0], logits.shape[1])) + return all_per_token_logps + +def calculate_pad_tokens_in_prompt( + input_ids: torch.Tensor, + logits_to_keep: int, + pad_token_id: int +) -> torch.Tensor: + """ + Given prompt tensor, it returns all the left padded tokens in that sequence. so [pad, pad, pad, cat] = 3 tokens + """ + if logits_to_keep >= input_ids.shape[1]: + raise ValueError("logits_to_keep must be smaller than the sequence length.") + + prompt_section = input_ids[:, :-logits_to_keep] + + padding_mask = (prompt_section == pad_token_id) + + pad_token_counts = padding_mask.sum(dim=1) + + return pad_token_counts + +def create_completion_attention_mask( + completion_input_ids: torch.Tensor, + left_pad_tokens_per_prompt: torch.Tensor, + max_left_pad: int, + pad_token_id: int +) -> torch.Tensor: + """ + Given that we have a sequence, [p,p,p,c,c,c,pad,pad,pad] + + Where p are extra prompt tokens we got from slicing the torch tensor, c is completion tokens + and pad are pad tokens, this function would make a completion mask that would 0 out the pad + and p tokens. so in this example [0,0,0,1,1,1,0,0,0] + """ + batch_size, completion_len = completion_input_ids.shape + device = completion_input_ids.device + + num_tokens_to_mask = max_left_pad - left_pad_tokens_per_prompt + + indices = torch.arange(completion_len, device=device).unsqueeze(0) + shift_mask = indices >= num_tokens_to_mask.unsqueeze(1) + + non_padding_mask = (completion_input_ids != pad_token_id) + + final_mask = shift_mask & non_padding_mask + + return final_mask + +def left_pack_padding(tensor: torch.Tensor, pad_id: int) -> torch.Tensor: + """ + Moves all padding tokens in each sequence of a batch to the right. + """ + mask = (tensor != pad_id) + # Must do stable=True since binary mark is unordered + sorted_indices = torch.argsort(mask, dim=1, descending=True, stable=True) + packed_tensor = torch.gather(tensor, 1, sorted_indices) + return packed_tensor + +def align_logprobs_with_mask( + logprob_tensor: torch.Tensor, + attention_mask: torch.Tensor, + pad_value: float = 0.0 +) -> torch.Tensor: + """ + Aligns a log probability tensor with a given attention mask. + """ + + device = logprob_tensor.device + batch_size, logprob_seq_len = logprob_tensor.shape + mask_seq_len = attention_mask.shape[1] + + padded_logprobs = torch.full( + attention_mask.shape, + fill_value=pad_value, + dtype=logprob_tensor.dtype, + device=device + ) + + left_pad_counts = torch.argmax(attention_mask, dim=1) + + cols = torch.arange(logprob_seq_len, device=device) + dest_indices = left_pad_counts.unsqueeze(1) + cols + + # Create destination row indices + # Shape: [batch_size, logprob_seq_len] + row_indices = torch.arange(batch_size, device=device).unsqueeze(1).expand_as(dest_indices) + + # --- 4. Filter out-of-bounds indices and perform assignment --- + # Create a mask to identify only the indices that are within the bounds + # of the target tensor's sequence length. + valid_mask = dest_indices < mask_seq_len + + # Use this mask to select only the valid row indices, column indices, + # and the corresponding values from the logprob tensor. + # This flattens the selected elements into 1D tensors. 
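+ # Worked illustration of the indexing above (hypothetical shapes): with
+ # left_pad_counts = [2] and logprob_seq_len = 3, dest_indices for that row is
+ # [2, 3, 4]; destination columns >= mask_seq_len are dropped by valid_mask, and the
+ # surviving (row, col, value) triples are written in one vectorized assignment below.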
+ valid_rows = row_indices[valid_mask] + valid_cols = dest_indices[valid_mask] + valid_vals = logprob_tensor[valid_mask] + + # Place the valid values into their correct positions in the padded tensor + # using a single, efficient advanced indexing operation. + padded_logprobs[valid_rows, valid_cols] = valid_vals + + return padded_logprobs +@dataclass +class UnslothORPOConfig(ORPOConfig): + """ + +Configuration class for the [`ORPOTrainer`]. + +This class includes only the parameters that are specific to ORPO training. For a full list of training arguments, +please refer to the [`~transformers.TrainingArguments`] documentation. Note that default values in this class may +differ from those in [`~transformers.TrainingArguments`]. + +Using [`~transformers.HfArgumentParser`] we can turn this class into +[argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the +command line. + +Parameters: + max_length (`int` or `None`, *optional*, defaults to `1024`): + Maximum length of the sequences (prompt + completion) in the batch. This argument is required if you want + to use the default data collator. + max_prompt_length (`int` or `None`, *optional*, defaults to `512`): + Maximum length of the prompt. This argument is required if you want to use the default data collator. + max_completion_length (`int` or `None`, *optional*, defaults to `None`): + Maximum length of the completion. This argument is required if you want to use the default data collator + and your model is an encoder-decoder. + beta (`float`, *optional*, defaults to `0.1`): + Parameter controlling the relative ratio loss weight in the ORPO loss. In the + [paper](https://huggingface.co/papers/2403.07691), it is denoted by λ. In the + [code](https://github.com/xfactlab/orpo), it is denoted by `alpha`. + disable_dropout (`bool`, *optional*, defaults to `True`): + Whether to disable dropout in the model. + label_pad_token_id (`int`, *optional*, defaults to `-100`): + Label pad token id. This argument is required if you want to use the default data collator. + padding_value (`int` or `None`, *optional*, defaults to `None`): + Padding value to use. If `None`, the padding value of the tokenizer is used. + truncation_mode (`str`, *optional*, defaults to `"keep_end"`): + Truncation mode to use when the prompt is too long. Possible values are `"keep_end"` or `"keep_start"`. + This argument is required if you want to use the default data collator. + generate_during_eval (`bool`, *optional*, defaults to `False`): + If `True`, generates and logs completions from the model to W&B or Comet during evaluation. + is_encoder_decoder (`bool` or `None`, *optional*, defaults to `None`): + When using the `model_init` argument (callable) to instantiate the model instead of the `model` argument, + you need to specify if the model returned by the callable is an encoder-decoder model. + model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`): + Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the model from a + string. + dataset_num_proc (`int` or `None`, *optional*, defaults to `None`): + Number of processes to use for processing the dataset. + + """ + vllm_sampling_params: Optional[Any] = field( + default = None, + metadata = {'help': 'vLLM SamplingParams'}, + ) + unsloth_num_chunks : Optional[int] = field( + default = -1, + metadata = {'help': 'Chunk size to reduce memory usage. 
-1 is most efficient.'}, + ) + max_seq_length : Optional[int] = field( + default = None, + metadata = {'help': 'Maximum sequence length to truncate to.'}, + ) + def __init__( + self, + output_dir = None, + overwrite_output_dir = None, + do_train = False, + do_eval = False, + do_predict = False, + eval_strategy = 'no', + prediction_loss_only = False, + per_device_train_batch_size = 4, + per_device_eval_batch_size = 4, + per_gpu_train_batch_size = None, + per_gpu_eval_batch_size = None, + gradient_accumulation_steps = 2, + eval_accumulation_steps = 2, + eval_delay = 0, + torch_empty_cache_steps = 250, + learning_rate = 5e-05, + weight_decay = 0.01, + adam_beta1 = 0.9, + adam_beta2 = 0.999, + adam_epsilon = 1e-08, + max_grad_norm = 1.0, + num_train_epochs = 3.0, + max_steps = -1, + lr_scheduler_type = 'linear', + warmup_ratio = 0.1, + warmup_steps = 0, + log_level = 'passive', + log_level_replica = 'warning', + log_on_each_node = True, + logging_dir = None, + logging_strategy = 'steps', + logging_first_step = False, + logging_steps = 1, + logging_nan_inf_filter = False, + save_strategy = 'steps', + save_steps = 500, + save_total_limit = None, + save_safetensors = True, + save_on_each_node = False, + save_only_model = False, + restore_callback_states_from_checkpoint = False, + no_cuda = False, + use_cpu = False, + use_mps_device = False, + seed = 3407, + data_seed = 3407, + jit_mode_eval = False, + bf16 = False, + fp16 = False, + fp16_opt_level = 'O1', + half_precision_backend = 'auto', + bf16_full_eval = False, + fp16_full_eval = False, + tf32 = None, + local_rank = -1, + ddp_backend = None, + tpu_num_cores = None, + tpu_metrics_debug = False, + debug = '', + dataloader_drop_last = False, + eval_steps = None, + dataloader_num_workers = 0, + dataloader_prefetch_factor = None, + past_index = -1, + run_name = None, + disable_tqdm = None, + remove_unused_columns = True, + label_names = None, + load_best_model_at_end = False, + metric_for_best_model = None, + greater_is_better = None, + ignore_data_skip = False, + fsdp = None, + fsdp_min_num_params = 0, + fsdp_config = None, + fsdp_transformer_layer_cls_to_wrap = None, + accelerator_config = None, + parallelism_config = None, + deepspeed = None, + label_smoothing_factor = 0.0, + optim = 'adamw_8bit', + optim_args = None, + adafactor = False, + group_by_length = False, + length_column_name = 'length', + report_to = None, + project = 'huggingface', + trackio_space_id = 'trackio', + ddp_find_unused_parameters = None, + ddp_bucket_cap_mb = None, + ddp_broadcast_buffers = None, + dataloader_pin_memory = True, + dataloader_persistent_workers = False, + skip_memory_metrics = True, + use_legacy_prediction_loop = False, + push_to_hub = False, + resume_from_checkpoint = None, + hub_model_id = None, + hub_strategy = 'every_save', + hub_token = None, + hub_private_repo = None, + hub_always_push = False, + hub_revision = None, + gradient_checkpointing = True, + gradient_checkpointing_kwargs = None, + include_inputs_for_metrics = False, + eval_do_concat_batches = True, + fp16_backend = 'auto', + push_to_hub_model_id = None, + push_to_hub_organization = None, + push_to_hub_token = None, + mp_parameters = '', + auto_find_batch_size = False, + full_determinism = False, + torchdynamo = None, + ray_scope = 'last', + ddp_timeout = 1800, + torch_compile = False, + torch_compile_backend = None, + torch_compile_mode = None, + include_tokens_per_second = False, + include_num_input_tokens_seen = False, + neftune_noise_alpha = None, + optim_target_modules = None, + 
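+ # NOTE: several defaults above (e.g. seed = 3407, optim = 'adamw_8bit',
+ # gradient_checkpointing = True) intentionally differ from stock
+ # transformers.TrainingArguments, as flagged in the class docstring.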
batch_eval_metrics = False, + eval_on_start = False, + use_liger_kernel = False, + liger_kernel_config = None, + eval_use_gather_object = False, + average_tokens_across_devices = True, + max_length = 1024, + max_prompt_length = 512, + max_completion_length = None, + beta = 0.1, + disable_dropout = True, + label_pad_token_id = -100, + padding_value = None, + truncation_mode = 'keep_end', + generate_during_eval = False, + is_encoder_decoder = None, + model_init_kwargs = None, + dataset_num_proc = None, + vllm_sampling_params = None, + unsloth_num_chunks = -1, + max_seq_length = None, + **kwargs, + ): + if learning_rate < 1e-7: print(f'Unsloth: Your learning rate of `{learning_rate}` is too small (less than 1e-7)! Consider increasing it, otherwise gradient updates will be close to 0!') + if learning_rate > 1: print(f'Unsloth: Your learning rate of `{learning_rate}` is way too large (> 1)! Consider decreasing it to 1e-1, otherwise gradient updates will explode!') + if output_dir is None and save_strategy == 'steps' and save_steps == 500: + output_dir = 'unsloth_training_checkpoints' + save_strategy = 'no' + if dataset_num_proc is None: + from multiprocessing import cpu_count + dataset_num_proc = min(max(cpu_count()+4, 2), 64) + + super().__init__( + output_dir = output_dir, + overwrite_output_dir = overwrite_output_dir, + do_train = do_train, + do_eval = do_eval, + do_predict = do_predict, + eval_strategy = eval_strategy, + prediction_loss_only = prediction_loss_only, + per_device_train_batch_size = per_device_train_batch_size, + per_device_eval_batch_size = per_device_eval_batch_size, + per_gpu_train_batch_size = per_gpu_train_batch_size, + per_gpu_eval_batch_size = per_gpu_eval_batch_size, + gradient_accumulation_steps = gradient_accumulation_steps, + eval_accumulation_steps = eval_accumulation_steps, + eval_delay = eval_delay, + torch_empty_cache_steps = torch_empty_cache_steps, + learning_rate = learning_rate, + weight_decay = weight_decay, + adam_beta1 = adam_beta1, + adam_beta2 = adam_beta2, + adam_epsilon = adam_epsilon, + max_grad_norm = max_grad_norm, + num_train_epochs = num_train_epochs, + max_steps = max_steps, + lr_scheduler_type = lr_scheduler_type, + warmup_ratio = warmup_ratio, + warmup_steps = warmup_steps, + log_level = log_level, + log_level_replica = log_level_replica, + log_on_each_node = log_on_each_node, + logging_dir = logging_dir, + logging_strategy = logging_strategy, + logging_first_step = logging_first_step, + logging_steps = logging_steps, + logging_nan_inf_filter = logging_nan_inf_filter, + save_strategy = save_strategy, + save_steps = save_steps, + save_total_limit = save_total_limit, + save_safetensors = save_safetensors, + save_on_each_node = save_on_each_node, + save_only_model = save_only_model, + restore_callback_states_from_checkpoint = restore_callback_states_from_checkpoint, + no_cuda = no_cuda, + use_cpu = use_cpu, + use_mps_device = use_mps_device, + seed = seed, + data_seed = data_seed, + jit_mode_eval = jit_mode_eval, + bf16 = bf16, + fp16 = fp16, + fp16_opt_level = fp16_opt_level, + half_precision_backend = half_precision_backend, + bf16_full_eval = bf16_full_eval, + fp16_full_eval = fp16_full_eval, + tf32 = tf32, + local_rank = local_rank, + ddp_backend = ddp_backend, + tpu_num_cores = tpu_num_cores, + tpu_metrics_debug = tpu_metrics_debug, + debug = debug, + dataloader_drop_last = dataloader_drop_last, + eval_steps = eval_steps, + dataloader_num_workers = dataloader_num_workers, + dataloader_prefetch_factor = dataloader_prefetch_factor, +
past_index = past_index, + run_name = run_name, + disable_tqdm = disable_tqdm, + remove_unused_columns = remove_unused_columns, + label_names = label_names, + load_best_model_at_end = load_best_model_at_end, + metric_for_best_model = metric_for_best_model, + greater_is_better = greater_is_better, + ignore_data_skip = ignore_data_skip, + fsdp = fsdp, + fsdp_min_num_params = fsdp_min_num_params, + fsdp_config = fsdp_config, + fsdp_transformer_layer_cls_to_wrap = fsdp_transformer_layer_cls_to_wrap, + accelerator_config = accelerator_config, + parallelism_config = parallelism_config, + deepspeed = deepspeed, + label_smoothing_factor = label_smoothing_factor, + optim = optim, + optim_args = optim_args, + adafactor = adafactor, + group_by_length = group_by_length, + length_column_name = length_column_name, + report_to = report_to, + project = project, + trackio_space_id = trackio_space_id, + ddp_find_unused_parameters = ddp_find_unused_parameters, + ddp_bucket_cap_mb = ddp_bucket_cap_mb, + ddp_broadcast_buffers = ddp_broadcast_buffers, + dataloader_pin_memory = dataloader_pin_memory, + dataloader_persistent_workers = dataloader_persistent_workers, + skip_memory_metrics = skip_memory_metrics, + use_legacy_prediction_loop = use_legacy_prediction_loop, + push_to_hub = push_to_hub, + resume_from_checkpoint = resume_from_checkpoint, + hub_model_id = hub_model_id, + hub_strategy = hub_strategy, + hub_token = hub_token, + hub_private_repo = hub_private_repo, + hub_always_push = hub_always_push, + hub_revision = hub_revision, + gradient_checkpointing = gradient_checkpointing, + gradient_checkpointing_kwargs = gradient_checkpointing_kwargs, + include_inputs_for_metrics = include_inputs_for_metrics, + eval_do_concat_batches = eval_do_concat_batches, + fp16_backend = fp16_backend, + push_to_hub_model_id = push_to_hub_model_id, + push_to_hub_organization = push_to_hub_organization, + push_to_hub_token = push_to_hub_token, + mp_parameters = mp_parameters, + auto_find_batch_size = auto_find_batch_size, + full_determinism = full_determinism, + torchdynamo = torchdynamo, + ray_scope = ray_scope, + ddp_timeout = ddp_timeout, + torch_compile = torch_compile, + torch_compile_backend = torch_compile_backend, + torch_compile_mode = torch_compile_mode, + include_tokens_per_second = include_tokens_per_second, + include_num_input_tokens_seen = include_num_input_tokens_seen, + neftune_noise_alpha = neftune_noise_alpha, + optim_target_modules = optim_target_modules, + batch_eval_metrics = batch_eval_metrics, + eval_on_start = eval_on_start, + use_liger_kernel = use_liger_kernel, + liger_kernel_config = liger_kernel_config, + eval_use_gather_object = eval_use_gather_object, + average_tokens_across_devices = average_tokens_across_devices, + max_length = max_length, + max_prompt_length = max_prompt_length, + max_completion_length = max_completion_length, + beta = beta, + disable_dropout = disable_dropout, + label_pad_token_id = label_pad_token_id, + padding_value = padding_value, + truncation_mode = truncation_mode, + generate_during_eval = generate_during_eval, + is_encoder_decoder = is_encoder_decoder, + model_init_kwargs = model_init_kwargs, + dataset_num_proc = dataset_num_proc,**kwargs) + self.vllm_sampling_params = vllm_sampling_params + self.unsloth_num_chunks = unsloth_num_chunks + self.max_seq_length = max_seq_length +pass + +class _UnslothORPOTrainer(Trainer): + r""" + Initialize ORPOTrainer. 
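+
+ ORPO (Odds Ratio Preference Optimization, https://huggingface.co/papers/2403.07691) trains on
+ preference pairs without a separate reference model: the loss is the NLL of the chosen
+ prompt+response minus a beta-weighted mean log-sigmoid of the log-odds ratio of chosen over
+ rejected, as computed in `odds_ratio_loss` and assembled in `get_batch_loss_metrics` below.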
+ + Args: + model (`transformers.PreTrainedModel`): + The model to train, preferably an `AutoModelForSequenceClassification`. + args (`ORPOConfig`): + The ORPO config arguments to use for training. + data_collator (`transformers.DataCollator`): + The data collator to use for training. If None is specified, the default data collator + (`DPODataCollatorWithPadding`) will be used which will pad the sequences to the maximum length of the + sequences in the batch, given a dataset of paired sequences. + train_dataset (`datasets.Dataset`): + The dataset to use for training. + eval_dataset (`datasets.Dataset`): + The dataset to use for evaluation. + processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`): + Processing class used to process the data. If provided, will be used to automatically process the inputs + for the model, and it will be saved along the model to make it easier to rerun an interrupted training or + reuse the fine-tuned model. + model_init (`Callable[[], transformers.PreTrainedModel]`): + The model initializer to use for training. If None is specified, the default model initializer will be + used. + callbacks (`list[transformers.TrainerCallback]`): + The callbacks to use for training. + optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`): + The optimizer and scheduler to use for training. + preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`): + The function to use to preprocess the logits before computing the metrics. + peft_config (`dict`, defaults to `None`): + The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in + a PEFT model. + compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*): + The function to use to compute the metrics. Must take a `EvalPrediction` and return a dictionary string to + metric values. + """ + + _tag_names = ["trl", "orpo"] + + def __init__( + self, + model: Optional[Union[PreTrainedModel, nn.Module, str]] = None, + args: Optional[ORPOConfig] = None, + data_collator: Optional[DataCollator] = None, + train_dataset: Optional[Dataset] = None, + eval_dataset: Optional[Union[Dataset, dict[str, Dataset]]] = None, + processing_class: Optional[ + Union[PreTrainedTokenizerBase, BaseImageProcessor, FeatureExtractionMixin, ProcessorMixin] + ] = None, + model_init: Optional[Callable[[], PreTrainedModel]] = None, + callbacks: Optional[list[TrainerCallback]] = None, + optimizers: tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None), + preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None, + peft_config: Optional[dict] = None, + compute_metrics: Optional[Callable[[EvalLoopOutput], dict]] = None, + ): + if args.model_init_kwargs is None: + model_init_kwargs = {} + elif not isinstance(model, str): + raise ValueError("You passed model_kwargs to the ORPOTrainer. But your model is already instantiated.") + else: + model_init_kwargs = args.model_init_kwargs + dtype = model_init_kwargs.get("dtype") + if dtype is not None: + # Convert to `torch.dtype` if an str is passed + if isinstance(dtype, str) and dtype != "auto": + dtype = getattr(torch, dtype) + if dtype != "auto" and not isinstance(dtype, torch.dtype): + raise ValueError( + f"Invalid `dtype` passed to the ORPOConfig. 
Expected a string with either `torch.dtype` or 'auto', but got {dtype}." + ) + model_init_kwargs["dtype"] = dtype + + if isinstance(model, str): + model = AutoModelForCausalLM.from_pretrained(model, **model_init_kwargs) + + # Initialize this variable to False. This helps tracking the case when `peft_module_casting_to_bf16` + # has been called in order to properly call autocast if needed. + self._peft_has_been_casted_to_bf16 = False + + if not is_peft_available() and peft_config is not None: + raise ValueError( + "PEFT is not installed and you passed a `peft_config` in the trainer's kwargs, please install it to use the PEFT models" + ) + elif is_peft_available() and peft_config is not None: + # if model is a peft model and we have a peft_config, we merge and unload it first + if isinstance(model, PeftModel): + model = model.merge_and_unload() + + if getattr(model, "is_loaded_in_8bit", False) or getattr(model, "is_loaded_in_4bit", False): + _support_gc_kwargs = hasattr( + args, "gradient_checkpointing_kwargs" + ) and "gradient_checkpointing_kwargs" in list( + inspect.signature(prepare_model_for_kbit_training).parameters + ) + + prepare_model_kwargs = {"use_gradient_checkpointing": args.gradient_checkpointing} + + if _support_gc_kwargs: + prepare_model_kwargs["gradient_checkpointing_kwargs"] = args.gradient_checkpointing_kwargs + + model = prepare_model_for_kbit_training(model, **prepare_model_kwargs) + elif args.gradient_checkpointing: + # For backward compatibility with older versions of transformers + if hasattr(model, "enable_input_require_grads"): + model.enable_input_require_grads() + else: + + def make_inputs_require_grad(module, input, output): + output.requires_grad_(True) + + model.get_input_embeddings().register_forward_hook(make_inputs_require_grad) + + # get peft model with the given config + model = model + if args.bf16 and getattr(model, "is_loaded_in_4bit", False): + peft_module_casting_to_bf16(model) + # If args.bf16 we need to explicitly call `generate` with torch amp autocast context manager + self._peft_has_been_casted_to_bf16 = True + + # For models that use gradient_checkpointing, we need to attach a hook that enables input + # to explicitly have `requires_grad=True`, otherwise training will either silently + # fail or completely fail. + elif args.gradient_checkpointing: + # For backward compatibility with older versions of transformers + if hasattr(model, "enable_input_require_grads"): + model.enable_input_require_grads() + else: + + def make_inputs_require_grad(module, input, output): + output.requires_grad_(True) + + model.get_input_embeddings().register_forward_hook(make_inputs_require_grad) + + if args.generate_during_eval and not (is_wandb_available() or is_comet_available()): + raise ValueError( + "`generate_during_eval=True` requires Weights and Biases or Comet to be installed." + " Please install `wandb` or `comet-ml` to resolve." 
+ ) + + if model is not None: + self.is_encoder_decoder = model.config.is_encoder_decoder + elif args.is_encoder_decoder is None: + raise ValueError("When no model is provided, you need to pass the parameter is_encoder_decoder.") + else: + self.is_encoder_decoder = args.is_encoder_decoder + + if self.is_encoder_decoder: + self.decoder_start_token_id = model.config.decoder_start_token_id + self.pad_token_id = model.config.pad_token_id + + if processing_class is None: + raise ValueError("processing_class must be specified to tokenize a ORPO dataset.") + if args.max_length is None: + logger.warning( + "`max_length` is not set in the ORPOConfig's init" + " it will default to `512` by default, but you should do it yourself in the future.", + ) + max_length = 512 + else: + max_length = args.max_length + if args.max_prompt_length is None: + logger.warning( + "`max_prompt_length` is not set in the ORPOConfig's init" + " it will default to `128` by default, but you should do it yourself in the future.", + ) + max_prompt_length = 128 + else: + max_prompt_length = args.max_prompt_length + + if args.max_completion_length is None and self.is_encoder_decoder: + logger.warning( + "When using an encoder decoder architecture, you should set `max_completion_length` in the ORPOConfig's init" + " it will default to `128` by default, but you should do it yourself in the future.", + ) + self.max_completion_length = 128 + else: + self.max_completion_length = args.max_completion_length + + if data_collator is None: + data_collator = DPODataCollatorWithPadding( + pad_token_id=processing_class.pad_token_id, + label_pad_token_id=args.label_pad_token_id, + is_encoder_decoder=self.is_encoder_decoder, + ) + + if args.remove_unused_columns: + args.remove_unused_columns = False + # warn users + logger.warning( + "When using DPODataCollatorWithPadding, you should set `remove_unused_columns=False` in your TrainingArguments" + " we have set it for you, but you should do it yourself in the future.", + ) + + self.use_dpo_data_collator = True + else: + self.use_dpo_data_collator = False + + # Disable dropout in the model and reference model + if args.disable_dropout: + disable_dropout_in_model(model) + + self.max_length = max_length + self.generate_during_eval = args.generate_during_eval + self.label_pad_token_id = args.label_pad_token_id + self.padding_value = args.padding_value if args.padding_value is not None else processing_class.pad_token_id + self.max_prompt_length = max_prompt_length + self.truncation_mode = args.truncation_mode + self.processing_class = processing_class + + self.beta = args.beta + self.aux_loss_enabled = getattr(model.config, "output_router_logits", False) + self.aux_loss_coef = getattr(model.config, "router_aux_loss_coef", 0.0) + if self.aux_loss_enabled and self.aux_loss_coef == 0.0: + logger.warning( + "You set `output_router_logits` to `True` in the model config, but `router_aux_loss_coef` is set to " + "`0.0`, meaning the auxiliary loss will not be used. Either set `router_aux_loss_coef` to a value " + "greater than `0.0`, or set `output_router_logits` to `False` if you don't want to use the auxiliary " + "loss.", + ) + + self._stored_metrics = defaultdict(lambda: defaultdict(list)) + + # The trainer estimates the number of FLOPs [floating-point operations] using the number of elements in the + # input tensor associated with the key "input_ids". However, in ORPO, the sampled data does not include the + # "input_ids" key. 
Instead, the available keys are "prompt_input_ids", "chosen_input_ids", and + # "rejected_input_ids". As a result, the trainer issues the warning: "Could not estimate the number of tokens + # of the input, floating-point operations will not be computed." To suppress this warning, we set the + # "estimate_tokens" key in the model's "warnings_issued" dictionary to True. This acts as a flag to indicate + # that the warning has already been issued. + model.warnings_issued["estimate_tokens"] = True + + # Compute that only on the main process for faster data processing. + # see: https://github.com/huggingface/trl/pull/1255 + with PartialState().main_process_first(): + # Extract the prompt if needed, and apply the chat template if needed + train_dataset = train_dataset.map(maybe_extract_prompt, num_proc=args.dataset_num_proc) + train_dataset = train_dataset.map( + maybe_apply_chat_template, fn_kwargs={"tokenizer": processing_class}, num_proc=args.dataset_num_proc + ) + train_dataset = train_dataset.map(self.tokenize_row, num_proc=args.dataset_num_proc) + if eval_dataset is not None: + eval_dataset = eval_dataset.map(maybe_extract_prompt, num_proc=args.dataset_num_proc) + eval_dataset = eval_dataset.map( + maybe_apply_chat_template, + fn_kwargs={"tokenizer": processing_class}, + num_proc=args.dataset_num_proc, + ) + eval_dataset = eval_dataset.map(self.tokenize_row, num_proc=args.dataset_num_proc) + + super().__init__( + model=model, + args=args, + data_collator=data_collator, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + processing_class=processing_class, + model_init=model_init, + compute_metrics=compute_metrics, + callbacks=callbacks, + optimizers=optimizers, + preprocess_logits_for_metrics=preprocess_logits_for_metrics, + ) + + # Gradient accumulation requires scaled loss. Normally, loss scaling in the parent class depends on whether the + # model accepts loss-related kwargs. Since we compute our own loss, this check is irrelevant. We set + # self.model_accepts_loss_kwargs to False to enable scaling. + self.model_accepts_loss_kwargs = False + + # Add tags for models that have been loaded with the correct transformers version + if hasattr(self.model, "add_model_tags"): + self.model.add_model_tags(self._tag_names) + + if not hasattr(self, "accelerator"): + raise AttributeError( + "Your `Trainer` does not have an `accelerator` object. Consider upgrading `transformers`." + ) + + def build_tokenized_answer(self, prompt, answer): + """ + Llama tokenizer does not satisfy `enc(a + b) = enc(a) + enc(b)`. It does ensure `enc(a + b) = enc(a) + enc(a + + b)[len(enc(a)):]`.
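+ For example, a tokenizer may merge the pieces at the prompt/answer boundary, so that slicing
+ `enc(prompt + answer)` at `len(enc(prompt))` would split a merged token incorrectly; this method
+ therefore re-splits the full tokenization at the prompt length and backs off by one token when
+ the boundary token differs.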
Reference: + https://github.com/EleutherAI/lm-evaluation-harness/pull/531#issuecomment-1595586257 + """ + + full_tokenized = self.processing_class(prompt + answer, add_special_tokens=False) + prompt_input_ids = self.processing_class(prompt, add_special_tokens=False)["input_ids"] + + answer_input_ids = full_tokenized["input_ids"][len(prompt_input_ids) :] + answer_attention_mask = full_tokenized["attention_mask"][len(prompt_input_ids) :] + + # Concat tokens to form `enc(a) + enc(a + b)[len(enc(a)):]` + full_concat_input_ids = np.concatenate([prompt_input_ids, answer_input_ids]) + + # Prepare input tokens for token by token comparison + full_input_ids = np.array(full_tokenized["input_ids"]) + + if len(full_input_ids) != len(full_concat_input_ids): + raise ValueError("Prompt input ids and answer input ids should have the same length.") + + # On some tokenizers, like Llama-2 tokenizer, there are occasions where tokens + # can be merged together when tokenizing prompt+answer. This could result + # in the last token from the prompt being different when tokenized on its own + # vs when done as prompt+answer. + response_token_ids_start_idx = len(prompt_input_ids) + + # If the tokenized prompt is different from the start of the tokenized prompt+answer, then it means the + # last token has changed due to merging. + if prompt_input_ids != full_tokenized["input_ids"][:response_token_ids_start_idx]: + response_token_ids_start_idx -= 1 + + prompt_input_ids = full_tokenized["input_ids"][:response_token_ids_start_idx] + prompt_attention_mask = full_tokenized["attention_mask"][:response_token_ids_start_idx] + + if len(prompt_input_ids) != len(prompt_attention_mask): + raise ValueError("Prompt input ids and attention mask should have the same length.") + + answer_input_ids = full_tokenized["input_ids"][response_token_ids_start_idx:] + answer_attention_mask = full_tokenized["attention_mask"][response_token_ids_start_idx:] + + return dict( + prompt_input_ids=prompt_input_ids, + prompt_attention_mask=prompt_attention_mask, + input_ids=answer_input_ids, + attention_mask=answer_attention_mask, + ) + + def tokenize_row(self, feature, model: Optional[Union[PreTrainedModel, nn.Module]] = None) -> dict: + """Tokenize a single row from an ORPO-specific dataset. + + At this stage, we don't convert to PyTorch tensors yet; we just handle the truncation in case the prompt + + chosen or prompt + rejected responses are too long. First we truncate the prompt; if we're still too long, + we truncate the chosen/rejected. + + We also create the labels for the chosen/rejected responses, which are of length equal to the sum of the length + of the prompt and the chosen/rejected response, with label_pad_token_id for the prompt tokens. + """ + batch = {} + prompt = feature["prompt"] + chosen = feature["chosen"] + rejected = feature["rejected"] + + if not self.is_encoder_decoder: + # Check issues below for more details + # 1. https://github.com/huggingface/trl/issues/907 + # 2. https://github.com/EleutherAI/lm-evaluation-harness/pull/531#issuecomment-1595586257 + # 3.
https://github.com/LianjiaTech/BELLE/issues/337 + + if not isinstance(prompt, str): + raise ValueError(f"prompt should be a str but got {type(prompt)}") + prompt_tokens = self.processing_class(prompt, add_special_tokens=False) + prompt_tokens = {f"prompt_{k}": v for k, v in prompt_tokens.items()} + + if not isinstance(chosen, str): + raise ValueError(f"chosen should be a str but got {type(chosen)}") + chosen_tokens = self.build_tokenized_answer(prompt, chosen) + + if not isinstance(rejected, str): + raise ValueError(f"rejected should be a str but got {type(rejected)}") + rejected_tokens = self.build_tokenized_answer(prompt, rejected) + + # Last prompt token might get merged by tokenizer and + # it should not be included for generation if that happens + prompt_len_input_ids = len(prompt_tokens["prompt_input_ids"]) + + chosen_prompt_len_input_ids = len(chosen_tokens["prompt_input_ids"]) + rejected_prompt_len_input_ids = len(rejected_tokens["prompt_input_ids"]) + prompt_len_input_ids = min(chosen_prompt_len_input_ids, rejected_prompt_len_input_ids) + + for k, v in prompt_tokens.items(): + prompt_tokens[k] = v[:prompt_len_input_ids] + + # Make sure the chosen and rejected prompts have at most one different token, + # and that their lengths differ by at most 1 + num_diff_tokens = sum( + [a != b for a, b in zip(chosen_tokens["prompt_input_ids"], rejected_tokens["prompt_input_ids"])] + ) + num_diff_len = abs(chosen_prompt_len_input_ids - rejected_prompt_len_input_ids) + if num_diff_tokens > 1 or num_diff_len > 1: + raise ValueError( + "Chosen and rejected prompt_input_ids might only differ on the " + "last token due to tokenizer merge ops." + ) + + # add BOS token to head of prompt. Avoid adding if it's already there + prompt_tokens, chosen_tokens, rejected_tokens = add_bos_token_if_needed( + self.processing_class.bos_token_id, + prompt_len_input_ids, + prompt_tokens, + chosen_prompt_len_input_ids, + chosen_tokens, + rejected_prompt_len_input_ids, + rejected_tokens, + ) + + # add EOS token to end of answer.
Avoid adding if it's already there + chosen_tokens, rejected_tokens = add_eos_token_if_needed( + self.processing_class.eos_token_id, chosen_tokens, rejected_tokens + ) + + longer_response_length = max(len(chosen_tokens["input_ids"]), len(rejected_tokens["input_ids"])) + + # if combined sequence is too long, truncate the prompt + for answer_tokens in [chosen_tokens, rejected_tokens, prompt_tokens]: + if len(answer_tokens["prompt_input_ids"]) + longer_response_length > self.max_length: + if self.truncation_mode == "keep_start": + for k in ["prompt_input_ids", "prompt_attention_mask"]: + answer_tokens[k] = answer_tokens[k][: self.max_prompt_length] + elif self.truncation_mode == "keep_end": + for k in ["prompt_input_ids", "prompt_attention_mask"]: + answer_tokens[k] = answer_tokens[k][-self.max_prompt_length :] + else: + raise ValueError(f"Unknown truncation mode: {self.truncation_mode}") + + # if that's still too long, truncate the response + for answer_tokens in [chosen_tokens, rejected_tokens]: + if len(answer_tokens["prompt_input_ids"]) + longer_response_length > self.max_length: + for k in ["input_ids", "attention_mask"]: + answer_tokens[k] = answer_tokens[k][: self.max_length - self.max_prompt_length] + + # Create labels + chosen_sequence_tokens = { + k: chosen_tokens[f"prompt_{k}"] + chosen_tokens[k] for k in ["input_ids", "attention_mask"] + } + rejected_sequence_tokens = { + k: rejected_tokens[f"prompt_{k}"] + rejected_tokens[k] for k in ["input_ids", "attention_mask"] + } + chosen_sequence_tokens["labels"] = chosen_sequence_tokens["input_ids"][:] + chosen_sequence_tokens["labels"][: len(chosen_tokens["prompt_input_ids"])] = [ + self.label_pad_token_id + ] * len(chosen_tokens["prompt_input_ids"]) + rejected_sequence_tokens["labels"] = rejected_sequence_tokens["input_ids"][:] + rejected_sequence_tokens["labels"][: len(rejected_tokens["prompt_input_ids"])] = [ + self.label_pad_token_id + ] * len(rejected_tokens["prompt_input_ids"]) + + for k, toks in { + "chosen_": chosen_sequence_tokens, + "rejected_": rejected_sequence_tokens, + "": prompt_tokens, + }.items(): + for type_key, tokens in toks.items(): + if type_key == "token_type_ids": + continue + batch[f"{k}{type_key}"] = tokens + + else: + chosen_tokens = self.processing_class( + chosen, truncation=True, max_length=self.max_completion_length, add_special_tokens=True + ) + rejected_tokens = self.processing_class( + rejected, truncation=True, max_length=self.max_completion_length, add_special_tokens=True + ) + prompt_tokens = self.processing_class( + prompt, truncation=True, max_length=self.max_prompt_length, add_special_tokens=True + ) + + batch["chosen_labels"] = chosen_tokens["input_ids"] + batch["rejected_labels"] = rejected_tokens["input_ids"] + batch["prompt_input_ids"] = prompt_tokens["input_ids"] + batch["prompt_attention_mask"] = prompt_tokens["attention_mask"] + + if model is not None and hasattr(model, "prepare_decoder_input_ids_from_labels"): + batch["rejected_decoder_input_ids"] = model.prepare_decoder_input_ids_from_labels( + labels=torch.tensor(batch["rejected_labels"]) + ) + batch["chosen_decoder_input_ids"] = model.prepare_decoder_input_ids_from_labels( + labels=torch.tensor(batch["chosen_labels"]) + ) + + if is_torch_xla_available(): + # Pad the sequences to global max_length to avoid TorchXLA recompilation + for k in batch: + if "labels" in k or self.is_encoder_decoder: + pad_value = self.label_pad_token_id + elif k.endswith("_input_ids"): + pad_value = self.padding_value + elif k.endswith("_attention_mask"): + 
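+ # padded attention-mask positions are filled with 0 so they stay masked out
+ # after the chosen/rejected halves are concatenated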
pad_value = 0 + batch[k] = batch[k] + [pad_value] * (self.max_length - len(batch[k])) + return batch + + @staticmethod + def concatenated_inputs( + batch: dict[str, Union[list, torch.LongTensor]], + is_encoder_decoder: bool = False, + label_pad_token_id: int = -100, + padding_value: int = 0, + device: Optional[torch.device] = None, + ) -> dict[str, torch.LongTensor]: + """Concatenate the chosen and rejected inputs into a single tensor. + + Args: + batch: + A batch of data. Must contain the keys 'chosen_input_ids' and 'rejected_input_ids', which are tensors + of shape (batch_size, sequence_length). + is_encoder_decoder: + Whether the model is an encoder-decoder model. + label_pad_token_id: + The label pad token id. + padding_value: + The padding value to use for the concatenated input_ids. + device: + The device for the concatenated inputs. + + Returns: + A dictionary containing the concatenated inputs under the key 'concatenated_input_ids'. + """ + concatenated_batch = {} + + if is_encoder_decoder: + max_length = max(batch["chosen_labels"].shape[1], batch["rejected_labels"].shape[1]) + else: + max_length = max(batch["chosen_input_ids"].shape[1], batch["rejected_input_ids"].shape[1]) + + for k in batch: + if k.startswith("chosen") and isinstance(batch[k], torch.Tensor): + if "labels" in k or is_encoder_decoder: + pad_value = label_pad_token_id + elif k.endswith("_input_ids"): + pad_value = padding_value + elif k.endswith("_attention_mask"): + pad_value = 0 + concatenated_key = k.replace("chosen", "concatenated") + concatenated_batch[concatenated_key] = pad_to_length(batch[k], max_length, pad_value=pad_value) + for k in batch: + if k.startswith("rejected") and isinstance(batch[k], torch.Tensor): + if "labels" in k or is_encoder_decoder: + pad_value = label_pad_token_id + elif k.endswith("_input_ids"): + pad_value = padding_value + elif k.endswith("_attention_mask"): + pad_value = 0 + concatenated_key = k.replace("rejected", "concatenated") + concatenated_batch[concatenated_key] = torch.cat( + ( + concatenated_batch[concatenated_key], + pad_to_length(batch[k], max_length, pad_value=pad_value), + ), + dim=0, + ).to(device=device) + + if is_encoder_decoder: + concatenated_batch["concatenated_input_ids"] = batch["prompt_input_ids"].repeat(2, 1).to(device=device) + concatenated_batch["concatenated_attention_mask"] = ( + batch["prompt_attention_mask"].repeat(2, 1).to(device=device) + ) + + return concatenated_batch + + def odds_ratio_loss( + self, + policy_chosen_logps: torch.FloatTensor, + policy_rejected_logps: torch.FloatTensor, + ) -> tuple[torch.FloatTensor, torch.FloatTensor, torch.FloatTensor, torch.FloatTensor, torch.FloatTensor]: + """Compute ORPO's odds ratio (OR) loss for a batch of policy model log probabilities. + + Args: + policy_chosen_logps: + Log probabilities of the policy model for the chosen responses. Shape: (batch_size,) + policy_rejected_logps: + Log probabilities of the policy model for the rejected responses. Shape: (batch_size,) + + Returns: + A tuple of five tensors: (losses, chosen_rewards, rejected_rewards, log_odds_ratio, log_odds_chosen). The + losses tensor contains the ORPO loss for each example in the batch. The chosen_rewards and rejected_rewards + tensors contain the rewards for the chosen and rejected responses, respectively. The last two are scalar + means for logging: the mean `log(sigmoid(log_odds))` and the mean log odds of the chosen over the rejected + responses. + """ + + # Derived from Eqs.
(4) and (7) from https://huggingface.co/papers/2403.07691 by using log identities and exp(log(P(y|x)) = P(y|x) + log_odds = (policy_chosen_logps - policy_rejected_logps) - ( + torch.log1p(-torch.exp(policy_chosen_logps)) - torch.log1p(-torch.exp(policy_rejected_logps)) + ) + ratio = F.logsigmoid(log_odds) + losses = self.beta * ratio + + chosen_rewards = self.beta * (policy_chosen_logps.to(self.accelerator.device)).detach() + rejected_rewards = self.beta * (policy_rejected_logps.to(self.accelerator.device)).detach() + + return losses, chosen_rewards, rejected_rewards, torch.mean(ratio), torch.mean(log_odds) + + @staticmethod + def get_batch_logps( + logits: torch.FloatTensor, + labels: torch.LongTensor, + average_log_prob: bool = False, + label_pad_token_id: int = -100, + is_encoder_decoder: bool = False, + ) -> torch.FloatTensor: + """Compute the log probabilities of the given labels under the given logits. + + Args: + logits: Logits of the model (unnormalized). Shape: (batch_size, sequence_length, vocab_size) + labels: + Labels for which to compute the log probabilities. Label tokens with a value of label_pad_token_id are + ignored. Shape: (batch_size, sequence_length) + average_log_prob: + If True, return the average log probability per (non-masked) token. Otherwise, return the sum of the + log probabilities of the (non-masked) tokens. + label_pad_token_id: The label pad token id. + is_encoder_decoder: Whether the model is an encoder-decoder model. + + Returns: + A tensor of shape (batch_size,) containing the average/sum log probabilities of the given labels under the + given logits. + """ + if logits.shape[:-1] != labels.shape: + raise ValueError("Logits (batch and sequence length dim) and labels must have the same shape.") + + if not is_encoder_decoder: + labels = labels[:, 1:].clone() + logits = logits[:, :-1, :] + loss_mask = labels != label_pad_token_id + + # dummy token; we'll ignore the losses on these tokens later + labels = torch.where(labels == label_pad_token_id, 0, labels) + + per_token_logps = selective_log_softmax(logits, labels) + + if average_log_prob: + return (per_token_logps * loss_mask).sum(-1) / loss_mask.sum(-1) + else: + return (per_token_logps * loss_mask).sum(-1) + + def concatenated_forward( + self, model: nn.Module, batch: dict[str, Union[list, torch.LongTensor]] + ) -> tuple[torch.FloatTensor, torch.FloatTensor, torch.FloatTensor, torch.FloatTensor]: + """Run the given model on the given batch of inputs, concatenating the chosen and rejected inputs together. + + We do this to avoid doing two forward passes, because it's faster for FSDP. 
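+
+ Chosen and rejected sequences are padded to a common length and stacked along the batch
+ dimension by `concatenated_inputs`, so the first `len_chosen` rows of every output belong to
+ the chosen completions and the remainder to the rejected ones.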
+ """ + concatenated_batch = self.concatenated_inputs( + batch, + is_encoder_decoder=self.is_encoder_decoder, + label_pad_token_id=self.label_pad_token_id, + padding_value=self.padding_value, + device=self.accelerator.device, + ) + len_chosen = batch["chosen_labels"].shape[0] + + model_kwargs = ( + { + "decoder_input_ids": self._shift_right(concatenated_batch["concatenated_labels"]), + } + if self.is_encoder_decoder + else {} + ) + + if self.aux_loss_enabled: + model_kwargs["output_router_logits"] = True + + outputs = model( + concatenated_batch["concatenated_input_ids"], + attention_mask=concatenated_batch["concatenated_attention_mask"], + use_cache=False, + **model_kwargs, + ) + all_logits = outputs.logits + + def cross_entropy_loss(logits, labels): + if not self.is_encoder_decoder: + # Shift so that tokens < n predict n + logits = logits[..., :-1, :].contiguous() + labels = labels[..., 1:].contiguous() + # Flatten the tokens + loss_fct = nn.CrossEntropyLoss() + logits = logits.view(-1, logits.shape[-1]) + labels = labels.view(-1) + # Enable model parallelism + labels = labels.to(logits.device) + loss = loss_fct(logits, labels) + return loss + + if self.is_encoder_decoder: + labels = concatenated_batch["concatenated_labels"].clone() + else: + labels = concatenated_batch["concatenated_input_ids"].clone() + attention_mask = concatenated_batch["concatenated_attention_mask"] + labels = torch.where(attention_mask == 1, labels, self.label_pad_token_id) + # orpo chosen nll loss is computed over the full prompt and response + chosen_nll_loss = cross_entropy_loss(all_logits[:len_chosen], labels[:len_chosen]) + + all_logps = self.get_batch_logps( + all_logits, + concatenated_batch["concatenated_labels"], + average_log_prob=True, + is_encoder_decoder=self.is_encoder_decoder, + label_pad_token_id=self.label_pad_token_id, + ) + + chosen_logps = all_logps[:len_chosen] + rejected_logps = all_logps[len_chosen:] + + if not self.is_encoder_decoder: + chosen_logits = all_logits[:len_chosen, :-1, :] + rejected_logits = all_logits[len_chosen:, :-1, :] + else: + chosen_logits = all_logits[:len_chosen] + rejected_logits = all_logits[len_chosen:] + + if self.aux_loss_enabled: + return (chosen_logps, rejected_logps, chosen_logits, rejected_logits, chosen_nll_loss, outputs.aux_loss) + + return (chosen_logps, rejected_logps, chosen_logits, rejected_logits, chosen_nll_loss) + + def get_batch_loss_metrics( + self, + model, + batch: dict[str, Union[list, torch.LongTensor]], + train_eval: Literal["train", "eval"] = "train", + ): + """Compute the ORPO loss and other metrics for the given batch of inputs for train or test.""" + metrics = {} + + forward_output = self.concatenated_forward(model, batch) + ( + policy_chosen_logps, + policy_rejected_logps, + policy_chosen_logits, + policy_rejected_logits, + policy_nll_loss, + ) = forward_output[:5] + if self.aux_loss_enabled: + aux_loss = forward_output[5] + + losses, chosen_rewards, rejected_rewards, log_odds_ratio, log_odds_chosen = self.odds_ratio_loss( + policy_chosen_logps, policy_rejected_logps + ) + # full ORPO loss + loss = policy_nll_loss - losses.mean() + + reward_accuracies = (chosen_rewards > rejected_rewards).float() + + prefix = "eval_" if train_eval == "eval" else "" + metrics[f"{prefix}rewards/chosen"] = self.accelerator.gather_for_metrics(chosen_rewards).mean() + metrics[f"{prefix}rewards/rejected"] = self.accelerator.gather_for_metrics(rejected_rewards).mean() + metrics[f"{prefix}rewards/accuracies"] = 
self.accelerator.gather_for_metrics(reward_accuracies).mean() + metrics[f"{prefix}rewards/margins"] = self.accelerator.gather_for_metrics( + chosen_rewards - rejected_rewards + ).mean() + metrics[f"{prefix}logps/rejected"] = self.accelerator.gather_for_metrics(policy_rejected_logps).detach().mean() + metrics[f"{prefix}logps/chosen"] = self.accelerator.gather_for_metrics(policy_chosen_logps).detach().mean() + metrics[f"{prefix}logits/rejected"] = self.accelerator.gather_for_metrics( + policy_rejected_logits.detach().mean() + ).mean() + metrics[f"{prefix}logits/chosen"] = self.accelerator.gather_for_metrics( + policy_chosen_logits.detach().mean() + ).mean() + metrics[f"{prefix}nll_loss"] = self.accelerator.gather_for_metrics(policy_nll_loss).detach().mean() + metrics[f"{prefix}log_odds_ratio"] = self.accelerator.gather_for_metrics(log_odds_ratio).detach().mean() + metrics[f"{prefix}log_odds_chosen"] = self.accelerator.gather_for_metrics(log_odds_chosen).detach().mean() + if is_torch_xla_available(): + xm.mark_step() # needed because .item() calls + for k, v in metrics.items(): + metrics[k] = v.item() + if self.aux_loss_enabled: + loss += self.aux_loss_coef * aux_loss + + return loss, metrics + + def compute_loss( + self, + model: Union[PreTrainedModel, nn.Module], + inputs: dict[str, Union[torch.Tensor, Any]], + return_outputs=False, + num_items_in_batch=None, + ) -> Union[torch.Tensor, tuple[torch.Tensor, dict[str, torch.Tensor]]]: + compute_loss_context_manager = ( + autocast(self.accelerator.device.type) if self._peft_has_been_casted_to_bf16 else nullcontext() + ) + + with compute_loss_context_manager: + loss, metrics = self.get_batch_loss_metrics(model, inputs, train_eval="train") + + # Make sure to move the loss to the device the original accumulating loss is at back in the `Trainer` class: + loss = loss.to(self.args.device) + + # force log the metrics + self.store_metrics(metrics, train_eval="train") + + if return_outputs: + return (loss, metrics) + return loss + + def generate_from_model(self, model, batch: dict[str, torch.LongTensor]) -> str: + """Generate samples from the model and reference model for the given batch of inputs.""" + + # If one uses `generate_during_eval` with peft + bf16, we need to explicitly call generate with + # the torch amp context manager as some hidden states are silently casted to full precision. + generate_context_manager = ( + autocast(self.accelerator.device.type) if self._peft_has_been_casted_to_bf16 else nullcontext() + ) + + with generate_context_manager: + policy_output = model.generate( + input_ids=batch["prompt_input_ids"], + attention_mask=batch["prompt_attention_mask"], + max_length=self.max_length, + do_sample=True, + pad_token_id=self.processing_class.pad_token_id, + ) + + policy_output = pad_to_length(policy_output, self.max_length, self.processing_class.pad_token_id) + policy_output_decoded = self.processing_class.batch_decode(policy_output, skip_special_tokens=True) + + return policy_output_decoded + + def prediction_step( + self, + model: Union[PreTrainedModel, nn.Module], + inputs: dict[str, Union[torch.Tensor, Any]], + prediction_loss_only: bool, + ignore_keys: Optional[list[str]] = None, + ): + if not self.use_dpo_data_collator: + logger.warning( + "prediction_step is only implemented for DPODataCollatorWithPadding, and you passed a datacollator that is different than " + "DPODataCollatorWithPadding - you might see unexpected behavior. 
Alternatively, you can implement your own prediction_step method if you are using a custom data collator" + ) + if ignore_keys is None: + if hasattr(model, "config"): + ignore_keys = getattr(model.config, "keys_to_ignore_at_inference", []) + else: + ignore_keys = [] + + prediction_context_manager = ( + autocast(self.accelerator.device.type) if self._peft_has_been_casted_to_bf16 else nullcontext() + ) + + with torch.no_grad(), prediction_context_manager: + loss, metrics = self.get_batch_loss_metrics(model, inputs, train_eval="eval") + + # force log the metrics + self.store_metrics(metrics, train_eval="eval") + + if prediction_loss_only: + return (loss.detach(), None, None) + + # logits for the chosen and rejected samples from model + logits_dict = { + "eval_logits/chosen": metrics["eval_logits/chosen"], + "eval_logits/rejected": metrics["eval_logits/rejected"], + } + logits = [v for k, v in logits_dict.items() if k not in ignore_keys] + logits = torch.tensor(logits, device=self.accelerator.device) + labels = torch.zeros(logits.shape[0], device=self.accelerator.device) + + return (loss.detach(), logits, labels) + + def store_metrics(self, metrics: dict[str, float], train_eval: Literal["train", "eval"] = "train") -> None: + for key, value in metrics.items(): + self._stored_metrics[train_eval][key].append(value) + + def evaluation_loop( + self, + dataloader: DataLoader, + description: str, + prediction_loss_only: Optional[bool] = None, + ignore_keys: Optional[list[str]] = None, + metric_key_prefix: str = "eval", + ) -> EvalLoopOutput: + """ + Overriding built-in evaluation loop to store metrics for each batch. Prediction/evaluation loop, shared by + `Trainer.evaluate()` and `Trainer.predict()`. + + Works both with or without labels. + """ + + # Sample and save to game log if requested (for one batch to save time) + if self.generate_during_eval: + # Generate random indices within the range of the total number of samples + num_samples = len(dataloader.dataset) + random_indices = random.sample(range(num_samples), k=self.args.eval_batch_size) + + # Use dataloader.dataset.select to get the random batch without iterating over the DataLoader + random_batch_dataset = dataloader.dataset.select(random_indices) + random_batch = self.data_collator(random_batch_dataset) + random_batch = self._prepare_inputs(random_batch) + + policy_output_decoded = self.generate_from_model(self.model, random_batch) + + table = pd.DataFrame( + columns=["Prompt", "Policy"], + data=[ + [prompt, pol[len(prompt) :]] for prompt, pol in zip(random_batch["prompt"], policy_output_decoded) + ], + ) + if "wandb" in self.args.report_to: + wandb.log({"game_log": wandb.Table(data=table)}) + + if "comet_ml" in self.args.report_to: + log_table_to_comet_experiment( + name="game_log.csv", + table=table, + ) + + # Base evaluation + initial_output = super().evaluation_loop( + dataloader, description, prediction_loss_only, ignore_keys, metric_key_prefix + ) + + return initial_output + + def log(self, logs: dict[str, float], start_time: Optional[float] = None) -> None: + """ + Log `logs` on the various objects watching training, including stored metrics. + + Args: + logs (`dict[str, float]`): + The values to log. + start_time (`float` or `None`, *optional*, defaults to `None`): + Start time of the training. 
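+
+ The batch metrics buffered via `store_metrics` since the last log are averaged, merged into
+ `logs`, and then cleared before delegating to the parent `log`.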
+ """ + # logs either has 'loss' or 'eval_loss' + train_eval = "train" if "loss" in logs else "eval" + # Add averaged stored metrics to logs + for key, metrics in self._stored_metrics[train_eval].items(): + logs[key] = torch.tensor(metrics).mean().item() + del self._stored_metrics[train_eval] + return super().log(logs, start_time) + + def _shift_right(self, input_ids): + if self.decoder_start_token_id is None: + raise ValueError( + "model.config.decoder_start_token_id has to be defined. It is usually set to the pad_token_id." + ) + + # shift inputs to the right + if is_torch_fx_proxy(input_ids): + # Item assignment is not supported natively for proxies. + shifted_input_ids = torch.full(input_ids.shape[:-1] + (1,), self.decoder_start_token_id) + shifted_input_ids = torch.cat([shifted_input_ids, input_ids[..., :-1]], dim=-1) + else: + shifted_input_ids = input_ids.new_zeros(input_ids.shape) + shifted_input_ids[..., 1:] = input_ids[..., :-1].clone() + shifted_input_ids[..., 0] = self.decoder_start_token_id + + if self.pad_token_id is None: + raise ValueError("model.config.pad_token_id has to be defined.") + # replace possible -100 values in labels by `pad_token_id` + shifted_input_ids.masked_fill_(shifted_input_ids == -100, self.pad_token_id) + + return shifted_input_ids + + # Ensure the model card is saved along with the checkpoint + def _save_checkpoint(self, model, trial): + if self.args.hub_model_id is None: + model_name = Path(self.args.output_dir).name + else: + model_name = self.args.hub_model_id.split("/")[-1] + self.create_model_card(model_name=model_name) + super()._save_checkpoint(model, trial) + + def create_model_card( + self, + model_name: Optional[str] = None, + dataset_name: Optional[str] = None, + tags: Union[str, list[str], None] = None, + ): + """ + Creates a draft of a model card using the information available to the `Trainer`. + + Args: + model_name (`str` or `None`, *optional*, defaults to `None`): + Name of the model. + dataset_name (`str` or `None`, *optional*, defaults to `None`): + Name of the dataset used for training. + tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`): + Tags to be associated with the model card. 
+ """ + if not self.is_world_process_zero(): + return + + if hasattr(self.model.config, "_name_or_path") and not os.path.isdir(self.model.config._name_or_path): + base_model = self.model.config._name_or_path + else: + base_model = None + + # normalize `tags` to a mutable set + if tags is None: + tags = set() + elif isinstance(tags, str): + tags = {tags} + else: + tags = set(tags) + + if hasattr(self.model.config, "unsloth_version"): + tags.add("unsloth") + + if "JOB_ID" in os.environ: + tags.add("hf_jobs") + + tags.update(self._tag_names) + + # docstyle-ignore + citation = textwrap.dedent("""\ + @article{hong2024orpo, + title = {{ORPO: Monolithic Preference Optimization without Reference Model}}, + author = {Jiwoo Hong and Noah Lee and James Thorne}, + year = 2024, + eprint = {arXiv:2403.07691} + }""") + + model_card = generate_model_card( + base_model=base_model, + model_name=model_name, + hub_model_id=self.hub_model_id, + dataset_name=dataset_name, + tags=tags, + wandb_url=wandb.run.url if is_wandb_available() and wandb.run is not None else None, + comet_url=get_comet_experiment_url(), + trainer_name="ORPO", + trainer_citation=citation, + paper_title="ORPO: Monolithic Preference Optimization without Reference Model", + paper_id="2403.07691", + ) + + model_card.save(os.path.join(self.args.output_dir, "README.md")) +class UnslothORPOTrainer(_UnslothORPOTrainer): + """ + +Initialize ORPOTrainer. + +Args: + model (`transformers.PreTrainedModel`): + The model to train, preferably an `AutoModelForSequenceClassification`. + args (`ORPOConfig`): + The ORPO config arguments to use for training. + data_collator (`transformers.DataCollator`): + The data collator to use for training. If None is specified, the default data collator + (`DPODataCollatorWithPadding`) will be used which will pad the sequences to the maximum length of the + sequences in the batch, given a dataset of paired sequences. + train_dataset (`datasets.Dataset`): + The dataset to use for training. + eval_dataset (`datasets.Dataset`): + The dataset to use for evaluation. + processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`): + Processing class used to process the data. If provided, will be used to automatically process the inputs + for the model, and it will be saved along the model to make it easier to rerun an interrupted training or + reuse the fine-tuned model. + model_init (`Callable[[], transformers.PreTrainedModel]`): + The model initializer to use for training. If None is specified, the default model initializer will be + used. + callbacks (`list[transformers.TrainerCallback]`): + The callbacks to use for training. + optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`): + The optimizer and scheduler to use for training. + preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`): + The function to use to preprocess the logits before computing the metrics. + peft_config (`dict`, defaults to `None`): + The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in + a PEFT model. + compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*): + The function to use to compute the metrics. Must take a `EvalPrediction` and return a dictionary string to + metric values. 
+ + """ + def __init__( + self, + model = None, + args = None, + data_collator = None, + train_dataset = None, + eval_dataset = None, + processing_class = None, + model_init = None, + callbacks = None, + preprocess_logits_for_metrics = None, + peft_config = None, + compute_metrics = None, + **kwargs + ): + if args is None: args = UnslothORPOConfig() + use_bf16 = getattr(args, 'bf16', False) + if type(use_bf16) is not bool: use_bf16 = False + use_fp16 = getattr(args, 'fp16', False) + if type(use_fp16) is not bool: use_fp16 = False + force_float32 = False + full_finetuning = os.environ.get('UNSLOTH_ENABLE_FULL_FINETUNING', '0') == '1' + if not full_finetuning and (os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '1'): + print('Unsloth: Switching to float32 training since model cannot work with float16') + force_float32 = True + mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') + dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None) + if dtype is None: dtype = model.get_input_embeddings().dtype + from unsloth_zoo.utils import _get_dtype + dtype = _get_dtype(dtype) + float16 = dtype == torch.float16 + if not force_float32 and (float16 and use_bf16): raise TypeError('Unsloth: Model is in float16 precision but you want to use bfloat16 precision. Set fp16 to `True` and bf16 to `False`') + if not force_float32 and (not float16 and use_fp16): raise TypeError('Unsloth: Model is in bfloat16 precision but you want to use float16 precision. Set fp16 to `False` and bf16 to `True`') + if force_float32: + # Forced float32 training + args.fp16 = False + args.bf16 = False + os.environ['ACCELERATE_MIXED_PRECISION'] = 'no' + elif (not use_bf16 and not use_fp16) and mixed_precision_dtype == 'float32': + # Mixed precision training + args.fp16 = float16 + args.bf16 = not float16 + os.environ['ACCELERATE_MIXED_PRECISION'] = 'fp16' if float16 else 'bf16' + if getattr(args, 'eval_dataset', None) is not None and getattr(args, 'eval_strategy', 'no') == 'no': + args.eval_strategy = 'steps' + if getattr(args, 'eval_steps', None) is None: args.eval_steps = 0.1 + ga_steps = getattr(args, 'gradient_accumulation_steps', None) + if ga_steps is not None and ga_steps > 1: + from transformers import __version__ as transformers_version + if Version(transformers_version) <= Version('4.45.2'): + print('**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!\n' + '`pip install --upgrade --no-cache-dir --force-reinstall --no-deps unsloth transformers trl unsloth_zoo`') + if getattr(args, 'eval_strategy', 'no') != 'no': + eval_bsz = getattr(args, 'per_device_eval_batch_size', 8) + if eval_bsz == 8 and args.per_device_train_batch_size < eval_bsz: args.per_device_eval_batch_size = args.per_device_train_batch_size + if getattr(args, 'eval_accumulation_steps', None) is None and ga_steps is not None: args.eval_accumulation_steps = ga_steps + fp16_full_eval = getattr(args, 'fp16_full_eval', False) + if type(fp16_full_eval) is not bool: fp16_full_eval = False + bf16_full_eval = getattr(args, 'bf16_full_eval', False) + if type(bf16_full_eval) is not bool: bf16_full_eval = False + if args.fp16 and bf16_full_eval: args.bf16_full_eval = False; args.fp16_full_eval = True + if args.bf16 and fp16_full_eval: args.bf16_full_eval = True; args.fp16_full_eval = False + if force_float32: + args.bf16_full_eval = False + args.fp16_full_eval = False + elif os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') == 'bfloat16': + args.bf16_full_eval 
= True + args.fp16_full_eval = False + elif not bf16_full_eval and not fp16_full_eval: + args.bf16_full_eval = args.bf16 + args.fp16_full_eval = args.fp16 + _output_logits = False + if locals().get('compute_metrics', None) is not None: _output_logits = True + if locals().get('preprocess_logits_for_metrics', None) is not None: _output_logits = True + if _output_logits: + os.environ['UNSLOTH_RETURN_LOGITS'] = '1' + if 'max_seq_length' not in locals() and not hasattr(args, 'max_seq_length'): + pass + else: + model_max_seq_length = getattr(model, 'max_seq_length', None) + args_max_seq_length = getattr(args, 'max_seq_length', None) + if args_max_seq_length is None and model_max_seq_length is not None: + max_seq_length = model.max_seq_length + if hasattr(args, 'max_seq_length'): args.max_seq_length = max_seq_length + if model is not None and hasattr(model, 'for_training'): + model.for_training() + if 'tokenizer' in locals() and hasattr(tokenizer, 'padding_side'): tokenizer.padding_side = 'right' + if 'processing_class' in locals(): + if hasattr(processing_class, 'padding_side'): processing_class.padding_side = 'right' + if hasattr(processing_class, 'tokenizer') and hasattr(processing_class.tokenizer, 'padding_side'): processing_class.tokenizer.padding_side = 'right' + __tokenizer = processing_class if 'processing_class' in locals() else tokenizer + from unsloth_zoo.vision_utils import UnslothVisionDataCollator + if not isinstance(data_collator, UnslothVisionDataCollator): + if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names: + data_collator = DataCollatorForSeq2Seq( + __tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False + if hasattr(args, 'dataset_text_field'): args.dataset_text_field = '' + if hasattr(args, 'dataset_kwargs'): args.dataset_kwargs = {'skip_prepare_dataset': True} + if not isinstance(data_collator, UnslothVisionDataCollator): + if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'): + if isinstance(data_collator, DataCollatorForSeq2Seq): + data_collator = DataCollatorForSeq2Seq( + __tokenizer.tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer.tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + other_metrics = [] + + from unsloth_zoo.logging_utils import PatchRLStatistics + PatchRLStatistics('orpo_trainer', other_metrics) + + # [TODO] Fix up DataParallel multiplying batch sizes + # [TODO] DDP works, but DP seems to not work? 
[TODO] + if getattr(args, "parallel_mode", None) == ParallelMode.NOT_DISTRIBUTED and args.n_gpu > 1: + if getattr(args, "_n_gpu", 1) != 1: + args._n_gpu = 1 + if "model" in locals() and hasattr(model, "for_training"): + model.for_training() + super().__init__( + model = model, + args = args, + data_collator = data_collator, + train_dataset = train_dataset, + eval_dataset = eval_dataset, + processing_class = processing_class, + model_init = model_init, + callbacks = callbacks, + preprocess_logits_for_metrics = preprocess_logits_for_metrics, + peft_config = peft_config, + compute_metrics = compute_metrics,**kwargs) + if "model" in locals() and hasattr(model, "for_inference"): + model.for_inference() + if hasattr(self, 'neftune_hook_handle'): + self.neftune_hook_handle.remove() + if hasattr(self, 'neftune_hook_handle'): del self.neftune_hook_handle + if getattr(args, 'neftune_noise_alpha', None) is not None: + model.get_input_embeddings().neftune_noise_alpha = self.neftune_noise_alpha + pass + if hasattr(self, 'accelerator'): + scaler = self.accelerator.scaler + current_model = model + while hasattr(current_model, 'model'): + current_model.accelerator_scaler = scaler + current_model = current_model.model + current_model.accelerator_scaler = scaler + pass + if hasattr(self, 'train'): + self.train = MethodType(prepare_for_training_mode(self.__class__.train), self) + pass + +pass + + +if hasattr(logger, "addFilter"): + import logging + class HideLoggingMessage(logging.Filter): + def __init__(self, text): self.text = text + def filter(self, x): return not (self.text in x.getMessage()) + pass + logger.addFilter(HideLoggingMessage("`use_cache=True`")) + diff --git a/unsloth_compiled_cache/UnslothOnlineDPOTrainer.py b/unsloth_compiled_cache/UnslothOnlineDPOTrainer.py new file mode 100644 index 0000000000000000000000000000000000000000..a2306bf2979e6a94198c72cc7c936d58238cc0e8 --- /dev/null +++ b/unsloth_compiled_cache/UnslothOnlineDPOTrainer.py @@ -0,0 +1,2403 @@ +""" +2025.11.2 +2025.11.1 +4.57.2 +0.23.0 +__UNSLOTH_VERSIONING__ +""" + +# Unsloth auto generated code +# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . 
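+
+# Illustrative usage sketch for this auto-generated module (all names below are
+# placeholders; assumes `model`, `tokenizer`, `judge` and `dataset` were already
+# prepared, e.g. with unsloth's loaders):
+#
+#     from unsloth_compiled_cache.UnslothOnlineDPOTrainer import (
+#         UnslothOnlineDPOConfig, UnslothOnlineDPOTrainer,
+#     )
+#     args = UnslothOnlineDPOConfig(output_dir = "online-dpo-outputs")
+#     trainer = UnslothOnlineDPOTrainer(model = model, judge = judge, args = args,
+#         train_dataset = dataset, processing_class = tokenizer)
+#     trainer.train()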
+ +from torch import Tensor +import torch +import torch.nn as nn +from torch.nn import functional as F +from typing import Any, List, Optional, Tuple, Union, Dict, Set, Callable +from trl.trainer.online_dpo_trainer import (Any, AutoModelForCausalLM, AutoModelForSequenceClassification, AutoTokenizer, BasePairwiseJudge, Callable, DPODataCollatorWithPadding, DataCollator, DataLoader, Dataset, EvalPrediction, F, FSDP, GenerationConfig, IterableDataset, MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES, OnlineDPOConfig, OnlineDPOTrainer, OptimizerNames, Optional, Path, PeftConfig, PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin, RewardFunc, SIMPLE_CHAT_TEMPLATE, Trainer, TrainerCallback, Union, VLLMClient, apply_chat_template, broadcast_object_list, create_reference_model, disable_dropout_in_model, empty_cache, gather_object, generate_model_card, get_comet_experiment_url, is_conversational, is_flash_attn_2_available, is_peft_model, is_vllm_available, is_wandb_available, jinja2, logger, logging, maybe_apply_chat_template, nn, nullcontext, os, pad, prepare_deepspeed, prepare_peft_model, profiling_context, re, seed_worker, textwrap, torch, truncate_right, unwrap_model_for_generation, version, warnings, wraps, F, apply_chat_template, is_conversational, re, F, FSDP, is_peft_model, nn, nullcontext, os, re, version, F, Optional, PreTrainedModel, Trainer, logger, os, re, torch, F, FSDP, nn, os, re, F, FSDP, nn, re, torch) + + +import os +from typing import * +from dataclasses import dataclass, field +from packaging.version import Version +import torch +import numpy as np +from contextlib import nullcontext +from torch.nn import functional as F +from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling as TransformersDataCollatorForLanguageModeling +from transformers.training_args import ParallelMode + +# Wrap trainer with padding to right and enable training mode +import functools +from types import MethodType +def prepare_for_training_mode(f): + @functools.wraps(f) + def wrapper(self, *args, **kwargs): + # Enable training mode + if hasattr(self, 'model') and hasattr(self.model, "for_training"): + self.model.for_training() + output = f(self, *args, **kwargs) + # Return inference mode + if hasattr(self, 'model') and hasattr(self.model, "for_inference"): + self.model.for_inference() + return output + return wrapper +pass + +torch_compile_options = { + "epilogue_fusion" : True, + "max_autotune" : False, + "shape_padding" : True, + "trace.enabled" : False, + "triton.cudagraphs" : False, +} + +@torch.compile(dynamic = True, fullgraph = True, options = torch_compile_options,) +def chunked_selective_log_softmax(logits, index): + # Split into 4 chunks only + chunked_logits = torch.chunk(logits.reshape(-1, logits.shape[-1]), chunks = 4, dim = 0) + chunked_index = torch.chunk(index.reshape(-1), chunks = 4, dim = 0) + all_per_token_logps = [] + # Below loop does the same as selective_log_softmax(chunk_logits, chunk_index) + for chunk_logits, chunk_index in zip(chunked_logits, chunked_index): + chunk_logits = chunk_logits.to(torch.float32) + selected_logits = torch.gather(chunk_logits, dim = -1, index = chunk_index.unsqueeze(-1)).squeeze(-1) + logsumexp_values = torch.logsumexp(chunk_logits, dim = -1) + per_token_logps = selected_logits - logsumexp_values + all_per_token_logps.append(per_token_logps) + pass + all_per_token_logps = torch.concat(all_per_token_logps) + all_per_token_logps = all_per_token_logps.reshape((logits.shape[0], logits.shape[1])) + return all_per_token_logps + +def 
calculate_pad_tokens_in_prompt(
+    input_ids: torch.Tensor,
+    logits_to_keep: int,
+    pad_token_id: int
+) -> torch.Tensor:
+    """
+    Given a prompt tensor, returns the number of left-padding tokens in each
+    sequence, e.g. [pad, pad, pad, cat] -> 3 tokens.
+    """
+    if logits_to_keep >= input_ids.shape[1]:
+        raise ValueError("logits_to_keep must be smaller than the sequence length.")
+
+    prompt_section = input_ids[:, :-logits_to_keep]
+
+    padding_mask = (prompt_section == pad_token_id)
+
+    pad_token_counts = padding_mask.sum(dim=1)
+
+    return pad_token_counts
+
+def create_completion_attention_mask(
+    completion_input_ids: torch.Tensor,
+    left_pad_tokens_per_prompt: torch.Tensor,
+    max_left_pad: int,
+    pad_token_id: int
+) -> torch.Tensor:
+    """
+    Given a sequence [p,p,p,c,c,c,pad,pad,pad], where p are extra prompt tokens
+    obtained from slicing the tensor, c are completion tokens and pad are pad
+    tokens, this function builds a completion mask that zeroes out the pad and
+    p tokens, i.e. [0,0,0,1,1,1,0,0,0] for this example.
+    """
+    batch_size, completion_len = completion_input_ids.shape
+    device = completion_input_ids.device
+
+    num_tokens_to_mask = max_left_pad - left_pad_tokens_per_prompt
+
+    indices = torch.arange(completion_len, device=device).unsqueeze(0)
+    shift_mask = indices >= num_tokens_to_mask.unsqueeze(1)
+
+    non_padding_mask = (completion_input_ids != pad_token_id)
+
+    final_mask = shift_mask & non_padding_mask
+
+    return final_mask
+
+def left_pack_padding(tensor: torch.Tensor, pad_id: int) -> torch.Tensor:
+    """
+    Packs the non-padding tokens of each sequence to the left, moving all
+    padding tokens in the batch to the right.
+    """
+    mask = (tensor != pad_id)
+    # Must use stable=True since the binary mask is unordered
+    sorted_indices = torch.argsort(mask, dim=1, descending=True, stable=True)
+    packed_tensor = torch.gather(tensor, 1, sorted_indices)
+    return packed_tensor
+
+def align_logprobs_with_mask(
+    logprob_tensor: torch.Tensor,
+    attention_mask: torch.Tensor,
+    pad_value: float = 0.0
+) -> torch.Tensor:
+    """
+    Aligns a log probability tensor with a given attention mask.
+    """
+
+    device = logprob_tensor.device
+    batch_size, logprob_seq_len = logprob_tensor.shape
+    mask_seq_len = attention_mask.shape[1]
+
+    padded_logprobs = torch.full(
+        attention_mask.shape,
+        fill_value=pad_value,
+        dtype=logprob_tensor.dtype,
+        device=device
+    )
+
+    left_pad_counts = torch.argmax(attention_mask, dim=1)
+
+    cols = torch.arange(logprob_seq_len, device=device)
+    dest_indices = left_pad_counts.unsqueeze(1) + cols
+
+    # Create destination row indices
+    # Shape: [batch_size, logprob_seq_len]
+    row_indices = torch.arange(batch_size, device=device).unsqueeze(1).expand_as(dest_indices)
+
+    # Filter out-of-bounds indices and perform the assignment.
+    # Create a mask to identify only the indices that are within the bounds
+    # of the target tensor's sequence length.
+    valid_mask = dest_indices < mask_seq_len
+
+    # Use this mask to select only the valid row indices, column indices,
+    # and the corresponding values from the logprob tensor.
+    # This flattens the selected elements into 1D tensors.
+    valid_rows = row_indices[valid_mask]
+    valid_cols = dest_indices[valid_mask]
+    valid_vals = logprob_tensor[valid_mask]
+
+    # Place the valid values into their correct positions in the padded tensor
+    # using a single, efficient advanced indexing operation.
+    padded_logprobs[valid_rows, valid_cols] = valid_vals
+
+    return padded_logprobs
+
+def vLLMSamplingParams(**kwargs):
+    from vllm import SamplingParams
+    sampling_params = SamplingParams(**kwargs)
+    sampling_params._set_kwargs = kwargs
+    return sampling_params
+
+@dataclass
+class UnslothOnlineDPOConfig(OnlineDPOConfig):
+    """
+
+Configuration class for the [`OnlineDPOTrainer`].
+
+This class includes only the parameters that are specific to Online DPO training. For a full list of training
+arguments, please refer to the [`~transformers.TrainingArguments`] documentation. Note that default values in this
+class may differ from those in [`~transformers.TrainingArguments`].
+
+Using [`~transformers.HfArgumentParser`] we can turn this class into
+[argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the
+command line.
+
+Parameters:
+    reward_model_path (`str` or `None`, *optional*, defaults to `None`):
+        Path to the reward model. Either `judge` or `reward_model_path` must be set, but not both.
+    judge (`str` or `None`, *optional*, defaults to `None`):
+        Name of the judge to use. Either `judge` or `reward_model_path` must be set, but not both.
+    max_new_tokens (`int`, *optional*, defaults to `64`):
+        Maximum number of tokens to generate per completion.
+    max_length (`int`, *optional*, defaults to `512`):
+        Maximum total length of the sequence (prompt + completion) used to compute log probabilities. If the
+        sequence exceeds this limit, the leftmost tokens will be truncated to preserve as much of the completion as
+        possible.
+    temperature (`float`, *optional*, defaults to `0.9`):
+        Temperature for sampling. The higher the temperature, the more random the completions.
+    missing_eos_penalty (`float` or `None`, *optional*, defaults to `None`):
+        Penalty applied to the score when the model fails to generate an EOS token. This is useful to encourage
+        the model to generate completions shorter than the maximum length (`max_new_tokens`). The penalty must be a
+        positive value. This parameter only works when using `reward_funcs` and not when using `judge`.
+    beta (`float` or `list[float]`, *optional*, defaults to `0.1`):
+        Parameter controlling the deviation from the reference model. Higher β means less deviation from the
+        reference model. For the IPO loss (`loss_type="ipo"`), β is the regularization parameter denoted by τ in
+        the [paper](https://huggingface.co/papers/2310.12036). If a list of floats is provided, the β for each
+        epoch is taken from the list in order, and the last β is used for all remaining epochs.
+    loss_type (`str`, *optional*, defaults to `"sigmoid"`):
+        Type of loss to use. Possible values are:
+
+        - `"sigmoid"`: sigmoid loss from the original [DPO](https://huggingface.co/papers/2305.18290) paper.
+        - `"ipo"`: IPO loss from the [IPO](https://huggingface.co/papers/2310.12036) paper.
+
+    dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
+        Number of processes to use for processing the dataset.
+    disable_dropout (`bool`, *optional*, defaults to `True`):
+        Whether to disable dropout in the model and reference model.
+
+    > Parameters that control generation
+
+    top_p (`float`, *optional*, defaults to `1.0`):
+        Float that controls the cumulative probability of the top tokens to consider. Must be in (0, 1]. Set to
+        `1.0` to consider all tokens.
+    top_k (`int` or `None`, *optional*, defaults to `None`):
+        Number of highest probability vocabulary tokens to keep for top-k-filtering. If `None`, top-k-filtering is
+        disabled and all tokens are considered.
+    min_p (`float` or `None`, *optional*, defaults to `None`):
+        Minimum token probability, which will be scaled by the probability of the most likely token. It must be a
+        value between `0.0` and `1.0`. Typical values are in the `0.01-0.2` range.
+    repetition_penalty (`float`, *optional*, defaults to `1.0`):
+        Float that penalizes new tokens based on whether they appear in the prompt and the generated text so far.
+        Values > `1.0` encourage the model to use new tokens, while values < `1.0` encourage the model to repeat
+        tokens.
+    use_transformers_paged (`bool`, *optional*, defaults to `False`):
+        Whether to use the `transformers` paged implementation for generation. If set to `True`, the `transformers`
+        paged implementation will be used for generation instead of the default padded implementation. This
+        parameter is only effective when `use_vllm` is set to `False`.
+    cache_implementation (`str` or `None`, *optional*, defaults to `None`):
+        Implementation of the cache method for faster generation when `use_vllm` is set to `False`.
+    generation_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
+        Additional keyword arguments to pass to `GenerationConfig` (if using transformers) or `SamplingParams` (if
+        using vLLM) when sampling completions. This can be used to further customize the generation behavior, such
+        as setting `suppress_tokens`, `num_beams`, etc. If it contains keys that conflict with the other generation
+        parameters (like `min_p`, `top_p`, etc.), they will override them.
+
+    > Parameters that control generation acceleration powered by vLLM
+
+    use_vllm (`bool`, *optional*, defaults to `False`):
+        Whether to use vLLM for generating completions. If set to `True`, the trainer will use vLLM for generation
+        instead of the default model.generate(). Requires `vllm` to be installed.
+    vllm_model_impl (`str`, *optional*, defaults to `"vllm"`):
+        Model implementation to use for vLLM. Must be one of `"transformers"` or `"vllm"`. `"transformers"`: Use
+        the `transformers` backend for model implementation. `"vllm"`: Use the `vllm` library for model
+        implementation.
+    vllm_mode (`str`, *optional*, defaults to `"server"`):
+        Mode to use for vLLM integration when `use_vllm` is set to `True`. Must be one of `"server"` or
+        `"colocate"`.
+
+        - `"server"`: The trainer will send generation requests to a separate vLLM server. Make sure a TRL vLLM
+          server is running (start with `trl vllm-serve`).
+        - `"colocate"`: vLLM will run in the same process and share the training GPUs. This avoids the need for a
+          separate server but may cause resource contention with training.
+    vllm_guided_decoding_regex (`str` or `None`, *optional*, defaults to `None`):
+        Regex for vLLM guided decoding. If `None` (default), guided decoding is disabled.
+
+    > Parameters that control the vLLM server (only used when `vllm_mode` is `"server"`)
+
+    vllm_server_base_url (`str` or `None`, *optional*, defaults to `None`):
+        Base URL for the vLLM server (e.g., `"http://localhost:8000"`). If provided, `vllm_server_host` and
+        `vllm_server_port` are ignored.
+    vllm_server_host (`str`, *optional*, defaults to `"0.0.0.0"`):
+        Host of the vLLM server to connect to. Ignored if `vllm_server_base_url` is provided.
+    vllm_server_port (`int`, *optional*, defaults to `8000`):
+        Port of the vLLM server to connect to. Ignored if `vllm_server_base_url` is provided.
+ vllm_server_timeout (`float`, *optional*, defaults to `240.0`): + Total timeout duration in seconds to wait for the vLLM server to be up. If the server is not up after the + timeout, a `ConnectionError` is raised. + + > Parameters that control colocated vLLM execution (only used when `vllm_mode` is `"colocate"`) + + vllm_gpu_memory_utilization (`float`, *optional*, defaults to `0.55`): + Control the GPU memory utilization for vLLM. This setting only applies when `vllm_mode` is set to + `"colocate"`. If you are using `vllm_mode="server"`, this parameter must be passed separately when + launching the vLLM server via the `--vllm_gpu_memory_utilization` flag. + vllm_tensor_parallel_size (`int`, *optional*, defaults to `1`): + Control the tensor parallel size for vLLM. This setting only applies when `vllm_mode` is set to + `"colocate"`. If you are using `vllm_mode="server"`, this parameter must be passed separately when + launching the vLLM server via the `--vllm_tensor_parallel_size` flag. + + > Other parameters + + ds3_gather_for_generation (`bool`, *optional*, defaults to `True`): + This setting applies to DeepSpeed ZeRO-3. If enabled, the policy model weights are gathered for generation, + improving generation speed. However, disabling this option allows training models that exceed the VRAM + capacity of a single GPU, albeit at the cost of slower generation. Disabling this option is not compatible + with vLLM generation. + model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`): + Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the model from a + string. + + """ + vllm_sampling_params: Optional[Any] = field( + default = None, + metadata = {'help': 'vLLM SamplingParams'}, + ) + unsloth_num_chunks : Optional[int] = field( + default = -1, + metadata = {'help': 'Chunk size to reduce memory usage. 
-1 is most efficient.'}, + ) + max_seq_length : Optional[int] = field( + default = None, + metadata = {'help': 'Maximum sequence length to truncate to.'}, + ) + def __init__( + self, + output_dir = None, + overwrite_output_dir = None, + do_train = False, + do_eval = False, + do_predict = False, + eval_strategy = 'no', + prediction_loss_only = False, + per_device_train_batch_size = 4, + per_device_eval_batch_size = 4, + per_gpu_train_batch_size = None, + per_gpu_eval_batch_size = None, + gradient_accumulation_steps = 2, + eval_accumulation_steps = 2, + eval_delay = 0, + torch_empty_cache_steps = 250, + learning_rate = 5e-05, + weight_decay = 0.01, + adam_beta1 = 0.9, + adam_beta2 = 0.999, + adam_epsilon = 1e-08, + max_grad_norm = 1.0, + num_train_epochs = 3.0, + max_steps = -1, + lr_scheduler_type = 'linear', + warmup_ratio = 0.1, + warmup_steps = 0, + log_level = 'passive', + log_level_replica = 'warning', + log_on_each_node = True, + logging_dir = None, + logging_strategy = 'steps', + logging_first_step = False, + logging_steps = 1, + logging_nan_inf_filter = False, + save_strategy = 'steps', + save_steps = 500, + save_total_limit = None, + save_safetensors = True, + save_on_each_node = False, + save_only_model = False, + restore_callback_states_from_checkpoint = False, + no_cuda = False, + use_cpu = False, + use_mps_device = False, + seed = 3407, + data_seed = 3407, + jit_mode_eval = False, + bf16 = False, + fp16 = False, + fp16_opt_level = 'O1', + half_precision_backend = 'auto', + bf16_full_eval = False, + fp16_full_eval = False, + tf32 = None, + local_rank = -1, + ddp_backend = None, + tpu_num_cores = None, + tpu_metrics_debug = False, + debug = '', + dataloader_drop_last = False, + eval_steps = None, + dataloader_num_workers = 0, + dataloader_prefetch_factor = None, + past_index = -1, + run_name = None, + disable_tqdm = None, + remove_unused_columns = True, + label_names = None, + load_best_model_at_end = False, + metric_for_best_model = None, + greater_is_better = None, + ignore_data_skip = False, + fsdp = None, + fsdp_min_num_params = 0, + fsdp_config = None, + fsdp_transformer_layer_cls_to_wrap = None, + accelerator_config = None, + parallelism_config = None, + deepspeed = None, + label_smoothing_factor = 0.0, + optim = 'adamw_8bit', + optim_args = None, + adafactor = False, + group_by_length = False, + length_column_name = 'length', + report_to = None, + project = 'huggingface', + trackio_space_id = 'trackio', + ddp_find_unused_parameters = None, + ddp_bucket_cap_mb = None, + ddp_broadcast_buffers = None, + dataloader_pin_memory = True, + dataloader_persistent_workers = False, + skip_memory_metrics = True, + use_legacy_prediction_loop = False, + push_to_hub = False, + resume_from_checkpoint = None, + hub_model_id = None, + hub_strategy = 'every_save', + hub_token = None, + hub_private_repo = None, + hub_always_push = False, + hub_revision = None, + gradient_checkpointing = True, + gradient_checkpointing_kwargs = None, + include_inputs_for_metrics = False, + eval_do_concat_batches = True, + fp16_backend = 'auto', + push_to_hub_model_id = None, + push_to_hub_organization = None, + push_to_hub_token = None, + mp_parameters = '', + auto_find_batch_size = False, + full_determinism = False, + torchdynamo = None, + ray_scope = 'last', + ddp_timeout = 1800, + torch_compile = False, + torch_compile_backend = None, + torch_compile_mode = None, + include_tokens_per_second = False, + include_num_input_tokens_seen = False, + neftune_noise_alpha = None, + optim_target_modules = None, + 
batch_eval_metrics = False,
+        eval_on_start = False,
+        use_liger_kernel = False,
+        liger_kernel_config = None,
+        eval_use_gather_object = False,
+        average_tokens_across_devices = True,
+        reward_model_path = None,
+        judge = None,
+        max_new_tokens = 64,
+        max_length = 512,
+        temperature = 0.9,
+        top_p = 1.0,
+        top_k = None,
+        min_p = None,
+        repetition_penalty = 1.0,
+        generation_kwargs = {},
+        use_transformers_paged = False,
+        cache_implementation = None,
+        missing_eos_penalty = None,
+        loss_type = 'sigmoid',
+        disable_dropout = True,
+        use_vllm = False,
+        vllm_model_impl = 'vllm',
+        vllm_guided_decoding_regex = None,
+        vllm_gpu_memory_utilization = 0.55,
+        vllm_mode = 'colocate',
+        vllm_server_base_url = None,
+        vllm_server_host = '0.0.0.0',
+        vllm_server_port = 8000,
+        vllm_server_timeout = 240.0,
+        vllm_tensor_parallel_size = 1,
+        ds3_gather_for_generation = True,
+        model_init_kwargs = None,
+        reward_weights = None,
+        dataset_num_proc = None,
+        gpu_memory_utilization = None,
+        vllm_sampling_params = None,
+        unsloth_num_chunks = -1,
+        max_seq_length = None,
+        **kwargs,
+    ):
+        if learning_rate < 1e-7: print(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
+        if learning_rate > 1: print(f'Unsloth: Your learning rate of `{learning_rate}` is way too large (> 1)! Consider decreasing it to 1e-1, otherwise gradient updates will explode!')
+        if output_dir is None and save_strategy == 'steps' and save_steps == 500:
+            output_dir = 'unsloth_training_checkpoints'
+            save_strategy = 'no'
+        if dataset_num_proc is None:
+            from multiprocessing import cpu_count
+            dataset_num_proc = min(max(cpu_count()+4, 2), 64)
+        if temperature <= 0:
+            raise ValueError('Unsloth: Please set a positive non-zero temperature since your results will be wrong.')
+        elif temperature >= 10:
+            raise ValueError('Unsloth: Please set a positive non-zero temperature less than 10, since sampling will be quite erratic.')
+
+
+        super().__init__(
+            output_dir = output_dir,
+            overwrite_output_dir = overwrite_output_dir,
+            do_train = do_train,
+            do_eval = do_eval,
+            do_predict = do_predict,
+            eval_strategy = eval_strategy,
+            prediction_loss_only = prediction_loss_only,
+            per_device_train_batch_size = per_device_train_batch_size,
+            per_device_eval_batch_size = per_device_eval_batch_size,
+            per_gpu_train_batch_size = per_gpu_train_batch_size,
+            per_gpu_eval_batch_size = per_gpu_eval_batch_size,
+            gradient_accumulation_steps = gradient_accumulation_steps,
+            eval_accumulation_steps = eval_accumulation_steps,
+            eval_delay = eval_delay,
+            torch_empty_cache_steps = torch_empty_cache_steps,
+            learning_rate = learning_rate,
+            weight_decay = weight_decay,
+            adam_beta1 = adam_beta1,
+            adam_beta2 = adam_beta2,
+            adam_epsilon = adam_epsilon,
+            max_grad_norm = max_grad_norm,
+            num_train_epochs = num_train_epochs,
+            max_steps = max_steps,
+            lr_scheduler_type = lr_scheduler_type,
+            warmup_ratio = warmup_ratio,
+            warmup_steps = warmup_steps,
+            log_level = log_level,
+            log_level_replica = log_level_replica,
+            log_on_each_node = log_on_each_node,
+            logging_dir = logging_dir,
+            logging_strategy = logging_strategy,
+            logging_first_step = logging_first_step,
+            logging_steps = logging_steps,
+            logging_nan_inf_filter = logging_nan_inf_filter,
+            save_strategy = save_strategy,
+            save_steps = save_steps,
+            save_total_limit = save_total_limit,
+            save_safetensors = save_safetensors,
+            save_on_each_node = save_on_each_node,
+            save_only_model = save_only_model,
+
restore_callback_states_from_checkpoint = restore_callback_states_from_checkpoint, + no_cuda = no_cuda, + use_cpu = use_cpu, + use_mps_device = use_mps_device, + seed = seed, + data_seed = data_seed, + jit_mode_eval = jit_mode_eval, + bf16 = bf16, + fp16 = fp16, + fp16_opt_level = fp16_opt_level, + half_precision_backend = half_precision_backend, + bf16_full_eval = bf16_full_eval, + fp16_full_eval = fp16_full_eval, + tf32 = tf32, + local_rank = local_rank, + ddp_backend = ddp_backend, + tpu_num_cores = tpu_num_cores, + tpu_metrics_debug = tpu_metrics_debug, + debug = debug, + dataloader_drop_last = dataloader_drop_last, + eval_steps = eval_steps, + dataloader_num_workers = dataloader_num_workers, + dataloader_prefetch_factor = dataloader_prefetch_factor, + past_index = past_index, + run_name = run_name, + disable_tqdm = disable_tqdm, + remove_unused_columns = remove_unused_columns, + label_names = label_names, + load_best_model_at_end = load_best_model_at_end, + metric_for_best_model = metric_for_best_model, + greater_is_better = greater_is_better, + ignore_data_skip = ignore_data_skip, + fsdp = fsdp, + fsdp_min_num_params = fsdp_min_num_params, + fsdp_config = fsdp_config, + fsdp_transformer_layer_cls_to_wrap = fsdp_transformer_layer_cls_to_wrap, + accelerator_config = accelerator_config, + parallelism_config = parallelism_config, + deepspeed = deepspeed, + label_smoothing_factor = label_smoothing_factor, + optim = optim, + optim_args = optim_args, + adafactor = adafactor, + group_by_length = group_by_length, + length_column_name = length_column_name, + report_to = report_to, + project = project, + trackio_space_id = trackio_space_id, + ddp_find_unused_parameters = ddp_find_unused_parameters, + ddp_bucket_cap_mb = ddp_bucket_cap_mb, + ddp_broadcast_buffers = ddp_broadcast_buffers, + dataloader_pin_memory = dataloader_pin_memory, + dataloader_persistent_workers = dataloader_persistent_workers, + skip_memory_metrics = skip_memory_metrics, + use_legacy_prediction_loop = use_legacy_prediction_loop, + push_to_hub = push_to_hub, + resume_from_checkpoint = resume_from_checkpoint, + hub_model_id = hub_model_id, + hub_strategy = hub_strategy, + hub_token = hub_token, + hub_private_repo = hub_private_repo, + hub_always_push = hub_always_push, + hub_revision = hub_revision, + gradient_checkpointing = gradient_checkpointing, + gradient_checkpointing_kwargs = gradient_checkpointing_kwargs, + include_inputs_for_metrics = include_inputs_for_metrics, + eval_do_concat_batches = eval_do_concat_batches, + fp16_backend = fp16_backend, + push_to_hub_model_id = push_to_hub_model_id, + push_to_hub_organization = push_to_hub_organization, + push_to_hub_token = push_to_hub_token, + mp_parameters = mp_parameters, + auto_find_batch_size = auto_find_batch_size, + full_determinism = full_determinism, + torchdynamo = torchdynamo, + ray_scope = ray_scope, + ddp_timeout = ddp_timeout, + torch_compile = torch_compile, + torch_compile_backend = torch_compile_backend, + torch_compile_mode = torch_compile_mode, + include_tokens_per_second = include_tokens_per_second, + include_num_input_tokens_seen = include_num_input_tokens_seen, + neftune_noise_alpha = neftune_noise_alpha, + optim_target_modules = optim_target_modules, + batch_eval_metrics = batch_eval_metrics, + eval_on_start = eval_on_start, + use_liger_kernel = use_liger_kernel, + liger_kernel_config = liger_kernel_config, + eval_use_gather_object = eval_use_gather_object, + average_tokens_across_devices = average_tokens_across_devices, + reward_model_path = 
reward_model_path, + judge = judge, + max_new_tokens = max_new_tokens, + max_length = max_length, + temperature = temperature, + top_p = top_p, + top_k = top_k, + min_p = min_p, + repetition_penalty = repetition_penalty, + generation_kwargs = generation_kwargs, + use_transformers_paged = use_transformers_paged, + cache_implementation = cache_implementation, + missing_eos_penalty = missing_eos_penalty, + loss_type = loss_type, + disable_dropout = disable_dropout, + use_vllm = use_vllm, + vllm_model_impl = vllm_model_impl, + vllm_guided_decoding_regex = vllm_guided_decoding_regex, + vllm_gpu_memory_utilization = vllm_gpu_memory_utilization, + vllm_mode = vllm_mode, + vllm_server_base_url = vllm_server_base_url, + vllm_server_host = vllm_server_host, + vllm_server_port = vllm_server_port, + vllm_server_timeout = vllm_server_timeout, + vllm_tensor_parallel_size = vllm_tensor_parallel_size, + ds3_gather_for_generation = ds3_gather_for_generation, + model_init_kwargs = model_init_kwargs, + reward_weights = reward_weights, + dataset_num_proc = dataset_num_proc, + gpu_memory_utilization = gpu_memory_utilization,**kwargs) + self.vllm_sampling_params = vllm_sampling_params + self.unsloth_num_chunks = unsloth_num_chunks + self.max_seq_length = max_seq_length +pass + +class _UnslothOnlineDPOTrainer(Trainer): + r""" + Initialize OnlineDPOTrainer. + + Args: + model (`Union[str, nn.Module, PreTrainedModel]`): + Model to be trained. Can be either: + + - A string, being the *model id* of a pretrained model hosted inside a model repo on huggingface.co, or a + path to a *directory* containing model weights saved using + [`~transformers.PreTrainedModel.save_pretrained`], e.g., `'./my_model_directory/'`. The model is loaded + using [`~transformers.AutoModelForCausalLM.from_pretrained`] with the keyword arguments in + `args.model_init_kwargs`. + - A [`~transformers.PreTrainedModel`] object. Only causal language models are supported. + ref_model (`transformers.PreTrainedModel` or `torch.nn.Module` or `None`): + The reference model to use for training. If None is specified, the reference model will be created from the + model. + judge (`BasePairwiseJudge`): + The judge to use for pairwise comparison of model completions. + reward_funcs (`Union[RewardFunc, list[RewardFunc]]`, *optional*, defaults to `None`): + Reward functions to be used for computing the rewards. To compute the rewards, we call all the reward + functions with the prompts and completions and sum the rewards. Can be either: + + - A single reward function: Can be a string (path to model), a [`~transformers.PreTrainedModel`], or a + custom callable function. + - A list of reward functions: Must all be of compatible types. + + Note: Only one of `judge`, or `reward_funcs` should be provided. + args (`OnlineDPOConfig`): + The online DPO config arguments to use for training. + data_collator (`transformers.DataCollator`): + The data collator to use for training. If None is specified, the default data collator + (`DPODataCollatorWithPadding`) will be used which will pad the sequences to the maximum length of the + sequences in the batch, given a dataset of paired sequences. + train_dataset ([`~datasets.Dataset`] or [`~datasets.IterableDataset`]): + The dataset to use for training. + eval_dataset ([`~datasets.Dataset`], [`~datasets.IterableDataset`] or `dict[str, Union[Dataset, IterableDataset]]`): + The dataset to use for evaluation. 
+ processing_class ([`~transformers.PreTrainedTokenizerBase`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`): + Processing class used to process the data. If provided, will be used to automatically process the inputs + for the model, and it will be saved along the model to make it easier to rerun an interrupted training or + reuse the fine-tuned model. + reward_processing_classes (`Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]`, *optional*, defaults to `None`): + Processing classes corresponding to the reward functions specified in `reward_funcs`. Can be either: + + - A single processing class: Used when `reward_funcs` contains only one reward function. + - A list of processing classes: Must match the order and length of the reward functions in `reward_funcs`. + + If set to `None`, the tokenizer for each model-based reward function is automatically loaded using + [`~transformers.AutoTokenizer.from_pretrained`]. + peft_config ([`~peft.PeftConfig`], *optional*, defaults to `None`): + PEFT configuration used to wrap the model. If `None`, the model is not wrapped. + compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*): + The function to use to compute the metrics. Must take a `EvalPrediction` and return a dictionary string to + metric values. + callbacks (`list[transformers.TrainerCallback]`): + The callbacks to use for training. + optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`): + The optimizer and scheduler to use for training. + preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`): + The function to use to preprocess the logits before computing the metrics. + + .. deprecated:: 0.22.0 + The following parameters are deprecated and will be removed in a future version: + + * `reward_model`: Use `reward_funcs` instead. For example, change `reward_model=model` to `reward_funcs=model`. + * `reward_processing_class`: Use `reward_processing_classes` instead. For example, change + `reward_processing_class=tokenizer` to `reward_processing_classes=tokenizer`. 
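+
+    Example (a minimal sketch; every name below is a placeholder, and exactly
+    one of `judge` / `reward_funcs` must be provided):
+
+    ```python
+    trainer = OnlineDPOTrainer(
+        model = "Qwen/Qwen2-0.5B-Instruct",       # or an already-loaded causal LM
+        reward_funcs = "my-org/my-reward-model",  # or judge=some_pairwise_judge
+        args = OnlineDPOConfig(output_dir = "online-dpo-outputs"),
+        processing_class = tokenizer,
+        train_dataset = train_dataset,
+    )
+    trainer.train()
+    ```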
+ """ + + _tag_names = ["trl", "online-dpo"] + + def __init__( + self, + model: Union[PreTrainedModel, nn.Module, str], + ref_model: Union[PreTrainedModel, nn.Module, None] = None, + reward_funcs: Optional[Union[RewardFunc, list[RewardFunc]]] = None, + judge: Optional[BasePairwiseJudge] = None, + args: Optional[OnlineDPOConfig] = None, + data_collator: Optional[DataCollator] = None, + train_dataset: Optional[Union[Dataset, IterableDataset]] = None, + eval_dataset: Optional[Union[Dataset, IterableDataset, dict[str, Union[Dataset, IterableDataset]]]] = None, + processing_class: Optional[Union[PreTrainedTokenizerBase, ProcessorMixin]] = None, + reward_processing_classes: Optional[Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]] = None, + peft_config: Optional["PeftConfig"] = None, + compute_metrics: Optional[Callable[[EvalPrediction], dict]] = None, + callbacks: Optional[list[TrainerCallback]] = None, + optimizers: tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None), + preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None, + # Deprecated parameters + reward_model: Optional[Union[PreTrainedModel, nn.Module]] = None, + reward_processing_class: Optional[PreTrainedTokenizerBase] = None, + ) -> None: + + if hasattr(model, 'vllm_engine') and hasattr(args, 'use_vllm'): + if (getattr(args, 'use_vllm', False) == False): + args.use_vllm = True + if ref_model is model: + raise ValueError( + "`model` and `ref_model` cannot be the same object. If you want `ref_model` to be the " + "same as `model`, either omit the `ref_model` argument or pass `None`." + ) + + self.ref_model = ref_model + + # Handle deprecated parameters for backward compatibility + if reward_model is not None: + warnings.warn( + "The `reward_model` parameter is deprecated and will be removed in version 0.25.0. " + "Please use `reward_funcs` instead. For example, change `reward_model=model` to `reward_funcs=model`.", + ) + # Convert old reward_model to new reward_funcs format + if reward_funcs is None: + reward_funcs = reward_model + else: + warnings.warn( + "Both `reward_model` and `reward_funcs` are provided. Using `reward_funcs` and ignoring " + "`reward_model`.", + ) + + if reward_processing_class is not None: + warnings.warn( + "The `reward_processing_class` parameter is deprecated and will be removed in version 0.25.0. " + "Please use `reward_processing_classes` instead. For example, change " + "`reward_processing_class=tokenizer` to `reward_processing_classes=tokenizer`.", + ) + # Convert old reward_processing_class to new reward_processing_classes format + if reward_processing_classes is None: + reward_processing_classes = reward_processing_class + else: + warnings.warn( + "Both `reward_processing_class` and `reward_processing_classes` are provided. Using " + "`reward_processing_classes` and ignoring `reward_processing_class`.", + ) + + # Validate reward configuration - must have exactly one of: judge, or reward_funcs + reward_configs = sum(x is not None for x in [judge, reward_funcs]) + if reward_configs == 0: + raise ValueError("One of `judge` or `reward_funcs` must be provided.") + elif reward_configs > 1: + if judge is not None: + logger.warning( + "Both `judge` and `reward_funcs` are provided. 
Using `judge` and ignoring `reward_funcs`.",
+                )
+                reward_funcs = None
+        self.judge = judge
+
+        # Handle reward_funcs
+        if reward_funcs is not None:
+            if not isinstance(reward_funcs, list):
+                reward_funcs = [reward_funcs]
+            self.reward_func_names = []
+
+            # Process reward functions [convert strings to models, collect names]
+            model_init_kwargs = args.model_init_kwargs or {}
+            for i, reward_func in enumerate(reward_funcs):
+                if isinstance(reward_func, str):
+                    # Load model from string path
+                    reward_funcs[i] = AutoModelForSequenceClassification.from_pretrained(
+                        reward_func, num_labels=1, **model_init_kwargs
+                    )
+                if isinstance(reward_funcs[i], nn.Module):
+                    self.reward_func_names.append(reward_funcs[i].config._name_or_path.split("/")[-1])
+                else:
+                    self.reward_func_names.append(reward_funcs[i].__name__)
+            self.reward_funcs = reward_funcs
+
+            # Handle reward processing classes for reward_funcs
+            if reward_processing_classes is None:
+                reward_processing_classes = [None] * len(reward_funcs)
+            elif not isinstance(reward_processing_classes, list):
+                reward_processing_classes = [reward_processing_classes]
+            else:
+                if len(reward_processing_classes) != len(reward_funcs):
+                    raise ValueError(
+                        "The number of reward processing classes must match the number of reward functions."
+                    )
+
+            self.reward_processing_classes = []
+            for reward_processing_class_i, reward_func in zip(reward_processing_classes, reward_funcs):
+                if isinstance(reward_func, PreTrainedModel):
+                    if reward_processing_class_i is None:
+                        reward_processing_class_i = AutoTokenizer.from_pretrained(reward_func.config._name_or_path)
+                    if reward_processing_class_i.pad_token_id is None:
+                        reward_processing_class_i.pad_token = reward_processing_class_i.eos_token
+                    # Set pad token ID on reward model config
+                    reward_func.config.pad_token_id = reward_processing_class_i.pad_token_id
+                self.reward_processing_classes.append(reward_processing_class_i)
+        else:
+            self.reward_funcs = None
+            self.reward_func_names = []
+            self.reward_processing_classes = []
+
+        # Handle reward_weights
+        if reward_funcs is not None:
+            if args.reward_weights is not None:
+                if len(args.reward_weights) != len(self.reward_funcs):
+                    raise ValueError(
+                        f"Number of reward weights ({len(args.reward_weights)}) must match number of reward "
+                        f"functions ({len(self.reward_funcs)})"
+                    )
+                self.reward_weights = torch.tensor(args.reward_weights, dtype=torch.float32)
+            else:
+                self.reward_weights = torch.ones(len(self.reward_funcs), dtype=torch.float32)
+        else:
+            self.reward_weights = None
+
+        if args.missing_eos_penalty is not None and reward_funcs is None and judge is None:
+            # Check if this is the old reward_model case
+            if reward_model is not None:
+                logger.warning(
+                    "The `missing_eos_penalty` parameter is deprecated when used with the deprecated `reward_model` parameter. "
" + "Please use `reward_funcs` instead of `reward_model` to continue using this feature.", + DeprecationWarning, + stacklevel=2, + ) + else: + raise ValueError("`missing_eos_penalty` is only supported when `reward_funcs` is provided.") + + if args is None: + raise ValueError("`args` must be provided.") + + # Check that the processing_class is provided + if processing_class is None: + raise ValueError("`processing_class` must be provided.") + + model_init_kwargs = args.model_init_kwargs or {} + if isinstance(model, str): + model_id = model + + # Handle dtype in model_init_kwargs + dtype = model_init_kwargs.get("dtype") + if isinstance(dtype, torch.dtype) or dtype == "auto" or dtype is None: + pass + elif isinstance(dtype, str): + dtype = getattr(torch, dtype) + model_init_kwargs["dtype"] = dtype + else: + raise ValueError( + "Invalid `dtype` passed to `OnlineDPOConfig`. Expected either 'auto' or a string " + f"representing a `torch.dtype` (e.g., 'float32'), but got {dtype}." + ) + + model = AutoModelForCausalLM.from_pretrained(model_id, **model_init_kwargs) + else: + if args.model_init_kwargs is not None: + raise ValueError( + "You passed `model_init_kwargs` to the `OnlineDPOConfig`, but your model is already instantiated. " + "This argument can only be used when the `model` argument is a string." + ) + self.is_encoder_decoder = model.config.is_encoder_decoder + self.is_vision_model = model.config.model_type in MODEL_FOR_IMAGE_TEXT_TO_TEXT_MAPPING_NAMES.keys() + + if False: + model = prepare_peft_model(model, peft_config, args) + + # Enable gradient checkpointing if requested + if args.gradient_checkpointing: + model = self._enable_gradient_checkpointing(model, args) + + # Disable dropout in the model and reference model + if args.disable_dropout: + disable_dropout_in_model(model) + if self.ref_model is not None: + disable_dropout_in_model(self.ref_model) + + # Handle the ref_model + # Usually, the user wants the ref model to be the initial version of the model. When using PEFT, it's easy to + # get the ref model, as it's just the model with a disabled adapter. When not using PEFT, we need to create + # the ref model from the model by copying it and disable the gradients and set it in evaluation mode. + if ref_model is None: # No ref model provided, the most common case + if False: + self.ref_model = create_reference_model(model) # copy, disable gradients, set eval mode + else: + self.ref_model = None # we don't need a ref model here, we can just disable the adapter. 
+ else: # rare case, the user provided a ref model + self.ref_model = ref_model + self.ref_model.eval() + + # Disable the gradient and set the reward model in eval mode + if reward_funcs is not None: + for reward_func in reward_funcs: + if isinstance(reward_func, PreTrainedModel): + reward_func.eval() + + self.max_length = args.max_length + + self.stats = { + "objective/kl": [], + "objective/entropy": [], + "objective/non_score_reward": [], + "rewards/chosen": [], + "rewards/rejected": [], + "rewards/accuracies": [], + "rewards/margins": [], + "logps/chosen": [], + "logps/rejected": [], + "val/contain_eos_token": [], + "beta": [], + } + if self.reward_funcs is not None: + self.stats["objective/rlhf_reward"] = [] + self.stats["objective/scores_margin"] = [] + self.stats["objective/scores"] = [] + + # Store generation parameters for later use + self.use_vllm = args.use_vllm + self.num_generations = 2 # Generate 2 completions per prompt for Online DPO + self.temperature = args.temperature + self.top_p = args.top_p + self.top_k = args.top_k + self.min_p = args.min_p + self.repetition_penalty = args.repetition_penalty + self.use_transformers_paged = args.use_transformers_paged + self.vllm_mode = args.vllm_mode if args.use_vllm else None + self.vllm_gpu_memory_utilization = args.vllm_gpu_memory_utilization + self.vllm_tensor_parallel_size = args.vllm_tensor_parallel_size + self.vllm_model_impl = args.vllm_model_impl + + # Handle pad token for processors or tokenizers + if isinstance(processing_class, ProcessorMixin): + tokenizer = processing_class.tokenizer + elif isinstance(processing_class, PreTrainedTokenizerBase): + tokenizer = processing_class + else: + raise TypeError("The `processing_class` must be either a `PreTrainedTokenizerBase` or a `ProcessorMixin`") + + if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token + + self.pad_token = tokenizer.pad_token + self.pad_token_id = tokenizer.pad_token_id + self.eos_token_id = tokenizer.eos_token_id + + # Vision tokens for VLM support + self.image_token_id = getattr(processing_class, "image_token_id", None) + self.vision_start_token_id = getattr(processing_class, "vision_start_token_id", None) + self.vision_end_token_id = getattr(processing_class, "vision_end_token_id", None) + # Get the image token string for token collapsing + self.image_token = None + if self.image_token_id is not None: + self.image_token = tokenizer.decode([self.image_token_id]) + + # Define the collator if not provided + if data_collator is None: + data_collator = DPODataCollatorWithPadding(pad_token_id=self.pad_token_id) + + # The trainer estimates the number of FLOPs [floating-point operations] using the number of elements in the + # input tensor associated with the key "input_ids". However, in Online DPO, the sampled data does not include + # the "input_ids" key. As a result, the trainer issues the warning: "Could not estimate the number of tokens + # of the input, floating-point operations will not be computed." To suppress this warning, we set the + # "estimate_tokens" key in the model's "warnings_issued" dictionary to True. This acts as a flag to indicate + # that the warning has already been issued. 
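+        # (The flag below works because `warnings_issued` doubles as a once-only
+        # marker: pre-setting "estimate_tokens" makes the Trainer skip the token
+        # estimate silently instead of warning on the first step.)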
+ model.warnings_issued["estimate_tokens"] = True + + super().__init__( + model=model, + args=args, + data_collator=data_collator, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + processing_class=processing_class, + compute_metrics=compute_metrics, + callbacks=callbacks, + optimizers=optimizers, + preprocess_logits_for_metrics=preprocess_logits_for_metrics, + ) + + # Add tags for models that have been loaded with the correct transformers version + if hasattr(self.model, "add_model_tags"): + self.model.add_model_tags(self._tag_names) + + self._beta = args.beta + + # Set up generation configuration and vLLM after super[].__init__ + if self.use_vllm: + if not is_vllm_available(): + raise ImportError( + "vLLM is not available and `use_vllm` is set to True. Please install vLLM with " + "`pip install vllm` to use it." + ) + + if self.vllm_mode == "server": + if self.accelerator.is_main_process: + if args.vllm_server_base_url is not None: + base_url = args.vllm_server_base_url + else: + base_url = f"http://{args.vllm_server_host}:{args.vllm_server_port}" + self.vllm_client = VLLMClient(base_url=base_url, connection_timeout=args.vllm_server_timeout) + self.vllm_client.init_communicator(device=torch.cuda.current_device()) + else: + self.vllm_client = None + elif self.vllm_mode == "colocate": + vllm_kwargs = { + "model": model.name_or_path, + "tensor_parallel_size": self.vllm_tensor_parallel_size, + "gpu_memory_utilization": self.vllm_gpu_memory_utilization, + "model_impl": self.vllm_model_impl, + "max_num_seqs": self.args.per_device_train_batch_size * self.vllm_tensor_parallel_size, + "max_model_len": args.max_length + args.max_new_tokens, + "distributed_executor_backend": "external_launcher", + "seed": self.accelerator.process_index // self.vllm_tensor_parallel_size, + "max_num_batched_tokens": 4096, + } + os.environ["RANK"] = str(self.accelerator.process_index) + os.environ["LOCAL_RANK"] = str(self.accelerator.local_process_index) + os.environ["WORLD_SIZE"] = str(self.accelerator.num_processes) + os.environ["MASTER_ADDR"] = os.environ.get("MASTER_ADDR", "localhost") + os.environ["MASTER_PORT"] = os.environ.get("MASTER_PORT", "12345") + + self.llm = model.vllm_engine + else: + raise ValueError(f"vllm_mode must be either 'server' or 'colocate', got '{self.vllm_mode}'.") + self.guided_decoding_regex = args.vllm_guided_decoding_regex + self._last_loaded_step = -1 + generation_params = { + "n": 2, + "repetition_penalty": self.repetition_penalty, + "temperature": self.temperature, + "top_p": self.top_p, + "top_k": -1 if self.top_k is None else self.top_k, + "min_p": 0.0 if self.min_p is None else self.min_p, + "max_tokens": args.max_new_tokens, + "detokenize": False, + } + if args.generation_kwargs is not None: + generation_params.update(args.generation_kwargs) + if self.guided_decoding_regex: + generation_params["guided_decoding"] = GuidedDecodingParams(regex=self.guided_decoding_regex) + self.generation_config = SamplingParams(**generation_params) + self.accelerator.wait_for_everyone() + else: + # Set up transformers generation config + generation_kwargs = { + "max_new_tokens": args.max_new_tokens, + "do_sample": True, + "pad_token_id": self.pad_token_id, + "bos_token_id": tokenizer.bos_token_id, + "eos_token_id": self.eos_token_id, + "temperature": self.temperature, + "top_k": self.top_k, + "top_p": self.top_p, + "repetition_penalty": self.repetition_penalty, + "use_cache": True if not self.args.gradient_checkpointing else False, + } + # Add min_p if supported + if self.min_p is not 
None: + generation_kwargs["min_p"] = self.min_p + if args.generation_kwargs is not None: + generation_kwargs.update(args.generation_kwargs) + if self.use_transformers_paged: + generation_kwargs["max_batch_tokens"] = 512 + generation_kwargs["num_blocks"] = 1024 + generation_kwargs["block_size"] = 128 + # Remove None values + generation_kwargs = {k: v for k, v in generation_kwargs.items() if v is not None} + self.generation_config = GenerationConfig(**generation_kwargs) + + if self.is_deepspeed_enabled: + if self.ref_model is not None: + self.ref_model = prepare_deepspeed( + self.ref_model, args.per_device_train_batch_size, args.fp16, args.bf16 + ) + # Prepare reward function models for DeepSpeed + if self.reward_funcs is not None: + for i, reward_func in enumerate(self.reward_funcs): + if isinstance(reward_func, PreTrainedModel): + self.reward_funcs[i] = prepare_deepspeed(reward_func, self.accelerator) + else: + if self.ref_model is not None: + self.ref_model = self.ref_model.to(self.accelerator.device) + # Prepare reward function models for FSDP/regular training + if self.reward_funcs is not None: + for i, reward_func in enumerate(self.reward_funcs): + if isinstance(reward_func, PreTrainedModel): + # Set device placement to True to make `prepare_model` move `reward_func` to device when using fsdp + self.reward_funcs[i] = self.accelerator.prepare_model( + reward_func, evaluation_mode=True, device_placement=True + ) + + @property + def beta(self): + if isinstance(self._beta, list): + epoch = self.state.epoch + return self._beta[epoch] if epoch < len(self._beta) else self._beta[-1] + else: + return self._beta + + @staticmethod + def tokenize_row(feature, is_encoder_decoder: bool, tokenizer: PreTrainedTokenizerBase) -> dict[str, Any]: + """Tokenize a single row from a DPO specific dataset.""" + if not is_encoder_decoder: + batch = tokenizer(feature["prompt"], add_special_tokens=False) + # Add BOS token to head of prompt. Avoid adding if it's already there + if tokenizer.bos_token_id is not None: + prompt_len_input_ids = len(batch["input_ids"]) + if prompt_len_input_ids == 0 or tokenizer.bos_token_id != batch["input_ids"][0]: + batch["input_ids"] = [tokenizer.bos_token_id] + batch["input_ids"] + batch["attention_mask"] = [1] + batch["attention_mask"] + else: + batch = tokenizer(feature["prompt"], add_special_tokens=True) + batch = {f"prompt_{key}": value for key, value in batch.items()} + return batch + + # Same as Trainer.get_train_dataloader but skip the "remove_unused_columns". + @wraps(Trainer.get_train_dataloader) + def get_train_dataloader(self) -> DataLoader: + if self.train_dataset is None: + raise ValueError("Trainer: training requires a train_dataset.") + + train_dataset = self.train_dataset + data_collator = self.data_collator + dataloader_params = { + "batch_size": self._train_batch_size, + "collate_fn": data_collator, + "num_workers": self.args.dataloader_num_workers, + "pin_memory": self.args.dataloader_pin_memory, + "persistent_workers": self.args.dataloader_persistent_workers, + } + + if not isinstance(train_dataset, torch.utils.data.IterableDataset): + dataloader_params["sampler"] = self._get_train_sampler() + dataloader_params["drop_last"] = self.args.dataloader_drop_last + dataloader_params["worker_init_fn"] = seed_worker + dataloader_params["prefetch_factor"] = self.args.dataloader_prefetch_factor + + return self.accelerator.prepare(DataLoader(train_dataset, **dataloader_params)) + + # Same as Trainer.get_eval_dataloader but skip the "remove_unused_columns". 
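+    # (Keeping "unused" columns matters here: Online DPO samples completions at
+    # train time, and `training_step` reads raw columns such as "image" and any
+    # extra reward-function kwargs straight from the batch, so they must survive
+    # collation.)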
+ @wraps(Trainer.get_eval_dataloader) + def get_eval_dataloader(self, eval_dataset: Optional[Union[str, Dataset]] = None) -> DataLoader: + if eval_dataset is None and self.eval_dataset is None: + raise ValueError("Trainer: evaluation requires an eval_dataset.") + + # If we have persistent workers, don't do a fork bomb especially as eval datasets + # don't change during training + dataloader_key = eval_dataset if isinstance(eval_dataset, str) else "eval" + if ( + hasattr(self, "_eval_dataloaders") + and dataloader_key in self._eval_dataloaders + and self.args.dataloader_persistent_workers + ): + return self.accelerator.prepare(self._eval_dataloaders[dataloader_key]) + + eval_dataset = ( + self.eval_dataset[eval_dataset] + if isinstance(eval_dataset, str) + else eval_dataset + if eval_dataset is not None + else self.eval_dataset + ) + data_collator = self.data_collator + + dataloader_params = { + "batch_size": self.args.eval_batch_size, + "collate_fn": data_collator, + "num_workers": self.args.dataloader_num_workers, + "pin_memory": self.args.dataloader_pin_memory, + "persistent_workers": self.args.dataloader_persistent_workers, + } + + if not isinstance(eval_dataset, torch.utils.data.IterableDataset): + dataloader_params["sampler"] = self._get_eval_sampler(eval_dataset) + dataloader_params["drop_last"] = self.args.dataloader_drop_last + dataloader_params["prefetch_factor"] = self.args.dataloader_prefetch_factor + + # accelerator.free_memory() will destroy the references, so + # we need to store the non-prepared version + eval_dataloader = DataLoader(eval_dataset, **dataloader_params) + if self.args.dataloader_persistent_workers: + if hasattr(self, "_eval_dataloaders"): + self._eval_dataloaders[dataloader_key] = eval_dataloader + else: + self._eval_dataloaders = {dataloader_key: eval_dataloader} + + return self.accelerator.prepare(eval_dataloader) + + def _enable_gradient_checkpointing(self, model: PreTrainedModel, args: OnlineDPOConfig) -> PreTrainedModel: + """Enables gradient checkpointing for the model.""" + # Ensure use_cache is disabled + model.config.use_cache = False + + # Enable gradient checkpointing on the base model for PEFT + if is_peft_model(model): + model.base_model.gradient_checkpointing_enable() + # Enable gradient checkpointing for non-PEFT models + else: + model.gradient_checkpointing_enable() + + gradient_checkpointing_kwargs = args.gradient_checkpointing_kwargs or {} + use_reentrant = ( + "use_reentrant" not in gradient_checkpointing_kwargs or gradient_checkpointing_kwargs["use_reentrant"] + ) + + if use_reentrant: + model.enable_input_require_grads() + + return model + + def _generate_vllm(self, prompts, images=None): + eos_token_id = self.eos_token_id + pad_token_id = self.pad_token_id + + # Generate completion_ids and prompt_ids based on mode + if self.vllm_mode == "server": + completion_ids, prompt_ids = self._generate_vllm_server(prompts, images) + elif self.vllm_mode == "colocate": + completion_ids, prompt_ids = self._generate_vllm_colocate(prompts, images) + + # Shared padding, masking, and tensor conversion logic + max_prompt_length = max(len(ids) for ids in prompt_ids) + prompt_mask = [[0] * (max_prompt_length - len(ids)) + [1] * len(ids) for ids in prompt_ids] + prompt_ids = [[pad_token_id] * (max_prompt_length - len(ids)) + ids for ids in prompt_ids] + max_tokens = self.generation_config.max_tokens + completion_mask = [[1] * len(ids) + [0] * (max_tokens - len(ids)) for ids in completion_ids] + completion_ids = [ + ids + [eos_token_id] if ids[-1] != 
eos_token_id and len(ids) < max_tokens else ids + for ids in completion_ids + ] + completion_ids = [ids + [pad_token_id] * (max_tokens - len(ids)) for ids in completion_ids] + + # Convert to tensors + prompt_ids = torch.tensor(prompt_ids, device=self.accelerator.device) + prompt_mask = torch.tensor(prompt_mask, device=self.accelerator.device) + completion_ids = torch.tensor(completion_ids, device=self.accelerator.device) + completion_mask = torch.tensor(completion_mask, device=self.accelerator.device) + + return prompt_ids, prompt_mask, completion_ids, completion_mask + + def _generate_vllm_server(self, prompts, images=None): + """Generate completions using vLLM server mode""" + has_images = images is not None + + # Update vLLM server weights if needed + if hasattr(self, "_last_loaded_step") and self.state.global_step != self._last_loaded_step: + self._move_model_to_vllm() + self._last_loaded_step = self.state.global_step + elif not hasattr(self, "_last_loaded_step"): + self._move_model_to_vllm() + self._last_loaded_step = self.state.global_step + + # Apply chat template if conversational + if is_conversational({"prompt": prompts[0]}): + prompts_text = [apply_chat_template({"prompt": p}, self.processing_class)["prompt"] for p in prompts] + else: + prompts_text = prompts + # Gather all prompts to main process + all_prompts = gather_object(prompts_text) + if has_images: + all_images = gather_object(images) + + if self.accelerator.is_main_process: + # Since 'prompts' contains 'num_generations' duplicates, we first take unique prompts, and generate + # num_generations outputs for each one. This is faster than generating outputs for each duplicate + # prompt individually. + ordered_set_of_prompts = all_prompts[:: self.num_generations] + if has_images: + ordered_set_of_images = all_images[:: self.num_generations] + else: + ordered_set_of_images = None + completion_ids = self.vllm_client.generate( + prompts=ordered_set_of_prompts, + images=ordered_set_of_images, + n=self.num_generations, + repetition_penalty=self.repetition_penalty, + temperature=self.temperature, + top_p=self.top_p, + top_k=-1 if self.top_k is None else self.top_k, + min_p=0.0 if self.min_p is None else self.min_p, + max_tokens=self.generation_config.max_tokens, + guided_decoding_regex=self.guided_decoding_regex if hasattr(self, "guided_decoding_regex") else None, + generation_kwargs=self.args.generation_kwargs, + ) + # Flatten: each prompt generates 2 completions + completion_ids = [[comp_id] for prompt_completions in completion_ids for comp_id in prompt_completions] + else: + completion_ids = [None] * (len(all_prompts) * 2) + + # Broadcast completions to all processes + completion_ids = broadcast_object_list(completion_ids, from_process=0) + + # Each process takes its slice + process_slice = slice( + self.accelerator.process_index * len(prompts) * 2, + (self.accelerator.process_index + 1) * len(prompts) * 2, + ) + completion_ids = completion_ids[process_slice] + + # Create prompt_ids by tokenizing locally + prompt_inputs = self.processing_class( + text=prompts_text, + return_tensors="pt", + padding=True, + padding_side="left", + add_special_tokens=False, + ) + prompt_ids = [] + for prompt_tokens in prompt_inputs["input_ids"]: + prompt_ids.extend([prompt_tokens.tolist(), prompt_tokens.tolist()]) # 2 copies for 2 completions + return completion_ids, prompt_ids + + def _generate_vllm_colocate(self, prompts, images=None): + """Generate completions using vLLM colocate mode""" + # Update model weights if needed + 
self._move_model_to_vllm() + + # Apply chat template if conversational + if is_conversational({"prompt": prompts[0]}): + prompts_text = [apply_chat_template({"prompt": p}, self.processing_class)["prompt"] for p in prompts] + else: + prompts_text = prompts + + # Prepare vLLM inputs with images if available + if images is not None: + vllm_inputs = [] + for prompt, image in zip(prompts_text, images): + if image is not None: + vllm_inputs.append({"prompt": prompt, "multi_modal_data": {"image": image}}) + else: + vllm_inputs.append(prompt) + else: + vllm_inputs = prompts_text + + outputs = self.llm.generate(vllm_inputs, self.generation_config, use_tqdm=False, lora_request = self.model.load_lora('online_dpo_trainer_lora_model', load_tensors = True)) + + completion_ids = [list(output.outputs[i].token_ids) for i in range(2) for output in outputs] + prompt_ids = [list(output.prompt_token_ids) for _ in range(2) for output in outputs] + + return completion_ids, prompt_ids + + def _move_model_to_vllm(self): + """Synchronize model weights to vLLM server with support for PEFT, DeepSpeed, and FSDP""" + # For DeepSpeed ZeRO-3 and FSDP, we need to gather all parameters before operations + deepspeed_plugin = self.accelerator.state.deepspeed_plugin + zero_stage_3 = deepspeed_plugin is not None and deepspeed_plugin.zero_stage == 3 + if zero_stage_3: + import deepspeed + + gather_if_zero3 = deepspeed.zero.GatheredParameters + else: + gather_if_zero3 = nullcontext + + if is_peft_model(self.model): + # With PEFT and FSDP/DeepSpeed ZeRO Stage 3, we must gather the full model at once before merging, as + # merging adapters in a sharded manner is not supported. + # TODO: does this work with FSDP? + with gather_if_zero3(list(self.model.parameters())): + self.model.merge_adapter() + + # Update vLLM weights while parameters are gathered + if self.is_fsdp_enabled: # note if using FSDP, gather_if_zero3 is nullcontext + # Update vLLM weights while parameters are gathered + # For PEFT with FSDP we need to use the memory efficient post-order traversal + fsdp_plugin = getattr(self.accelerator.state, "fsdp_plugin", None) + fsdp_version = getattr(fsdp_plugin, "fsdp_version", 1) if fsdp_plugin else 1 + if fsdp_version == 1: + # use memory-efficient post-order traversal for FSDP + self._sync_fsdp1_params_to_vllm(self.model) + elif fsdp_version == 2: + self._sync_fsdp2_params_to_vllm(self.model) + else: + # DeepSpeed ZeRO-3 with PEFT + for name, param in self.model.named_parameters(): + # When using PEFT, we need to recover the original parameter name and discard some parameters + name = name.removeprefix("base_model.model.").replace(".base_layer", "") + if self.model.prefix in name: + continue + # When module to save, remove its prefix and discard the original module + if "original_module" in name: + continue + name = self._fix_param_name_to_vllm(name, extra_prefixes=["modules_to_save.default."]) + + if self.vllm_mode == "server" and self.accelerator.is_main_process: + self.vllm_client.update_named_param(name, param.data) + elif self.vllm_mode == "colocate": + + pass + + pass + # Unmerge adapters while parameters are still gathered + self.model.unmerge_adapter() + # Parameters will automatically be repartitioned when exiting the context + else: + # For non-PEFT models, simply gather (if needed) and update each parameter individually. 
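+            # Example of the per-parameter name cleanup (illustrative): an FSDP
+            # activation-checkpointing name such as
+            #     "model.layers.0._checkpoint_wrapped_module.self_attn.q_proj.weight"
+            # is rewritten by `_fix_param_name_to_vllm` to
+            #     "model.layers.0.self_attn.q_proj.weight"
+            # before being sent to the vLLM side.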
+            if self.is_fsdp_enabled:
+                fsdp_plugin = getattr(self.accelerator.state, "fsdp_plugin", None)
+                fsdp_version = getattr(fsdp_plugin, "fsdp_version", 1) if fsdp_plugin else 1
+                if fsdp_version == 1:
+                    self._sync_fsdp1_params_to_vllm(self.model)  # use memory-efficient post-order traversal for FSDP
+                elif fsdp_version == 2:
+                    self._sync_fsdp2_params_to_vllm(self.model)
+            else:
+                for name, param in self.model.named_parameters():
+                    name = self._fix_param_name_to_vllm(name)
+                    with gather_if_zero3([param]):
+                        if self.vllm_mode == "server" and self.accelerator.is_main_process:
+                            self.vllm_client.update_named_param(name, param.data)
+                        elif self.vllm_mode == "colocate":
+                            # no-op in this build: colocate mode reloads weights through
+                            # the LoRA request passed to `llm.generate`
+                            pass
+
+        # Reset cache on vLLM
+        if self.vllm_mode == "server" and self.accelerator.is_main_process:
+            self.vllm_client.reset_prefix_cache()
+        elif self.vllm_mode == "colocate":
+            self.llm.reset_prefix_cache()
+
+    def _sync_fsdp1_params_to_vllm(self, module: nn.Module, prefix: str = "", visited=None):
+        """Memory-efficient post-order traversal of FSDP modules to extract full parameters and sync with vLLM."""
+        # For FSDP1, we need to recurse into children and also use summon_full_params
+        if visited is None:
+            visited = set()
+        for child_name, child_module in module.named_children():
+            child_prefix = f"{prefix}.{child_name}" if prefix else child_name
+            self._sync_fsdp1_params_to_vllm(
+                child_module, prefix=child_prefix, visited=visited
+            )  # recurse into the child
+
+        if isinstance(module, FSDP):
+            with FSDP.summon_full_params(module, recurse=False, writeback=False):
+                for param_name, param in module.named_parameters():
+                    full_name = f"{prefix}.{param_name}" if prefix else param_name
+                    full_name = self._fix_param_name_to_vllm(full_name, extra_prefixes=["_fsdp_wrapped_module."])
+
+                    if full_name in visited:
+                        continue  # skip FSDP subtrees already traversed
+                    visited.add(full_name)
+
+                    if self.vllm_mode == "server" and self.accelerator.is_main_process:
+                        self.vllm_client.update_named_param(full_name, param.data)
+                    elif self.vllm_mode == "colocate":
+                        pass  # no-op in this build (see note above)
+
+    def _sync_fsdp2_params_to_vllm(self, module: nn.Module):
+        # For FSDP2, module.state_dict() already covers all parameters, so no need for recursion
+        for name, param in module.state_dict().items():
+            if param.is_cpu:
+                param = param.to(torch.device("cuda"))
+            param = param.full_tensor()
+
+            if self.vllm_mode == "server" and self.accelerator.is_main_process:
+                self.vllm_client.update_named_param(name, param)
+            elif self.vllm_mode == "colocate":
+                pass  # no-op in this build (see note above)
+
+    def _fix_param_name_to_vllm(self, name, extra_prefixes: Optional[list[str]] = None):
+        """Clean parameter names for vLLM compatibility"""
+        extra_prefixes = extra_prefixes or []
+        prefixes = ["_checkpoint_wrapped_module."] + extra_prefixes
+        for prefix in prefixes:
+            name = name.replace(prefix, "")
+        return name
+
+    def process_vision_row(
+        self, features: dict[str, Union[list, torch.Tensor]], processing_class=None
+    ) -> dict[str, list[int]]:
+        """
+        Process a vision row for VLM models (adapted from DPO trainer)
+        """
+        processor = processing_class or self.processing_class
+        processed_features = processor(images=[features["image"]], text=features["prompt"], add_special_tokens=False)
+
+        prompt_input_ids = processed_features["input_ids"][0]
+
+        # Create the output dict with required fields
+        output = {
+            "prompt_input_ids": prompt_input_ids,
+            "prompt_attention_mask": processed_features["attention_mask"][0],
+        }
+
+        # Add vision-specific fields
+        if "pixel_values" in processed_features:
+            output["pixel_values"] =
processed_features["pixel_values"][0] + if "pixel_attention_mask" in processed_features: + output["pixel_attention_mask"] = processed_features["pixel_attention_mask"][0] + if "image_sizes" in processed_features: + output["image_sizes"] = processed_features["image_sizes"][0] + + return output + + def _generate(self, model, prompts, images=None): + """Generate completions using the model""" + device = next(model.parameters()).device + eos_token_id = self.eos_token_id + pad_token_id = self.pad_token_id + + # Apply chat template and tokenize the input + inputs = [{"prompt": prompt} for prompt in prompts] + + # Add images if provided (VLM support) + if images is not None: + for i, image in enumerate(images): + inputs[i]["image"] = image + + # Apply chat template to get text prompts + prompts_text = [maybe_apply_chat_template(x, self.processing_class)["prompt"] for x in inputs] + + # Handle image token collapsing/removal + # The chat template sometimes inserts a single image token into the prompt text. However, when this text is + # later tokenized, the single image token string is expanded into multiple image token IDs, depending on the + # image size. We need to handle this properly. + if self.image_token is not None and images is not None: + escaped_img_token = re.escape(self.image_token) + # Search for the image token in the chat template + if hasattr(self.processing_class, "chat_template") and self.processing_class.chat_template: + if re.search(escaped_img_token, self.processing_class.chat_template): + # Collapse repeated image tokens back into a single token + prompts_text = [ + re.sub(rf"({escaped_img_token})+", self.image_token, text) for text in prompts_text + ] + else: + # If the chat template doesn't use the image token, remove all instances + if self.vision_end_token_id is not None: + escaped_eoi_token = re.escape( + self.processing_class.tokenizer.decode([self.vision_end_token_id]) + ) + prompts_text = [ + re.sub(rf"({escaped_img_token})+{escaped_eoi_token}", "", text) for text in prompts_text + ] + else: + # If vision_end_token_id is None, just remove the image tokens + prompts_text = [re.sub(rf"({escaped_img_token})+", "", text) for text in prompts_text] + + # Prepare kwargs for processing class + kwargs = {} + if images is not None: + kwargs = {"images": [[img] for img in images]} + + # Process inputs using the processing class (handles both VLM and LLM) + prompt_inputs = self.processing_class( + text=prompts_text, + return_tensors="pt", + padding=True, + padding_side="left", + add_special_tokens=False, + **kwargs, + ) + + prompt_inputs = {k: v.to(device) for k, v in prompt_inputs.items()} + # Convert vision inputs to model's dtype for proper computation + if "pixel_values" in prompt_inputs: + # Handle DataParallel wrapped models + model_dtype = getattr(model, "dtype", None) + if model_dtype is None and hasattr(model, "module"): + model_dtype = model.module.dtype + if model_dtype is not None: + prompt_inputs["pixel_values"] = prompt_inputs["pixel_values"].to(model_dtype) + + # Sample 2 completions per prompt of size `max_new_tokens` from the model + prompt_ids = prompt_inputs["input_ids"].repeat(2, 1) + prompt_mask = prompt_inputs["attention_mask"].repeat(2, 1) + + # Prepare vision inputs if available + vision_generation_kwargs = {} + if self.is_vision_model and images is not None: + if "pixel_values" in prompt_inputs: + vision_generation_kwargs["pixel_values"] = prompt_inputs["pixel_values"].repeat(2, 1, 1, 1) + if "pixel_attention_mask" in prompt_inputs: + 
vision_generation_kwargs["pixel_attention_mask"] = prompt_inputs["pixel_attention_mask"].repeat(2, 1) + if "image_sizes" in prompt_inputs: + vision_generation_kwargs["image_sizes"] = prompt_inputs["image_sizes"].repeat(2, 1) + if "image_grid_thw" in prompt_inputs: + vision_generation_kwargs["image_grid_thw"] = prompt_inputs["image_grid_thw"].repeat(2, 1) + + if self.use_transformers_paged: + previous_attn = self.model_wrapped.config._attn_implementation + + if is_flash_attn_2_available(): + self.model_wrapped.config._attn_implementation = "paged_attention" + else: + self.model_wrapped.config._attn_implementation = "sdpa_paged" + with ( + profiling_context(self, "transformers.generate_batch"), + unwrap_model_for_generation( + model, self.accelerator, gather_deepspeed3_params=self.args.ds3_gather_for_generation + ) as unwrapped_model, + torch.no_grad(), + FSDP.summon_full_params(self.model_wrapped, recurse=False) if self.is_fsdp_enabled else nullcontext(), + ): + # Cast to the appropriate dtype based on training configuration + if self.args.bf16: + unwrapped_model.to(torch.bfloat16) + elif self.args.fp16: + unwrapped_model.to(torch.float16) + with torch.inference_mode(): + all_outputs = unwrapped_model.generate_batch( + prompt_ids.tolist(), + generation_config=self.generation_config, + progress_bar=False, + ) + completion_ids = [output.generated_tokens for output in all_outputs.values()] + completion_ids = [torch.tensor(ids, device=device) for ids in completion_ids] + completion_ids = pad(completion_ids, padding_value=self.pad_token_id, padding_side="right") + prompt_completion_ids = torch.cat([prompt_ids, completion_ids], dim=1) + # Restore the original attention implementation, training mode + self.model_wrapped.config._attn_implementation = previous_attn + + # Extract completion_ids and create completion_mask + prompt_length = prompt_ids.size(1) + completion_ids = prompt_completion_ids[:, prompt_length:] + completion_ids, completion_mask = truncate_right(completion_ids, eos_token_id, pad_token_id) + + return prompt_ids, prompt_mask, completion_ids, completion_mask + else: + # Regular generation path + with ( + profiling_context(self, "transformers.generate"), + unwrap_model_for_generation( + model, self.accelerator, gather_deepspeed3_params=self.args.ds3_gather_for_generation + ) as unwrapped_model, + torch.no_grad(), + FSDP.summon_full_params(self.model_wrapped, recurse=False) if self.is_fsdp_enabled else nullcontext(), + ): + # Setup cache implementation if specified + if self.args.cache_implementation is not None: + unwrapped_model.generation_config.cache_implementation = self.args.cache_implementation + + # Standard generation + output = unwrapped_model.generate( + input_ids=prompt_ids, + attention_mask=prompt_mask, + generation_config=self.generation_config, + **vision_generation_kwargs, + ) + + completion_ids = output[:, prompt_ids.size(1) :] + completion_ids, completion_mask = truncate_right(completion_ids, eos_token_id, pad_token_id) + + return prompt_ids, prompt_mask, completion_ids, completion_mask + + def _calculate_rewards_from_functions(self, prompts, completions, completion_ids_list, **reward_kwargs): + """ + Calculate rewards using reward functions + """ + device = self.accelerator.device + rewards_per_func = torch.zeros(len(prompts), len(self.reward_funcs), device=device) + + # Add trainer state to reward kwargs for dynamic reward shaping + reward_kwargs["trainer_state"] = self.state + + for i, (reward_func, reward_processing_class) in enumerate( + zip(self.reward_funcs, 
self.reward_processing_classes) + ): + if isinstance(reward_func, nn.Module): # Model-based reward function + # Handle conversational vs text input + if is_conversational({"prompt": prompts[0]}): + messages = [{"messages": p + c} for p, c in zip(prompts, completions)] + texts = [apply_chat_template(x, reward_processing_class)["text"] for x in messages] + else: + texts = [p + c for p, c in zip(prompts, completions)] + + # Tokenize and get reward scores + reward_inputs = reward_processing_class( + text=texts, return_tensors="pt", padding=True, padding_side="right", add_special_tokens=False + ) + reward_inputs = {k: v.to(device) for k, v in reward_inputs.items()} + + with torch.inference_mode(): + rewards_per_func[:, i] = reward_func(**reward_inputs).logits[:, 0] # Shape (B*G,) + else: + # Custom reward function + output_reward_func = reward_func( + prompts=prompts, completions=completions, completion_ids=completion_ids_list, **reward_kwargs + ) + # Convert None values to NaN + output_reward_func = [reward if reward is not None else torch.nan for reward in output_reward_func] + rewards_per_func[:, i] = torch.tensor(output_reward_func, dtype=torch.float32, device=device) + + # Weight and sum across all reward functions + if self.reward_weights is not None: + total_rewards = (rewards_per_func * self.reward_weights.to(device).unsqueeze(0)).nansum(dim=1) + else: + total_rewards = rewards_per_func.nansum(dim=1) + + return total_rewards + + def _forward(self, model, prompt_ids, prompt_mask, completion_ids, completion_mask, vision_inputs=None): + # Get the number of tokens to truncate from prompt + num_tokens_to_truncate = max(prompt_ids.size(1) + completion_ids.size(1) - self.max_length, 0) + + # Truncate left to avoid oom + prompt_ids = prompt_ids[:, num_tokens_to_truncate:] + prompt_mask = prompt_mask[:, num_tokens_to_truncate:] + + # Concat the prompt and completion + prompt_completion_ids = torch.cat((prompt_ids, completion_ids), dim=1) + prompt_completion_mask = torch.cat((prompt_mask, completion_mask), dim=1) + + # Prepare model kwargs with vision inputs if available + model_kwargs = {"attention_mask": prompt_completion_mask} + if vision_inputs is not None: + if "pixel_values" in vision_inputs: + model_kwargs["pixel_values"] = vision_inputs["pixel_values"] + if "pixel_attention_mask" in vision_inputs: + model_kwargs["pixel_attention_mask"] = vision_inputs["pixel_attention_mask"] + if "image_sizes" in vision_inputs: + model_kwargs["image_sizes"] = vision_inputs["image_sizes"] + if "image_grid_thw" in vision_inputs: + model_kwargs["image_grid_thw"] = vision_inputs["image_grid_thw"] + + # Get the logprobs of the completions from the model + output = model(prompt_completion_ids, **model_kwargs) + + # There is 1 offset, because the model predict the next token + prompt_len = prompt_ids.size(1) + start_idx = prompt_len - 1 if prompt_len > 0 else 0 + logits = output.logits[:, start_idx:-1] + + # Take the completion tokens logprob + logprobs = torch.take_along_dim(logits.log_softmax(dim=-1), completion_ids.unsqueeze(-1), dim=2).squeeze(-1) + return logprobs + + def training_step( + self, model: nn.Module, inputs: dict[str, Union[torch.Tensor, Any]], num_items_in_batch: Optional[int] = None + ) -> torch.Tensor: + model.train() + + prompts = inputs["prompt"] + batch_size = len(prompts) + + # Handle images for VLM support + has_images = "image" in inputs + images = None + if has_images: + images = inputs["image"] + # Convert conversational prompts to include image tokens + for prompt in prompts: + if 
isinstance(prompt, list): + for message in prompt: + if not isinstance(message, dict): + continue + content = message.get("content") + role = message.get("role") + if isinstance(content, str): + if role == "user": + message["content"] = [{"type": "image"}, {"type": "text", "text": content}] + elif role == "system": + message["content"] = [{"type": "text", "text": content}] + + if self.args.use_vllm: + prompt_ids, prompt_mask, completion_ids, completion_mask = self._generate_vllm(prompts, images) + else: + prompt_ids, prompt_mask, completion_ids, completion_mask = self._generate(model, prompts, images) + + contain_eos_token = torch.any(completion_ids == self.eos_token_id, dim=-1) + + # Extract vision inputs if available for VLM support + vision_inputs = None + if has_images and self.is_vision_model and not self.args.use_vllm: + # For vision models with transformers generation, we need to prepare vision inputs + # Process the images to get vision inputs that can be passed through the forward pass + vision_inputs = {} + kwargs = {"images": [[img] for img in images]} + processed = self.processing_class( + text=[""] * len(images), # Dummy text for vision processing + return_tensors="pt", + **kwargs, + ) + # Handle DataParallel wrapped models + model_device = getattr(model, "device", None) + model_dtype = getattr(model, "dtype", None) + if model_device is None and hasattr(model, "module"): + model_device = model.module.device + model_dtype = model.module.dtype + # Move vision tensors to device and convert to model dtype + # Need to duplicate for 2 completions per prompt + if "pixel_values" in processed: + vision_inputs["pixel_values"] = ( + processed["pixel_values"].to(model_device, dtype=model_dtype).repeat(2, 1, 1, 1) + ) + if "pixel_attention_mask" in processed: + vision_inputs["pixel_attention_mask"] = processed["pixel_attention_mask"].to(model_device).repeat(2, 1) + if "image_sizes" in processed: + vision_inputs["image_sizes"] = processed["image_sizes"].to(model_device).repeat(2, 1) + if "image_grid_thw" in processed: + vision_inputs["image_grid_thw"] = processed["image_grid_thw"].to(model_device).repeat(2, 1) + + logprobs = self._forward(model, prompt_ids, prompt_mask, completion_ids, completion_mask, vision_inputs) + with torch.no_grad(): + if self.ref_model is not None: + ref_logprobs = self._forward( + self.ref_model, prompt_ids, prompt_mask, completion_ids, completion_mask, vision_inputs + ) + else: # peft case: we just need to disable the adapter + with self.model.disable_adapter(): + ref_logprobs = self._forward( + self.model, prompt_ids, prompt_mask, completion_ids, completion_mask, vision_inputs + ) + + # Decode the completions, and format them if the input is conversational + device = logprobs.device + completions = self.processing_class.batch_decode(completion_ids, skip_special_tokens=True) + if is_conversational({"prompt": prompts[0]}): + completions = [[{"role": "assistant", "content": completion}] for completion in completions] + + # Get the reward from reward functions, judge, or deprecated reward_model + if self.reward_funcs is not None: + # First create completion_ids_list for custom reward functions + completion_ids_list = [completion_ids[i].tolist() for i in range(completion_ids.shape[0])] + + # Extract additional fields from inputs for reward functions + reward_kwargs = {} + keys = [key for key in inputs if key not in ["prompt"]] + for key in keys: + if isinstance(inputs[key], (list, tuple)): + # Repeat input fields to match number of completions (2 per prompt) + 
reward_kwargs[key] = inputs[key] * 2  # list repetition yields [v1..vB, v1..vB], aligned with the two completion halves of `2 * prompts`
+                else:
+                    reward_kwargs[key] = inputs[key]
+
+            # Calculate rewards using reward functions
+            rewards = self._calculate_rewards_from_functions(
+                prompts=2 * prompts, completions=completions, completion_ids_list=completion_ids_list, **reward_kwargs
+            )
+
+            # Apply missing EOS penalty if configured
+            if self.args.missing_eos_penalty is not None:
+                rewards[~contain_eos_token] -= self.args.missing_eos_penalty
+
+            # Split rewards into chosen/rejected pairs
+            first_half, second_half = rewards.split(batch_size)
+            mask = first_half >= second_half
+        elif self.judge is not None:
+            # Once formatted, conversational data may contain special tokens (such as <|im_start|>) that are not
+            # directly understandable by the judge and could alter its judgment. To avoid this and make the judge
+            # independent of the model's chat template, we use the raw conversation data, and apply our own chat
+            # template to it.
+            if is_conversational({"prompt": prompts[0]}):
+                environment = jinja2.Environment()
+                template = environment.from_string(SIMPLE_CHAT_TEMPLATE)
+                prompts = [template.render(messages=prompt) for prompt in prompts]
+                completions = [template.render(messages=completion) for completion in completions]
+
+            ranks_of_first_completion = self.judge.judge(
+                prompts, list(zip(completions[:batch_size], completions[batch_size:]))
+            )
+
+            # convert ranks to a True/False mask:
+            # when rank == 0, it means the first completion is the best
+            # when rank == 1, it means the second completion is the best
+            mask = torch.tensor([rank == 0 for rank in ranks_of_first_completion], device=device)
+
+        batch_range = torch.arange(batch_size, device=device)
+        chosen_indices = batch_range + (~mask * batch_size)
+        rejected_indices = batch_range + (mask * batch_size)
+
+        # Build tensor so that the first half is the chosen examples and the second half the rejected examples
+        cr_indices = torch.cat((chosen_indices, rejected_indices), dim=0)  # cr = chosen and rejected
+        cr_logprobs = logprobs[cr_indices]
+        cr_ref_logprobs = ref_logprobs[cr_indices]
+
+        # mask out the padding tokens
+        padding_mask = ~completion_mask.bool()
+        cr_padding_mask = padding_mask[cr_indices]
+
+        cr_logprobs_sum = (cr_logprobs * ~cr_padding_mask).sum(1)
+        cr_ref_logprobs_sum = (cr_ref_logprobs * ~cr_padding_mask).sum(1)
+
+        # Split the chosen and rejected examples
+        chosen_logprobs_sum, rejected_logprobs_sum = torch.split(cr_logprobs_sum, batch_size)
+        chosen_ref_logprobs_sum, rejected_ref_logprobs_sum = torch.split(cr_ref_logprobs_sum, batch_size)
+        pi_logratios = chosen_logprobs_sum - rejected_logprobs_sum
+        ref_logratios = chosen_ref_logprobs_sum - rejected_ref_logprobs_sum
+
+        logits = pi_logratios - ref_logratios
+
+        if self.args.loss_type == "sigmoid":
+            losses = -F.logsigmoid(self.beta * logits)
+        elif self.args.loss_type == "ipo":
+            losses = (logits - 1 / (2 * self.beta)) ** 2
+        else:
+            raise NotImplementedError(f"invalid loss type {self.args.loss_type}")
+
+        loss = losses.mean()
+
+        # Log everything
+        if self.reward_funcs is not None:
+            # When using reward_funcs, we have rewards instead of scores
+            scores_margin = rewards[chosen_indices] - rewards[rejected_indices]
+            self.stats["objective/scores_margin"].append(
+                self.accelerator.gather_for_metrics(scores_margin.mean()).mean().item()
+            )
+            self.stats["objective/scores"].append(self.accelerator.gather_for_metrics(rewards.mean()).mean().item())
+            self.stats["val/contain_eos_token"].append(contain_eos_token.float().mean().item())
+
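+        # (Note: `gather_for_metrics` is assumed to collect each scalar from every
+        # process before the final `.mean().item()`, so the logged statistics
+        # reflect the global batch rather than a single rank.)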
self.stats["logps/chosen"].append(self.accelerator.gather_for_metrics(chosen_logprobs_sum).mean().item()) + self.stats["logps/rejected"].append(self.accelerator.gather_for_metrics(rejected_logprobs_sum).mean().item()) + + kl = logprobs - ref_logprobs + mean_kl = kl.sum(1).mean() + self.stats["objective/kl"].append(self.accelerator.gather_for_metrics(mean_kl).mean().item()) + non_score_reward = (-self.beta * kl).sum(1) + mean_non_score_reward = non_score_reward.mean() + self.stats["objective/non_score_reward"].append( + self.accelerator.gather_for_metrics(mean_non_score_reward).mean().item() + ) + if self.reward_funcs is not None: + # Calculate RLHF reward by combining rewards with non_score_reward + rlhf_reward = rewards + non_score_reward + self.stats["objective/rlhf_reward"].append(self.accelerator.gather_for_metrics(rlhf_reward).mean().item()) + + mean_entropy = -logprobs.sum(1).mean() + self.stats["objective/entropy"].append(self.accelerator.gather_for_metrics(mean_entropy).mean().item()) + chosen_rewards = self.beta * (chosen_logprobs_sum - chosen_ref_logprobs_sum) + gathered_chosen_rewards = self.accelerator.gather_for_metrics(chosen_rewards) + self.stats["rewards/chosen"].append(gathered_chosen_rewards.mean().item()) + rejected_rewards = self.beta * (rejected_logprobs_sum - rejected_ref_logprobs_sum) + gathered_rejected_rewards = self.accelerator.gather_for_metrics(rejected_rewards) + self.stats["rewards/rejected"].append(gathered_rejected_rewards.mean().item()) + margin = gathered_chosen_rewards - gathered_rejected_rewards + self.stats["rewards/margins"].append(margin.mean().item()) + accuracy = margin > 0 + self.stats["rewards/accuracies"].append(accuracy.float().mean().item()) + self.stats["beta"].append(self.beta) + + if ( + self.args.torch_empty_cache_steps is not None + and self.state.global_step % self.args.torch_empty_cache_steps == 0 + ): + empty_cache() + + kwargs = {} + + # For LOMO optimizers you need to explicitly use the learning rate + if self.args.optim in [OptimizerNames.LOMO, OptimizerNames.ADALOMO]: + kwargs["learning_rate"] = self._get_learning_rate() + + if self.args.n_gpu > 1: + loss = loss.mean() # mean() to average on multi-gpu parallel training + + if self.use_apex: + with amp.scale_loss(loss, self.optimizer) as scaled_loss: + scaled_loss.backward() + else: + self.accelerator.backward(loss, **kwargs) + + return loss.detach() / self.args.gradient_accumulation_steps + + # Same as Trainer._maybe_log_save_evaluate but log our metrics + def _maybe_log_save_evaluate( + self, tr_loss, grad_norm, model, trial, epoch, ignore_keys_for_eval, start_time, learning_rate=None + ): + if self.control.should_log and self.state.global_step > self._globalstep_last_logged: + logs: dict[str, float] = {} + + # all_gather + mean() to get average loss over all processes + tr_loss_scalar = self._nested_gather(tr_loss).mean().item() + + # reset tr_loss to zero + tr_loss -= tr_loss + + logs["loss"] = round(tr_loss_scalar / (self.state.global_step - self._globalstep_last_logged), 4) + if grad_norm is not None: + logs["grad_norm"] = grad_norm.detach().item() if isinstance(grad_norm, torch.Tensor) else grad_norm + if learning_rate is not None: + logs["learning_rate"] = learning_rate + else: + logs["learning_rate"] = self._get_learning_rate() + + # Add our metrics + for key, val in self.stats.items(): + logs[key] = sum(val) / len(val) + self.stats = {key: [] for key in self.stats} # reset stats + + self._total_loss_scalar += tr_loss_scalar + self._globalstep_last_logged = 
self.state.global_step + self.store_flos() + self.log(logs, start_time) + + metrics = None + if self.control.should_evaluate: + metrics = self._evaluate(trial, ignore_keys_for_eval) + is_new_best_metric = self._determine_best_metric(metrics=metrics, trial=trial) + + if self.args.save_strategy == "best": + self.control.should_save = is_new_best_metric + + if self.control.should_save: + self._save_checkpoint(model, trial) + self.control = self.callback_handler.on_save(self.args, self.state, self.control) + + # Ensure the model card is saved along with the checkpoint + def _save_checkpoint(self, model, trial): + if self.args.hub_model_id is None: + model_name = Path(self.args.output_dir).name + else: + model_name = self.args.hub_model_id.split("/")[-1] + self.create_model_card(model_name=model_name) + super()._save_checkpoint(model, trial) + + def create_model_card( + self, + model_name: Optional[str] = None, + dataset_name: Optional[str] = None, + tags: Union[str, list[str], None] = None, + ): + """ + Creates a draft of a model card using the information available to the `Trainer`. + + Args: + model_name (`str` or `None`, *optional*, defaults to `None`): + Name of the model. + dataset_name (`str` or `None`, *optional*, defaults to `None`): + Name of the dataset used for training. + tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`): + Tags to be associated with the model card. + """ + if not self.is_world_process_zero(): + return + + if hasattr(self.model.config, "_name_or_path") and not os.path.isdir(self.model.config._name_or_path): + base_model = self.model.config._name_or_path + else: + base_model = None + + # normalize `tags` to a mutable set + if tags is None: + tags = set() + elif isinstance(tags, str): + tags = {tags} + else: + tags = set(tags) + + if hasattr(self.model.config, "unsloth_version"): + tags.add("unsloth") + + if "JOB_ID" in os.environ: + tags.add("hf_jobs") + + tags.update(self._tag_names) + + # docstyle-ignore + citation = textwrap.dedent("""\ + @article{guo2024direct, + title = {{Direct Language Model Alignment from Online AI Feedback}}, + author = {Shangmin Guo and Biao Zhang and Tianlin Liu and Tianqi Liu and Misha Khalman and Felipe Llinares and Alexandre Ram{\'{e}} and Thomas Mesnard and Yao Zhao and Bilal Piot and Johan Ferret and Mathieu Blondel}, + year = 2024, + eprint = {arXiv:2402.04792} + }""") + + model_card = generate_model_card( + base_model=base_model, + model_name=model_name, + hub_model_id=self.hub_model_id, + dataset_name=dataset_name, + tags=tags, + wandb_url=wandb.run.url if is_wandb_available() and wandb.run is not None else None, + comet_url=get_comet_experiment_url(), + trainer_name="Online DPO", + trainer_citation=citation, + paper_title="Direct Language Model Alignment from Online AI Feedback", + paper_id="2402.04792", + ) + model_card.save(os.path.join(self.args.output_dir, "README.md")) +class UnslothOnlineDPOTrainer(_UnslothOnlineDPOTrainer): + """ + +Initialize OnlineDPOTrainer. + +Args: + model (`Union[str, nn.Module, PreTrainedModel]`): + Model to be trained. Can be either: + + - A string, being the *model id* of a pretrained model hosted inside a model repo on huggingface.co, or a + path to a *directory* containing model weights saved using + [`~transformers.PreTrainedModel.save_pretrained`], e.g., `'./my_model_directory/'`. The model is loaded + using [`~transformers.AutoModelForCausalLM.from_pretrained`] with the keyword arguments in + `args.model_init_kwargs`. + - A [`~transformers.PreTrainedModel`] object. 
Only causal language models are supported.
+    ref_model (`transformers.PreTrainedModel` or `torch.nn.Module` or `None`):
+        The reference model to use for training. If None is specified, the reference model will be created from
+        the model.
+    judge (`BasePairwiseJudge`):
+        The judge to use for pairwise comparison of model completions.
+    reward_funcs (`Union[RewardFunc, list[RewardFunc]]`, *optional*, defaults to `None`):
+        Reward functions to be used for computing the rewards. To compute the rewards, we call all the reward
+        functions with the prompts and completions and sum the rewards. Can be either:
+
+        - A single reward function: Can be a string (path to a model), a [`~transformers.PreTrainedModel`], or a
+          custom callable function.
+        - A list of reward functions: Must all be of compatible types.
+
+        Note: Only one of `judge` or `reward_funcs` should be provided.
+    args (`OnlineDPOConfig`):
+        The online DPO config arguments to use for training.
+    data_collator (`transformers.DataCollator`):
+        The data collator to use for training. If None is specified, the default data collator
+        (`DPODataCollatorWithPadding`) will be used, which will pad the sequences to the maximum length of the
+        sequences in the batch, given a dataset of paired sequences.
+    train_dataset ([`~datasets.Dataset`] or [`~datasets.IterableDataset`]):
+        The dataset to use for training.
+    eval_dataset ([`~datasets.Dataset`], [`~datasets.IterableDataset`] or `dict[str, Union[Dataset, IterableDataset]]`):
+        The dataset to use for evaluation.
+    processing_class ([`~transformers.PreTrainedTokenizerBase`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
+        Processing class used to process the data. If provided, will be used to automatically process the inputs
+        for the model, and it will be saved along with the model to make it easier to rerun an interrupted
+        training or reuse the fine-tuned model.
+    reward_processing_classes (`Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]`, *optional*, defaults to `None`):
+        Processing classes corresponding to the reward functions specified in `reward_funcs`. Can be either:
+
+        - A single processing class: Used when `reward_funcs` contains only one reward function.
+        - A list of processing classes: Must match the order and length of the reward functions in `reward_funcs`.
+
+        If set to `None`, the tokenizer for each model-based reward function is automatically loaded using
+        [`~transformers.AutoTokenizer.from_pretrained`].
+    peft_config ([`~peft.PeftConfig`], *optional*, defaults to `None`):
+        PEFT configuration used to wrap the model. If `None`, the model is not wrapped.
+    compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
+        The function to use to compute the metrics. Must take an `EvalPrediction` and return a dictionary mapping
+        metric names to metric values.
+    callbacks (`list[transformers.TrainerCallback]`):
+        The callbacks to use for training.
+    optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`):
+        The optimizer and scheduler to use for training.
+    preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`):
+        The function to use to preprocess the logits before computing the metrics.
+
+.. deprecated:: 0.22.0
+    The following parameters are deprecated and will be removed in a future version:
+
+    * `reward_model`: Use `reward_funcs` instead. For example, change `reward_model=model` to `reward_funcs=model`.
+    * `reward_processing_class`: Use `reward_processing_classes` instead.
For example, change + `reward_processing_class=tokenizer` to `reward_processing_classes=tokenizer`. + + """ + def __init__( + self, + model, + ref_model = None, + reward_funcs = None, + judge = None, + args = None, + data_collator = None, + train_dataset = None, + eval_dataset = None, + processing_class = None, + reward_processing_classes = None, + peft_config = None, + compute_metrics = None, + callbacks = None, + preprocess_logits_for_metrics = None, + reward_model = None, + reward_processing_class = None, + **kwargs + ): + if args is None: args = UnslothOnlineDPOConfig() + use_bf16 = getattr(args, 'bf16', False) + if type(use_bf16) is not bool: use_bf16 = False + use_fp16 = getattr(args, 'fp16', False) + if type(use_fp16) is not bool: use_fp16 = False + force_float32 = False + full_finetuning = os.environ.get('UNSLOTH_ENABLE_FULL_FINETUNING', '0') == '1' + if not full_finetuning and (os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '1'): + print('Unsloth: Switching to float32 training since model cannot work with float16') + force_float32 = True + mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') + dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None) + if dtype is None: dtype = model.get_input_embeddings().dtype + from unsloth_zoo.utils import _get_dtype + dtype = _get_dtype(dtype) + float16 = dtype == torch.float16 + if not force_float32 and (float16 and use_bf16): raise TypeError('Unsloth: Model is in float16 precision but you want to use bfloat16 precision. Set fp16 to `True` and bf16 to `False`') + if not force_float32 and (not float16 and use_fp16): raise TypeError('Unsloth: Model is in bfloat16 precision but you want to use float16 precision. Set fp16 to `False` and bf16 to `True`') + if force_float32: + # Forced float32 training + args.fp16 = False + args.bf16 = False + os.environ['ACCELERATE_MIXED_PRECISION'] = 'no' + elif (not use_bf16 and not use_fp16) and mixed_precision_dtype == 'float32': + # Mixed precision training + args.fp16 = float16 + args.bf16 = not float16 + os.environ['ACCELERATE_MIXED_PRECISION'] = 'fp16' if float16 else 'bf16' + if getattr(args, 'eval_dataset', None) is not None and getattr(args, 'eval_strategy', 'no') == 'no': + args.eval_strategy = 'steps' + if getattr(args, 'eval_steps', None) is None: args.eval_steps = 0.1 + ga_steps = getattr(args, 'gradient_accumulation_steps', None) + if ga_steps is not None and ga_steps > 1: + from transformers import __version__ as transformers_version + if Version(transformers_version) <= Version('4.45.2'): + print('**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!\n' + '`pip install --upgrade --no-cache-dir --force-reinstall --no-deps unsloth transformers trl unsloth_zoo`') + if getattr(args, 'eval_strategy', 'no') != 'no': + eval_bsz = getattr(args, 'per_device_eval_batch_size', 8) + if eval_bsz == 8 and args.per_device_train_batch_size < eval_bsz: args.per_device_eval_batch_size = args.per_device_train_batch_size + if getattr(args, 'eval_accumulation_steps', None) is None and ga_steps is not None: args.eval_accumulation_steps = ga_steps + fp16_full_eval = getattr(args, 'fp16_full_eval', False) + if type(fp16_full_eval) is not bool: fp16_full_eval = False + bf16_full_eval = getattr(args, 'bf16_full_eval', False) + if type(bf16_full_eval) is not bool: bf16_full_eval = False + if args.fp16 and bf16_full_eval: args.bf16_full_eval = False; args.fp16_full_eval = True + if args.bf16 and fp16_full_eval: 
args.bf16_full_eval = True; args.fp16_full_eval = False + if force_float32: + args.bf16_full_eval = False + args.fp16_full_eval = False + elif os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') == 'bfloat16': + args.bf16_full_eval = True + args.fp16_full_eval = False + elif not bf16_full_eval and not fp16_full_eval: + args.bf16_full_eval = args.bf16 + args.fp16_full_eval = args.fp16 + _output_logits = False + if locals().get('compute_metrics', None) is not None: _output_logits = True + if locals().get('preprocess_logits_for_metrics', None) is not None: _output_logits = True + if _output_logits: + os.environ['UNSLOTH_RETURN_LOGITS'] = '1' + if 'max_seq_length' not in locals() and not hasattr(args, 'max_seq_length'): + pass + else: + model_max_seq_length = getattr(model, 'max_seq_length', None) + args_max_seq_length = getattr(args, 'max_seq_length', None) + if args_max_seq_length is None and model_max_seq_length is not None: + max_seq_length = model.max_seq_length + if hasattr(args, 'max_seq_length'): args.max_seq_length = max_seq_length + if model is not None and hasattr(model, 'for_training'): + model.for_training() + if 'tokenizer' in locals() and hasattr(tokenizer, 'padding_side'): tokenizer.padding_side = 'right' + if 'processing_class' in locals(): + if hasattr(processing_class, 'padding_side'): processing_class.padding_side = 'right' + if hasattr(processing_class, 'tokenizer') and hasattr(processing_class.tokenizer, 'padding_side'): processing_class.tokenizer.padding_side = 'right' + __tokenizer = processing_class if 'processing_class' in locals() else tokenizer + from unsloth_zoo.vision_utils import UnslothVisionDataCollator + if not isinstance(data_collator, UnslothVisionDataCollator): + if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names: + data_collator = DataCollatorForSeq2Seq( + __tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False + if hasattr(args, 'dataset_text_field'): args.dataset_text_field = '' + if hasattr(args, 'dataset_kwargs'): args.dataset_kwargs = {'skip_prepare_dataset': True} + if not isinstance(data_collator, UnslothVisionDataCollator): + if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'): + if isinstance(data_collator, DataCollatorForSeq2Seq): + data_collator = DataCollatorForSeq2Seq( + __tokenizer.tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer.tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + other_metrics = [] + + from unsloth_zoo.logging_utils import PatchRLStatistics + PatchRLStatistics('online_dpo_trainer', other_metrics) + + # [TODO] Fix up DataParallel multiplying batch sizes + # [TODO] DDP works, but DP seems to not work? 
[TODO] + if getattr(args, "parallel_mode", None) == ParallelMode.NOT_DISTRIBUTED and args.n_gpu > 1: + if getattr(args, "_n_gpu", 1) != 1: + args._n_gpu = 1 + if "model" in locals() and hasattr(model, "for_training"): + model.for_training() + super().__init__( + model = model, + ref_model = ref_model, + reward_funcs = reward_funcs, + judge = judge, + args = args, + data_collator = data_collator, + train_dataset = train_dataset, + eval_dataset = eval_dataset, + processing_class = processing_class, + reward_processing_classes = reward_processing_classes, + peft_config = peft_config, + compute_metrics = compute_metrics, + callbacks = callbacks, + preprocess_logits_for_metrics = preprocess_logits_for_metrics, + reward_model = reward_model, + reward_processing_class = reward_processing_class,**kwargs) + if "model" in locals() and hasattr(model, "for_inference"): + model.for_inference() + if hasattr(self, 'neftune_hook_handle'): + self.neftune_hook_handle.remove() + if hasattr(self, 'neftune_hook_handle'): del self.neftune_hook_handle + if getattr(args, 'neftune_noise_alpha', None) is not None: + model.get_input_embeddings().neftune_noise_alpha = self.neftune_noise_alpha + pass + if hasattr(self, 'accelerator'): + scaler = self.accelerator.scaler + current_model = model + while hasattr(current_model, 'model'): + current_model.accelerator_scaler = scaler + current_model = current_model.model + current_model.accelerator_scaler = scaler + pass + if hasattr(self, 'train'): + self.train = MethodType(prepare_for_training_mode(self.__class__.train), self) + pass + +pass + + +if hasattr(logger, "addFilter"): + import logging + class HideLoggingMessage(logging.Filter): + def __init__(self, text): self.text = text + def filter(self, x): return not (self.text in x.getMessage()) + pass + logger.addFilter(HideLoggingMessage("`use_cache=True`")) + diff --git a/unsloth_compiled_cache/UnslothPPOTrainer.py b/unsloth_compiled_cache/UnslothPPOTrainer.py new file mode 100644 index 0000000000000000000000000000000000000000..4daa950e44cb32d079f157a8d4ef12eb66250a3d --- /dev/null +++ b/unsloth_compiled_cache/UnslothPPOTrainer.py @@ -0,0 +1,1566 @@ +""" +2025.11.2 +2025.11.1 +4.57.2 +0.23.0 +__UNSLOTH_VERSIONING__ +""" + +# Unsloth auto generated code +# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . 
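+# Note: `chunked_selective_log_softmax` below is intended to match trl's
+# `selective_log_softmax(logits, index)` exactly (see the comment inside its
+# loop), but computes the log-softmax over four float32 chunks so the temporary
+# logsumexp buffers stay at roughly a quarter of the full (batch * seq) size.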
+ +from torch import Tensor +import torch +import torch.nn as nn +from torch.nn import functional as F +from typing import Any, List, Optional, Tuple, Union, Dict, Set, Callable +from trl.trainer.ppo_trainer import (Accelerator, BaseImageProcessor, CallbackHandler, DEFAULT_CALLBACKS, DEFAULT_PROGRESS_CALLBACK, DataCollatorWithPadding, DataLoader, Dataset, ExportableState, FeatureExtractionMixin, GenerationConfig, INVALID_LOGPROB, OnlineTrainerState, Optional, PPOConfig, PPOTrainer, Path, PeftConfig, PeftModel, PolicyAndValueWrapper, PreTrainedTokenizerBase, PrinterCallback, ProcessorMixin, Trainer, TrainerCallback, TrainerControl, Union, batch_generation, broadcast, contextmanager, create_reference_model, defaultdict, disable_dropout_in_model, empty_cache, exact_div, first_true_indices, forward, gather_object, gc, generate_model_card, get_comet_experiment_url, get_peft_model, get_reporting_integration_callbacks, get_reward, is_peft_available, is_rich_available, is_wandb_available, log_table_to_comet_experiment, masked_mean, masked_whiten, math, nn, np, nullcontext, os, pd, peft_module_casting_to_bf16, prepare_deepspeed, print_rich_table, selective_log_softmax, textwrap, time, torch, truncate_response, unwrap_model_for_generation, Optional, PeftModel, Trainer, is_peft_available, os, torch) + + +import os +from typing import * +from dataclasses import dataclass, field +from packaging.version import Version +import torch +import numpy as np +from contextlib import nullcontext +from torch.nn import functional as F +from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling as TransformersDataCollatorForLanguageModeling +from transformers.training_args import ParallelMode + +# Wrap trainer with padding to right and enable training mode +import functools +from types import MethodType +def prepare_for_training_mode(f): + @functools.wraps(f) + def wrapper(self, *args, **kwargs): + # Enable training mode + if hasattr(self, 'model') and hasattr(self.model, "for_training"): + self.model.for_training() + output = f(self, *args, **kwargs) + # Return inference mode + if hasattr(self, 'model') and hasattr(self.model, "for_inference"): + self.model.for_inference() + return output + return wrapper +pass + +torch_compile_options = { + "epilogue_fusion" : True, + "max_autotune" : False, + "shape_padding" : True, + "trace.enabled" : False, + "triton.cudagraphs" : False, +} + +@torch.compile(dynamic = True, fullgraph = True, options = torch_compile_options,) +def chunked_selective_log_softmax(logits, index): + # Split into 4 chunks only + chunked_logits = torch.chunk(logits.reshape(-1, logits.shape[-1]), chunks = 4, dim = 0) + chunked_index = torch.chunk(index.reshape(-1), chunks = 4, dim = 0) + all_per_token_logps = [] + # Below loop does the same as selective_log_softmax(chunk_logits, chunk_index) + for chunk_logits, chunk_index in zip(chunked_logits, chunked_index): + chunk_logits = chunk_logits.to(torch.float32) + selected_logits = torch.gather(chunk_logits, dim = -1, index = chunk_index.unsqueeze(-1)).squeeze(-1) + logsumexp_values = torch.logsumexp(chunk_logits, dim = -1) + per_token_logps = selected_logits - logsumexp_values + all_per_token_logps.append(per_token_logps) + pass + all_per_token_logps = torch.concat(all_per_token_logps) + all_per_token_logps = all_per_token_logps.reshape((logits.shape[0], logits.shape[1])) + return all_per_token_logps + +def calculate_pad_tokens_in_prompt( + input_ids: torch.Tensor, + logits_to_keep: int, + pad_token_id: int +) -> torch.Tensor: + """ 
+    Given a prompt tensor, returns the number of left-padding tokens in each sequence, so [pad, pad, pad, cat] -> 3.
+    """
+    if logits_to_keep >= input_ids.shape[1]:
+        raise ValueError("logits_to_keep must be smaller than the sequence length.")
+
+    prompt_section = input_ids[:, :-logits_to_keep]
+
+    padding_mask = (prompt_section == pad_token_id)
+
+    pad_token_counts = padding_mask.sum(dim=1)
+
+    return pad_token_counts
+
+def create_completion_attention_mask(
+    completion_input_ids: torch.Tensor,
+    left_pad_tokens_per_prompt: torch.Tensor,
+    max_left_pad: int,
+    pad_token_id: int
+) -> torch.Tensor:
+    """
+    Given a sequence [p,p,p,c,c,c,pad,pad,pad],
+
+    where p are extra prompt tokens left over from slicing the tensor, c are completion tokens
+    and pad are padding tokens, this function builds a completion mask that zeroes out the pad
+    and p tokens, so in this example [0,0,0,1,1,1,0,0,0].
+    """
+    batch_size, completion_len = completion_input_ids.shape
+    device = completion_input_ids.device
+
+    num_tokens_to_mask = max_left_pad - left_pad_tokens_per_prompt
+
+    indices = torch.arange(completion_len, device=device).unsqueeze(0)
+    shift_mask = indices >= num_tokens_to_mask.unsqueeze(1)
+
+    non_padding_mask = (completion_input_ids != pad_token_id)
+
+    final_mask = shift_mask & non_padding_mask
+
+    return final_mask
+
+def left_pack_padding(tensor: torch.Tensor, pad_id: int) -> torch.Tensor:
+    """
+    Packs the non-padding tokens of each sequence to the left, moving all padding tokens to the right.
+    """
+    mask = (tensor != pad_id)
+    # Must use stable=True since argsort on a binary mask is full of ties and token order must be kept
+    sorted_indices = torch.argsort(mask, dim=1, descending=True, stable=True)
+    packed_tensor = torch.gather(tensor, 1, sorted_indices)
+    return packed_tensor
+
+def align_logprobs_with_mask(
+    logprob_tensor: torch.Tensor,
+    attention_mask: torch.Tensor,
+    pad_value: float = 0.0
+) -> torch.Tensor:
+    """
+    Aligns a log probability tensor with a given attention mask.
+    """
+
+    device = logprob_tensor.device
+    batch_size, logprob_seq_len = logprob_tensor.shape
+    mask_seq_len = attention_mask.shape[1]
+
+    padded_logprobs = torch.full(
+        attention_mask.shape,
+        fill_value=pad_value,
+        dtype=logprob_tensor.dtype,
+        device=device
+    )
+
+    left_pad_counts = torch.argmax(attention_mask, dim=1)
+
+    cols = torch.arange(logprob_seq_len, device=device)
+    dest_indices = left_pad_counts.unsqueeze(1) + cols
+
+    # Create destination row indices
+    # Shape: [batch_size, logprob_seq_len]
+    row_indices = torch.arange(batch_size, device=device).unsqueeze(1).expand_as(dest_indices)
+
+    # Filter out-of-bounds indices and perform the assignment.
+    # Create a mask to identify only the indices that are within the bounds
+    # of the target tensor's sequence length.
+    valid_mask = dest_indices < mask_seq_len
+
+    # Use this mask to select only the valid row indices, column indices,
+    # and the corresponding values from the logprob tensor.
+    # This flattens the selected elements into 1D tensors.
+    valid_rows = row_indices[valid_mask]
+    valid_cols = dest_indices[valid_mask]
+    valid_vals = logprob_tensor[valid_mask]
+
+    # Place the valid values into their correct positions in the padded tensor
+    # using a single, efficient advanced indexing operation.
+    padded_logprobs[valid_rows, valid_cols] = valid_vals
+
+    return padded_logprobs
+@dataclass
+class UnslothPPOConfig(PPOConfig):
+    """
+
+Configuration class for the [`PPOTrainer`].
+
+This class includes only the parameters that are specific to PPO training.
For a full list of training arguments, +please refer to the [`~transformers.TrainingArguments`] and [`OnPolicyConfig`] documentation. Note that default +values in this class may differ from those in [`~transformers.TrainingArguments`]. + +Using [`~transformers.HfArgumentParser`] we can turn this class into +[argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the +command line. + +Parameters: + exp_name (`str`, *optional*, defaults to `os.path.basename(__file__)[:-3]`): + Name of this experiment. + reward_model_path (`str`, *optional*, defaults to `"EleutherAI/pythia-160m"`): + Path to the reward model. + model_adapter_name (`str` or `None`, *optional*, defaults to `None`): + Name of the train target PEFT adapter, when using LoRA with multiple adapters. + ref_adapter_name (`str` or `None`, *optional*, defaults to `None`): + Name of the reference PEFT adapter, when using LoRA with multiple adapters. + num_ppo_epochs (`int`, *optional*, defaults to `4`): + Number of epochs to train. + whiten_rewards (`bool`, *optional*, defaults to `False`): + Whether to whiten the rewards. + kl_coef (`float`, *optional*, defaults to `0.05`): + KL coefficient. + kl_estimator (`Literal["k1", "k3"]`, *optional*, defaults to `"k1"`): + Which estimator for KL-Divergence to use from [Approximating KL + Divergence](http://joschu.net/blog/kl-approx.html). Defaults to "k1", a straightforward, unbiased + estimator. Can be set to "k3", an unbiased estimator with lower variance which "appears to be a strictly + better estimator". Cannot be set to "k2", as it is used for logging purposes. + cliprange (`float`, *optional*, defaults to `0.2`): + Clip range. + vf_coef (`float`, *optional*, defaults to `0.1`): + Value function coefficient. + cliprange_value (`float`, *optional*, defaults to `0.2`): + Clip range for the value function. + gamma (`float`, *optional*, defaults to `1.0`): + Discount factor. + lam (`float`, *optional*, defaults to `0.95`): + Lambda value for GAE. + ds3_gather_for_generation (`bool`, *optional*, defaults to `True`): + This setting applies to DeepSpeed ZeRO-3. If enabled, the policy model weights are gathered for generation, + improving generation speed. However, disabling this option allows training models that exceed the VRAM + capacity of a single GPU, albeit at the cost of slower generation. + + """ + vllm_sampling_params: Optional[Any] = field( + default = None, + metadata = {'help': 'vLLM SamplingParams'}, + ) + unsloth_num_chunks : Optional[int] = field( + default = -1, + metadata = {'help': 'Chunk size to reduce memory usage. 
-1 is most efficient.'}, + ) + + def __init__( + self, + output_dir = None, + overwrite_output_dir = None, + do_train = False, + do_eval = False, + do_predict = False, + eval_strategy = 'no', + prediction_loss_only = False, + per_device_train_batch_size = 4, + per_device_eval_batch_size = 4, + per_gpu_train_batch_size = None, + per_gpu_eval_batch_size = None, + gradient_accumulation_steps = 2, + eval_accumulation_steps = 2, + eval_delay = 0, + torch_empty_cache_steps = 250, + learning_rate = 5e-05, + weight_decay = 0.01, + adam_beta1 = 0.9, + adam_beta2 = 0.999, + adam_epsilon = 1e-08, + max_grad_norm = 1.0, + num_train_epochs = 3.0, + max_steps = -1, + lr_scheduler_type = 'linear', + warmup_ratio = 0.1, + warmup_steps = 0, + log_level = 'passive', + log_level_replica = 'warning', + log_on_each_node = True, + logging_dir = None, + logging_strategy = 'steps', + logging_first_step = False, + logging_steps = 1, + logging_nan_inf_filter = False, + save_strategy = 'steps', + save_steps = 500, + save_total_limit = None, + save_safetensors = True, + save_on_each_node = False, + save_only_model = False, + restore_callback_states_from_checkpoint = False, + no_cuda = False, + use_cpu = False, + use_mps_device = False, + seed = 3407, + data_seed = 3407, + jit_mode_eval = False, + bf16 = False, + fp16 = False, + fp16_opt_level = 'O1', + half_precision_backend = 'auto', + bf16_full_eval = False, + fp16_full_eval = False, + tf32 = None, + local_rank = -1, + ddp_backend = None, + tpu_num_cores = None, + tpu_metrics_debug = False, + debug = '', + dataloader_drop_last = False, + eval_steps = None, + dataloader_num_workers = 0, + dataloader_prefetch_factor = None, + past_index = -1, + run_name = None, + disable_tqdm = None, + remove_unused_columns = True, + label_names = None, + load_best_model_at_end = False, + metric_for_best_model = None, + greater_is_better = None, + ignore_data_skip = False, + fsdp = None, + fsdp_min_num_params = 0, + fsdp_config = None, + fsdp_transformer_layer_cls_to_wrap = None, + accelerator_config = None, + parallelism_config = None, + deepspeed = None, + label_smoothing_factor = 0.0, + optim = 'adamw_8bit', + optim_args = None, + adafactor = False, + group_by_length = False, + length_column_name = 'length', + report_to = None, + project = 'huggingface', + trackio_space_id = 'trackio', + ddp_find_unused_parameters = None, + ddp_bucket_cap_mb = None, + ddp_broadcast_buffers = None, + dataloader_pin_memory = True, + dataloader_persistent_workers = False, + skip_memory_metrics = True, + use_legacy_prediction_loop = False, + push_to_hub = False, + resume_from_checkpoint = None, + hub_model_id = None, + hub_strategy = 'every_save', + hub_token = None, + hub_private_repo = None, + hub_always_push = False, + hub_revision = None, + gradient_checkpointing = True, + gradient_checkpointing_kwargs = None, + include_inputs_for_metrics = False, + eval_do_concat_batches = True, + fp16_backend = 'auto', + push_to_hub_model_id = None, + push_to_hub_organization = None, + push_to_hub_token = None, + mp_parameters = '', + auto_find_batch_size = False, + full_determinism = False, + torchdynamo = None, + ray_scope = 'last', + ddp_timeout = 1800, + torch_compile = False, + torch_compile_backend = None, + torch_compile_mode = None, + include_tokens_per_second = False, + include_num_input_tokens_seen = False, + neftune_noise_alpha = None, + optim_target_modules = None, + batch_eval_metrics = False, + eval_on_start = False, + use_liger_kernel = False, + liger_kernel_config = None, + eval_use_gather_object 
= False,
+        average_tokens_across_devices = True,
+        dataset_num_proc = None,
+        num_mini_batches = 1,
+        total_episodes = None,
+        local_rollout_forward_batch_size = 64,
+        num_sample_generations = 10,
+        response_length = 53,
+        stop_token = None,
+        stop_token_id = None,
+        temperature = 0.7,
+        missing_eos_penalty = None,
+        sft_model_path = 'EleutherAI/pythia-160m',
+        world_size = None,
+        num_total_batches = None,
+        micro_batch_size = None,
+        local_batch_size = None,
+        batch_size = None,
+        local_mini_batch_size = None,
+        mini_batch_size = None,
+        exp_name = 'ppo_config',
+        reward_model_path = 'EleutherAI/pythia-160m',
+        model_adapter_name = None,
+        ref_adapter_name = None,
+        num_ppo_epochs = 4,
+        whiten_rewards = False,
+        kl_coef = 0.05,
+        kl_estimator = 'k1',
+        cliprange = 0.2,
+        vf_coef = 0.1,
+        cliprange_value = 0.2,
+        gamma = 1.0,
+        lam = 0.95,
+        ds3_gather_for_generation = True,
+        vllm_sampling_params = None,
+        unsloth_num_chunks = -1,
+
+        **kwargs,
+    ):
+        if learning_rate < 1e-7: print(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
+        if learning_rate > 1: print(f'Unsloth: Your learning rate of `{learning_rate}` is way too large (> 1)! Consider decreasing it to 1e-1, otherwise gradient updates will explode!')
+        if output_dir is None and save_strategy == 'steps' and save_steps == 500:
+            output_dir = 'unsloth_training_checkpoints'
+            save_strategy = 'no'
+        if dataset_num_proc is None:
+            from multiprocessing import cpu_count
+            dataset_num_proc = min(max(cpu_count()+4, 2), 64)
+        if temperature <= 0:
+            raise ValueError('Unsloth: Please set a positive non-zero temperature since your results will be wrong.')
+        elif temperature >= 10:
+            raise ValueError('Unsloth: Please set a positive non-zero temperature less than 10, since sampling will be quite erratic.')
+
+
+        super().__init__(
+            output_dir = output_dir,
+            overwrite_output_dir = overwrite_output_dir,
+            do_train = do_train,
+            do_eval = do_eval,
+            do_predict = do_predict,
+            eval_strategy = eval_strategy,
+            prediction_loss_only = prediction_loss_only,
+            per_device_train_batch_size = per_device_train_batch_size,
+            per_device_eval_batch_size = per_device_eval_batch_size,
+            per_gpu_train_batch_size = per_gpu_train_batch_size,
+            per_gpu_eval_batch_size = per_gpu_eval_batch_size,
+            gradient_accumulation_steps = gradient_accumulation_steps,
+            eval_accumulation_steps = eval_accumulation_steps,
+            eval_delay = eval_delay,
+            torch_empty_cache_steps = torch_empty_cache_steps,
+            learning_rate = learning_rate,
+            weight_decay = weight_decay,
+            adam_beta1 = adam_beta1,
+            adam_beta2 = adam_beta2,
+            adam_epsilon = adam_epsilon,
+            max_grad_norm = max_grad_norm,
+            num_train_epochs = num_train_epochs,
+            max_steps = max_steps,
+            lr_scheduler_type = lr_scheduler_type,
+            warmup_ratio = warmup_ratio,
+            warmup_steps = warmup_steps,
+            log_level = log_level,
+            log_level_replica = log_level_replica,
+            log_on_each_node = log_on_each_node,
+            logging_dir = logging_dir,
+            logging_strategy = logging_strategy,
+            logging_first_step = logging_first_step,
+            logging_steps = logging_steps,
+            logging_nan_inf_filter = logging_nan_inf_filter,
+            save_strategy = save_strategy,
+            save_steps = save_steps,
+            save_total_limit = save_total_limit,
+            save_safetensors = save_safetensors,
+            save_on_each_node = save_on_each_node,
+            save_only_model = save_only_model,
+            restore_callback_states_from_checkpoint = restore_callback_states_from_checkpoint,
+            no_cuda = no_cuda,
+            use_cpu = use_cpu,
+
use_mps_device = use_mps_device, + seed = seed, + data_seed = data_seed, + jit_mode_eval = jit_mode_eval, + bf16 = bf16, + fp16 = fp16, + fp16_opt_level = fp16_opt_level, + half_precision_backend = half_precision_backend, + bf16_full_eval = bf16_full_eval, + fp16_full_eval = fp16_full_eval, + tf32 = tf32, + local_rank = local_rank, + ddp_backend = ddp_backend, + tpu_num_cores = tpu_num_cores, + tpu_metrics_debug = tpu_metrics_debug, + debug = debug, + dataloader_drop_last = dataloader_drop_last, + eval_steps = eval_steps, + dataloader_num_workers = dataloader_num_workers, + dataloader_prefetch_factor = dataloader_prefetch_factor, + past_index = past_index, + run_name = run_name, + disable_tqdm = disable_tqdm, + remove_unused_columns = remove_unused_columns, + label_names = label_names, + load_best_model_at_end = load_best_model_at_end, + metric_for_best_model = metric_for_best_model, + greater_is_better = greater_is_better, + ignore_data_skip = ignore_data_skip, + fsdp = fsdp, + fsdp_min_num_params = fsdp_min_num_params, + fsdp_config = fsdp_config, + fsdp_transformer_layer_cls_to_wrap = fsdp_transformer_layer_cls_to_wrap, + accelerator_config = accelerator_config, + parallelism_config = parallelism_config, + deepspeed = deepspeed, + label_smoothing_factor = label_smoothing_factor, + optim = optim, + optim_args = optim_args, + adafactor = adafactor, + group_by_length = group_by_length, + length_column_name = length_column_name, + report_to = report_to, + project = project, + trackio_space_id = trackio_space_id, + ddp_find_unused_parameters = ddp_find_unused_parameters, + ddp_bucket_cap_mb = ddp_bucket_cap_mb, + ddp_broadcast_buffers = ddp_broadcast_buffers, + dataloader_pin_memory = dataloader_pin_memory, + dataloader_persistent_workers = dataloader_persistent_workers, + skip_memory_metrics = skip_memory_metrics, + use_legacy_prediction_loop = use_legacy_prediction_loop, + push_to_hub = push_to_hub, + resume_from_checkpoint = resume_from_checkpoint, + hub_model_id = hub_model_id, + hub_strategy = hub_strategy, + hub_token = hub_token, + hub_private_repo = hub_private_repo, + hub_always_push = hub_always_push, + hub_revision = hub_revision, + gradient_checkpointing = gradient_checkpointing, + gradient_checkpointing_kwargs = gradient_checkpointing_kwargs, + include_inputs_for_metrics = include_inputs_for_metrics, + eval_do_concat_batches = eval_do_concat_batches, + fp16_backend = fp16_backend, + push_to_hub_model_id = push_to_hub_model_id, + push_to_hub_organization = push_to_hub_organization, + push_to_hub_token = push_to_hub_token, + mp_parameters = mp_parameters, + auto_find_batch_size = auto_find_batch_size, + full_determinism = full_determinism, + torchdynamo = torchdynamo, + ray_scope = ray_scope, + ddp_timeout = ddp_timeout, + torch_compile = torch_compile, + torch_compile_backend = torch_compile_backend, + torch_compile_mode = torch_compile_mode, + include_tokens_per_second = include_tokens_per_second, + include_num_input_tokens_seen = include_num_input_tokens_seen, + neftune_noise_alpha = neftune_noise_alpha, + optim_target_modules = optim_target_modules, + batch_eval_metrics = batch_eval_metrics, + eval_on_start = eval_on_start, + use_liger_kernel = use_liger_kernel, + liger_kernel_config = liger_kernel_config, + eval_use_gather_object = eval_use_gather_object, + average_tokens_across_devices = average_tokens_across_devices, + dataset_num_proc = dataset_num_proc, + num_mini_batches = num_mini_batches, + total_episodes = total_episodes, + local_rollout_forward_batch_size = 
local_rollout_forward_batch_size, + num_sample_generations = num_sample_generations, + response_length = response_length, + stop_token = stop_token, + stop_token_id = stop_token_id, + temperature = temperature, + missing_eos_penalty = missing_eos_penalty, + sft_model_path = sft_model_path, + world_size = world_size, + num_total_batches = num_total_batches, + micro_batch_size = micro_batch_size, + local_batch_size = local_batch_size, + batch_size = batch_size, + local_mini_batch_size = local_mini_batch_size, + mini_batch_size = mini_batch_size, + exp_name = exp_name, + reward_model_path = reward_model_path, + model_adapter_name = model_adapter_name, + ref_adapter_name = ref_adapter_name, + num_ppo_epochs = num_ppo_epochs, + whiten_rewards = whiten_rewards, + kl_coef = kl_coef, + kl_estimator = kl_estimator, + cliprange = cliprange, + vf_coef = vf_coef, + cliprange_value = cliprange_value, + gamma = gamma, + lam = lam, + ds3_gather_for_generation = ds3_gather_for_generation,**kwargs) + self.vllm_sampling_params = vllm_sampling_params + self.unsloth_num_chunks = unsloth_num_chunks + +pass + +class _UnslothPPOTrainer(Trainer): + """Trainer for Proximal Policy Optimization (PPO). + + For details on PPO, see the paper: [Proximal Policy Optimization + Algorithms](https://huggingface.co/papers/1707.06347). + + Args: + args ([`PPOConfig`]): + Training arguments. + processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`]): + Class to process the data. + model (`torch.nn.Module`): + Model to be trained. This is the policy model. + ref_model (`torch.nn.Module`, *optional*): + Reference model used to compute the KL divergence. If `None`, a copy of the policy model is created. + reward_model (`torch.nn.Module`): + Reward model used to compute the rewards. + train_dataset ([`~datasets.Dataset`]): + Dataset for training. + value_model (`torch.nn.Module`): + Value model used to predict the value of a state. + data_collator ([`~transformers.DataCollatorWithPadding`], *optional*): + Data collator to batch and pad samples from the dataset. If `None`, a default data collator is created + using the `processing_class`. + eval_dataset ([`~datasets.Dataset`] or `dict` of [`~datasets.Dataset`], *optional*): + Dataset for evaluation. + optimizers (`tuple` of `torch.optim.Optimizer` and `torch.optim.lr_scheduler.LambdaLR`, *optional*, defaults to `(None, None)`): + Tuple containing the optimizer and the learning rate scheduler to use for training. If `None`, the + optimizer and the learning rate scheduler are created using the + [`~transformers.Trainer.create_optimizer_and_scheduler`] method. + callbacks (`list` of [`~transformers.TrainerCallback`], *optional*): + Callbacks to use during training. + peft_config ([`~peft.config.PeftConfig`], *optional*): + PEFT configuration to use PEFT for training. If `None`, PEFT is not used. If provided, the policy `model` + will be wrapped with the specified PEFT adapter. 
+ """ + + _tag_names = ["trl", "ppo"] + + def __init__( + self, + args: PPOConfig, + processing_class: Optional[ + Union[PreTrainedTokenizerBase, BaseImageProcessor, FeatureExtractionMixin, ProcessorMixin] + ], + model: nn.Module, + ref_model: Optional[nn.Module], + reward_model: nn.Module, + train_dataset: Dataset, + value_model: nn.Module, + data_collator: Optional[DataCollatorWithPadding] = None, + eval_dataset: Optional[Union[Dataset, dict[str, Dataset]]] = None, + # less commonly used + optimizers: tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None), + callbacks: Optional[list[TrainerCallback]] = None, + peft_config: Optional["PeftConfig"] = None, + ) -> None: + if ref_model is model: + raise ValueError( + "`model` and `ref_model` cannot be the same object. If you want `ref_model` to be the " + "same as `model`, you must make a copy of it, or `None` if you use peft." + ) + + self.args = args + self.processing_class = processing_class + self.policy_model = model + + # Define the collator if not provided + if data_collator is None: + data_collator = DataCollatorWithPadding(self.processing_class) + + # Handle stop token settings: update policy model's generation_config to use provided stop token + if args.stop_token and args.stop_token_id: + raise ValueError("You cannot set both `stop_token` and `stop_token_id`.") + elif args.stop_token: + if args.stop_token == "eos": + self.policy_model.generation_config.eos_token_id = self.stop_token_id = processing_class.eos_token_id + else: + raise ValueError( + f"Unknown `stop_token` {args.stop_token}. Allowed values are: `'eos'` and `None` (no stop token)." + ) + else: + self.policy_model.generation_config.eos_token_id = self.stop_token_id = args.stop_token_id # None or int + + # Check that the kl estimator is valid + if self.args.kl_estimator not in {"k1", "k3"}: + raise ValueError( + "kl_estimator must be either 'k1' (straightforward, unbiased) or 'k3' (lower variance, unbiased, " + "appears to be a strictly better estimator). See " + "[Approximating KL Divergence](http://joschu.net/blog/kl-approx.html) for details." 
+        )
+
+        # peft support
+        if not is_peft_available() and peft_config is not None:
+            raise ImportError(
+                "PEFT is not installed and you passed a `peft_config` in the trainer's kwargs, please install it to use the PEFT models"
+            )
+        elif is_peft_available() and peft_config is not None:
+            # if model is a peft model and we have a peft_config, we merge and unload it first
+            if isinstance(self.policy_model, PeftModel):
+                self.policy_model = self.policy_model.merge_and_unload()
+
+            # get peft model with the given config
+            self.policy_model = get_peft_model(self.policy_model, peft_config)
+            if args.bf16 and getattr(self.policy_model, "is_loaded_in_4bit", False):
+                peft_module_casting_to_bf16(self.policy_model)
+
+        self.is_peft_model = is_peft_available() and isinstance(self.policy_model, PeftModel)
+        self.model_adapter_name = args.model_adapter_name
+        self.ref_adapter_name = args.ref_adapter_name
+
+        if ref_model:
+            self.ref_model = ref_model
+        elif self.is_peft_model:
+            self.ref_model = None
+        else:
+            self.ref_model = create_reference_model(self.policy_model)
+
+        self.reward_model = reward_model
+        self.train_dataset = train_dataset
+        self.train_dataset_len = len(train_dataset)
+        self.value_model = value_model
+        self.data_collator = data_collator
+        self.eval_dataset = eval_dataset
+        self.optimizer, self.lr_scheduler = optimizers
+        self.optimizer_cls_and_kwargs = None # needed for transformers >= 4.47
+
+        #########
+        # calculate various batch sizes
+        #########
+        if args.total_episodes is None: # allow the users to define episodes in terms of epochs.
+            args.total_episodes = int(args.num_train_epochs * self.train_dataset_len)
+        accelerator = Accelerator(gradient_accumulation_steps=args.gradient_accumulation_steps)
+        self.accelerator = accelerator
+        args.world_size = accelerator.num_processes
+        args.local_batch_size = args.per_device_train_batch_size * args.gradient_accumulation_steps
+        args.micro_batch_size = int(args.per_device_train_batch_size * args.world_size)
+        args.batch_size = int(args.local_batch_size * args.world_size)
+        args.mini_batch_size = exact_div(
+            args.batch_size, args.num_mini_batches, "`batch_size` must be a multiple of `num_mini_batches`"
+        )
+        args.local_mini_batch_size = exact_div(
+            args.local_batch_size, args.num_mini_batches, "`local_batch_size` must be a multiple of `num_mini_batches`"
+        )
+        if args.whiten_rewards:
+            assert args.local_mini_batch_size >= 8, (
+                f"Per-rank minibatch size {args.local_mini_batch_size} is insufficient for whitening"
+            )
+        # `per_rank_rollout_batch_size` is our `args.local_batch_size`
+        # `per_rank_minibatch_size` is our `args.local_mini_batch_size`
+        args.num_total_batches = math.ceil(
+            args.total_episodes / args.batch_size
+        ) # we may train for more than `total_episodes`
+        time_tensor = torch.tensor(int(time.time()), device=accelerator.device)
+        time_int = broadcast(time_tensor, 0).item() # avoid different timestamps across processes
+        args.run_name = f"{args.exp_name}__{args.seed}__{time_int}"
+        self.local_seed = args.seed + accelerator.process_index * 100003 # Prime
+        if args.num_sample_generations > 0:
+            self.sample_generations_freq = max(1, args.num_total_batches // args.num_sample_generations)
+        self.local_dataloader_batch_size = args.local_batch_size
+
+        #########
+        # setup model, optimizer, and others
+        #########
+        for module in [self.policy_model, self.ref_model, self.value_model, self.reward_model]:
+            if module is not None:
+                disable_dropout_in_model(module)
+        self.model = PolicyAndValueWrapper(self.policy_model, self.value_model)
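+        # Editor's note: a worked example of the batch-size arithmetic above, using
+        # illustrative numbers (per_device_train_batch_size=4,
+        # gradient_accumulation_steps=8, world_size=2, num_mini_batches=2):
+        #   local_batch_size      = 4 * 8  = 32  (samples per rank per rollout)
+        #   micro_batch_size      = 4 * 2  = 8   (samples per optimizer step, all ranks)
+        #   batch_size            = 32 * 2 = 64  (samples per rollout, all ranks)
+        #   mini_batch_size       = 64 / 2 = 32, local_mini_batch_size = 32 / 2 = 16
+        # exact_div raises unless each division is exact.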
+        self.model.config = self.policy_model.config # needed for pushing to hub
+        self.create_optimizer_and_scheduler(
+            num_training_steps=args.num_total_batches
+        ) # note that we are calling `self.lr_scheduler.step()` manually only at the batch level
+
+        #########
+        ### trainer specifics
+        #########
+        default_callbacks = DEFAULT_CALLBACKS + get_reporting_integration_callbacks(self.args.report_to)
+        self.callbacks = default_callbacks if callbacks is None else default_callbacks + callbacks
+        self.callback_handler = CallbackHandler(
+            self.callbacks, self.model, self.processing_class, self.optimizer, self.lr_scheduler
+        )
+        self.add_callback(PrinterCallback if self.args.disable_tqdm else DEFAULT_PROGRESS_CALLBACK)
+        self.control = TrainerControl()
+        self.state = OnlineTrainerState(
+            is_local_process_zero=self.is_local_process_zero(),
+            is_world_process_zero=self.is_world_process_zero(),
+            stateful_callbacks=[
+                cb for cb in self.callback_handler.callbacks + [self.control] if isinstance(cb, ExportableState)
+            ],
+        )
+        self.current_flos = 0
+        self.hp_search_backend = None
+        self.is_deepspeed_enabled = getattr(self.accelerator.state, "deepspeed_plugin", None) is not None
+        self.is_fsdp_enabled = getattr(self.accelerator.state, "fsdp_plugin", None) is not None
+        # Create distant repo and output directory if needed
+        self.hub_model_id = None
+        if self.args.push_to_hub:
+            self.init_hf_repo()
+        if self.args.should_save:
+            os.makedirs(self.args.output_dir, exist_ok=True)
+
+        # Add tags for models that have been loaded with the correct transformers version
+        if hasattr(self.model, "add_model_tags"):
+            self.model.add_model_tags(self._tag_names)
+
+        #########
+        ### setup dataloader
+        #########
+        self.dataloader = DataLoader(
+            self.train_dataset,
+            batch_size=self.local_dataloader_batch_size,
+            shuffle=True,
+            collate_fn=self.data_collator,
+            drop_last=True, # needed; otherwise the last batch will be of ragged shape
+        )
+        # sync random states for DataLoader(shuffle=True) before `accelerator.prepare`
+        # see https://gist.github.com/vwxyzjn/2581bff1e48e185e0b85b6dfe1def79c
+        torch.manual_seed(args.seed)
+        self.model, self.optimizer, self.dataloader = accelerator.prepare(self.model, self.optimizer, self.dataloader)
+        torch.manual_seed(self.local_seed) # reset the local seed again
+
+        self.eval_dataloader = DataLoader(
+            self.eval_dataset,
+            batch_size=args.per_device_eval_batch_size,
+            collate_fn=self.data_collator,
+            drop_last=True,
+        ) # no need to shuffle eval dataset
+        self.eval_dataloader = accelerator.prepare(self.eval_dataloader)
+
+        if self.is_deepspeed_enabled:
+            self.reward_model = prepare_deepspeed(
+                self.reward_model, args.per_device_train_batch_size, args.fp16, args.bf16
+            )
+
+            if self.ref_model is None:
+                if not self.is_peft_model:
+                    raise ValueError("No reference model and model is not a Peft model.")
+            else:
+                self.ref_model = prepare_deepspeed(
+                    self.ref_model, args.per_device_train_batch_size, args.fp16, args.bf16
+                )
+        else:
+            if self.ref_model is None:
+                if not self.is_peft_model:
+                    raise ValueError("No reference model and model is not a Peft model.")
+            else:
+                self.ref_model = self.ref_model.to(self.accelerator.device)
+            self.reward_model = self.reward_model.to(self.accelerator.device)
+
+    def get_train_dataloader(self) -> DataLoader:
+        return self.dataloader
+
+    def get_eval_dataloader(self) -> DataLoader:
+        return self.eval_dataloader
+
+    @contextmanager
+    def null_ref_context(self):
+        """Context manager for handling null reference model (that is, peft adapter manipulation)."""
+
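+        # Editor's note: when the policy is a PEFT model and ref_adapter_name is not
+        # set, reference logprobs come from the frozen base weights, obtained by
+        # temporarily disabling the adapter below; if ref_adapter_name is set, that
+        # adapter is activated instead and model_adapter_name (or "default") is
+        # restored on exit.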
with ( + self.accelerator.unwrap_model(self.model.policy).disable_adapter() + if self.is_peft_model and not self.ref_adapter_name + else nullcontext() + ): + if self.ref_adapter_name: + self.model.policy.set_adapter(self.ref_adapter_name) + yield + if self.ref_adapter_name: + self.model.policy.set_adapter(self.model_adapter_name or "default") + + def save_model(self, output_dir: Optional[str] = None, _internal_call: bool = False): + backup_model = self.model + self.model = self.model.policy # save only the policy + + if self.is_deepspeed_enabled: + backup_deepspeed = self.deepspeed + self.deepspeed = self.model + + super().save_model(output_dir, _internal_call) + + self.model = backup_model + + if self.is_deepspeed_enabled: + self.deepspeed = backup_deepspeed + + def train(self): + args = self.args + accelerator = self.accelerator + optimizer = self.optimizer + model = self.model + ref_policy = self.ref_model + reward_model = self.reward_model + processing_class = self.processing_class + dataloader = self.dataloader + device = accelerator.device + + def repeat_generator(): + while True: + yield from dataloader + + iter_dataloader = iter(repeat_generator()) + generation_config = GenerationConfig( + max_new_tokens=args.response_length, + temperature=(args.temperature + 1e-7), + top_k=0.0, + top_p=1.0, + do_sample=True, + ) + + accelerator.print("===training policy===") + start_time = time.time() + stats_shape = (args.num_ppo_epochs, args.num_mini_batches, args.gradient_accumulation_steps) + approxkl_stats = torch.zeros(stats_shape, device=device) + pg_clipfrac_stats = torch.zeros(stats_shape, device=device) + pg_loss_stats = torch.zeros(stats_shape, device=device) + vf_loss_stats = torch.zeros(stats_shape, device=device) + vf_clipfrac_stats = torch.zeros(stats_shape, device=device) + entropy_stats = torch.zeros(stats_shape, device=device) + ratio_stats = torch.zeros(stats_shape, device=device) + model.train() + + # trainer state initialization + self.state.global_step = 0 + self.state.episode = 0 + self.state.max_steps = args.num_total_batches + self.state.num_train_epochs = args.total_episodes / self.train_dataset_len + # Compute absolute values for logging, eval, and save if given as ratio + if args.logging_steps is not None: + if args.logging_steps < 1: + self.state.logging_steps = math.ceil(self.state.max_steps * args.logging_steps) + else: + self.state.logging_steps = args.logging_steps + if args.eval_steps is not None: + if args.eval_steps < 1: + self.state.eval_steps = math.ceil(self.state.max_steps * args.eval_steps) + else: + self.state.eval_steps = args.eval_steps + if args.save_steps is not None: + if args.save_steps < 1: + self.state.save_steps = math.ceil(self.state.max_steps * args.save_steps) + else: + self.state.save_steps = args.save_steps + self.control = self.callback_handler.on_train_begin(args, self.state, self.control) + + # backward compatibility + if self.is_deepspeed_enabled: + self.deepspeed = self.model + self.model_wrapped = self.model + + for update in range(1, args.num_total_batches + 1): + self.state.episode += 1 * args.batch_size + data = next(iter_dataloader) + with torch.no_grad(): + queries = data["input_ids"].to(device) + context_length = queries.shape[1] + responses = [] + postprocessed_responses = [] + logprobs = [] + ref_logprobs = [] + scores = [] + sequence_lengths = [] + values = [] + with unwrap_model_for_generation( + self.model, self.accelerator, gather_deepspeed3_params=self.args.ds3_gather_for_generation + ) as unwrapped_model: + 
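+                    # Editor's note: rollout phase. batch_generation samples completions
+                    # in chunks of local_rollout_forward_batch_size and also returns the
+                    # logits, from which per-token logprobs are recovered below via
+                    # selective_log_softmax.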
query_responses, logitss = batch_generation( + unwrapped_model.policy, + queries, + args.local_rollout_forward_batch_size, + processing_class.pad_token_id, + generation_config, + ) + + for i in range(0, queries.shape[0], args.local_rollout_forward_batch_size): + query = queries[i : i + args.local_rollout_forward_batch_size] + query_response = query_responses[i : i + args.local_rollout_forward_batch_size] + response = query_response[:, context_length:] + logits = logitss[i : i + args.local_rollout_forward_batch_size] + logprob = selective_log_softmax(logits, response) + del logits + empty_cache() + + if ref_policy is None: + with self.null_ref_context(): + ref_output = forward(model.policy, query_response, processing_class.pad_token_id) + else: + ref_output = forward(ref_policy, query_response, processing_class.pad_token_id) + ref_logits = ref_output.logits[:, context_length - 1 : -1] + ref_logits /= args.temperature + 1e-7 + ref_logprob = selective_log_softmax(ref_logits, response) + del ref_output, ref_logits + empty_cache() + + # Response Processing 1. truncate response after the first occurrence of `stop_token_id` + postprocessed_response = response + if self.stop_token_id is not None: # handle the edge case when stop_token_id exists but is 0 + postprocessed_response = truncate_response( + self.stop_token_id, processing_class.pad_token_id, response + ) + + # Response Processing 2. run reward model on the truncated responses + postprocessed_query_response = torch.cat((query, postprocessed_response), 1) + sequence_length = first_true_indices(postprocessed_response == processing_class.pad_token_id) - 1 + unwrapped_value_model = accelerator.unwrap_model(model).value_model + full_value, _, _ = get_reward( + unwrapped_value_model, query_response, processing_class.pad_token_id, context_length + ) + value = full_value[:, context_length - 1 : -1].squeeze(-1) + _, score, _ = get_reward( + reward_model, postprocessed_query_response, processing_class.pad_token_id, context_length + ) + + responses.append(response) + postprocessed_responses.append(postprocessed_response) + logprobs.append(logprob) + ref_logprobs.append(ref_logprob) + sequence_lengths.append(sequence_length) + scores.append(score) + values.append(value) + responses = torch.cat(responses, 0) + postprocessed_responses = torch.cat(postprocessed_responses, 0) + logprobs = torch.cat(logprobs, 0) + ref_logprobs = torch.cat(ref_logprobs, 0) + sequence_lengths = torch.cat(sequence_lengths, 0) + scores = torch.cat(scores, 0) + values = torch.cat(values, 0) + del (logprob, ref_logprob, full_value, value, score, unwrapped_model) + empty_cache() + gc.collect() + + # Response Processing 3. Filter completion. Ensure that the sample contains stop_token_id + # Completions not passing that filter will receive a lower score. 
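+                # Editor's note: below, padding_mask hides every position after the last
+                # real response token (those logprobs become INVALID_LOGPROB), while
+                # padding_mask_p1 keeps one extra position: the scalar reward is added at
+                # sequence_lengths + 1, and the value prediction there feeds the GAE
+                # recursion.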
+ contain_eos_token = torch.any(postprocessed_responses == self.processing_class.eos_token_id, dim=-1) + if self.args.missing_eos_penalty is not None: + scores[~contain_eos_token] -= self.args.missing_eos_penalty + # accelerator.print(f"{scores=}, {(contain_eos_token.sum() / len(contain_eos_token))=}") + + # be very careful with `padding_mask_p1`; see https://excalidraw.com/#json=LWnzG4w2k5DjF_EOL_xPt,e2w3a-hFJ_gX5vOfeyXGTw + response_idxs = torch.arange(responses.shape[1], device=responses.device).repeat(responses.shape[0], 1) + padding_mask = response_idxs > sequence_lengths.unsqueeze(1) + logprobs = torch.masked_fill(logprobs, padding_mask, INVALID_LOGPROB) + ref_logprobs = torch.masked_fill(ref_logprobs, padding_mask, INVALID_LOGPROB) + sequence_lengths_p1 = sequence_lengths + 1 + padding_mask_p1 = response_idxs > (sequence_lengths_p1.unsqueeze(1)) + values = torch.masked_fill(values, padding_mask_p1, 0) + + # 4. compute rewards + # Formula used by http://joschu.net/blog/kl-approx.html for the k1 and k3 estimators + logr = ref_logprobs - logprobs + kl = -logr if args.kl_estimator == "k1" else (logr.exp() - 1) - logr # Else statement is k3 + non_score_reward = -args.kl_coef * kl + rewards = non_score_reward.clone() + actual_start = torch.arange(rewards.size(0), device=rewards.device) + actual_end = torch.where(sequence_lengths_p1 < rewards.size(1), sequence_lengths_p1, sequence_lengths) + rewards[[actual_start, actual_end]] += scores + + # 5. whiten rewards + if args.whiten_rewards: + rewards = masked_whiten(rewards, mask=~padding_mask_p1, shift_mean=False) + rewards = torch.masked_fill(rewards, padding_mask_p1, 0) + + # 6. compute advantages and returns + lastgaelam = 0 + advantages_reversed = [] + gen_length = responses.shape[1] + for t in reversed(range(gen_length)): + nextvalues = values[:, t + 1] if t < gen_length - 1 else 0.0 + delta = rewards[:, t] + args.gamma * nextvalues - values[:, t] + lastgaelam = delta + args.gamma * args.lam * lastgaelam + advantages_reversed.append(lastgaelam) + advantages = torch.stack(advantages_reversed[::-1], axis=1) + returns = advantages + values + advantages = masked_whiten(advantages, ~padding_mask) + advantages = torch.masked_fill(advantages, padding_mask, 0) + empty_cache() + + # Do multiple epochs of PPO training, with a fresh random shuffle in each epoch + for ppo_epoch_idx in range(args.num_ppo_epochs): + b_inds = np.random.permutation(args.local_batch_size) + minibatch_idx = 0 + for mini_batch_start in range(0, args.local_batch_size, args.local_mini_batch_size): + mini_batch_end = mini_batch_start + args.local_mini_batch_size + mini_batch_inds = b_inds[mini_batch_start:mini_batch_end] + gradient_accumulation_idx = 0 + for micro_batch_start in range(0, args.local_mini_batch_size, args.per_device_train_batch_size): + with accelerator.accumulate(model): + micro_batch_end = micro_batch_start + args.per_device_train_batch_size + micro_batch_inds = mini_batch_inds[micro_batch_start:micro_batch_end] + mb_advantage = advantages[micro_batch_inds] + mb_responses = responses[micro_batch_inds] + mb_query_responses = query_responses[micro_batch_inds] + mb_logprobs = logprobs[micro_batch_inds] + mb_return = returns[micro_batch_inds] + mb_values = values[micro_batch_inds] + + output, vpred_temp = forward(model, mb_query_responses, processing_class.pad_token_id) + logits = output.logits[:, context_length - 1 : -1] + logits /= args.temperature + 1e-7 + new_logprobs = selective_log_softmax(logits, mb_responses) + new_logprobs = torch.masked_fill( + 
new_logprobs, padding_mask[micro_batch_inds], INVALID_LOGPROB + ) + vpred = vpred_temp[:, context_length - 1 : -1].squeeze(-1) + vpred = torch.masked_fill(vpred, padding_mask_p1[micro_batch_inds], 0) + vpredclipped = torch.clamp( + vpred, + mb_values - args.cliprange_value, + mb_values + args.cliprange_value, + ) + vf_losses1 = torch.square(vpred - mb_return) + vf_losses2 = torch.square(vpredclipped - mb_return) + vf_loss_max = torch.max(vf_losses1, vf_losses2) + vf_loss = 0.5 * masked_mean(vf_loss_max, ~padding_mask_p1[micro_batch_inds]) + vf_clipfrac = masked_mean( + (vf_losses2 > vf_losses1).float(), ~padding_mask_p1[micro_batch_inds] + ) + logprobs_diff = new_logprobs - mb_logprobs + ratio = torch.exp(logprobs_diff) + pg_losses = -mb_advantage * ratio + pg_losses2 = -mb_advantage * torch.clamp(ratio, 1.0 - args.cliprange, 1.0 + args.cliprange) + pg_loss_max = torch.max(pg_losses, pg_losses2) + pg_loss = masked_mean(pg_loss_max, ~padding_mask[micro_batch_inds]) + loss = pg_loss + args.vf_coef * vf_loss + accelerator.backward(loss) + optimizer.step() + optimizer.zero_grad() + with torch.no_grad(): + pg_clipfrac = masked_mean( + (pg_losses2 > pg_losses).float(), ~padding_mask[micro_batch_inds] + ) + prob_dist = torch.nn.functional.softmax(logits, dim=-1, dtype = torch.float32).to(logits.dtype) + entropy = torch.logsumexp(logits, dim=-1) - torch.sum(prob_dist * logits, dim=-1) + approxkl = 0.5 * (logprobs_diff**2).mean() + approxkl_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = approxkl + pg_clipfrac_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = ( + pg_clipfrac + ) + pg_loss_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = pg_loss + vf_loss_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = vf_loss + vf_clipfrac_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = ( + vf_clipfrac + ) + entropy_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = entropy.mean() + ratio_stats[ppo_epoch_idx, minibatch_idx, gradient_accumulation_idx] = ratio.mean() + gradient_accumulation_idx += 1 + minibatch_idx += 1 + # del everything and empty cache + # fmt: off + del ( + output, vpred_temp, logits, new_logprobs, vpred, vpredclipped, + vf_losses1, vf_losses2, vf_loss, vf_clipfrac, logprobs_diff, ratio, pg_losses, pg_losses2, pg_loss_max, + pg_loss, loss, pg_clipfrac, prob_dist, entropy, approxkl, mb_return, + mb_advantage, mb_values, mb_responses, mb_query_responses, mb_logprobs, + ) + # fmt: on + empty_cache() + with torch.no_grad(): + mean_kl = kl.sum(1).mean() + mean_entropy = (-logprobs).sum(1).mean() + mean_non_score_reward = non_score_reward.sum(1).mean() + rlhf_reward = mean_non_score_reward + scores.mean() + eps = int(self.state.episode / (time.time() - start_time)) + metrics = {} + metrics["eps"] = eps + metrics["objective/kl"] = self.accelerator.gather_for_metrics(mean_kl).mean().item() + metrics["objective/entropy"] = self.accelerator.gather_for_metrics(mean_entropy).mean().item() + metrics["objective/non_score_reward"] = ( + self.accelerator.gather_for_metrics(mean_non_score_reward).mean().item() + ) + metrics["objective/rlhf_reward"] = self.accelerator.gather_for_metrics(rlhf_reward).mean().item() + metrics["objective/scores"] = self.accelerator.gather_for_metrics(scores.mean()).mean().item() + metrics["policy/approxkl_avg"] = self.accelerator.gather_for_metrics(approxkl_stats).mean().item() + metrics["policy/clipfrac_avg"] = self.accelerator.gather_for_metrics(pg_clipfrac_stats).mean().item() + 
metrics["loss/policy_avg"] = self.accelerator.gather_for_metrics(pg_loss_stats).mean().item() + metrics["loss/value_avg"] = self.accelerator.gather_for_metrics(vf_loss_stats).mean().item() + metrics["val/clipfrac_avg"] = self.accelerator.gather_for_metrics(vf_clipfrac_stats).mean().item() + metrics["policy/entropy_avg"] = self.accelerator.gather_for_metrics(entropy_stats).mean().item() + metrics["val/ratio"] = self.accelerator.gather_for_metrics(ratio_stats).mean().item() + metrics["val/ratio_var"] = self.accelerator.gather_for_metrics(ratio_stats).var().item() + metrics["val/num_eos_tokens"] = (responses == processing_class.eos_token_id).sum().item() + metrics["lr"] = self.lr_scheduler.get_last_lr()[0] + metrics["episode"] = self.state.episode + self.state.epoch = self.state.episode / self.train_dataset_len # used by self.log + self.state.global_step += 1 + self.log(metrics) + + self.lr_scheduler.step() + self.control = self.callback_handler.on_step_end(args, self.state, self.control) + if self.control.should_save: + self._save_checkpoint(model, trial=None) + self.control = self.callback_handler.on_save(self.args, self.state, self.control) + del kl, mean_kl, mean_entropy, mean_non_score_reward, scores, metrics, non_score_reward + empty_cache() + gc.collect() + + if args.num_sample_generations > 0 and (update - 1) % self.sample_generations_freq == 0: + self.generate_completions(sampling=True) + empty_cache() + del ( + query_responses, + responses, + postprocessed_responses, + logprobs, + ref_logprobs, + values, + sequence_lengths, + contain_eos_token, + sequence_lengths_p1, + response_idxs, + padding_mask, + padding_mask_p1, + rewards, + actual_start, + actual_end, + advantages, + returns, + ) + empty_cache() + + # HF trainer specifics + self.control = self.callback_handler.on_train_end(args, self.state, self.control) + if self.control.should_save: + self._save_checkpoint(model, trial=None) + self.control = self.callback_handler.on_save(self.args, self.state, self.control) + + def generate_completions(self, sampling: bool = False): + args = self.args + processing_class = self.processing_class + generation_config = GenerationConfig( + max_new_tokens=self.args.response_length, + temperature=(0.01 + 1e-7), + top_k=0.0, + top_p=1.0, + do_sample=True, + ) + + table = defaultdict(list) + with unwrap_model_for_generation( + self.model, self.accelerator, gather_deepspeed3_params=self.args.ds3_gather_for_generation + ) as unwrapped_model: + for batch in self.eval_dataloader: + query = batch["input_ids"] + with torch.no_grad(): + context_length = query.shape[1] + query_response, _ = batch_generation( + unwrapped_model.policy, + query, + query.shape[0], + processing_class.pad_token_id, + generation_config, + ) + response = query_response[:, context_length:] + postprocessed_response = response + if self.stop_token_id is not None: # handle the edge case when stop_token_id exists but is 0 + postprocessed_response = truncate_response( + self.stop_token_id, processing_class.pad_token_id, response + ) + table["query"].extend( + gather_object(processing_class.batch_decode(query, skip_special_tokens=True)) + ) + table["model response"].extend( + gather_object(processing_class.batch_decode(postprocessed_response)) + ) + + postprocessed_query_response = torch.cat((query, postprocessed_response), 1) + _, score, _ = get_reward( + self.reward_model, postprocessed_query_response, processing_class.pad_token_id, context_length + ) + 
table["score"].extend(self.accelerator.gather_for_metrics(score).float().cpu().numpy()) + + if sampling: + break + df = pd.DataFrame(table) + + if self.accelerator.is_main_process: + if is_rich_available(): + print_rich_table(df.iloc[0 : 0 + 5]) + if "wandb" in args.report_to: + import wandb + + if wandb.run is not None: + wandb.log({"completions": wandb.Table(dataframe=df)}) + + if "comet_ml" in args.report_to: + log_table_to_comet_experiment( + name="completions.csv", + table=df, + ) + + # Ensure the model card is saved along with the checkpoint + def _save_checkpoint(self, model, trial): + if self.args.hub_model_id is None: + model_name = Path(self.args.output_dir).name + else: + model_name = self.args.hub_model_id.split("/")[-1] + self.create_model_card(model_name=model_name) + super()._save_checkpoint(model, trial) + + def create_model_card( + self, + model_name: Optional[str] = None, + dataset_name: Optional[str] = None, + tags: Union[str, list[str], None] = None, + ): + """ + Creates a draft of a model card using the information available to the `Trainer`. + + Args: + model_name (`str` or `None`, *optional*, defaults to `None`): + Name of the model. + dataset_name (`str` or `None`, *optional*, defaults to `None`): + Name of the dataset used for training. + tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`): + Tags to be associated with the model card. + """ + if not self.is_world_process_zero(): + return + + if hasattr(self.model.config, "_name_or_path") and not os.path.isdir(self.model.config._name_or_path): + base_model = self.model.config._name_or_path + else: + base_model = None + + # normalize `tags` to a mutable set + if tags is None: + tags = set() + elif isinstance(tags, str): + tags = {tags} + else: + tags = set(tags) + + if hasattr(self.model.config, "unsloth_version"): + tags.add("unsloth") + + if "JOB_ID" in os.environ: + tags.add("hf_jobs") + + tags.update(self._tag_names) + + # docstyle-ignore + citation = textwrap.dedent("""\ + @article{mziegler2019fine-tuning, + title = {{Fine-Tuning Language Models from Human Preferences}}, + author = {Daniel M. Ziegler and Nisan Stiennon and Jeffrey Wu and Tom B. Brown and Alec Radford and Dario Amodei and Paul F. Christiano and Geoffrey Irving}, + year = 2019, + eprint = {arXiv:1909.08593} + }""") + + model_card = generate_model_card( + base_model=base_model, + model_name=model_name, + hub_model_id=self.hub_model_id, + dataset_name=dataset_name, + tags=tags, + wandb_url=wandb.run.url if is_wandb_available() and wandb.run is not None else None, + comet_url=get_comet_experiment_url(), + trainer_name="PPO", + trainer_citation=citation, + paper_title="Fine-Tuning Language Models from Human Preferences", + paper_id="1909.08593", + ) + + model_card.save(os.path.join(self.args.output_dir, "README.md")) +class UnslothPPOTrainer(_UnslothPPOTrainer): + """ + Trainer for Proximal Policy Optimization (PPO). + +For details on PPO, see the paper: [Proximal Policy Optimization +Algorithms](https://huggingface.co/papers/1707.06347). + +Args: + args ([`PPOConfig`]): + Training arguments. + processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`]): + Class to process the data. + model (`torch.nn.Module`): + Model to be trained. This is the policy model. + ref_model (`torch.nn.Module`, *optional*): + Reference model used to compute the KL divergence. If `None`, a copy of the policy model is created. 
+ reward_model (`torch.nn.Module`): + Reward model used to compute the rewards. + train_dataset ([`~datasets.Dataset`]): + Dataset for training. + value_model (`torch.nn.Module`): + Value model used to predict the value of a state. + data_collator ([`~transformers.DataCollatorWithPadding`], *optional*): + Data collator to batch and pad samples from the dataset. If `None`, a default data collator is created + using the `processing_class`. + eval_dataset ([`~datasets.Dataset`] or `dict` of [`~datasets.Dataset`], *optional*): + Dataset for evaluation. + optimizers (`tuple` of `torch.optim.Optimizer` and `torch.optim.lr_scheduler.LambdaLR`, *optional*, defaults to `(None, None)`): + Tuple containing the optimizer and the learning rate scheduler to use for training. If `None`, the + optimizer and the learning rate scheduler are created using the + [`~transformers.Trainer.create_optimizer_and_scheduler`] method. + callbacks (`list` of [`~transformers.TrainerCallback`], *optional*): + Callbacks to use during training. + peft_config ([`~peft.config.PeftConfig`], *optional*): + PEFT configuration to use PEFT for training. If `None`, PEFT is not used. If provided, the policy `model` + will be wrapped with the specified PEFT adapter. + + """ + def __init__( + self, + args, + processing_class, + model, + ref_model, + reward_model, + train_dataset, + value_model, + data_collator = None, + eval_dataset = None, + callbacks = None, + peft_config = None, + **kwargs + ): + if args is None: args = UnslothPPOConfig() + use_bf16 = getattr(args, 'bf16', False) + if type(use_bf16) is not bool: use_bf16 = False + use_fp16 = getattr(args, 'fp16', False) + if type(use_fp16) is not bool: use_fp16 = False + force_float32 = False + full_finetuning = os.environ.get('UNSLOTH_ENABLE_FULL_FINETUNING', '0') == '1' + if not full_finetuning and (os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '1'): + print('Unsloth: Switching to float32 training since model cannot work with float16') + force_float32 = True + mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') + dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None) + if dtype is None: dtype = model.get_input_embeddings().dtype + from unsloth_zoo.utils import _get_dtype + dtype = _get_dtype(dtype) + float16 = dtype == torch.float16 + if not force_float32 and (float16 and use_bf16): raise TypeError('Unsloth: Model is in float16 precision but you want to use bfloat16 precision. Set fp16 to `True` and bf16 to `False`') + if not force_float32 and (not float16 and use_fp16): raise TypeError('Unsloth: Model is in bfloat16 precision but you want to use float16 precision. 
Set fp16 to `False` and bf16 to `True`') + if force_float32: + # Forced float32 training + args.fp16 = False + args.bf16 = False + os.environ['ACCELERATE_MIXED_PRECISION'] = 'no' + elif (not use_bf16 and not use_fp16) and mixed_precision_dtype == 'float32': + # Mixed precision training + args.fp16 = float16 + args.bf16 = not float16 + os.environ['ACCELERATE_MIXED_PRECISION'] = 'fp16' if float16 else 'bf16' + if getattr(args, 'eval_dataset', None) is not None and getattr(args, 'eval_strategy', 'no') == 'no': + args.eval_strategy = 'steps' + if getattr(args, 'eval_steps', None) is None: args.eval_steps = 0.1 + ga_steps = getattr(args, 'gradient_accumulation_steps', None) + if ga_steps is not None and ga_steps > 1: + from transformers import __version__ as transformers_version + if Version(transformers_version) <= Version('4.45.2'): + print('**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!\n' + '`pip install --upgrade --no-cache-dir --force-reinstall --no-deps unsloth transformers trl unsloth_zoo`') + if getattr(args, 'eval_strategy', 'no') != 'no': + eval_bsz = getattr(args, 'per_device_eval_batch_size', 8) + if eval_bsz == 8 and args.per_device_train_batch_size < eval_bsz: args.per_device_eval_batch_size = args.per_device_train_batch_size + if getattr(args, 'eval_accumulation_steps', None) is None and ga_steps is not None: args.eval_accumulation_steps = ga_steps + fp16_full_eval = getattr(args, 'fp16_full_eval', False) + if type(fp16_full_eval) is not bool: fp16_full_eval = False + bf16_full_eval = getattr(args, 'bf16_full_eval', False) + if type(bf16_full_eval) is not bool: bf16_full_eval = False + if args.fp16 and bf16_full_eval: args.bf16_full_eval = False; args.fp16_full_eval = True + if args.bf16 and fp16_full_eval: args.bf16_full_eval = True; args.fp16_full_eval = False + if force_float32: + args.bf16_full_eval = False + args.fp16_full_eval = False + elif os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') == 'bfloat16': + args.bf16_full_eval = True + args.fp16_full_eval = False + elif not bf16_full_eval and not fp16_full_eval: + args.bf16_full_eval = args.bf16 + args.fp16_full_eval = args.fp16 + _output_logits = False + if locals().get('compute_metrics', None) is not None: _output_logits = True + if locals().get('preprocess_logits_for_metrics', None) is not None: _output_logits = True + if _output_logits: + os.environ['UNSLOTH_RETURN_LOGITS'] = '1' + if 'max_seq_length' not in locals() and not hasattr(args, 'max_seq_length'): + pass + else: + model_max_seq_length = getattr(model, 'max_seq_length', None) + args_max_seq_length = getattr(args, 'max_seq_length', None) + if args_max_seq_length is None and model_max_seq_length is not None: + max_seq_length = model.max_seq_length + if hasattr(args, 'max_seq_length'): args.max_seq_length = max_seq_length + if model is not None and hasattr(model, 'for_training'): + model.for_training() + if 'tokenizer' in locals() and hasattr(tokenizer, 'padding_side'): tokenizer.padding_side = 'right' + if 'processing_class' in locals(): + if hasattr(processing_class, 'padding_side'): processing_class.padding_side = 'right' + if hasattr(processing_class, 'tokenizer') and hasattr(processing_class.tokenizer, 'padding_side'): processing_class.tokenizer.padding_side = 'right' + __tokenizer = processing_class if 'processing_class' in locals() else tokenizer + from unsloth_zoo.vision_utils import UnslothVisionDataCollator + if not isinstance(data_collator, UnslothVisionDataCollator): + if 
isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names: + data_collator = DataCollatorForSeq2Seq( + __tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False + if hasattr(args, 'dataset_text_field'): args.dataset_text_field = '' + if hasattr(args, 'dataset_kwargs'): args.dataset_kwargs = {'skip_prepare_dataset': True} + if not isinstance(data_collator, UnslothVisionDataCollator): + if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'): + if isinstance(data_collator, DataCollatorForSeq2Seq): + data_collator = DataCollatorForSeq2Seq( + __tokenizer.tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer.tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + other_metrics = [] + + from unsloth_zoo.logging_utils import PatchRLStatistics + PatchRLStatistics('ppo_trainer', other_metrics) + + # [TODO] Fix up DataParallel multiplying batch sizes + # [TODO] DDP works, but DP seems to not work? [TODO] + if getattr(args, "parallel_mode", None) == ParallelMode.NOT_DISTRIBUTED and args.n_gpu > 1: + if getattr(args, "_n_gpu", 1) != 1: + args._n_gpu = 1 + if "model" in locals() and hasattr(model, "for_training"): + model.for_training() + super().__init__( + args = args, + processing_class = processing_class, + model = model, + ref_model = ref_model, + reward_model = reward_model, + train_dataset = train_dataset, + value_model = value_model, + data_collator = data_collator, + eval_dataset = eval_dataset, + callbacks = callbacks, + peft_config = peft_config,**kwargs) + if "model" in locals() and hasattr(model, "for_inference"): + model.for_inference() + if hasattr(self, 'neftune_hook_handle'): + self.neftune_hook_handle.remove() + if hasattr(self, 'neftune_hook_handle'): del self.neftune_hook_handle + if getattr(args, 'neftune_noise_alpha', None) is not None: + model.get_input_embeddings().neftune_noise_alpha = self.neftune_noise_alpha + pass + if hasattr(self, 'accelerator'): + scaler = self.accelerator.scaler + current_model = model + while hasattr(current_model, 'model'): + current_model.accelerator_scaler = scaler + current_model = current_model.model + current_model.accelerator_scaler = scaler + pass + if hasattr(self, 'train'): + self.train = MethodType(prepare_for_training_mode(self.__class__.train), self) + pass + +pass diff --git a/unsloth_compiled_cache/UnslothPRMTrainer.py b/unsloth_compiled_cache/UnslothPRMTrainer.py new file mode 100644 index 0000000000000000000000000000000000000000..d9c99f6d219a63c8b76283b0f89125292d91a906 --- /dev/null +++ b/unsloth_compiled_cache/UnslothPRMTrainer.py @@ -0,0 +1,1038 @@ +""" +2025.11.2 +2025.11.1 +4.57.2 +0.23.0 +__UNSLOTH_VERSIONING__ +""" + +# Unsloth auto generated code +# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved. 
+# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . + +from torch import Tensor +import torch +import torch.nn as nn +from torch.nn import functional as F +from typing import Any, List, Optional, Tuple, Union, Dict, Set, Callable +from trl.trainer.prm_trainer import (BaseImageProcessor, Callable, DataCollator, DataCollatorForTokenClassification, Dataset, EvalPrediction, FeatureExtractionMixin, Optional, PRMConfig, PRMTrainer, PartialState, Path, PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin, Trainer, TrainerCallback, Union, chain, compute_accuracy, disable_dropout_in_model, features, generate_model_card, is_wandb_available, nn, os, prepare_peft_model, textwrap, torch, Optional, PreTrainedModel, Trainer, os, torch) + + +import os +from typing import * +from dataclasses import dataclass, field +from packaging.version import Version +import torch +import numpy as np +from contextlib import nullcontext +from torch.nn import functional as F +from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling as TransformersDataCollatorForLanguageModeling +from transformers.training_args import ParallelMode + +# Wrap trainer with padding to right and enable training mode +import functools +from types import MethodType +def prepare_for_training_mode(f): + @functools.wraps(f) + def wrapper(self, *args, **kwargs): + # Enable training mode + if hasattr(self, 'model') and hasattr(self.model, "for_training"): + self.model.for_training() + output = f(self, *args, **kwargs) + # Return inference mode + if hasattr(self, 'model') and hasattr(self.model, "for_inference"): + self.model.for_inference() + return output + return wrapper +pass + +torch_compile_options = { + "epilogue_fusion" : True, + "max_autotune" : False, + "shape_padding" : True, + "trace.enabled" : False, + "triton.cudagraphs" : False, +} + +@torch.compile(dynamic = True, fullgraph = True, options = torch_compile_options,) +def chunked_selective_log_softmax(logits, index): + # Split into 4 chunks only + chunked_logits = torch.chunk(logits.reshape(-1, logits.shape[-1]), chunks = 4, dim = 0) + chunked_index = torch.chunk(index.reshape(-1), chunks = 4, dim = 0) + all_per_token_logps = [] + # Below loop does the same as selective_log_softmax(chunk_logits, chunk_index) + for chunk_logits, chunk_index in zip(chunked_logits, chunked_index): + chunk_logits = chunk_logits.to(torch.float32) + selected_logits = torch.gather(chunk_logits, dim = -1, index = chunk_index.unsqueeze(-1)).squeeze(-1) + logsumexp_values = torch.logsumexp(chunk_logits, dim = -1) + per_token_logps = selected_logits - logsumexp_values + all_per_token_logps.append(per_token_logps) + pass + all_per_token_logps = torch.concat(all_per_token_logps) + all_per_token_logps = all_per_token_logps.reshape((logits.shape[0], logits.shape[1])) + return all_per_token_logps + +def calculate_pad_tokens_in_prompt( + input_ids: torch.Tensor, + logits_to_keep: int, + pad_token_id: int +) -> torch.Tensor: + 
""" + Given prompt tensor, it returns all the left padded tokens in that sequence. so [pad, pad, pad, cat] = 3 tokens + """ + if logits_to_keep >= input_ids.shape[1]: + raise ValueError("logits_to_keep must be smaller than the sequence length.") + + prompt_section = input_ids[:, :-logits_to_keep] + + padding_mask = (prompt_section == pad_token_id) + + pad_token_counts = padding_mask.sum(dim=1) + + return pad_token_counts + +def create_completion_attention_mask( + completion_input_ids: torch.Tensor, + left_pad_tokens_per_prompt: torch.Tensor, + max_left_pad: int, + pad_token_id: int +) -> torch.Tensor: + """ + Given that we have a sequence, [p,p,p,c,c,c,pad,pad,pad] + + Where p are extra prompt tokens we got from slicing the torch tensor, c is completion tokens + and pad are pad tokens, this function would make a completion mask that would 0 out the pad + and p tokens. so in this example [0,0,0,1,1,1,0,0,0] + """ + batch_size, completion_len = completion_input_ids.shape + device = completion_input_ids.device + + num_tokens_to_mask = max_left_pad - left_pad_tokens_per_prompt + + indices = torch.arange(completion_len, device=device).unsqueeze(0) + shift_mask = indices >= num_tokens_to_mask.unsqueeze(1) + + non_padding_mask = (completion_input_ids != pad_token_id) + + final_mask = shift_mask & non_padding_mask + + return final_mask + +def left_pack_padding(tensor: torch.Tensor, pad_id: int) -> torch.Tensor: + """ + Moves all padding tokens in each sequence of a batch to the right. + """ + mask = (tensor != pad_id) + # Must do stable=True since binary mark is unordered + sorted_indices = torch.argsort(mask, dim=1, descending=True, stable=True) + packed_tensor = torch.gather(tensor, 1, sorted_indices) + return packed_tensor + +def align_logprobs_with_mask( + logprob_tensor: torch.Tensor, + attention_mask: torch.Tensor, + pad_value: float = 0.0 +) -> torch.Tensor: + """ + Aligns a log probability tensor with a given attention mask. + """ + + device = logprob_tensor.device + batch_size, logprob_seq_len = logprob_tensor.shape + mask_seq_len = attention_mask.shape[1] + + padded_logprobs = torch.full( + attention_mask.shape, + fill_value=pad_value, + dtype=logprob_tensor.dtype, + device=device + ) + + left_pad_counts = torch.argmax(attention_mask, dim=1) + + cols = torch.arange(logprob_seq_len, device=device) + dest_indices = left_pad_counts.unsqueeze(1) + cols + + # Create destination row indices + # Shape: [batch_size, logprob_seq_len] + row_indices = torch.arange(batch_size, device=device).unsqueeze(1).expand_as(dest_indices) + + # --- 4. Filter out-of-bounds indices and perform assignment --- + # Create a mask to identify only the indices that are within the bounds + # of the target tensor's sequence length. + valid_mask = dest_indices < mask_seq_len + + # Use this mask to select only the valid row indices, column indices, + # and the corresponding values from the logprob tensor. + # This flattens the selected elements into 1D tensors. + valid_rows = row_indices[valid_mask] + valid_cols = dest_indices[valid_mask] + valid_vals = logprob_tensor[valid_mask] + + # Place the valid values into their correct positions in the padded tensor + # using a single, efficient advanced indexing operation. + padded_logprobs[valid_rows, valid_cols] = valid_vals + + return padded_logprobs +@dataclass +class UnslothPRMConfig(PRMConfig): + """ + +Configuration class for the [`PRMTrainer`]. + +This class includes only the parameters that are specific to PRM training. 
For a full list of training arguments, +please refer to the [`~transformers.TrainingArguments`] documentation. Note that default values in this class may +differ from those in [`~transformers.TrainingArguments`]. + +Using [`~transformers.HfArgumentParser`] we can turn this class into +[argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the +command line. + +Parameters: + max_length (`int` or `None`, *optional*, defaults to `1024`): + Maximum length of the sequences (prompt + completion) used for truncation. + max_prompt_length (`int` or `None`, *optional*, defaults to `512`): + Maximum length of the prompt used for truncation. + max_completion_length (`int` or `None`, *optional*, defaults to `None`): + Maximum length of the completion used for truncation. The completion is the concatenation of the steps. + disable_dropout (`bool`, *optional*, defaults to `True`): + Whether to disable dropout in the model. + step_separator (`str`, *optional*, defaults to `"\n"`): + Separator used to separate each step of the reasoning process. + train_on_last_step_only (`bool`, *optional*, defaults to `False`): + Whether to train only on the last step. + dataset_num_proc (`int`, *optional*, defaults to `None`): + Number of processes to use for processing the dataset. + + """ + vllm_sampling_params: Optional[Any] = field( + default = None, + metadata = {'help': 'vLLM SamplingParams'}, + ) + unsloth_num_chunks : Optional[int] = field( + default = -1, + metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'}, + ) + max_seq_length : Optional[int] = field( + default = None, + metadata = {'help': 'Maximum sequence length to truncate to.'}, + ) + def __init__( + self, + output_dir = None, + overwrite_output_dir = None, + do_train = False, + do_eval = False, + do_predict = False, + eval_strategy = 'no', + prediction_loss_only = False, + per_device_train_batch_size = 4, + per_device_eval_batch_size = 4, + per_gpu_train_batch_size = None, + per_gpu_eval_batch_size = None, + gradient_accumulation_steps = 2, + eval_accumulation_steps = 2, + eval_delay = 0, + torch_empty_cache_steps = 250, + learning_rate = 5e-05, + weight_decay = 0.01, + adam_beta1 = 0.9, + adam_beta2 = 0.999, + adam_epsilon = 1e-08, + max_grad_norm = 1.0, + num_train_epochs = 3.0, + max_steps = -1, + lr_scheduler_type = 'linear', + warmup_ratio = 0.1, + warmup_steps = 0, + log_level = 'passive', + log_level_replica = 'warning', + log_on_each_node = True, + logging_dir = None, + logging_strategy = 'steps', + logging_first_step = False, + logging_steps = 1, + logging_nan_inf_filter = False, + save_strategy = 'steps', + save_steps = 500, + save_total_limit = None, + save_safetensors = True, + save_on_each_node = False, + save_only_model = False, + restore_callback_states_from_checkpoint = False, + no_cuda = False, + use_cpu = False, + use_mps_device = False, + seed = 3407, + data_seed = 3407, + jit_mode_eval = False, + bf16 = False, + fp16 = False, + fp16_opt_level = 'O1', + half_precision_backend = 'auto', + bf16_full_eval = False, + fp16_full_eval = False, + tf32 = None, + local_rank = -1, + ddp_backend = None, + tpu_num_cores = None, + tpu_metrics_debug = False, + debug = '', + dataloader_drop_last = False, + eval_steps = None, + dataloader_num_workers = 0, + dataloader_prefetch_factor = None, + past_index = -1, + run_name = None, + disable_tqdm = None, + remove_unused_columns = True, + label_names = None, + load_best_model_at_end = False, + metric_for_best_model = None, + 
greater_is_better = None,
+        ignore_data_skip = False,
+        fsdp = None,
+        fsdp_min_num_params = 0,
+        fsdp_config = None,
+        fsdp_transformer_layer_cls_to_wrap = None,
+        accelerator_config = None,
+        parallelism_config = None,
+        deepspeed = None,
+        label_smoothing_factor = 0.0,
+        optim = 'adamw_8bit',
+        optim_args = None,
+        adafactor = False,
+        group_by_length = False,
+        length_column_name = 'length',
+        report_to = None,
+        project = 'huggingface',
+        trackio_space_id = 'trackio',
+        ddp_find_unused_parameters = None,
+        ddp_bucket_cap_mb = None,
+        ddp_broadcast_buffers = None,
+        dataloader_pin_memory = True,
+        dataloader_persistent_workers = False,
+        skip_memory_metrics = True,
+        use_legacy_prediction_loop = False,
+        push_to_hub = False,
+        resume_from_checkpoint = None,
+        hub_model_id = None,
+        hub_strategy = 'every_save',
+        hub_token = None,
+        hub_private_repo = None,
+        hub_always_push = False,
+        hub_revision = None,
+        gradient_checkpointing = True,
+        gradient_checkpointing_kwargs = None,
+        include_inputs_for_metrics = False,
+        eval_do_concat_batches = True,
+        fp16_backend = 'auto',
+        push_to_hub_model_id = None,
+        push_to_hub_organization = None,
+        push_to_hub_token = None,
+        mp_parameters = '',
+        auto_find_batch_size = False,
+        full_determinism = False,
+        torchdynamo = None,
+        ray_scope = 'last',
+        ddp_timeout = 1800,
+        torch_compile = False,
+        torch_compile_backend = None,
+        torch_compile_mode = None,
+        include_tokens_per_second = False,
+        include_num_input_tokens_seen = False,
+        neftune_noise_alpha = None,
+        optim_target_modules = None,
+        batch_eval_metrics = False,
+        eval_on_start = False,
+        use_liger_kernel = False,
+        liger_kernel_config = None,
+        eval_use_gather_object = False,
+        average_tokens_across_devices = True,
+        max_length = 1024,
+        max_prompt_length = 512,
+        max_completion_length = None,
+        disable_dropout = True,
+        step_separator = '\n',
+        train_on_last_step_only = False,
+        dataset_num_proc = None,
+        vllm_sampling_params = None,
+        unsloth_num_chunks = -1,
+        max_seq_length = None,
+        **kwargs,
+    ):
+        if learning_rate < 1e-7: print(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
+        if learning_rate > 1: print(f'Unsloth: Your learning rate of `{learning_rate}` is way too large (> 1)!
Consider decreasing it to 1e-1, otherwise gradient updates will explode!') + if output_dir is None and save_strategy == 'steps' and save_steps == 500: + output_dir = 'unsloth_training_checkpoints' + save_strategy = 'no' + if dataset_num_proc is None: + from multiprocessing import cpu_count + dataset_num_proc = min(max(cpu_count()+4, 2), 64) + + super().__init__( + output_dir = output_dir, + overwrite_output_dir = overwrite_output_dir, + do_train = do_train, + do_eval = do_eval, + do_predict = do_predict, + eval_strategy = eval_strategy, + prediction_loss_only = prediction_loss_only, + per_device_train_batch_size = per_device_train_batch_size, + per_device_eval_batch_size = per_device_eval_batch_size, + per_gpu_train_batch_size = per_gpu_train_batch_size, + per_gpu_eval_batch_size = per_gpu_eval_batch_size, + gradient_accumulation_steps = gradient_accumulation_steps, + eval_accumulation_steps = eval_accumulation_steps, + eval_delay = eval_delay, + torch_empty_cache_steps = torch_empty_cache_steps, + learning_rate = learning_rate, + weight_decay = weight_decay, + adam_beta1 = adam_beta1, + adam_beta2 = adam_beta2, + adam_epsilon = adam_epsilon, + max_grad_norm = max_grad_norm, + num_train_epochs = num_train_epochs, + max_steps = max_steps, + lr_scheduler_type = lr_scheduler_type, + warmup_ratio = warmup_ratio, + warmup_steps = warmup_steps, + log_level = log_level, + log_level_replica = log_level_replica, + log_on_each_node = log_on_each_node, + logging_dir = logging_dir, + logging_strategy = logging_strategy, + logging_first_step = logging_first_step, + logging_steps = logging_steps, + logging_nan_inf_filter = logging_nan_inf_filter, + save_strategy = save_strategy, + save_steps = save_steps, + save_total_limit = save_total_limit, + save_safetensors = save_safetensors, + save_on_each_node = save_on_each_node, + save_only_model = save_only_model, + restore_callback_states_from_checkpoint = restore_callback_states_from_checkpoint, + no_cuda = no_cuda, + use_cpu = use_cpu, + use_mps_device = use_mps_device, + seed = seed, + data_seed = data_seed, + jit_mode_eval = jit_mode_eval, + bf16 = bf16, + fp16 = fp16, + fp16_opt_level = fp16_opt_level, + half_precision_backend = half_precision_backend, + bf16_full_eval = bf16_full_eval, + fp16_full_eval = fp16_full_eval, + tf32 = tf32, + local_rank = local_rank, + ddp_backend = ddp_backend, + tpu_num_cores = tpu_num_cores, + tpu_metrics_debug = tpu_metrics_debug, + debug = debug, + dataloader_drop_last = dataloader_drop_last, + eval_steps = eval_steps, + dataloader_num_workers = dataloader_num_workers, + dataloader_prefetch_factor = dataloader_prefetch_factor, + past_index = past_index, + run_name = run_name, + disable_tqdm = disable_tqdm, + remove_unused_columns = remove_unused_columns, + label_names = label_names, + load_best_model_at_end = load_best_model_at_end, + metric_for_best_model = metric_for_best_model, + greater_is_better = greater_is_better, + ignore_data_skip = ignore_data_skip, + fsdp = fsdp, + fsdp_min_num_params = fsdp_min_num_params, + fsdp_config = fsdp_config, + fsdp_transformer_layer_cls_to_wrap = fsdp_transformer_layer_cls_to_wrap, + accelerator_config = accelerator_config, + parallelism_config = parallelism_config, + deepspeed = deepspeed, + label_smoothing_factor = label_smoothing_factor, + optim = optim, + optim_args = optim_args, + adafactor = adafactor, + group_by_length = group_by_length, + length_column_name = length_column_name, + report_to = report_to, + project = project, + trackio_space_id = trackio_space_id, + 
ddp_find_unused_parameters = ddp_find_unused_parameters, + ddp_bucket_cap_mb = ddp_bucket_cap_mb, + ddp_broadcast_buffers = ddp_broadcast_buffers, + dataloader_pin_memory = dataloader_pin_memory, + dataloader_persistent_workers = dataloader_persistent_workers, + skip_memory_metrics = skip_memory_metrics, + use_legacy_prediction_loop = use_legacy_prediction_loop, + push_to_hub = push_to_hub, + resume_from_checkpoint = resume_from_checkpoint, + hub_model_id = hub_model_id, + hub_strategy = hub_strategy, + hub_token = hub_token, + hub_private_repo = hub_private_repo, + hub_always_push = hub_always_push, + hub_revision = hub_revision, + gradient_checkpointing = gradient_checkpointing, + gradient_checkpointing_kwargs = gradient_checkpointing_kwargs, + include_inputs_for_metrics = include_inputs_for_metrics, + eval_do_concat_batches = eval_do_concat_batches, + fp16_backend = fp16_backend, + push_to_hub_model_id = push_to_hub_model_id, + push_to_hub_organization = push_to_hub_organization, + push_to_hub_token = push_to_hub_token, + mp_parameters = mp_parameters, + auto_find_batch_size = auto_find_batch_size, + full_determinism = full_determinism, + torchdynamo = torchdynamo, + ray_scope = ray_scope, + ddp_timeout = ddp_timeout, + torch_compile = torch_compile, + torch_compile_backend = torch_compile_backend, + torch_compile_mode = torch_compile_mode, + include_tokens_per_second = include_tokens_per_second, + include_num_input_tokens_seen = include_num_input_tokens_seen, + neftune_noise_alpha = neftune_noise_alpha, + optim_target_modules = optim_target_modules, + batch_eval_metrics = batch_eval_metrics, + eval_on_start = eval_on_start, + use_liger_kernel = use_liger_kernel, + liger_kernel_config = liger_kernel_config, + eval_use_gather_object = eval_use_gather_object, + average_tokens_across_devices = average_tokens_across_devices, + max_length = max_length, + max_prompt_length = max_prompt_length, + max_completion_length = max_completion_length, + disable_dropout = disable_dropout, + step_separator = step_separator, + train_on_last_step_only = train_on_last_step_only, + dataset_num_proc = dataset_num_proc,**kwargs) + self.vllm_sampling_params = vllm_sampling_params + self.unsloth_num_chunks = unsloth_num_chunks + self.max_seq_length = max_seq_length +pass + +class _UnslothPRMTrainer(Trainer): + """ + Initialize PRMTrainer. + + Args: + model (`transformers.PreTrainedModel`): + The model to train, preferably an `AutoModelForTokenClassification`. + args (`PRMConfig`): + The arguments to use for training. + data_collator (`transformers.DataCollator`): + The data collator to use for training. If None is specified, the default data collator + (`DataCollatorForTokenClassification`) will be used which will pad the sequences to the maximum length of + the sequences in the batch, given a dataset of paired sequences. + train_dataset (`datasets.Dataset`): + The dataset to use for training. + eval_dataset (`datasets.Dataset`): + The dataset to use for evaluation. + processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`): + Processing class used to process the data. If provided, will be used to automatically process the inputs + for the model, and it will be saved along the model to make it easier to rerun an interrupted training or + reuse the fine-tuned model. 
+ model_init (`Callable[[], transformers.PreTrainedModel]`): + The model initializer to use for training. If None is specified, the default model initializer will be + used. + compute_metrics (`Callable[[transformers.EvalPrediction], dict]`, *optional* defaults to `compute_accuracy`): + The metrics to use for evaluation. If no metrics are specified, the default metric (`compute_accuracy`) + will be used. + callbacks (`list[transformers.TrainerCallback]`): + The callbacks to use for training. + optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`): + The optimizer and scheduler to use for training. + preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`): + The function to use to preprocess the logits before computing the metrics. + peft_config (`dict`, defaults to `None`): + The PEFT configuration to use for training. If you pass a PEFT configuration, the model will be wrapped in + a PEFT model. + """ + + _tag_names = ["trl", "prm"] + + def __init__( + self, + model: Optional[Union[PreTrainedModel, nn.Module]] = None, + args: Optional[PRMConfig] = None, + data_collator: Optional[DataCollator] = None, + train_dataset: Optional[Dataset] = None, + eval_dataset: Optional[Union[Dataset, dict[str, Dataset]]] = None, + processing_class: Optional[ + Union[PreTrainedTokenizerBase, BaseImageProcessor, FeatureExtractionMixin, ProcessorMixin] + ] = None, + model_init: Optional[Callable[[], PreTrainedModel]] = None, + compute_metrics: Optional[Callable[[EvalPrediction], dict]] = None, + callbacks: Optional[list[TrainerCallback]] = None, + optimizers: tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = ( + None, + None, + ), + preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None, + peft_config: Optional[dict] = None, + ): + if False: + model = prepare_peft_model(model, peft_config, args) + + # Disable dropout in the model + if args.disable_dropout: + disable_dropout_in_model(model) + + if compute_metrics is None: + compute_metrics = compute_accuracy + + if data_collator is None: + if processing_class is None: + raise ValueError( + "A processing_class must be specified when using the default DataCollatorForTokenClassification" + ) + data_collator = DataCollatorForTokenClassification(processing_class, max_length=args.max_length) + + if "input_ids" not in train_dataset.column_names: + with PartialState().main_process_first(): + fn_kwargs = { + "tokenizer": processing_class, + "step_separator": args.step_separator, + "max_length": args.max_length, + "max_prompt_length": args.max_prompt_length, + "max_completion_length": args.max_completion_length, + "train_on_last_step_only": args.train_on_last_step_only, + } + train_fn_kwargs = {**fn_kwargs, "is_eval": False} + train_dataset = train_dataset.map( + self.tokenize_row, + fn_kwargs=train_fn_kwargs, + num_proc=args.dataset_num_proc, + remove_columns=train_dataset.features, + desc="Tokenizing train dataset", + features=features.Features( # needed to avoid map to cast labels to bool + { + "labels": features.Sequence(features.Value("int64")), + "input_ids": features.Sequence(features.Value("int64")), + } + ), + ) + + eval_fn_kwargs = {**fn_kwargs, "is_eval": True} + if eval_dataset is not None: + eval_dataset = eval_dataset.map( + self.tokenize_row, + fn_kwargs=eval_fn_kwargs, + num_proc=args.dataset_num_proc, + remove_columns=eval_dataset.features, + desc="Tokenizing eval dataset", + features=features.Features( # needed to avoid map to cast 
labels to bool + { + "labels": features.Sequence(features.Value("int64")), + "input_ids": features.Sequence(features.Value("int64")), + } + ), + ) + + super().__init__( + model=model, + args=args, + data_collator=data_collator, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + processing_class=processing_class, + model_init=model_init, + compute_metrics=compute_metrics, + callbacks=callbacks, + optimizers=optimizers, + preprocess_logits_for_metrics=preprocess_logits_for_metrics, + ) + + # Add tags for models that have been loaded with the correct transformers version + if hasattr(self.model, "add_model_tags"): + self.model.add_model_tags(self._tag_names) + + @staticmethod + def tokenize_row( + features, + tokenizer, + step_separator, + max_length, + max_prompt_length, + max_completion_length, + train_on_last_step_only, + is_eval, + ): + r""" + Tokenize a row of the dataset. + + Args: + features (`dict[str, str]`): + Row of the dataset, should contain the keys `"prompt"`, `"completions"`, and `"labels"`. + tokenizer (`PreTrainedTokenizerBase`): + Tokenizer used to process the data. + step_separator (`str`): + Separator between steps in the completion. + max_length (`int` or `None`): + Maximum length of the sequences (prompt + completion). If `None`, the sequences are not truncated. + max_prompt_length (`int` or `None`): + Maximum length of the prompt. If `None`, the prompt is not truncated. + max_completion_length (`int` or `None`): + Maximum length of the completion sequences. If `None`, the completion sequences are not truncated. + train_on_last_step_only (`bool`): + Whether to train only on the last step. If `True`, the labels are `-100` for all tokens except the last + token of the completion. + is_eval (`bool`): + Whether the function is used to tokenize samples from a training or an evaluation dataset. Used only if + `train_on_last_step_only` is set to `True`. + + Returns: + `dict[str, list[int]]`: + Tokenized sequences with the keys `"input_ids"`, and `"labels". + + Example: + ```python + >>> from transformers import AutoTokenizer + + >>> tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B") + >>> features = { + ... "prompt": "Which number is larger, 9.8 or 9.11?", + ... "completions": ["11 is greater than 8.", "Hence, 9.11 > 9.8."], + ... "labels": [True, False], + ... } + >>> PRMTrainer.tokenize_row( + ... features, tokenizer, "\n", max_completion_length=None, train_on_last_step_only=False, is_eval=False + ... 
) + {'input_ids': [23085, 1372, 374, 8131, 11, 220, 24, 13, 23, 476, 220, 24, 13, 16, 16, 30, 16, 16, 374, 7046, 1091, 220, 23, 13, 198, 39, 763, 11, 220, 24, 13, 16, 16, 861, 220, 24, 13, 23, 13, 198], + 'labels': [-100, -100, -100, -100, -100, -100, -100, -100, 1, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, -100, 0]} + ``` + """ + # Tokenize the prompt and completions + prompt_ids = tokenizer(features["prompt"], add_special_tokens=False)["input_ids"] + completions_ids = [ + tokenizer(completion, add_special_tokens=False)["input_ids"] for completion in features["completions"] + ] + if train_on_last_step_only and not is_eval: + labels = [-100] * (len(features["labels"]) - 1) + [int(features["labels"][-1])] + else: + labels = [int(label) for label in features["labels"]] + + # Get the ID of the separator token and add it to the completions + separator_ids = tokenizer.encode(step_separator, add_special_tokens=False) + completions_ids = [completion + separator_ids for completion in completions_ids] + + # Create the label + labels = [[-100] * (len(completion) - 1) + [label] for completion, label in zip(completions_ids, labels)] + + # Join the completions and labels steps + completion_ids = list(chain(*completions_ids)) + labels = list(chain(*labels)) + + if tokenizer.bos_token_id is not None: + prompt_ids = [tokenizer.bos_token_id] + prompt_ids + + # Truncate prompt and completion sequences + if max_prompt_length is not None: + prompt_ids = prompt_ids[-max_prompt_length:] + if max_completion_length is not None: + completion_ids = completion_ids[:max_completion_length] + labels = labels[:max_completion_length] + + input_ids = prompt_ids + completion_ids + labels = [-100] * len(prompt_ids) + labels + + if max_length is not None: + input_ids = input_ids[:max_length] + labels = labels[:max_length] + + return {"input_ids": input_ids, "labels": labels} + + # Ensure the model card is saved along with the checkpoint + def _save_checkpoint(self, model, trial): + if self.args.hub_model_id is None: + model_name = Path(self.args.output_dir).name + else: + model_name = self.args.hub_model_id.split("/")[-1] + self.create_model_card(model_name=model_name) + super()._save_checkpoint(model, trial) + + def create_model_card( + self, + model_name: Optional[str] = None, + dataset_name: Optional[str] = None, + tags: Union[str, list[str], None] = None, + ): + """ + Creates a draft of a model card using the information available to the `Trainer`. + + Args: + model_name (`str` or `None`, *optional*, defaults to `None`): + Name of the model. + dataset_name (`str` or `None`, *optional*, defaults to `None`): + Name of the dataset used for training. + tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`): + Tags to be associated with the model card. 
+ """ + if not self.is_world_process_zero(): + return + + if hasattr(self.model.config, "_name_or_path") and not os.path.isdir(self.model.config._name_or_path): + base_model = self.model.config._name_or_path + else: + base_model = None + + # normalize `tags` to a mutable set + if tags is None: + tags = set() + elif isinstance(tags, str): + tags = {tags} + else: + tags = set(tags) + + if hasattr(self.model.config, "unsloth_version"): + tags.add("unsloth") + + if "JOB_ID" in os.environ: + tags.add("hf_jobs") + + tags.update(self._tag_names) + + # docstyle-ignore + citation = textwrap.dedent("""\ + @article{uesato2022solving, + title = {{Solving Math Word Problems With Process- and Outcome-Based Feedback}}, + author = {Uesato, Jonathan and Kushman, Nate and Kumar, Ramana and Song, Francis and Siegel, Noah and Wang, Lisa and Creswell, Antonia and Irving, Geoffrey and Higgins, Irina}, + year = 2022, + journal = {arXiv preprint arXiv:2211.14275} + }""") + + model_card = generate_model_card( + base_model=base_model, + model_name=model_name, + hub_model_id=self.hub_model_id, + dataset_name=dataset_name, + tags=tags, + wandb_url=wandb.run.url if is_wandb_available() and wandb.run is not None else None, + trainer_name="PRM", + trainer_citation=citation, + paper_title="Solving math word problems with process-and outcome-based feedback", + ) + + model_card.save(os.path.join(self.args.output_dir, "README.md")) +class UnslothPRMTrainer(_UnslothPRMTrainer): + """ + +Initialize PRMTrainer. + +Args: + model (`transformers.PreTrainedModel`): + The model to train, preferably an `AutoModelForTokenClassification`. + args (`PRMConfig`): + The arguments to use for training. + data_collator (`transformers.DataCollator`): + The data collator to use for training. If None is specified, the default data collator + (`DataCollatorForTokenClassification`) will be used which will pad the sequences to the maximum length of + the sequences in the batch, given a dataset of paired sequences. + train_dataset (`datasets.Dataset`): + The dataset to use for training. + eval_dataset (`datasets.Dataset`): + The dataset to use for evaluation. + processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`): + Processing class used to process the data. If provided, will be used to automatically process the inputs + for the model, and it will be saved along the model to make it easier to rerun an interrupted training or + reuse the fine-tuned model. + model_init (`Callable[[], transformers.PreTrainedModel]`): + The model initializer to use for training. If None is specified, the default model initializer will be + used. + compute_metrics (`Callable[[transformers.EvalPrediction], dict]`, *optional* defaults to `compute_accuracy`): + The metrics to use for evaluation. If no metrics are specified, the default metric (`compute_accuracy`) + will be used. + callbacks (`list[transformers.TrainerCallback]`): + The callbacks to use for training. + optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`): + The optimizer and scheduler to use for training. + preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`): + The function to use to preprocess the logits before computing the metrics. + peft_config (`dict`, defaults to `None`): + The PEFT configuration to use for training. 
If you pass a PEFT configuration, the model will be wrapped in + a PEFT model. + + """ + def __init__( + self, + model = None, + args = None, + data_collator = None, + train_dataset = None, + eval_dataset = None, + processing_class = None, + model_init = None, + compute_metrics = None, + callbacks = None, + preprocess_logits_for_metrics = None, + peft_config = None, + **kwargs + ): + if args is None: args = UnslothPRMConfig() + use_bf16 = getattr(args, 'bf16', False) + if type(use_bf16) is not bool: use_bf16 = False + use_fp16 = getattr(args, 'fp16', False) + if type(use_fp16) is not bool: use_fp16 = False + force_float32 = False + full_finetuning = os.environ.get('UNSLOTH_ENABLE_FULL_FINETUNING', '0') == '1' + if not full_finetuning and (os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '1'): + print('Unsloth: Switching to float32 training since model cannot work with float16') + force_float32 = True + mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') + dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None) + if dtype is None: dtype = model.get_input_embeddings().dtype + from unsloth_zoo.utils import _get_dtype + dtype = _get_dtype(dtype) + float16 = dtype == torch.float16 + if not force_float32 and (float16 and use_bf16): raise TypeError('Unsloth: Model is in float16 precision but you want to use bfloat16 precision. Set fp16 to `True` and bf16 to `False`') + if not force_float32 and (not float16 and use_fp16): raise TypeError('Unsloth: Model is in bfloat16 precision but you want to use float16 precision. Set fp16 to `False` and bf16 to `True`') + if force_float32: + # Forced float32 training + args.fp16 = False + args.bf16 = False + os.environ['ACCELERATE_MIXED_PRECISION'] = 'no' + elif (not use_bf16 and not use_fp16) and mixed_precision_dtype == 'float32': + # Mixed precision training + args.fp16 = float16 + args.bf16 = not float16 + os.environ['ACCELERATE_MIXED_PRECISION'] = 'fp16' if float16 else 'bf16' + if getattr(args, 'eval_dataset', None) is not None and getattr(args, 'eval_strategy', 'no') == 'no': + args.eval_strategy = 'steps' + if getattr(args, 'eval_steps', None) is None: args.eval_steps = 0.1 + ga_steps = getattr(args, 'gradient_accumulation_steps', None) + if ga_steps is not None and ga_steps > 1: + from transformers import __version__ as transformers_version + if Version(transformers_version) <= Version('4.45.2'): + print('**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!\n' + '`pip install --upgrade --no-cache-dir --force-reinstall --no-deps unsloth transformers trl unsloth_zoo`') + if getattr(args, 'eval_strategy', 'no') != 'no': + eval_bsz = getattr(args, 'per_device_eval_batch_size', 8) + if eval_bsz == 8 and args.per_device_train_batch_size < eval_bsz: args.per_device_eval_batch_size = args.per_device_train_batch_size + if getattr(args, 'eval_accumulation_steps', None) is None and ga_steps is not None: args.eval_accumulation_steps = ga_steps + fp16_full_eval = getattr(args, 'fp16_full_eval', False) + if type(fp16_full_eval) is not bool: fp16_full_eval = False + bf16_full_eval = getattr(args, 'bf16_full_eval', False) + if type(bf16_full_eval) is not bool: bf16_full_eval = False + if args.fp16 and bf16_full_eval: args.bf16_full_eval = False; args.fp16_full_eval = True + if args.bf16 and fp16_full_eval: args.bf16_full_eval = True; args.fp16_full_eval = False + if force_float32: + args.bf16_full_eval = False + args.fp16_full_eval = False + elif 
os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') == 'bfloat16': + args.bf16_full_eval = True + args.fp16_full_eval = False + elif not bf16_full_eval and not fp16_full_eval: + args.bf16_full_eval = args.bf16 + args.fp16_full_eval = args.fp16 + _output_logits = False + if locals().get('compute_metrics', None) is not None: _output_logits = True + if locals().get('preprocess_logits_for_metrics', None) is not None: _output_logits = True + if _output_logits: + os.environ['UNSLOTH_RETURN_LOGITS'] = '1' + if 'max_seq_length' not in locals() and not hasattr(args, 'max_seq_length'): + pass + else: + model_max_seq_length = getattr(model, 'max_seq_length', None) + args_max_seq_length = getattr(args, 'max_seq_length', None) + if args_max_seq_length is None and model_max_seq_length is not None: + max_seq_length = model.max_seq_length + if hasattr(args, 'max_seq_length'): args.max_seq_length = max_seq_length + if model is not None and hasattr(model, 'for_training'): + model.for_training() + if 'tokenizer' in locals() and hasattr(tokenizer, 'padding_side'): tokenizer.padding_side = 'right' + if 'processing_class' in locals(): + if hasattr(processing_class, 'padding_side'): processing_class.padding_side = 'right' + if hasattr(processing_class, 'tokenizer') and hasattr(processing_class.tokenizer, 'padding_side'): processing_class.tokenizer.padding_side = 'right' + __tokenizer = processing_class if 'processing_class' in locals() else tokenizer + from unsloth_zoo.vision_utils import UnslothVisionDataCollator + if not isinstance(data_collator, UnslothVisionDataCollator): + if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names: + data_collator = DataCollatorForSeq2Seq( + __tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False + if hasattr(args, 'dataset_text_field'): args.dataset_text_field = '' + if hasattr(args, 'dataset_kwargs'): args.dataset_kwargs = {'skip_prepare_dataset': True} + if not isinstance(data_collator, UnslothVisionDataCollator): + if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'): + if isinstance(data_collator, DataCollatorForSeq2Seq): + data_collator = DataCollatorForSeq2Seq( + __tokenizer.tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer.tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + other_metrics = [] + + from unsloth_zoo.logging_utils import PatchRLStatistics + PatchRLStatistics('prm_trainer', other_metrics) + + # [TODO] Fix up DataParallel multiplying batch sizes + # [TODO] DDP works, but DP seems to not work? 
[TODO] + if getattr(args, "parallel_mode", None) == ParallelMode.NOT_DISTRIBUTED and args.n_gpu > 1: + if getattr(args, "_n_gpu", 1) != 1: + args._n_gpu = 1 + if "model" in locals() and hasattr(model, "for_training"): + model.for_training() + super().__init__( + model = model, + args = args, + data_collator = data_collator, + train_dataset = train_dataset, + eval_dataset = eval_dataset, + processing_class = processing_class, + model_init = model_init, + compute_metrics = compute_metrics, + callbacks = callbacks, + preprocess_logits_for_metrics = preprocess_logits_for_metrics, + peft_config = peft_config,**kwargs) + if "model" in locals() and hasattr(model, "for_inference"): + model.for_inference() + if hasattr(self, 'neftune_hook_handle'): + self.neftune_hook_handle.remove() + if hasattr(self, 'neftune_hook_handle'): del self.neftune_hook_handle + if getattr(args, 'neftune_noise_alpha', None) is not None: + model.get_input_embeddings().neftune_noise_alpha = self.neftune_noise_alpha + pass + if hasattr(self, 'accelerator'): + scaler = self.accelerator.scaler + current_model = model + while hasattr(current_model, 'model'): + current_model.accelerator_scaler = scaler + current_model = current_model.model + current_model.accelerator_scaler = scaler + pass + if hasattr(self, 'train'): + self.train = MethodType(prepare_for_training_mode(self.__class__.train), self) + pass + +pass diff --git a/unsloth_compiled_cache/UnslothRLOOTrainer.py b/unsloth_compiled_cache/UnslothRLOOTrainer.py new file mode 100644 index 0000000000000000000000000000000000000000..10c9d2ebe2a8eb1d32666087e6ee9da4a102184f --- /dev/null +++ b/unsloth_compiled_cache/UnslothRLOOTrainer.py @@ -0,0 +1,2465 @@ +""" +2025.11.2 +2025.11.1 +4.57.2 +0.23.0 +__UNSLOTH_VERSIONING__ +""" + +# Unsloth auto generated code +# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . 
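+
+# Illustrative usage sketch (an assumption for documentation, not part of the
+# generated code): the classes below are drop-in replacements for their TRL
+# counterparts, e.g.
+#
+#     from unsloth_compiled_cache.UnslothRLOOTrainer import (
+#         UnslothRLOOConfig, UnslothRLOOTrainer,
+#     )
+#     args = UnslothRLOOConfig(output_dir = "outputs", max_steps = 10)
+#     trainer = UnslothRLOOTrainer(model = model, reward_funcs = reward_fn,
+#                                  args = args, train_dataset = dataset)
+#     trainer.train()
+#
+# where `model`, `reward_fn` and `dataset` are placeholders for the user's own
+# policy model, reward function, and prompt dataset.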
+ +from torch import Tensor +import torch +import torch.nn as nn +from torch.nn import functional as F +from typing import Any, List, Optional, Tuple, Union, Dict, Set, Callable +from trl.trainer.rloo_trainer import (Any, AutoConfig, AutoModelForSequenceClassification, AutoProcessor, AutoTokenizer, DataLoader, Dataset, FSDP, GenerationConfig, IterableDataset, Optional, Path, PeftConfig, PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin, RLOOConfig, RLOOTrainer, RepeatSampler, RewardFunc, Sampler, SyncRefModelCallback, Trainer, TrainerCallback, Union, VLLMClient, apply_chat_template, broadcast_object_list, copy, datasets, defaultdict, deque, disable_dropout_in_model, entropy_from_logits, gather, gather_object, generate_model_card, get_comet_experiment_url, identity, inspect, is_conversational, is_datasets_available, is_flash_attn_2_available, is_peft_model, is_rich_available, is_vllm_available, is_wandb_available, logger, logging, maybe_apply_chat_template, nanmax, nanmin, nanstd, nn, nullcontext, os, pad, partial, prepare_deepspeed, prepare_fsdp, prepare_peft_model, print_prompt_completions_sample, profiling_context, profiling_decorator, re, seed_worker, selective_log_softmax, set_seed, shuffle_sequence_dict, split_pixel_values_by_grid, split_tensor_dict, textwrap, torch, transformers, truncate_with_protected_tokens, unsplit_pixel_values_by_grid, unwrap_model_for_generation, warnings, Any, FSDP, Union, apply_chat_template, broadcast_object_list, copy, gather, gather_object, is_conversational, is_flash_attn_2_available, logging, maybe_apply_chat_template, nanstd, nullcontext, os, pad, profiling_context, re, torch, transformers, truncate_with_protected_tokens, unwrap_model_for_generation, FSDP, gather, is_peft_model, nn, nullcontext, os, profiling_decorator, re, Any, Union, profiling_decorator, re, shuffle_sequence_dict, split_pixel_values_by_grid, split_tensor_dict, torch, unsplit_pixel_values_by_grid, Optional, PreTrainedModel, Trainer, logger, os, re, torch, FSDP, nn, os, re, FSDP, nn, re, torch) + + +import os +from typing import * +from dataclasses import dataclass, field +from packaging.version import Version +import torch +import numpy as np +from contextlib import nullcontext +from torch.nn import functional as F +from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling as TransformersDataCollatorForLanguageModeling +from transformers.training_args import ParallelMode + +# Wrap trainer with padding to right and enable training mode +import functools +from types import MethodType +def prepare_for_training_mode(f): + @functools.wraps(f) + def wrapper(self, *args, **kwargs): + # Enable training mode + if hasattr(self, 'model') and hasattr(self.model, "for_training"): + self.model.for_training() + output = f(self, *args, **kwargs) + # Return inference mode + if hasattr(self, 'model') and hasattr(self.model, "for_inference"): + self.model.for_inference() + return output + return wrapper +pass + +torch_compile_options = { + "epilogue_fusion" : True, + "max_autotune" : False, + "shape_padding" : True, + "trace.enabled" : False, + "triton.cudagraphs" : False, +} + +@torch.compile(dynamic = True, fullgraph = True, options = torch_compile_options,) +def chunked_selective_log_softmax(logits, index): + # Split into 4 chunks only + chunked_logits = torch.chunk(logits.reshape(-1, logits.shape[-1]), chunks = 4, dim = 0) + chunked_index = torch.chunk(index.reshape(-1), chunks = 4, dim = 0) + all_per_token_logps = [] + # Below loop does the same as 
#   selective_log_softmax(chunk_logits, chunk_index)
+    for chunk_logits, chunk_index in zip(chunked_logits, chunked_index):
+        chunk_logits = chunk_logits.to(torch.float32)
+        selected_logits = torch.gather(chunk_logits, dim = -1, index = chunk_index.unsqueeze(-1)).squeeze(-1)
+        logsumexp_values = torch.logsumexp(chunk_logits, dim = -1)
+        per_token_logps = selected_logits - logsumexp_values
+        all_per_token_logps.append(per_token_logps)
+    pass
+    all_per_token_logps = torch.concat(all_per_token_logps)
+    all_per_token_logps = all_per_token_logps.reshape((logits.shape[0], logits.shape[1]))
+    return all_per_token_logps
+
+def calculate_pad_tokens_in_prompt(
+    input_ids: torch.Tensor,
+    logits_to_keep: int,
+    pad_token_id: int
+) -> torch.Tensor:
+    """
+    Given a prompt tensor, return the number of left-pad tokens in each sequence,
+    e.g. [pad, pad, pad, cat] -> 3 tokens.
+    """
+    if logits_to_keep >= input_ids.shape[1]:
+        raise ValueError("logits_to_keep must be smaller than the sequence length.")
+
+    prompt_section = input_ids[:, :-logits_to_keep]
+
+    padding_mask = (prompt_section == pad_token_id)
+
+    pad_token_counts = padding_mask.sum(dim=1)
+
+    return pad_token_counts
+
+def create_completion_attention_mask(
+    completion_input_ids: torch.Tensor,
+    left_pad_tokens_per_prompt: torch.Tensor,
+    max_left_pad: int,
+    pad_token_id: int
+) -> torch.Tensor:
+    """
+    Given a sequence [p, p, p, c, c, c, pad, pad, pad], where p are extra prompt
+    tokens left over from slicing the tensor, c are completion tokens, and pad are
+    pad tokens, build a completion mask that zeroes out the p and pad tokens:
+    in this example, [0, 0, 0, 1, 1, 1, 0, 0, 0].
+    """
+    batch_size, completion_len = completion_input_ids.shape
+    device = completion_input_ids.device
+
+    num_tokens_to_mask = max_left_pad - left_pad_tokens_per_prompt
+
+    indices = torch.arange(completion_len, device=device).unsqueeze(0)
+    shift_mask = indices >= num_tokens_to_mask.unsqueeze(1)
+
+    non_padding_mask = (completion_input_ids != pad_token_id)
+
+    final_mask = shift_mask & non_padding_mask
+
+    return final_mask
+
+def left_pack_padding(tensor: torch.Tensor, pad_id: int) -> torch.Tensor:
+    """
+    Moves all padding tokens in each sequence of a batch to the right.
+    """
+    mask = (tensor != pad_id)
+    # Must use stable=True: the mask is binary, so only a stable sort preserves
+    # the original relative order of tokens within each group
+    sorted_indices = torch.argsort(mask, dim=1, descending=True, stable=True)
+    packed_tensor = torch.gather(tensor, 1, sorted_indices)
+    return packed_tensor
+
+def align_logprobs_with_mask(
+    logprob_tensor: torch.Tensor,
+    attention_mask: torch.Tensor,
+    pad_value: float = 0.0
+) -> torch.Tensor:
+    """
+    Aligns a log probability tensor with a given attention mask.
+    """
+
+    device = logprob_tensor.device
+    batch_size, logprob_seq_len = logprob_tensor.shape
+    mask_seq_len = attention_mask.shape[1]
+
+    padded_logprobs = torch.full(
+        attention_mask.shape,
+        fill_value=pad_value,
+        dtype=logprob_tensor.dtype,
+        device=device
+    )
+
+    left_pad_counts = torch.argmax(attention_mask, dim=1)
+
+    cols = torch.arange(logprob_seq_len, device=device)
+    dest_indices = left_pad_counts.unsqueeze(1) + cols
+
+    # Create destination row indices
+    # Shape: [batch_size, logprob_seq_len]
+    row_indices = torch.arange(batch_size, device=device).unsqueeze(1).expand_as(dest_indices)
+
+    # Filter out-of-bounds indices and perform the assignment:
+    # keep only the indices that fall within the bounds of the target
+    # tensor's sequence length.
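+    # Worked example (hypothetical shapes): with logprob_seq_len = 3,
+    # mask_seq_len = 4 and left_pad_counts = [2], dest_indices = [[2, 3, 4]];
+    # index 4 is out of bounds, so only positions 2 and 3 receive values.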
+ valid_mask = dest_indices < mask_seq_len + + # Use this mask to select only the valid row indices, column indices, + # and the corresponding values from the logprob tensor. + # This flattens the selected elements into 1D tensors. + valid_rows = row_indices[valid_mask] + valid_cols = dest_indices[valid_mask] + valid_vals = logprob_tensor[valid_mask] + + # Place the valid values into their correct positions in the padded tensor + # using a single, efficient advanced indexing operation. + padded_logprobs[valid_rows, valid_cols] = valid_vals + + return padded_logprobs +def vLLMSamplingParams(**kwargs): + from vllm import SamplingParams + sampling_params = SamplingParams(**kwargs) + sampling_params._set_kwargs = kwargs + return sampling_params +@dataclass +class UnslothRLOOConfig(RLOOConfig): + """ + +Configuration class for the [`RLOOTrainer`]. + +This class includes only the parameters that are specific to RLOO training. For a full list of training arguments, +please refer to the [`~transformers.TrainingArguments`] documentation. Note that default values in this class may +differ from those in [`~transformers.TrainingArguments`]. + +Using [`~transformers.HfArgumentParser`] we can turn this class into +[argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the +command line. + +Parameters: + > Parameters that control the model and reference model + + model_init_kwargs (`str`, `dict[str, Any]` or `None`, *optional*, defaults to `None`): + Keyword arguments for [`~transformers.AutoModelForCausalLM.from_pretrained`], used when the `model` + argument of the [`GRPOTrainer`] is provided as a string. + disable_dropout (`bool`, *optional*, defaults to `False`): + Whether to disable dropout in the model. This is useful for training with a reference model, as it prevents + the model from generating different logprobs for the same input. + + > Parameters that control the data preprocessing + + remove_unused_columns (`bool`, *optional*, defaults to `False`): + Whether to only keep the column `"prompt"` in the dataset. If you use a custom reward function that + requires any column other than `"prompts"` and `"completions"`, you should keep this to `False`. + max_prompt_length (`int` or `None`, *optional*, defaults to `512`): + Maximum length of the prompt. If the prompt is longer than this value, it will be truncated left. + num_generations (`int` or `None`, *optional*, defaults to `2`): + Number of generations per prompt to sample. The effective batch size (num_processes * per_device_batch_size + * gradient_accumulation_steps) must be evenly divisible by this value. + max_completion_length (`int` or `None`, *optional*, defaults to `256`): + Maximum length of the generated completion. + ds3_gather_for_generation (`bool`, *optional*, defaults to `True`): + This setting applies to DeepSpeed ZeRO-3. If enabled, the policy model weights are gathered for generation, + improving generation speed. However, disabling this option allows training models that exceed the VRAM + capacity of a single GPU, albeit at the cost of slower generation. Disabling this option is not compatible + with vLLM generation. + shuffle_dataset (`bool`, *optional*, defaults to `True`): + Whether to shuffle the training dataset. + + > Parameters that control generation + + generation_batch_size: (`int` or `None`, *optional*, defaults to `None`): + Batch size to use for generation. 
If `None`, it defaults to the effective training batch size: + `per_device_train_batch_size * num_processes * steps_per_generation`. In other words, there is one + generation batch processed per optimization step. Mutually exclusive with `steps_per_generation`. + steps_per_generation: (`int` or `None`, *optional*, defaults to `None`): + Number of steps per generation. If `None`, it defaults to `gradient_accumulation_steps`. Mutually exclusive + with `generation_batch_size`. + temperature (`float`, defaults to `1.0`): + Temperature for sampling. The higher the temperature, the more random the completions. + top_p (`float`, *optional*, defaults to `1.0`): + Float that controls the cumulative probability of the top tokens to consider. Must be in (0, 1]. Set to + `1.0` to consider all tokens. + top_k (`int` or `None`, *optional*, defaults to `None`): + Number of highest probability vocabulary tokens to keep for top-k-filtering. If `None`, top-k-filtering is + disabled and all tokens are considered. + min_p (`float` or `None`, *optional*, defaults to `None`): + Minimum token probability, which will be scaled by the probability of the most likely token. It must be a + value between `0.0` and `1.0`. Typical values are in the `0.01-0.2` range. + repetition_penalty (`float`, *optional*, defaults to `1.0`): + Float that penalizes new tokens based on whether they appear in the prompt and the generated text so far. + Values > `1.0` encourage the model to use new tokens, while values < `1.0` encourage the model to repeat + tokens. + use_transformers_paged (`bool`, *optional*, defaults to `False`): + Whether to use the `transformers` paged implementation for generation. If set to `True`, the `transformers` + paged implementation will be used for generation instead of the default padded implementation. This + parameter is only effective when `use_vllm` is set to `False`. + cache_implementation (`str` or `None`, *optional*, defaults to `None`): + Implementation of the cache method for faster generation when `use_vllm` is set to `False`. + generation_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`): + Additional keyword arguments to pass to `GenerationConfig` (if using transformers) or `SamplingParams` (if + using vLLM) when sampling completions. This can be used to further customize the generation behavior, such + as setting `suppress_tokens`, `num_beams`, etc. If it contains keys that conflict with the other generation + parameters (like `min_p`, `top_p`, etc.), they will override them. + + > Parameters that control generation acceleration powered by vLLM + + use_vllm (`bool`, *optional*, defaults to `False`): + Whether to use vLLM for generating completions. If set to `True`, the trainer will use vLLM for generation + instead of the default model.generate(). Requires `vllm` to be installed. + vllm_mode (`str`, *optional*, defaults to `"server"`): + Mode to use for vLLM integration when `use_vllm` is set to `True`. Must be one of `"server"` or + `"colocate"`. + + - `"server"`: The trainer will send generation requests to a separate vLLM server. Make sure a TRL vLLM + server is running (start with `trl vllm-serve`). + - `"colocate"`: vLLM will run in the same process and share the training GPUs. This avoids the need for a + separate server but may cause resource contention with training. + vllm_model_impl (`str`, *optional*, defaults to `"vllm"`): + Model implementation to use for vLLM. Must be one of `"transformers"` or `"vllm"`. 
+
+        - `"transformers"`: Use the `transformers` backend for model implementation.
+        - `"vllm"`: Use the `vllm` library for model implementation.
+    vllm_guided_decoding_regex (`str` or `None`, *optional*, defaults to `None`):
+        Regex for vLLM guided decoding. If `None` (default), guided decoding is disabled.
+
+    > Parameters that control the vLLM server (only used when `vllm_mode` is `"server"`)
+
+    vllm_server_base_url (`str` or `None`, *optional*, defaults to `None`):
+        Base URL for the vLLM server (e.g., `"http://localhost:8000"`). If provided, `vllm_server_host` and
+        `vllm_server_port` are ignored.
+    vllm_server_host (`str`, *optional*, defaults to `"0.0.0.0"`):
+        Host of the vLLM server to connect to. Ignored if `vllm_server_base_url` is provided.
+    vllm_server_port (`int`, *optional*, defaults to `8000`):
+        Port of the vLLM server to connect to. Ignored if `vllm_server_base_url` is provided.
+    vllm_server_timeout (`float`, *optional*, defaults to `240.0`):
+        Total timeout duration in seconds to wait for the vLLM server to be up. If the server is not up after the
+        timeout, a `ConnectionError` is raised.
+
+    > Parameters that control colocated vLLM execution (only used when `vllm_mode` is `"colocate"`)
+
+    vllm_gpu_memory_utilization (`float`, *optional*, defaults to `0.3`):
+        Control the GPU memory utilization for vLLM. This setting only applies when `vllm_mode` is set to
+        `"colocate"`. If you are using `vllm_mode="server"`, this parameter must be passed separately when
+        launching the vLLM server via the `--vllm_gpu_memory_utilization` flag.
+    vllm_tensor_parallel_size (`int`, *optional*, defaults to `1`):
+        Control the tensor parallel size for vLLM. This setting only applies when `vllm_mode` is set to
+        `"colocate"`. If you are using `vllm_mode="server"`, this parameter must be passed separately when
+        launching the vLLM server via the `--vllm_tensor_parallel_size` flag.
+
+    > Parameters that control the training
+
+    beta (`float`, *optional*, defaults to `0.05`):
+        KL coefficient. If `0.0`, the reference model is not loaded, reducing memory usage and improving training
+        speed.
+    num_iterations (`int`, *optional*, defaults to `1`):
+        Number of iterations per batch (denoted as μ in the algorithm).
+    epsilon (`float`, *optional*, defaults to `0.2`):
+        Epsilon value for clipping.
+    epsilon_high (`float` or `None`, *optional*, defaults to `None`):
+        Upper-bound epsilon value for clipping. If not specified, it defaults to the same value as the lower-bound
+        specified in argument `epsilon`. Paper [DAPO](https://huggingface.co/papers/2503.14476) recommends `0.28`.
+    reward_weights (`list[float]` or `None`, *optional*, defaults to `None`):
+        Weights for each reward function. Must match the number of reward functions. If `None`, all rewards are
+        weighted equally with weight `1.0`.
+    normalize_advantages (`bool`, *optional*, defaults to `False`):
+        Whether to normalize advantages. Normalization is done per generation batch to have mean `0.0` and standard
+        deviation of `1.0`.
+    reward_clip_range (`tuple[float, float]` or `None`, *optional*, defaults to `None`):
+        Clip range for rewards as (min, max). If `None`, no clipping is applied.
+    mask_truncated_completions (`bool`, *optional*, defaults to `False`):
+        When enabled, truncated completions are excluded from the loss calculation, preventing them from being
+        incorrectly penalized and introducing noise during training. According to the
+        [DAPO](https://huggingface.co/papers/2503.14476) paper, this is a good practice for training stability.
+ sync_ref_model (`bool`, *optional*, defaults to `False`): + Whether to synchronize the reference model with the active model every `ref_model_sync_steps` steps, using + the `ref_model_mixup_alpha` parameter. This synchronization originates from the + [TR-DPO](https://huggingface.co/papers/2404.09656) paper. + ref_model_mixup_alpha (`float`, *optional*, defaults to `0.6`): + α parameter from the [TR-DPO](https://huggingface.co/papers/2404.09656) paper, which controls the mix + between the current policy and the previous reference policy during updates. The reference policy is + updated according to the equation: `π_ref = α * π_θ + (1 - α) * π_ref_prev`. To use this parameter, you + must set `sync_ref_model=True`. + ref_model_sync_steps (`int`, *optional*, defaults to `512`): + τ parameter from the [TR-DPO](https://huggingface.co/papers/2404.09656) paper, which determines how + frequently the current policy is synchronized with the reference policy. To use this parameter, you must + set `sync_ref_model=True`. + + > Parameters that control the logging + + log_completions (`bool`, *optional*, defaults to `False`): + Whether to log a sample of (prompt, completion) pairs every `logging_steps` steps. If `rich` is installed, + it prints the sample. If `wandb` logging is enabled, it logs it to `wandb`. + num_completions_to_print (`int` or `None`, *optional*, defaults to `None`): + Number of completions to print with `rich`. If `None`, all completions are logged. + wandb_log_unique_prompts (`bool`, *optional*, defaults to `False`): + Whether to log unique prompts in wandb. If `True`, only unique prompts are logged. If `False`, all prompts + are logged. + + """ + vllm_sampling_params: Optional[Any] = field( + default = None, + metadata = {'help': 'vLLM SamplingParams'}, + ) + unsloth_num_chunks : Optional[int] = field( + default = -1, + metadata = {'help': 'Chunk size to reduce memory usage. 
-1 is most efficient.'}, + ) + + def __init__( + self, + output_dir = None, + overwrite_output_dir = None, + do_train = False, + do_eval = False, + do_predict = False, + eval_strategy = 'no', + prediction_loss_only = False, + per_device_train_batch_size = 4, + per_device_eval_batch_size = 4, + per_gpu_train_batch_size = None, + per_gpu_eval_batch_size = None, + gradient_accumulation_steps = 2, + eval_accumulation_steps = 2, + eval_delay = 0, + torch_empty_cache_steps = 250, + learning_rate = 5e-05, + weight_decay = 0.01, + adam_beta1 = 0.9, + adam_beta2 = 0.999, + adam_epsilon = 1e-08, + max_grad_norm = 1.0, + num_train_epochs = 3.0, + max_steps = -1, + lr_scheduler_type = 'linear', + warmup_ratio = 0.1, + warmup_steps = 0, + log_level = 'passive', + log_level_replica = 'warning', + log_on_each_node = True, + logging_dir = None, + logging_strategy = 'steps', + logging_first_step = False, + logging_steps = 1, + logging_nan_inf_filter = False, + save_strategy = 'steps', + save_steps = 500, + save_total_limit = None, + save_safetensors = True, + save_on_each_node = False, + save_only_model = False, + restore_callback_states_from_checkpoint = False, + no_cuda = False, + use_cpu = False, + use_mps_device = False, + seed = 3407, + data_seed = 3407, + jit_mode_eval = False, + bf16 = False, + fp16 = False, + fp16_opt_level = 'O1', + half_precision_backend = 'auto', + bf16_full_eval = False, + fp16_full_eval = False, + tf32 = None, + local_rank = -1, + ddp_backend = None, + tpu_num_cores = None, + tpu_metrics_debug = False, + debug = '', + dataloader_drop_last = False, + eval_steps = None, + dataloader_num_workers = 0, + dataloader_prefetch_factor = None, + past_index = -1, + run_name = None, + disable_tqdm = None, + remove_unused_columns = False, + label_names = None, + load_best_model_at_end = False, + metric_for_best_model = None, + greater_is_better = None, + ignore_data_skip = False, + fsdp = None, + fsdp_min_num_params = 0, + fsdp_config = None, + fsdp_transformer_layer_cls_to_wrap = None, + accelerator_config = None, + parallelism_config = None, + deepspeed = None, + label_smoothing_factor = 0.0, + optim = 'adamw_8bit', + optim_args = None, + adafactor = False, + group_by_length = False, + length_column_name = 'length', + report_to = None, + project = 'huggingface', + trackio_space_id = 'trackio', + ddp_find_unused_parameters = None, + ddp_bucket_cap_mb = None, + ddp_broadcast_buffers = None, + dataloader_pin_memory = True, + dataloader_persistent_workers = False, + skip_memory_metrics = True, + use_legacy_prediction_loop = False, + push_to_hub = False, + resume_from_checkpoint = None, + hub_model_id = None, + hub_strategy = 'every_save', + hub_token = None, + hub_private_repo = None, + hub_always_push = False, + hub_revision = None, + gradient_checkpointing = True, + gradient_checkpointing_kwargs = None, + include_inputs_for_metrics = False, + eval_do_concat_batches = True, + fp16_backend = 'auto', + push_to_hub_model_id = None, + push_to_hub_organization = None, + push_to_hub_token = None, + mp_parameters = '', + auto_find_batch_size = False, + full_determinism = False, + torchdynamo = None, + ray_scope = 'last', + ddp_timeout = 1800, + torch_compile = False, + torch_compile_backend = None, + torch_compile_mode = None, + include_tokens_per_second = False, + include_num_input_tokens_seen = False, + neftune_noise_alpha = None, + optim_target_modules = None, + batch_eval_metrics = False, + eval_on_start = False, + use_liger_kernel = False, + liger_kernel_config = None, + 
eval_use_gather_object = False,
+        average_tokens_across_devices = True,
+        model_init_kwargs = None,
+        disable_dropout = False,
+        max_prompt_length = 512,
+        num_generations = 8,
+        max_completion_length = 256,
+        ds3_gather_for_generation = True,
+        shuffle_dataset = True,
+        generation_batch_size = None,
+        steps_per_generation = None,
+        temperature = 1.0,
+        top_p = 1.0,
+        top_k = None,
+        min_p = None,
+        generation_kwargs = None,
+        repetition_penalty = 1.0,
+        use_transformers_paged = False,
+        cache_implementation = None,
+        use_vllm = False,
+        vllm_mode = 'colocate',
+        vllm_model_impl = 'vllm',
+        vllm_guided_decoding_regex = None,
+        vllm_server_base_url = None,
+        vllm_server_host = '0.0.0.0',
+        vllm_server_port = 8000,
+        vllm_server_timeout = 240.0,
+        vllm_gpu_memory_utilization = 0.3,
+        vllm_tensor_parallel_size = 1,
+        beta = 0.05,
+        num_iterations = 1,
+        epsilon = 0.2,
+        epsilon_high = None,
+        reward_weights = None,
+        normalize_advantages = False,
+        reward_clip_range = None,
+        mask_truncated_completions = False,
+        sync_ref_model = False,
+        ref_model_mixup_alpha = 0.6,
+        ref_model_sync_steps = 512,
+        log_completions = False,
+        num_completions_to_print = None,
+        wandb_log_unique_prompts = False,
+        rloo_k = None,
+        cliprange = None,
+        kl_coef = None,
+        exp_name = None,
+        normalize_reward = None,
+        num_ppo_epochs = None,
+        num_mini_batches = None,
+        total_episodes = None,
+        response_length = None,
+        token_level_kl = None,
+        dataset_num_proc = None,
+        local_rollout_forward_batch_size = None,
+        num_sample_generations = None,
+        stop_token = None,
+        stop_token_id = None,
+        missing_eos_penalty = None,
+        vllm_sampling_params = None,
+        unsloth_num_chunks = -1,
+
+        **kwargs,
+    ):
+        if learning_rate < 1e-7: print(f'Unsloth: Your learning rate of `{learning_rate}` is too small (less than 1e-7)! Consider increasing it, otherwise gradient updates will be close to 0!')
+        if learning_rate > 1: print(f'Unsloth: Your learning rate of `{learning_rate}` is way too large (> 1)! Consider decreasing it to 1e-1, otherwise gradient updates will explode!')
+        if output_dir is None and save_strategy == 'steps' and save_steps == 500:
+            output_dir = 'unsloth_training_checkpoints'
+            save_strategy = 'no'
+        if dataset_num_proc is None:
+            from multiprocessing import cpu_count
+            dataset_num_proc = min(max(cpu_count()+4, 2), 64)
+        if (per_device_train_batch_size // num_generations) * num_generations != per_device_train_batch_size:
+            print('Unsloth: We now expect `per_device_train_batch_size` to be a multiple of `num_generations`.\nWe will change the batch size of ' + str(per_device_train_batch_size) + ' to the `num_generations` of ' + str(num_generations))
+            per_device_train_batch_size = num_generations
+
+        if temperature <= 0:
+            raise ValueError('Unsloth: Please set a positive non-zero temperature since your results will be wrong.')
+        elif temperature >= 10:
+            raise ValueError('Unsloth: Please set a positive non-zero temperature less than 10, since sampling will be quite erratic.')
+
+
+        super().__init__(
+            output_dir = output_dir,
+            overwrite_output_dir = overwrite_output_dir,
+            do_train = do_train,
+            do_eval = do_eval,
+            do_predict = do_predict,
+            eval_strategy = eval_strategy,
+            prediction_loss_only = prediction_loss_only,
+            per_device_train_batch_size = per_device_train_batch_size,
+            per_device_eval_batch_size = per_device_eval_batch_size,
+            per_gpu_train_batch_size = per_gpu_train_batch_size,
+            per_gpu_eval_batch_size = per_gpu_eval_batch_size,
+            gradient_accumulation_steps = gradient_accumulation_steps,
+            eval_accumulation_steps = eval_accumulation_steps,
+            eval_delay = eval_delay,
+            torch_empty_cache_steps = torch_empty_cache_steps,
+            learning_rate = learning_rate,
+            weight_decay = weight_decay,
+            adam_beta1 = adam_beta1,
+            adam_beta2 = adam_beta2,
+            adam_epsilon = adam_epsilon,
+            max_grad_norm = max_grad_norm,
+            num_train_epochs = num_train_epochs,
+            max_steps = max_steps,
+            lr_scheduler_type = lr_scheduler_type,
+            warmup_ratio = warmup_ratio,
+            warmup_steps = warmup_steps,
+            log_level = log_level,
+            log_level_replica = log_level_replica,
+            log_on_each_node = log_on_each_node,
+            logging_dir = logging_dir,
+            logging_strategy = logging_strategy,
+            logging_first_step = logging_first_step,
+            logging_steps = logging_steps,
+            logging_nan_inf_filter = logging_nan_inf_filter,
+            save_strategy = save_strategy,
+            save_steps = save_steps,
+            save_total_limit = save_total_limit,
+            save_safetensors = save_safetensors,
+            save_on_each_node = save_on_each_node,
+            save_only_model = save_only_model,
+            restore_callback_states_from_checkpoint = restore_callback_states_from_checkpoint,
+            no_cuda = no_cuda,
+            use_cpu = use_cpu,
+            use_mps_device = use_mps_device,
+            seed = seed,
+            data_seed = data_seed,
+            jit_mode_eval = jit_mode_eval,
+            bf16 = bf16,
+            fp16 = fp16,
+            fp16_opt_level = fp16_opt_level,
+            half_precision_backend = half_precision_backend,
+            bf16_full_eval = bf16_full_eval,
+            fp16_full_eval = fp16_full_eval,
+            tf32 = tf32,
+            local_rank = local_rank,
+            ddp_backend = ddp_backend,
+            tpu_num_cores = tpu_num_cores,
+            tpu_metrics_debug = tpu_metrics_debug,
+            debug = debug,
+            dataloader_drop_last = dataloader_drop_last,
+            eval_steps = eval_steps,
+            dataloader_num_workers = dataloader_num_workers,
+            dataloader_prefetch_factor = dataloader_prefetch_factor,
+            past_index = past_index,
+            run_name = run_name,
+            disable_tqdm = disable_tqdm,
+            remove_unused_columns = remove_unused_columns,
+            label_names = label_names,
+            load_best_model_at_end = load_best_model_at_end,
+            metric_for_best_model =
metric_for_best_model, + greater_is_better = greater_is_better, + ignore_data_skip = ignore_data_skip, + fsdp = fsdp, + fsdp_min_num_params = fsdp_min_num_params, + fsdp_config = fsdp_config, + fsdp_transformer_layer_cls_to_wrap = fsdp_transformer_layer_cls_to_wrap, + accelerator_config = accelerator_config, + parallelism_config = parallelism_config, + deepspeed = deepspeed, + label_smoothing_factor = label_smoothing_factor, + optim = optim, + optim_args = optim_args, + adafactor = adafactor, + group_by_length = group_by_length, + length_column_name = length_column_name, + report_to = report_to, + project = project, + trackio_space_id = trackio_space_id, + ddp_find_unused_parameters = ddp_find_unused_parameters, + ddp_bucket_cap_mb = ddp_bucket_cap_mb, + ddp_broadcast_buffers = ddp_broadcast_buffers, + dataloader_pin_memory = dataloader_pin_memory, + dataloader_persistent_workers = dataloader_persistent_workers, + skip_memory_metrics = skip_memory_metrics, + use_legacy_prediction_loop = use_legacy_prediction_loop, + push_to_hub = push_to_hub, + resume_from_checkpoint = resume_from_checkpoint, + hub_model_id = hub_model_id, + hub_strategy = hub_strategy, + hub_token = hub_token, + hub_private_repo = hub_private_repo, + hub_always_push = hub_always_push, + hub_revision = hub_revision, + gradient_checkpointing = gradient_checkpointing, + gradient_checkpointing_kwargs = gradient_checkpointing_kwargs, + include_inputs_for_metrics = include_inputs_for_metrics, + eval_do_concat_batches = eval_do_concat_batches, + fp16_backend = fp16_backend, + push_to_hub_model_id = push_to_hub_model_id, + push_to_hub_organization = push_to_hub_organization, + push_to_hub_token = push_to_hub_token, + mp_parameters = mp_parameters, + auto_find_batch_size = auto_find_batch_size, + full_determinism = full_determinism, + torchdynamo = torchdynamo, + ray_scope = ray_scope, + ddp_timeout = ddp_timeout, + torch_compile = torch_compile, + torch_compile_backend = torch_compile_backend, + torch_compile_mode = torch_compile_mode, + include_tokens_per_second = include_tokens_per_second, + include_num_input_tokens_seen = include_num_input_tokens_seen, + neftune_noise_alpha = neftune_noise_alpha, + optim_target_modules = optim_target_modules, + batch_eval_metrics = batch_eval_metrics, + eval_on_start = eval_on_start, + use_liger_kernel = use_liger_kernel, + liger_kernel_config = liger_kernel_config, + eval_use_gather_object = eval_use_gather_object, + average_tokens_across_devices = average_tokens_across_devices, + model_init_kwargs = model_init_kwargs, + disable_dropout = disable_dropout, + max_prompt_length = max_prompt_length, + num_generations = num_generations, + max_completion_length = max_completion_length, + ds3_gather_for_generation = ds3_gather_for_generation, + shuffle_dataset = shuffle_dataset, + generation_batch_size = generation_batch_size, + steps_per_generation = steps_per_generation, + temperature = temperature, + top_p = top_p, + top_k = top_k, + min_p = min_p, + generation_kwargs = generation_kwargs, + repetition_penalty = repetition_penalty, + use_transformers_paged = use_transformers_paged, + cache_implementation = cache_implementation, + use_vllm = use_vllm, + vllm_mode = vllm_mode, + vllm_model_impl = vllm_model_impl, + vllm_guided_decoding_regex = vllm_guided_decoding_regex, + vllm_server_base_url = vllm_server_base_url, + vllm_server_host = vllm_server_host, + vllm_server_port = vllm_server_port, + vllm_server_timeout = vllm_server_timeout, + vllm_gpu_memory_utilization = vllm_gpu_memory_utilization, + 
vllm_tensor_parallel_size = vllm_tensor_parallel_size,
+            beta = beta,
+            num_iterations = num_iterations,
+            epsilon = epsilon,
+            epsilon_high = epsilon_high,
+            reward_weights = reward_weights,
+            normalize_advantages = normalize_advantages,
+            reward_clip_range = reward_clip_range,
+            mask_truncated_completions = mask_truncated_completions,
+            sync_ref_model = sync_ref_model,
+            ref_model_mixup_alpha = ref_model_mixup_alpha,
+            ref_model_sync_steps = ref_model_sync_steps,
+            log_completions = log_completions,
+            num_completions_to_print = num_completions_to_print,
+            wandb_log_unique_prompts = wandb_log_unique_prompts,
+            rloo_k = rloo_k,
+            cliprange = cliprange,
+            kl_coef = kl_coef,
+            exp_name = exp_name,
+            normalize_reward = normalize_reward,
+            num_ppo_epochs = num_ppo_epochs,
+            num_mini_batches = num_mini_batches,
+            total_episodes = total_episodes,
+            response_length = response_length,
+            token_level_kl = token_level_kl,
+            dataset_num_proc = dataset_num_proc,
+            local_rollout_forward_batch_size = local_rollout_forward_batch_size,
+            num_sample_generations = num_sample_generations,
+            stop_token = stop_token,
+            stop_token_id = stop_token_id,
+            missing_eos_penalty = missing_eos_penalty,
+            **kwargs,
+        )
+        self.vllm_sampling_params = vllm_sampling_params
+        self.unsloth_num_chunks = unsloth_num_chunks
+
+pass
+
+class _UnslothRLOOTrainer(Trainer):
+    """
+    Trainer for the REINFORCE Leave-One-Out (RLOO) method. This algorithm was initially proposed in the paper
+    [Back to Basics: Revisiting REINFORCE Style Optimization for Learning from Human Feedback in
+    LLMs](https://huggingface.co/papers/2402.14740).
+
+    Example:
+
+    ```python
+    from datasets import load_dataset
+    from trl import RLOOTrainer
+
+    dataset = load_dataset("trl-lib/tldr", split="train")
+
+    def reward_func(completions, **kwargs):
+        # Dummy reward function that rewards completions with more unique letters.
+        return [float(len(set(completion))) for completion in completions]
+
+    trainer = RLOOTrainer(
+        model="Qwen/Qwen2-0.5B-Instruct",
+        reward_funcs=reward_func,
+        train_dataset=dataset,
+    )
+
+    trainer.train()
+    ```
+
+    Args:
+        model (`Union[str, PreTrainedModel]`):
+            Model to be trained. Can be either:
+
+            - A string, being the *model id* of a pretrained model hosted inside a model repo on huggingface.co, or a
+              path to a *directory* containing model weights saved using
+              [`~transformers.PreTrainedModel.save_pretrained`], e.g., `'./my_model_directory/'`. The model is loaded
+              using [`~transformers.AutoModelForCausalLM.from_pretrained`] with the keyword arguments in
+              `args.model_init_kwargs`.
+            - A [`~transformers.PreTrainedModel`] object. Only causal language models are supported.
+        reward_funcs (`Union[RewardFunc, list[RewardFunc]]`):
+            Reward functions to be used for computing the rewards. To compute the rewards, we call all the reward
+            functions with the prompts and completions and sum the rewards. Can be either:
+
+            - A single reward function, such as:
+                - A string: The *model ID* of a pretrained model hosted inside a model repo on huggingface.co, or a
+                  path to a *directory* containing model weights saved using
+                  [`~transformers.PreTrainedModel.save_pretrained`], e.g., `'./my_model_directory/'`. The model is
+                  loaded using [`~transformers.AutoModelForSequenceClassification.from_pretrained`] with
+                  `num_labels=1` and the keyword arguments in `args.model_init_kwargs`.
+                - A [`~transformers.PreTrainedModel`] object: Only sequence classification models are supported.
+                - A custom reward function: The function is provided with the prompts and the generated completions,
+                  plus any additional columns in the dataset. It should return a list of rewards. Custom reward
+                  functions can also return `None` when the reward is not applicable to those samples. This is useful
+                  for multi-task training where different reward functions apply to different types of samples. When a
+                  reward function returns `None` for a sample, that reward function is excluded from the reward
+                  calculation for that sample. For more details, see [Using a custom reward
+                  function](#using-a-custom-reward-function).
+
+                  The trainer's state is also passed to the reward function: it is an instance of
+                  [`~transformers.TrainerState`] and is exposed through the `trainer_state` argument in the reward
+                  function's signature.
+            - A list of reward functions, where each item can independently be any of the above types. Mixing different
+              types within the list (e.g., a string model ID and a custom reward function) is allowed.
+        args ([`RLOOConfig`], *optional*, defaults to `None`):
+            Configuration for this trainer. If `None`, a default configuration is used.
+        train_dataset ([`~datasets.Dataset`] or [`~datasets.IterableDataset`]):
+            Dataset to use for training. It must include a column `"prompt"`. Any additional columns in the dataset
+            are ignored. The format of the samples can be either:
+
+            - [Standard](dataset_formats#standard): Each sample contains plain text.
+            - [Conversational](dataset_formats#conversational): Each sample contains structured messages (e.g., role
+              and content).
+        eval_dataset ([`~datasets.Dataset`], [`~datasets.IterableDataset`] or `dict[str, Union[Dataset, IterableDataset]]`):
+            Dataset to use for evaluation. It must meet the same requirements as `train_dataset`.
+        processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.ProcessorMixin`] or `None`, *optional*, defaults to `None`):
+            Processing class used to process the data. The padding side must be set to "left". If `None`, the
+            processing class is loaded from the model's name with [`~transformers.AutoProcessor.from_pretrained`]. A
+            padding token, `tokenizer.pad_token`, must be set. If the processing class has not set a padding token,
+            `tokenizer.eos_token` will be used as the default.
+        reward_processing_classes (`Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]`, *optional*, defaults to `None`):
+            Processing classes corresponding to the reward functions specified in `reward_funcs`. Can be either:
+
+            - A single processing class: Used when `reward_funcs` contains only one reward function.
+            - A list of processing classes: Must match the order and length of the reward functions in `reward_funcs`.
+            If set to `None`, or if an element of the list corresponding to a [`~transformers.PreTrainedModel`] is
+            `None`, the tokenizer for the model is automatically loaded using
+            [`~transformers.AutoTokenizer.from_pretrained`]. For elements in `reward_funcs` that are custom reward
+            functions (not [`~transformers.PreTrainedModel`]), the corresponding entries in `reward_processing_classes`
+            are ignored.
+        callbacks (list of [`~transformers.TrainerCallback`], *optional*, defaults to `None`):
+            List of callbacks to customize the training loop. Will add those to the list of default callbacks detailed
+            [here](https://huggingface.co/docs/transformers/main_classes/callback).
+ + If you want to remove one of the default callbacks used, use the [`~transformers.Trainer.remove_callback`] + method. + optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`, *optional*, defaults to `(None, None)`): + A tuple containing the optimizer and the scheduler to use. Will default to an instance of [`AdamW`] on your + model and a scheduler given by [`get_linear_schedule_with_warmup`] controlled by `args`. + peft_config ([`~peft.PeftConfig`], *optional*, defaults to `None`): + PEFT configuration used to wrap the model. If `None`, the model is not wrapped. + """ + + _tag_names = ["trl", "rloo"] + + def __init__( + self, + # Note for dev: we can remove the default None when we remove the deprecated model parameter in version 0.25.0 + model: Union[str, PreTrainedModel] = None, + reward_funcs: Union[RewardFunc, list[RewardFunc]] = None, + args: Optional[RLOOConfig] = None, + train_dataset: Optional[Union[Dataset, IterableDataset]] = None, + eval_dataset: Optional[Union[Dataset, IterableDataset, dict[str, Union[Dataset, IterableDataset]]]] = None, + processing_class: Optional[Union[PreTrainedTokenizerBase, ProcessorMixin]] = None, + reward_processing_classes: Optional[Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]] = None, + callbacks: Optional[list[TrainerCallback]] = None, + optimizers: tuple[Optional[torch.optim.Optimizer], Optional[torch.optim.lr_scheduler.LambdaLR]] = (None, None), + peft_config: Optional["PeftConfig"] = None, + # Deprecated parameters + config=None, + reward_model=None, + policy=None, + ref_policy=None, + data_collator=None, + ): + + if hasattr(model, 'vllm_engine') and hasattr(args, 'use_vllm'): + if (getattr(args, 'use_vllm', False) == False): + args.use_vllm = True + # Handle deprecated parameters + if config is not None: + warnings.warn( + "Parameter 'config' is deprecated and will be removed in version 0.25.0. Please use 'args' instead. " + "We are setting args=config" + ) + if args is None: + args = config + else: + raise ValueError("Cannot specify both 'config' (deprecated) and 'args'. Please use 'args' only.") + + if reward_model is not None: + warnings.warn( + "Parameter 'reward_model' is deprecated and will be removed in version 0.25.0. Please use " + "'reward_funcs' instead. We are setting reward_funcs=reward_model" + ) + if reward_funcs is None: + reward_funcs = reward_model + else: + raise ValueError( + "Cannot specify both 'reward_model' (deprecated) and 'reward_funcs'. Please use 'reward_funcs' " + "only." + ) + if policy is not None: + warnings.warn( + "Parameter 'policy' is deprecated and will be removed in version 0.25.0. Please use 'model' instead. " + "We are setting model=policy" + ) + if model is None: + model = policy + else: + raise ValueError("Cannot specify both 'policy' (deprecated) and 'model'. Please use 'model' only.") + if ref_policy is not None: + warnings.warn( + "Parameter 'ref_policy' is deprecated and will be removed in version 0.25.0. To use the initial model " + "as the reference model, simply omit this parameter. The parameter is ignored." + ) + if data_collator is not None: + warnings.warn( + "Parameter 'data_collator' is deprecated and will be removed in version 0.25.0. The RLOOTrainer does " + "not use a data collator, so this parameter is ignored." + ) + if "input_ids" in train_dataset.column_names: + warnings.warn( + "The training dataset contains a column named 'input_ids', indicating that it is pre-tokenized. 
" + "Support for pre-tokenized datasets is deprecated and will be removed in version 0.25. Please provide " + "the raw dataset (conversational or standard) with a 'prompt' column instead." + ) + + def decode(example, tokenizer): + return {"prompt": tokenizer.decode(example["input_ids"])} + + train_dataset = train_dataset.map(decode, fn_kwargs={"tokenizer": processing_class}) + if eval_dataset is not None and "input_ids" in eval_dataset.column_names: + warnings.warn( + "The evaluation dataset contains a column named 'input_ids', indicating that it is pre-tokenized. " + "Support for pre-tokenized datasets is deprecated and will be removed in version 0.25. Please provide " + "the raw dataset (conversational or standard) with a 'prompt' column instead." + ) + + def decode(example, tokenizer): + return {"prompt": tokenizer.decode(example["input_ids"])} + + eval_dataset = eval_dataset.map(decode, fn_kwargs={"tokenizer": processing_class}) + + # Args + if args is None: + model_name = model if isinstance(model, str) else model.config._name_or_path + model_name = model_name.split("/")[-1] + args = RLOOConfig(f"{model_name}-RLOO") + + # Models + # Trained model + model_init_kwargs = args.model_init_kwargs or {} + if isinstance(model, str): + model_id = model + dtype = model_init_kwargs.get("dtype") + if isinstance(dtype, torch.dtype) or dtype == "auto" or dtype is None: + pass # dtype is already a torch.dtype or "auto" or None + elif isinstance(dtype, str): # it's a str, but not "auto" + dtype = getattr(torch, dtype) + model_init_kwargs["dtype"] = dtype + else: + raise ValueError( + "Invalid `dtype` passed to `RLOOConfig`. Expected either 'auto' or a string representing " + f"a `torch.dtype` (e.g., 'float32'), but got {dtype}." + ) + # Disable caching if gradient checkpointing is enabled [not supported] + config = AutoConfig.from_pretrained(model_id) + architecture = getattr(transformers, config.architectures[0]) + model = architecture.from_pretrained(model_id, **model_init_kwargs) + else: + model_id = model.config._name_or_path + if args.model_init_kwargs is not None: + logger.warning( + "You passed `model_init_kwargs` to the `RLOOConfig`, but your model is already instantiated. " + "The `model_init_kwargs` will be ignored." 
+ ) + + # Some models [SmolVLM/Idefics3] don't support `logits_to_keep` argument and error out if we pass it + # Inspect the forward method before we wrap the model with PEFT + self.model_kwarg_keys = ( + inspect.signature(model.forward).parameters.keys() + if not hasattr(model, "get_base_model") + else inspect.signature(model.get_base_model().forward).parameters.keys() + ) + + if False: + model = prepare_peft_model(model, peft_config, args) + + # Processing class + if processing_class is None: + processing_class = AutoProcessor.from_pretrained(model.config._name_or_path) + + # Handle pad token for processors or tokenizers + if isinstance(processing_class, ProcessorMixin): + tokenizer = processing_class.tokenizer + elif isinstance(processing_class, PreTrainedTokenizerBase): + tokenizer = processing_class + else: + raise TypeError("The `processing_class` must be either a `PreTrainedTokenizerBase` or a `ProcessorMixin`") + + if tokenizer.pad_token is None: + tokenizer.pad_token = tokenizer.eos_token + + self.pad_token = tokenizer.pad_token + self.pad_token_id = tokenizer.pad_token_id + self.eos_token_id = tokenizer.eos_token_id + + # Reward functions + if not isinstance(reward_funcs, list): + reward_funcs = [reward_funcs] + self.reward_func_names = [] + for i, reward_func in enumerate(reward_funcs): + if isinstance(reward_func, str): + reward_funcs[i] = AutoModelForSequenceClassification.from_pretrained( + reward_func, num_labels=1, **model_init_kwargs + ) + if isinstance(reward_funcs[i], nn.Module): # Use Module over PretrainedModel for compat w/ compiled models + self.reward_func_names.append(reward_funcs[i].config._name_or_path.split("/")[-1]) + else: + self.reward_func_names.append(reward_funcs[i].__name__) + self.reward_funcs = reward_funcs + + # Reward weights + if args.reward_weights is not None: + if len(args.reward_weights) != len(reward_funcs): + raise ValueError( + f"Number of reward weights ({len(args.reward_weights)}) must match number of reward " + f"functions ({len(reward_funcs)})" + ) + self.reward_weights = torch.tensor(args.reward_weights, dtype=torch.float32) + else: + self.reward_weights = torch.ones(len(reward_funcs), dtype=torch.float32) + + # Reward processing class + if reward_processing_classes is None: + reward_processing_classes = [None] * len(reward_funcs) + elif not isinstance(reward_processing_classes, list): + reward_processing_classes = [reward_processing_classes] + if len(reward_processing_classes) != len(reward_funcs): + raise ValueError( + f"The number of reward processing classes ({len(reward_processing_classes)}) must match the number of " + f"reward functions ({len(reward_funcs)})." + ) + + for i, (reward_processing_class, reward_func) in enumerate(zip(reward_processing_classes, reward_funcs)): + if isinstance(reward_func, PreTrainedModel): + if reward_processing_class is None: + reward_processing_class = AutoTokenizer.from_pretrained(reward_func.config._name_or_path) + if reward_processing_class.pad_token_id is None: + reward_processing_class.pad_token = reward_processing_class.eos_token + # The reward model computes the reward for the latest non-padded token in the input sequence. + # So it's important to set the pad token ID to the padding token ID of the processing class. 
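+                    # Concretely, the sequence-classification head scores the hidden state of the last
+                    # non-padded position, so a mismatched pad token id would silently shift which token
+                    # gets scored.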
+                    reward_func.config.pad_token_id = reward_processing_class.pad_token_id
+                    reward_processing_classes[i] = reward_processing_class
+
+        self.reward_processing_classes = reward_processing_classes
+
+        # Training arguments
+        self.max_prompt_length = args.max_prompt_length
+        self.max_completion_length = args.max_completion_length
+        self.num_generations = args.num_generations
+        self.temperature = args.temperature
+        self.top_p = args.top_p
+        self.top_k = args.top_k
+        self.min_p = args.min_p
+        self.repetition_penalty = args.repetition_penalty
+        self.use_transformers_paged = args.use_transformers_paged
+        self.use_vllm = args.use_vllm
+        self.vllm_mode = args.vllm_mode
+        self.vllm_gpu_memory_utilization = args.vllm_gpu_memory_utilization  # only applies to colocation mode
+        self.vllm_tensor_parallel_size = args.vllm_tensor_parallel_size  # only applies to colocation mode
+        self.normalize_advantages = args.normalize_advantages
+        self.mask_truncated_completions = args.mask_truncated_completions
+        self.reward_clip_range = args.reward_clip_range
+
+        # Datasets
+        self.shuffle_dataset = args.shuffle_dataset
+
+        if (
+            isinstance(train_dataset, IterableDataset)
+            or isinstance(eval_dataset, IterableDataset)
+            or (
+                isinstance(eval_dataset, dict) and any(isinstance(ds, IterableDataset) for ds in eval_dataset.values())
+            )
+        ):
+            # See https://github.com/huggingface/trl/issues/3213
+            raise NotImplementedError(
+                "Iterable datasets are not yet supported in RLOOTrainer. Please use a standard dataset instead."
+            )
+
+        # Multi-step
+        self.num_iterations = args.num_iterations
+        self.epsilon_low = args.epsilon
+        self.epsilon_high = args.epsilon_high if args.epsilon_high is not None else args.epsilon
+        # Tracks the number of iterations (forward + backward passes), including those within a grad accum cycle
+        self._step = 0
+        # Buffer the batch to reuse generated outputs across multiple updates. For more details, see
+        # `_get_train_sampler` and `_prepare_inputs`.
+        self._buffered_inputs = None
+
+        # The trainer estimates the number of FLOPs (floating-point operations) using the number of elements in the
+        # input tensor associated with the key "input_ids". However, in RLOO, the sampled data does not include the
+        # "input_ids" key. Instead, the only available key is "prompt". As a result, the trainer issues the warning:
+        # "Could not estimate the number of tokens of the input, floating-point operations will not be computed." To
+        # suppress this warning, we set the "estimate_tokens" key in the model's "warnings_issued" dictionary to True.
+        # This acts as a flag to indicate that the warning has already been issued.
+        model.warnings_issued["estimate_tokens"] = True
+
+        super().__init__(
+            model=model,
+            args=args,
+            data_collator=identity,  # No data collation is needed in RLOO
+            train_dataset=train_dataset,
+            eval_dataset=eval_dataset,
+            processing_class=processing_class,
+            callbacks=callbacks,
+            optimizers=optimizers,
+        )
+
+        # Reference model
+        self.beta = args.beta
+        if self.beta == 0.0:
+            # If beta is 0.0, the reference model is not needed
+            self.ref_model = None
+        elif is_peft_model(model):
+            # If PEFT is used, the reference model is not needed since the adapter can be disabled
+            # to revert to the initial model.
+ self.ref_model = None + else: + # For deepspeed, fsdp or non-distributed models, create a reference model from scratch + config = AutoConfig.from_pretrained(model_id) + architecture = getattr(transformers, config.architectures[0]) + self.ref_model = architecture.from_pretrained(model_id, **model_init_kwargs) + + # Disable dropout in the models + if args.disable_dropout: + disable_dropout_in_model(model) + if self.ref_model is not None: + disable_dropout_in_model(self.ref_model) + + # Initialize the metrics + self._metrics = {"train": defaultdict(list), "eval": defaultdict(list)} + self._total_train_tokens = 0 + self.log_completions = args.log_completions + self.wandb_log_unique_prompts = args.wandb_log_unique_prompts + self.num_completions_to_print = args.num_completions_to_print + # Keep logs sized to the generation batch to record only outputs from the latest model update. + self._logs = { + "prompt": deque(maxlen=args.generation_batch_size), + "completion": deque(maxlen=args.generation_batch_size), + "rewards": defaultdict(lambda: deque(maxlen=args.generation_batch_size)), + "advantages": deque(maxlen=args.generation_batch_size), + } + + # Ensure each process receives a unique seed to prevent duplicate completions when generating with + # transformers if num_generations exceeds per_device_train_batch_size. We could skip it if we use vLLM, but + # it's safer to set it in all cases. + set_seed(args.seed, device_specific=True) + + if self.use_vllm: + if not is_vllm_available(): + raise ImportError( + "vLLM is not available and `use_vllm` is set to True. Please install vLLM with " + "`pip install vllm` to use it." + ) + + if self.vllm_mode == "server": + if self.accelerator.is_main_process: + if args.vllm_server_base_url is not None: + base_url = args.vllm_server_base_url + else: + base_url = f"http://{args.vllm_server_host}:{args.vllm_server_port}" + self.vllm_client = VLLMClient(base_url=base_url, connection_timeout=args.vllm_server_timeout) + self.vllm_client.init_communicator(device=torch.cuda.current_device()) + + elif self.vllm_mode == "colocate": + if not self.accelerator.num_processes % self.vllm_tensor_parallel_size == 0: + raise ValueError( + f"vllm_tensor_parallel_size ({self.vllm_tensor_parallel_size}) must divide world size " + f"({self.accelerator.num_processes}) evenly." 
+ ) + + if self.vllm_tensor_parallel_size > 1: + self.tp_group, _ = torch.distributed.new_subgroups_by_enumeration( + [ + list(range(i * self.vllm_tensor_parallel_size, (i + 1) * self.vllm_tensor_parallel_size)) + for i in range(self.accelerator.num_processes // self.vllm_tensor_parallel_size) + ] + ) + os.environ["RANK"] = str(self.accelerator.process_index) + os.environ["LOCAL_RANK"] = str(self.accelerator.local_process_index) + os.environ["WORLD_SIZE"] = str(self.accelerator.num_processes) + os.environ["MASTER_ADDR"] = os.environ.get("MASTER_ADDR", "localhost") + os.environ["MASTER_PORT"] = os.environ.get("MASTER_PORT", "12345") + + if self.max_prompt_length is not None and self.max_completion_length is not None: + max_model_len = self.max_prompt_length + self.max_completion_length + else: + max_model_len = None + self.llm = model.vllm_engine + else: + raise ValueError(f"vllm_mode must be either 'server' or 'colocate', got '{self.vllm_mode}'.") + self.guided_decoding_regex = args.vllm_guided_decoding_regex + + self._last_loaded_step = -1 + self.accelerator.wait_for_everyone() + else: + generation_kwargs = { + "max_new_tokens": self.max_completion_length, + "do_sample": True, + "pad_token_id": tokenizer.pad_token_id, + "bos_token_id": tokenizer.bos_token_id, + "eos_token_id": tokenizer.eos_token_id, + "temperature": self.temperature, + "top_p": self.top_p, + "top_k": self.top_k, + "min_p": self.min_p, + "repetition_penalty": self.repetition_penalty, + "cache_implementation": args.cache_implementation, + } + if args.use_transformers_paged: + generation_kwargs["max_batch_tokens"] = 512 + generation_kwargs["num_blocks"] = 1024 + generation_kwargs["block_size"] = 128 + if args.generation_kwargs is not None: + generation_kwargs.update(args.generation_kwargs) + self.generation_config = GenerationConfig(**generation_kwargs) + + # Gradient accumulation requires scaled loss. Normally, loss scaling in the parent class depends on whether the + # model accepts loss-related kwargs. Since we compute our own loss, this check is irrelevant. We set + # self.model_accepts_loss_kwargs to False to enable scaling. + self.model_accepts_loss_kwargs = False + + # Add tags to the model + self.model.add_model_tags(self._tag_names) + + if self.ref_model is not None: + if self.is_deepspeed_enabled: + self.ref_model = prepare_deepspeed(self.ref_model, self.accelerator) + elif self.is_fsdp_enabled: + self.ref_model = prepare_fsdp(self.ref_model, self.accelerator) + else: + self.ref_model = self.accelerator.prepare_model(self.ref_model, evaluation_mode=True) + + if args.sync_ref_model: + self.add_callback(SyncRefModelCallback(ref_model=self.ref_model, accelerator=self.accelerator)) + + for i, reward_func in enumerate(self.reward_funcs): + if isinstance(reward_func, PreTrainedModel): + if self.is_deepspeed_enabled: + self.reward_funcs[i] = prepare_deepspeed(reward_func, self.accelerator) + else: + # set device placement to True to make `prepare_model` move `reward_func` to device when using fsdp + self.reward_funcs[i] = self.accelerator.prepare_model( + reward_func, evaluation_mode=True, device_placement=True + ) + + def _set_signature_columns_if_needed(self): + # If `self.args.remove_unused_columns` is True, non-signature columns are removed. + # By default, this method sets `self._signature_columns` to the model's expected inputs. + # In RLOOTrainer, we preprocess data, so using the model's signature columns doesn't work. 
+        # Instead, we set them to the columns expected by the `training_step` method, hence the override.
+        if self._signature_columns is None:
+            self._signature_columns = ["prompt"]
+
+    # This method overrides `Trainer.get_train_dataloader` to support our custom batching strategy.
+    # Instead of returning a standard per-step batch (i.e., `per_device_batch_size`), our dataloader loads a
+    # *generation* batch (i.e., `per_device_batch_size × steps_per_generation`). This allows us to generate completions
+    # once every `steps_per_generation` steps, rather than once per accumulation step, which is significantly more
+    # efficient. The only change from the original implementation is multiplying the batch size by
+    # `steps_per_generation`. Thus, `_prepare_inputs` is called with this *generation* batch, and it handles the
+    # splitting internally.
+    # Maintenance note: This method is a copy-paste of the original `Trainer.get_train_dataloader` with only one line
+    # modification. As a result, some parts of the method aren't relevant to RLOO, but we keep them to stay one line
+    # apart from the super method, ensuring easier maintenance in the future.
+    def get_train_dataloader(self):
+        if self.train_dataset is None:
+            raise ValueError("Trainer: training requires a train_dataset.")
+
+        train_dataset = self.train_dataset
+        data_collator = self.data_collator
+        if is_datasets_available() and isinstance(train_dataset, datasets.Dataset):
+            train_dataset = self._remove_unused_columns(train_dataset, description="training")
+        else:
+            data_collator = self._get_collator_with_removed_columns(data_collator, description="training")
+
+        dataloader_params = {
+            "batch_size": self._train_batch_size * self.args.steps_per_generation,  # < this is the change
+            "collate_fn": data_collator,
+            "num_workers": self.args.dataloader_num_workers,
+            "pin_memory": self.args.dataloader_pin_memory,
+            "persistent_workers": self.args.dataloader_persistent_workers,
+        }
+
+        if not isinstance(train_dataset, torch.utils.data.IterableDataset):
+            dataloader_params["sampler"] = self._get_train_sampler()
+            dataloader_params["drop_last"] = self.args.dataloader_drop_last
+            dataloader_params["worker_init_fn"] = partial(
+                seed_worker, num_workers=self.args.dataloader_num_workers, rank=self.args.process_index
+            )
+
+            dataloader_params["prefetch_factor"] = self.args.dataloader_prefetch_factor
+
+        return self.accelerator.prepare(DataLoader(train_dataset, **dataloader_params))
+
+    def _get_train_sampler(self, dataset: Optional[Dataset] = None) -> Sampler:
+        # Returns a sampler that
+        # 1. ensures each prompt is repeated across multiple processes. This guarantees that identical prompts are
+        #    distributed to different GPUs, allowing rewards to be computed and normalized correctly within each prompt
+        #    group. Using the same seed across processes ensures consistent prompt assignment, preventing discrepancies
+        #    in group formation.
+        # 2. repeats the batch multiple times to allow reusing generations across multiple updates. Refer to
+        #    _prepare_inputs to see how the generations are stored and reused.
+
+        # In the following figure, the values are the prompt indices. The first row shows the first sampled batch, the
+        # second row shows the second sampled batch, and so on.
+ # + # | GPU 0 | GPU 1 | + # + # global_step step <-───> num_generations=2 + # <-───────> per_device_train_batch_size=3 + # grad_accum ▲ ▲ 0 0 0 0 1 1 2 2 <- Generate for the first `steps_per_generation` (prompts 0 to 11); store the completions; use the first slice to compute the loss + # =2 ▼ | 0 1 3 3 4 4 5 5 <- Take the stored generations and use the second slice to compute the loss + # | + # | 1 2 6 6 7 7 8 8 <- Take the stored generations and use the third slice to compute the loss + # steps_per_gen=4 ▼ 1 3 9 9 10 10 11 11 <- Take the stored generations and use the fourth slice to compute the loss + # + # 2 4 12 12 13 13 14 14 <- Generate for the second `steps_per_generation` (prompts 12 to 23); store the completions; use the first slice to compute the loss + # 2 5 15 15 16 16 17 17 <- Take the stored generations and use the second slice to compute the loss + # ... + if dataset is None: + dataset = self.train_dataset + return RepeatSampler( + data_source=dataset, + mini_repeat_count=self.num_generations, + batch_size=self.args.generation_batch_size // self.num_generations, + repeat_count=self.num_iterations * self.args.steps_per_generation, + shuffle=self.shuffle_dataset, + seed=self.args.seed, + ) + + def _get_eval_sampler(self, eval_dataset) -> Sampler: + # See _get_train_sampler for an explanation of the sampler. + return RepeatSampler( + data_source=eval_dataset, + mini_repeat_count=self.num_generations, + seed=self.args.seed, + ) + + @profiling_decorator + def _get_per_token_logps_and_entropies( + self, + model, + input_ids, + attention_mask, + logits_to_keep, + batch_size=None, + compute_entropy=False, + ) -> dict[str, Optional[torch.Tensor]]: + """Compute log-probs and (optionally) entropies for each token.""" + batch_size = batch_size or input_ids.size(0) # Chunk inputs into smaller batches to reduce memory peak + all_logps = [] + all_entropies = [] + for start in range(0, input_ids.size(0), batch_size): + input_ids_batch = input_ids[start : start + batch_size] + attention_mask_batch = attention_mask[start : start + batch_size] + + # Build model inputs - check if the model supports logits_to_keep (some models and VLMs don't) + model_inputs = {"input_ids": input_ids_batch, "attention_mask": attention_mask_batch} + + # Only add logits_to_keep if the model supports it + if "logits_to_keep" in self.model_kwarg_keys: + # We add 1 to `logits_to_keep` because the last logits of the sequence is later excluded + model_inputs["logits_to_keep"] = logits_to_keep + 1 + + model_inputs["use_cache"] = False # only used in generation; set False to suppress warnings + + logits = model(**model_inputs).logits + # Exclude the last value: it corresponds to the next token pred + logits = logits[:, :-1, :] # (B, L-1, H) + # Only keep the last logits_to_keep. For model that support logits_to_keep, this is a no-op. + logits = logits[:, -logits_to_keep:, :] # (B, logits_to_keep, H) + # Divide logits by sampling temperature. 
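+            # Completions were sampled from softmax(logits / T), so the log-probs must be computed on the
+            # same temperature-scaled logits to match the distribution that actually generated the tokens.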
+ # See https://huggingface.co/blog/the_n_implementation_details_of_rlhf_with_ppo#policy-training-implementation-details + logits = logits / self.temperature + + completion_ids = input_ids_batch[:, -logits_to_keep:] + logps = selective_log_softmax(logits, completion_ids) # compute logprobs + all_logps.append(logps) + + if compute_entropy: + with torch.no_grad(): + entropies = entropy_from_logits(logits) + all_entropies.append(entropies) + + logps = torch.cat(all_logps, dim=0) + entropies = torch.cat(all_entropies, dim=0) if compute_entropy else None + return logps, entropies + + def _fix_param_name_to_vllm(self, name, extra_prefixes: Optional[list[str]] = None): + extra_prefixes = extra_prefixes or [] + prefixes = ["_checkpoint_wrapped_module."] + extra_prefixes + for prefix in prefixes: + name = name.replace(prefix, "") + return name + + def _sync_fsdp1_params_to_vllm(self, module: nn.Module, prefix: str = "", visited=None): + """Memory-efficient post-order traversal of FSDP modules to extract full parameters and sync with vLLM.""" + # For FSDP1, we need to recurse into children and also use summon_full_params + if visited is None: + visited = set() + for child_name, child_module in module.named_children(): + child_prefix = f"{prefix}.{child_name}" if prefix else child_name + self._sync_fsdp1_params_to_vllm( + child_module, prefix=child_prefix, visited=visited + ) # recurse into the child + + if isinstance(module, FSDP): + with FSDP.summon_full_params(module, recurse=False, writeback=False): + for param_name, param in module.named_parameters(): + full_name = f"{prefix}.{param_name}" if prefix else param_name + full_name = self._fix_param_name_to_vllm(full_name, extra_prefixes=["_fsdp_wrapped_module."]) + + if full_name in visited: + continue # skip FSDP subtrees already traversed + visited.add(full_name) + + if self.vllm_mode == "server" and self.accelerator.is_main_process: + self.vllm_client.update_named_param(full_name, param.data) + elif self.vllm_mode == "colocate": + + pass + + pass + + def _sync_fsdp2_params_to_vllm(self, module: nn.Module): + # For FSDP2, module already covers all parameters, so no need for recursion + for name, param in module.items(): + if param.is_cpu: + param = param.to(torch.device("cuda")) + param = param.full_tensor() + + if self.vllm_mode == "server" and self.accelerator.is_main_process: + self.vllm_client.update_named_param(name, param) + elif self.vllm_mode == "colocate": + + pass + + pass + + @profiling_decorator + def _move_model_to_vllm(self): + # For DeepSpeed ZeRO-3 and FSDP, we need to gather all parameters before operations + deepspeed_plugin = self.accelerator.state.deepspeed_plugin + zero_stage_3 = deepspeed_plugin is not None and deepspeed_plugin.zero_stage == 3 + if zero_stage_3: + import deepspeed + + gather_if_zero3 = deepspeed.zero.GatheredParameters + else: + gather_if_zero3 = nullcontext + + if is_peft_model(self.model): + # With PEFT and FSDP/DeepSpeed ZeRO Stage 3, we must gather the full model at once before merging, as + # merging adapters in a sharded manner is not supported. + # TODO: does this work with FSDP? 
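+            # Merging folds each LoRA delta into its base weight (conceptually W <- W + scaling * B @ A),
+            # so vLLM receives plain dense tensors and needs no adapter-specific handling.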
+ with gather_if_zero3(list(self.model.parameters())): + self.model.merge_adapter() + + # Update vLLM weights while parameters are gathered + if self.is_fsdp_enabled: # note if using FSDP, gather_if_zero3 is nullcontext + # Update vLLM weights while parameters are gathered + # For PEFT with FSDP we need to use the memory efficient post-order traversal + fsdp_plugin = getattr(self.accelerator.state, "fsdp_plugin", None) + fsdp_version = getattr(fsdp_plugin, "fsdp_version", 1) if fsdp_plugin else 1 + if fsdp_version == 1: + self._sync_fsdp1_params_to_vllm( + self.model + ) # use memory-efficient post-order traversal for FSDP + elif fsdp_version == 2: + self._sync_fsdp2_params_to_vllm(self.model) + else: + # DeepSpeed ZeRO-3 with PEFT + for name, param in self.model.named_parameters(): + # When using PEFT, we need to recover the original parameter name and discard some parameters + name = name.removeprefix("base_model.model.").replace(".base_layer", "") + if self.model.prefix in name: + continue + # When module to save, remove its prefix and discard the original module + if "original_module" in name: + continue + name = self._fix_param_name_to_vllm(name, extra_prefixes=["modules_to_save.default."]) + + if self.vllm_mode == "server" and self.accelerator.is_main_process: + self.vllm_client.update_named_param(name, param.data) + elif self.vllm_mode == "colocate": + + pass + + pass + # Unmerge adapters while parameters are still gathered + self.model.unmerge_adapter() + # Parameters will automatically be repartitioned when exiting the context + else: + # For non-PEFT models, simply gather (if needed) and update each parameter individually. + if self.is_fsdp_enabled: + fsdp_plugin = getattr(self.accelerator.state, "fsdp_plugin", None) + fsdp_version = getattr(fsdp_plugin, "fsdp_version", 1) if fsdp_plugin else 1 + if fsdp_version == 1: + self._sync_fsdp1_params_to_vllm(self.model) # use memory-efficient post-order traversal for FSDP + elif fsdp_version == 2: + self._sync_fsdp2_params_to_vllm(self.model) + else: + for name, param in self.model.named_parameters(): + name = self._fix_param_name_to_vllm(name) + with gather_if_zero3([param]): + if self.vllm_mode == "server" and self.accelerator.is_main_process: + self.vllm_client.update_named_param(name, param.data) + elif self.vllm_mode == "colocate": + + pass + + pass + + # Reset cache on vLLM + if self.vllm_mode == "server" and self.accelerator.is_main_process: + self.vllm_client.reset_prefix_cache() + elif self.vllm_mode == "colocate": + self.llm.reset_prefix_cache() + + @profiling_decorator + def _prepare_inputs( + self, generation_batch: dict[str, Union[torch.Tensor, Any]] + ) -> dict[str, Union[torch.Tensor, Any]]: + # Prepares inputs for model training/evaluation by managing completion generation and batch handling. 
+        # During training:
+        # - Receives the local generation batch (Per-GPU batch size × steps per generation)
+        #   from the modified training dataloader instead of the standard local batch
+        # - Generates completions once for the entire generation batch and splits it into batches of size
+        #   `per_device_train_batch_size`
+        # - Buffers these completions and returns the appropriate slice for the current accumulation step
+        # - Optimizes by regenerating completions only periodically (every steps_per_generation * num_iterations)
+        # During evaluation:
+        # - The input is treated as a standard local batch (no accumulation, no multiple iterations)
+        # - Completions are generated for each batch without buffering or reuse
+        # Returns a single local batch in both cases.
+
+        mode = "train" if self.model.training else "eval"
+        if mode == "train":
+            generate_every = self.args.steps_per_generation * self.num_iterations
+            if self._step % generate_every == 0 or self._buffered_inputs is None:
+                # self._buffered_inputs=None can occur when resuming from a checkpoint
+                generation_batch = self._generate_and_score_completions(generation_batch)
+                generation_batch = split_pixel_values_by_grid(generation_batch)
+                try:
+                    generation_batch = shuffle_sequence_dict(generation_batch)
+                except Exception:
+                    pass
+                generation_batches = split_tensor_dict(generation_batch, self.args.steps_per_generation)
+                self._buffered_inputs = [unsplit_pixel_values_by_grid(batch) for batch in generation_batches]
+            inputs = self._buffered_inputs[self._step % self.args.steps_per_generation]
+            self._step += 1
+        else:
+            # In evaluation, there is neither batch grouping for generation, nor multiple iterations, hence
+            # local generation batch == local eval batch
+            inputs = self._generate_and_score_completions(generation_batch)
+        return inputs
+
+    @profiling_decorator
+    def _calculate_rewards(self, inputs, prompts, completions, completion_ids_list):
+        device = self.accelerator.device
+        rewards_per_func = torch.zeros(len(prompts), len(self.reward_funcs), device=device)
+
+        # Repeat all input columns (except "prompt", "completion", and "completion_ids") to match the number of
+        # generations
+        keys = [key for key in inputs[0] if key not in ["prompt", "completion", "completion_ids"]]
+        reward_kwargs = {key: [example[key] for example in inputs] for key in keys}
+
+        # This allows for dynamic reward shaping based on training progress.
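+        # A custom reward function could, for example, anneal a shaping term as training advances;
+        # a hypothetical sketch (TrainerState exposes `global_step` and `max_steps`):
+        #
+        #     def length_penalty(completions, trainer_state=None, **kwargs):
+        #         progress = trainer_state.global_step / max(trainer_state.max_steps, 1)
+        #         return [-progress * len(completion) for completion in completions]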
+        reward_kwargs["trainer_state"] = self.state
+
+        for i, (reward_func, reward_processing_class, reward_func_name) in enumerate(
+            zip(self.reward_funcs, self.reward_processing_classes, self.reward_func_names)
+        ):
+            with profiling_context(self, reward_func_name):
+                if isinstance(reward_func, nn.Module):  # nn.Module (rather than PreTrainedModel), for compatibility with compiled models
+                    if is_conversational(inputs[0]):
+                        messages = [{"messages": p + c} for p, c in zip(prompts, completions)]
+                        texts = [apply_chat_template(x, reward_processing_class)["text"] for x in messages]
+                    else:
+                        texts = [p + c for p, c in zip(prompts, completions)]
+                    reward_inputs = reward_processing_class(
+                        text=texts, return_tensors="pt", padding=True, padding_side="right", add_special_tokens=False
+                    )
+                    reward_inputs = super()._prepare_inputs(reward_inputs)
+                    with torch.inference_mode():
+                        rewards_per_func[:, i] = reward_func(**reward_inputs).logits[:, 0]  # Shape (B*G,)
+                else:
+                    output_reward_func = reward_func(
+                        prompts=prompts, completions=completions, completion_ids=completion_ids_list, **reward_kwargs
+                    )
+                    # Convert None values to NaN
+                    output_reward_func = [reward if reward is not None else torch.nan for reward in output_reward_func]
+
+                    rewards_per_func[:, i] = torch.tensor(output_reward_func, dtype=torch.float32, device=device)
+
+        # If all reward functions return None for a given row, issue a detailed warning
+        if torch.isnan(rewards_per_func).all(dim=1).any():
+            nan_row_idx = torch.isnan(rewards_per_func).all(dim=1).nonzero(as_tuple=True)[0][0]
+            row_reward_kwargs = {
+                key: value[nan_row_idx] for key, value in reward_kwargs.items() if key != "trainer_state"
+            }
+            row_reward_kwargs["prompt"] = prompts[nan_row_idx]
+            row_reward_kwargs["completion"] = completions[nan_row_idx]
+            logger.warning(
+                f"All reward functions returned None for the following kwargs:\n{row_reward_kwargs}\n"
+                "Please ensure that at least one reward function returns a valid reward."
+            )
+
+        # Gather the reward per function: this part is crucial, because the rewards are normalized per group and the
+        # completions may be distributed across processes
+        rewards_per_func = gather(rewards_per_func)
+        return rewards_per_func
+
+    def _generate_and_score_completions(
+        self, inputs: list[dict[str, Union[torch.Tensor, Any]]]
+    ) -> dict[str, Union[torch.Tensor, Any]]:
+        device = self.accelerator.device
+        mode = "train" if self.model.training else "eval"
+
+        prompts = [x["prompt"] for x in inputs]
+
+        # We don't yet support visual reward models/functions, so we keep a copy of the original text-only prompts for
+        # later use in the reward computation. If images are present, we insert {"type": "image"} as required by the
+        # VLM chat template.
+        original_prompts = copy.deepcopy(prompts)
+
+        prompts_text = [maybe_apply_chat_template(example, self.processing_class)["prompt"] for example in inputs]
+
+        prompt_inputs = self.processing_class(
+            text=prompts_text,
+            return_tensors="pt",
+            padding=True,
+            padding_side="left",
+            add_special_tokens=False,
+        )
+        prompt_inputs = super()._prepare_inputs(prompt_inputs)
+        prompt_ids, prompt_mask = prompt_inputs["input_ids"], prompt_inputs["attention_mask"]
+
+        if self.max_prompt_length is not None:
+            # If max_prompt_length is set, we trim the prompt to keep only the last `max_prompt_length` tokens.
+            # Then we decode those tokens back into text. We manually remove leading pad tokens from the decoded text,
+            # because we can't use `skip_special_tokens=True` (some special tokens are still needed for generation).
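+            # Illustrative example (hypothetical tokens): with max_prompt_length=3, a left-padded prompt
+            # [pad, t1, t2, t3, t4] keeps only its last 3 tokens [t2, t3, t4], and the attention mask is
+            # sliced identically so the two stay aligned.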
+            prompt_ids, prompt_mask = truncate_with_protected_tokens(
+                prompt_ids, prompt_mask, self.max_prompt_length, protected_tokens=[]
+            )
+
+            prompts_text = self.processing_class.batch_decode(
+                prompt_ids, skip_special_tokens=False, clean_up_tokenization_spaces=False
+            )
+            prompts_text = [re.sub(rf"^({re.escape(self.pad_token)})+", "", text) for text in prompts_text]
+
+        # Generate completions using either vLLM or regular generation
+        if self.use_vllm:
+            # First, update the vLLM weights if needed
+            if self.state.global_step != self._last_loaded_step:
+                self._move_model_to_vllm()
+                self._last_loaded_step = self.state.global_step
+
+            # Generate completions using vLLM: gather all prompts and use them in a single call in the main process
+            if self.vllm_mode == "server":
+                all_prompts_text = gather_object(prompts_text)
+
+                if self.accelerator.is_main_process:
+                    # Since 'prompts' contains 'num_generations' duplicates, we first take unique prompts, and generate
+                    # num_generations outputs for each one. This is faster than generating outputs for each duplicate
+                    # prompt individually.
+                    ordered_set_of_prompts = all_prompts_text[:: self.num_generations]
+
+                    with profiling_context(self, "vLLM.generate"):
+                        completion_ids = self.vllm_client.generate(
+                            prompts=ordered_set_of_prompts,
+                            n=self.num_generations,
+                            repetition_penalty=self.repetition_penalty,
+                            temperature=self.temperature,
+                            top_p=self.top_p,
+                            top_k=-1 if self.top_k is None else self.top_k,
+                            min_p=0.0 if self.min_p is None else self.min_p,
+                            max_tokens=self.max_completion_length,
+                            guided_decoding_regex=self.guided_decoding_regex,
+                            generation_kwargs=self.args.generation_kwargs,
+                        )
+                else:
+                    completion_ids = [None] * len(all_prompts_text)
+                # Broadcast the completions from the main process to all processes, ensuring each process receives its
+                # corresponding slice.
+                completion_ids = broadcast_object_list(completion_ids, from_process=0)
+                process_slice = slice(
+                    self.accelerator.process_index * len(prompts),
+                    (self.accelerator.process_index + 1) * len(prompts),
+                )
+                completion_ids = completion_ids[process_slice]
+
+            # Generate completions using colocated vLLM instances: each device holds a vLLM copy and works on its
+            # own batch of prompts
+            elif self.vllm_mode == "colocate":
+                if self.guided_decoding_regex:
+                    guided_decoding = GuidedDecodingParams(regex=self.guided_decoding_regex)
+                else:
+                    guided_decoding = None
+
+                generation_kwargs = {
+                    "n": 1,  # in colocate mode, vLLM on each GPU generates only 1 completion per prompt
+                    "repetition_penalty": self.repetition_penalty,
+                    "temperature": self.temperature,
+                    "top_p": self.top_p,
+                    "top_k": -1 if self.top_k is None else self.top_k,
+                    "min_p": 0.0 if self.min_p is None else self.min_p,
+                    "max_tokens": self.max_completion_length,
+                    "guided_decoding": guided_decoding,
+                }
+                if self.args.generation_kwargs is not None:
+                    generation_kwargs.update(self.args.generation_kwargs)
+                sampling_params = SamplingParams(**generation_kwargs)
+
+                if self.vllm_tensor_parallel_size > 1:
+                    # Gather prompts from all ranks in the TP group and flatten.
+                    # Each rank starts with its own prompts; after gathering, all ranks see the full group set.
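+                    # e.g. with vllm_tensor_parallel_size=2 and 4 prompts per rank, both ranks end up with
+                    # the same 8 gathered prompts; each rank later slices its own 4 completions back out
+                    # via `tp_slice` below.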
+ orig_size = len(prompts_text) + gathered_prompts = [None for _ in range(self.vllm_tensor_parallel_size)] + torch.distributed.all_gather_object(gathered_prompts, prompts_text, group=self.tp_group) + all_prompts_text = [p for sublist in gathered_prompts for p in sublist] + + else: + all_prompts_text = prompts_text + + vllm_inputs = all_prompts_text + + with profiling_context(self, "vLLM.generate"): + all_outputs = self.llm.generate(vllm_inputs, sampling_params=sampling_params, use_tqdm=False, lora_request = self.model.load_lora('rloo_trainer_lora_model', load_tensors = True)) + + completion_ids = [output.token_ids for outputs in all_outputs for output in outputs.outputs] + + if self.vllm_tensor_parallel_size > 1: + # Slice completions for this rank within its TP group. + # Each rank generates all outputs — we keep only our share. + local_rank_in_group = torch.distributed.get_rank(group=self.tp_group) + tp_slice = slice(local_rank_in_group * orig_size, (local_rank_in_group + 1) * orig_size) + completion_ids = completion_ids[tp_slice] + + # Pad the completions, and concatenate them with the prompts + completion_ids = [torch.tensor(ids, device=device) for ids in completion_ids] + completion_ids = pad(completion_ids, padding_value=self.pad_token_id) + prompt_completion_ids = torch.cat([prompt_ids, completion_ids], dim=1) + + elif self.use_transformers_paged: + # Re-process inputs for paged generation if needed + paged_prompt_inputs = self.processing_class(text=prompts_text) + previous_attn = self.model_wrapped.config._attn_implementation + + if is_flash_attn_2_available(): + self.model_wrapped.config._attn_implementation = "paged_attention" + else: + self.model_wrapped.config._attn_implementation = "sdpa_paged" + with ( + profiling_context(self, "transformers.generate_batch"), + unwrap_model_for_generation( + self.model_wrapped, self.accelerator, gather_deepspeed3_params=self.args.ds3_gather_for_generation + ) as unwrapped_model, + torch.no_grad(), + FSDP.summon_full_params(self.model_wrapped, recurse=False) if self.is_fsdp_enabled else nullcontext(), + ): + # Cast to the appropriate dtype based on training configuration + if self.args.bf16: + unwrapped_model.to(torch.bfloat16) + elif self.args.fp16: + unwrapped_model.to(torch.float16) + with torch.inference_mode(): + all_outputs = unwrapped_model.generate_batch( + paged_prompt_inputs.input_ids, generation_config=self.generation_config, progress_bar=False + ) + completion_ids = [output.generated_tokens for output in all_outputs.values()] + completion_ids = [torch.tensor(ids, device=device) for ids in completion_ids] + completion_ids = pad(completion_ids, padding_value=self.pad_token_id, padding_side="right") + prompt_ids = [torch.tensor(ids, device=device) for ids in paged_prompt_inputs.input_ids] + prompt_ids = pad(prompt_ids, padding_value=self.pad_token_id, padding_side="left") + prompt_completion_ids = torch.cat([prompt_ids, completion_ids], dim=1) + # Restore the original attention implementation, training mode + self.model_wrapped.config._attn_implementation = previous_attn + else: + # Regular generation path + with ( + profiling_context(self, "transformers.generate"), + unwrap_model_for_generation( + self.model_wrapped, self.accelerator, gather_deepspeed3_params=self.args.ds3_gather_for_generation + ) as unwrapped_model, + torch.no_grad(), + FSDP.summon_full_params(self.model_wrapped, recurse=False) if self.is_fsdp_enabled else nullcontext(), + ): + prompt_inputs["input_ids"], prompt_inputs["attention_mask"] = prompt_ids, prompt_mask + 
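+                # `generate` returns the prompt and completion concatenated along the sequence dimension;
+                # the completion is split off just below using the prompt length.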
prompt_completion_ids = unwrapped_model.generate( + **prompt_inputs, generation_config=self.generation_config, disable_compile=True + ) + # Compute prompt length and extract completion ids + prompt_length = prompt_ids.size(1) + prompt_ids = prompt_completion_ids[:, :prompt_length] + completion_ids = prompt_completion_ids[:, prompt_length:] + + # Mask everything after the first EOS token + is_eos = completion_ids == self.eos_token_id + eos_idx = torch.full((is_eos.size(0),), is_eos.size(1), dtype=torch.long, device=device) + eos_idx[is_eos.any(dim=1)] = is_eos.int().argmax(dim=1)[is_eos.any(dim=1)] + sequence_indices = torch.arange(is_eos.size(1), device=device).expand(is_eos.size(0), -1) + completion_mask = (sequence_indices <= eos_idx.unsqueeze(1)).int() + + # Convert tensor to a list of lists of token IDs. This will be passed to the reward function, avoiding the need + # to re-tokenize completions if the reward is computed from tokens. + completion_ids_list = [row[mask_row].tolist() for row, mask_row in zip(completion_ids, completion_mask.bool())] + + # Sum along sequence dimension (dim=1) to get completion length per sequence, used for logging + completion_lengths = completion_mask.sum(1) + + # If mask_truncated_completions is enabled, zero out truncated completions in completion_mask + if self.mask_truncated_completions: + truncated_completions = ~is_eos.any(dim=1) + completion_mask = completion_mask * (~truncated_completions).unsqueeze(1).int() + + # Concatenate prompt_mask with completion_mask for logit computation + attention_mask = torch.cat([prompt_mask, completion_mask], dim=1) # (B, P+C) + + logits_to_keep = completion_ids.size(1) # we only need to compute the logits for the completion tokens + batch_size = self.args.per_device_train_batch_size if mode == "train" else self.args.per_device_eval_batch_size + + with torch.no_grad(): + # Compute the per-token log probabilities for the current model + old_per_token_logps, _ = self._get_per_token_logps_and_entropies( + self.model, + prompt_completion_ids, + attention_mask, + logits_to_keep, + batch_size, + ) + old_logps = (old_per_token_logps * completion_mask).sum(1) # mask out padding and tokens after EOS + + # Compute the per-token log probabilities for the reference model + if self.beta != 0.0: + if self.ref_model is not None: + ref_per_token_logps, _ = self._get_per_token_logps_and_entropies( + self.ref_model, + prompt_completion_ids, + attention_mask, + logits_to_keep, + batch_size=batch_size, + ) + else: + with self.accelerator.unwrap_model(self.model).disable_adapter(): + ref_per_token_logps, _ = self._get_per_token_logps_and_entropies( + self.model, + prompt_completion_ids, + attention_mask, + logits_to_keep, + batch_size=batch_size, + ) + else: + ref_per_token_logps = None + + # Decode the generated completions + completions_text = self.processing_class.batch_decode(completion_ids, skip_special_tokens=True) + if is_conversational(inputs[0]): + completions = [] + for prompt, completion in zip(prompts, completions_text): + bootstrap = prompt.pop()["content"] if prompt[-1]["role"] == "assistant" else "" + completions.append([{"role": "assistant", "content": bootstrap + completion}]) + else: + completions = completions_text + + # Calculate rewards for each reward function. rewards_per_func aggregates rewards across all processes. This is + # important because rewards will be normalized per group, and completions are distributed. We will later slice + # rewards_per_func to extract each process's subset. 
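+        # Note: `_calculate_rewards` gathers across processes, so `rewards_per_func` below has one row per
+        # completion over *all* processes (num_prompts * num_generations rows in total).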
+ rewards_per_func = self._calculate_rewards(inputs, original_prompts, completions, completion_ids_list) + + # Apply weights to each reward function's output and sum + rewards = (rewards_per_func * self.reward_weights.to(device).unsqueeze(0)).nansum(dim=1) + + # Apply reward clipping if specified + if self.reward_clip_range: + rewards = rewards.clamp(min=self.reward_clip_range[0], max=self.reward_clip_range[1]) + + # Include the KL penalty in the reward + if self.beta != 0.0: + per_token_kl = old_per_token_logps - ref_per_token_logps + # Apply sequence-level KL penalty to rewards (sum KL across tokens first, then apply to each sequence) + kl = (per_token_kl * completion_mask).sum(-1) + kl = gather(kl) # rewards are gathered, so kl must be too + rewards = rewards - self.beta * kl + + grouped_rewards = rewards.view(-1, self.num_generations) + mean_grouped_rewards = grouped_rewards.mean(dim=1) + std_rewards = grouped_rewards.std(dim=1) + is_std_zero = torch.isclose(std_rewards, torch.zeros_like(std_rewards)) + + # RLOO advantages computation + grouped_sum = grouped_rewards.sum(dim=1, keepdim=True) # (num_prompts, 1) + baselines = (grouped_sum - grouped_rewards) / (self.num_generations - 1) # (num_prompts, num_generations) + baselines = baselines.view(-1) # Flatten back to match rewards shape + advantages = rewards - baselines + + # Normalize advantages + if self.normalize_advantages: + advantages = (advantages - advantages.mean()) / (advantages.std() + 1e-4) + + # Slice to keep only the local part of the data + process_slice = slice( + self.accelerator.process_index * len(prompts), + (self.accelerator.process_index + 1) * len(prompts), + ) + all_process_advantages = advantages.clone() # keep the aggregated advantages for logging + advantages = advantages[process_slice] + + # Log the metrics + if mode == "train": + self.state.num_input_tokens_seen += self.accelerator.gather(attention_mask.sum()).sum().item() + self._metrics[mode]["num_tokens"] = [self.state.num_input_tokens_seen] + + # Calculate and log the mean KL divergence between current and reference model + if self.beta != 0.0: + mean_kl = (per_token_kl * completion_mask).sum() / completion_mask.sum().clamp(min=1.0) + self._metrics[mode]["kl"].append(self.accelerator.gather(mean_kl).nanmean().item()) + + # Log completion lengths, mean, min, max + agg_completion_lengths = self.accelerator.gather(completion_lengths) + self._metrics[mode]["completions/mean_length"].append(agg_completion_lengths.float().mean().item()) + self._metrics[mode]["completions/min_length"].append(agg_completion_lengths.float().min().item()) + self._metrics[mode]["completions/max_length"].append(agg_completion_lengths.float().max().item()) + + # Identify sequences that terminated with EOS and log their lengths + agg_terminated_with_eos = self.accelerator.gather(is_eos.any(dim=1)) + term_completion_lengths = agg_completion_lengths[agg_terminated_with_eos] + clipped_completions_ratio = 1 - len(term_completion_lengths) / len(agg_completion_lengths) + self._metrics[mode]["completions/clipped_ratio"].append(clipped_completions_ratio) + if len(term_completion_lengths) == 0: # edge case where no terminated sequences are found + term_completion_lengths = torch.zeros(1, device=device) + self._metrics[mode]["completions/mean_terminated_length"].append(term_completion_lengths.float().mean().item()) + self._metrics[mode]["completions/min_terminated_length"].append(term_completion_lengths.float().min().item()) + 
self._metrics[mode]["completions/max_terminated_length"].append(term_completion_lengths.float().max().item()) + + # Calculate mean reward per function, but only for samples where the function was applied (non-NaN values) + for i, reward_func_name in enumerate(self.reward_func_names): + mean_rewards = torch.nanmean(rewards_per_func[:, i]).item() + self._metrics[mode][f"rewards/{reward_func_name}/mean"].append(mean_rewards) + std_func_rewards = nanstd(rewards_per_func[:, i]).item() + self._metrics[mode][f"rewards/{reward_func_name}/std"].append(std_func_rewards) + self._metrics[mode]["reward"].append(mean_grouped_rewards.mean().item()) + self._metrics[mode]["reward_std"].append(std_rewards.mean().item()) + self._metrics[mode]["frac_reward_zero_std"].append(is_std_zero.float().mean().item()) + + # Log prompt and completion texts + self._logs["prompt"].extend(gather_object(prompts_text)) + self._logs["completion"].extend(gather_object(completions_text)) + for i, name in enumerate(self.reward_func_names): + self._logs["rewards"][name].extend(rewards_per_func[:, i].tolist()) + self._logs["advantages"].extend(all_process_advantages.tolist()) + + output = { + "prompt_ids": prompt_ids, + "prompt_mask": prompt_mask, + "completion_ids": completion_ids, + "completion_mask": completion_mask, + "old_logps": old_logps, + "advantages": advantages, + } + return output + + @profiling_decorator + def compute_loss(self, model, inputs, return_outputs=False, num_items_in_batch=None): + if return_outputs: + raise ValueError("The RLOOTrainer does not support returning outputs") + return self._compute_loss(model, inputs) + + def _compute_loss(self, model, inputs): + # Compute the per-token log probabilities for the model + prompt_ids, prompt_mask = inputs["prompt_ids"], inputs["prompt_mask"] + completion_ids, completion_mask = inputs["completion_ids"], inputs["completion_mask"] + input_ids = torch.cat([prompt_ids, completion_ids], dim=1) + attention_mask = torch.cat([prompt_mask, completion_mask], dim=1) + logits_to_keep = completion_ids.size(1) # we only need to compute the logits for the completion tokens + + # Compute the per_token_logps and the entropy at each position in the completion + per_token_logps, entropies = self._get_per_token_logps_and_entropies( + model, + input_ids, + attention_mask, + logits_to_keep, + compute_entropy=True, + ) + logps = (per_token_logps * completion_mask).sum(1) # mask out padding and tokens after EOS + old_logps = inputs["old_logps"] + log_ratio = logps - old_logps + + # Compute the loss + advantages = inputs["advantages"] + coef_1 = torch.exp(log_ratio) + coef_2 = torch.clamp(coef_1, 1 - self.epsilon_low, 1 + self.epsilon_high) + per_sequence_loss1 = coef_1 * advantages + per_sequence_loss2 = coef_2 * advantages + per_sequence_loss = -torch.min(per_sequence_loss1, per_sequence_loss2) + loss = per_sequence_loss.mean() + + # Log the metrics + mode = "train" if self.model.training else "eval" + + # Entropy + mean_entropy = (entropies * completion_mask).sum() / completion_mask.sum().clamp(min=1.0) + self._metrics[mode]["entropy"].append(self.accelerator.gather(mean_entropy).nanmean().item()) + + # Compute the clipped probability ratios + is_low_clipped = (coef_1 < 1 - self.epsilon_low) & (advantages < 0) + is_high_clipped = (coef_1 > 1 + self.epsilon_high) & (advantages > 0) + is_region_clipped = is_low_clipped | is_high_clipped + gathered_low_clip = self.accelerator.gather(is_low_clipped.float().mean()) + 
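+        # Each gathered tensor holds one per-process mean; we log the average across processes plus the
+        # worst case (min for low clipping, max for high clipping).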
self._metrics[mode]["clip_ratio/low_mean"].append(gathered_low_clip.nanmean().item()) + self._metrics[mode]["clip_ratio/low_min"].append(nanmin(gathered_low_clip).item()) + gathered_high_clip = self.accelerator.gather(is_high_clipped.float().mean()) + self._metrics[mode]["clip_ratio/high_mean"].append(gathered_high_clip.nanmean().item()) + self._metrics[mode]["clip_ratio/high_max"].append(nanmax(gathered_high_clip).item()) + gathered_clip_ratio = self.accelerator.gather(is_region_clipped.float().mean()) + self._metrics[mode]["clip_ratio/region_mean"].append(gathered_clip_ratio.nanmean().item()) + return loss + + def prediction_step(self, model, inputs, prediction_loss_only, ignore_keys: Optional[list[str]] = None): + inputs = self._prepare_inputs(inputs) + with torch.no_grad(): + with self.compute_loss_context_manager(): + loss = self.compute_loss(model, inputs) + loss = loss.mean().detach() + return loss, None, None + + def log(self, logs: dict[str, float], start_time: Optional[float] = None) -> None: + mode = "train" if self.model.training else "eval" + metrics = {key: sum(val) / len(val) for key, val in self._metrics[mode].items()} # average the metrics + + # This method can be called both in training and evaluation. When called in evaluation, the keys in `logs` + # start with "eval_". We need to add the prefix "eval_" to the keys in `metrics` to match the format. + if mode == "eval": + metrics = {f"eval_{key}": val for key, val in metrics.items()} + + logs = {**logs, **metrics} + super().log(logs, start_time) + self._metrics[mode].clear() + + if self.accelerator.is_main_process and self.log_completions: + if is_rich_available(): + print_prompt_completions_sample( + self._logs["prompt"], + self._logs["completion"], + self._logs["rewards"], + self._logs["advantages"], + self.state.global_step, + self.num_completions_to_print, + ) + + if self.args.report_to and "wandb" in self.args.report_to and wandb.run is not None: + import pandas as pd + + table = { + "step": [str(self.state.global_step)] * len(self._logs["prompt"]), + "prompt": self._logs["prompt"], + "completion": self._logs["completion"], + **self._logs["rewards"], + "advantage": self._logs["advantages"], + } + + df = pd.DataFrame(table) + if self.wandb_log_unique_prompts: + df = df.drop_duplicates(subset=["prompt"]) + wandb.log({"completions": wandb.Table(dataframe=df)}) + + # Ensure the model card is saved along with the checkpoint + def _save_checkpoint(self, model, trial): + if self.args.hub_model_id is None: + model_name = Path(self.args.output_dir).name + else: + model_name = self.args.hub_model_id.split("/")[-1] + self.create_model_card(model_name=model_name) + super()._save_checkpoint(model, trial) + + def create_model_card( + self, + model_name: Optional[str] = None, + dataset_name: Optional[str] = None, + tags: Union[str, list[str], None] = None, + ): + """ + Creates a draft of a model card using the information available to the `Trainer`. + + Args: + model_name (`str` or `None`, *optional*, defaults to `None`): + Name of the model. + dataset_name (`str` or `None`, *optional*, defaults to `None`): + Name of the dataset used for training. + tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`): + Tags to be associated with the model card. 
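+
+        Example (illustrative):
+
+        ```python
+        trainer.create_model_card(model_name="my-rloo-model", tags=["rloo", "trl"])
+        ```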
+ """ + if not self.is_world_process_zero(): + return + + if hasattr(self.model.config, "_name_or_path") and not os.path.isdir(self.model.config._name_or_path): + base_model = self.model.config._name_or_path + else: + base_model = None + + # normalize `tags` to a mutable set + if tags is None: + tags = set() + elif isinstance(tags, str): + tags = {tags} + else: + tags = set(tags) + + if hasattr(self.model.config, "unsloth_version"): + tags.add("unsloth") + + if "JOB_ID" in os.environ: + tags.add("hf_jobs") + + tags.update(self._tag_names) + + # docstyle-ignore + citation = textwrap.dedent( + """\ + @inproceedings{ahmadian2024back, + title = {{Back to Basics: Revisiting REINFORCE-Style Optimization for Learning from Human Feedback in LLMs}}, + author = {Arash Ahmadian and Chris Cremer and Matthias Gall{\'{e}} and Marzieh Fadaee and Julia Kreutzer and Olivier Pietquin and Ahmet {\"{U}}st{\"{u}}n and Sara Hooker}, + year = 2024, + booktitle = {Proceedings of the 62nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers), {ACL} 2024, Bangkok, Thailand, August 11-16, 2024}, + pages = {12248--12267}, + publisher = {Association for Computational Linguistics}, + editor = {Lun{-}Wei Ku and Andre Martins and Vivek Srikumar}, + } + """ + ) + + model_card = generate_model_card( + base_model=base_model, + model_name=model_name, + hub_model_id=self.hub_model_id, + dataset_name=dataset_name, + tags=tags, + wandb_url=wandb.run.url if is_wandb_available() and wandb.run is not None else None, + comet_url=get_comet_experiment_url(), + trainer_name="RLOO", + trainer_citation=citation, + paper_title="Back to Basics: Revisiting REINFORCE-Style Optimization for Learning from Human Feedback in LLMs", + paper_id="2402.14740", + ) + + model_card.save(os.path.join(self.args.output_dir, "README.md")) +class UnslothRLOOTrainer(_UnslothRLOOTrainer): + """ + +Trainer for the Reinforce Leave One Out (RLOO) method. This algorithm was initially proposed in the paper [Back to +Basics: Revisiting REINFORCE Style Optimization for Learning from Human Feedback in LLMs] +(https://huggingface.co/papers/2402.14740). + +Example: + +```python +from datasets import load_dataset +from trl import RLOOTrainer + +dataset = load_dataset("trl-lib/tldr", split="train") +def reward_func(completions, **kwargs): + # Dummy reward function that rewards completions with more unique letters. + return [float(len(set(completion))) for completion in completions] +trainer = RLOOTrainer( + model="Qwen/Qwen2-0.5B-Instruct", + reward_funcs=reward_func, + train_dataset=dataset, +) + +trainer.train() +``` + +Args: + model (`Union[str, PreTrainedModel]`): + Model to be trained. Can be either: + + - A string, being the *model id* of a pretrained model hosted inside a model repo on huggingface.co, or a + path to a *directory* containing model weights saved using + [`~transformers.PreTrainedModel.save_pretrained`], e.g., `'./my_model_directory/'`. The model is loaded + using [`~transformers.AutoModelForCausalLM.from_pretrained`] with the keyword arguments in + `args.model_init_kwargs`. + - A [`~transformers.PreTrainedModel`] object. Only causal language models are supported. + reward_funcs (`Union[RewardFunc, list[RewardFunc]]`): + Reward functions to be used for computing the rewards. To compute the rewards, we call all the reward + functions with the prompts and completions and sum the rewards. 
Can be either:
+
+            - A single reward function, such as:
+                - A string: The *model ID* of a pretrained model hosted inside a model repo on huggingface.co, or a
+                  path to a *directory* containing model weights saved using
+                  [`~transformers.PreTrainedModel.save_pretrained`], e.g., `'./my_model_directory/'`. The model is
+                  loaded using [`~transformers.AutoModelForSequenceClassification.from_pretrained`] with
+                  `num_labels=1` and the keyword arguments in `args.model_init_kwargs`.
+                - A [`~transformers.PreTrainedModel`] object: Only sequence classification models are supported.
+                - A custom reward function: The function is provided with the prompts and the generated completions,
+                  plus any additional columns in the dataset. It should return a list of rewards. Custom reward
+                  functions can also return `None` when the reward is not applicable to those samples. This is useful
+                  for multi-task training where different reward functions apply to different types of samples. When a
+                  reward function returns `None` for a sample, that reward function is excluded from the reward
+                  calculation for that sample. For more details, see [Using a custom reward
+                  function](#using-a-custom-reward-function).
+
+                  The trainer's state is also passed to the reward function. The trainer's state is an instance of
+                  [`~transformers.TrainerState`] and can be accessed via the `trainer_state` argument in the reward
+                  function's signature.
+            - A list of reward functions, where each item can independently be any of the above types. Mixing different
+              types within the list (e.g., a string model ID and a custom reward function) is allowed.
+        args ([`RLOOConfig`], *optional*, defaults to `None`):
+            Configuration for this trainer. If `None`, a default configuration is used.
+        train_dataset ([`~datasets.Dataset`] or [`~datasets.IterableDataset`]):
+            Dataset to use for training. It must include a column `"prompt"`. Any additional columns in the dataset
+            are ignored. The format of the samples can be either:
+
+            - [Standard](dataset_formats#standard): Each sample contains plain text.
+            - [Conversational](dataset_formats#conversational): Each sample contains structured messages (e.g., role
+              and content).
+        eval_dataset ([`~datasets.Dataset`], [`~datasets.IterableDataset`] or `dict[str, Union[Dataset, IterableDataset]]`):
+            Dataset to use for evaluation. It must meet the same requirements as `train_dataset`.
+        processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.ProcessorMixin`] or `None`, *optional*, defaults to `None`):
+            Processing class used to process the data. The padding side must be set to "left". If `None`, the
+            processing class is loaded from the model's name with [`~transformers.AutoProcessor.from_pretrained`]. A
+            padding token, `tokenizer.pad_token`, must be set. If the processing class has not set a padding token,
+            `tokenizer.eos_token` will be used as the default.
+        reward_processing_classes (`Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]`, *optional*, defaults to `None`):
+            Processing classes corresponding to the reward functions specified in `reward_funcs`. Can be either:
+
+            - A single processing class: Used when `reward_funcs` contains only one reward function.
+            - A list of processing classes: Must match the order and length of the reward functions in `reward_funcs`.
+ If set to `None`, or if an element of the list corresponding to a [`~transformers.PreTrainedModel`] is + `None`, the tokenizer for the model is automatically loaded using + [`~transformers.AutoTokenizer.from_pretrained`]. For elements in `reward_funcs` that are custom reward + functions (not [`~transformers.PreTrainedModel`]), the corresponding entries in `reward_processing_classes` + are ignored. + callbacks (list of [`~transformers.TrainerCallback`], *optional*, defaults to `None`): + List of callbacks to customize the training loop. Will add those to the list of default callbacks detailed + in [here](https://huggingface.co/docs/transformers/main_classes/callback). + + If you want to remove one of the default callbacks used, use the [`~transformers.Trainer.remove_callback`] + method. + optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`, *optional*, defaults to `(None, None)`): + A tuple containing the optimizer and the scheduler to use. Will default to an instance of [`AdamW`] on your + model and a scheduler given by [`get_linear_schedule_with_warmup`] controlled by `args`. + peft_config ([`~peft.PeftConfig`], *optional*, defaults to `None`): + PEFT configuration used to wrap the model. If `None`, the model is not wrapped. + + """ + def __init__( + self, + model = None, + reward_funcs = None, + args = None, + train_dataset = None, + eval_dataset = None, + processing_class = None, + reward_processing_classes = None, + callbacks = None, + peft_config = None, + config = None, + reward_model = None, + policy = None, + ref_policy = None, + data_collator = None, + **kwargs + ): + if args is None: args = UnslothRLOOConfig() + use_bf16 = getattr(args, 'bf16', False) + if type(use_bf16) is not bool: use_bf16 = False + use_fp16 = getattr(args, 'fp16', False) + if type(use_fp16) is not bool: use_fp16 = False + force_float32 = False + full_finetuning = os.environ.get('UNSLOTH_ENABLE_FULL_FINETUNING', '0') == '1' + if not full_finetuning and (os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '1'): + print('Unsloth: Switching to float32 training since model cannot work with float16') + force_float32 = True + mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') + dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None) + if dtype is None: dtype = model.get_input_embeddings().dtype + from unsloth_zoo.utils import _get_dtype + dtype = _get_dtype(dtype) + float16 = dtype == torch.float16 + if not force_float32 and (float16 and use_bf16): raise TypeError('Unsloth: Model is in float16 precision but you want to use bfloat16 precision. Set fp16 to `True` and bf16 to `False`') + if not force_float32 and (not float16 and use_fp16): raise TypeError('Unsloth: Model is in bfloat16 precision but you want to use float16 precision. 
Set fp16 to `False` and bf16 to `True`') + if force_float32: + # Forced float32 training + args.fp16 = False + args.bf16 = False + os.environ['ACCELERATE_MIXED_PRECISION'] = 'no' + elif (not use_bf16 and not use_fp16) and mixed_precision_dtype == 'float32': + # Mixed precision training + args.fp16 = float16 + args.bf16 = not float16 + os.environ['ACCELERATE_MIXED_PRECISION'] = 'fp16' if float16 else 'bf16' + if getattr(args, 'eval_dataset', None) is not None and getattr(args, 'eval_strategy', 'no') == 'no': + args.eval_strategy = 'steps' + if getattr(args, 'eval_steps', None) is None: args.eval_steps = 0.1 + ga_steps = getattr(args, 'gradient_accumulation_steps', None) + if ga_steps is not None and ga_steps > 1: + from transformers import __version__ as transformers_version + if Version(transformers_version) <= Version('4.45.2'): + print('**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!\n' + '`pip install --upgrade --no-cache-dir --force-reinstall --no-deps unsloth transformers trl unsloth_zoo`') + if getattr(args, 'eval_strategy', 'no') != 'no': + eval_bsz = getattr(args, 'per_device_eval_batch_size', 8) + if eval_bsz == 8 and args.per_device_train_batch_size < eval_bsz: args.per_device_eval_batch_size = args.per_device_train_batch_size + if getattr(args, 'eval_accumulation_steps', None) is None and ga_steps is not None: args.eval_accumulation_steps = ga_steps + fp16_full_eval = getattr(args, 'fp16_full_eval', False) + if type(fp16_full_eval) is not bool: fp16_full_eval = False + bf16_full_eval = getattr(args, 'bf16_full_eval', False) + if type(bf16_full_eval) is not bool: bf16_full_eval = False + if args.fp16 and bf16_full_eval: args.bf16_full_eval = False; args.fp16_full_eval = True + if args.bf16 and fp16_full_eval: args.bf16_full_eval = True; args.fp16_full_eval = False + if force_float32: + args.bf16_full_eval = False + args.fp16_full_eval = False + elif os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') == 'bfloat16': + args.bf16_full_eval = True + args.fp16_full_eval = False + elif not bf16_full_eval and not fp16_full_eval: + args.bf16_full_eval = args.bf16 + args.fp16_full_eval = args.fp16 + _output_logits = False + if locals().get('compute_metrics', None) is not None: _output_logits = True + if locals().get('preprocess_logits_for_metrics', None) is not None: _output_logits = True + if _output_logits: + os.environ['UNSLOTH_RETURN_LOGITS'] = '1' + if 'max_seq_length' not in locals() and not hasattr(args, 'max_seq_length'): + pass + else: + model_max_seq_length = getattr(model, 'max_seq_length', None) + args_max_seq_length = getattr(args, 'max_seq_length', None) + if args_max_seq_length is None and model_max_seq_length is not None: + max_seq_length = model.max_seq_length + if hasattr(args, 'max_seq_length'): args.max_seq_length = max_seq_length + if model is not None and hasattr(model, 'for_training'): + model.for_training() + if 'tokenizer' in locals() and hasattr(tokenizer, 'padding_side'): tokenizer.padding_side = 'right' + if 'processing_class' in locals(): + if hasattr(processing_class, 'padding_side'): processing_class.padding_side = 'right' + if hasattr(processing_class, 'tokenizer') and hasattr(processing_class.tokenizer, 'padding_side'): processing_class.tokenizer.padding_side = 'right' + __tokenizer = processing_class if 'processing_class' in locals() else tokenizer + from unsloth_zoo.vision_utils import UnslothVisionDataCollator + if not isinstance(data_collator, UnslothVisionDataCollator): + if 
isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names: + data_collator = DataCollatorForSeq2Seq( + __tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False + if hasattr(args, 'dataset_text_field'): args.dataset_text_field = '' + if hasattr(args, 'dataset_kwargs'): args.dataset_kwargs = {'skip_prepare_dataset': True} + if not isinstance(data_collator, UnslothVisionDataCollator): + if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'): + if isinstance(data_collator, DataCollatorForSeq2Seq): + data_collator = DataCollatorForSeq2Seq( + __tokenizer.tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer.tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + other_metrics = [] + + from unsloth_zoo.logging_utils import PatchRLStatistics + PatchRLStatistics('rloo_trainer', other_metrics) + + # [TODO] Fix up DataParallel multiplying batch sizes + # [TODO] DDP works, but DP seems to not work? [TODO] + if getattr(args, "parallel_mode", None) == ParallelMode.NOT_DISTRIBUTED and args.n_gpu > 1: + if getattr(args, "_n_gpu", 1) != 1: + args._n_gpu = 1 + if "model" in locals() and hasattr(model, "for_training"): + model.for_training() + super().__init__( + model = model, + reward_funcs = reward_funcs, + args = args, + train_dataset = train_dataset, + eval_dataset = eval_dataset, + processing_class = processing_class, + reward_processing_classes = reward_processing_classes, + callbacks = callbacks, + peft_config = peft_config, + config = config, + reward_model = reward_model, + policy = policy, + ref_policy = ref_policy, + data_collator = data_collator,**kwargs) + if "model" in locals() and hasattr(model, "for_inference"): + model.for_inference() + if hasattr(self, 'neftune_hook_handle'): + self.neftune_hook_handle.remove() + if hasattr(self, 'neftune_hook_handle'): del self.neftune_hook_handle + if getattr(args, 'neftune_noise_alpha', None) is not None: + model.get_input_embeddings().neftune_noise_alpha = self.neftune_noise_alpha + pass + if hasattr(self, 'accelerator'): + scaler = self.accelerator.scaler + current_model = model + while hasattr(current_model, 'model'): + current_model.accelerator_scaler = scaler + current_model = current_model.model + current_model.accelerator_scaler = scaler + pass + if hasattr(self, 'train'): + self.train = MethodType(prepare_for_training_mode(self.__class__.train), self) + pass + +pass + + +if hasattr(logger, "addFilter"): + import logging + class HideLoggingMessage(logging.Filter): + def __init__(self, text): self.text = text + def filter(self, x): return not (self.text in x.getMessage()) + pass + logger.addFilter(HideLoggingMessage("`use_cache=True`")) + diff --git a/unsloth_compiled_cache/UnslothRewardTrainer.py b/unsloth_compiled_cache/UnslothRewardTrainer.py new file mode 100644 index 0000000000000000000000000000000000000000..d4aabeaf05bfef153ac5cb032efe1f78d8e4d304 --- /dev/null +++ 
b/unsloth_compiled_cache/UnslothRewardTrainer.py @@ -0,0 +1,1066 @@ +""" +2025.11.2 +2025.11.1 +4.57.2 +0.23.0 +__UNSLOTH_VERSIONING__ +""" + +# Unsloth auto generated code +# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . + +from torch import Tensor +import torch +import torch.nn as nn +from torch.nn import functional as F +from typing import Any, List, Optional, Tuple, Union, Dict, Set, Callable +from trl.trainer.reward_trainer import (Any, BaseImageProcessor, Callable, DataCollator, Dataset, EvalPrediction, FeatureExtractionMixin, FrozenInstanceError, Optional, PartialState, Path, PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin, RewardConfig, RewardDataCollatorWithPadding, RewardTrainer, Trainer, TrainerCallback, Union, _tokenize, compute_accuracy, decode_and_strip_padding, defaultdict, disable_dropout_in_model, gather_object, generate_model_card, get_comet_experiment_url, is_rich_available, is_wandb_available, log_table_to_comet_experiment, logger, logging, maybe_apply_chat_template, nested_detach, nn, os, pd, prepare_peft_model, print_rich_table, replace, torch, Optional, PreTrainedModel, Trainer, logger, os, torch) + + +import os +from typing import * +from dataclasses import dataclass, field +from packaging.version import Version +import torch +import numpy as np +from contextlib import nullcontext +from torch.nn import functional as F +from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling as TransformersDataCollatorForLanguageModeling +from transformers.training_args import ParallelMode + +# Wrap trainer with padding to right and enable training mode +import functools +from types import MethodType +def prepare_for_training_mode(f): + @functools.wraps(f) + def wrapper(self, *args, **kwargs): + # Enable training mode + if hasattr(self, 'model') and hasattr(self.model, "for_training"): + self.model.for_training() + output = f(self, *args, **kwargs) + # Return inference mode + if hasattr(self, 'model') and hasattr(self.model, "for_inference"): + self.model.for_inference() + return output + return wrapper +pass + +torch_compile_options = { + "epilogue_fusion" : True, + "max_autotune" : False, + "shape_padding" : True, + "trace.enabled" : False, + "triton.cudagraphs" : False, +} + +@torch.compile(dynamic = True, fullgraph = True, options = torch_compile_options,) +def chunked_selective_log_softmax(logits, index): + # Split into 4 chunks only + chunked_logits = torch.chunk(logits.reshape(-1, logits.shape[-1]), chunks = 4, dim = 0) + chunked_index = torch.chunk(index.reshape(-1), chunks = 4, dim = 0) + all_per_token_logps = [] + # Below loop does the same as selective_log_softmax(chunk_logits, chunk_index) + for chunk_logits, chunk_index in zip(chunked_logits, chunked_index): + chunk_logits = chunk_logits.to(torch.float32) + selected_logits = torch.gather(chunk_logits, dim = -1, index 
= chunk_index.unsqueeze(-1)).squeeze(-1)
+        logsumexp_values = torch.logsumexp(chunk_logits, dim = -1)
+        per_token_logps = selected_logits - logsumexp_values
+        all_per_token_logps.append(per_token_logps)
+    pass
+    all_per_token_logps = torch.concat(all_per_token_logps)
+    all_per_token_logps = all_per_token_logps.reshape((logits.shape[0], logits.shape[1]))
+    return all_per_token_logps
+
+def calculate_pad_tokens_in_prompt(
+    input_ids: torch.Tensor,
+    logits_to_keep: int,
+    pad_token_id: int
+) -> torch.Tensor:
+    """
+    Given a batch of prompt token ids, returns the number of left-padding tokens in each sequence,
+    e.g. [pad, pad, pad, cat] -> 3 tokens.
+    """
+    if logits_to_keep >= input_ids.shape[1]:
+        raise ValueError("logits_to_keep must be smaller than the sequence length.")
+
+    prompt_section = input_ids[:, :-logits_to_keep]
+
+    padding_mask = (prompt_section == pad_token_id)
+
+    pad_token_counts = padding_mask.sum(dim=1)
+
+    return pad_token_counts
+
+def create_completion_attention_mask(
+    completion_input_ids: torch.Tensor,
+    left_pad_tokens_per_prompt: torch.Tensor,
+    max_left_pad: int,
+    pad_token_id: int
+) -> torch.Tensor:
+    """
+    Given a sequence [p, p, p, c, c, c, pad, pad, pad], where p are extra prompt tokens left over from
+    slicing the torch tensor, c are completion tokens, and pad are padding tokens, build a completion
+    mask that zeroes out the pad and p tokens: in this example, [0,0,0,1,1,1,0,0,0].
+    """
+    batch_size, completion_len = completion_input_ids.shape
+    device = completion_input_ids.device
+
+    num_tokens_to_mask = max_left_pad - left_pad_tokens_per_prompt
+
+    indices = torch.arange(completion_len, device=device).unsqueeze(0)
+    shift_mask = indices >= num_tokens_to_mask.unsqueeze(1)
+
+    non_padding_mask = (completion_input_ids != pad_token_id)
+
+    final_mask = shift_mask & non_padding_mask
+
+    return final_mask
+
+def left_pack_padding(tensor: torch.Tensor, pad_id: int) -> torch.Tensor:
+    """
+    Moves all padding tokens in each sequence of a batch to the right.
+    """
+    mask = (tensor != pad_id)
+    # Must use stable=True since the binary mask is unordered within equal values
+    sorted_indices = torch.argsort(mask, dim=1, descending=True, stable=True)
+    packed_tensor = torch.gather(tensor, 1, sorted_indices)
+    return packed_tensor
+
+def align_logprobs_with_mask(
+    logprob_tensor: torch.Tensor,
+    attention_mask: torch.Tensor,
+    pad_value: float = 0.0
+) -> torch.Tensor:
+    """
+    Aligns a log probability tensor with a given attention mask.
+    """
+
+    device = logprob_tensor.device
+    batch_size, logprob_seq_len = logprob_tensor.shape
+    mask_seq_len = attention_mask.shape[1]
+
+    padded_logprobs = torch.full(
+        attention_mask.shape,
+        fill_value=pad_value,
+        dtype=logprob_tensor.dtype,
+        device=device
+    )
+
+    left_pad_counts = torch.argmax(attention_mask, dim=1)
+
+    cols = torch.arange(logprob_seq_len, device=device)
+    dest_indices = left_pad_counts.unsqueeze(1) + cols
+
+    # Create destination row indices
+    # Shape: [batch_size, logprob_seq_len]
+    row_indices = torch.arange(batch_size, device=device).unsqueeze(1).expand_as(dest_indices)
+
+    # Filter out-of-bounds indices and perform the assignment.
+    # Create a mask to identify only the indices that are within the bounds
+    # of the target tensor's sequence length.
+    valid_mask = dest_indices < mask_seq_len
+
+    # Use this mask to select only the valid row indices, column indices,
+    # and the corresponding values from the logprob tensor.
+    # This flattens the selected elements into 1D tensors.
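+    # e.g. (hypothetical shapes) a logprob row of length 3 preceded by 2 left-pad positions maps to
+    # destination columns [2, 3, 4]; any destination column >= mask_seq_len is discarded by valid_mask.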
+ valid_rows = row_indices[valid_mask] + valid_cols = dest_indices[valid_mask] + valid_vals = logprob_tensor[valid_mask] + + # Place the valid values into their correct positions in the padded tensor + # using a single, efficient advanced indexing operation. + padded_logprobs[valid_rows, valid_cols] = valid_vals + + return padded_logprobs +@dataclass +class UnslothRewardConfig(RewardConfig): + """ + +Configuration class for the [`RewardTrainer`]. + +This class includes only the parameters that are specific to Reward training. For a full list of training +arguments, please refer to the [`~transformers.TrainingArguments`] documentation. Note that default values in this +class may differ from those in [`~transformers.TrainingArguments`]. + +Using [`~transformers.HfArgumentParser`] we can turn this class into +[argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the +command line. + +Parameters: + max_length (`int` or `None`, *optional*, defaults to `1024`): + Maximum length of the sequences (prompt + completion) in the batch, filters out entries that exceed the + limit. This argument is required if you want to use the default data collator. + disable_dropout (`bool`, *optional*, defaults to `True`): + Whether to disable dropout in the model. + dataset_num_proc (`int`, *optional*, defaults to `None`): + Number of processes to use for processing the dataset. + center_rewards_coefficient (`float`, *optional*, defaults to `None`): + Coefficient to incentivize the reward model to output mean-zero rewards (proposed by + https://huggingface.co/papers/2312.09244, Eq. 2). Recommended value: `0.01`. + remove_unused_columns (`bool`, *optional*, defaults to `False`): + Whether to remove the columns that are not used by the model's forward pass. Can be `True` only if the + dataset is pretokenized. + + """ + vllm_sampling_params: Optional[Any] = field( + default = None, + metadata = {'help': 'vLLM SamplingParams'}, + ) + unsloth_num_chunks : Optional[int] = field( + default = -1, + metadata = {'help': 'Chunk size to reduce memory usage. 
-1 is most efficient.'}, + ) + max_seq_length : Optional[int] = field( + default = None, + metadata = {'help': 'Maximum sequence length to truncate to.'}, + ) + def __init__( + self, + output_dir = None, + overwrite_output_dir = None, + do_train = False, + do_eval = False, + do_predict = False, + eval_strategy = 'no', + prediction_loss_only = False, + per_device_train_batch_size = 4, + per_device_eval_batch_size = 4, + per_gpu_train_batch_size = None, + per_gpu_eval_batch_size = None, + gradient_accumulation_steps = 2, + eval_accumulation_steps = 2, + eval_delay = 0, + torch_empty_cache_steps = 250, + learning_rate = 5e-05, + weight_decay = 0.01, + adam_beta1 = 0.9, + adam_beta2 = 0.999, + adam_epsilon = 1e-08, + max_grad_norm = 1.0, + num_train_epochs = 3.0, + max_steps = -1, + lr_scheduler_type = 'linear', + warmup_ratio = 0.1, + warmup_steps = 0, + log_level = 'passive', + log_level_replica = 'warning', + log_on_each_node = True, + logging_dir = None, + logging_strategy = 'steps', + logging_first_step = False, + logging_steps = 1, + logging_nan_inf_filter = False, + save_strategy = 'steps', + save_steps = 500, + save_total_limit = None, + save_safetensors = True, + save_on_each_node = False, + save_only_model = False, + restore_callback_states_from_checkpoint = False, + no_cuda = False, + use_cpu = False, + use_mps_device = False, + seed = 3407, + data_seed = 3407, + jit_mode_eval = False, + bf16 = False, + fp16 = False, + fp16_opt_level = 'O1', + half_precision_backend = 'auto', + bf16_full_eval = False, + fp16_full_eval = False, + tf32 = None, + local_rank = -1, + ddp_backend = None, + tpu_num_cores = None, + tpu_metrics_debug = False, + debug = '', + dataloader_drop_last = False, + eval_steps = None, + dataloader_num_workers = 0, + dataloader_prefetch_factor = None, + past_index = -1, + run_name = None, + disable_tqdm = None, + remove_unused_columns = False, + label_names = None, + load_best_model_at_end = False, + metric_for_best_model = None, + greater_is_better = None, + ignore_data_skip = False, + fsdp = None, + fsdp_min_num_params = 0, + fsdp_config = None, + fsdp_transformer_layer_cls_to_wrap = None, + accelerator_config = None, + parallelism_config = None, + deepspeed = None, + label_smoothing_factor = 0.0, + optim = 'adamw_8bit', + optim_args = None, + adafactor = False, + group_by_length = False, + length_column_name = 'length', + report_to = None, + project = 'huggingface', + trackio_space_id = 'trackio', + ddp_find_unused_parameters = None, + ddp_bucket_cap_mb = None, + ddp_broadcast_buffers = None, + dataloader_pin_memory = True, + dataloader_persistent_workers = False, + skip_memory_metrics = True, + use_legacy_prediction_loop = False, + push_to_hub = False, + resume_from_checkpoint = None, + hub_model_id = None, + hub_strategy = 'every_save', + hub_token = None, + hub_private_repo = None, + hub_always_push = False, + hub_revision = None, + gradient_checkpointing = True, + gradient_checkpointing_kwargs = None, + include_inputs_for_metrics = False, + eval_do_concat_batches = True, + fp16_backend = 'auto', + push_to_hub_model_id = None, + push_to_hub_organization = None, + push_to_hub_token = None, + mp_parameters = '', + auto_find_batch_size = False, + full_determinism = False, + torchdynamo = None, + ray_scope = 'last', + ddp_timeout = 1800, + torch_compile = False, + torch_compile_backend = None, + torch_compile_mode = None, + include_tokens_per_second = False, + include_num_input_tokens_seen = False, + neftune_noise_alpha = None, + optim_target_modules = None, + 
batch_eval_metrics = False,
+        eval_on_start = False,
+        use_liger_kernel = False,
+        liger_kernel_config = None,
+        eval_use_gather_object = False,
+        average_tokens_across_devices = True,
+        max_length = 1024,
+        disable_dropout = True,
+        dataset_num_proc = None,
+        center_rewards_coefficient = None,
+        vllm_sampling_params = None,
+        unsloth_num_chunks = -1,
+        max_seq_length = None,
+        **kwargs,
+    ):
+        if learning_rate < 1e-7: print(f'Unsloth: Your learning rate of `{learning_rate}` is smaller than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!')
+        if learning_rate > 1: print(f'Unsloth: Your learning rate of `{learning_rate}` is greater than 1, which is far too large! Consider decreasing it to 1e-1, otherwise gradient updates will explode!')
+        if output_dir is None and save_strategy == 'steps' and save_steps == 500:
+            output_dir = 'unsloth_training_checkpoints'
+            save_strategy = 'no'
+        if dataset_num_proc is None:
+            from multiprocessing import cpu_count
+            dataset_num_proc = min(max(cpu_count()+4, 2), 64)
+
+        super().__init__(
+            output_dir = output_dir,
+            overwrite_output_dir = overwrite_output_dir,
+            do_train = do_train,
+            do_eval = do_eval,
+            do_predict = do_predict,
+            eval_strategy = eval_strategy,
+            prediction_loss_only = prediction_loss_only,
+            per_device_train_batch_size = per_device_train_batch_size,
+            per_device_eval_batch_size = per_device_eval_batch_size,
+            per_gpu_train_batch_size = per_gpu_train_batch_size,
+            per_gpu_eval_batch_size = per_gpu_eval_batch_size,
+            gradient_accumulation_steps = gradient_accumulation_steps,
+            eval_accumulation_steps = eval_accumulation_steps,
+            eval_delay = eval_delay,
+            torch_empty_cache_steps = torch_empty_cache_steps,
+            learning_rate = learning_rate,
+            weight_decay = weight_decay,
+            adam_beta1 = adam_beta1,
+            adam_beta2 = adam_beta2,
+            adam_epsilon = adam_epsilon,
+            max_grad_norm = max_grad_norm,
+            num_train_epochs = num_train_epochs,
+            max_steps = max_steps,
+            lr_scheduler_type = lr_scheduler_type,
+            warmup_ratio = warmup_ratio,
+            warmup_steps = warmup_steps,
+            log_level = log_level,
+            log_level_replica = log_level_replica,
+            log_on_each_node = log_on_each_node,
+            logging_dir = logging_dir,
+            logging_strategy = logging_strategy,
+            logging_first_step = logging_first_step,
+            logging_steps = logging_steps,
+            logging_nan_inf_filter = logging_nan_inf_filter,
+            save_strategy = save_strategy,
+            save_steps = save_steps,
+            save_total_limit = save_total_limit,
+            save_safetensors = save_safetensors,
+            save_on_each_node = save_on_each_node,
+            save_only_model = save_only_model,
+            restore_callback_states_from_checkpoint = restore_callback_states_from_checkpoint,
+            no_cuda = no_cuda,
+            use_cpu = use_cpu,
+            use_mps_device = use_mps_device,
+            seed = seed,
+            data_seed = data_seed,
+            jit_mode_eval = jit_mode_eval,
+            bf16 = bf16,
+            fp16 = fp16,
+            fp16_opt_level = fp16_opt_level,
+            half_precision_backend = half_precision_backend,
+            bf16_full_eval = bf16_full_eval,
+            fp16_full_eval = fp16_full_eval,
+            tf32 = tf32,
+            local_rank = local_rank,
+            ddp_backend = ddp_backend,
+            tpu_num_cores = tpu_num_cores,
+            tpu_metrics_debug = tpu_metrics_debug,
+            debug = debug,
+            dataloader_drop_last = dataloader_drop_last,
+            eval_steps = eval_steps,
+            dataloader_num_workers = dataloader_num_workers,
+            dataloader_prefetch_factor = dataloader_prefetch_factor,
+            past_index = past_index,
+            run_name = run_name,
+            disable_tqdm = disable_tqdm,
+            remove_unused_columns = remove_unused_columns,
+            label_names = label_names,
+            load_best_model_at_end = load_best_model_at_end,
+            metric_for_best_model = metric_for_best_model,
+            greater_is_better = greater_is_better,
+            ignore_data_skip = ignore_data_skip,
+            fsdp = fsdp,
+            fsdp_min_num_params = fsdp_min_num_params,
+            fsdp_config = fsdp_config,
+            fsdp_transformer_layer_cls_to_wrap = fsdp_transformer_layer_cls_to_wrap,
+            accelerator_config = accelerator_config,
+            parallelism_config = parallelism_config,
+            deepspeed = deepspeed,
+            label_smoothing_factor = label_smoothing_factor,
+            optim = optim,
+            optim_args = optim_args,
+            adafactor = adafactor,
+            group_by_length = group_by_length,
+            length_column_name = length_column_name,
+            report_to = report_to,
+            project = project,
+            trackio_space_id = trackio_space_id,
+            ddp_find_unused_parameters = ddp_find_unused_parameters,
+            ddp_bucket_cap_mb = ddp_bucket_cap_mb,
+            ddp_broadcast_buffers = ddp_broadcast_buffers,
+            dataloader_pin_memory = dataloader_pin_memory,
+            dataloader_persistent_workers = dataloader_persistent_workers,
+            skip_memory_metrics = skip_memory_metrics,
+            use_legacy_prediction_loop = use_legacy_prediction_loop,
+            push_to_hub = push_to_hub,
+            resume_from_checkpoint = resume_from_checkpoint,
+            hub_model_id = hub_model_id,
+            hub_strategy = hub_strategy,
+            hub_token = hub_token,
+            hub_private_repo = hub_private_repo,
+            hub_always_push = hub_always_push,
+            hub_revision = hub_revision,
+            gradient_checkpointing = gradient_checkpointing,
+            gradient_checkpointing_kwargs = gradient_checkpointing_kwargs,
+            include_inputs_for_metrics = include_inputs_for_metrics,
+            eval_do_concat_batches = eval_do_concat_batches,
+            fp16_backend = fp16_backend,
+            push_to_hub_model_id = push_to_hub_model_id,
+            push_to_hub_organization = push_to_hub_organization,
+            push_to_hub_token = push_to_hub_token,
+            mp_parameters = mp_parameters,
+            auto_find_batch_size = auto_find_batch_size,
+            full_determinism = full_determinism,
+            torchdynamo = torchdynamo,
+            ray_scope = ray_scope,
+            ddp_timeout = ddp_timeout,
+            torch_compile = torch_compile,
+            torch_compile_backend = torch_compile_backend,
+            torch_compile_mode = torch_compile_mode,
+            include_tokens_per_second = include_tokens_per_second,
+            include_num_input_tokens_seen = include_num_input_tokens_seen,
+            neftune_noise_alpha = neftune_noise_alpha,
+            optim_target_modules = optim_target_modules,
+            batch_eval_metrics = batch_eval_metrics,
+            eval_on_start = eval_on_start,
+            use_liger_kernel = use_liger_kernel,
+            liger_kernel_config = liger_kernel_config,
+            eval_use_gather_object = eval_use_gather_object,
+            average_tokens_across_devices = average_tokens_across_devices,
+            max_length = max_length,
+            disable_dropout = disable_dropout,
+            dataset_num_proc = dataset_num_proc,
+            center_rewards_coefficient = center_rewards_coefficient,**kwargs)
+        self.vllm_sampling_params = vllm_sampling_params
+        self.unsloth_num_chunks = unsloth_num_chunks
+        self.max_seq_length = max_seq_length
+pass
+
+class _UnslothRewardTrainer(Trainer):
+    """
+    Trainer for custom reward.
+
+    Args:
+        model ([`~transformers.PreTrainedModel`] or `torch.nn.Module`, *optional*):
+            Model to be trained, preferably an [`~transformers.AutoModelForSequenceClassification`].
+        args ([`RewardConfig`], *optional*):
+            Training arguments.
+        data_collator ([`~transformers.DataCollator`], *optional*):
+            The data collator to use for training. If None is specified, the default data collator
+            [`~trainer.utils.RewardDataCollatorWithPadding`] will be used, which will pad the sequences to the
+            maximum length of the sequences in the batch, given a dataset of paired sequences.
+ train_dataset ([`~datasets.Dataset`], *optional*): + The dataset to use for training. + eval_dataset ([`~datasets.Dataset`], *optional*): + The dataset to use for evaluation. + processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*): + Processing class used to process the data. If provided, will be used to automatically process the inputs + for the model, and it will be saved along with the model to make it easier to rerun an interrupted training or + reuse the fine-tuned model. + model_init (`Callable[[], transformers.PreTrainedModel]`, *optional*): + The model initializer to use for training. If None is specified, the default model initializer will be + used. + compute_metrics (`Callable[[transformers.EvalPrediction], dict]`, *optional*, defaults to [`~trainer.utils.compute_accuracy`]): + Function to compute metrics at evaluation. Must take in an [`~transformers.EvalPrediction`] and return a + dictionary mapping strings to floats. + callbacks (`list` of [`~transformers.TrainerCallback`], *optional*): + Callbacks to use during training. + optimizers (`tuple` of `torch.optim.Optimizer` and `torch.optim.lr_scheduler.LambdaLR`, *optional*, defaults to `(None, None)`): + Tuple containing the optimizer and the learning rate scheduler to use for training. + preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`, *optional*): + Function to preprocess the logits before computing the metrics. Must take in the `logits` and `labels` and + return the logits to be used for metrics computation. + peft_config (`dict`, *optional*): + PEFT configuration to use for training. If `None`, PEFT is not used. If provided, the `model` will be + wrapped with the specified PEFT adapter. 
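+
+    As a quick orientation (this summarizes `compute_loss` below rather than adding any new API): the trainer
+    scores each (chosen, rejected) pair and minimizes the pairwise loss `-log(sigmoid(r_chosen - r_rejected))`,
+    optionally shifted by a per-sample `margin` column, plus an optional regularizer
+    `center_rewards_coefficient * mean((r_chosen + r_rejected) ** 2)` when that coefficient is set.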
+ """ + + _tag_names = ["trl", "reward-trainer"] + + def __init__( + self, + model: Optional[Union[PreTrainedModel, nn.Module]] = None, + args: Optional[RewardConfig] = None, + data_collator: Optional[DataCollator] = None, + train_dataset: Optional[Dataset] = None, + eval_dataset: Optional[Union[Dataset, dict[str, Dataset]]] = None, + processing_class: Optional[ + Union[PreTrainedTokenizerBase, BaseImageProcessor, FeatureExtractionMixin, ProcessorMixin] + ] = None, + model_init: Optional[Callable[[], PreTrainedModel]] = None, + compute_metrics: Optional[Callable[[EvalPrediction], dict]] = None, + callbacks: Optional[list[TrainerCallback]] = None, + optimizers: tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = ( + None, + None, + ), + preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None, + peft_config: Optional[dict] = None, + ): + if False: + model = prepare_peft_model(model, peft_config, args) + + # Disable dropout in the model + if args.disable_dropout: + disable_dropout_in_model(model) + + if compute_metrics is None: + compute_metrics = compute_accuracy + + if data_collator is None: + if processing_class is None: + raise ValueError( + "A processing_class must be specified when using the default RewardDataCollatorWithPadding" + ) + + max_length = args.max_length + + data_collator = RewardDataCollatorWithPadding(processing_class) + + if args.remove_unused_columns: + try: # for bc before https://github.com/huggingface/transformers/pull/25435 + args.remove_unused_columns = False + except FrozenInstanceError: + args = replace(args, remove_unused_columns=False) + # warn users + logger.warning( + "When using RewardDataCollatorWithPadding, you should set `remove_unused_columns=False` in your RewardConfig" + " we have set it for you, but you should do it yourself in the future.", + ) + + self.use_reward_data_collator = True + else: + self.use_reward_data_collator = False + + # The trainer estimates the number of FLOPs [floating-point operations] using the number of elements in the + # input tensor associated with the key "input_ids". However, in Reward, the sampled data does not include the + # "input_ids" key. Instead, the available keys are "input_ids_chosen" and "input_ids_rejected". As a result, + # the trainer issues the warning: "Could not estimate the number of tokens of the input, floating-point + # operations will not be computed." To suppress this warning, we set the "estimate_tokens" key in the model's + # "warnings_issued" dictionary to True. This acts as a flag to indicate that the warning has already been + # issued. + model.warnings_issued["estimate_tokens"] = True + + if "input_ids_chosen" not in train_dataset.column_names: + with PartialState().main_process_first(): + fn_kwargs = {"tokenizer": processing_class} + train_dataset = train_dataset.map(maybe_apply_chat_template, fn_kwargs={"tokenizer": processing_class}) + train_dataset = train_dataset.map( + _tokenize, + batched=True, + fn_kwargs=fn_kwargs, + num_proc=args.dataset_num_proc, + ) + # This filter is important because otherwise you get samples that exceed the model's context length and + # get truncated => noisy signal the chosen/rejected label gets lost. The downside is that the + # user might get surprised if N samples are missing from training. 
+ train_dataset = train_dataset.filter( + lambda x: len(x["input_ids_chosen"]) <= max_length and len(x["input_ids_rejected"]) <= max_length, + num_proc=args.dataset_num_proc, + ) + if eval_dataset is not None: + eval_dataset = eval_dataset.map( + maybe_apply_chat_template, fn_kwargs={"tokenizer": processing_class} + ) + eval_dataset = eval_dataset.map( + _tokenize, + fn_kwargs=fn_kwargs, + batched=True, + num_proc=args.dataset_num_proc, + ) + # This filter is important because otherwise you get samples that exceed the model's context length and + # get truncated, which yields a noisy signal since the chosen/rejected label is lost. The downside is that the + # user might be surprised if N samples are missing from training. + eval_dataset = eval_dataset.filter( + lambda x: len(x["input_ids_chosen"]) <= max_length + and len(x["input_ids_rejected"]) <= max_length, + num_proc=args.dataset_num_proc, + ) + + super().__init__( + model=model, + args=args, + data_collator=data_collator, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + processing_class=processing_class, + model_init=model_init, + compute_metrics=compute_metrics, + callbacks=callbacks, + optimizers=optimizers, + preprocess_logits_for_metrics=preprocess_logits_for_metrics, + ) + + # Add tags for models that have been loaded with the correct transformers version + if hasattr(self.model, "add_model_tags"): + self.model.add_model_tags(self._tag_names) + + def compute_loss( + self, + model: Union[PreTrainedModel, nn.Module], + inputs: dict[str, Union[torch.Tensor, Any]], + return_outputs=False, + num_items_in_batch=None, + ) -> Union[torch.Tensor, tuple[torch.Tensor, dict[str, torch.Tensor]]]: + rewards_chosen = model( + input_ids=inputs["input_ids_chosen"], + attention_mask=inputs["attention_mask_chosen"], + return_dict=True, + )["logits"] + rewards_rejected = model( + input_ids=inputs["input_ids_rejected"], + attention_mask=inputs["attention_mask_rejected"], + return_dict=True, + )["logits"] + # calculate loss, optionally modulate with margin + if "margin" in inputs: + loss = -nn.functional.logsigmoid(rewards_chosen - rewards_rejected - inputs["margin"]).mean() + else: + loss = -nn.functional.logsigmoid(rewards_chosen - rewards_rejected).mean() + + if self.args.center_rewards_coefficient is not None: + loss += self.args.center_rewards_coefficient * torch.mean((rewards_chosen + rewards_rejected) ** 2) + + if return_outputs: + return loss, { + "rewards_chosen": rewards_chosen, + "rewards_rejected": rewards_rejected, + } + return loss + + def prediction_step( + self, + model: Union[PreTrainedModel, nn.Module], + inputs: dict[str, Union[torch.Tensor, Any]], + prediction_loss_only: bool, + ignore_keys: Optional[list[str]] = None, + ) -> tuple[Optional[torch.Tensor], Optional[torch.Tensor], Optional[torch.Tensor]]: + inputs = self._prepare_inputs(inputs) + if ignore_keys is None: + if hasattr(self.model, "config"): + ignore_keys = getattr(self.model.config, "keys_to_ignore_at_inference", []) + else: + ignore_keys = [] + + with torch.no_grad(): + loss, logits_dict = self.compute_loss(model, inputs, return_outputs=True) + + if prediction_loss_only: + return (loss, None, None) + + loss = loss.detach() + logits = tuple(v for k, v in logits_dict.items() if k not in ignore_keys) + logits = nested_detach(logits) + # Stack accepted against rejected, take the mean over logits, + # and softmax so that the accepted/rejected preferences sum to 1 + logits = torch.stack(logits).mean(dim=2).softmax(dim=0).T + + labels = torch.zeros(logits.shape[0]) + labels = 
self._prepare_inputs(labels) + + return loss, logits, labels + + def evaluate(self, *args, **kwargs): + num_print_samples = kwargs.pop("num_print_samples", 4) + self.visualize_samples(num_print_samples) + return super().evaluate(*args, **kwargs) + + def visualize_samples(self, num_print_samples: int): + """ + Visualize the reward model logits prediction + + Args: + num_print_samples (`int`, defaults to `4`): + The number of samples to print. Set to `-1` to print all samples. + """ + eval_dataloader = self.get_eval_dataloader() + table = defaultdict(list) + for _, inputs in enumerate(eval_dataloader): + _, logits, _ = self.prediction_step(self.model, inputs, prediction_loss_only=False) + chosen_text = decode_and_strip_padding(inputs["input_ids_chosen"], self.processing_class) + rejected_text = decode_and_strip_padding(inputs["input_ids_rejected"], self.processing_class) + table["chosen_text"].extend(gather_object(chosen_text)) + table["rejected_text"].extend(gather_object(rejected_text)) + table["logits"].extend( + gather_object([[round(inner_item, 4) for inner_item in item] for item in logits.tolist()]) + ) + if num_print_samples >= 0 and len(table["chosen_text"]) >= num_print_samples: + break + df = pd.DataFrame(table) + if self.accelerator.process_index == 0: + if is_rich_available(): + print_rich_table(df[:num_print_samples]) + if "wandb" in self.args.report_to: + import wandb + + if wandb.run is not None: + wandb.log({"completions": wandb.Table(dataframe=df)}) + + if "comet_ml" in self.args.report_to: + log_table_to_comet_experiment( + name="completions.csv", + table=df, + ) + + # Ensure the model card is saved along with the checkpoint + def _save_checkpoint(self, model, trial): + if self.args.hub_model_id is None: + model_name = Path(self.args.output_dir).name + else: + model_name = self.args.hub_model_id.split("/")[-1] + self.create_model_card(model_name=model_name) + super()._save_checkpoint(model, trial) + + def create_model_card( + self, + model_name: Optional[str] = None, + dataset_name: Optional[str] = None, + tags: Union[str, list[str], None] = None, + ): + """ + Creates a draft of a model card using the information available to the `Trainer`. + + Args: + model_name (`str` or `None`, *optional*, defaults to `None`): + Name of the model. + dataset_name (`str` or `None`, *optional*, defaults to `None`): + Name of the dataset used for training. + tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`): + Tags to be associated with the model card. + """ + if not self.is_world_process_zero(): + return + + if hasattr(self.model.config, "_name_or_path") and not os.path.isdir(self.model.config._name_or_path): + base_model = self.model.config._name_or_path + else: + base_model = None + + # normalize `tags` to a mutable set + if tags is None: + tags = set() + elif isinstance(tags, str): + tags = {tags} + else: + tags = set(tags) + + if hasattr(self.model.config, "unsloth_version"): + tags.add("unsloth") + + if "JOB_ID" in os.environ: + tags.add("hf_jobs") + + tags.update(self._tag_names) + + model_card = generate_model_card( + base_model=base_model, + model_name=model_name, + hub_model_id=self.hub_model_id, + dataset_name=dataset_name, + tags=tags, + wandb_url=wandb.run.url if is_wandb_available() and wandb.run is not None else None, + comet_url=get_comet_experiment_url(), + trainer_name="Reward", + ) + + model_card.save(os.path.join(self.args.output_dir, "README.md")) +class UnslothRewardTrainer(_UnslothRewardTrainer): + """ + +Trainer for custom reward. 
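+
+Example (an illustrative sketch: the model, tokenizer, and dataset are placeholders you must supply, not part of the generated API):
+
+```python
+from trl import RewardConfig
+
+trainer = UnslothRewardTrainer(
+    model = model,                    # e.g. a sequence-classification model with a single reward head
+    args = RewardConfig(output_dir = "reward_model"),
+    train_dataset = dataset,          # paired (chosen, rejected) preference data
+    processing_class = tokenizer,
+)
+trainer.train()
+```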
+ +Args: + model ([`~transformers.PreTrainedModel`] or `torch.nn.Module`, *optional*): + Model to be trained, preferably an [`~transformers.AutoModelForSequenceClassification`]. + args ([`RewardConfig`], *optional*): + Training arguments. + data_collator ([`~transformers.DataCollator`], *optional*): + The data collator to use for training. If None is specified, the default data collator + [`~trainer.utils.RewardDataCollatorWithPadding`] will be used, which pads the sequences to the maximum + length of the sequences in the batch, given a dataset of paired sequences. + train_dataset ([`~datasets.Dataset`], *optional*): + The dataset to use for training. + eval_dataset ([`~datasets.Dataset`], *optional*): + The dataset to use for evaluation. + processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*): + Processing class used to process the data. If provided, will be used to automatically process the inputs + for the model, and it will be saved along with the model to make it easier to rerun an interrupted training or + reuse the fine-tuned model. + model_init (`Callable[[], transformers.PreTrainedModel]`, *optional*): + The model initializer to use for training. If None is specified, the default model initializer will be + used. + compute_metrics (`Callable[[transformers.EvalPrediction], dict]`, *optional*, defaults to [`~trainer.utils.compute_accuracy`]): + Function to compute metrics at evaluation. Must take in an [`~transformers.EvalPrediction`] and return a + dictionary mapping strings to floats. + callbacks (`list` of [`~transformers.TrainerCallback`], *optional*): + Callbacks to use during training. + optimizers (`tuple` of `torch.optim.Optimizer` and `torch.optim.lr_scheduler.LambdaLR`, *optional*, defaults to `(None, None)`): + Tuple containing the optimizer and the learning rate scheduler to use for training. + preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`, *optional*): + Function to preprocess the logits before computing the metrics. Must take in the `logits` and `labels` and + return the logits to be used for metrics computation. + peft_config (`dict`, *optional*): + PEFT configuration to use for training. If `None`, PEFT is not used. If provided, the `model` will be + wrapped with the specified PEFT adapter. 
+ + """ + def __init__( + self, + model = None, + args = None, + data_collator = None, + train_dataset = None, + eval_dataset = None, + processing_class = None, + model_init = None, + compute_metrics = None, + callbacks = None, + preprocess_logits_for_metrics = None, + peft_config = None, + **kwargs + ): + if args is None: args = UnslothRewardConfig() + use_bf16 = getattr(args, 'bf16', False) + if type(use_bf16) is not bool: use_bf16 = False + use_fp16 = getattr(args, 'fp16', False) + if type(use_fp16) is not bool: use_fp16 = False + force_float32 = False + full_finetuning = os.environ.get('UNSLOTH_ENABLE_FULL_FINETUNING', '0') == '1' + if not full_finetuning and (os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '1'): + print('Unsloth: Switching to float32 training since model cannot work with float16') + force_float32 = True + mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') + dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None) + if dtype is None: dtype = model.get_input_embeddings().dtype + from unsloth_zoo.utils import _get_dtype + dtype = _get_dtype(dtype) + float16 = dtype == torch.float16 + if not force_float32 and (float16 and use_bf16): raise TypeError('Unsloth: Model is in float16 precision but you want to use bfloat16 precision. Set fp16 to `True` and bf16 to `False`') + if not force_float32 and (not float16 and use_fp16): raise TypeError('Unsloth: Model is in bfloat16 precision but you want to use float16 precision. Set fp16 to `False` and bf16 to `True`') + if force_float32: + # Forced float32 training + args.fp16 = False + args.bf16 = False + os.environ['ACCELERATE_MIXED_PRECISION'] = 'no' + elif (not use_bf16 and not use_fp16) and mixed_precision_dtype == 'float32': + # Mixed precision training + args.fp16 = float16 + args.bf16 = not float16 + os.environ['ACCELERATE_MIXED_PRECISION'] = 'fp16' if float16 else 'bf16' + if getattr(args, 'eval_dataset', None) is not None and getattr(args, 'eval_strategy', 'no') == 'no': + args.eval_strategy = 'steps' + if getattr(args, 'eval_steps', None) is None: args.eval_steps = 0.1 + ga_steps = getattr(args, 'gradient_accumulation_steps', None) + if ga_steps is not None and ga_steps > 1: + from transformers import __version__ as transformers_version + if Version(transformers_version) <= Version('4.45.2'): + print('**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!\n' + '`pip install --upgrade --no-cache-dir --force-reinstall --no-deps unsloth transformers trl unsloth_zoo`') + if getattr(args, 'eval_strategy', 'no') != 'no': + eval_bsz = getattr(args, 'per_device_eval_batch_size', 8) + if eval_bsz == 8 and args.per_device_train_batch_size < eval_bsz: args.per_device_eval_batch_size = args.per_device_train_batch_size + if getattr(args, 'eval_accumulation_steps', None) is None and ga_steps is not None: args.eval_accumulation_steps = ga_steps + fp16_full_eval = getattr(args, 'fp16_full_eval', False) + if type(fp16_full_eval) is not bool: fp16_full_eval = False + bf16_full_eval = getattr(args, 'bf16_full_eval', False) + if type(bf16_full_eval) is not bool: bf16_full_eval = False + if args.fp16 and bf16_full_eval: args.bf16_full_eval = False; args.fp16_full_eval = True + if args.bf16 and fp16_full_eval: args.bf16_full_eval = True; args.fp16_full_eval = False + if force_float32: + args.bf16_full_eval = False + args.fp16_full_eval = False + elif os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') == 'bfloat16': + 
args.bf16_full_eval = True + args.fp16_full_eval = False + elif not bf16_full_eval and not fp16_full_eval: + args.bf16_full_eval = args.bf16 + args.fp16_full_eval = args.fp16 + _output_logits = False + if locals().get('compute_metrics', None) is not None: _output_logits = True + if locals().get('preprocess_logits_for_metrics', None) is not None: _output_logits = True + if _output_logits: + os.environ['UNSLOTH_RETURN_LOGITS'] = '1' + if 'max_seq_length' not in locals() and not hasattr(args, 'max_seq_length'): + pass + else: + model_max_seq_length = getattr(model, 'max_seq_length', None) + args_max_seq_length = getattr(args, 'max_seq_length', None) + if args_max_seq_length is None and model_max_seq_length is not None: + max_seq_length = model.max_seq_length + if hasattr(args, 'max_seq_length'): args.max_seq_length = max_seq_length + if model is not None and hasattr(model, 'for_training'): + model.for_training() + if 'tokenizer' in locals() and hasattr(tokenizer, 'padding_side'): tokenizer.padding_side = 'right' + if 'processing_class' in locals(): + if hasattr(processing_class, 'padding_side'): processing_class.padding_side = 'right' + if hasattr(processing_class, 'tokenizer') and hasattr(processing_class.tokenizer, 'padding_side'): processing_class.tokenizer.padding_side = 'right' + __tokenizer = processing_class if 'processing_class' in locals() else tokenizer + from unsloth_zoo.vision_utils import UnslothVisionDataCollator + if not isinstance(data_collator, UnslothVisionDataCollator): + if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names: + data_collator = DataCollatorForSeq2Seq( + __tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False + if hasattr(args, 'dataset_text_field'): args.dataset_text_field = '' + if hasattr(args, 'dataset_kwargs'): args.dataset_kwargs = {'skip_prepare_dataset': True} + if not isinstance(data_collator, UnslothVisionDataCollator): + if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'): + if isinstance(data_collator, DataCollatorForSeq2Seq): + data_collator = DataCollatorForSeq2Seq( + __tokenizer.tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer.tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + other_metrics = [] + + from unsloth_zoo.logging_utils import PatchRLStatistics + PatchRLStatistics('reward_trainer', other_metrics) + + # [TODO] Fix up DataParallel multiplying batch sizes + # [TODO] DDP works, but DP seems to not work? 
[TODO] + if getattr(args, "parallel_mode", None) == ParallelMode.NOT_DISTRIBUTED and args.n_gpu > 1: + if getattr(args, "_n_gpu", 1) != 1: + args._n_gpu = 1 + if "model" in locals() and hasattr(model, "for_training"): + model.for_training() + super().__init__( + model = model, + args = args, + data_collator = data_collator, + train_dataset = train_dataset, + eval_dataset = eval_dataset, + processing_class = processing_class, + model_init = model_init, + compute_metrics = compute_metrics, + callbacks = callbacks, + preprocess_logits_for_metrics = preprocess_logits_for_metrics, + peft_config = peft_config,**kwargs) + if "model" in locals() and hasattr(model, "for_inference"): + model.for_inference() + if hasattr(self, 'neftune_hook_handle'): + self.neftune_hook_handle.remove() + if hasattr(self, 'neftune_hook_handle'): del self.neftune_hook_handle + if getattr(args, 'neftune_noise_alpha', None) is not None: + model.get_input_embeddings().neftune_noise_alpha = self.neftune_noise_alpha + pass + if hasattr(self, 'accelerator'): + scaler = self.accelerator.scaler + current_model = model + while hasattr(current_model, 'model'): + current_model.accelerator_scaler = scaler + current_model = current_model.model + current_model.accelerator_scaler = scaler + pass + if hasattr(self, 'train'): + self.train = MethodType(prepare_for_training_mode(self.__class__.train), self) + pass + +pass + + +if hasattr(logger, "addFilter"): + import logging + class HideLoggingMessage(logging.Filter): + def __init__(self, text): self.text = text + def filter(self, x): return not (self.text in x.getMessage()) + pass + logger.addFilter(HideLoggingMessage("`use_cache=True`")) + diff --git a/unsloth_compiled_cache/UnslothSFTTrainer.py b/unsloth_compiled_cache/UnslothSFTTrainer.py new file mode 100644 index 0000000000000000000000000000000000000000..25830fad7d842e32b02eafcd50c15b6a1efdeb23 --- /dev/null +++ b/unsloth_compiled_cache/UnslothSFTTrainer.py @@ -0,0 +1,1553 @@ +""" +2025.11.2 +2025.11.1 +4.57.2 +0.23.0 +__UNSLOTH_VERSIONING__ +""" + +# Unsloth auto generated code +# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . 
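+
+# Rough usage sketch for the log-softmax helper defined below (shapes are illustrative only, not a public API):
+#
+#   logits = model(input_ids).logits                                    # [batch, seq, vocab]
+#   per_token_logps = chunked_selective_log_softmax(logits, input_ids)  # [batch, seq]
+#
+# The helper splits the flattened logits into 4 chunks, upcasts each chunk to float32, and computes
+# gather(logits, index) - logsumexp(logits) per token, bounding peak memory compared to one full pass.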
+ +from torch import Tensor +import torch +import torch.nn as nn +from torch.nn import functional as F +from typing import Any, List, Optional, Tuple, Union, Dict, Set, Callable +from trl.trainer.sft_trainer import (Any, AutoConfig, AutoProcessor, Callable, DataCollator, DataCollatorForLanguageModeling, DataCollatorForVisionLanguageModeling, Dataset, EvalPrediction, IterableDataset, Optional, Path, PeftConfig, PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin, SFTConfig, SFTTrainer, Trainer, TrainerCallback, TrainingArguments, Union, clone_chat_template, contextlib, dataclass, defaultdict, dft_loss, generate_model_card, get_act_offloading_ctx_manager, get_comet_experiment_url, is_conversational, is_wandb_available, logger, logging, nn, os, pack_dataset, pad, prepare_peft_model, selective_log_softmax, torch, transformers, Callable, DataCollator, DataCollatorForLanguageModeling, Dataset, IterableDataset, Optional, Union, os, pack_dataset, pad, transformers, Optional, PreTrainedModel, Trainer, logger, os, torch, os) + + +import os +from typing import * +from dataclasses import dataclass, field +from packaging.version import Version +import torch +import numpy as np +from contextlib import nullcontext +from torch.nn import functional as F +from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling as TransformersDataCollatorForLanguageModeling +from transformers.training_args import ParallelMode + +# Wrap trainer with padding to right and enable training mode +import functools +from types import MethodType +def prepare_for_training_mode(f): + @functools.wraps(f) + def wrapper(self, *args, **kwargs): + # Enable training mode + if hasattr(self, 'model') and hasattr(self.model, "for_training"): + self.model.for_training() + output = f(self, *args, **kwargs) + # Return inference mode + if hasattr(self, 'model') and hasattr(self.model, "for_inference"): + self.model.for_inference() + return output + return wrapper +pass + +torch_compile_options = { + "epilogue_fusion" : True, + "max_autotune" : False, + "shape_padding" : True, + "trace.enabled" : False, + "triton.cudagraphs" : False, +} + +@torch.compile(dynamic = True, fullgraph = True, options = torch_compile_options,) +def chunked_selective_log_softmax(logits, index): + # Split into 4 chunks only + chunked_logits = torch.chunk(logits.reshape(-1, logits.shape[-1]), chunks = 4, dim = 0) + chunked_index = torch.chunk(index.reshape(-1), chunks = 4, dim = 0) + all_per_token_logps = [] + # Below loop does the same as selective_log_softmax(chunk_logits, chunk_index) + for chunk_logits, chunk_index in zip(chunked_logits, chunked_index): + chunk_logits = chunk_logits.to(torch.float32) + selected_logits = torch.gather(chunk_logits, dim = -1, index = chunk_index.unsqueeze(-1)).squeeze(-1) + logsumexp_values = torch.logsumexp(chunk_logits, dim = -1) + per_token_logps = selected_logits - logsumexp_values + all_per_token_logps.append(per_token_logps) + pass + all_per_token_logps = torch.concat(all_per_token_logps) + all_per_token_logps = all_per_token_logps.reshape((logits.shape[0], logits.shape[1])) + return all_per_token_logps + +def calculate_pad_tokens_in_prompt( + input_ids: torch.Tensor, + logits_to_keep: int, + pad_token_id: int +) -> torch.Tensor: + """ + Given prompt tensor, it returns all the left padded tokens in that sequence. 
e.g. [pad, pad, pad, cat] = 3 pad tokens + """ + if logits_to_keep >= input_ids.shape[1]: + raise ValueError("logits_to_keep must be smaller than the sequence length.") + + prompt_section = input_ids[:, :-logits_to_keep] + + padding_mask = (prompt_section == pad_token_id) + + pad_token_counts = padding_mask.sum(dim=1) + + return pad_token_counts + +def create_completion_attention_mask( + completion_input_ids: torch.Tensor, + left_pad_tokens_per_prompt: torch.Tensor, + max_left_pad: int, + pad_token_id: int +) -> torch.Tensor: + """ + Given that we have a sequence [p,p,p,c,c,c,pad,pad,pad], + + where p are extra prompt tokens left over from slicing the torch tensor, c are completion tokens, + and pad are pad tokens, this function builds a completion mask that zeros out the pad + and p tokens. So, in this example, the mask is [0,0,0,1,1,1,0,0,0]. + """ + batch_size, completion_len = completion_input_ids.shape + device = completion_input_ids.device + + num_tokens_to_mask = max_left_pad - left_pad_tokens_per_prompt + + indices = torch.arange(completion_len, device=device).unsqueeze(0) + shift_mask = indices >= num_tokens_to_mask.unsqueeze(1) + + non_padding_mask = (completion_input_ids != pad_token_id) + + final_mask = shift_mask & non_padding_mask + + return final_mask + +def left_pack_padding(tensor: torch.Tensor, pad_id: int) -> torch.Tensor: + """ + Moves all padding tokens in each sequence of a batch to the right. + """ + mask = (tensor != pad_id) + # Must do stable=True since the binary mask is unordered + sorted_indices = torch.argsort(mask, dim=1, descending=True, stable=True) + packed_tensor = torch.gather(tensor, 1, sorted_indices) + return packed_tensor + +def align_logprobs_with_mask( + logprob_tensor: torch.Tensor, + attention_mask: torch.Tensor, + pad_value: float = 0.0 +) -> torch.Tensor: + """ + Aligns a log probability tensor with a given attention mask. + """ + + device = logprob_tensor.device + batch_size, logprob_seq_len = logprob_tensor.shape + mask_seq_len = attention_mask.shape[1] + + padded_logprobs = torch.full( + attention_mask.shape, + fill_value=pad_value, + dtype=logprob_tensor.dtype, + device=device + ) + + left_pad_counts = torch.argmax(attention_mask, dim=1) + + cols = torch.arange(logprob_seq_len, device=device) + dest_indices = left_pad_counts.unsqueeze(1) + cols + + # Create destination row indices + # Shape: [batch_size, logprob_seq_len] + row_indices = torch.arange(batch_size, device=device).unsqueeze(1).expand_as(dest_indices) + + # --- Filter out-of-bounds indices and perform assignment --- + # Create a mask to identify only the indices that are within the bounds + # of the target tensor's sequence length. + valid_mask = dest_indices < mask_seq_len + + # Use this mask to select only the valid row indices, column indices, + # and the corresponding values from the logprob tensor. + # This flattens the selected elements into 1D tensors. + valid_rows = row_indices[valid_mask] + valid_cols = dest_indices[valid_mask] + valid_vals = logprob_tensor[valid_mask] + + # Place the valid values into their correct positions in the padded tensor + # using a single, efficient advanced indexing operation. + padded_logprobs[valid_rows, valid_cols] = valid_vals + + return padded_logprobs +@dataclass +class UnslothSFTConfig(SFTConfig): + """ + +Configuration class for the [`SFTTrainer`]. + +This class includes only the parameters that are specific to SFT training. For a full list of training arguments, +please refer to the [`~transformers.TrainingArguments`] documentation. 
Note that default values in this class may +differ from those in [`~transformers.TrainingArguments`]. + +Using [`~transformers.HfArgumentParser`] we can turn this class into +[argparse](https://docs.python.org/3/library/argparse#module-argparse) arguments that can be specified on the +command line. + +Parameters: + > Parameters that control the model + + model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`): + Keyword arguments for [`~transformers.AutoModelForCausalLM.from_pretrained`], used when the `model` + argument of the [`SFTTrainer`] is provided as a string. If you're training an MoE architecture and want to + include the load balancing/auxiliary loss as a part of the final loss, remember to set + `output_router_logits=True` in this dictionary. + chat_template_path (`str` or `None`, *optional*, defaults to `None`): + If specified, sets the model's chat template. This can either be the path to a tokenizer (local directory + or Hugging Face Hub model) or a direct path to a Jinja template file. When using a Jinja file, you must + ensure that any special tokens referenced in the template are added to the tokenizer and that the model's + embedding layer is resized accordingly. + + > Parameters that control the data preprocessing + + dataset_text_field (`str`, *optional*, defaults to `"text"`): + Name of the column that contains text data in the dataset. + dataset_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`): + Dictionary of optional keyword arguments for the dataset preparation. The only supported key is + `skip_prepare_dataset`. When the model is a VLM, `skip_prepare_dataset` is automatically treated as `True` + regardless of the provided value, since preprocessing is done on the fly. + dataset_num_proc (`int` or `None`, *optional*, defaults to `None`): + Number of processes to use for processing the dataset. + eos_token (`str` or `None`, *optional*, defaults to `None`): + Token used to indicate the end of a turn or sequence. If `None`, it defaults to + `processing_class.eos_token`. + pad_token (`int` or `None`, *optional*, defaults to `None`): + Token used for padding. If `None`, it defaults to `processing_class.pad_token`, or if that is also `None`, + it falls back to `processing_class.eos_token`. + max_length (`int` or `None`, *optional*, defaults to `1024`): + Maximum length of the tokenized sequence. Sequences longer than `max_length` are truncated from the right. + If `None`, no truncation is applied. When packing is enabled, this value sets the sequence length. + packing (`bool`, *optional*, defaults to `False`): + Whether to group multiple sequences into fixed-length blocks to improve computational efficiency and reduce + padding. Uses `max_length` to define sequence length. + packing_strategy (`str`, *optional*, defaults to `"bfd"`): + Strategy for packing sequences. Can be either `"bfd"` (best-fit decreasing, default) or `"wrapped"`. + padding_free (`bool`, *optional*, defaults to `False`): + Whether to perform forward passes without padding by flattening all sequences in the batch into a single + continuous sequence. This reduces memory usage by eliminating padding overhead. Currently, this is only + supported with FlashAttention 2 or 3, which can efficiently handle the flattened batch structure. When + packing is enabled with strategy `"bfd"`, padding-free is enabled, regardless of the value of this + parameter. 
+ pad_to_multiple_of (`int` or `None`, *optional*, defaults to `None`): + If set, the sequences will be padded to a multiple of this value. + eval_packing (`bool` or `None`, *optional*, defaults to `None`): + Whether to pack the eval dataset. If `None`, uses the same value as `packing`. + + > Parameters that control the training + + completion_only_loss (`bool` or `None`, *optional*, defaults to `None`): + Whether to compute loss only on the completion part of the sequence. If set to `True`, loss is computed + only on the completion, which is supported only for [prompt-completion](#prompt-completion) datasets. If + `False`, loss is computed on the entire sequence. If `None` (default), the behavior depends on the dataset: + loss is computed on the completion for [prompt-completion](#prompt-completion) datasets, and on the full + sequence for [language modeling](#language-modeling) datasets. + assistant_only_loss (`bool`, *optional*, defaults to `False`): + Whether to compute loss only on the assistant part of the sequence. If set to `True`, loss is computed only + on the assistant responses, which is supported only for [conversational](#conversational) datasets. If + `False`, loss is computed on the entire sequence. + loss_type (`str`, *optional*, defaults to `"nll"`): + Type of loss to use. Possible values are `"nll"` (negative log-likelihood, default) and `"dft"` (Dynamic + Fine-Tuning, as described in [this paper](https://huggingface.co/papers/2508.05629)). + activation_offloading (`bool`, *optional*, defaults to `False`): + Whether to offload the activations to the CPU. + + """ + vllm_sampling_params: Optional[Any] = field( + default = None, + metadata = {'help': 'vLLM SamplingParams'}, + ) + unsloth_num_chunks : Optional[int] = field( + default = -1, + metadata = {'help': 'Chunk size to reduce memory usage. 
-1 is most efficient.'}, + ) + max_seq_length : Optional[int] = field( + default = None, + metadata = {'help': 'Maximum sequence length to truncate to.'}, + ) + def __init__( + self, + output_dir = None, + overwrite_output_dir = None, + do_train = False, + do_eval = False, + do_predict = False, + eval_strategy = 'no', + prediction_loss_only = False, + per_device_train_batch_size = 4, + per_device_eval_batch_size = 4, + per_gpu_train_batch_size = None, + per_gpu_eval_batch_size = None, + gradient_accumulation_steps = 2, + eval_accumulation_steps = 2, + eval_delay = 0, + torch_empty_cache_steps = 250, + learning_rate = 5e-05, + weight_decay = 0.01, + adam_beta1 = 0.9, + adam_beta2 = 0.999, + adam_epsilon = 1e-08, + max_grad_norm = 1.0, + num_train_epochs = 3.0, + max_steps = -1, + lr_scheduler_type = 'linear', + warmup_ratio = 0.1, + warmup_steps = 0, + log_level = 'passive', + log_level_replica = 'warning', + log_on_each_node = True, + logging_dir = None, + logging_strategy = 'steps', + logging_first_step = False, + logging_steps = 1, + logging_nan_inf_filter = False, + save_strategy = 'steps', + save_steps = 500, + save_total_limit = None, + save_safetensors = True, + save_on_each_node = False, + save_only_model = False, + restore_callback_states_from_checkpoint = False, + no_cuda = False, + use_cpu = False, + use_mps_device = False, + seed = 3407, + data_seed = 3407, + jit_mode_eval = False, + bf16 = False, + fp16 = False, + fp16_opt_level = 'O1', + half_precision_backend = 'auto', + bf16_full_eval = False, + fp16_full_eval = False, + tf32 = None, + local_rank = -1, + ddp_backend = None, + tpu_num_cores = None, + tpu_metrics_debug = False, + debug = '', + dataloader_drop_last = False, + eval_steps = None, + dataloader_num_workers = 0, + dataloader_prefetch_factor = None, + past_index = -1, + run_name = None, + disable_tqdm = None, + remove_unused_columns = True, + label_names = None, + load_best_model_at_end = False, + metric_for_best_model = None, + greater_is_better = None, + ignore_data_skip = False, + fsdp = None, + fsdp_min_num_params = 0, + fsdp_config = None, + fsdp_transformer_layer_cls_to_wrap = None, + accelerator_config = None, + parallelism_config = None, + deepspeed = None, + label_smoothing_factor = 0.0, + optim = 'adamw_8bit', + optim_args = None, + adafactor = False, + group_by_length = False, + length_column_name = 'length', + report_to = None, + project = 'huggingface', + trackio_space_id = 'trackio', + ddp_find_unused_parameters = None, + ddp_bucket_cap_mb = None, + ddp_broadcast_buffers = None, + dataloader_pin_memory = True, + dataloader_persistent_workers = False, + skip_memory_metrics = True, + use_legacy_prediction_loop = False, + push_to_hub = False, + resume_from_checkpoint = None, + hub_model_id = None, + hub_strategy = 'every_save', + hub_token = None, + hub_private_repo = None, + hub_always_push = False, + hub_revision = None, + gradient_checkpointing = True, + gradient_checkpointing_kwargs = None, + include_inputs_for_metrics = False, + eval_do_concat_batches = True, + fp16_backend = 'auto', + push_to_hub_model_id = None, + push_to_hub_organization = None, + push_to_hub_token = None, + mp_parameters = '', + auto_find_batch_size = False, + full_determinism = False, + torchdynamo = None, + ray_scope = 'last', + ddp_timeout = 1800, + torch_compile = False, + torch_compile_backend = None, + torch_compile_mode = None, + include_tokens_per_second = False, + include_num_input_tokens_seen = False, + neftune_noise_alpha = None, + optim_target_modules = None, + 
batch_eval_metrics = False, + eval_on_start = False, + use_liger_kernel = False, + liger_kernel_config = None, + eval_use_gather_object = False, + average_tokens_across_devices = True, + model_init_kwargs = None, + chat_template_path = None, + dataset_text_field = 'text', + dataset_kwargs = None, + dataset_num_proc = None, + eos_token = None, + pad_token = None, + max_length = 1024, + packing = False, + packing_strategy = 'bfd', + padding_free = False, + pad_to_multiple_of = None, + eval_packing = None, + completion_only_loss = None, + assistant_only_loss = False, + loss_type = 'nll', + activation_offloading = False, + vllm_sampling_params = None, + unsloth_num_chunks = -1, + max_seq_length = None, + **kwargs, + ): + if learning_rate < 1e-7: print(f'Unsloth: Your learning rate of `{learning_rate}` is too small (less than 1e-7)! Consider increasing it, otherwise gradient updates will be close to 0!') + if learning_rate > 1: print(f'Unsloth: Your learning rate of `{learning_rate}` is way too large (> 1)! Consider decreasing it to 1e-1, otherwise gradient updates will explode!') + if output_dir is None and save_strategy == 'steps' and save_steps == 500: + output_dir = 'unsloth_training_checkpoints' + save_strategy = 'no' + if dataset_num_proc is None: + from multiprocessing import cpu_count + dataset_num_proc = min(max(cpu_count()+4, 2), 64) + if os.environ.get('UNSLOTH_ENABLE_FLEX_ATTENTION', '0') == '1': + from unsloth_zoo.flex_attention import HAS_FLEX_ATTENTION + if HAS_FLEX_ATTENTION and pad_to_multiple_of is None: + from unsloth_zoo.flex_attention import FLEX_ATTENTION_BLOCK_SIZE + pad_to_multiple_of = FLEX_ATTENTION_BLOCK_SIZE + + + super().__init__( + output_dir = output_dir, + overwrite_output_dir = overwrite_output_dir, + do_train = do_train, + do_eval = do_eval, + do_predict = do_predict, + eval_strategy = eval_strategy, + prediction_loss_only = prediction_loss_only, + per_device_train_batch_size = per_device_train_batch_size, + per_device_eval_batch_size = per_device_eval_batch_size, + per_gpu_train_batch_size = per_gpu_train_batch_size, + per_gpu_eval_batch_size = per_gpu_eval_batch_size, + gradient_accumulation_steps = gradient_accumulation_steps, + eval_accumulation_steps = eval_accumulation_steps, + eval_delay = eval_delay, + torch_empty_cache_steps = torch_empty_cache_steps, + learning_rate = learning_rate, + weight_decay = weight_decay, + adam_beta1 = adam_beta1, + adam_beta2 = adam_beta2, + adam_epsilon = adam_epsilon, + max_grad_norm = max_grad_norm, + num_train_epochs = num_train_epochs, + max_steps = max_steps, + lr_scheduler_type = lr_scheduler_type, + warmup_ratio = warmup_ratio, + warmup_steps = warmup_steps, + log_level = log_level, + log_level_replica = log_level_replica, + log_on_each_node = log_on_each_node, + logging_dir = logging_dir, + logging_strategy = logging_strategy, + logging_first_step = logging_first_step, + logging_steps = logging_steps, + logging_nan_inf_filter = logging_nan_inf_filter, + save_strategy = save_strategy, + save_steps = save_steps, + save_total_limit = save_total_limit, + save_safetensors = save_safetensors, + save_on_each_node = save_on_each_node, + save_only_model = save_only_model, + restore_callback_states_from_checkpoint = restore_callback_states_from_checkpoint, + no_cuda = no_cuda, + use_cpu = use_cpu, + use_mps_device = use_mps_device, + seed = seed, + data_seed = data_seed, + jit_mode_eval = jit_mode_eval, + bf16 = bf16, + fp16 = fp16, + fp16_opt_level = fp16_opt_level, + half_precision_backend = half_precision_backend, + 
bf16_full_eval = bf16_full_eval, + fp16_full_eval = fp16_full_eval, + tf32 = tf32, + local_rank = local_rank, + ddp_backend = ddp_backend, + tpu_num_cores = tpu_num_cores, + tpu_metrics_debug = tpu_metrics_debug, + debug = debug, + dataloader_drop_last = dataloader_drop_last, + eval_steps = eval_steps, + dataloader_num_workers = dataloader_num_workers, + dataloader_prefetch_factor = dataloader_prefetch_factor, + past_index = past_index, + run_name = run_name, + disable_tqdm = disable_tqdm, + remove_unused_columns = remove_unused_columns, + label_names = label_names, + load_best_model_at_end = load_best_model_at_end, + metric_for_best_model = metric_for_best_model, + greater_is_better = greater_is_better, + ignore_data_skip = ignore_data_skip, + fsdp = fsdp, + fsdp_min_num_params = fsdp_min_num_params, + fsdp_config = fsdp_config, + fsdp_transformer_layer_cls_to_wrap = fsdp_transformer_layer_cls_to_wrap, + accelerator_config = accelerator_config, + parallelism_config = parallelism_config, + deepspeed = deepspeed, + label_smoothing_factor = label_smoothing_factor, + optim = optim, + optim_args = optim_args, + adafactor = adafactor, + group_by_length = group_by_length, + length_column_name = length_column_name, + report_to = report_to, + project = project, + trackio_space_id = trackio_space_id, + ddp_find_unused_parameters = ddp_find_unused_parameters, + ddp_bucket_cap_mb = ddp_bucket_cap_mb, + ddp_broadcast_buffers = ddp_broadcast_buffers, + dataloader_pin_memory = dataloader_pin_memory, + dataloader_persistent_workers = dataloader_persistent_workers, + skip_memory_metrics = skip_memory_metrics, + use_legacy_prediction_loop = use_legacy_prediction_loop, + push_to_hub = push_to_hub, + resume_from_checkpoint = resume_from_checkpoint, + hub_model_id = hub_model_id, + hub_strategy = hub_strategy, + hub_token = hub_token, + hub_private_repo = hub_private_repo, + hub_always_push = hub_always_push, + hub_revision = hub_revision, + gradient_checkpointing = gradient_checkpointing, + gradient_checkpointing_kwargs = gradient_checkpointing_kwargs, + include_inputs_for_metrics = include_inputs_for_metrics, + eval_do_concat_batches = eval_do_concat_batches, + fp16_backend = fp16_backend, + push_to_hub_model_id = push_to_hub_model_id, + push_to_hub_organization = push_to_hub_organization, + push_to_hub_token = push_to_hub_token, + mp_parameters = mp_parameters, + auto_find_batch_size = auto_find_batch_size, + full_determinism = full_determinism, + torchdynamo = torchdynamo, + ray_scope = ray_scope, + ddp_timeout = ddp_timeout, + torch_compile = torch_compile, + torch_compile_backend = torch_compile_backend, + torch_compile_mode = torch_compile_mode, + include_tokens_per_second = include_tokens_per_second, + include_num_input_tokens_seen = include_num_input_tokens_seen, + neftune_noise_alpha = neftune_noise_alpha, + optim_target_modules = optim_target_modules, + batch_eval_metrics = batch_eval_metrics, + eval_on_start = eval_on_start, + use_liger_kernel = use_liger_kernel, + liger_kernel_config = liger_kernel_config, + eval_use_gather_object = eval_use_gather_object, + average_tokens_across_devices = average_tokens_across_devices, + model_init_kwargs = model_init_kwargs, + chat_template_path = chat_template_path, + dataset_text_field = dataset_text_field, + dataset_kwargs = dataset_kwargs, + dataset_num_proc = dataset_num_proc, + eos_token = eos_token, + pad_token = pad_token, + max_length = max_length, + packing = packing, + packing_strategy = packing_strategy, + padding_free = padding_free, + 
pad_to_multiple_of = pad_to_multiple_of, + eval_packing = eval_packing, + completion_only_loss = completion_only_loss, + assistant_only_loss = assistant_only_loss, + loss_type = loss_type, + activation_offloading = activation_offloading,**kwargs) + self.vllm_sampling_params = vllm_sampling_params + self.unsloth_num_chunks = unsloth_num_chunks + self.max_seq_length = max_seq_length +pass + +class _UnslothSFTTrainer(Trainer): + """ + Trainer for the Supervised Fine-Tuning (SFT) method. + + This class is a wrapper around the [`~transformers.Trainer`] class and inherits all of its attributes and methods. + + Example: + + ```python + from datasets import load_dataset + from trl import SFTTrainer + + dataset = load_dataset("roneneldan/TinyStories", split="train[:1%]") + + trainer = SFTTrainer(model="Qwen/Qwen2-0.5B-Instruct", train_dataset=dataset) + trainer.train() + ``` + + Args: + model (`Union[str, PreTrainedModel]`): + Model to be trained. Can be either: + + - A string, being the *model id* of a pretrained model hosted inside a model repo on huggingface.co, or a + path to a *directory* containing model weights saved using + [`~transformers.PreTrainedModel.save_pretrained`], e.g., `'./my_model_directory/'`. The model is loaded + using `<architecture>.from_pretrained` (where `<architecture>` is derived from the model + config) with the keyword arguments in `args.model_init_kwargs`. + - A [`~transformers.PreTrainedModel`] object. + If you're training a model with an MoE architecture and want to include the load balancing/auxiliary loss + as a part of the final loss, remember to set the `output_router_logits` config of the model to `True`. + args ([`SFTConfig`], *optional*, defaults to `None`): + Configuration for this trainer. If `None`, a default configuration is used. + data_collator ([`~transformers.DataCollator`] or `None`, *optional*): + Function to use to form a batch from a list of elements of the processed `train_dataset` or `eval_dataset`. + Will default to [`~trainer.sft_trainer.DataCollatorForLanguageModeling`] if the model is a language model + and [`~trainer.sft_trainer.DataCollatorForVisionLanguageModeling`] if the model is a vision-language model. + train_dataset ([`~datasets.Dataset`] or [`~datasets.IterableDataset`]): + Dataset to use for training. SFT supports both [language modeling](#language-modeling) type and + [prompt-completion](#prompt-completion) type. The format of the samples can be either: + + - [Standard](dataset_formats#standard): Each sample contains plain text. + - [Conversational](dataset_formats#conversational): Each sample contains structured messages (e.g., role + and content). + + The trainer also supports processed datasets (tokenized) as long as they contain an `input_ids` field. + eval_dataset ([`~datasets.Dataset`], [`~datasets.IterableDataset`] or `dict[str, Union[Dataset, IterableDataset]]`): + Dataset to use for evaluation. It must meet the same requirements as `train_dataset`. + processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.ProcessorMixin`] or `None`, *optional*, defaults to `None`): + Processing class used to process the data. If `None`, the processing class is loaded from the model's name + with [`~transformers.AutoProcessor.from_pretrained`]. A padding token, `tokenizer.pad_token`, must be set. + If the processing class has not set a padding token, `tokenizer.eos_token` will be used as the default. 
+ compute_loss_func (`Callable` or `None`, *optional*, defaults to `None`): + A function that accepts the raw model outputs, labels, and the number of items in the entire accumulated + batch (batch_size * gradient_accumulation_steps) and returns the loss. For example, see the default [loss + function](https://github.com/huggingface/transformers/blob/052e652d6d53c2b26ffde87e039b723949a53493/src/transformers/trainer.py#L3618) + used by [`Trainer`]. + compute_metrics (`Callable[[EvalPrediction], dict]` or `None`, *optional*, defaults to `None`): + The function that will be used to compute metrics at evaluation. Must take a + [`~transformers.EvalPrediction`] and return a dictionary mapping strings to metric values. When passing + [`SFTConfig`] with `batch_eval_metrics` set to `True`, your `compute_metrics` function must take a boolean + `compute_result` argument. This will be triggered after the last eval batch to signal that the function + needs to calculate and return the global summary statistics rather than accumulating the batch-level + statistics. + callbacks (list of [`~transformers.TrainerCallback`] or `None`, *optional*, defaults to `None`): + List of callbacks to customize the training loop. Will add those to the list of default callbacks detailed + [here](https://huggingface.co/docs/transformers/main_classes/callback). + + If you want to remove one of the default callbacks used, use the [`~transformers.Trainer.remove_callback`] + method. + optimizers (`tuple[Optional[torch.optim.Optimizer], Optional[torch.optim.lr_scheduler.LambdaLR]]`, *optional*, defaults to `(None, None)`): + A tuple containing the optimizer and the scheduler to use. Will default to an instance of `AdamW` on your + model and a scheduler given by [`~transformers.get_linear_schedule_with_warmup`] controlled by `args`. + optimizer_cls_and_kwargs (`tuple[Type[torch.optim.Optimizer], Dict[str, Any]]`, *optional*, defaults to `None`): + A tuple containing the optimizer class and keyword arguments to use. Overrides `optim` and `optim_args` in + `args`. Incompatible with the `optimizers` argument. + + Unlike `optimizers`, this argument avoids the need to place model parameters on the correct devices before + initializing the Trainer. + preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`, *optional*, defaults to `None`): + A function that preprocesses the logits right before caching them at each evaluation step. Must take two + tensors, the logits and the labels, and return the logits once processed as desired. The modifications made + by this function will be reflected in the predictions received by `compute_metrics`. + + Note that the labels (second parameter) will be `None` if the dataset does not have them. + peft_config ([`~peft.PeftConfig`] or `None`, *optional*, defaults to `None`): + PEFT configuration used to wrap the model. If `None`, the model is not wrapped. + formatting_func (`Callable` or `None`, *optional*, defaults to `None`): + Formatting function applied to the dataset before tokenization. Applying the formatting function explicitly + converts the dataset into a [language modeling](#language-modeling) type. 
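+
+    As a small illustrative reminder of the two supported dataset types (field names follow the TRL
+    dataset-format conventions referenced above):
+
+    ```python
+    {"text": "The sky is blue."}                          # language modeling type
+    {"prompt": "The sky is", "completion": " blue."}      # prompt-completion type
+    ```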
+ """ + + _tag_names = ["trl", "sft"] + + def __init__( + self, + model: Union[str, nn.Module, PreTrainedModel], + args: Optional[Union[SFTConfig, TrainingArguments]] = None, + data_collator: Optional[DataCollator] = None, # type: ignore + train_dataset: Optional[Union[Dataset, IterableDataset]] = None, + eval_dataset: Optional[Union[Dataset, dict[str, Dataset]]] = None, + processing_class: Optional[Union[PreTrainedTokenizerBase, ProcessorMixin]] = None, + compute_loss_func: Optional[Callable] = None, + compute_metrics: Optional[Callable[[EvalPrediction], dict]] = None, + callbacks: Optional[list[TrainerCallback]] = None, + optimizers: tuple[Optional[torch.optim.Optimizer], Optional[torch.optim.lr_scheduler.LambdaLR]] = (None, None), + optimizer_cls_and_kwargs: Optional[tuple[type[torch.optim.Optimizer], dict[str, Any]]] = None, + preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None, + peft_config: Optional["PeftConfig"] = None, + formatting_func: Optional[Callable[[dict], str]] = None, + ): + # Args + if args is None: + model_name = model if isinstance(model, str) else model.config._name_or_path + model_name = model_name.split("/")[-1] + args = SFTConfig(f"{model_name}-SFT") + elif isinstance(args, TrainingArguments) and not isinstance(args, SFTConfig): + dict_args = args.to_dict() + dict_args["hub_token"] = args.hub_token # to_dict hides the hub_token + dict_args.pop("push_to_hub_token") + args = SFTConfig(**dict_args) + + # Model + model_init_kwargs = args.model_init_kwargs or {} + if isinstance(model, str): + model_id = model + dtype = model_init_kwargs.get("dtype") + if isinstance(dtype, torch.dtype) or dtype == "auto" or dtype is None: + pass # dtype is already a torch.dtype or "auto" or None + elif isinstance(dtype, str) and dtype in ["bfloat16", "float16", "float32"]: + dtype = getattr(torch, dtype) + model_init_kwargs["dtype"] = dtype + else: + raise ValueError( + "Invalid `dtype` passed to `SFTConfig`. Expected either 'auto' or a string representing " + f"a valid `torch.dtype` (e.g., 'float32'), but got {dtype}." + ) + config = AutoConfig.from_pretrained(model_id) + architecture = getattr(transformers, config.architectures[0]) + model = architecture.from_pretrained(model_id, **model_init_kwargs) + else: + model_id = model.config._name_or_path + if args.model_init_kwargs is not None: + logger.warning( + "You passed `model_init_kwargs` to the `SFTConfig`, but your model is already instantiated. " + "The `model_init_kwargs` will be ignored." + ) + + # Processing class + if processing_class is None: + processing_class = AutoProcessor.from_pretrained(model_id) + + # Handle pad token for processors or tokenizers + if isinstance(processing_class, ProcessorMixin): + tokenizer = processing_class.tokenizer + self._is_vlm = False + elif isinstance(processing_class, PreTrainedTokenizerBase): + tokenizer = processing_class + self._is_vlm = False + else: + raise TypeError("The `processing_class` must be either a `PreTrainedTokenizerBase` or a `ProcessorMixin`") + + if args.eos_token is not None: + eos_token = args.eos_token + eos_token_id = tokenizer.convert_tokens_to_ids(eos_token) + if eos_token_id is None: + raise ValueError( + f"The specified `eos_token` ('{eos_token}') is not found in the vocabulary of the given " + f"`processing_class` ({processing_class.__class__.__name__}). Ensure that the `eos_token` exists " + "in the vocabulary before using it as an EOS token." 
+ ) + tokenizer.eos_token_id = eos_token_id + + if args.chat_template_path is not None: + if os.path.isfile(args.chat_template_path) and args.chat_template_path.endswith((".jinja", ".j2")): + with open(args.chat_template_path, encoding="utf-8") as chat_template_file: + processing_class.chat_template = chat_template_file.read() + added_tokens = [] + else: + model, processing_class, added_tokens = clone_chat_template( + model, processing_class, args.chat_template_path + ) + else: + added_tokens = [] + + # Catch some wrong configurations related to VLMs + if self._is_vlm and args.packing: + raise ValueError( + "Packing is not supported for vision-language models. Please set `packing=False` in the SFTConfig." + ) + if self._is_vlm and args.padding_free: + raise ValueError( + "Padding-free training is yet not supported for vision-language models. Please set " + "`padding_free=False` in the `SFTConfig`." + ) + if self._is_vlm and args.assistant_only_loss: + raise ValueError( + "Assistant-only loss is not yet supported for vision-language models. Please set " + "`assistant_only_loss=False` in the `SFTConfig`." + ) + + # PEFT configuration and model wrapping + if False: + if added_tokens: + # Ensure that the added tokens are trainable + if peft_config.trainable_token_indices is None: + peft_config.trainable_token_indices = {"embed_tokens": added_tokens} + elif "embed_tokens" not in peft_config.trainable_token_indices: + peft_config.trainable_token_indices["embed_tokens"] = added_tokens + else: + peft_config.trainable_token_indices["embed_tokens"].extend(added_tokens) + + # Ensure that the lm_head is trainable + if peft_config.modules_to_save is None or "lm_head" not in peft_config.modules_to_save: + logger.warning( + "Cloning chat template added new tokens to the tokenizer, but 'lm_head' is not in PEFT's " + "`modules_to_save`. As a result, the model may not learn to generate outputs with these new " + "tokens, leading to degraded generation quality. To fix this, add " + "`modules_to_save=['lm_head']` to your PEFT configuration." + ) + + if peft_config.modules_to_save is None: + peft_config.modules_to_save = ["lm_head"] + else: + peft_config.modules_to_save.append("lm_head") + + # In Prompt Tuning a small set of trainable virtual tokens [continuous prompt embeddings] is prepended to the + # input. We store the number of these tokens so we can account for them correctly when calculating accuracy. + self.num_virtual_tokens = 0 + + if False: + model = prepare_peft_model(model, peft_config, args) + if model.active_adapter in model.peft_config: + peft_model_config = model.peft_config[model.active_adapter] + self.num_virtual_tokens = getattr(peft_model_config, "num_virtual_tokens", 0) + + # Data collator + # BFD packing requires padding-free mode; otherwise, the collator outputs padded attention masks, causing + # FlashAttention to ignore position_ids and recompute them incorrectly from the padded attention mask. + self.padding_free = args.padding_free or (args.packing and args.packing_strategy == "bfd") + use_flash_attention = model.config._attn_implementation in [ + "flash_attention_2", + "flash_attention_3", + "kernels-community/vllm-flash-attn3", + ] + if self.padding_free: + if data_collator is not None: + raise ValueError("Passing a custom data collator is not supported when using padding-free.") + if args.packing and args.packing_strategy == "wrapped": + logger.warning( + "You are passing `padding_free=True` with the 'wrapped' packing strategy, which is not " + "recommended. 
Please refer to the documentation to understand why this is not recommended."
+                )
+            if not use_flash_attention:
+                logger.warning(
+                    "Padding-free training is enabled, but the attention implementation is not set to "
+                    "'flash_attention_2'. Padding-free training flattens batches into a single sequence, and "
+                    "'flash_attention_2' is the only known attention mechanism that reliably supports this. Using "
+                    "other implementations may lead to unexpected behavior. To ensure compatibility, set "
+                    "`attn_implementation='flash_attention_2'` in the model configuration, or verify that your "
+                    "attention mechanism can handle flattened sequences."
+                )
+            if args.per_device_train_batch_size == 1 and not args.packing:
+                logger.warning(
+                    "You are using a per_device_train_batch_size of 1 with padding-free training. Using a batch size "
+                    "of 1 annihilates the benefits of padding-free training. Please consider increasing the batch "
+                    "size to at least 2."
+                )
+
+        # Decide whether to use completion-only loss: if not specified, then it is set to True if the dataset format
+        # is prompt-completion, and False if the dataset format is language modeling.
+        dataset_sample = next(iter(train_dataset))
+        if args.completion_only_loss is None:
+            self.completion_only_loss = "prompt" in dataset_sample and "completion" in dataset_sample
+        else:
+            self.completion_only_loss = args.completion_only_loss
+
+        if data_collator is None and not self._is_vlm:
+            # Get the pad token: if not provided, use the one from the processing class or the eos token
+            # if the processing class does not have a pad token.
+            pad_token = args.pad_token or tokenizer.pad_token or tokenizer.eos_token
+            pad_token_id = tokenizer.convert_tokens_to_ids(pad_token)
+            if pad_token_id is None:
+                raise ValueError(
+                    f"The specified `pad_token` ('{pad_token}') is not found in the vocabulary of the given "
+                    f"`processing_class` ({processing_class.__class__.__name__}). Ensure that the `pad_token` exists "
+                    "in the vocabulary before using it as a padding token."
+                )
+            data_collator = DataCollatorForLanguageModeling(
+                pad_token_id=pad_token_id,
+                completion_only_loss=self.completion_only_loss,
+                padding_free=self.padding_free,
+                # Using position_ids without flash_attn hurts the training
+                return_position_ids=use_flash_attention,
+                pad_to_multiple_of=args.pad_to_multiple_of,
+            )
+        elif data_collator is None and self._is_vlm:
+            data_collator = DataCollatorForVisionLanguageModeling(
+                processor=processing_class,
+                max_length=args.max_length,
+                completion_only_loss=self.completion_only_loss,
+                pad_to_multiple_of=args.pad_to_multiple_of,
+                dataset_text_field=args.dataset_text_field,
+            )
+
+        if args.packing and args.packing_strategy == "bfd" and not use_flash_attention:
+            logger.warning(
+                "You are using packing, but the attention implementation is not set to 'flash_attention_2' or "
+                "'kernels-community/vllm-flash-attn3'. Packing flattens batches into a single sequence, and Flash "
+                "Attention is the only known attention mechanism that reliably supports this. Using other "
+                "implementations may lead to cross-contamination between batches. To avoid this, either disable "
+                "packing by setting `packing=False`, or set `attn_implementation='flash_attention_2'` or "
+                "`attn_implementation='kernels-community/vllm-flash-attn3'` in the model configuration."
+            )
+        if args.assistant_only_loss and not is_conversational(dataset_sample):
+            raise ValueError(
+                "You set `assistant_only_loss=True`, but the dataset is not conversational. 
This option is only " + "supported for conversational datasets." + ) + + # Dataset + # Skip dataset preparation if `skip_prepare_dataset=True` in `dataset_kwargs`, or if it's a VLM, where + # preprocessing [e.g., image-to-pixel conversion] is too costly and done on the fly instead. + skip_prepare_dataset = ( + args.dataset_kwargs is not None and args.dataset_kwargs.get("skip_prepare_dataset", False) or self._is_vlm + ) + if not skip_prepare_dataset: + if self.completion_only_loss and formatting_func: + raise ValueError( + "A formatting function was provided while `completion_only_loss=True`, which is incompatible. " + "Using a formatter converts the dataset to a language modeling type, conflicting with " + "completion-only loss. To resolve this, apply your formatting function before passing the " + "dataset, or disable `completion_only_loss` in `SFTConfig`." + ) + train_dataset = self._prepare_dataset( + train_dataset, processing_class, args, args.packing, formatting_func, "train" + ) + if eval_dataset is not None: + packing = args.packing if args.eval_packing is None else args.eval_packing + if isinstance(eval_dataset, dict): + eval_dataset = { + key: self._prepare_dataset(dataset, processing_class, args, packing, formatting_func, key) + for key, dataset in eval_dataset.items() + } + else: + eval_dataset = self._prepare_dataset( + eval_dataset, processing_class, args, packing, formatting_func, "eval" + ) + + # Loss function + if args.loss_type == "nll": + pass # use the default loss + elif args.loss_type == "dft": + if compute_loss_func is not None: + raise ValueError( + "You passed a `compute_loss_func` together with `loss_type='dft'` to the `SFTTrainer`. " + "When using `loss_type='dft'`, the loss function is internally set to the DFT loss, so passing a " + "`compute_loss_func` is not allowed." + ) + compute_loss_func = dft_loss + else: + raise ValueError(f"Invalid `loss_type` {args.loss_type} passed. Supported values are 'nll' and 'dft'.") + + # Initialize the metrics + self._metrics = {"train": defaultdict(list), "eval": defaultdict(list)} + self._total_train_tokens = 0 + + # Initialize the Trainer. 
Parent class will handle: + # - DeepSpeed configuration [through create_accelerator_and_postprocess] + # - FSDP setup + # - Distributed training setup + # - Optimizer and scheduler creation + + super().__init__( + model=model, + args=args, + data_collator=data_collator, + train_dataset=train_dataset, + eval_dataset=eval_dataset, + processing_class=processing_class, + compute_loss_func=compute_loss_func, + compute_metrics=compute_metrics, + callbacks=callbacks, + optimizers=optimizers, + optimizer_cls_and_kwargs=optimizer_cls_and_kwargs, + preprocess_logits_for_metrics=preprocess_logits_for_metrics, + ) + + # Initialize activation offloading context + if self.args.activation_offloading: + self.maybe_activation_offload_context = get_act_offloading_ctx_manager(model=self.model) + else: + self.maybe_activation_offload_context = contextlib.nullcontext() + + # Add tags for models that have been loaded with the correct transformers version + if hasattr(self.model, "add_model_tags"): + self.model.add_model_tags(self._tag_names) + + self.aux_loss_enabled = getattr(model.config, "output_router_logits", False) + self.aux_loss_coef = getattr(model.config, "router_aux_loss_coef", 0.0) + if self.aux_loss_enabled and self.aux_loss_coef == 0.0: + logger.warning( + "You set `output_router_logits` to `True` in the model config, but `router_aux_loss_coef` is set to " + "`0.0`, meaning the auxiliary loss will not be used. Either set `router_aux_loss_coef` to a value " + "greater than `0.0`, or set `output_router_logits` to `False` if you don't want to use the auxiliary " + "loss.", + ) + + def _prepare_dataset( + self, + dataset: Union[Dataset, IterableDataset], + processing_class, + args, + packing: bool, + formatting_func: Optional[Callable[[dict], str]], + dataset_name: str, + ) -> Union[Dataset, IterableDataset]: + # All Unsloth Zoo code licensed under LGPLv3 + try: + if isinstance(dataset, ConstantLengthDataset): return dataset + except: + pass + + map_kwargs = {} + use_desc = isinstance(dataset, Dataset) + is_vlm = hasattr(processing_class, "tokenizer") + tokenizer = processing_class + if is_vlm: tokenizer = processing_class.tokenizer + + # Get max length + max_seq_length = getattr(args, "max_length", 0) + if max_seq_length == 0: max_seq_length = getattr(args, "max_seq_length", 0) + if max_seq_length == 0: max_seq_length = getattr(self, "max_seq_length", 0) + if max_seq_length == 0: max_seq_length = getattr(self, "max_seq", 0) + if max_seq_length == 0: raise RuntimeError("Unsloth: max_seq_length is 0! Please specify one!") + dataset_text_field = getattr(args, "dataset_text_field", "text") + do_truncation = max_seq_length != 0 + do_formatting_func = False + do_tokenize = True + + # Get correct column names + column_names = set(next(iter(dataset)).keys()) + used_column_names = ["input_ids"] + if "attention_mask" in column_names: + used_column_names.append("attention_mask") + + # Check if already tokenized so skip + from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling + if "labels" in column_names: + # Most likely forgot data collator! 
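+            # Descriptive note on this collator dispatch (behavior unchanged): a "labels" column
+            # means the dataset was fully prepared upstream, so DataCollatorForSeq2Seq pads
+            # input_ids and labels together and tokenization is skipped; "input_ids" alone is
+            # treated as plain causal-LM data, where DataCollatorForLanguageModeling with
+            # mlm=False derives labels from the inputs; if neither column exists, the raw text
+            # must first go through `formatting_func` / `dataset_text_field` below.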
+ if is_vlm and not hasattr(tokenizer, "pad"): + # Check if processing_class has a .pad, if not, use tokenizer.tokenizer + raise RuntimeError(f"Unsloth: {processing_class.__class__} does not have .pad!") + self.data_collator = DataCollatorForSeq2Seq(tokenizer) + used_column_names.append("labels") + do_tokenize = False + elif "input_ids" in column_names: + # Skip dataset prep, and set data collator + if is_vlm and not hasattr(tokenizer, "pad"): + # Check if processing_class has a .pad, if not, use tokenizer.tokenizer + raise RuntimeError(f"Unsloth: {processing_class.__class__} does not have .pad!") + self.data_collator = DataCollatorForLanguageModeling(tokenizer, mlm = False) + do_tokenize = False + elif dataset_text_field not in column_names: + do_formatting_func = True + if formatting_func is None: + raise RuntimeError("Unsloth: You must specify a `formatting_func`") + pass + + if do_tokenize: + # Check double BOS tokens + if do_formatting_func: + test_text = formatting_func(next(iter(dataset))) + if not isinstance(test_text, list): + raise ValueError( + "Unsloth: The `formatting_func` should return a list of processed strings." + ) + test_text = test_text[0] + else: + test_text = next(iter(dataset))[dataset_text_field][0] + + # Get chat template + chat_template = getattr(processing_class, 'chat_template', '') + if chat_template == '' and is_vlm: + chat_template = getattr(tokenizer, 'chat_template', '') + if chat_template is None: + chat_template = '' + + # Get bos_token + add_special_tokens = True + bos_token_1 = getattr(processing_class, 'bos_token', None) + bos_token_2 = getattr(tokenizer, 'bos_token', None) + bos_token = bos_token_1 or bos_token_2 + + if bos_token is not None: + if test_text.startswith(bos_token) or bos_token in chat_template: + add_special_tokens = False + print("Unsloth: We found double BOS tokens - we shall remove one automatically.") + pass + + # Create tokenize function + def _tokenize(example): + return tokenizer( + example[dataset_text_field] if not do_formatting_func else formatting_func(example), + truncation = do_truncation, + max_length = max_seq_length, + return_token_type_ids = False, + add_special_tokens = add_special_tokens, + ) + pass + + if not isinstance(dataset, IterableDataset): + dataset_num_proc = getattr(args, "dataset_num_proc", None) + if dataset_num_proc is None: + from multiprocessing import cpu_count + dataset_num_proc = max(cpu_count()+4, 2) + map_kwargs["num_proc"] = dataset_num_proc + else: + map_kwargs["batch_size"] = dataset._ex_iterable.batch_size + + if use_desc: map_kwargs["desc"] = f'Unsloth: Tokenizing ["{dataset_text_field}"]' + dataset = dataset.map(_tokenize, batched = True, **map_kwargs) + + # If VLM, switch data collator since .pad is needed! 
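+            # (Multimodal processor classes typically do not expose a top-level `.pad` the way
+            # tokenizers do, so padding has to route through the wrapped inner tokenizer instead.)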
+ if is_vlm and not hasattr(processing_class, "pad"): + data_collator = DataCollatorForLanguageModeling(tokenizer, mlm = False) + self.data_collator = data_collator + pass + pass + if packing: + # Try using new packing which works in TRL + try: + pack_dataset + except: + print("Unsloth: Hugging Face's packing is currently buggy - we're disabling it for now!") + return dataset + + if max_seq_length == 0: + raise ValueError("When packing is enabled, `max_seq_length` can't be `None`.") + + if use_desc: map_kwargs["desc"] = f"Unsloth: Packing {dataset_name} dataset" + dataset = pack_dataset( + dataset.select_columns(used_column_names), + max_seq_length, + getattr(args, "packing_strategy", "bfd"), + map_kwargs, + ) + pass + return dataset + + def _set_signature_columns_if_needed(self): + # If `self.args.remove_unused_columns` is True, non-signature columns are removed. + # By default, this method sets `self._signature_columns` to the model's expected inputs (usually, "input_ids" + # and "attention_mask"). When using `train_on_completion_only` we add a "completion_mask" column to the + # dataset. So we need to override the default signature columns to include "completion_mask" as well. + if self._signature_columns is None: + if self._is_vlm: + self._signature_columns = ["messages", "prompt", "completion", "images"] + else: + self._signature_columns = ["input_ids", "labels", "seq_lengths", "completion_mask", "assistant_masks"] + + def compute_loss(self, model, inputs, return_outputs = False, num_items_in_batch = None): + outputs = super().compute_loss( + model, + inputs, + return_outputs = return_outputs, + num_items_in_batch = num_items_in_batch, + ) + return outputs + + # Override training step to add activation offloading context. + def training_step(self, *args, **kwargs): + with self.maybe_activation_offload_context: + return super().training_step(*args, **kwargs) + + def log(self, logs: dict[str, float], start_time: Optional[float] = None) -> None: + mode = "train" if self.model.training else "eval" + metrics = {key: sum(val) / len(val) for key, val in self._metrics[mode].items()} # average the metrics + + # This method can be called both in training and evaluation. When called in evaluation, the keys in `logs` + # start with "eval_". We need to add the prefix "eval_" to the keys in `metrics` to match the format. + if mode == "eval": + metrics = {f"eval_{key}": val for key, val in metrics.items()} + + logs.update(metrics) + super().log(logs, start_time) + self._metrics[mode].clear() + + # Ensure the model card is saved along with the checkpoint + def _save_checkpoint(self, model, trial): + if self.args.hub_model_id is None: + model_name = Path(self.args.output_dir).name + else: + model_name = self.args.hub_model_id.split("/")[-1] + self.create_model_card(model_name=model_name) + super()._save_checkpoint(model, trial) + + def create_model_card( + self, + model_name: Optional[str] = None, + dataset_name: Optional[str] = None, + tags: Union[str, list[str], None] = None, + ): + """ + Creates a draft of a model card using the information available to the `Trainer`. + + Args: + model_name (`str` or `None`, *optional*, defaults to `None`): + Name of the model. + dataset_name (`str` or `None`, *optional*, defaults to `None`): + Name of the dataset used for training. + tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`): + Tags to be associated with the model card. 
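+
+        Example (illustrative values; these strings are placeholders, not required names):
+
+        ```python
+        trainer.create_model_card(
+            model_name="my-sft-model",
+            dataset_name="roneneldan/TinyStories",
+            tags=["sft", "demo"],
+        )
+        ```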
+ """ + if not self.is_world_process_zero(): + return + + if hasattr(self.model.config, "_name_or_path") and not os.path.isdir(self.model.config._name_or_path): + base_model = self.model.config._name_or_path + else: + base_model = None + + # normalize `tags` to a mutable set + if tags is None: + tags = set() + elif isinstance(tags, str): + tags = {tags} + else: + tags = set(tags) + + if hasattr(self.model.config, "unsloth_version"): + tags.add("unsloth") + + if "JOB_ID" in os.environ: + tags.add("hf_jobs") + + tags.update(self._tag_names) + + model_card = generate_model_card( + base_model=base_model, + model_name=model_name, + hub_model_id=self.hub_model_id, + dataset_name=dataset_name, + tags=list(tags), + wandb_url=wandb.run.url if is_wandb_available() and wandb.run is not None else None, + comet_url=get_comet_experiment_url(), + trainer_name="SFT", + ) + + model_card.save(os.path.join(self.args.output_dir, "README.md")) +class UnslothSFTTrainer(_UnslothSFTTrainer): + """ + +Trainer for Supervised Fine-Tuning (SFT) method. + +This class is a wrapper around the [`~transformers.Trainer`] class and inherits all of its attributes and methods. + +Example: + +```python +from datasets import load_dataset +from trl import SFTTrainer + +dataset = load_dataset("roneneldan/TinyStories", split="train[:1%]") + +trainer = SFTTrainer(model="Qwen/Qwen2-0.5B-Instruct", train_dataset=dataset) +trainer.train() +``` + +Args: + model (`Union[str, PreTrainedModel]`): + Model to be trained. Can be either: + + - A string, being the *model id* of a pretrained model hosted inside a model repo on huggingface.co, or a + path to a *directory* containing model weights saved using + [`~transformers.PreTrainedModel.save_pretrained`], e.g., `'./my_model_directory/'`. The model is loaded + using `.from_pretrained` (where `` is derived from the model + config) with the keyword arguments in `args.model_init_kwargs`. + - A [`~transformers.PreTrainedModel`] object. + If you're training a model with an MoE architecture and want to include the load balancing/auxilliary loss + as a part of the final loss, remember to set the `output_router_logits` config of the model to `True`. + args ([`SFTConfig`], *optional*, defaults to `None`): + Configuration for this trainer. If `None`, a default configuration is used. + data_collator ([`~transformers.DataCollator`] or `None`, *optional*): + Function to use to form a batch from a list of elements of the processed `train_dataset` or `eval_dataset`. + Will default to [`~trainer.sft_trainer.DataCollatorForLanguageModeling`] if the model is a language model + and [`~trainer.sft_trainer.DataCollatorForVisionLanguageModeling`] if the model is a vision-language model. + train_dataset ([`~datasets.Dataset`] or [`~datasets.IterableDataset`]): + Dataset to use for training. SFT supports both [language modeling](#language-modeling) type and + [prompt-completion](#prompt-completion) type. The format of the samples can be either: + + - [Standard](dataset_formats#standard): Each sample contains plain text. + - [Conversational](dataset_formats#conversational): Each sample contains structured messages (e.g., role + and content). + + The trainer also supports processed datasets (tokenized) as long as they contain an `input_ids` field. + eval_dataset ([`~datasets.Dataset`], [`~datasets.IterableDataset`] or `dict[str, Union[Dataset, IterableDataset]]`): + Dataset to use for evaluation. It must meet the same requirements as `train_dataset`. 
+    processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.ProcessorMixin`] or `None`, *optional*, defaults to `None`):
+        Processing class used to process the data. If `None`, the processing class is loaded from the model's name
+        with [`~transformers.AutoProcessor.from_pretrained`]. A padding token, `tokenizer.pad_token`, must be set.
+        If the processing class has not set a padding token, `tokenizer.eos_token` will be used as the default.
+    compute_loss_func (`Callable` or `None`, *optional*, defaults to `None`):
+        A function that accepts the raw model outputs, labels, and the number of items in the entire accumulated
+        batch (batch_size * gradient_accumulation_steps) and returns the loss. For example, see the default [loss
+        function](https://github.com/huggingface/transformers/blob/052e652d6d53c2b26ffde87e039b723949a53493/src/transformers/trainer.py#L3618)
+        used by [`Trainer`].
+    compute_metrics (`Callable[[EvalPrediction], dict]` or `None`, *optional*, defaults to `None`):
+        The function used to compute metrics at evaluation. Must take a [`~transformers.EvalPrediction`] and return
+        a dictionary mapping metric names to metric values. When passing [`SFTConfig`] with `batch_eval_metrics` set
+        to `True`, your `compute_metrics` function must take a boolean `compute_result` argument. This will be
+        triggered after the last eval batch to signal that the function needs to calculate and return the global
+        summary statistics rather than accumulating the batch-level statistics.
+    callbacks (list of [`~transformers.TrainerCallback`] or `None`, *optional*, defaults to `None`):
+        List of callbacks to customize the training loop. Will add those to the list of default callbacks detailed
+        [here](https://huggingface.co/docs/transformers/main_classes/callback).
+
+        If you want to remove one of the default callbacks used, use the [`~transformers.Trainer.remove_callback`]
+        method.
+    optimizers (`tuple[Optional[torch.optim.Optimizer], Optional[torch.optim.lr_scheduler.LambdaLR]]`, *optional*, defaults to `(None, None)`):
+        A tuple containing the optimizer and the scheduler to use. Will default to an instance of `AdamW` on your
+        model and a scheduler given by [`~transformers.get_linear_schedule_with_warmup`] controlled by `args`.
+    optimizer_cls_and_kwargs (`tuple[Type[torch.optim.Optimizer], Dict[str, Any]]`, *optional*, defaults to `None`):
+        A tuple containing the optimizer class and keyword arguments to use. Overrides `optim` and `optim_args` in
+        `args`. Incompatible with the `optimizers` argument.
+
+        Unlike `optimizers`, this argument avoids the need to place model parameters on the correct devices before
+        initializing the Trainer.
+    preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`, *optional*, defaults to `None`):
+        A function that preprocesses the logits right before caching them at each evaluation step. Must take two
+        tensors, the logits and the labels, and return the logits once processed as desired. The modifications made
+        by this function will be reflected in the predictions received by `compute_metrics`.
+
+        Note that the labels (second parameter) will be `None` if the dataset does not have them.
+    peft_config ([`~peft.PeftConfig`] or `None`, *optional*, defaults to `None`):
+        PEFT configuration used to wrap the model. If `None`, the model is not wrapped.
+    formatting_func (`Callable` or `None`, *optional*, defaults to `None`):
+        Formatting function applied to the dataset before tokenization.
Applying the formatting function explicitly + converts the dataset into a [language modeling](#language-modeling) type. + + """ + def __init__( + self, + model, + args = None, + data_collator = None, + train_dataset = None, + eval_dataset = None, + processing_class = None, + compute_loss_func = None, + compute_metrics = None, + callbacks = None, + optimizer_cls_and_kwargs = None, + preprocess_logits_for_metrics = None, + peft_config = None, + formatting_func = None, + **kwargs + ): + if args is None: args = UnslothSFTConfig() + use_bf16 = getattr(args, 'bf16', False) + if type(use_bf16) is not bool: use_bf16 = False + use_fp16 = getattr(args, 'fp16', False) + if type(use_fp16) is not bool: use_fp16 = False + force_float32 = False + full_finetuning = os.environ.get('UNSLOTH_ENABLE_FULL_FINETUNING', '0') == '1' + if not full_finetuning and (os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '1'): + print('Unsloth: Switching to float32 training since model cannot work with float16') + force_float32 = True + mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') + dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None) + if dtype is None: dtype = model.get_input_embeddings().dtype + from unsloth_zoo.utils import _get_dtype + dtype = _get_dtype(dtype) + float16 = dtype == torch.float16 + if not force_float32 and (float16 and use_bf16): raise TypeError('Unsloth: Model is in float16 precision but you want to use bfloat16 precision. Set fp16 to `True` and bf16 to `False`') + if not force_float32 and (not float16 and use_fp16): raise TypeError('Unsloth: Model is in bfloat16 precision but you want to use float16 precision. Set fp16 to `False` and bf16 to `True`') + if force_float32: + # Forced float32 training + args.fp16 = False + args.bf16 = False + os.environ['ACCELERATE_MIXED_PRECISION'] = 'no' + elif (not use_bf16 and not use_fp16) and mixed_precision_dtype == 'float32': + # Mixed precision training + args.fp16 = float16 + args.bf16 = not float16 + os.environ['ACCELERATE_MIXED_PRECISION'] = 'fp16' if float16 else 'bf16' + if getattr(args, 'eval_dataset', None) is not None and getattr(args, 'eval_strategy', 'no') == 'no': + args.eval_strategy = 'steps' + if getattr(args, 'eval_steps', None) is None: args.eval_steps = 0.1 + ga_steps = getattr(args, 'gradient_accumulation_steps', None) + if ga_steps is not None and ga_steps > 1: + from transformers import __version__ as transformers_version + if Version(transformers_version) <= Version('4.45.2'): + print('**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!\n' + '`pip install --upgrade --no-cache-dir --force-reinstall --no-deps unsloth transformers trl unsloth_zoo`') + if getattr(args, 'eval_strategy', 'no') != 'no': + eval_bsz = getattr(args, 'per_device_eval_batch_size', 8) + if eval_bsz == 8 and args.per_device_train_batch_size < eval_bsz: args.per_device_eval_batch_size = args.per_device_train_batch_size + if getattr(args, 'eval_accumulation_steps', None) is None and ga_steps is not None: args.eval_accumulation_steps = ga_steps + fp16_full_eval = getattr(args, 'fp16_full_eval', False) + if type(fp16_full_eval) is not bool: fp16_full_eval = False + bf16_full_eval = getattr(args, 'bf16_full_eval', False) + if type(bf16_full_eval) is not bool: bf16_full_eval = False + if args.fp16 and bf16_full_eval: args.bf16_full_eval = False; args.fp16_full_eval = True + if args.bf16 and fp16_full_eval: args.bf16_full_eval = True; args.fp16_full_eval = 
False + if force_float32: + args.bf16_full_eval = False + args.fp16_full_eval = False + elif os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') == 'bfloat16': + args.bf16_full_eval = True + args.fp16_full_eval = False + elif not bf16_full_eval and not fp16_full_eval: + args.bf16_full_eval = args.bf16 + args.fp16_full_eval = args.fp16 + _output_logits = False + if locals().get('compute_metrics', None) is not None: _output_logits = True + if locals().get('preprocess_logits_for_metrics', None) is not None: _output_logits = True + if _output_logits: + os.environ['UNSLOTH_RETURN_LOGITS'] = '1' + if 'max_seq_length' not in locals() and not hasattr(args, 'max_seq_length'): + pass + else: + model_max_seq_length = getattr(model, 'max_seq_length', None) + args_max_seq_length = getattr(args, 'max_seq_length', None) + if args_max_seq_length is None and model_max_seq_length is not None: + max_seq_length = model.max_seq_length + if hasattr(args, 'max_seq_length'): args.max_seq_length = max_seq_length + if 'max_length' not in locals() and not hasattr(args, 'max_length'): + pass + else: + if hasattr(args, 'max_seq_length') and args.max_seq_length is not None and args.max_seq_length > 0: + if hasattr(args, 'max_length'): + args.max_length = args.max_seq_length + max_length = args.max_length + else: + model_max_length = getattr(model, 'max_seq_length', None) + if model_max_length is None: model_max_length = getattr(model, 'max_length', None) + if model_max_length is not None: + args.max_length = model_max_length + max_length = args.max_length + elif hasattr(args, 'max_length') and args.max_length is not None: + max_length = args.max_length + # if we are here, then we are in a weird case where max_length is set but max_seq_length is not set + setattr(model, 'max_seq_length', max_length) + else: + print('Unsloth: We did not find `max_seq_length` or `max_length` in the model or args. 
We will set it to 1024.') + args.max_length = 1024 + if model is not None and hasattr(model, 'for_training'): + model.for_training() + if 'tokenizer' in locals() and hasattr(tokenizer, 'padding_side'): tokenizer.padding_side = 'right' + if 'processing_class' in locals(): + if hasattr(processing_class, 'padding_side'): processing_class.padding_side = 'right' + if hasattr(processing_class, 'tokenizer') and hasattr(processing_class.tokenizer, 'padding_side'): processing_class.tokenizer.padding_side = 'right' + __tokenizer = processing_class if 'processing_class' in locals() else tokenizer + from unsloth_zoo.vision_utils import UnslothVisionDataCollator + if not isinstance(data_collator, UnslothVisionDataCollator): + if isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names: + data_collator = DataCollatorForSeq2Seq( + __tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False + if hasattr(args, 'dataset_text_field'): args.dataset_text_field = '' + if hasattr(args, 'dataset_kwargs'): args.dataset_kwargs = {'skip_prepare_dataset': True} + if not isinstance(data_collator, UnslothVisionDataCollator): + if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'): + if isinstance(data_collator, DataCollatorForSeq2Seq): + data_collator = DataCollatorForSeq2Seq( + __tokenizer.tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer.tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + other_metrics = [] + + from unsloth_zoo.logging_utils import PatchRLStatistics + PatchRLStatistics('sft_trainer', other_metrics) + IGNORED_TOKENIZER_NAMES = os.environ.get('UNSLOTH_IGNORED_TOKENIZER_NAMES', '').split('\n') + from unsloth_zoo.tokenizer_utils import fix_untrained_tokens + from unsloth_zoo.training_utils import fix_zero_training_loss + if 'tokenizer' not in locals(): tokenizer = processing_class + fix_untrained_tokens(model, tokenizer, train_dataset, IGNORED_TOKENIZER_NAMES, eps = 1e-16) + fix_zero_training_loss(model, tokenizer, train_dataset) + + # [TODO] Fix up DataParallel multiplying batch sizes + # [TODO] DDP works, but DP seems to not work? 
[TODO] + if getattr(args, "parallel_mode", None) == ParallelMode.NOT_DISTRIBUTED and args.n_gpu > 1: + if getattr(args, "_n_gpu", 1) != 1: + args._n_gpu = 1 + if "model" in locals() and hasattr(model, "for_training"): + model.for_training() + super().__init__( + model = model, + args = args, + data_collator = data_collator, + train_dataset = train_dataset, + eval_dataset = eval_dataset, + processing_class = processing_class, + compute_loss_func = compute_loss_func, + compute_metrics = compute_metrics, + callbacks = callbacks, + optimizer_cls_and_kwargs = optimizer_cls_and_kwargs, + preprocess_logits_for_metrics = preprocess_logits_for_metrics, + peft_config = peft_config, + formatting_func = formatting_func,**kwargs) + if "model" in locals() and hasattr(model, "for_inference"): + model.for_inference() + if hasattr(self, 'neftune_hook_handle'): + self.neftune_hook_handle.remove() + if hasattr(self, 'neftune_hook_handle'): del self.neftune_hook_handle + if getattr(args, 'neftune_noise_alpha', None) is not None: + model.get_input_embeddings().neftune_noise_alpha = self.neftune_noise_alpha + pass + if hasattr(self, 'accelerator'): + scaler = self.accelerator.scaler + current_model = model + while hasattr(current_model, 'model'): + current_model.accelerator_scaler = scaler + current_model = current_model.model + current_model.accelerator_scaler = scaler + pass + if hasattr(self, 'train'): + self.train = MethodType(prepare_for_training_mode(self.__class__.train), self) + pass + +pass + + +if hasattr(logger, "addFilter"): + import logging + class HideLoggingMessage(logging.Filter): + def __init__(self, text): self.text = text + def filter(self, x): return not (self.text in x.getMessage()) + pass + logger.addFilter(HideLoggingMessage("`use_cache=True`")) + diff --git a/unsloth_compiled_cache/UnslothXPOTrainer.py b/unsloth_compiled_cache/UnslothXPOTrainer.py new file mode 100644 index 0000000000000000000000000000000000000000..4d3c790edb26dd8619b46ffe2a978ee9227d0afe --- /dev/null +++ b/unsloth_compiled_cache/UnslothXPOTrainer.py @@ -0,0 +1,1334 @@ +""" +2025.11.2 +2025.11.1 +4.57.2 +0.23.0 +__UNSLOTH_VERSIONING__ +""" + +# Unsloth auto generated code +# Copyright 2023-present Daniel Han-Chen, Michael Han-Chen & the Unsloth team. All rights reserved. +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public License +# along with this program. If not, see . 
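+
+# NOTE: Auto-generated compiler-cache module; it shadows TRL's XPOConfig/XPOTrainer with
+# Unsloth-patched variants (e.g. seed 3407, adamw_8bit, gradient checkpointing on by
+# default, see UnslothXPOConfig below) and is typically rewritten on regeneration.
+# A minimal usage sketch, kept in comments so nothing runs on import (names such as
+# `model`, `tokenizer`, `judge` and `train_dataset` are assumed to exist in the caller):
+#
+#     from unsloth_compiled_cache.UnslothXPOTrainer import UnslothXPOConfig, UnslothXPOTrainer
+#     config  = UnslothXPOConfig(output_dir="xpo_outputs", alpha=1e-5)
+#     trainer = UnslothXPOTrainer(model=model, ref_model=None, judge=judge, args=config,
+#                                 train_dataset=train_dataset, processing_class=tokenizer)
+#     trainer.train()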
+ +from torch import Tensor +import torch +import torch.nn as nn +from torch.nn import functional as F +from typing import Any, List, Optional, Tuple, Union, Dict, Set, Callable +from trl.trainer.xpo_trainer import (Any, BaseImageProcessor, BasePairwiseJudge, Callable, Dataset, EvalPrediction, F, FeatureExtractionMixin, IterableDataset, OnlineDPOTrainer, OptimizerNames, Optional, PeftModel, PreTrainedModel, PreTrainedTokenizerBase, ProcessorMixin, SIMPLE_CHAT_TEMPLATE, TrainerCallback, Union, XPOConfig, XPOTrainer, empty_cache, generate_model_card, get_comet_experiment_url, get_reward, is_conversational, is_peft_available, is_wandb_available, jinja2, maybe_apply_chat_template, nn, os, selective_log_softmax, textwrap, torch, truncate_right, unwrap_model_for_generation) + + +import os +from typing import * +from dataclasses import dataclass, field +from packaging.version import Version +import torch +import numpy as np +from contextlib import nullcontext +from torch.nn import functional as F +from transformers import DataCollatorForSeq2Seq, DataCollatorForLanguageModeling as TransformersDataCollatorForLanguageModeling +from transformers.training_args import ParallelMode + +# Wrap trainer with padding to right and enable training mode +import functools +from types import MethodType +def prepare_for_training_mode(f): + @functools.wraps(f) + def wrapper(self, *args, **kwargs): + # Enable training mode + if hasattr(self, 'model') and hasattr(self.model, "for_training"): + self.model.for_training() + output = f(self, *args, **kwargs) + # Return inference mode + if hasattr(self, 'model') and hasattr(self.model, "for_inference"): + self.model.for_inference() + return output + return wrapper +pass + +torch_compile_options = { + "epilogue_fusion" : True, + "max_autotune" : False, + "shape_padding" : True, + "trace.enabled" : False, + "triton.cudagraphs" : False, +} + +@torch.compile(dynamic = True, fullgraph = True, options = torch_compile_options,) +def chunked_selective_log_softmax(logits, index): + # Split into 4 chunks only + chunked_logits = torch.chunk(logits.reshape(-1, logits.shape[-1]), chunks = 4, dim = 0) + chunked_index = torch.chunk(index.reshape(-1), chunks = 4, dim = 0) + all_per_token_logps = [] + # Below loop does the same as selective_log_softmax(chunk_logits, chunk_index) + for chunk_logits, chunk_index in zip(chunked_logits, chunked_index): + chunk_logits = chunk_logits.to(torch.float32) + selected_logits = torch.gather(chunk_logits, dim = -1, index = chunk_index.unsqueeze(-1)).squeeze(-1) + logsumexp_values = torch.logsumexp(chunk_logits, dim = -1) + per_token_logps = selected_logits - logsumexp_values + all_per_token_logps.append(per_token_logps) + pass + all_per_token_logps = torch.concat(all_per_token_logps) + all_per_token_logps = all_per_token_logps.reshape((logits.shape[0], logits.shape[1])) + return all_per_token_logps + +def calculate_pad_tokens_in_prompt( + input_ids: torch.Tensor, + logits_to_keep: int, + pad_token_id: int +) -> torch.Tensor: + """ + Given prompt tensor, it returns all the left padded tokens in that sequence. 
so [pad, pad, pad, cat] = 3 tokens + """ + if logits_to_keep >= input_ids.shape[1]: + raise ValueError("logits_to_keep must be smaller than the sequence length.") + + prompt_section = input_ids[:, :-logits_to_keep] + + padding_mask = (prompt_section == pad_token_id) + + pad_token_counts = padding_mask.sum(dim=1) + + return pad_token_counts + +def create_completion_attention_mask( + completion_input_ids: torch.Tensor, + left_pad_tokens_per_prompt: torch.Tensor, + max_left_pad: int, + pad_token_id: int +) -> torch.Tensor: + """ + Given that we have a sequence, [p,p,p,c,c,c,pad,pad,pad] + + Where p are extra prompt tokens we got from slicing the torch tensor, c is completion tokens + and pad are pad tokens, this function would make a completion mask that would 0 out the pad + and p tokens. so in this example [0,0,0,1,1,1,0,0,0] + """ + batch_size, completion_len = completion_input_ids.shape + device = completion_input_ids.device + + num_tokens_to_mask = max_left_pad - left_pad_tokens_per_prompt + + indices = torch.arange(completion_len, device=device).unsqueeze(0) + shift_mask = indices >= num_tokens_to_mask.unsqueeze(1) + + non_padding_mask = (completion_input_ids != pad_token_id) + + final_mask = shift_mask & non_padding_mask + + return final_mask + +def left_pack_padding(tensor: torch.Tensor, pad_id: int) -> torch.Tensor: + """ + Moves all padding tokens in each sequence of a batch to the right. + """ + mask = (tensor != pad_id) + # Must do stable=True since binary mark is unordered + sorted_indices = torch.argsort(mask, dim=1, descending=True, stable=True) + packed_tensor = torch.gather(tensor, 1, sorted_indices) + return packed_tensor + +def align_logprobs_with_mask( + logprob_tensor: torch.Tensor, + attention_mask: torch.Tensor, + pad_value: float = 0.0 +) -> torch.Tensor: + """ + Aligns a log probability tensor with a given attention mask. + """ + + device = logprob_tensor.device + batch_size, logprob_seq_len = logprob_tensor.shape + mask_seq_len = attention_mask.shape[1] + + padded_logprobs = torch.full( + attention_mask.shape, + fill_value=pad_value, + dtype=logprob_tensor.dtype, + device=device + ) + + left_pad_counts = torch.argmax(attention_mask, dim=1) + + cols = torch.arange(logprob_seq_len, device=device) + dest_indices = left_pad_counts.unsqueeze(1) + cols + + # Create destination row indices + # Shape: [batch_size, logprob_seq_len] + row_indices = torch.arange(batch_size, device=device).unsqueeze(1).expand_as(dest_indices) + + # --- 4. Filter out-of-bounds indices and perform assignment --- + # Create a mask to identify only the indices that are within the bounds + # of the target tensor's sequence length. + valid_mask = dest_indices < mask_seq_len + + # Use this mask to select only the valid row indices, column indices, + # and the corresponding values from the logprob tensor. + # This flattens the selected elements into 1D tensors. + valid_rows = row_indices[valid_mask] + valid_cols = dest_indices[valid_mask] + valid_vals = logprob_tensor[valid_mask] + + # Place the valid values into their correct positions in the padded tensor + # using a single, efficient advanced indexing operation. + padded_logprobs[valid_rows, valid_cols] = valid_vals + + return padded_logprobs +@dataclass +class UnslothXPOConfig(XPOConfig): + """ + +Configuration class for the [`XPOTrainer`]. + +Subclass of [`OnlineDPOConfig`] we can use all its arguments and add the following: + +Parameters: + alpha (`float` or `list[float]`, *optional*, defaults to `1e-5`): + Weight of the XPO loss term. 
If a list of floats is provided then the alpha is selected for each new epoch + and the last alpha is used for the rest of the epochs. + + """ + vllm_sampling_params: Optional[Any] = field( + default = None, + metadata = {'help': 'vLLM SamplingParams'}, + ) + unsloth_num_chunks : Optional[int] = field( + default = -1, + metadata = {'help': 'Chunk size to reduce memory usage. -1 is most efficient.'}, + ) + max_seq_length : Optional[int] = field( + default = None, + metadata = {'help': 'Maximum sequence length to truncate to.'}, + ) + def __init__( + self, + output_dir = None, + overwrite_output_dir = None, + do_train = False, + do_eval = False, + do_predict = False, + eval_strategy = 'no', + prediction_loss_only = False, + per_device_train_batch_size = 4, + per_device_eval_batch_size = 4, + per_gpu_train_batch_size = None, + per_gpu_eval_batch_size = None, + gradient_accumulation_steps = 2, + eval_accumulation_steps = 2, + eval_delay = 0, + torch_empty_cache_steps = 250, + learning_rate = 5e-05, + weight_decay = 0.01, + adam_beta1 = 0.9, + adam_beta2 = 0.999, + adam_epsilon = 1e-08, + max_grad_norm = 1.0, + num_train_epochs = 3.0, + max_steps = -1, + lr_scheduler_type = 'linear', + warmup_ratio = 0.1, + warmup_steps = 0, + log_level = 'passive', + log_level_replica = 'warning', + log_on_each_node = True, + logging_dir = None, + logging_strategy = 'steps', + logging_first_step = False, + logging_steps = 1, + logging_nan_inf_filter = False, + save_strategy = 'steps', + save_steps = 500, + save_total_limit = None, + save_safetensors = True, + save_on_each_node = False, + save_only_model = False, + restore_callback_states_from_checkpoint = False, + no_cuda = False, + use_cpu = False, + use_mps_device = False, + seed = 3407, + data_seed = 3407, + jit_mode_eval = False, + bf16 = False, + fp16 = False, + fp16_opt_level = 'O1', + half_precision_backend = 'auto', + bf16_full_eval = False, + fp16_full_eval = False, + tf32 = None, + local_rank = -1, + ddp_backend = None, + tpu_num_cores = None, + tpu_metrics_debug = False, + debug = '', + dataloader_drop_last = False, + eval_steps = None, + dataloader_num_workers = 0, + dataloader_prefetch_factor = None, + past_index = -1, + run_name = None, + disable_tqdm = None, + remove_unused_columns = True, + label_names = None, + load_best_model_at_end = False, + metric_for_best_model = None, + greater_is_better = None, + ignore_data_skip = False, + fsdp = None, + fsdp_min_num_params = 0, + fsdp_config = None, + fsdp_transformer_layer_cls_to_wrap = None, + accelerator_config = None, + parallelism_config = None, + deepspeed = None, + label_smoothing_factor = 0.0, + optim = 'adamw_8bit', + optim_args = None, + adafactor = False, + group_by_length = False, + length_column_name = 'length', + report_to = None, + project = 'huggingface', + trackio_space_id = 'trackio', + ddp_find_unused_parameters = None, + ddp_bucket_cap_mb = None, + ddp_broadcast_buffers = None, + dataloader_pin_memory = True, + dataloader_persistent_workers = False, + skip_memory_metrics = True, + use_legacy_prediction_loop = False, + push_to_hub = False, + resume_from_checkpoint = None, + hub_model_id = None, + hub_strategy = 'every_save', + hub_token = None, + hub_private_repo = None, + hub_always_push = False, + hub_revision = None, + gradient_checkpointing = True, + gradient_checkpointing_kwargs = None, + include_inputs_for_metrics = False, + eval_do_concat_batches = True, + fp16_backend = 'auto', + push_to_hub_model_id = None, + push_to_hub_organization = None, + push_to_hub_token = None, + 
mp_parameters = '', + auto_find_batch_size = False, + full_determinism = False, + torchdynamo = None, + ray_scope = 'last', + ddp_timeout = 1800, + torch_compile = False, + torch_compile_backend = None, + torch_compile_mode = None, + include_tokens_per_second = False, + include_num_input_tokens_seen = False, + neftune_noise_alpha = None, + optim_target_modules = None, + batch_eval_metrics = False, + eval_on_start = False, + use_liger_kernel = False, + liger_kernel_config = None, + eval_use_gather_object = False, + average_tokens_across_devices = True, + reward_model_path = None, + judge = None, + max_new_tokens = 64, + max_length = 512, + temperature = 0.9, + top_p = 1.0, + top_k = None, + min_p = None, + repetition_penalty = 1.0, + generation_kwargs = {}, + use_transformers_paged = False, + cache_implementation = None, + missing_eos_penalty = None, + loss_type = 'sigmoid', + disable_dropout = True, + use_vllm = False, + vllm_model_impl = 'vllm', + vllm_guided_decoding_regex = None, + vllm_gpu_memory_utilization = 0.55, + vllm_mode = 'colocate', + vllm_server_base_url = None, + vllm_server_host = '0.0.0.0', + vllm_server_port = 8000, + vllm_server_timeout = 240.0, + vllm_tensor_parallel_size = 1, + ds3_gather_for_generation = True, + model_init_kwargs = None, + reward_weights = None, + dataset_num_proc = None, + gpu_memory_utilization = None, + vllm_sampling_params = None, + unsloth_num_chunks = -1, + max_seq_length = None, + **kwargs, + ): + if learning_rate < 1e-7: print(f'Unsloth: Your learning rate of `{learning_rate}` is too small and less than 1e-7! Consider increasing it, otherwise gradient updates will be close to 0!') + if learning_rate > 1: print(f'Unsloth: Your learning rate of `{learning_rate}` is way too larger > 1! Consider decreasing it to 1e-1, otherwise gradient updates will explode!') + if output_dir is None and save_strategy == 'steps' and save_steps == 500: + output_dir = 'unsloth_training_checkpoints' + save_strategy = 'no' + if dataset_num_proc is None: + from multiprocessing import cpu_count + dataset_num_proc = min(max(cpu_count()+4, 2), 64) + if temperature <= 0: + raise MathError('Unsloth: Please set a positive non-zero temperature since your results will be wrong.') + elif temperature >= 10: + raise MathError('Unsloth: Please set a positive non-zero temperature less than 10, since sampling will be quite erratic.') + + + super().__init__( + output_dir = output_dir, + overwrite_output_dir = overwrite_output_dir, + do_train = do_train, + do_eval = do_eval, + do_predict = do_predict, + eval_strategy = eval_strategy, + prediction_loss_only = prediction_loss_only, + per_device_train_batch_size = per_device_train_batch_size, + per_device_eval_batch_size = per_device_eval_batch_size, + per_gpu_train_batch_size = per_gpu_train_batch_size, + per_gpu_eval_batch_size = per_gpu_eval_batch_size, + gradient_accumulation_steps = gradient_accumulation_steps, + eval_accumulation_steps = eval_accumulation_steps, + eval_delay = eval_delay, + torch_empty_cache_steps = torch_empty_cache_steps, + learning_rate = learning_rate, + weight_decay = weight_decay, + adam_beta1 = adam_beta1, + adam_beta2 = adam_beta2, + adam_epsilon = adam_epsilon, + max_grad_norm = max_grad_norm, + num_train_epochs = num_train_epochs, + max_steps = max_steps, + lr_scheduler_type = lr_scheduler_type, + warmup_ratio = warmup_ratio, + warmup_steps = warmup_steps, + log_level = log_level, + log_level_replica = log_level_replica, + log_on_each_node = log_on_each_node, + logging_dir = logging_dir, + 
logging_strategy = logging_strategy, + logging_first_step = logging_first_step, + logging_steps = logging_steps, + logging_nan_inf_filter = logging_nan_inf_filter, + save_strategy = save_strategy, + save_steps = save_steps, + save_total_limit = save_total_limit, + save_safetensors = save_safetensors, + save_on_each_node = save_on_each_node, + save_only_model = save_only_model, + restore_callback_states_from_checkpoint = restore_callback_states_from_checkpoint, + no_cuda = no_cuda, + use_cpu = use_cpu, + use_mps_device = use_mps_device, + seed = seed, + data_seed = data_seed, + jit_mode_eval = jit_mode_eval, + bf16 = bf16, + fp16 = fp16, + fp16_opt_level = fp16_opt_level, + half_precision_backend = half_precision_backend, + bf16_full_eval = bf16_full_eval, + fp16_full_eval = fp16_full_eval, + tf32 = tf32, + local_rank = local_rank, + ddp_backend = ddp_backend, + tpu_num_cores = tpu_num_cores, + tpu_metrics_debug = tpu_metrics_debug, + debug = debug, + dataloader_drop_last = dataloader_drop_last, + eval_steps = eval_steps, + dataloader_num_workers = dataloader_num_workers, + dataloader_prefetch_factor = dataloader_prefetch_factor, + past_index = past_index, + run_name = run_name, + disable_tqdm = disable_tqdm, + remove_unused_columns = remove_unused_columns, + label_names = label_names, + load_best_model_at_end = load_best_model_at_end, + metric_for_best_model = metric_for_best_model, + greater_is_better = greater_is_better, + ignore_data_skip = ignore_data_skip, + fsdp = fsdp, + fsdp_min_num_params = fsdp_min_num_params, + fsdp_config = fsdp_config, + fsdp_transformer_layer_cls_to_wrap = fsdp_transformer_layer_cls_to_wrap, + accelerator_config = accelerator_config, + parallelism_config = parallelism_config, + deepspeed = deepspeed, + label_smoothing_factor = label_smoothing_factor, + optim = optim, + optim_args = optim_args, + adafactor = adafactor, + group_by_length = group_by_length, + length_column_name = length_column_name, + report_to = report_to, + project = project, + trackio_space_id = trackio_space_id, + ddp_find_unused_parameters = ddp_find_unused_parameters, + ddp_bucket_cap_mb = ddp_bucket_cap_mb, + ddp_broadcast_buffers = ddp_broadcast_buffers, + dataloader_pin_memory = dataloader_pin_memory, + dataloader_persistent_workers = dataloader_persistent_workers, + skip_memory_metrics = skip_memory_metrics, + use_legacy_prediction_loop = use_legacy_prediction_loop, + push_to_hub = push_to_hub, + resume_from_checkpoint = resume_from_checkpoint, + hub_model_id = hub_model_id, + hub_strategy = hub_strategy, + hub_token = hub_token, + hub_private_repo = hub_private_repo, + hub_always_push = hub_always_push, + hub_revision = hub_revision, + gradient_checkpointing = gradient_checkpointing, + gradient_checkpointing_kwargs = gradient_checkpointing_kwargs, + include_inputs_for_metrics = include_inputs_for_metrics, + eval_do_concat_batches = eval_do_concat_batches, + fp16_backend = fp16_backend, + push_to_hub_model_id = push_to_hub_model_id, + push_to_hub_organization = push_to_hub_organization, + push_to_hub_token = push_to_hub_token, + mp_parameters = mp_parameters, + auto_find_batch_size = auto_find_batch_size, + full_determinism = full_determinism, + torchdynamo = torchdynamo, + ray_scope = ray_scope, + ddp_timeout = ddp_timeout, + torch_compile = torch_compile, + torch_compile_backend = torch_compile_backend, + torch_compile_mode = torch_compile_mode, + include_tokens_per_second = include_tokens_per_second, + include_num_input_tokens_seen = include_num_input_tokens_seen, + 
            neftune_noise_alpha = neftune_noise_alpha,
+            optim_target_modules = optim_target_modules,
+            batch_eval_metrics = batch_eval_metrics,
+            eval_on_start = eval_on_start,
+            use_liger_kernel = use_liger_kernel,
+            liger_kernel_config = liger_kernel_config,
+            eval_use_gather_object = eval_use_gather_object,
+            average_tokens_across_devices = average_tokens_across_devices,
+            reward_model_path = reward_model_path,
+            judge = judge,
+            max_new_tokens = max_new_tokens,
+            max_length = max_length,
+            temperature = temperature,
+            top_p = top_p,
+            top_k = top_k,
+            min_p = min_p,
+            repetition_penalty = repetition_penalty,
+            generation_kwargs = generation_kwargs,
+            use_transformers_paged = use_transformers_paged,
+            cache_implementation = cache_implementation,
+            missing_eos_penalty = missing_eos_penalty,
+            loss_type = loss_type,
+            disable_dropout = disable_dropout,
+            use_vllm = use_vllm,
+            vllm_model_impl = vllm_model_impl,
+            vllm_guided_decoding_regex = vllm_guided_decoding_regex,
+            vllm_gpu_memory_utilization = vllm_gpu_memory_utilization,
+            vllm_mode = vllm_mode,
+            vllm_server_base_url = vllm_server_base_url,
+            vllm_server_host = vllm_server_host,
+            vllm_server_port = vllm_server_port,
+            vllm_server_timeout = vllm_server_timeout,
+            vllm_tensor_parallel_size = vllm_tensor_parallel_size,
+            ds3_gather_for_generation = ds3_gather_for_generation,
+            model_init_kwargs = model_init_kwargs,
+            reward_weights = reward_weights,
+            dataset_num_proc = dataset_num_proc,
+            gpu_memory_utilization = gpu_memory_utilization,
+            **kwargs,
+        )
+        self.vllm_sampling_params = vllm_sampling_params
+        self.unsloth_num_chunks = unsloth_num_chunks
+        self.max_seq_length = max_seq_length
+pass
+
+class _UnslothXPOTrainer(OnlineDPOTrainer):
+    r"""
+    Initialize XPOTrainer as a subclass of [`OnlineDPOTrainer`].
+
+    Args:
+        model (`transformers.PreTrainedModel`):
+            The model to train, preferably an `AutoModelForCausalLM`.
+        ref_model (`PreTrainedModelWrapper`):
+            Hugging Face transformer model with a causal language modeling head. Used for implicit reward computation
+            and loss. If no reference model is provided, the trainer will create a reference model with the same
+            architecture as the model to be optimized.
+        reward_funcs (`transformers.PreTrainedModel`):
+            The reward model to score completions with, preferably an `AutoModelForSequenceClassification`.
+        judge (`BasePairwiseJudge`):
+            The judge to use for pairwise comparison of model completions.
+        args (`XPOConfig`):
+            The XPO config arguments to use for training.
+        data_collator (`transformers.DataCollator`):
+            The data collator to use for training. If None is specified, the default data collator
+            (`DPODataCollatorWithPadding`) will be used, which will pad the sequences to the maximum length of the
+            sequences in the batch, given a dataset of paired sequences.
+        train_dataset (`datasets.Dataset`):
+            The dataset to use for training.
+        eval_dataset (`datasets.Dataset`):
+            The dataset to use for evaluation.
+        processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
+            Processing class used to process the data. If provided, will be used to automatically process the inputs
+            for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
+            reuse the fine-tuned model.
+        peft_config (`dict`):
+            The peft config to use for training.
+        compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
+            The function to use to compute the metrics. Must take an `EvalPrediction` and return a dictionary mapping
+            metric names to metric values.
+        callbacks (`list[transformers.TrainerCallback]`):
+            The callbacks to use for training.
+        optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`):
+            The optimizer and scheduler to use for training.
+        preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`):
+            The function to use to preprocess the logits before computing the metrics.
+
+    .. deprecated:: 0.22.0
+        The following parameters are deprecated and will be removed in a future version:
+
+        * `reward_model`: Use `reward_funcs` instead. For example, change `reward_model=model` to `reward_funcs=model`.
+        * `reward_processing_class`: Use `reward_processing_classes` instead. For example, change
+          `reward_processing_class=tokenizer` to `reward_processing_classes=tokenizer`.
+    """
+
+    _tag_names = ["trl", "xpo"]
+
+    def __init__(
+        self,
+        model: Optional[Union[PreTrainedModel, nn.Module]] = None,
+        ref_model: Optional[Union[PreTrainedModel, nn.Module]] = None,
+        reward_funcs: Optional[nn.Module] = None,
+        judge: Optional[BasePairwiseJudge] = None,
+        args: Optional[XPOConfig] = None,
+        data_collator: Optional[Callable] = None,
+        train_dataset: Optional[Union[Dataset, IterableDataset]] = None,
+        eval_dataset: Optional[Union[Dataset, dict[str, Dataset]]] = None,
+        processing_class: Optional[
+            Union[PreTrainedTokenizerBase, BaseImageProcessor, FeatureExtractionMixin, ProcessorMixin]
+        ] = None,
+        reward_processing_classes: Optional[Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]] = None,
+        peft_config: Optional[dict] = None,
+        compute_metrics: Optional[Callable[[EvalPrediction], dict]] = None,
+        callbacks: Optional[list[TrainerCallback]] = None,
+        optimizers: tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR] = (None, None),
+        preprocess_logits_for_metrics: Optional[Callable[[torch.Tensor, torch.Tensor], torch.Tensor]] = None,
+        # Deprecated parameters
+        reward_model: Optional[Union[PreTrainedModel, nn.Module]] = None,
+    ) -> None:
+        super().__init__(
+            model=model,
+            ref_model=ref_model,
+            judge=judge,
+            reward_funcs=reward_funcs,
+            reward_model=reward_model,
+            args=args,
+            data_collator=data_collator,
+            train_dataset=train_dataset,
+            eval_dataset=eval_dataset,
+            processing_class=processing_class,
+            reward_processing_classes=reward_processing_classes,
+            peft_config=peft_config,
+            compute_metrics=compute_metrics,
+            callbacks=callbacks,
+            optimizers=optimizers,
+            preprocess_logits_for_metrics=preprocess_logits_for_metrics,
+        )
+
+        self._alpha = self.args.alpha
+
+        # Overwrite the stats dictionary to include XPO-specific statistics
+        self.stats = {
+            # Remove "non_score_reward", "rlhf_reward", "scores"
+            # Add "loss/dpo", "loss/xpo"
+            "loss/dpo": [],
+            "loss/xpo": [],
+            "objective/kl": [],
+            "objective/entropy": [],
+            "rewards/chosen": [],
+            "rewards/rejected": [],
+            "rewards/accuracies": [],
+            "rewards/margins": [],
+            "logps/chosen": [],
+            "logps/rejected": [],
+            # Replace "contain_eos_token" by "model_contain_eos_token" and "ref_contain_eos_token"
+            "val/model_contain_eos_token": [],
+            "val/ref_contain_eos_token": [],
+            "alpha": [],
+            "beta": [],
+        }
+        if self.reward_funcs is not None:
+            if len(self.reward_funcs) != 1:
+                raise ValueError("XPOTrainer only supports one reward function/model.")
+            self.reward_funcs = self.reward_funcs[0]
+            self.stats["objective/model_scores"] = []
+
self.stats["objective/ref_scores"] = [] + self.stats["objective/scores_margin"] = [] + + @property + def alpha(self): + if isinstance(self._alpha, list): + epoch = self.state.epoch + return self._alpha[epoch] if epoch < len(self._alpha) else self._alpha[-1] + else: + return self._alpha + + def _generate_completions(self, prompts, model): + with unwrap_model_for_generation(model, self.accelerator) as unwrapped_policy_model_for_gen: + model_output = unwrapped_policy_model_for_gen.generate( + input_ids=prompts["input_ids"], + attention_mask=prompts["attention_mask"], + generation_config=self.generation_config, + ) + + actual_model_for_ref_generation: torch.nn.Module + if self.ref_model is None: + unwrapped_main_model_for_ref_logic = self.accelerator.unwrap_model(model) + + if is_peft_available() and isinstance(unwrapped_main_model_for_ref_logic, PeftModel): + actual_model_for_ref_generation = unwrapped_main_model_for_ref_logic.get_base_model() + else: + actual_model_for_ref_generation = unwrapped_main_model_for_ref_logic + else: + actual_model_for_ref_generation = self.accelerator.unwrap_model(self.ref_model) + + with unwrap_model_for_generation(actual_model_for_ref_generation, self.accelerator) as final_ref_model_for_gen: + ref_output = final_ref_model_for_gen.generate( + input_ids=prompts["input_ids"], + attention_mask=prompts["attention_mask"], + generation_config=self.generation_config, + ) + + return model_output, ref_output + + def _process_completions(self, model_output, ref_output, prompts): + context_length = prompts["input_ids"].shape[1] + + # Process model completions + model_completion_ids = model_output[:, context_length:] + model_completion_ids, model_completion_mask = truncate_right( + model_completion_ids, self.processing_class.eos_token_id, self.processing_class.pad_token_id + ) + model_data = { + "input_ids": torch.cat((prompts["input_ids"], model_completion_ids), dim=1), + "attention_mask": torch.cat((prompts["attention_mask"], model_completion_mask), dim=1), + "raw": prompts["raw"], + } + + # Process reference model completions + ref_completion_ids = ref_output[:, context_length:] + ref_completion_ids, ref_completion_mask = truncate_right( + ref_completion_ids, self.processing_class.eos_token_id, self.processing_class.pad_token_id + ) + ref_data = { + "input_ids": torch.cat((prompts["input_ids"], ref_completion_ids), dim=1), + "attention_mask": torch.cat((prompts["attention_mask"], ref_completion_mask), dim=1), + "raw": prompts["raw"], + } + + return model_data, ref_data + + def _compute_rewards(self, model_data, ref_data, context_length): + with torch.no_grad(): + _, model_scores, _ = get_reward( + self.reward_funcs, model_data["input_ids"], self.processing_class.pad_token_id, context_length + ) + _, ref_scores, _ = get_reward( + self.reward_funcs, ref_data["input_ids"], self.processing_class.pad_token_id, context_length + ) + + # Apply EOS penalty if needed + if self.args.missing_eos_penalty is not None: + model_contain_eos = torch.any(model_data["input_ids"] == self.processing_class.eos_token_id, dim=-1) + ref_contain_eos = torch.any(ref_data["input_ids"] == self.processing_class.eos_token_id, dim=-1) + model_scores[~model_contain_eos] -= self.args.missing_eos_penalty + ref_scores[~ref_contain_eos] -= self.args.missing_eos_penalty + + return model_scores, ref_scores + + def _compute_judge(self, model_data, ref_data, context_length): + prompts = model_data["raw"] + model_data_completions = self.processing_class.batch_decode( + model_data["input_ids"][:, context_length:], 
skip_special_tokens=True + ) + model_data_completions = [completion.strip() for completion in model_data_completions] + + ref_data_completions = self.processing_class.batch_decode( + ref_data["input_ids"][:, context_length:], skip_special_tokens=True + ) + ref_data_completions = [completion.strip() for completion in ref_data_completions] + + if is_conversational({"prompt": prompts[0]}): + model_data_completions = [ + [{"role": "assistant", "content": completion}] for completion in model_data_completions + ] + environment = jinja2.Environment() + template = environment.from_string(SIMPLE_CHAT_TEMPLATE) + prompts = [template.render(messages=message) for message in prompts] + model_data_completions = [template.render(messages=completion) for completion in model_data_completions] + + ref_data_completions = [ + [{"role": "assistant", "content": completion}] for completion in ref_data_completions + ] + ref_data_completions = [template.render(messages=completion) for completion in ref_data_completions] + + ranks_of_first_completion = self.judge.judge( + prompts, + list(zip(model_data_completions, ref_data_completions)), + ) + # convert ranks to a True/False mask: + # when rank == 0, it means the first completion is the best + # when rank == 1, it means the second completion is the best + return torch.tensor([rank == 0 for rank in ranks_of_first_completion], device=model_data["input_ids"].device) + + def _compute_logprobs(self, model, model_data, ref_data, context_length): + def compute_logprobs_for_data(m, data): + output = m(data["input_ids"], attention_mask=data["attention_mask"]) + logits = output.logits[:, context_length - 1 : -1] + token_logprobs = selective_log_softmax(logits, data["input_ids"][:, context_length:]) + return token_logprobs + + # Compute logprobs for model completions + model_logprobs_model_data = compute_logprobs_for_data(model, model_data) + # Compute logprobs for model on reference completions (for XPO loss) + model_logprobs_ref_data = compute_logprobs_for_data(model, ref_data) + + # Compute logprobs for reference model completions + with torch.no_grad(): + if self.ref_model is None: + with model.disable_adapter(): + ref_logprobs_model_data = compute_logprobs_for_data(model, model_data) + ref_logprobs_ref_data = compute_logprobs_for_data(model, ref_data) + else: + ref_logprobs_model_data = compute_logprobs_for_data(self.ref_model, model_data) + ref_logprobs_ref_data = compute_logprobs_for_data(self.ref_model, ref_data) + + # Mask padding tokens + model_padding_mask = model_data["attention_mask"][:, context_length:] == 0 + ref_padding_mask = ref_data["attention_mask"][:, context_length:] == 0 + model_logprobs_model_data = model_logprobs_model_data.masked_fill(model_padding_mask, 0.0) + model_logprobs_ref_data = model_logprobs_ref_data.masked_fill(ref_padding_mask, 0.0) + ref_logprobs_ref_data = ref_logprobs_ref_data.masked_fill(ref_padding_mask, 0.0) + ref_logprobs_model_data = ref_logprobs_model_data.masked_fill(model_padding_mask, 0.0) + + return model_logprobs_model_data, model_logprobs_ref_data, ref_logprobs_ref_data, ref_logprobs_model_data + + def _compute_losses( + self, + model_logprobs_model_data, + model_logprobs_ref_data, + ref_logprobs_ref_data, + ref_logprobs_model_data, + chosen_mask, + ): + # Compute log probs + model_logprobs_model_data_sum = model_logprobs_model_data.sum(1) + model_logprobs_ref_data_sum = model_logprobs_ref_data.sum(1) + ref_logprobs_ref_data_sum = ref_logprobs_ref_data.sum(1) + ref_logprobs_model_data_sum = ref_logprobs_model_data.sum(1) + 
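+        # The four sums above collapse per-token logprobs into one sequence-level
+        # logprob per completion. Below, `chosen_mask` routes each (model, ref)
+        # completion pair into chosen/rejected slots, and the total loss combines
+        # the DPO preference term with the XPO exploration term:
+        #     loss = -logsigmoid(beta * (chosen_log_ratios - rejected_log_ratios))
+        #            + alpha * model_logprobs_ref_data_sum
+        # (sigmoid loss shown; the 'ipo' branch swaps in a squared loss instead).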
+ chosen_model_logprobs = torch.where(chosen_mask, model_logprobs_model_data_sum, model_logprobs_ref_data_sum) + chosen_ref_logprobs = torch.where(chosen_mask, ref_logprobs_model_data_sum, ref_logprobs_ref_data_sum) + chosen_log_ratios = chosen_model_logprobs - chosen_ref_logprobs + + rejected_model_logprobs = torch.where(~chosen_mask, model_logprobs_model_data_sum, model_logprobs_ref_data_sum) + rejected_ref_logprobs = torch.where(~chosen_mask, ref_logprobs_model_data_sum, ref_logprobs_ref_data_sum) + rejected_log_ratios = rejected_model_logprobs - rejected_ref_logprobs + + # Compute logits as the difference between chosen and rejected log ratios + logits = chosen_log_ratios - rejected_log_ratios + + if self.args.loss_type == "sigmoid": + dpo_losses = -F.logsigmoid(self.beta * logits) + elif self.args.loss_type == "ipo": + dpo_losses = (logits - 1 / (2 * self.beta)) ** 2 + else: + raise NotImplementedError(f"invalid loss type {self.args.loss_type}") + + # Compute XPO specific loss + xpo_losses = self.alpha * model_logprobs_ref_data_sum + + # Total loss + loss = (dpo_losses + xpo_losses).mean() + + return loss, dpo_losses, xpo_losses + + def _log_statistics( + self, + model_data, + ref_data, + model_logprobs_model_data, + model_logprobs_ref_data, + ref_logprobs_ref_data, + ref_logprobs_model_data, + chosen_mask, + dpo_losses, + xpo_losses, + context_length, + model_scores=None, + ref_scores=None, + ): + # Helper function to gather and compute mean + def gather_mean(tensor): + return self.accelerator.gather_for_metrics(tensor).mean().item() + + # Log losses + self.stats["loss/dpo"].append(gather_mean(dpo_losses)) + self.stats["loss/xpo"].append(gather_mean(xpo_losses)) + + # Log scores + if self.reward_funcs is not None: + self.stats["objective/model_scores"].append(gather_mean(model_scores)) + self.stats["objective/ref_scores"].append(gather_mean(ref_scores)) + self.stats["objective/scores_margin"].append(gather_mean(model_scores - ref_scores)) + + # Log logprobs + model_logprobs_model_data_sum = model_logprobs_model_data.sum(1) + model_logprobs_ref_data_sum = model_logprobs_ref_data.sum(1) + ref_logprobs_ref_data_sum = ref_logprobs_ref_data.sum(1) + ref_logprobs_model_data_sum = ref_logprobs_model_data.sum(1) + + chosen_model_logprobs = torch.where(chosen_mask, model_logprobs_model_data_sum, model_logprobs_ref_data_sum) + chosen_ref_logprobs = torch.where(chosen_mask, ref_logprobs_model_data_sum, ref_logprobs_ref_data_sum) + chosen_log_ratios = chosen_model_logprobs - chosen_ref_logprobs + + rejected_model_logprobs = torch.where(~chosen_mask, model_logprobs_model_data_sum, model_logprobs_ref_data_sum) + rejected_ref_logprobs = torch.where(~chosen_mask, ref_logprobs_model_data_sum, ref_logprobs_ref_data_sum) + rejected_log_ratios = rejected_model_logprobs - rejected_ref_logprobs + + self.stats["logps/chosen"].append(gather_mean(chosen_model_logprobs.mean() + chosen_ref_logprobs.mean())) + self.stats["logps/rejected"].append(gather_mean(rejected_model_logprobs.mean() + rejected_ref_logprobs.mean())) + + # Log rewards + # Compute various statistics + chosen_rewards = chosen_log_ratios * self.beta + rejected_rewards = rejected_log_ratios * self.beta + self.stats["rewards/chosen"].append(gather_mean(chosen_rewards.mean())) + self.stats["rewards/rejected"].append(gather_mean(rejected_rewards.mean())) + + # Calculate KL divergence for model and ref data + kl_model_data = model_logprobs_model_data - ref_logprobs_model_data + kl_ref_data = model_logprobs_ref_data - ref_logprobs_ref_data + mean_kl 
= (kl_model_data.sum(1) + kl_ref_data.sum(1)).mean() / 2 + self.stats["objective/kl"].append(gather_mean(mean_kl)) + + # Calculate entropy for model and ref data + entropy_model_data = -model_logprobs_model_data.sum(1) + entropy_ref_data = -model_logprobs_ref_data.sum(1) + mean_entropy = (entropy_model_data.mean() + entropy_ref_data.mean()) / 2 + self.stats["objective/entropy"].append(gather_mean(mean_entropy)) + + # Calculate margins + margin = chosen_rewards - rejected_rewards + self.stats["rewards/margins"].append(gather_mean(margin.mean())) + + # Calculate accuracy + accuracy = (margin > 0).float() + self.stats["rewards/accuracies"].append(gather_mean(accuracy.mean())) + + # Log EOS token statistics + model_eos = (model_data["input_ids"][:, context_length:] == self.processing_class.eos_token_id).any(dim=1) + ref_eos = (ref_data["input_ids"][:, context_length:] == self.processing_class.eos_token_id).any(dim=1) + self.stats["val/model_contain_eos_token"].append(gather_mean(model_eos.float())) + self.stats["val/ref_contain_eos_token"].append(gather_mean(ref_eos.float())) + + # Log alpha and beta + self.stats["alpha"].append(self.alpha) + self.stats["beta"].append(self.beta) + + def training_step( + self, model: nn.Module, inputs: dict[str, Union[torch.Tensor, Any]], num_items_in_batch: Optional[int] = None + ) -> torch.Tensor: + model.train() + + # Apply chat template and tokenize the input + batch_size = len(next(iter(inputs.values()))) + prompts = inputs["prompt"] + inputs = [{k: v[i] for k, v in inputs.items()} for i in range(batch_size)] + inputs = [maybe_apply_chat_template(x, self.processing_class) for x in inputs] + inputs = [self.tokenize_row(x, self.model.config.is_encoder_decoder, self.processing_class) for x in inputs] + inputs = self.data_collator(inputs) + + # need the prompt_ only + inputs = self._prepare_inputs(inputs) + context_length = inputs["prompt_input_ids"].shape[1] + prompts = { + "input_ids": inputs["prompt_input_ids"], + "attention_mask": inputs["prompt_attention_mask"], + "raw": prompts, + } + del inputs + + # Sample completions from both the model and the reference model + model_output, ref_output = self._generate_completions(prompts, model) + + # Process model completions + model_data, ref_data = self._process_completions(model_output, ref_output, prompts) + + # Compute rewards + if self.reward_funcs is not None: + model_scores, ref_scores = self._compute_rewards(model_data, ref_data, context_length) + chosen_mask = model_scores >= ref_scores + else: + model_scores, ref_scores = None, None + chosen_mask = self._compute_judge(model_data, ref_data, context_length) + + # Compute logprobs + model_logprobs_model_data, model_logprobs_ref_data, ref_logprobs_ref_data, ref_logprobs_model_data = ( + self._compute_logprobs(model, model_data, ref_data, context_length) + ) + + # Compute loss + loss, dpo_losses, xpo_losses = self._compute_losses( + model_logprobs_model_data, + model_logprobs_ref_data, + ref_logprobs_ref_data, + ref_logprobs_model_data, + chosen_mask, + ) + + # Log everything + self._log_statistics( + model_data, + ref_data, + model_logprobs_model_data.detach(), + model_logprobs_ref_data.detach(), + ref_logprobs_ref_data, + ref_logprobs_model_data, + chosen_mask, + dpo_losses.detach(), + xpo_losses.detach(), + context_length, + model_scores, + ref_scores, + ) + + if ( + self.args.torch_empty_cache_steps is not None + and self.state.global_step % self.args.torch_empty_cache_steps == 0 + ): + empty_cache() + + kwargs = {} + # For LOMO optimizers you need to 
explicitly use the learning rate
+        if self.args.optim in [OptimizerNames.LOMO, OptimizerNames.ADALOMO]:
+            kwargs["learning_rate"] = self._get_learning_rate()
+
+        if self.args.n_gpu > 1:
+            loss = loss.mean()  # mean() to average on multi-gpu parallel training
+
+        if self.use_apex:
+            with amp.scale_loss(loss, self.optimizer) as scaled_loss:
+                scaled_loss.backward()
+        else:
+            self.accelerator.backward(loss, **kwargs)
+
+        return loss.detach() / self.args.gradient_accumulation_steps
+
+    def create_model_card(
+        self,
+        model_name: Optional[str] = None,
+        dataset_name: Optional[str] = None,
+        tags: Union[str, list[str], None] = None,
+    ):
+        """
+        Creates a draft of a model card using the information available to the `Trainer`.
+
+        Args:
+            model_name (`str` or `None`, *optional*, defaults to `None`):
+                Name of the model.
+            dataset_name (`str` or `None`, *optional*, defaults to `None`):
+                Name of the dataset used for training.
+            tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
+                Tags to be associated with the model card.
+        """
+        if not self.is_world_process_zero():
+            return
+
+        if hasattr(self.model.config, "_name_or_path") and not os.path.isdir(self.model.config._name_or_path):
+            base_model = self.model.config._name_or_path
+        else:
+            base_model = None
+
+        # normalize `tags` to a mutable set
+        if tags is None:
+            tags = set()
+        elif isinstance(tags, str):
+            tags = {tags}
+        else:
+            tags = set(tags)
+
+        if hasattr(self.model.config, "unsloth_version"):
+            tags.add("unsloth")
+
+        if "JOB_ID" in os.environ:
+            tags.add("hf_jobs")
+
+        tags.update(self._tag_names)
+
+        # docstyle-ignore
+        citation = textwrap.dedent("""\
+        @article{xie2024exploratory,
+            title = {{Exploratory Preference Optimization: Harnessing Implicit Q*-Approximation for Sample-Efficient RLHF}},
+            author = {Tengyang Xie and Dylan J. Foster and Akshay Krishnamurthy and Corby Rosset and Ahmed Awadallah and Alexander Rakhlin},
+            year = 2024,
+            eprint = {arXiv:2405.21046}
+        }""")
+
+        model_card = generate_model_card(
+            base_model=base_model,
+            model_name=model_name,
+            hub_model_id=self.hub_model_id,
+            dataset_name=dataset_name,
+            tags=tags,
+            wandb_url=wandb.run.url if is_wandb_available() and wandb.run is not None else None,
+            comet_url=get_comet_experiment_url(),
+            trainer_name="XPO",
+            trainer_citation=citation,
+            paper_title="Exploratory Preference Optimization: Harnessing Implicit Q*-Approximation for Sample-Efficient RLHF",
+            paper_id="2405.21046",
+        )
+
+        model_card.save(os.path.join(self.args.output_dir, "README.md"))
+class UnslothXPOTrainer(_UnslothXPOTrainer):
+    """
+
+Initialize XPOTrainer as a subclass of [`OnlineDPOTrainer`].
+
+Args:
+    model (`transformers.PreTrainedModel`):
+        The model to train, preferably an `AutoModelForCausalLM`.
+    ref_model (`PreTrainedModelWrapper`):
+        Hugging Face transformer model with a causal language modeling head. Used for implicit reward computation
+        and loss. If no reference model is provided, the trainer will create a reference model with the same
+        architecture as the model to be optimized.
+    reward_funcs (`transformers.PreTrainedModel`):
+        The reward model to score completions with, preferably an `AutoModelForSequenceClassification`.
+    judge (`BasePairwiseJudge`):
+        The judge to use for pairwise comparison of model completions.
+    args (`XPOConfig`):
+        The XPO config arguments to use for training.
+    data_collator (`transformers.DataCollator`):
+        The data collator to use for training.
If None is specified, the default data collator
+        (`DPODataCollatorWithPadding`) will be used, which will pad the sequences to the maximum length of the
+        sequences in the batch, given a dataset of paired sequences.
+    train_dataset (`datasets.Dataset`):
+        The dataset to use for training.
+    eval_dataset (`datasets.Dataset`):
+        The dataset to use for evaluation.
+    processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
+        Processing class used to process the data. If provided, will be used to automatically process the inputs
+        for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
+        reuse the fine-tuned model.
+    peft_config (`dict`):
+        The peft config to use for training.
+    compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
+        The function to use to compute the metrics. Must take an `EvalPrediction` and return a dictionary mapping
+        metric names to metric values.
+    callbacks (`list[transformers.TrainerCallback]`):
+        The callbacks to use for training.
+    optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`):
+        The optimizer and scheduler to use for training.
+    preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`):
+        The function to use to preprocess the logits before computing the metrics.
+
+.. deprecated:: 0.22.0
+    The following parameters are deprecated and will be removed in a future version:
+
+    * `reward_model`: Use `reward_funcs` instead. For example, change `reward_model=model` to `reward_funcs=model`.
+    * `reward_processing_class`: Use `reward_processing_classes` instead. For example, change
+      `reward_processing_class=tokenizer` to `reward_processing_classes=tokenizer`.
+
+    """
+    def __init__(
+        self,
+        model = None,
+        ref_model = None,
+        reward_funcs = None,
+        judge = None,
+        args = None,
+        data_collator = None,
+        train_dataset = None,
+        eval_dataset = None,
+        processing_class = None,
+        reward_processing_classes = None,
+        peft_config = None,
+        compute_metrics = None,
+        callbacks = None,
+        preprocess_logits_for_metrics = None,
+        reward_model = None,
+        **kwargs
+    ):
+        if args is None: args = UnslothXPOConfig()
+        use_bf16 = getattr(args, 'bf16', False)
+        if type(use_bf16) is not bool: use_bf16 = False
+        use_fp16 = getattr(args, 'fp16', False)
+        if type(use_fp16) is not bool: use_fp16 = False
+        force_float32 = False
+        full_finetuning = os.environ.get('UNSLOTH_ENABLE_FULL_FINETUNING', '0') == '1'
+        if not full_finetuning and (os.environ.get('UNSLOTH_FORCE_FLOAT32', '0') == '1'):
+            print('Unsloth: Switching to float32 training since model cannot work with float16')
+            force_float32 = True
+        mixed_precision_dtype = os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32')
+        dtype = getattr(model.config, 'dtype', None) or getattr(model.config, 'torch_dtype', None)
+        if dtype is None: dtype = model.get_input_embeddings().dtype
+        from unsloth_zoo.utils import _get_dtype
+        dtype = _get_dtype(dtype)
+        float16 = dtype == torch.float16
+        if not force_float32 and (float16 and use_bf16): raise TypeError('Unsloth: Model is in float16 precision but you want to use bfloat16 precision. Set fp16 to `True` and bf16 to `False`')
+        if not force_float32 and (not float16 and use_fp16): raise TypeError('Unsloth: Model is in bfloat16 precision but you want to use float16 precision.
Set fp16 to `False` and bf16 to `True`') + if force_float32: + # Forced float32 training + args.fp16 = False + args.bf16 = False + os.environ['ACCELERATE_MIXED_PRECISION'] = 'no' + elif (not use_bf16 and not use_fp16) and mixed_precision_dtype == 'float32': + # Mixed precision training + args.fp16 = float16 + args.bf16 = not float16 + os.environ['ACCELERATE_MIXED_PRECISION'] = 'fp16' if float16 else 'bf16' + if getattr(args, 'eval_dataset', None) is not None and getattr(args, 'eval_strategy', 'no') == 'no': + args.eval_strategy = 'steps' + if getattr(args, 'eval_steps', None) is None: args.eval_steps = 0.1 + ga_steps = getattr(args, 'gradient_accumulation_steps', None) + if ga_steps is not None and ga_steps > 1: + from transformers import __version__ as transformers_version + if Version(transformers_version) <= Version('4.45.2'): + print('**** Unsloth: Please use our fixed gradient_accumulation_steps by updating transformers, TRL and Unsloth!\n' + '`pip install --upgrade --no-cache-dir --force-reinstall --no-deps unsloth transformers trl unsloth_zoo`') + if getattr(args, 'eval_strategy', 'no') != 'no': + eval_bsz = getattr(args, 'per_device_eval_batch_size', 8) + if eval_bsz == 8 and args.per_device_train_batch_size < eval_bsz: args.per_device_eval_batch_size = args.per_device_train_batch_size + if getattr(args, 'eval_accumulation_steps', None) is None and ga_steps is not None: args.eval_accumulation_steps = ga_steps + fp16_full_eval = getattr(args, 'fp16_full_eval', False) + if type(fp16_full_eval) is not bool: fp16_full_eval = False + bf16_full_eval = getattr(args, 'bf16_full_eval', False) + if type(bf16_full_eval) is not bool: bf16_full_eval = False + if args.fp16 and bf16_full_eval: args.bf16_full_eval = False; args.fp16_full_eval = True + if args.bf16 and fp16_full_eval: args.bf16_full_eval = True; args.fp16_full_eval = False + if force_float32: + args.bf16_full_eval = False + args.fp16_full_eval = False + elif os.environ.get('UNSLOTH_MIXED_PRECISION', 'float32') == 'bfloat16': + args.bf16_full_eval = True + args.fp16_full_eval = False + elif not bf16_full_eval and not fp16_full_eval: + args.bf16_full_eval = args.bf16 + args.fp16_full_eval = args.fp16 + _output_logits = False + if locals().get('compute_metrics', None) is not None: _output_logits = True + if locals().get('preprocess_logits_for_metrics', None) is not None: _output_logits = True + if _output_logits: + os.environ['UNSLOTH_RETURN_LOGITS'] = '1' + if 'max_seq_length' not in locals() and not hasattr(args, 'max_seq_length'): + pass + else: + model_max_seq_length = getattr(model, 'max_seq_length', None) + args_max_seq_length = getattr(args, 'max_seq_length', None) + if args_max_seq_length is None and model_max_seq_length is not None: + max_seq_length = model.max_seq_length + if hasattr(args, 'max_seq_length'): args.max_seq_length = max_seq_length + if model is not None and hasattr(model, 'for_training'): + model.for_training() + if 'tokenizer' in locals() and hasattr(tokenizer, 'padding_side'): tokenizer.padding_side = 'right' + if 'processing_class' in locals(): + if hasattr(processing_class, 'padding_side'): processing_class.padding_side = 'right' + if hasattr(processing_class, 'tokenizer') and hasattr(processing_class.tokenizer, 'padding_side'): processing_class.tokenizer.padding_side = 'right' + __tokenizer = processing_class if 'processing_class' in locals() else tokenizer + from unsloth_zoo.vision_utils import UnslothVisionDataCollator + if not isinstance(data_collator, UnslothVisionDataCollator): + if 
isinstance(data_collator, DataCollatorForSeq2Seq) and 'labels' not in train_dataset.column_names: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + elif isinstance(data_collator, TransformersDataCollatorForLanguageModeling) and 'labels' in train_dataset.column_names: + data_collator = DataCollatorForSeq2Seq( + __tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + if hasattr(args, 'remove_unused_columns'): args.remove_unused_columns = False + if hasattr(args, 'dataset_text_field'): args.dataset_text_field = '' + if hasattr(args, 'dataset_kwargs'): args.dataset_kwargs = {'skip_prepare_dataset': True} + if not isinstance(data_collator, UnslothVisionDataCollator): + if not hasattr(__tokenizer, 'pad') and hasattr(__tokenizer, 'tokenizer'): + if isinstance(data_collator, DataCollatorForSeq2Seq): + data_collator = DataCollatorForSeq2Seq( + __tokenizer.tokenizer, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + else: + data_collator = TransformersDataCollatorForLanguageModeling( + __tokenizer.tokenizer, + mlm = False, + mlm_probability = 0.0, + pad_to_multiple_of = getattr(args, 'pad_to_multiple_of', None), + ) + other_metrics = [] + + from unsloth_zoo.logging_utils import PatchRLStatistics + PatchRLStatistics('xpo_trainer', other_metrics) + + # [TODO] Fix up DataParallel multiplying batch sizes + # [TODO] DDP works, but DP seems to not work? [TODO] + if getattr(args, "parallel_mode", None) == ParallelMode.NOT_DISTRIBUTED and args.n_gpu > 1: + if getattr(args, "_n_gpu", 1) != 1: + args._n_gpu = 1 + if "model" in locals() and hasattr(model, "for_training"): + model.for_training() + super().__init__( + model = model, + ref_model = ref_model, + reward_funcs = reward_funcs, + judge = judge, + args = args, + data_collator = data_collator, + train_dataset = train_dataset, + eval_dataset = eval_dataset, + processing_class = processing_class, + reward_processing_classes = reward_processing_classes, + peft_config = peft_config, + compute_metrics = compute_metrics, + callbacks = callbacks, + preprocess_logits_for_metrics = preprocess_logits_for_metrics, + reward_model = reward_model,**kwargs) + if "model" in locals() and hasattr(model, "for_inference"): + model.for_inference() + if hasattr(self, 'neftune_hook_handle'): + self.neftune_hook_handle.remove() + if hasattr(self, 'neftune_hook_handle'): del self.neftune_hook_handle + if getattr(args, 'neftune_noise_alpha', None) is not None: + model.get_input_embeddings().neftune_noise_alpha = self.neftune_noise_alpha + pass + if hasattr(self, 'accelerator'): + scaler = self.accelerator.scaler + current_model = model + while hasattr(current_model, 'model'): + current_model.accelerator_scaler = scaler + current_model = current_model.model + current_model.accelerator_scaler = scaler + pass + if hasattr(self, 'train'): + self.train = MethodType(prepare_for_training_mode(self.__class__.train), self) + pass + +pass
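+
+# Minimal usage sketch for the wrappers above. The model id, dataset, and judge
+# are placeholders borrowed from TRL's XPO example, not part of this generated
+# file; adapt them to your setup. PairRMJudge additionally requires the
+# `llm-blender` package. Guarded so it never runs on import.
+if __name__ == '__main__':
+    from datasets import load_dataset
+    from transformers import AutoModelForCausalLM, AutoTokenizer
+    from trl import PairRMJudge
+
+    demo_model = AutoModelForCausalLM.from_pretrained('Qwen/Qwen2-0.5B-Instruct')
+    demo_tokenizer = AutoTokenizer.from_pretrained('Qwen/Qwen2-0.5B-Instruct')
+    demo_judge = PairRMJudge()  # pairwise judge used in place of a reward model
+    demo_dataset = load_dataset('trl-lib/ultrafeedback-prompt', split = 'train')
+
+    demo_args = UnslothXPOConfig(output_dir = 'xpo-demo', logging_steps = 10)
+    trainer = UnslothXPOTrainer(
+        model = demo_model,
+        judge = demo_judge,
+        args = demo_args,
+        processing_class = demo_tokenizer,
+        train_dataset = demo_dataset,
+    )
+    trainer.train()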